import logging
import asyncio
from typing import Dict, Any, List, Union, Tuple, Optional
from dataclasses import dataclass
import time

logger = logging.getLogger(__name__)

from modules.connectors.connectorAiOpenai import AiOpenai
from modules.connectors.connectorAiAnthropic import AiAnthropic
from modules.connectors.connectorAiPerplexity import AiPerplexity
from modules.connectors.connectorAiTavily import ConnectorWeb
from modules.datamodels.datamodelAi import (
    AiCallOptions, 
    AiCallRequest, 
    AiCallResponse,
    OperationType, 
    ProcessingMode, 
    Priority, 
    ModelTags, 
    OPERATION_TAG_MAPPING, 
    PROCESSING_MODE_PRIORITY_MAPPING
)
from modules.datamodels.datamodelWeb import (
    WebResearchRequest,
    WebResearchActionResult,
    WebSearchResultItem,
    WebCrawlResultItem,
    WebSearchRequest,
    WebCrawlRequest,
)
from modules.datamodels.datamodelChat import ActionDocument


# Comprehensive model registry with capability tags and function mapping
aiModels: Dict[str, Dict[str, Any]] = {
    # OpenAI Models
    "openai_callAiBasic": {
        "connector": "openai",
        "function": "callAiBasic",
        "llmName": "gpt-4o",
        "contextLength": 128000,
        "costPer1kTokens": 0.03,
        "costPer1kTokensOutput": 0.06,
        "speedRating": 8,
        "qualityRating": 9,
        "capabilities": ["text_generation", "chat", "reasoning", "analysis"],
        "tags": ["text", "chat", "reasoning", "analysis", "general"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.03 + (bytesReceived / 4 / 1000) * 0.06
    },
    "openai_callAiBasic_gpt35": {
        "connector": "openai",
        "function": "callAiBasic",
        "llmName": "gpt-3.5-turbo",
        "contextLength": 16000,
        "costPer1kTokens": 0.0015,
        "costPer1kTokensOutput": 0.002,
        "speedRating": 9,
        "qualityRating": 7,
        "capabilities": ["text_generation", "chat", "reasoning"],
        "tags": ["text", "chat", "reasoning", "general", "fast"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0015 + (bytesReceived / 4 / 1000) * 0.002
    },
    "openai_callAiImage": {
        "connector": "openai",
        "function": "callAiImage",
        "llmName": "gpt-4o",
        "contextLength": 128000,
        "costPer1kTokens": 0.03,
        "costPer1kTokensOutput": 0.06,
        "speedRating": 7,
        "qualityRating": 9,
        "capabilities": ["image_analysis", "vision", "multimodal"],
        "tags": ["image", "vision", "multimodal"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.03 + (bytesReceived / 4 / 1000) * 0.06
    },
    "openai_generateImage": {
        "connector": "openai",
        "function": "generateImage",
        "llmName": "dall-e-3",
        "contextLength": 0,
        "costPer1kTokens": 0.04,
        "costPer1kTokensOutput": 0.0,
        "speedRating": 6,
        "qualityRating": 9,
        "capabilities": ["image_generation", "art", "visual_creation"],
        "tags": ["image_generation", "art", "visual"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.04
    },
    
    # Anthropic Models
    "anthropic_callAiBasic": {
        "connector": "anthropic",
        "function": "callAiBasic",
        "llmName": "claude-3-5-sonnet-20241022",
        "contextLength": 200000,
        "costPer1kTokens": 0.015,
        "costPer1kTokensOutput": 0.075,
        "speedRating": 7,
        "qualityRating": 10,
        "capabilities": ["text_generation", "chat", "reasoning", "analysis"],
        "tags": ["text", "chat", "reasoning", "analysis", "high_quality"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.015 + (bytesReceived / 4 / 1000) * 0.075
    },
    "anthropic_callAiImage": {
        "connector": "anthropic",
        "function": "callAiImage",
        "llmName": "claude-3-5-sonnet-20241022",
        "contextLength": 200000,
        "costPer1kTokens": 0.015,
        "costPer1kTokensOutput": 0.075,
        "speedRating": 7,
        "qualityRating": 10,
        "capabilities": ["image_analysis", "vision", "multimodal"],
        "tags": ["image", "vision", "multimodal", "high_quality"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.015 + (bytesReceived / 4 / 1000) * 0.075
    },
    
    # Perplexity Models
    "perplexity_callAiBasic": {
        "connector": "perplexity",
        "function": "callAiBasic",
        "llmName": "llama-3.1-sonar-large-128k-online",
        "contextLength": 128000,
        "costPer1kTokens": 0.005,
        "costPer1kTokensOutput": 0.005,
        "speedRating": 8,
        "qualityRating": 8,
        "capabilities": ["text_generation", "chat", "reasoning", "web_search"],
        "tags": ["text", "chat", "reasoning", "web_search", "cost_effective"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.005
    },
    "perplexity_callAiWithWebSearch": {
        "connector": "perplexity",
        "function": "callAiWithWebSearch",
        "llmName": "sonar-pro",
        "contextLength": 128000,
        "costPer1kTokens": 0.01,
        "costPer1kTokensOutput": 0.01,
        "speedRating": 7,
        "qualityRating": 9,
        "capabilities": ["text_generation", "web_search", "research"],
        "tags": ["text", "web_search", "research", "high_quality"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.01 + (bytesReceived / 4 / 1000) * 0.01
    },
    "perplexity_researchTopic": {
        "connector": "perplexity",
        "function": "researchTopic",
        "llmName": "mistral-7b-instruct",
        "contextLength": 32000,
        "costPer1kTokens": 0.002,
        "costPer1kTokensOutput": 0.002,
        "speedRating": 8,
        "qualityRating": 8,
        "capabilities": ["web_search", "research", "information_gathering"],
        "tags": ["web_search", "research", "information", "cost_effective"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.002 + (bytesReceived / 4 / 1000) * 0.002
    },
    "perplexity_answerQuestion": {
        "connector": "perplexity",
        "function": "answerQuestion",
        "llmName": "mistral-7b-instruct",
        "contextLength": 32000,
        "costPer1kTokens": 0.002,
        "costPer1kTokensOutput": 0.002,
        "speedRating": 8,
        "qualityRating": 8,
        "capabilities": ["web_search", "question_answering", "research"],
        "tags": ["web_search", "qa", "research", "cost_effective"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.002 + (bytesReceived / 4 / 1000) * 0.002
    },
    "perplexity_getCurrentNews": {
        "connector": "perplexity",
        "function": "getCurrentNews",
        "llmName": "mistral-7b-instruct",
        "contextLength": 32000,
        "costPer1kTokens": 0.002,
        "costPer1kTokensOutput": 0.002,
        "speedRating": 8,
        "qualityRating": 8,
        "capabilities": ["web_search", "news", "current_events"],
        "tags": ["web_search", "news", "current_events", "cost_effective"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.002 + (bytesReceived / 4 / 1000) * 0.002
    },
    
    # Tavily Web Models
    "tavily_search": {
        "connector": "tavily",
        "function": "search",
        "llmName": "tavily-search",
        "contextLength": 0,
        "costPer1kTokens": 0.0,  # Not token-based
        "costPer1kTokensOutput": 0.0,  # Not token-based
        "speedRating": 8,
        "qualityRating": 8,
        "capabilities": ["web_search", "information_retrieval", "url_discovery"],
        "tags": ["web", "search", "urls", "information"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived, searchDepth="basic", numRequests=1: (
            # Basic search: 1 credit, Advanced: 2 credits
            # Cost per credit: $0.008
            numRequests * (1 if searchDepth == "basic" else 2) * 0.008
        )
    },
    "tavily_extract": {
        "connector": "tavily",
        "function": "extract",
        "llmName": "tavily-extract",
        "contextLength": 0,
        "costPer1kTokens": 0.0,
        "costPer1kTokensOutput": 0.0,
        "speedRating": 6,
        "qualityRating": 8,
        "capabilities": ["web_crawling", "content_extraction", "text_extraction"],
        "tags": ["web", "extract", "content"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived, extractionDepth="basic", numSuccessfulUrls=1: (
            # Basic: 1 credit per 5 URLs, Advanced: 2 credits per 5 URLs
            # Only charged for successful extractions
            (numSuccessfulUrls / 5) * (1 if extractionDepth == "basic" else 2) * 0.008
        )
    },
    "tavily_crawl": {
        "connector": "tavily",
        "function": "crawl",
        "llmName": "tavily-crawl",
        "contextLength": 0,
        "costPer1kTokens": 0.0,
        "costPer1kTokensOutput": 0.0,
        "speedRating": 6,
        "qualityRating": 8,
        "capabilities": ["web_crawling", "content_extraction", "mapping"],
        "tags": ["web", "crawl", "map", "extract"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived, numPages=10, extractionDepth="basic", withInstructions=False, numSuccessfulExtractions=10: (
            # Crawl = Mapping + Extraction
            # Mapping: 1 credit per 10 pages (2 if with instructions)
            # Extraction: 1 credit per 5 successful extractions (2 if advanced)
            ((numPages / 10) * (2 if withInstructions else 1) +
            (numSuccessfulExtractions / 5) * (1 if extractionDepth == "basic" else 2)) * 0.008
        )
    },
    "tavily_scrape": {
        "connector": "tavily",
        "function": "scrape",
        "llmName": "tavily-search-extract",
        "contextLength": 0,
        "costPer1kTokens": 0.0,
        "costPer1kTokensOutput": 0.0,
        "speedRating": 6,
        "qualityRating": 8,
        "capabilities": ["web_search", "web_crawling", "content_extraction", "information_retrieval"],
        "tags": ["web", "search", "crawl", "extract", "content", "information"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived, searchDepth="basic", numSuccessfulUrls=1, extractionDepth="basic": (
            # Combines search + extraction
            # Search cost + extraction cost
            (1 if searchDepth == "basic" else 2) +
            (numSuccessfulUrls / 5) * (1 if extractionDepth == "basic" else 2)
        ) * 0.008
    },

    # Internal Models
    "internal_extraction": {
        "connector": "internal",
        "function": "extract",
        "llmName": "internal-extractor",
        "contextLength": 0,
        "costPer1kTokens": 0.0,
        "costPer1kTokensOutput": 0.0,
        "speedRating": 8,
        "qualityRating": 8,
        "capabilities": ["document_extraction", "content_processing"],
        "tags": ["internal", "extraction", "document_processing"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: 0.001 + (bytesSent + bytesReceived) / (1024 * 1024) * 0.01  # $0.001 base + $0.01/MB
    },
    "internal_generation": {
        "connector": "internal",
        "function": "generate",
        "llmName": "internal-generator",
        "contextLength": 0,
        "costPer1kTokens": 0.0,
        "costPer1kTokensOutput": 0.0,
        "speedRating": 7,
        "qualityRating": 8,
        "capabilities": ["document_generation", "content_creation"],
        "tags": ["internal", "generation", "document_creation"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: 0.002 + (bytesReceived / (1024 * 1024)) * 0.005  # $0.002 base + $0.005/MB output
    },
    "internal_rendering": {
        "connector": "internal",
        "function": "render",
        "llmName": "internal-renderer",
        "contextLength": 0,
        "costPer1kTokens": 0.0,
        "costPer1kTokensOutput": 0.0,
        "speedRating": 6,
        "qualityRating": 9,
        "capabilities": ["document_rendering", "format_conversion"],
        "tags": ["internal", "rendering", "format_conversion"],
        "calculatePriceUsd": lambda processingTime, bytesSent, bytesReceived: 0.003 + (bytesReceived / (1024 * 1024)) * 0.008  # $0.003 base + $0.008/MB output
    }
}


@dataclass(slots=True)
class AiObjects:
    """Centralized AI interface: selects model and calls connector. Includes web functionality."""

    openaiService: AiOpenai
    anthropicService: AiAnthropic
    perplexityService: AiPerplexity
    tavilyService: ConnectorWeb

    def __post_init__(self) -> None:
        if self.openaiService is None:
            raise TypeError("openaiService must be provided")
        if self.anthropicService is None:
            raise TypeError("anthropicService must be provided")
        if self.perplexityService is None:
            raise TypeError("perplexityService must be provided")
        if self.tavilyService is None:
            raise TypeError("tavilyService must be provided")

    @classmethod
    async def create(cls) -> "AiObjects":
        """Create AiObjects instance with all connectors initialized."""
        openaiService = AiOpenai()
        anthropicService = AiAnthropic()
        perplexityService = AiPerplexity()
        tavilyService = await ConnectorWeb.create()
        
        return cls(
            openaiService=openaiService,
            anthropicService=anthropicService,
            perplexityService=perplexityService,
            tavilyService=tavilyService
        )

    def _estimateCost(self, modelInfo: Dict[str, Any], contentSize: int) -> float:
        estimatedTokens = contentSize / 4
        inputCost = (estimatedTokens / 1000) * modelInfo["costPer1kTokens"]
        outputCost = (estimatedTokens / 1000) * modelInfo["costPer1kTokensOutput"] * 0.1
        return inputCost + outputCost


    def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
        """Select the best model based on operation type, tags, and requirements."""
        totalSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
        candidates: Dict[str, Dict[str, Any]] = {}
        
        # Determine required tags from operation type
        requiredTags = options.requiredTags
        if not requiredTags:
            requiredTags = OPERATION_TAG_MAPPING.get(options.operationType, [ModelTags.TEXT, ModelTags.CHAT])
        
        
        # Override priority based on processing mode if not explicitly set
        effectivePriority = options.priority
        if options.priority == Priority.BALANCED:
            effectivePriority = PROCESSING_MODE_PRIORITY_MAPPING.get(options.processingMode, Priority.BALANCED)
        
        logger.info(f"Model selection - Operation: {options.operationType}, Required tags: {requiredTags}, Priority: {effectivePriority}")
        
        for name, info in aiModels.items():
            logger.info(f"Checking model: {name}, tags: {info.get('tags', [])}, function: {info.get('function', 'unknown')}")
            # Check context length
            if info["contextLength"] > 0 and totalSize > info["contextLength"] * 0.8:
                continue
            
            # Check cost constraints
            if options.maxCost is not None:
                if self._estimateCost(info, totalSize) > options.maxCost:
                    continue
            
            # Check required tags/capabilities
            modelTags = info.get("tags", [])
            if requiredTags and not all(tag in modelTags for tag in requiredTags):
                logger.info(f"  -> Skipping {name}: missing required tags. Has: {modelTags}, needs: {requiredTags}")
                continue
            else:
                logger.info(f"  -> {name} passed tag check")
            
            # Check processing mode requirements
            if options.processingMode == ProcessingMode.DETAILED and ModelTags.FAST in modelTags:
                # Skip fast models for detailed processing
                continue
            
            candidates[name] = info
            logger.info(f"  -> {name} added to candidates")
        
        logger.info(f"Final candidates: {list(candidates.keys())}")
        
        if not candidates:
            logger.info("No candidates found, using fallback")
            # Fallback based on operation type
            if options.operationType == OperationType.IMAGE_ANALYSIS:
                logger.info("Using fallback: openai_callAiImage")
                return "openai_callAiImage"
            elif options.operationType == OperationType.IMAGE_GENERATION:
                logger.info("Using fallback: openai_generateImage")
                return "openai_generateImage"
            elif options.operationType == OperationType.WEB_RESEARCH:
                logger.info("Using fallback: perplexity_callAiWithWebSearch")
                return "perplexity_callAiWithWebSearch"
            else:
                logger.info("Using fallback: openai_callAiBasic_gpt35")
                return "openai_callAiBasic_gpt35"
        
        # Special handling for planning operations - use Claude for consistency
        if options.operationType in [OperationType.GENERATE_PLAN, OperationType.ANALYSE_CONTENT]:
            if "anthropic_callAiBasic" in candidates:
                logger.info("Planning operation: Selected Claude (anthropic_callAiBasic) for highest quality")
                return "anthropic_callAiBasic"
            
            # Fallback to GPT-4o if Claude not available
            if "openai_callAiBasic" in candidates:
                logger.info("Planning operation: Selected GPT-4o (openai_callAiBasic) as fallback")
                return "openai_callAiBasic"
        
        # Select based on priority for other operations
        if effectivePriority == Priority.SPEED:
            selected = max(candidates, key=lambda k: candidates[k]["speedRating"])
            logger.info(f"Selected by SPEED: {selected}")
            return selected
        elif effectivePriority == Priority.QUALITY:
            selected = max(candidates, key=lambda k: candidates[k]["qualityRating"])
            logger.info(f"Selected by QUALITY: {selected}")
            return selected
        elif effectivePriority == Priority.COST:
            selected = min(candidates, key=lambda k: candidates[k]["costPer1kTokens"])
            logger.info(f"Selected by COST: {selected}")
            return selected
        else:  # BALANCED
            def balancedScore(name: str) -> float:
                info = candidates[name]
                return info["qualityRating"] * 0.4 + info["speedRating"] * 0.3 + (10 - info["costPer1kTokens"] * 1000) * 0.3
            
            selected = max(candidates, key=balancedScore)
            logger.info(f"Selected by BALANCED: {selected}")
            return selected

    def _getFallbackModels(self, operationType: str) -> List[str]:
        """Get ordered list of fallback models for a given operation type."""
        fallbackMappings = {
            OperationType.GENERAL: [
                "openai_callAiBasic_gpt35",  # Fast and reliable
                "openai_callAiBasic",         # High quality
                "anthropic_callAiBasic",      # Alternative high quality
                "perplexity_callAiBasic"      # Cost effective
            ],
            OperationType.IMAGE_ANALYSIS: [
                "openai_callAiImage",         # Primary image analysis
                "anthropic_callAiImage"       # Alternative image analysis
            ],
            OperationType.IMAGE_GENERATION: [
                "openai_generateImage"         # Only image generation model
            ],
            OperationType.WEB_RESEARCH: [
                "perplexity_callAiWithWebSearch",  # Primary web research
                "perplexity_callAiBasic",          # Alternative with web search
                "openai_callAiBasic"               # Fallback to general model
            ],
            OperationType.GENERATE_PLAN: [
                "anthropic_callAiBasic",      # Best for planning
                "openai_callAiBasic",         # High quality alternative
                "openai_callAiBasic_gpt35"   # Fast fallback
            ],
            OperationType.ANALYSE_CONTENT: [
                "anthropic_callAiBasic",     # Best for analysis
                "openai_callAiBasic",        # High quality alternative
                "openai_callAiBasic_gpt35"  # Fast fallback
            ]
        }
        
        return fallbackMappings.get(operationType, fallbackMappings[OperationType.GENERAL])

    def _connectorFor(self, modelName: str):
        """Get the appropriate connector for the model."""
        connectorType = aiModels[modelName]["connector"]
        if connectorType == "openai":
            return self.openaiService
        elif connectorType == "anthropic":
            return self.anthropicService
        elif connectorType == "perplexity":
            return self.perplexityService
        elif connectorType == "tavily":
            return self.tavilyService
        else:
            raise ValueError(f"Unknown connector type: {connectorType}")

    async def call(self, request: AiCallRequest) -> AiCallResponse:
        """Call AI model for text generation with fallback mechanism."""
        
        prompt = request.prompt
        context = request.context or ""
        options = request.options

        # Calculate input bytes
        inputBytes = len((prompt + context).encode("utf-8"))

        # Compress optionally (prompt/context) - simple truncation fallback kept here
        def maybeTruncate(text: str, limit: int) -> str:
            data = text.encode("utf-8")
            if len(data) <= limit:
                return text
            return data[:limit].decode("utf-8", errors="ignore") + "... [truncated]"

        if options.compressPrompt and len(prompt.encode("utf-8")) > 2000:
            prompt = maybeTruncate(prompt, 2000)
        if options.compressContext and len(context.encode("utf-8")) > 70000:
            context = maybeTruncate(context, 70000)

        # Derive generation parameters
        temperature = getattr(options, "temperature", None)
        if temperature is None:
            temperature = 0.2
        maxTokens = getattr(options, "maxTokens", None)
        # Provide a generous default to avoid truncation for long outputs
        if maxTokens is None:
            # If resultFormat suggests large outputs (e.g., html, json), allow more tokens
            wants_large = str(getattr(options, "resultFormat", "")).lower() in ["html", "json", "md", "markdown"]
            maxTokens = 8000 if wants_large else 2000

        messages: List[Dict[str, Any]] = []
        if context:
            messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
        messages.append({"role": "user", "content": prompt})

        # Get fallback models for this operation type
        fallbackModels = self._getFallbackModels(options.operationType)
        
        # Try primary model first, then fallbacks
        lastError = None
        for attempt, modelName in enumerate(fallbackModels):
            try:
                logger.info(f"Attempting AI call with model: {modelName} (attempt {attempt + 1}/{len(fallbackModels)})")
                
                # Start timing
                startTime = time.time()
                
                connector = self._connectorFor(modelName)
                functionName = aiModels[modelName]["function"]
                
                # Call the appropriate function
                if functionName == "callAiBasic":
                    if aiModels[modelName]["connector"] == "openai":
                        content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
                    elif aiModels[modelName]["connector"] == "perplexity":
                        content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
                    else:
                        response = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
                        content = response["choices"][0]["message"]["content"]
                elif functionName == "callAiWithWebSearch":
                    # Perplexity web search function
                    query = prompt
                    if context:
                        query = f"Context: {context}\n\nQuery: {prompt}"
                    content = await connector.callAiWithWebSearch(query)
                elif functionName == "researchTopic":
                    # Perplexity research function
                    content = await connector.researchTopic(prompt)
                elif functionName == "answerQuestion":
                    # Perplexity question answering function
                    content = await connector.answerQuestion(prompt, context)
                elif functionName == "getCurrentNews":
                    # Perplexity news function
                    content = await connector.getCurrentNews(prompt)
                else:
                    raise ValueError(f"Function {functionName} not supported for text generation")

                # Calculate timing and output bytes
                endTime = time.time()
                processingTime = endTime - startTime
                outputBytes = len(content.encode("utf-8"))
                
                # Calculate price
                priceUsd = aiModels[modelName]["calculatePriceUsd"](processingTime, inputBytes, outputBytes)
                
                logger.info(f"✅ AI call successful with model: {modelName}")
                return AiCallResponse(
                    content=content, 
                    modelName=modelName, 
                    priceUsd=priceUsd,
                    processingTime=processingTime,
                    bytesSent=inputBytes,
                    bytesReceived=outputBytes,
                    errorCount=0
                )
                
            except Exception as e:
                lastError = e
                logger.warning(f"❌ AI call failed with model {modelName}: {str(e)}")
                
                # If this is not the last model, try the next one
                if attempt < len(fallbackModels) - 1:
                    logger.info(f"🔄 Trying next fallback model...")
                    continue
                else:
                    # All models failed
                    logger.error(f"💥 All {len(fallbackModels)} models failed for operation {options.operationType}")
                    break

        # All fallback attempts failed - return error response
        errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {str(lastError)}"
        logger.error(errorMsg)
        return AiCallResponse(
            content=errorMsg,
            modelName="error",
            priceUsd=0.0,
            processingTime=0.0,
            bytesSent=inputBytes,
            bytesReceived=0,
            errorCount=1
        )

    async def callImage(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None, options: AiCallOptions = None) -> AiCallResponse:
        """Call AI model for image analysis with fallback mechanism."""

        if options is None:
            options = AiCallOptions(operationType=OperationType.IMAGE_ANALYSIS)
        
        # Calculate input bytes (prompt + image data)
        inputBytes = len(prompt.encode("utf-8")) + len(imageData) if isinstance(imageData, bytes) else len(prompt.encode("utf-8")) + len(str(imageData).encode("utf-8"))
        
        # Get fallback models for image analysis
        fallbackModels = self._getFallbackModels(OperationType.IMAGE_ANALYSIS)
        
        # Try primary model first, then fallbacks
        lastError = None
        for attempt, modelName in enumerate(fallbackModels):
            try:
                logger.info(f"Attempting image analysis with model: {modelName} (attempt {attempt + 1}/{len(fallbackModels)})")
                
                # Start timing
                startTime = time.time()
                
                connector = self._connectorFor(modelName)
                functionName = aiModels[modelName]["function"]
                
                if functionName == "callAiImage":
                    content = await connector.callAiImage(prompt, imageData, mimeType)
                    
                    # Calculate timing and output bytes
                    endTime = time.time()
                    processingTime = endTime - startTime
                    outputBytes = len(content.encode("utf-8"))
                    
                    # Calculate price
                    priceUsd = aiModels[modelName]["calculatePriceUsd"](processingTime, inputBytes, outputBytes)
                    
                    logger.info(f"✅ Image analysis successful with model: {modelName}")
                    return AiCallResponse(
                        content=content,
                        modelName=modelName,
                        priceUsd=priceUsd,
                        processingTime=processingTime,
                        bytesSent=inputBytes,
                        bytesReceived=outputBytes,
                        errorCount=0
                    )
                else:
                    raise ValueError(f"Function {functionName} not supported for image analysis")
                    
            except Exception as e:
                lastError = e
                logger.warning(f"❌ Image analysis failed with model {modelName}: {str(e)}")
                
                # If this is not the last model, try the next one
                if attempt < len(fallbackModels) - 1:
                    logger.info(f"🔄 Trying next fallback model for image analysis...")
                    continue
                else:
                    # All models failed
                    logger.error(f"💥 All {len(fallbackModels)} models failed for image analysis")
                    break

        # All fallback attempts failed - return error response
        errorMsg = f"All AI models failed for image analysis. Last error: {str(lastError)}"
        logger.error(errorMsg)
        return AiCallResponse(
            content=errorMsg,
            modelName="error",
            priceUsd=0.0,
            processingTime=0.0,
            bytesSent=inputBytes,
            bytesReceived=0,
            errorCount=1
        )

    async def generateImage(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid", options: AiCallOptions = None) -> AiCallResponse:
        """Generate an image using AI."""
        
        if options is None:
            options = AiCallOptions(operationType=OperationType.IMAGE_GENERATION)
        
        # Calculate input bytes
        inputBytes = len(prompt.encode("utf-8"))
        
        # Select model for image generation
        modelName = self._selectModel(prompt, "", options)
        
        try:
            # Start timing
            startTime = time.time()
            
            connector = self._connectorFor(modelName)
            functionName = aiModels[modelName]["function"]
            
            if functionName == "generateImage":
                result = await connector.generateImage(prompt, size, quality, style)
                content = str(result)
            elif functionName == "generateImageWithVariations":
                results = await connector.generateImageWithVariations(prompt, 1, size, quality, style)
                result = results[0] if results else {}
                content = str(result)
            elif functionName == "generateImageWithChat":
                content = await connector.generateImageWithChat(prompt, size, quality, style)
            else:
                raise ValueError(f"Function {functionName} not supported for image generation")
            
            # Calculate timing and output bytes
            endTime = time.time()
            processingTime = endTime - startTime
            outputBytes = len(content.encode("utf-8"))
            
            # Calculate price
            priceUsd = aiModels[modelName]["calculatePriceUsd"](processingTime, inputBytes, outputBytes)
            
            logger.info(f"✅ Image generation successful with model: {modelName}")
            return AiCallResponse(
                content=content,
                modelName=modelName,
                priceUsd=priceUsd,
                processingTime=processingTime,
                bytesSent=inputBytes,
                bytesReceived=outputBytes,
                errorCount=0
            )
            
        except Exception as e:
            logger.error(f"❌ Image generation failed with model {modelName}: {str(e)}")
            return AiCallResponse(
                content=f"Image generation failed: {str(e)}",
                modelName=modelName,
                priceUsd=0.0,
                processingTime=0.0,
                bytesSent=inputBytes,
                bytesReceived=0,
                errorCount=1
            )

    # Web functionality methods - Simple interface to Tavily connector
    async def search_websites(self, query: str, max_results: int = 5, **kwargs) -> List[WebSearchResultItem]:
        """Search for websites using Tavily."""
        request = WebSearchRequest(
            query=query,
            max_results=max_results,
            **kwargs
        )
        result = await self.tavilyService.search(request)
        
        if result.success and result.documents:
            return result.documents[0].documentData.results
        return []

    async def crawl_websites(self, urls: List[str], extract_depth: str = "advanced", format: str = "markdown") -> List[WebCrawlResultItem]:
        """Crawl websites using Tavily."""
        from pydantic import HttpUrl
        from urllib.parse import urlparse
        
        # Safely create HttpUrl objects with proper scheme handling
        http_urls = []
        for url in urls:
            try:
                # Ensure URL has a scheme
                parsed = urlparse(url)
                if not parsed.scheme:
                    url = f"https://{url}"
                
                # Use HttpUrl with scheme parameter (this works for all URLs)
                http_urls.append(HttpUrl(url, scheme="https"))
                        
            except Exception as e:
                logger.warning(f"Skipping invalid URL {url}: {e}")
                continue
        
        if not http_urls:
            return []
        
        request = WebCrawlRequest(
            urls=http_urls,
            extract_depth=extract_depth,
            format=format
        )
        result = await self.tavilyService.crawl(request)
        
        if result.success and result.documents:
            return result.documents[0].documentData.results
        return []

    async def extract_content(self, urls: List[str], extract_depth: str = "advanced", format: str = "markdown") -> Dict[str, str]:
        """Extract content from URLs and return as dictionary."""
        crawl_results = await self.crawl_websites(urls, extract_depth, format)
        return {str(result.url): result.content for result in crawl_results}

    # Core Web Tools - Clean interface for web operations
    async def readPage(self, url: str, extract_depth: str = "advanced") -> Optional[str]:
        """Read a single web page and return its content (HTML/Markdown)."""
        logger.debug(f"Reading page: {url}")
        try:
            # URL encode the URL to handle spaces and special characters
            from urllib.parse import quote, urlparse, urlunparse
            parsed = urlparse(url)
            encoded_url = urlunparse((
                parsed.scheme,
                parsed.netloc,
                parsed.path,
                parsed.params,
                parsed.query,
                parsed.fragment
            ))
            
            # Manually encode query parameters to handle spaces
            if parsed.query:
                encoded_query = quote(parsed.query, safe='=&')
                encoded_url = urlunparse((
                    parsed.scheme,
                    parsed.netloc,
                    parsed.path,
                    parsed.params,
                    encoded_query,
                    parsed.fragment
                ))
            
            logger.debug(f"URL encoded: {url} -> {encoded_url}")
            
            content = await self.extract_content([encoded_url], extract_depth, "markdown")
            result = content.get(encoded_url)
            if result:
                logger.debug(f"Successfully read page {encoded_url}: {len(result)} chars")
            else:
                logger.warning(f"No content returned for page {encoded_url}")
            return result
        except Exception as e:
            logger.warning(f"Failed to read page {url}: {e}")
            return None

    async def getUrlsFromPage(self, url: str, extract_depth: str = "advanced") -> List[str]:
        """Get all URLs from a web page, with redundancies removed."""
        try:
            content = await self.readPage(url, extract_depth)
            if not content:
                return []
            
            links = self._extractLinksFromContent(content, url)
            # Remove duplicates while preserving order
            seen = set()
            unique_links = []
            for link in links:
                if link not in seen:
                    seen.add(link)
                    unique_links.append(link)
            
            logger.debug(f"Extracted {len(unique_links)} unique URLs from {url}")
            return unique_links
            
        except Exception as e:
            logger.warning(f"Failed to get URLs from page {url}: {e}")
            return []

    def filterUrlsOnlyPages(self, urls: List[str], max_per_domain: int = 10) -> List[str]:
        """Filter URLs to get only links for pages to follow (no images, etc.)."""
        from urllib.parse import urlparse
        
        def _isHtmlCandidate(url: str) -> bool:
            lower = url.lower()
            blocked = ('.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.ico', '.bmp',
                       '.mp4', '.mp3', '.avi', '.mov', '.mkv',
                       '.pdf', '.zip', '.rar', '.7z', '.tar', '.gz',
                       '.css', '.js', '.woff', '.woff2', '.ttf', '.eot')
            return not lower.endswith(blocked)
        
        # Group by domain
        domain_links = {}
        for link in urls:
            domain = urlparse(link).netloc
            if domain not in domain_links:
                domain_links[domain] = []
            domain_links[domain].append(link)
        
        # Filter and cap per domain
        filtered_links = []
        for domain, domain_link_list in domain_links.items():
            seen = set()
            domain_filtered = []
            
            for link in domain_link_list:
                if link in seen:
                    continue
                if not _isHtmlCandidate(link):
                    continue
                seen.add(link)
                domain_filtered.append(link)
                if len(domain_filtered) >= max_per_domain:
                    break
            
            filtered_links.extend(domain_filtered)
            logger.debug(f"Domain {domain}: {len(domain_link_list)} -> {len(domain_filtered)} links")
        
        return filtered_links

    def _extractLinksFromContent(self, content: str, base_url: str) -> List[str]:
        """Extract links from HTML/Markdown content."""
        try:
            import re
            from urllib.parse import urljoin, urlparse, quote, urlunparse
            
            def _cleanUrl(url: str) -> str:
                """Clean and encode URL to remove spaces and invalid characters."""
                # Remove quotes and extra spaces
                url = url.strip().strip('"\'')
                
                # If it's a relative URL, make it absolute first
                if not url.startswith(('http://', 'https://')):
                    url = urljoin(base_url, url)
                
                # Parse and re-encode the URL properly
                parsed = urlparse(url)
                if parsed.query:
                    # Encode query parameters properly
                    encoded_query = quote(parsed.query, safe='=&')
                    url = urlunparse((
                        parsed.scheme,
                        parsed.netloc,
                        parsed.path,
                        parsed.params,
                        encoded_query,
                        parsed.fragment
                    ))
                
                return url
            
            links = []
            
            # Extract HTML links: <a href="url"> format
            html_link_pattern = r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>'
            html_links = re.findall(html_link_pattern, content, re.IGNORECASE)
            
            for url in html_links:
                if url and not url.startswith('#') and not url.startswith('javascript:'):
                    try:
                        cleaned_url = _cleanUrl(url)
                        links.append(cleaned_url)
                        logger.debug(f"Extracted HTML link: {url} -> {cleaned_url}")
                    except Exception as e:
                        logger.debug(f"Failed to clean HTML link {url}: {e}")
            
            # Extract markdown links: [text](url) format
            markdown_link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
            markdown_links = re.findall(markdown_link_pattern, content)
            
            for text, url in markdown_links:
                if url and not url.startswith('#'):
                    try:
                        cleaned_url = _cleanUrl(url)
                        # Only keep URLs from the same domain
                        if urlparse(cleaned_url).netloc == urlparse(base_url).netloc:
                            links.append(cleaned_url)
                            logger.debug(f"Extracted markdown link: {url} -> {cleaned_url}")
                    except Exception as e:
                        logger.debug(f"Failed to clean markdown link {url}: {e}")
            
            # Extract plain URLs in the text
            url_pattern = r'https?://[^\s\)]+'
            plain_urls = re.findall(url_pattern, content)
            
            for url in plain_urls:
                try:
                    clean_url = url.rstrip('.,;!?')
                    cleaned_url = _cleanUrl(clean_url)
                    if urlparse(cleaned_url).netloc == urlparse(base_url).netloc:
                        if cleaned_url not in links:  # Avoid duplicates
                            links.append(cleaned_url)
                            logger.debug(f"Extracted plain URL: {url} -> {cleaned_url}")
                except Exception as e:
                    logger.debug(f"Failed to clean plain URL {url}: {e}")
            
            logger.debug(f"Total links extracted and cleaned: {len(links)}")
            return links
            
        except Exception as e:
            logger.warning(f"Failed to extract links from content: {e}")
            return []

    def _normalizeUrl(self, url: str) -> str:
        """Normalize URL to handle variations that should be considered duplicates."""
        if not url:
            return url
        
        # Remove trailing slashes and fragments
        url = url.rstrip('/')
        if '#' in url:
            url = url.split('#')[0]
        
        # Handle common URL variations
        url = url.replace('http://', 'https://')  # Normalize protocol
        
        return url

    async def crawlRecursively(self, urls: List[str], max_depth: int, extract_depth: str = "advanced", max_per_domain: int = 10, global_processed_urls: Optional[set] = None) -> Dict[str, str]:
        """
        Recursively crawl URLs up to specified depth.
        
        Args:
            urls: List of starting URLs to crawl
            max_depth: Maximum depth to crawl (1=main pages only, 2=main+sub-pages, etc.)
            extract_depth: Tavily extract depth setting
            max_per_domain: Maximum URLs per domain per level
            global_processed_urls: Optional global set to track processed URLs across sessions
            
        Returns:
            Dictionary mapping URL -> content for all crawled pages
        """
        logger.info(f"Starting recursive crawl: {len(urls)} starting URLs, max_depth={max_depth}")
        
        # URL index to track all processed URLs (local + global)
        processed_urls = set()
        if global_processed_urls is not None:
            # Use global index if provided, otherwise create local one
            processed_urls = global_processed_urls
            logger.info(f"Using global URL index with {len(processed_urls)} already processed URLs")
        else:
            logger.info("Using local URL index for this crawl session")
        
        all_content = {}
        
        # Current level URLs to process
        current_level_urls = urls.copy()
        
        try:
            for depth in range(1, max_depth + 1):
                logger.info(f"=== DEPTH LEVEL {depth}/{max_depth} ===")
                logger.info(f"Processing {len(current_level_urls)} URLs at depth {depth}")
                
                # URLs found at this level (for next iteration)
                next_level_urls = []
                
                for url in current_level_urls:
                    # Normalize URL for duplicate checking
                    normalized_url = self._normalizeUrl(url)
                    if normalized_url in processed_urls:
                        logger.debug(f"URL {url} (normalized: {normalized_url}) already processed, skipping")
                        continue
                    
                    try:
                        logger.info(f"Processing URL at depth {depth}: {url}")
                        logger.debug(f"Total processed URLs so far: {len(processed_urls)}")
                        
                        # Read page content
                        content = await self.readPage(url, extract_depth)
                        if content:
                            all_content[url] = content
                            processed_urls.add(normalized_url)
                            logger.info(f"✓ Successfully processed {url}: {len(content)} chars")
                            
                            # Get URLs from this page for next level
                            page_urls = await self.getUrlsFromPage(url, extract_depth)
                            logger.info(f"Found {len(page_urls)} URLs on {url}")
                            
                            # Filter URLs and add to next level
                            filtered_urls = self.filterUrlsOnlyPages(page_urls, max_per_domain)
                            logger.info(f"Filtered to {len(filtered_urls)} valid URLs")
                            
                            # Add new URLs to next level (avoiding already processed ones)
                            new_urls_count = 0
                            for new_url in filtered_urls:
                                normalized_new_url = self._normalizeUrl(new_url)
                                if normalized_new_url not in processed_urls:
                                    next_level_urls.append(new_url)
                                    new_urls_count += 1
                                else:
                                    logger.debug(f"URL {new_url} (normalized: {normalized_new_url}) already processed, skipping")
                            
                            logger.info(f"Added {new_urls_count} new URLs to next level from {url}")
                        else:
                            logger.warning(f"✗ No content extracted from {url}")
                            processed_urls.add(normalized_url)  # Mark as processed to avoid retry
                            
                    except Exception as e:
                        logger.warning(f"✗ Failed to process URL {url} at depth {depth}: {e}")
                        processed_urls.add(normalized_url)  # Mark as processed to avoid retry
                
                # Prepare for next iteration
                current_level_urls = next_level_urls
                logger.info(f"Depth {depth} completed. Found {len(next_level_urls)} URLs for next level")
                
                # Stop if no more URLs to process
                if not current_level_urls:
                    logger.info(f"No more URLs found at depth {depth}, stopping recursion")
                    break
            
            logger.info(f"Recursive crawl completed: {len(all_content)} total pages crawled")
            logger.info(f"Total URLs processed (including skipped): {len(processed_urls)}")
            logger.info(f"Unique URLs found: {len(all_content)}")
            return all_content
            
        except asyncio.TimeoutError:
            logger.warning(f"Crawling timed out, returning partial results: {len(all_content)} pages crawled so far")
            return all_content
        except Exception as e:
            logger.error(f"Crawling failed with error: {e}, returning partial results: {len(all_content)} pages crawled so far")
            return all_content

    async def webQuery(self, query: str, context: str = "", options: AiCallOptions = None) -> AiCallResponse:
        """Use Perplexity AI to provide the best answers for web-related queries."""
        
        if options is None:
            options = AiCallOptions(operationType=OperationType.WEB_RESEARCH)
        
        # Calculate input bytes
        inputBytes = len((query + context).encode("utf-8"))
        
        # Create a comprehensive prompt for web queries
        webPrompt = f"""You are an expert web researcher and information analyst. Please provide a comprehensive and accurate answer to the following web-related query.

Query: {query}

{f"Additional Context: {context}" if context else ""}

Please provide:
1. A clear, well-structured answer to the query
2. Key points and important details
3. Relevant insights and analysis
4. Any important considerations or caveats
5. Suggestions for further research if applicable

Format your response in a clear, professional manner that would be helpful for someone researching this topic."""

        try:
            # Start timing
            startTime = time.time()
            
            # Use Perplexity for web research with search capabilities
            response = await self.perplexityService.callAiWithWebSearch(webPrompt)
            
            # Calculate timing and output bytes
            endTime = time.time()
            processingTime = endTime - startTime
            outputBytes = len(response.encode("utf-8"))
            
            # Calculate price (use perplexity model pricing)
            priceUsd = aiModels["perplexity_callAiWithWebSearch"]["calculatePriceUsd"](processingTime, inputBytes, outputBytes)
            
            logger.info(f"✅ Web query successful with Perplexity")
            return AiCallResponse(
                content=response,
                modelName="perplexity_callAiWithWebSearch",
                priceUsd=priceUsd,
                processingTime=processingTime,
                bytesSent=inputBytes,
                bytesReceived=outputBytes,
                errorCount=0
            )
        except Exception as e:
            logger.error(f"Perplexity web query failed: {str(e)}")
            return AiCallResponse(
                content=f"Web query failed: {str(e)}",
                modelName="perplexity_callAiWithWebSearch",
                priceUsd=0.0,
                processingTime=0.0,
                bytesSent=inputBytes,
                bytesReceived=0,
                errorCount=1
            )

    # Utility methods
    async def listAvailableModels(self, connectorType: str = None) -> List[Dict[str, Any]]:
        """List available models, optionally filtered by connector type."""
        if connectorType:
            return [info for name, info in aiModels.items() if info["connector"] == connectorType]
        return list(aiModels.values())

    async def getModelInfo(self, modelName: str) -> Dict[str, Any]:
        """Get information about a specific model."""
        if modelName not in aiModels:
            raise ValueError(f"Model {modelName} not found")
        return aiModels[modelName]

    async def getModelsByCapability(self, capability: str) -> List[str]:
        """Get model names that support a specific capability."""
        return [name for name, info in aiModels.items() if capability in info.get("capabilities", [])]

    async def getModelsByTag(self, tag: str) -> List[str]:
        """Get model names that have a specific tag."""
        return [name for name, info in aiModels.items() if tag in info.get("tags", [])]

    async def selectRelevantWebsites(self, websites: List[str], userQuestion: str) -> Tuple[List[str], str]:
        """Select most relevant websites using AI analysis. Returns (selected_websites, ai_response)."""
        if len(websites) <= 1:
            return websites, "Only one website available, no selection needed"
        
        try:
            # Create website summaries for AI analysis
            websiteSummaries = []
            for i, url in enumerate(websites, 1):
                from urllib.parse import urlparse
                domain = urlparse(url).netloc
                summary = f"{i}. {url} (Domain: {domain})"
                websiteSummaries.append(summary)
            
            selectionPrompt = f"""
            Based on this user request: "{userQuestion}"
            
            I have {len(websites)} websites found. Please select the most relevant website(s) for this request.
            
            Available websites:
            {chr(10).join(websiteSummaries)}
            
            Please respond with the website number(s) (1, 2, 3, etc.) that are most relevant.
            Format: 1,3,5 (or just 1 for single selection)
            """
            
            # Use Perplexity to select the best websites
            response = await self.webQuery(selectionPrompt)
            
            # Parse the selection
            import re
            numbers = re.findall(r'\d+', response)
            if numbers:
                selectedWebsites = []
                for num in numbers:
                    index = int(num) - 1
                    if 0 <= index < len(websites):
                        selectedWebsites.append(websites[index])
                
                if selectedWebsites:
                    logger.info(f"AI selected {len(selectedWebsites)} websites")
                    return selectedWebsites, response
            
            # Fallback to first website
            logger.warning("AI selection failed, using first website")
            return websites[:1], f"AI selection failed, fallback to first website. AI response: {response}"
            
        except Exception as e:
            logger.error(f"Error in website selection: {str(e)}")
            return websites[:1], f"Error in website selection: {str(e)}"

    async def analyzeContentWithChunking(self, allContent: Dict[str, str], userQuestion: str) -> str:
        """Analyze content using AI with chunking for large content."""
        logger.info(f"Analyzing {len(allContent)} websites with AI")
        
        # Process content in chunks to avoid token limits
        chunkSize = 50000  # 50k chars per chunk
        allChunks = []
        
        for url, content in allContent.items():
            filteredContent = self._filterContent(content)
            if len(filteredContent) <= chunkSize:
                allChunks.append((url, filteredContent))
                logger.info(f"Content from {url}: {len(filteredContent)} chars (single chunk)")
            else:
                # Split large content into chunks
                chunkCount = (len(filteredContent) + chunkSize - 1) // chunkSize
                logger.info(f"Content from {url}: {len(filteredContent)} chars (split into {chunkCount} chunks)")
                for i in range(0, len(filteredContent), chunkSize):
                    chunk = filteredContent[i:i+chunkSize]
                    chunkNum = i//chunkSize + 1
                    allChunks.append((f"{url} (part {chunkNum})", chunk))
        
        logger.info(f"Processing {len(allChunks)} content chunks")
        
        # Analyze each chunk
        chunkAnalyses = []
        for i, (url, chunk) in enumerate(allChunks, 1):
            logger.info(f"Analyzing chunk {i}/{len(allChunks)}: {url}")
            
            try:
                analysisPrompt = f"""
                Analyze this web content and extract relevant information for: {userQuestion}
                
                Source: {url}
                Content: {chunk}
                
                Please extract key information relevant to the query.
                """
                
                analysis = await self.webQuery(analysisPrompt)
                chunkAnalyses.append(analysis)
                logger.info(f"Chunk {i}/{len(allChunks)} analyzed successfully")
                
            except Exception as e:
                logger.error(f"Chunk {i}/{len(allChunks)} error: {e}")
        
        # Combine all chunk analyses
        if chunkAnalyses:
            logger.info(f"Combining {len(chunkAnalyses)} chunk analyses")
            combinedAnalysis = "\n\n".join(chunkAnalyses)
            
            # Final synthesis
            try:
                logger.info("Performing final synthesis of all analyses")
                synthesisPrompt = f"""
                Based on these partial analyses, provide a comprehensive answer to: {userQuestion}
                
                Partial analyses:
                {combinedAnalysis}
                
                Please provide a clear, well-structured answer to the query.
                """
                
                finalAnalysis = await self.webQuery(synthesisPrompt)
                logger.info("Final synthesis completed successfully")
                return finalAnalysis
                        
            except Exception as e:
                logger.error(f"Synthesis error: {e}")
                return combinedAnalysis
        else:
            logger.error("No content could be analyzed")
            return "No content could be analyzed"

    def _filterContent(self, content: str) -> str:
        """Filter out navigation, ads, and other nonsense content."""
        lines = content.split('\n')
        filteredLines = []
        
        for line in lines:
            line = line.strip()
            # Skip empty lines
            if not line:
                continue
            # Skip navigation elements
            if any(skip in line.lower() for skip in [
                'toggle navigation', 'log in', 'sign up', 'cookies', 'privacy policy',
                'terms of service', 'subscribe', 'newsletter', 'follow us', 'share this',
                'advertisement', 'sponsored', 'banner', 'popup', 'modal'
            ]):
                continue
            # Skip image references without context
            if line.startswith('![Image') and '](' in line:
                continue
            # Skip pure links without context
            if line.startswith('[') and line.endswith(')') and '---' in line:
                continue
            # Keep meaningful content
            if len(line) > 10:  # Skip very short lines
                filteredLines.append(line)
        
        return '\n'.join(filteredLines)