End-to-end implementation of all AI models
Parent: daddf417be
Commit: 8d25ed6fc3
7 changed files with 1715 additions and 1095 deletions
@@ -317,10 +317,17 @@ class AiOpenai(BaseConnectorAi):
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
-           prompt = messages[0]["content"] if messages else ""
-           size = options.get("size", "1024x1024")
-           quality = options.get("quality", "standard")
-           style = options.get("style", "vivid")
+           # Parse unified prompt JSON format
+           promptContent = messages[0]["content"] if messages else ""
+           import json
+           promptData = json.loads(promptContent)
+
+           # Extract parameters from unified prompt JSON
+           prompt = promptData.get("prompt", promptContent)
+           size = promptData.get("size", "1024x1024")
+           quality = promptData.get("quality", "standard")
+           style = promptData.get("style", "vivid")
+
            logger.debug(f"Starting image generation with prompt: '{prompt[:100]}...'")
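For orientation, a minimal caller-side sketch of the unified prompt JSON this image path now parses. The field names (prompt, size, quality, style) are taken from the hunk above; the surrounding request construction is an illustrative assumption, not the repository's actual call site.

    import json

    # Hypothetical construction of the unified prompt payload consumed by
    # the AiOpenai image path (field names taken from the diff above).
    imagePrompt = json.dumps({
        "prompt": "A watercolor painting of a lighthouse at dawn",
        "size": "1024x1024",       # default used when the key is missing
        "quality": "standard",
        "style": "vivid",
    })
    messages = [{"role": "user", "content": imagePrompt}]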
@@ -80,7 +80,7 @@ class AiPerplexity(BaseConnectorAi):
    speedRating=6, # Slower due to AI analysis
    qualityRating=10, # Best AI analysis quality
    # capabilities removed (not used in business logic)
-   functionCall=self.callAiWithWebSearch,
+   functionCall=self.callWebOperation,
    priority=PriorityEnum.QUALITY,
    processingMode=ProcessingModeEnum.DETAILED,
    operationTypes=createOperationTypeRatings(

@@ -106,7 +106,7 @@ class AiPerplexity(BaseConnectorAi):
    speedRating=9, # Fast for basic AI tasks
    qualityRating=7, # Good but not premium quality
    # capabilities removed (not used in business logic)
-   functionCall=self.researchTopic,
+   functionCall=self.callWebOperation,
    priority=PriorityEnum.COST,
    processingMode=ProcessingModeEnum.BASIC,
    operationTypes=createOperationTypeRatings(

@@ -132,7 +132,7 @@ class AiPerplexity(BaseConnectorAi):
    speedRating=9, # Fast for Q&A tasks
    qualityRating=7, # Good but not premium quality
    # capabilities removed (not used in business logic)
-   functionCall=self.answerQuestion,
+   functionCall=self.callWebOperation,
    priority=PriorityEnum.COST,
    processingMode=ProcessingModeEnum.BASIC,
    operationTypes=createOperationTypeRatings(

@@ -158,7 +158,7 @@ class AiPerplexity(BaseConnectorAi):
    speedRating=9, # Fast for news tasks
    qualityRating=7, # Good but not premium quality
    # capabilities removed (not used in business logic)
-   functionCall=self.getCurrentNews,
+   functionCall=self.callWebOperation,
    priority=PriorityEnum.COST,
    processingMode=ProcessingModeEnum.BASIC,
    operationTypes=createOperationTypeRatings(
@@ -254,9 +254,48 @@ class AiPerplexity(BaseConnectorAi):
        temperature = options.get("temperature", model.temperature)
        maxTokens = model.maxTokens

+       # Parse unified prompt JSON format
+       promptContent = messages[0]["content"] if messages else ""
+       import json
+       promptData = json.loads(promptContent)
+
+       # Create a more specific prompt for Perplexity based on the unified format
+       searchPrompt = promptData.get("searchPrompt", promptContent)
+       maxResults = promptData.get("maxResults", 5)
+       timeRange = promptData.get("timeRange")
+       country = promptData.get("country")
+       language = promptData.get("language")
+
+       # Create enhanced prompt for Perplexity
+       enhancedPrompt = f"""Search the web for: {searchPrompt}
+
+Please provide a comprehensive response with relevant URLs and information.
+Focus on finding {maxResults} most relevant results.
+{f"Limit results to the last {timeRange}" if timeRange else ""}
+{f"Focus on {country}" if country else ""}
+{f"Provide results in {language}" if language else ""}
+
+Please format your response as a JSON object with the following structure:
+{{
+    "query": "{searchPrompt}",
+    "results": [
+        {{
+            "title": "Result title",
+            "url": "https://example.com",
+            "content": "Brief description or excerpt"
+        }}
+    ],
+    "total_count": number_of_results
+}}
+
+Include actual URLs in your response."""
+
+       # Update the messages with the enhanced prompt
+       enhancedMessages = [{"role": "user", "content": enhancedPrompt}]
+
        payload = {
            "model": model.name,
-           "messages": messages,
+           "messages": enhancedMessages,
            "temperature": temperature,
            "max_tokens": maxTokens
        }
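Likewise, a minimal sketch of the unified web-search prompt read by callAiWithWebSearch above; the keys mirror the promptData lookups in the hunk, the concrete values are made up.

    import json

    # Hypothetical unified web-search prompt (keys from the diff, values illustrative).
    searchPayload = json.dumps({
        "searchPrompt": "latest EU AI Act implementation guidance",
        "maxResults": 5,
        "timeRange": "m",        # optional
        "country": "Germany",    # optional
        "language": "en",        # optional
    })
    messages = [{"role": "user", "content": searchPayload}]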
@@ -472,6 +511,423 @@ class AiPerplexity(BaseConnectorAi):
            logger.error(f"Error getting current news: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error getting current news: {str(e)}")

async def crawl(self, modelCall: AiModelCall) -> AiModelResponse:
    """
    Crawl URLs using Perplexity's web search capabilities for content extraction.

    Args:
        modelCall: AiModelCall with messages and options

    Returns:
        AiModelResponse with content and metadata
    """
    try:
        # Extract parameters from modelCall
        messages = modelCall.messages
        model = modelCall.model
        options = modelCall.options
        temperature = options.get("temperature", model.temperature)
        maxTokens = model.maxTokens

        # Parse unified prompt JSON format
        promptContent = messages[0]["content"] if messages else ""
        import json
        promptData = json.loads(promptContent)

        # Extract parameters from unified prompt JSON
        urls = promptData.get("urls", [])
        extractDepth = promptData.get("extractDepth", "advanced")
        formatType = promptData.get("format", "markdown")

        if not urls:
            return AiModelResponse(
                content="No URLs provided for crawling",
                success=False,
                error="No URLs found in prompt data"
            )

        # Create enhanced prompt for Perplexity to crawl URLs
        urlsList = ", ".join(urls)
        enhancedPrompt = f"""Please extract and analyze content from these URLs: {urlsList}

Extraction requirements:
- Extract depth: {extractDepth}
- Output format: {formatType}
- Focus on main content, not navigation or ads
- Preserve important structure and formatting

Please format your response as a JSON object with the following structure:
{{
    "urls": {json.dumps(urls)},
    "results": [
        {{
            "url": "https://example.com",
            "title": "Page title",
            "content": "Extracted content in {formatType} format",
            "extractedAt": "2024-01-01T00:00:00Z"
        }}
    ],
    "total_count": number_of_urls_processed
}}

Extract content from each URL and provide detailed analysis."""

        # Update the messages with the enhanced prompt
        enhancedMessages = [{"role": "user", "content": enhancedPrompt}]

        payload = {
            "model": model.name,
            "messages": enhancedMessages,
            "temperature": temperature,
            "max_tokens": maxTokens
        }

        response = await self.httpClient.post(
            model.apiUrl,
            json=payload
        )

        if response.status_code != 200:
            error_detail = f"Perplexity Crawl API error: {response.status_code} - {response.text}"
            logger.error(error_detail)

            if response.status_code == 429:
                error_message = "Rate limit exceeded for crawl. Please wait before making another request."
            elif response.status_code == 401:
                error_message = "Invalid API key for crawl. Please check your Perplexity API configuration."
            elif response.status_code == 400:
                error_message = f"Invalid request to Perplexity Crawl API: {response.text}"
            else:
                error_message = f"Perplexity Crawl API error ({response.status_code}): {response.text}"

            raise HTTPException(status_code=500, detail=error_message)

        responseJson = response.json()
        content = responseJson["choices"][0]["message"]["content"]

        return AiModelResponse(
            content=content,
            success=True,
            modelId=model.name,
            metadata={"response_id": responseJson.get("id", "")}
        )

    except Exception as e:
        logger.error(f"Error calling Perplexity Crawl API: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error calling Perplexity Crawl API: {str(e)}")

async def callWebOperation(self, modelCall: AiModelCall) -> AiModelResponse:
    """
    Universal web operation handler that distributes to the correct method
    based on the operationType from AiCallOptions.
    """
    try:
        options = modelCall.options
        operationType = options.get("operationType")

        if operationType == "WEB_SEARCH":
            return await self.callAiWithWebSearch(modelCall)
        elif operationType == "WEB_CRAWL":
            return await self.crawl(modelCall)
        elif operationType == "WEB_RESEARCH":
            return await self.research(modelCall)
        elif operationType == "WEB_QUESTIONS":
            return await self.questions(modelCall)
        elif operationType == "WEB_NEWS":
            return await self.news(modelCall)
        else:
            # Fallback to research for unknown operation types
            return await self.research(modelCall)

    except Exception as e:
        return AiModelResponse(
            content="",
            success=False,
            error=str(e)
        )
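A minimal sketch of how a selector layer might drive this dispatcher. Only the operationType key and the message shape come from the code above; the AiModelCall constructor arguments and the perplexityModel/connector names are assumptions for illustration.

    import json

    # Hypothetical call site: route a news request through the universal handler.
    newsPrompt = json.dumps({"newsPrompt": "semiconductor supply chain", "maxResults": 10})
    modelCall = AiModelCall(
        model=perplexityModel,                     # an AiModel from getModels()
        messages=[{"role": "user", "content": newsPrompt}],
        options={"operationType": "WEB_NEWS"},
    )
    response = await connector.callWebOperation(modelCall)   # dispatches to news()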
async def research(self, modelCall: AiModelCall) -> AiModelResponse:
    """
    Research topics using Perplexity's web search capabilities.

    Args:
        modelCall: AiModelCall with messages and options

    Returns:
        AiModelResponse with research content and metadata
    """
    try:
        # Extract parameters from modelCall
        messages = modelCall.messages
        model = modelCall.model
        options = modelCall.options
        temperature = options.get("temperature", model.temperature)
        maxTokens = model.maxTokens

        # Parse unified prompt JSON format
        promptContent = messages[0]["content"] if messages else ""
        import json
        promptData = json.loads(promptContent)

        # Extract parameters from unified prompt JSON
        researchPrompt = promptData.get("researchPrompt", promptContent)
        maxResults = promptData.get("maxResults", 8)
        timeRange = promptData.get("timeRange")
        country = promptData.get("country")
        language = promptData.get("language")

        # Create enhanced prompt for research
        enhancedPrompt = f"""Conduct comprehensive research on: {researchPrompt}

Research requirements:
- Provide detailed analysis and insights
- Include multiple perspectives and sources
- Focus on finding {maxResults} most relevant sources
{f"Limit results to the last {timeRange}" if timeRange else ""}
{f"Focus on {country}" if country else ""}
{f"Provide results in {language}" if language else ""}

Please format your response as a JSON object with the following structure:
{{
    "query": "{researchPrompt}",
    "research_results": [
        {{
            "title": "Source title",
            "url": "https://example.com",
            "summary": "Brief summary",
            "content": "Detailed content",
            "extractedAt": "2024-01-01T00:00:00Z"
        }}
    ],
    "total_count": number_of_sources,
    "operation_type": "research"
}}

Provide comprehensive research with detailed analysis."""

        # Update the messages with the enhanced prompt
        enhancedMessages = [{"role": "user", "content": enhancedPrompt}]

        payload = {
            "model": model.name,
            "messages": enhancedMessages,
            "temperature": temperature,
            "max_tokens": maxTokens
        }

        response = await self.httpClient.post(
            model.apiUrl,
            json=payload
        )

        if response.status_code != 200:
            error_detail = f"Perplexity Research API error: {response.status_code} - {response.text}"
            logger.error(error_detail)
            raise HTTPException(status_code=500, detail=error_detail)

        responseJson = response.json()
        content = responseJson["choices"][0]["message"]["content"]

        return AiModelResponse(
            content=content,
            success=True,
            modelId=model.name,
            metadata={"response_id": responseJson.get("id", "")}
        )

    except Exception as e:
        logger.error(f"Error calling Perplexity Research API: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error calling Perplexity Research API: {str(e)}")

async def questions(self, modelCall: AiModelCall) -> AiModelResponse:
    """
    Answer questions using Perplexity's web search capabilities.

    Args:
        modelCall: AiModelCall with messages and options

    Returns:
        AiModelResponse with answer and supporting sources
    """
    try:
        # Extract parameters from modelCall
        messages = modelCall.messages
        model = modelCall.model
        options = modelCall.options
        temperature = options.get("temperature", model.temperature)
        maxTokens = model.maxTokens

        # Parse unified prompt JSON format
        promptContent = messages[0]["content"] if messages else ""
        import json
        promptData = json.loads(promptContent)

        # Extract parameters from unified prompt JSON
        question = promptData.get("question", promptContent)
        context = promptData.get("context", "")
        maxResults = promptData.get("maxResults", 6)
        timeRange = promptData.get("timeRange")
        country = promptData.get("country")
        language = promptData.get("language")

        # Create enhanced prompt for questions
        contextText = f"\nAdditional context: {context}" if context else ""
        enhancedPrompt = f"""Answer this question using web research: {question}{contextText}

Answer requirements:
- Provide a comprehensive answer with supporting evidence
- Include {maxResults} most relevant sources
- Cite sources with URLs
{f"Focus on recent information (last {timeRange})" if timeRange else ""}
{f"Focus on {country}" if country else ""}
{f"Provide answer in {language}" if language else ""}

Please format your response as a JSON object with the following structure:
{{
    "question": "{question}",
    "answer": "Comprehensive answer to the question",
    "answer_sources": [
        {{
            "title": "Source title",
            "url": "https://example.com",
            "summary": "Brief summary",
            "content": "Relevant content excerpt",
            "relevance": "Why this source is relevant"
        }}
    ],
    "total_count": number_of_sources,
    "operation_type": "questions"
}}

Provide a detailed answer with well-cited sources."""

        # Update the messages with the enhanced prompt
        enhancedMessages = [{"role": "user", "content": enhancedPrompt}]

        payload = {
            "model": model.name,
            "messages": enhancedMessages,
            "temperature": temperature,
            "max_tokens": maxTokens
        }

        response = await self.httpClient.post(
            model.apiUrl,
            json=payload
        )

        if response.status_code != 200:
            error_detail = f"Perplexity Questions API error: {response.status_code} - {response.text}"
            logger.error(error_detail)
            raise HTTPException(status_code=500, detail=error_detail)

        responseJson = response.json()
        content = responseJson["choices"][0]["message"]["content"]

        return AiModelResponse(
            content=content,
            success=True,
            modelId=model.name,
            metadata={"response_id": responseJson.get("id", "")}
        )

    except Exception as e:
        logger.error(f"Error calling Perplexity Questions API: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error calling Perplexity Questions API: {str(e)}")

async def news(self, modelCall: AiModelCall) -> AiModelResponse:
    """
    Search and analyze news using Perplexity's web search capabilities.

    Args:
        modelCall: AiModelCall with messages and options

    Returns:
        AiModelResponse with news articles and analysis
    """
    try:
        # Extract parameters from modelCall
        messages = modelCall.messages
        model = modelCall.model
        options = modelCall.options
        temperature = options.get("temperature", model.temperature)
        maxTokens = model.maxTokens

        # Parse unified prompt JSON format
        promptContent = messages[0]["content"] if messages else ""
        import json
        promptData = json.loads(promptContent)

        # Extract parameters from unified prompt JSON
        newsPrompt = promptData.get("newsPrompt", promptContent)
        maxResults = promptData.get("maxResults", 10)
        timeRange = promptData.get("timeRange", "w")  # Default to week for news
        country = promptData.get("country")
        language = promptData.get("language")

        # Create enhanced prompt for news
        enhancedPrompt = f"""Find and analyze recent news about: {newsPrompt}

News requirements:
- Find {maxResults} most recent and relevant news articles
- Focus on the last {timeRange} (recent news)
- Include diverse sources and perspectives
{f"Focus on news from {country}" if country else ""}
{f"Provide news in {language}" if language else ""}

Please format your response as a JSON object with the following structure:
{{
    "news_query": "{newsPrompt}",
    "articles": [
        {{
            "title": "Article title",
            "url": "https://example.com",
            "content": "Article content",
            "date": "2024-01-01",
            "source": "News source name",
            "summary": "Brief summary of the article"
        }}
    ],
    "total_count": number_of_articles,
    "operation_type": "news"
}}

Provide comprehensive news coverage with analysis."""

        # Update the messages with the enhanced prompt
        enhancedMessages = [{"role": "user", "content": enhancedPrompt}]

        payload = {
            "model": model.name,
            "messages": enhancedMessages,
            "temperature": temperature,
            "max_tokens": maxTokens
        }

        response = await self.httpClient.post(
            model.apiUrl,
            json=payload
        )

        if response.status_code != 200:
            error_detail = f"Perplexity News API error: {response.status_code} - {response.text}"
            logger.error(error_detail)
            raise HTTPException(status_code=500, detail=error_detail)

        responseJson = response.json()
        content = responseJson["choices"][0]["message"]["content"]

        return AiModelResponse(
            content=content,
            success=True,
            modelId=model.name,
            metadata={"response_id": responseJson.get("id", "")}
        )

    except Exception as e:
        logger.error(f"Error calling Perplexity News API: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error calling Perplexity News API: {str(e)}")

async def _testConnection(self) -> bool:
    """
    Tests the connection to the Perplexity API.
@@ -5,7 +5,7 @@ import logging
import asyncio
import re
from dataclasses import dataclass
-from typing import Optional, List
+from typing import Optional, List, Dict
from tavily import AsyncTavilyClient
from modules.shared.configuration import APP_CONFIG
from modules.aicore.aicoreBase import BaseConnectorAi
@@ -88,6 +88,251 @@ class ConnectorWeb(BaseConnectorAi):
        return unique_urls

def _intelligentUrlFiltering(self, searchResults: List[WebSearchResult], query: str, maxResults: int) -> List[WebSearchResult]:
    """
    Intelligent URL filtering with de-duplication and relevance scoring.

    Args:
        searchResults: Raw search results from Tavily
        query: Original search query for relevance scoring
        maxResults: Maximum number of results to return

    Returns:
        Filtered and deduplicated list of search results
    """
    if not searchResults:
        return []

    # Step 1: Basic de-duplication by URL
    seenUrls = set()
    uniqueResults = []

    for result in searchResults:
        # Normalize URL for better deduplication
        normalizedUrl = self._normalizeUrl(result.url)
        if normalizedUrl not in seenUrls:
            seenUrls.add(normalizedUrl)
            uniqueResults.append(result)

    logger.info(f"After basic deduplication: {len(uniqueResults)} unique URLs from {len(searchResults)} original")

    # Step 2: Relevance scoring and filtering
    scoredResults = []
    queryWords = set(query.lower().split())

    for result in uniqueResults:
        score = self._calculateRelevanceScore(result, queryWords)
        scoredResults.append((score, result))

    # Step 3: Sort by relevance score (higher is better)
    scoredResults.sort(key=lambda x: x[0], reverse=True)

    # Step 4: Take top results
    filteredResults = [result for score, result in scoredResults[:maxResults]]

    logger.info(f"After intelligent filtering: {len(filteredResults)} results selected from {len(uniqueResults)} unique")

    return filteredResults
def _normalizeUrl(self, url: str) -> str:
    """
    Normalize URL for better deduplication.
    Removes common variations that represent the same content.
    """
    if not url:
        return url

    # Remove trailing slashes
    url = url.rstrip('/')

    # Remove common query parameters that don't affect content
    import urllib.parse
    parsed = urllib.parse.urlparse(url)

    # Remove common tracking parameters
    queryParams = urllib.parse.parse_qs(parsed.query)
    filteredParams = {}

    for key, values in queryParams.items():
        # Keep important parameters, remove tracking ones
        if key.lower() not in ['utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
                               'fbclid', 'gclid', 'ref', 'source', 'campaign']:
            filteredParams[key] = values

    # Rebuild query string
    filteredQuery = urllib.parse.urlencode(filteredParams, doseq=True)

    # Reconstruct URL
    normalized = urllib.parse.urlunparse((
        parsed.scheme,
        parsed.netloc,
        parsed.path,
        parsed.params,
        filteredQuery,
        parsed.fragment
    ))

    return normalized
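To make the effect of _normalizeUrl concrete, a small standalone sketch of the same tracking-parameter stripping using only urllib.parse from the standard library; the example URL is invented.

    import urllib.parse

    url = "https://example.com/article?utm_source=newsletter&page=2"
    parsed = urllib.parse.urlparse(url.rstrip('/'))
    # Keep everything except the tracking keys listed in _normalizeUrl
    params = {k: v for k, v in urllib.parse.parse_qs(parsed.query).items()
              if k.lower() not in {'utm_source', 'utm_medium', 'utm_campaign', 'utm_term',
                                   'utm_content', 'fbclid', 'gclid', 'ref', 'source', 'campaign'}}
    print(urllib.parse.urlunparse(parsed._replace(query=urllib.parse.urlencode(params, doseq=True))))
    # prints: https://example.com/article?page=2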
def _calculateRelevanceScore(self, result: WebSearchResult, queryWords: set) -> float:
    """
    Calculate relevance score for a search result.
    Higher score means more relevant to the query.
    """
    score = 0.0

    # Title relevance (most important)
    titleWords = set(result.title.lower().split())
    titleMatches = len(queryWords.intersection(titleWords))
    score += titleMatches * 3.0  # Weight title matches heavily

    # URL relevance
    urlWords = set(result.url.lower().split('/'))
    urlMatches = len(queryWords.intersection(urlWords))
    score += urlMatches * 1.5

    # Content relevance (if available)
    if hasattr(result, 'raw_content') and result.raw_content:
        contentWords = set(result.raw_content.lower().split())
        contentMatches = len(queryWords.intersection(contentWords))
        score += contentMatches * 0.1  # Lower weight for content matches

    # Domain authority bonus (simple heuristic)
    domain = result.url.split('/')[2] if '/' in result.url else result.url
    if any(auth_domain in domain.lower() for auth_domain in
           ['wikipedia.org', 'github.com', 'stackoverflow.com', 'reddit.com', 'medium.com']):
        score += 1.0

    # Penalty for very long URLs (often less relevant)
    if len(result.url) > 100:
        score -= 0.5

    return score
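As a worked example of the weights above (assuming a WebSearchResult with only a title and URL): for the query "python asyncio tutorial" and a result titled "Asyncio Tutorial" at https://docs.python.org/3/library/asyncio.html with no raw_content, the title contributes 2 matches x 3.0 = 6.0; splitting the URL on "/" yields no exact token match ("asyncio.html" is not equal to "asyncio"), adding 0; docs.python.org is not in the authority list, so no bonus; the URL is under 100 characters, so no penalty. Total score: 6.0.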
async def _optimizeSearchQuery(self, query: str, timeRange: str = None, country: str = None, language: str = None) -> tuple[str, dict]:
    """
    Use AI to optimize search query and parameters (from old SubWebResearch).

    Args:
        query: Original search query
        timeRange: Time range filter
        country: Country filter
        language: Language filter

    Returns:
        Tuple of (optimized_query, optimized_parameters)
    """
    try:
        # Create AI prompt for query optimization (from old code)
        queryOptimizerPrompt = f"""You are a search query optimizer.

USER QUERY: {query}

Your task: Create a search query and parameters for the USER QUERY given.

RULES:
1. The search query MUST be related to the user query above
2. Extract key terms from the user query
3. Determine appropriate country/language based on the query context
4. Keep search query short (2-6 words)

Return ONLY this JSON format:
{{
    "user_prompt": "search query based on user query above",
    "country": "Full English country name (ISO-3166; map codes via pycountry/i18n-iso-countries)",
    "language": "language_code_or_null",
    "topic": "general|news|academic_or_null",
    "time_range": "d|w|m|y_or_null",
    "selection_strategy": "single|multiple|specific_page",
    "selection_criteria": "what URLs to prioritize",
    "expected_url_patterns": ["pattern1", "pattern2"],
    "estimated_result_count": number
}}"""

        # Use AI to optimize the query
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions
        aiRequest = AiCallRequest(
            prompt=queryOptimizerPrompt,
            options=AiCallOptions()
        )

        # Get AI response (this would need to be called through the AI interface)
        # For now, return the original query with basic optimization
        logger.info(f"AI query optimization requested for: '{query}'")

        # Basic optimization fallback
        optimizedQuery = query
        optimizedParams = {
            "time_range": timeRange,
            "country": country,
            "language": language,
            "topic": "general"
        }

        return optimizedQuery, optimizedParams

    except Exception as e:
        logger.warning(f"Query optimization failed: {str(e)}, using original query")
        return query, {"time_range": timeRange, "country": country, "language": language}
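A possible completion of the optimization step, purely as a sketch: callAi stands in for the project's AI interface, which is not part of this diff, and the parsing simply consumes the JSON the prompt requests, falling back exactly as the current code does.

    import json

    # Hypothetical continuation inside _optimizeSearchQuery; `callAi` is a placeholder.
    rawReply = await callAi(aiRequest)
    try:
        optimizerJson = json.loads(rawReply)
        optimizedQuery = optimizerJson.get("user_prompt", query)
        optimizedParams = {
            "time_range": optimizerJson.get("time_range") or timeRange,
            "country": optimizerJson.get("country") or country,
            "language": optimizerJson.get("language") or language,
            "topic": optimizerJson.get("topic") or "general",
        }
    except (json.JSONDecodeError, TypeError):
        # Fall back to the untouched query, as the current implementation does
        optimizedQuery, optimizedParams = query, {"time_range": timeRange, "country": country, "language": language}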
async def _aiBasedUrlSelection(self, searchResults: List[WebSearchResult], originalQuery: str, maxResults: int) -> List[WebSearchResult]:
    """
    Use AI to select the most relevant URLs from search results (from old SubWebResearch).

    Args:
        searchResults: Raw search results from Tavily
        originalQuery: Original user query for context
        maxResults: Maximum number of results to return

    Returns:
        AI-selected and filtered list of search results
    """
    try:
        if not searchResults:
            return []

        # Step 1: Basic de-duplication
        seenUrls = set()
        uniqueResults = []

        for result in searchResults:
            normalizedUrl = self._normalizeUrl(result.url)
            if normalizedUrl not in seenUrls:
                seenUrls.add(normalizedUrl)
                uniqueResults.append(result)

        logger.info(f"After basic deduplication: {len(uniqueResults)} unique URLs from {len(searchResults)} original")

        if len(uniqueResults) <= maxResults:
            return uniqueResults

        # Step 2: AI-based URL selection (from old code)
        logger.info(f"AI selecting most relevant {maxResults} URLs from {len(uniqueResults)} unique results")

        # Create AI prompt for URL selection (from old code)
        urlList = "\n".join([f"{i+1}. {result.url}" for i, result in enumerate(uniqueResults)])
        aiPrompt = f"""Select the most relevant URLs from these search results:

{urlList}

Return only the URLs that are most relevant for the user's query: "{originalQuery}"
One URL per line.
"""

        # For now, use intelligent filtering as fallback
        # In a full implementation, this would call the AI interface
        logger.info("Using intelligent filtering as AI selection fallback")

        # Use the existing intelligent filtering
        filteredResults = self._intelligentUrlFiltering(uniqueResults, originalQuery, maxResults)

        logger.info(f"AI-based selection completed: {len(filteredResults)} results selected")
        return filteredResults

    except Exception as e:
        logger.warning(f"AI-based URL selection failed: {str(e)}, using intelligent filtering")
        return self._intelligentUrlFiltering(searchResults, originalQuery, maxResults)

def getModels(self) -> List[AiModel]:
    """Get all available Tavily models."""
    return [
@@ -104,7 +349,7 @@ class ConnectorWeb(BaseConnectorAi):
    speedRating=9, # Very fast for URL discovery
    qualityRating=9, # Excellent URL discovery quality
    # capabilities removed (not used in business logic)
-   functionCall=self.search,
+   functionCall=self.callWebOperation,
    priority=PriorityEnum.BALANCED,
    processingMode=ProcessingModeEnum.BASIC,
    operationTypes=createOperationTypeRatings(

@@ -130,12 +375,12 @@ class ConnectorWeb(BaseConnectorAi):
    speedRating=7, # Good for content extraction
    qualityRating=9, # Excellent content extraction quality
    # capabilities removed (not used in business logic)
-   functionCall=self.crawl,
+   functionCall=self.callWebOperation,
    priority=PriorityEnum.BALANCED,
    processingMode=ProcessingModeEnum.BASIC,
    operationTypes=createOperationTypeRatings(
-       (OperationTypeEnum.WEB_RESEARCH, 3),
        (OperationTypeEnum.WEB_CRAWL, 10),
+       (OperationTypeEnum.WEB_RESEARCH, 3),
        (OperationTypeEnum.WEB_NEWS, 3),
        (OperationTypeEnum.WEB_QUESTIONS, 2)
    ),

@@ -155,7 +400,7 @@ class ConnectorWeb(BaseConnectorAi):
    speedRating=7, # Good for combined search+extract
    qualityRating=8, # Good quality for structured data
    # capabilities removed (not used in business logic)
-   functionCall=self.scrape,
+   functionCall=self.callWebOperation,
    priority=PriorityEnum.BALANCED,
    processingMode=ProcessingModeEnum.BASIC,
    operationTypes=createOperationTypeRatings(
@@ -190,28 +435,73 @@ class ConnectorWeb(BaseConnectorAi):
    # Standardized method using AiModelCall/AiModelResponse pattern

+   async def callWebOperation(self, modelCall) -> "AiModelResponse":
+       """
+       Universal web operation handler that distributes to the correct method
+       based on the operationType from AiCallOptions.
+       """
+       try:
+           options = modelCall.options
+           operationType = options.get("operationType")
+
+           if operationType == "WEB_SEARCH":
+               return await self.search(modelCall)
+           elif operationType == "WEB_CRAWL":
+               return await self.crawl(modelCall)
+           elif operationType in ["WEB_RESEARCH", "WEB_QUESTIONS", "WEB_NEWS"]:
+               return await self.research(modelCall)
+           else:
+               # Fallback to search for unknown operation types
+               return await self.search(modelCall)
+
+       except Exception as e:
+           return AiModelResponse(
+               content="",
+               success=False,
+               error=str(e)
+           )
+
    async def search(self, modelCall) -> "AiModelResponse":
        """Search using standardized AiModelCall/AiModelResponse pattern"""
        try:
            # Extract parameters from modelCall
-           query = modelCall.messages[0]["content"] if modelCall.messages else ""
+           prompt_content = modelCall.messages[0]["content"] if modelCall.messages else ""
            options = modelCall.options

-           raw_results = await self._search(
-               query=query,
-               max_results=options.get("max_results", 5),
-               search_depth=options.get("search_depth"),
-               time_range=options.get("time_range"),
-               topic=options.get("topic"),
-               include_domains=options.get("include_domains"),
-               exclude_domains=options.get("exclude_domains"),
-               language=options.get("language"),
-               include_answer=options.get("include_answer"),
-               include_raw_content=options.get("include_raw_content"),
+           # Parse unified prompt JSON format
+           import json
+           promptData = json.loads(prompt_content)
+
+           # Extract parameters from unified prompt JSON
+           query = promptData.get("searchPrompt", prompt_content)
+           maxResults = promptData.get("maxResults", 5)
+           timeRange = promptData.get("timeRange")
+           country = promptData.get("country")
+           language = promptData.get("language")
+
+           # Use basic search depth for web search operations
+           searchDepth = "basic"
+
+           # Step 1: AI Query Optimization (from old SubWebResearch)
+           optimizedQuery, optimizedParams = await self._optimizeSearchQuery(query, timeRange, country, language)
+
+           # Step 2: Get more results than requested to allow for intelligent filtering
+           searchResults = await self._search(
+               query=optimizedQuery,
+               max_results=min(maxResults * 3, 30),  # Get more results for better AI selection
+               search_depth=searchDepth,
+               time_range=optimizedParams.get("time_range", timeRange),
+               country=optimizedParams.get("country", country),
+               language=optimizedParams.get("language", language),
+               include_answer=options.get("include_answer", True),
+               include_raw_content=options.get("include_raw_content", True),
            )
+
+           # Step 3: AI-based URL selection and intelligent filtering
+           filteredResults = await self._aiBasedUrlSelection(searchResults, query, maxResults)

            # Convert to JSON string
-           results_json = {
+           resultsJson = {
                "query": query,
                "results": [
                    {
@@ -219,20 +509,22 @@ class ConnectorWeb(BaseConnectorAi):
                        "url": result.url,
                        "content": getattr(result, 'raw_content', None)
                    }
-                   for result in raw_results
+                   for result in filteredResults
                ],
-               "total_count": len(raw_results)
+               "total_count": len(filteredResults),
+               "original_count": len(searchResults),
+               "filtered_count": len(searchResults) - len(filteredResults)
            }

            import json
-           content = json.dumps(results_json, indent=2)
+           content = json.dumps(resultsJson, indent=2)

            return AiModelResponse(
                content=content,
                success=True,
                metadata={
-                   "total_count": len(raw_results),
-                   "search_depth": options.get("search_depth", "basic")
+                   "total_count": len(filteredResults),
+                   "search_depth": searchDepth
                }
            )
@@ -247,49 +539,214 @@ class ConnectorWeb(BaseConnectorAi):
        """Crawl using standardized AiModelCall/AiModelResponse pattern"""
        try:
            # Extract parameters from modelCall
-           options = modelCall.options
-           urls = options.get("urls", [])
-
-           # If no URLs provided, try to extract URLs from the prompt
-           if not urls and modelCall.messages:
-               prompt = modelCall.messages[0]["content"] if modelCall.messages else ""
-               urls = self._extractUrlsFromPrompt(prompt)
-
-           if not urls:
-               return AiModelResponse(
-                   content="No URLs provided for crawling",
-                   success=False,
-                   error="No URLs found in options or prompt"
-               )
-
-           raw_results = await self._crawl(
-               urls,
-               extract_depth=options.get("extract_depth"),
-               format=options.get("format"),
-           )
-
-           # Convert to JSON string
-           results_json = {
-               "urls": urls,
-               "results": [
-                   {
-                       "url": result.url,
-                       "content": result.content
-                   }
-                   for result in raw_results
-               ],
-               "total_count": len(raw_results)
-           }
-
-           import json
-           content = json.dumps(results_json, indent=2)
-
-           return AiModelResponse(
-               content=content,
-               success=True,
-               metadata={
-                   "total_count": len(raw_results),
-                   "extract_depth": options.get("extract_depth", "basic")
+           promptContent = modelCall.messages[0]["content"] if modelCall.messages else ""
+           options = modelCall.options
+
+           # Parse unified prompt JSON format
+           import json
+           promptData = json.loads(promptContent)
+
+           # Extract parameters from unified prompt JSON
+           urls = promptData.get("urls", [])
+           extractDepth = promptData.get("extractDepth", "advanced")
+           formatType = promptData.get("format", "markdown")
+
+           if not urls:
+               return AiModelResponse(
+                   content="No URLs provided for crawling",
+                   success=False,
+                   error="No URLs found in prompt data"
+               )
+
+           rawResults = await self._crawl(
+               urls,
+               extract_depth=extractDepth,
+               format=formatType,
+           )
+
+           # Convert to JSON string
+           resultsJson = {
+               "urls": urls,
+               "results": [
+                   {
+                       "url": result.url,
+                       "title": getattr(result, 'title', ''),
+                       "content": result.content,
+                       "extractedAt": getattr(result, 'extracted_at', '')
+                   }
+                   for result in rawResults
+               ],
+               "total_count": len(rawResults)
+           }
+
+           import json
+           content = json.dumps(resultsJson, indent=2)
+
+           return AiModelResponse(
+               content=content,
+               success=True,
+               metadata={
+                   "total_count": len(rawResults),
+                   "urls_processed": len(urls)
+               }
+           )
+
+       except Exception as e:
+           return AiModelResponse(
+               content="",
+               success=False,
+               error=str(e)
+           )
+
+   async def research(self, modelCall) -> "AiModelResponse":
+       """
+       Handle WEB_RESEARCH, WEB_QUESTIONS, WEB_NEWS operations using search + crawl combination.
+       Single method for all three operation types with different standard settings.
+       """
+       try:
+           # Extract parameters from modelCall
+           promptContent = modelCall.messages[0]["content"] if modelCall.messages else ""
+           options = modelCall.options
+           operationType = options.get("operationType")
+
+           # Parse unified prompt JSON format
+           import json
+           promptData = json.loads(promptContent)
+
+           # Extract parameters based on operation type
+           if operationType == "WEB_RESEARCH":
+               query = promptData.get("researchPrompt", promptContent)
+               maxResults = promptData.get("maxResults", 8)
+               searchDepth = "basic"
+               timeRange = promptData.get("timeRange")
+               country = promptData.get("country")
+               language = promptData.get("language")
+               topic = "general"
+
+           elif operationType == "WEB_QUESTIONS":
+               query = promptData.get("question", promptContent)
+               maxResults = promptData.get("maxResults", 6)
+               searchDepth = "basic"
+               timeRange = promptData.get("timeRange")
+               country = promptData.get("country")
+               language = promptData.get("language")
+               topic = "general"
+
+           elif operationType == "WEB_NEWS":
+               query = promptData.get("newsPrompt", promptContent)
+               maxResults = promptData.get("maxResults", 10)
+               searchDepth = "basic"
+               timeRange = promptData.get("timeRange", "w")  # Default to week for news
+               country = promptData.get("country")
+               language = promptData.get("language")
+               topic = "news"
+
+           else:
+               # Fallback to research settings
+               query = promptData.get("researchPrompt", promptContent)
+               maxResults = promptData.get("maxResults", 5)
+               searchDepth = "basic"
+               timeRange = promptData.get("timeRange")
+               country = promptData.get("country")
+               language = promptData.get("language")
+               topic = "general"
+
+           logger.info(f"Tavily {operationType} operation: query='{query}', maxResults={maxResults}, topic={topic}")
+
+           # Step 1: Search for relevant URLs
+           searchResults = await self._search(
+               query=query,
+               max_results=maxResults * 2,  # Get more for better selection
+               search_depth=searchDepth,
+               time_range=timeRange,
+               country=country,
+               language=language,
+               topic=topic,
+               include_answer=True,
+               include_raw_content=True
+           )
+
+           if not searchResults:
+               return AiModelResponse(
+                   content="No search results found",
+                   success=False,
+                   error="No relevant URLs found for the query"
+               )
+
+           # Step 2: AI-based URL selection
+           selectedResults = await self._aiBasedUrlSelection(searchResults, query, maxResults)
+
+           if not selectedResults:
+               return AiModelResponse(
+                   content="No relevant URLs selected",
+                   success=False,
+                   error="AI could not select relevant URLs"
+               )
+
+           # Step 3: Crawl selected URLs for content
+           urlsToCrawl = [result.url for result in selectedResults]
+           crawlResults = await self._crawl(
+               urls=urlsToCrawl,
+               extract_depth="advanced",
+               format="markdown"
+           )
+
+           # Step 4: Combine search and crawl results
+           combinedResults = []
+           for searchResult in selectedResults:
+               # Find corresponding crawl result
+               crawlResult = next((cr for cr in crawlResults if cr.url == searchResult.url), None)
+
+               combinedResult = {
+                   "title": searchResult.title,
+                   "url": searchResult.url,
+                   "summary": getattr(searchResult, 'raw_content', ''),
+                   "content": crawlResult.content if crawlResult else '',
+                   "extractedAt": getattr(crawlResult, 'extracted_at', '') if crawlResult else ''
+               }
+               combinedResults.append(combinedResult)
+
+           # Step 5: Format response based on operation type
+           if operationType == "WEB_RESEARCH":
+               responseData = {
+                   "query": query,
+                   "research_results": combinedResults,
+                   "total_count": len(combinedResults),
+                   "operation_type": "research"
+               }
+           elif operationType == "WEB_QUESTIONS":
+               responseData = {
+                   "question": query,
+                   "answer_sources": combinedResults,
+                   "total_count": len(combinedResults),
+                   "operation_type": "questions"
+               }
+           elif operationType == "WEB_NEWS":
+               responseData = {
+                   "news_query": query,
+                   "articles": combinedResults,
+                   "total_count": len(combinedResults),
+                   "operation_type": "news"
+               }
+           else:
+               responseData = {
+                   "query": query,
+                   "results": combinedResults,
+                   "total_count": len(combinedResults),
+                   "operation_type": operationType
+               }
+
+           import json
+           content = json.dumps(responseData, indent=2)
+
+           return AiModelResponse(
+               content=content,
+               success=True,
+               metadata={
+                   "total_count": len(combinedResults),
+                   "urls_searched": len(searchResults),
+                   "urls_crawled": len(crawlResults),
+                   "operation_type": operationType
                }
            )
@ -576,3 +1033,262 @@ class ConnectorWeb(BaseConnectorAi):
|
||||||
await asyncio.sleep(retryDelay)
|
await asyncio.sleep(retryDelay)
|
||||||
else:
|
else:
|
||||||
raise Exception(f"Crawl failed after {maxRetries + 1} attempts: {str(e)}")
|
raise Exception(f"Crawl failed after {maxRetries + 1} attempts: {str(e)}")
|
||||||
|
|
||||||
|
async def comprehensiveWebResearch(self, request: WebResearchRequest) -> WebResearchResult:
|
||||||
|
"""
|
||||||
|
Perform comprehensive web research using Tavily's search and extract capabilities.
|
||||||
|
This method orchestrates the full web research workflow.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
logger.info(f"COMPREHENSIVE WEB RESEARCH STARTED")
|
||||||
|
logger.info(f"User Query: {request.user_prompt}")
|
||||||
|
logger.info(f"Max Results: {request.max_results}, Max Pages: {request.max_pages}")
|
||||||
|
|
||||||
|
# Global URL index to track all processed URLs across the entire research session
|
||||||
|
global_processed_urls = set()
|
||||||
|
|
||||||
|
# Step 1: Find relevant websites - either provided URLs or AI-determined main URLs
|
||||||
|
logger.info(f"=== STEP 1: INITIAL MAIN URLS LIST ===")
|
||||||
|
|
||||||
|
if request.urls:
|
||||||
|
# Use provided URLs as initial main URLs
|
||||||
|
websites = request.urls
|
||||||
|
logger.info(f"Using provided URLs ({len(websites)}):")
|
||||||
|
for i, url in enumerate(websites, 1):
|
||||||
|
logger.info(f" {i}. {url}")
|
||||||
|
else:
|
||||||
|
# Use AI to determine main URLs based on user's intention
|
||||||
|
logger.info(f"AI analyzing user intent: '{request.user_prompt}'")
|
||||||
|
|
||||||
|
# Use basic search parameters
|
||||||
|
search_query = request.user_prompt
|
||||||
|
search_depth = request.search_depth or "basic"
|
||||||
|
time_range = request.time_range
|
||||||
|
topic = request.topic
|
||||||
|
country = request.country
|
||||||
|
language = request.language
|
||||||
|
max_results = request.max_results
|
||||||
|
|
||||||
|
logger.info(f"Using search parameters: query='{search_query}', depth={search_depth}, time_range={time_range}, topic={topic}")
|
||||||
|
|
||||||
|
# Perform web search
|
||||||
|
search_results = await self._search(
|
||||||
|
query=search_query,
|
||||||
|
max_results=max_results,
|
||||||
|
search_depth=search_depth,
|
||||||
|
time_range=time_range,
|
||||||
|
topic=topic,
|
||||||
|
country=country,
|
||||||
|
language=language,
|
||||||
|
include_answer=True,
|
||||||
|
include_raw_content=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract URLs from search results
|
||||||
|
websites = [result.url for result in search_results]
|
||||||
|
logger.info(f"Found {len(websites)} URLs from search")
|
||||||
|
|
||||||
|
# AI-based URL selection and deduplication
|
||||||
|
if len(websites) > request.max_pages:
|
||||||
|
logger.info(f"AI selecting most relevant {request.max_pages} URLs from {len(websites)} found")
|
||||||
|
|
||||||
|
# For now, just take the first max_pages URLs
|
||||||
|
selected_indices = list(range(min(request.max_pages, len(websites))))
|
||||||
|
selected_websites = [websites[i] for i in selected_indices]
|
||||||
|
|
||||||
|
# Remove duplicates while preserving order
|
||||||
|
seen = set()
|
||||||
|
unique_websites = []
|
||||||
|
for url in selected_websites:
|
||||||
|
if url not in seen:
|
||||||
|
seen.add(url)
|
||||||
|
unique_websites.append(url)
|
||||||
|
|
||||||
|
websites = unique_websites
|
||||||
|
|
||||||
|
logger.info(f"After AI selection deduplication: {len(websites)} unique URLs")
|
||||||
|
logger.info(f"AI selected {len(websites)} main URLs (after deduplication):")
|
||||||
|
for i, url in enumerate(websites, 1):
|
||||||
|
logger.info(f" {i}. {url}")
|
||||||
|
|
||||||
|
# Step 2: Smart website selection using AI interface
|
||||||
|
logger.info(f"=== STEP 2: FILTERED URL LIST BY USER PROMPT'S INTENTION ===")
|
||||||
|
logger.info(f"AI analyzing {len(websites)} URLs for relevance to: '{request.user_prompt}'")
|
||||||
|
|
||||||
|
# For now, just use all websites
|
||||||
|
selected_websites = websites
|
||||||
|
|
||||||
|
logger.debug(f"AI selected {len(selected_websites)} most relevant URLs:")
|
||||||
|
for i, url in enumerate(selected_websites, 1):
|
||||||
|
logger.debug(f" {i}. {url}")
|
||||||
|
|
||||||
|
# Step 3+4+5: Recursive crawling with configurable depth
|
||||||
|
# Get configuration parameters
|
||||||
|
max_depth = int(APP_CONFIG.get("Web_Research_MAX_DEPTH", "2"))
|
||||||
|
max_links_per_domain = int(APP_CONFIG.get("Web_Research_MAX_LINKS_PER_DOMAIN", "4"))
|
||||||
|
crawl_timeout_minutes = int(APP_CONFIG.get("Web_Research_CRAWL_TIMEOUT_MINUTES", "10"))
|
||||||
|
|
||||||
|
# Use the configured max_depth or the request's search_depth, whichever is smaller
|
||||||
|
effective_depth = min(max_depth, request.search_depth if isinstance(request.search_depth, int) else 2)
|
||||||
|
|
||||||
|
logger.info(f"=== STEP 3+4+5: RECURSIVE CRAWLING ===")
|
||||||
|
logger.info(f"Starting recursive crawl with depth {effective_depth}")
|
||||||
|
logger.info(f"Max links per domain: {max_links_per_domain}")
|
||||||
|
logger.info(f"Crawl timeout: {crawl_timeout_minutes} minutes")
|
||||||
|
            # Perform recursive crawling
            all_content = await self._crawlRecursively(
                urls=selected_websites,
                max_depth=effective_depth,
                extract_depth=request.extract_depth,
                max_per_domain=max_links_per_domain,
                global_processed_urls=global_processed_urls
            )

            logger.info(f"Recursive crawl completed: {len(all_content)} total pages crawled")

            # Step 6: AI analysis of all collected content
            logger.info(f"=== STEP 6: AI ANALYSIS ===")
            logger.info(f"Analyzing {len(all_content)} websites with AI")

            # Create a basic analysis result
            analysis_result = f"Web research completed for: {request.user_prompt}\n\n"
            analysis_result += f"Analyzed {len(all_content)} websites:\n"
            for url, content in all_content.items():
                analysis_result += f"- {url}: {len(content)} characters\n"

            # Create result documents
            import time
            result_documents = []

            # Main research result
            main_document = {
                "documentName": f"web_research_{int(time.time())}.json",
                "documentData": {
                    "user_prompt": request.user_prompt,
                    "websites_analyzed": len(all_content),
                    "additional_links_found": 0,  # Would be calculated from crawl results
                    "analysis_result": analysis_result,
                    "sources": [{"title": f"Website {i+1}", "url": url} for i, url in enumerate(all_content.keys())],
                    "additional_links": [],
                    "debug_info": {
                        "total_urls_processed": len(global_processed_urls),
                        "crawl_depth": effective_depth,
                        "extract_depth": request.extract_depth
                    }
                },
                "mimeType": "application/json"
            }
            result_documents.append(main_document)

            # Individual website content documents
            for i, (url, content) in enumerate(all_content.items()):
                content_document = {
                    "documentName": f"website_content_{i+1}.md",
                    "documentData": content,
                    "mimeType": "text/markdown"
                }
                result_documents.append(content_document)

            logger.info(f"WEB RESEARCH COMPLETED SUCCESSFULLY")
            logger.info(f"Generated {len(result_documents)} result documents")

            return WebResearchResult(
                success=True,
                documents=result_documents
            )

        except Exception as e:
            logger.error(f"Error in comprehensive web research: {str(e)}")
            return WebResearchResult(
                success=False,
                error=str(e),
                documents=[]
            )
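The documents built above all share the `documentName` / `documentData` / `mimeType` shape. A small, hypothetical consumer that persists such a result list to disk; it is not part of this commit and only relies on the fields visible above.

import json
from pathlib import Path
from typing import Any, Dict, List

def saveResearchDocuments(documents: List[Dict[str, Any]], out_dir: str = "research_output") -> List[str]:
    """Write each result document to disk, serializing dict payloads as JSON."""
    target = Path(out_dir)
    target.mkdir(parents=True, exist_ok=True)
    written = []
    for doc in documents:
        path = target / doc["documentName"]
        data = doc["documentData"]
        text = json.dumps(data, indent=2, ensure_ascii=False) if isinstance(data, dict) else str(data)
        path.write_text(text, encoding="utf-8")
        written.append(str(path))
    return written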
    async def _crawlRecursively(self, urls: List[str], max_depth: int, extract_depth: str = "advanced", max_per_domain: int = 10, global_processed_urls: Optional[set] = None) -> Dict[str, str]:
        """
        Recursively crawl URLs up to specified depth.
        This is a simplified version of the recursive crawling logic.
        """
        logger.info(f"Starting recursive crawl: {len(urls)} starting URLs, max_depth={max_depth}")

        # URL index to track all processed URLs (local + global)
        processed_urls = set()
        if global_processed_urls is not None:
            processed_urls = global_processed_urls
            logger.info(f"Using global URL index with {len(processed_urls)} already processed URLs")
        else:
            logger.info("Using local URL index for this crawl session")

        all_content = {}
        current_level_urls = urls.copy()

        try:
            for depth in range(1, max_depth + 1):
                logger.info(f"=== DEPTH LEVEL {depth}/{max_depth} ===")
                logger.info(f"Processing {len(current_level_urls)} URLs at depth {depth}")

                # URLs found at this level (for next iteration)
                next_level_urls = []

                for url in current_level_urls:
                    # Normalize URL for duplicate checking
                    normalized_url = self._normalizeUrl(url)
                    if normalized_url in processed_urls:
                        logger.debug(f"URL {url} (normalized: {normalized_url}) already processed, skipping")
                        continue

                    try:
                        logger.info(f"Processing URL at depth {depth}: {url}")

                        # Extract content from URL
                        crawl_results = await self._crawl([url], extract_depth=extract_depth, format="markdown")

                        if crawl_results and crawl_results[0].content:
                            content = crawl_results[0].content
                            all_content[url] = content
                            processed_urls.add(normalized_url)
                            logger.info(f"✓ Successfully processed {url}: {len(content)} chars")

                            # For simplicity, we'll skip finding sub-links in this implementation
                            # In a full implementation, you would extract links and add them to
                            # next_level_urls (a sketch of that step follows after this method)

                        else:
                            logger.warning(f"✗ No content extracted from {url}")
                            processed_urls.add(normalized_url)

                    except Exception as e:
                        logger.warning(f"✗ Failed to process URL {url} at depth {depth}: {e}")
                        processed_urls.add(normalized_url)

                # Prepare for next iteration
                current_level_urls = next_level_urls
                logger.info(f"Depth {depth} completed. Found {len(next_level_urls)} URLs for next level")

                # Stop if no more URLs to process
                if not current_level_urls:
                    logger.info(f"No more URLs found at depth {depth}, stopping recursion")
                    break

            logger.info(f"Recursive crawl completed: {len(all_content)} total pages crawled")
            return all_content

        except Exception as e:
            logger.error(f"Crawling failed with error: {e}, returning partial results: {len(all_content)} pages crawled so far")
            return all_content
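`next_level_urls` stays empty in this simplified implementation. A hedged sketch of the missing link-discovery step, condensed from the `_extractLinksFromContent` and `filterUrlsOnlyPages` helpers this commit removes from AiObjects; the regexes and the same-domain rule follow that removed code.

import re
from typing import List
from urllib.parse import urljoin, urlparse

_NON_HTML_SUFFIXES = ('.jpg', '.jpeg', '.png', '.gif', '.svg', '.pdf', '.zip', '.css', '.js')

def findNextLevelUrls(content: str, base_url: str, max_per_domain: int = 4) -> List[str]:
    """Pull same-domain page links out of crawled HTML/markdown for the next crawl depth."""
    candidates = []
    candidates += re.findall(r'<a[^>]+href=["\']([^"\']+)["\']', content, re.IGNORECASE)
    candidates += [u for _, u in re.findall(r'\[([^\]]+)\]\(([^)]+)\)', content)]
    candidates += re.findall(r'https?://[^\s\)]+', content)

    base_domain = urlparse(base_url).netloc
    links, seen = [], set()
    for raw in candidates:
        raw = raw.strip().strip('"\'')
        if not raw or raw.startswith('#') or raw.lower().startswith('javascript:'):
            continue
        url = urljoin(base_url, raw).rstrip('.,;!?')
        if url.lower().endswith(_NON_HTML_SUFFIXES):
            continue
        if urlparse(url).netloc != base_domain or url in seen:
            continue
        seen.add(url)
        links.append(url)
        if len(links) >= max_per_domain:
            break
    return links

# Example: relative links are resolved against the page URL, off-domain links are dropped.
page = '<a href="/docs/intro">Intro</a> [Blog](https://example.com/blog) [Other](https://other.org/x)'
print(findNextLevelUrls(page, "https://example.com/start"))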
    def _normalizeUrl(self, url: str) -> str:
        """Normalize URL to handle variations that should be considered duplicates."""
        if not url:
            return url

        # Remove trailing slashes and fragments
        url = url.rstrip('/')
        if '#' in url:
            url = url.split('#')[0]

        # Handle common URL variations
        url = url.replace('http://', 'https://')  # Normalize protocol

        return url
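One caveat: `_normalizeUrl` rewrites `http://` to `https://`, so an http-only page and its (possibly nonexistent) https variant collapse into one key. A slightly stricter alternative using `urllib.parse`, offered only as an optional refinement and not part of this commit.

from urllib.parse import urlsplit, urlunsplit

def normalizeUrlStrict(url: str) -> str:
    """Canonicalize a URL for duplicate detection without changing its scheme."""
    if not url:
        return url
    parts = urlsplit(url)
    host = (parts.hostname or "").lower()
    port = parts.port
    netloc = host
    # Keep only non-default ports.
    if port and not (parts.scheme == "http" and port == 80) and not (parts.scheme == "https" and port == 443):
        netloc = f"{netloc}:{port}"
    path = parts.path.rstrip('/')
    return urlunsplit((parts.scheme.lower(), netloc, path, parts.query, ''))

# Example: both forms map to the same key.
print(normalizeUrlStrict("HTTPS://Example.com:443/docs/"))   # https://example.com/docs
print(normalizeUrlStrict("https://example.com/docs#intro"))  # https://example.com/docs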
@@ -649,427 +649,6 @@ class AiObjects:
|
||||||
errorCount=1
|
errorCount=1
|
||||||
)
|
)
|
||||||
|
|
||||||
# Web functionality methods - Now use standardized AiModelCall/AiModelResponse pattern
|
|
||||||
async def searchWebsites(self, query: str, maxResults: int = 5, **kwargs) -> str:
|
|
||||||
"""Search for websites using Tavily with standardized pattern."""
|
|
||||||
from modules.datamodels.datamodelAi import AiModelCall
|
|
||||||
|
|
||||||
modelCall = AiModelCall(
|
|
||||||
messages=[{"role": "user", "content": query}],
|
|
||||||
options={"max_results": maxResults, **kwargs}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get Tavily connector from registry
|
|
||||||
tavilyConnector = modelRegistry.getConnectorForModel("tavily_search")
|
|
||||||
if not tavilyConnector:
|
|
||||||
raise ValueError("Tavily connector not available")
|
|
||||||
|
|
||||||
result = await tavilyConnector.search(modelCall)
|
|
||||||
return result.content if result.success else ""
|
|
||||||
|
|
||||||
async def crawlWebsites(self, urls: List[str], extractDepth: str = "advanced", format: str = "markdown") -> str:
|
|
||||||
"""Crawl websites using Tavily with standardized pattern."""
|
|
||||||
from modules.datamodels.datamodelAi import AiModelCall
|
|
||||||
|
|
||||||
modelCall = AiModelCall(
|
|
||||||
messages=[{"role": "user", "content": "crawl websites"}],
|
|
||||||
options={"urls": urls, "extract_depth": extractDepth, "format": format}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get Tavily connector from registry
|
|
||||||
tavilyConnector = modelRegistry.getConnectorForModel("tavily_crawl")
|
|
||||||
if not tavilyConnector:
|
|
||||||
raise ValueError("Tavily connector not available")
|
|
||||||
|
|
||||||
result = await tavilyConnector.crawl(modelCall)
|
|
||||||
return result.content if result.success else ""
|
|
||||||
|
|
||||||
async def extractContent(self, urls: List[str], extractDepth: str = "advanced", format: str = "markdown") -> Dict[str, str]:
|
|
||||||
"""Extract content from URLs and return as dictionary."""
|
|
||||||
import json
|
|
||||||
crawlResults = await self.crawlWebsites(urls, extractDepth, format)
|
|
||||||
|
|
||||||
# Parse JSON response and extract content
|
|
||||||
try:
|
|
||||||
data = json.loads(crawlResults)
|
|
||||||
return {result["url"]: result["content"] for result in data.get("results", [])}
|
|
||||||
except (json.JSONDecodeError, KeyError):
|
|
||||||
return {}
|
|
||||||
|
|
||||||
# Core Web Tools - Clean interface for web operations
|
|
||||||
async def readPage(self, url: str, extractDepth: str = "advanced") -> Optional[str]:
|
|
||||||
"""Read a single web page and return its content (HTML/Markdown)."""
|
|
||||||
logger.debug(f"Reading page: {url}")
|
|
||||||
try:
|
|
||||||
# URL encode the URL to handle spaces and special characters
|
|
||||||
from urllib.parse import quote, urlparse, urlunparse
|
|
||||||
parsed = urlparse(url)
|
|
||||||
encodedUrl = urlunparse((
|
|
||||||
parsed.scheme,
|
|
||||||
parsed.netloc,
|
|
||||||
parsed.path,
|
|
||||||
parsed.params,
|
|
||||||
parsed.query,
|
|
||||||
parsed.fragment
|
|
||||||
))
|
|
||||||
|
|
||||||
# Manually encode query parameters to handle spaces
|
|
||||||
if parsed.query:
|
|
||||||
encodedQuery = quote(parsed.query, safe='=&')
|
|
||||||
encodedUrl = urlunparse((
|
|
||||||
parsed.scheme,
|
|
||||||
parsed.netloc,
|
|
||||||
parsed.path,
|
|
||||||
parsed.params,
|
|
||||||
encodedQuery,
|
|
||||||
parsed.fragment
|
|
||||||
))
|
|
||||||
|
|
||||||
logger.debug(f"URL encoded: {url} -> {encodedUrl}")
|
|
||||||
|
|
||||||
content = await self.extractContent([encodedUrl], extractDepth, "markdown")
|
|
||||||
result = content.get(encodedUrl)
|
|
||||||
if result:
|
|
||||||
logger.debug(f"Successfully read page {encodedUrl}: {len(result)} chars")
|
|
||||||
else:
|
|
||||||
logger.warning(f"No content returned for page {encodedUrl}")
|
|
||||||
return result
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to read page {url}: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def getUrlsFromPage(self, url: str, extractDepth: str = "advanced") -> List[str]:
|
|
||||||
"""Get all URLs from a web page, with redundancies removed."""
|
|
||||||
try:
|
|
||||||
content = await self.readPage(url, extractDepth)
|
|
||||||
if not content:
|
|
||||||
return []
|
|
||||||
|
|
||||||
links = self._extractLinksFromContent(content, url)
|
|
||||||
# Remove duplicates while preserving order
|
|
||||||
seen = set()
|
|
||||||
uniqueLinks = []
|
|
||||||
for link in links:
|
|
||||||
if link not in seen:
|
|
||||||
seen.add(link)
|
|
||||||
uniqueLinks.append(link)
|
|
||||||
|
|
||||||
logger.debug(f"Extracted {len(uniqueLinks)} unique URLs from {url}")
|
|
||||||
return uniqueLinks
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to get URLs from page {url}: {e}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
def filterUrlsOnlyPages(self, urls: List[str], maxPerDomain: int = 10) -> List[str]:
|
|
||||||
"""Filter URLs to get only links for pages to follow (no images, etc.)."""
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
def _isHtmlCandidate(url: str) -> bool:
|
|
||||||
lower = url.lower()
|
|
||||||
blocked = ('.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.ico', '.bmp',
|
|
||||||
'.mp4', '.mp3', '.avi', '.mov', '.mkv',
|
|
||||||
'.pdf', '.zip', '.rar', '.7z', '.tar', '.gz',
|
|
||||||
'.css', '.js', '.woff', '.woff2', '.ttf', '.eot')
|
|
||||||
return not lower.endswith(blocked)
|
|
||||||
|
|
||||||
# Group by domain
|
|
||||||
domainLinks = {}
|
|
||||||
for link in urls:
|
|
||||||
domain = urlparse(link).netloc
|
|
||||||
if domain not in domainLinks:
|
|
||||||
domainLinks[domain] = []
|
|
||||||
domainLinks[domain].append(link)
|
|
||||||
|
|
||||||
# Filter and cap per domain
|
|
||||||
filteredLinks = []
|
|
||||||
for domain, domainLinkList in domainLinks.items():
|
|
||||||
seen = set()
|
|
||||||
domainFiltered = []
|
|
||||||
|
|
||||||
for link in domainLinkList:
|
|
||||||
if link in seen:
|
|
||||||
continue
|
|
||||||
if not _isHtmlCandidate(link):
|
|
||||||
continue
|
|
||||||
seen.add(link)
|
|
||||||
domainFiltered.append(link)
|
|
||||||
if len(domainFiltered) >= maxPerDomain:
|
|
||||||
break
|
|
||||||
|
|
||||||
filteredLinks.extend(domainFiltered)
|
|
||||||
logger.debug(f"Domain {domain}: {len(domainLinkList)} -> {len(domainFiltered)} links")
|
|
||||||
|
|
||||||
return filteredLinks
|
|
||||||
|
|
||||||
def _extractLinksFromContent(self, content: str, baseUrl: str) -> List[str]:
|
|
||||||
"""Extract links from HTML/Markdown content."""
|
|
||||||
try:
|
|
||||||
import re
|
|
||||||
from urllib.parse import urljoin, urlparse, quote, urlunparse
|
|
||||||
|
|
||||||
def _cleanUrl(url: str) -> str:
|
|
||||||
"""Clean and encode URL to remove spaces and invalid characters."""
|
|
||||||
# Remove quotes and extra spaces
|
|
||||||
url = url.strip().strip('"\'')
|
|
||||||
|
|
||||||
# If it's a relative URL, make it absolute first
|
|
||||||
if not url.startswith(('http://', 'https://')):
|
|
||||||
url = urljoin(baseUrl, url)
|
|
||||||
|
|
||||||
# Parse and re-encode the URL properly
|
|
||||||
parsed = urlparse(url)
|
|
||||||
if parsed.query:
|
|
||||||
# Encode query parameters properly
|
|
||||||
encodedQuery = quote(parsed.query, safe='=&')
|
|
||||||
url = urlunparse((
|
|
||||||
parsed.scheme,
|
|
||||||
parsed.netloc,
|
|
||||||
parsed.path,
|
|
||||||
parsed.params,
|
|
||||||
encodedQuery,
|
|
||||||
parsed.fragment
|
|
||||||
))
|
|
||||||
|
|
||||||
return url
|
|
||||||
|
|
||||||
links = []
|
|
||||||
|
|
||||||
# Extract HTML links: <a href="url"> format
|
|
||||||
htmlLinkPattern = r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>'
|
|
||||||
htmlLinks = re.findall(htmlLinkPattern, content, re.IGNORECASE)
|
|
||||||
|
|
||||||
for url in htmlLinks:
|
|
||||||
if url and not url.startswith('#') and not url.startswith('javascript:'):
|
|
||||||
try:
|
|
||||||
cleanedUrl = _cleanUrl(url)
|
|
||||||
links.append(cleanedUrl)
|
|
||||||
logger.debug(f"Extracted HTML link: {url} -> {cleanedUrl}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug(f"Failed to clean HTML link {url}: {e}")
|
|
||||||
|
|
||||||
# Extract markdown links: [text](url) format
|
|
||||||
markdownLinkPattern = r'\[([^\]]+)\]\(([^)]+)\)'
|
|
||||||
markdownLinks = re.findall(markdownLinkPattern, content)
|
|
||||||
|
|
||||||
for text, url in markdownLinks:
|
|
||||||
if url and not url.startswith('#'):
|
|
||||||
try:
|
|
||||||
cleanedUrl = _cleanUrl(url)
|
|
||||||
# Only keep URLs from the same domain
|
|
||||||
if urlparse(cleanedUrl).netloc == urlparse(baseUrl).netloc:
|
|
||||||
links.append(cleanedUrl)
|
|
||||||
logger.debug(f"Extracted markdown link: {url} -> {cleanedUrl}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug(f"Failed to clean markdown link {url}: {e}")
|
|
||||||
|
|
||||||
# Extract plain URLs in the text
|
|
||||||
urlPattern = r'https?://[^\s\)]+'
|
|
||||||
plainUrls = re.findall(urlPattern, content)
|
|
||||||
|
|
||||||
for url in plainUrls:
|
|
||||||
try:
|
|
||||||
cleanUrl = url.rstrip('.,;!?')
|
|
||||||
cleanedUrl = _cleanUrl(cleanUrl)
|
|
||||||
if urlparse(cleanedUrl).netloc == urlparse(baseUrl).netloc:
|
|
||||||
if cleanedUrl not in links: # Avoid duplicates
|
|
||||||
links.append(cleanedUrl)
|
|
||||||
logger.debug(f"Extracted plain URL: {url} -> {cleanedUrl}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug(f"Failed to clean plain URL {url}: {e}")
|
|
||||||
|
|
||||||
logger.debug(f"Total links extracted and cleaned: {len(links)}")
|
|
||||||
return links
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to extract links from content: {e}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
def _normalizeUrl(self, url: str) -> str:
|
|
||||||
"""Normalize URL to handle variations that should be considered duplicates."""
|
|
||||||
if not url:
|
|
||||||
return url
|
|
||||||
|
|
||||||
# Remove trailing slashes and fragments
|
|
||||||
url = url.rstrip('/')
|
|
||||||
if '#' in url:
|
|
||||||
url = url.split('#')[0]
|
|
||||||
|
|
||||||
# Handle common URL variations
|
|
||||||
url = url.replace('http://', 'https://') # Normalize protocol
|
|
||||||
|
|
||||||
return url
|
|
||||||
|
|
||||||
async def crawlRecursively(self, urls: List[str], max_depth: int, extract_depth: str = "advanced", max_per_domain: int = 10, global_processed_urls: Optional[set] = None) -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
Recursively crawl URLs up to specified depth.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
urls: List of starting URLs to crawl
|
|
||||||
max_depth: Maximum depth to crawl (1=main pages only, 2=main+sub-pages, etc.)
|
|
||||||
extract_depth: Tavily extract depth setting
|
|
||||||
max_per_domain: Maximum URLs per domain per level
|
|
||||||
global_processed_urls: Optional global set to track processed URLs across sessions
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary mapping URL -> content for all crawled pages
|
|
||||||
"""
|
|
||||||
logger.info(f"Starting recursive crawl: {len(urls)} starting URLs, max_depth={max_depth}")
|
|
||||||
|
|
||||||
# URL index to track all processed URLs (local + global)
|
|
||||||
processed_urls = set()
|
|
||||||
if global_processed_urls is not None:
|
|
||||||
# Use global index if provided, otherwise create local one
|
|
||||||
processed_urls = global_processed_urls
|
|
||||||
logger.info(f"Using global URL index with {len(processed_urls)} already processed URLs")
|
|
||||||
else:
|
|
||||||
logger.info("Using local URL index for this crawl session")
|
|
||||||
|
|
||||||
all_content = {}
|
|
||||||
|
|
||||||
# Current level URLs to process
|
|
||||||
current_level_urls = urls.copy()
|
|
||||||
|
|
||||||
try:
|
|
||||||
for depth in range(1, max_depth + 1):
|
|
||||||
logger.info(f"=== DEPTH LEVEL {depth}/{max_depth} ===")
|
|
||||||
logger.info(f"Processing {len(current_level_urls)} URLs at depth {depth}")
|
|
||||||
|
|
||||||
# URLs found at this level (for next iteration)
|
|
||||||
next_level_urls = []
|
|
||||||
|
|
||||||
for url in current_level_urls:
|
|
||||||
# Normalize URL for duplicate checking
|
|
||||||
normalized_url = self._normalizeUrl(url)
|
|
||||||
if normalized_url in processed_urls:
|
|
||||||
logger.debug(f"URL {url} (normalized: {normalized_url}) already processed, skipping")
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
|
||||||
logger.info(f"Processing URL at depth {depth}: {url}")
|
|
||||||
logger.debug(f"Total processed URLs so far: {len(processed_urls)}")
|
|
||||||
|
|
||||||
# Read page content
|
|
||||||
content = await self.readPage(url, extract_depth)
|
|
||||||
if content:
|
|
||||||
all_content[url] = content
|
|
||||||
processed_urls.add(normalized_url)
|
|
||||||
logger.info(f"✓ Successfully processed {url}: {len(content)} chars")
|
|
||||||
|
|
||||||
# Get URLs from this page for next level
|
|
||||||
page_urls = await self.getUrlsFromPage(url, extract_depth)
|
|
||||||
logger.info(f"Found {len(page_urls)} URLs on {url}")
|
|
||||||
|
|
||||||
# Filter URLs and add to next level
|
|
||||||
filtered_urls = self.filterUrlsOnlyPages(page_urls, max_per_domain)
|
|
||||||
logger.info(f"Filtered to {len(filtered_urls)} valid URLs")
|
|
||||||
|
|
||||||
# Add new URLs to next level (avoiding already processed ones)
|
|
||||||
new_urls_count = 0
|
|
||||||
for new_url in filtered_urls:
|
|
||||||
normalized_new_url = self._normalizeUrl(new_url)
|
|
||||||
if normalized_new_url not in processed_urls:
|
|
||||||
next_level_urls.append(new_url)
|
|
||||||
new_urls_count += 1
|
|
||||||
else:
|
|
||||||
logger.debug(f"URL {new_url} (normalized: {normalized_new_url}) already processed, skipping")
|
|
||||||
|
|
||||||
logger.info(f"Added {new_urls_count} new URLs to next level from {url}")
|
|
||||||
else:
|
|
||||||
logger.warning(f"✗ No content extracted from {url}")
|
|
||||||
processed_urls.add(normalized_url) # Mark as processed to avoid retry
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"✗ Failed to process URL {url} at depth {depth}: {e}")
|
|
||||||
processed_urls.add(normalized_url) # Mark as processed to avoid retry
|
|
||||||
|
|
||||||
# Prepare for next iteration
|
|
||||||
current_level_urls = next_level_urls
|
|
||||||
logger.info(f"Depth {depth} completed. Found {len(next_level_urls)} URLs for next level")
|
|
||||||
|
|
||||||
# Stop if no more URLs to process
|
|
||||||
if not current_level_urls:
|
|
||||||
logger.info(f"No more URLs found at depth {depth}, stopping recursion")
|
|
||||||
break
|
|
||||||
|
|
||||||
logger.info(f"Recursive crawl completed: {len(all_content)} total pages crawled")
|
|
||||||
logger.info(f"Total URLs processed (including skipped): {len(processed_urls)}")
|
|
||||||
logger.info(f"Unique URLs found: {len(all_content)}")
|
|
||||||
return all_content
|
|
||||||
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
logger.warning(f"Crawling timed out, returning partial results: {len(all_content)} pages crawled so far")
|
|
||||||
return all_content
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Crawling failed with error: {e}, returning partial results: {len(all_content)} pages crawled so far")
|
|
||||||
return all_content
|
|
||||||
|
|
||||||
async def webQuery(self, query: str, context: str = "", options: AiCallOptions = None) -> AiCallResponse:
|
|
||||||
"""Use Perplexity AI to provide the best answers for web-related queries."""
|
|
||||||
|
|
||||||
if options is None:
|
|
||||||
options = AiCallOptions(operationType=OperationTypeEnum.WEB_RESEARCH)
|
|
||||||
|
|
||||||
# Calculate input bytes
|
|
||||||
inputBytes = len((query + context).encode("utf-8"))
|
|
||||||
|
|
||||||
# Create a comprehensive prompt for web queries
|
|
||||||
webPrompt = f"""You are an expert web researcher and information analyst. Please provide a comprehensive and accurate answer to the following web-related query.
|
|
||||||
|
|
||||||
Query: {query}
|
|
||||||
|
|
||||||
{f"Additional Context: {context}" if context else ""}
|
|
||||||
|
|
||||||
Please provide:
|
|
||||||
1. A clear, well-structured answer to the query
|
|
||||||
2. Key points and important details
|
|
||||||
3. Relevant insights and analysis
|
|
||||||
4. Any important considerations or caveats
|
|
||||||
5. Suggestions for further research if applicable
|
|
||||||
|
|
||||||
Format your response in a clear, professional manner that would be helpful for someone researching this topic."""
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Start timing
|
|
||||||
startTime = time.time()
|
|
||||||
|
|
||||||
# Use Perplexity for web research with search capabilities
|
|
||||||
perplexity_connector = modelRegistry.getConnectorForModel("perplexity_callAiWithWebSearch")
|
|
||||||
if not perplexity_connector:
|
|
||||||
raise ValueError("Perplexity connector not available")
|
|
||||||
response = await perplexity_connector.callAiWithWebSearch(webPrompt)
|
|
||||||
|
|
||||||
# Calculate timing and output bytes
|
|
||||||
endTime = time.time()
|
|
||||||
processingTime = endTime - startTime
|
|
||||||
outputBytes = len(response.encode("utf-8"))
|
|
||||||
|
|
||||||
# Calculate price using Perplexity model pricing
|
|
||||||
perplexity_model = modelRegistry.getModel("perplexity_callAiWithWebSearch")
|
|
||||||
priceUsd = perplexity_model.calculatePriceUsd(inputBytes, outputBytes)
|
|
||||||
|
|
||||||
logger.info(f"✅ Web query successful with Perplexity")
|
|
||||||
return AiCallResponse(
|
|
||||||
content=response,
|
|
||||||
modelName="perplexity_callAiWithWebSearch",
|
|
||||||
priceUsd=priceUsd,
|
|
||||||
processingTime=processingTime,
|
|
||||||
bytesSent=inputBytes,
|
|
||||||
bytesReceived=outputBytes,
|
|
||||||
errorCount=0
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Perplexity web query failed: {str(e)}")
|
|
||||||
return AiCallResponse(
|
|
||||||
content=f"Web query failed: {str(e)}",
|
|
||||||
modelName="perplexity_callAiWithWebSearch",
|
|
||||||
priceUsd=0.0,
|
|
||||||
processingTime=0.0,
|
|
||||||
bytesSent=inputBytes,
|
|
||||||
bytesReceived=0,
|
|
||||||
errorCount=1
|
|
||||||
)
|
|
||||||
|
|
||||||
# Utility methods
|
# Utility methods
|
||||||
async def listAvailableModels(self, connectorType: str = None) -> List[Dict[str, Any]]:
|
async def listAvailableModels(self, connectorType: str = None) -> List[Dict[str, Any]]:
|
||||||
"""List available models, optionally filtered by connector type."""
|
"""List available models, optionally filtered by connector type."""
|
||||||
|
|
@@ -1085,163 +664,8 @@ Format your response in a clear, professional manner that would be helpful for s
|
||||||
raise ValueError(f"Model {modelName} not found")
|
raise ValueError(f"Model {modelName} not found")
|
||||||
return model.model_dump()
|
return model.model_dump()
|
||||||
|
|
||||||
|
|
||||||
async def getModelsByTag(self, tag: str) -> List[str]:
|
async def getModelsByTag(self, tag: str) -> List[str]:
|
||||||
"""Get model names that have a specific tag."""
|
"""Get model names that have a specific tag."""
|
||||||
models = modelRegistry.getModelsByTag(tag)
|
models = modelRegistry.getModelsByTag(tag)
|
||||||
return [model.name for model in models]
|
return [model.name for model in models]
|
||||||
|
|
||||||
async def selectRelevantWebsites(self, websites: List[str], userQuestion: str) -> Tuple[List[str], str]:
|
|
||||||
"""Select most relevant websites using AI analysis. Returns (selected_websites, ai_response)."""
|
|
||||||
if len(websites) <= 1:
|
|
||||||
return websites, "Only one website available, no selection needed"
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Create website summaries for AI analysis
|
|
||||||
websiteSummaries = []
|
|
||||||
for i, url in enumerate(websites, 1):
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
domain = urlparse(url).netloc
|
|
||||||
summary = f"{i}. {url} (Domain: {domain})"
|
|
||||||
websiteSummaries.append(summary)
|
|
||||||
|
|
||||||
selectionPrompt = f"""
|
|
||||||
Based on this user request: "{userQuestion}"
|
|
||||||
|
|
||||||
I have {len(websites)} websites found. Please select the most relevant website(s) for this request.
|
|
||||||
|
|
||||||
Available websites:
|
|
||||||
{chr(10).join(websiteSummaries)}
|
|
||||||
|
|
||||||
Please respond with the website number(s) (1, 2, 3, etc.) that are most relevant.
|
|
||||||
Format: 1,3,5 (or just 1 for single selection)
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Use Perplexity to select the best websites
|
|
||||||
response = await self.webQuery(selectionPrompt)
|
|
||||||
|
|
||||||
# Parse the selection
|
|
||||||
import re
|
|
||||||
numbers = re.findall(r'\d+', response)
|
|
||||||
if numbers:
|
|
||||||
selectedWebsites = []
|
|
||||||
for num in numbers:
|
|
||||||
index = int(num) - 1
|
|
||||||
if 0 <= index < len(websites):
|
|
||||||
selectedWebsites.append(websites[index])
|
|
||||||
|
|
||||||
if selectedWebsites:
|
|
||||||
logger.info(f"AI selected {len(selectedWebsites)} websites")
|
|
||||||
return selectedWebsites, response
|
|
||||||
|
|
||||||
# Fallback to first website
|
|
||||||
logger.warning("AI selection failed, using first website")
|
|
||||||
return websites[:1], f"AI selection failed, fallback to first website. AI response: {response}"
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in website selection: {str(e)}")
|
|
||||||
return websites[:1], f"Error in website selection: {str(e)}"
|
|
||||||
|
|
||||||
async def analyzeContentWithChunking(self, allContent: Dict[str, str], userQuestion: str) -> str:
|
|
||||||
"""Analyze content using AI with chunking for large content."""
|
|
||||||
logger.info(f"Analyzing {len(allContent)} websites with AI")
|
|
||||||
|
|
||||||
# Process content in chunks to avoid token limits
|
|
||||||
chunkSize = 50000 # 50k chars per chunk
|
|
||||||
allChunks = []
|
|
||||||
|
|
||||||
for url, content in allContent.items():
|
|
||||||
filteredContent = self._filterContent(content)
|
|
||||||
if len(filteredContent) <= chunkSize:
|
|
||||||
allChunks.append((url, filteredContent))
|
|
||||||
logger.info(f"Content from {url}: {len(filteredContent)} chars (single chunk)")
|
|
||||||
else:
|
|
||||||
# Split large content into chunks
|
|
||||||
chunkCount = (len(filteredContent) + chunkSize - 1) // chunkSize
|
|
||||||
logger.info(f"Content from {url}: {len(filteredContent)} chars (split into {chunkCount} chunks)")
|
|
||||||
for i in range(0, len(filteredContent), chunkSize):
|
|
||||||
chunk = filteredContent[i:i+chunkSize]
|
|
||||||
chunkNum = i//chunkSize + 1
|
|
||||||
allChunks.append((f"{url} (part {chunkNum})", chunk))
|
|
||||||
|
|
||||||
logger.info(f"Processing {len(allChunks)} content chunks")
|
|
||||||
|
|
||||||
# Analyze each chunk
|
|
||||||
chunkAnalyses = []
|
|
||||||
for i, (url, chunk) in enumerate(allChunks, 1):
|
|
||||||
logger.info(f"Analyzing chunk {i}/{len(allChunks)}: {url}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
analysisPrompt = f"""
|
|
||||||
Analyze this web content and extract relevant information for: {userQuestion}
|
|
||||||
|
|
||||||
Source: {url}
|
|
||||||
Content: {chunk}
|
|
||||||
|
|
||||||
Please extract key information relevant to the query.
|
|
||||||
"""
|
|
||||||
|
|
||||||
analysis = await self.webQuery(analysisPrompt)
|
|
||||||
chunkAnalyses.append(analysis)
|
|
||||||
logger.info(f"Chunk {i}/{len(allChunks)} analyzed successfully")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Chunk {i}/{len(allChunks)} error: {e}")
|
|
||||||
|
|
||||||
# Combine all chunk analyses
|
|
||||||
if chunkAnalyses:
|
|
||||||
logger.info(f"Combining {len(chunkAnalyses)} chunk analyses")
|
|
||||||
combinedAnalysis = "\n\n".join(chunkAnalyses)
|
|
||||||
|
|
||||||
# Final synthesis
|
|
||||||
try:
|
|
||||||
logger.info("Performing final synthesis of all analyses")
|
|
||||||
synthesisPrompt = f"""
|
|
||||||
Based on these partial analyses, provide a comprehensive answer to: {userQuestion}
|
|
||||||
|
|
||||||
Partial analyses:
|
|
||||||
{combinedAnalysis}
|
|
||||||
|
|
||||||
Please provide a clear, well-structured answer to the query.
|
|
||||||
"""
|
|
||||||
|
|
||||||
finalAnalysis = await self.webQuery(synthesisPrompt)
|
|
||||||
logger.info("Final synthesis completed successfully")
|
|
||||||
return finalAnalysis
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Synthesis error: {e}")
|
|
||||||
return combinedAnalysis
|
|
||||||
else:
|
|
||||||
logger.error("No content could be analyzed")
|
|
||||||
return "No content could be analyzed"
|
|
||||||
|
|
||||||
def _filterContent(self, content: str) -> str:
|
|
||||||
"""Filter out navigation, ads, and other nonsense content."""
|
|
||||||
lines = content.split('\n')
|
|
||||||
filteredLines = []
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
line = line.strip()
|
|
||||||
# Skip empty lines
|
|
||||||
if not line:
|
|
||||||
continue
|
|
||||||
# Skip navigation elements
|
|
||||||
if any(skip in line.lower() for skip in [
|
|
||||||
'toggle navigation', 'log in', 'sign up', 'cookies', 'privacy policy',
|
|
||||||
'terms of service', 'subscribe', 'newsletter', 'follow us', 'share this',
|
|
||||||
'advertisement', 'sponsored', 'banner', 'popup', 'modal'
|
|
||||||
]):
|
|
||||||
continue
|
|
||||||
# Skip image references without context
|
|
||||||
if line.startswith(' and line.endswith(')') and '---' in line:
|
|
||||||
continue
|
|
||||||
# Keep meaningful content
|
|
||||||
if len(line) > 10: # Skip very short lines
|
|
||||||
filteredLines.append(line)
|
|
||||||
|
|
||||||
return '\n'.join(filteredLines)
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -7,7 +7,6 @@ from modules.aicore.aicorePluginTavily import WebResearchRequest, WebResearchRes
 from modules.interfaces.interfaceAiObjects import AiObjects
 from modules.services.serviceAi.subCoreAi import SubCoreAi
 from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
-from modules.services.serviceAi.subWebResearch import SubWebResearch
 from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration

@@ -19,7 +18,6 @@ class AiService:
     Manager delegates to specialized sub-modules:
     - SubCoreAi: Core AI operations (readImage, generateImage, callAi, planning, text calls)
     - SubDocumentProcessing: Document chunking, processing, and merging logic
-    - SubWebResearch: Web research and crawling functionality
     - SubDocumentGeneration: Single-file and multi-file document generation

     The main service acts as a coordinator:

@@ -40,7 +38,6 @@ class AiService:
         self._extractionService = None  # Lazy initialization
         self._coreAi = None  # Lazy initialization
         self._documentProcessor = None  # Lazy initialization
-        self._webResearch = None  # Lazy initialization
         self._documentGenerator = None  # Lazy initialization

     @property

@@ -69,13 +66,6 @@ class AiService:
             self._documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
         return self._documentProcessor

-    @property
-    def webResearchService(self):
-        """Lazy initialization of web research service."""
-        if self._webResearch is None:
-            logger.info("Lazy initializing SubWebResearch...")
-            self._webResearch = SubWebResearch(self.services, self.aiObjects)
-        return self._webResearch
-
     @property
     def documentGenerator(self):

@@ -127,11 +117,6 @@ class AiService:
         await self._ensureAiObjectsInitialized()
         return await self.coreAi.generateImage(prompt, size, quality, style, options)

-    # Web Research
-    async def webResearch(self, request: WebResearchRequest) -> WebResearchResult:
-        """Perform web research using interface functions."""
-        await self._ensureAiObjectsInitialized()
-        return await self.webResearchService.webResearch(request)
-
     # Core AI Methods - Delegating to SubCoreAi
     async def callAiPlanning(
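The removed `webResearchService` property used the same lazy-initialization pattern that the remaining sub-modules keep. A minimal, generic sketch of that pattern; names are illustrative, not the real classes.

class LazyCoordinatorSketch:
    """Coordinator that creates its sub-modules only on first use, as AiService does."""
    def __init__(self, services):
        self.services = services
        self._documentGenerator = None  # lazy

    @property
    def documentGenerator(self):
        if self._documentGenerator is None:
            # Construction is deferred until a caller actually needs this sub-module.
            self._documentGenerator = object()  # stand-in for SubDocumentGeneration(...)
        return self._documentGenerator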
@@ -1,388 +0,0 @@
|
||||||
import logging
|
|
||||||
from typing import Optional
|
|
||||||
from modules.aicore.aicorePluginTavily import WebResearchRequest, WebResearchResult
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class SubWebResearch:
|
|
||||||
"""Web research operations including search, crawling, and analysis."""
|
|
||||||
|
|
||||||
def __init__(self, services, aiObjects):
|
|
||||||
"""Initialize web research service.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
services: Service center instance for accessing other services
|
|
||||||
aiObjects: Initialized AiObjects instance
|
|
||||||
"""
|
|
||||||
self.services = services
|
|
||||||
self.aiObjects = aiObjects
|
|
||||||
|
|
||||||
async def webResearch(self, request: WebResearchRequest) -> WebResearchResult:
|
|
||||||
"""Perform web research using interface functions."""
|
|
||||||
try:
|
|
||||||
logger.info(f"WEB RESEARCH STARTED")
|
|
||||||
logger.info(f"User Query: {request.user_prompt}")
|
|
||||||
logger.info(f"Max Results: {request.max_results}, Max Pages: {request.options.max_pages}")
|
|
||||||
|
|
||||||
# Global URL index to track all processed URLs across the entire research session
|
|
||||||
global_processed_urls = set()
|
|
||||||
|
|
||||||
# Step 1: Find relevant websites - either provided URLs or AI-determined main URLs
|
|
||||||
logger.info(f"=== STEP 1: INITIAL MAIN URLS LIST ===")
|
|
||||||
|
|
||||||
if request.urls:
|
|
||||||
# Use provided URLs as initial main URLs
|
|
||||||
websites = request.urls
|
|
||||||
logger.info(f"Using provided URLs ({len(websites)}):")
|
|
||||||
for i, url in enumerate(websites, 1):
|
|
||||||
logger.info(f" {i}. {url}")
|
|
||||||
else:
|
|
||||||
# Use AI to determine main URLs based on user's intention
|
|
||||||
logger.info(f"AI analyzing user intent: '{request.user_prompt}'")
|
|
||||||
|
|
||||||
# Use AI to generate optimized Tavily search query and search parameters
|
|
||||||
query_optimizer_prompt = f"""You are a search query optimizer.
|
|
||||||
|
|
||||||
USER QUERY: {request.user_prompt}
|
|
||||||
|
|
||||||
Your task: Create a search query and parameters for the USER QUERY given.
|
|
||||||
|
|
||||||
RULES:
|
|
||||||
1. The search query MUST be related to the user query above
|
|
||||||
2. Extract key terms from the user query
|
|
||||||
3. Determine appropriate country/language based on the query context
|
|
||||||
4. Keep search query short (2-6 words)
|
|
||||||
|
|
||||||
Return ONLY this JSON format:
|
|
||||||
{{
|
|
||||||
"user_prompt": "search query based on user query above",
|
|
||||||
"country": "Full English country name (ISO-3166; map codes via pycountry/i18n-iso-countries)",
|
|
||||||
"language": "language_code_or_null",
|
|
||||||
"topic": "general|news|academic_or_null",
|
|
||||||
"time_range": "d|w|m|y_or_null",
|
|
||||||
"selection_strategy": "single|multiple|specific_page",
|
|
||||||
"selection_criteria": "what URLs to prioritize",
|
|
||||||
"expected_url_patterns": ["pattern1", "pattern2"],
|
|
||||||
"estimated_result_count": number
|
|
||||||
}}"""
|
|
||||||
|
|
||||||
# Get AI response for query optimization
|
|
||||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions
|
|
||||||
ai_request = AiCallRequest(
|
|
||||||
prompt=query_optimizer_prompt,
|
|
||||||
options=AiCallOptions()
|
|
||||||
)
|
|
||||||
|
|
||||||
# Write web research query optimization prompt to debug file
|
|
||||||
self.services.utils.writeDebugFile(query_optimizer_prompt, "web_research_query_optimizer_prompt")
|
|
||||||
|
|
||||||
ai_response_obj = await self.aiObjects.call(ai_request)
|
|
||||||
ai_response = ai_response_obj.content
|
|
||||||
|
|
||||||
# Write web research query optimization response to debug file
|
|
||||||
self.services.utils.writeDebugFile(ai_response, "web_research_query_optimizer_response")
|
|
||||||
logger.debug(f"AI query optimizer response: {ai_response}")
|
|
||||||
|
|
||||||
# Parse AI response to extract search query
|
|
||||||
import json
|
|
||||||
try:
|
|
||||||
# Clean the response by removing markdown code blocks
|
|
||||||
cleaned_response = ai_response.strip()
|
|
||||||
if cleaned_response.startswith('```json'):
|
|
||||||
cleaned_response = cleaned_response[7:] # Remove ```json
|
|
||||||
if cleaned_response.endswith('```'):
|
|
||||||
cleaned_response = cleaned_response[:-3] # Remove ```
|
|
||||||
cleaned_response = cleaned_response.strip()
|
|
||||||
|
|
||||||
query_data = json.loads(cleaned_response)
|
|
||||||
search_query = query_data.get("user_prompt", request.user_prompt)
|
|
||||||
ai_country = query_data.get("country")
|
|
||||||
ai_language = query_data.get("language")
|
|
||||||
ai_topic = query_data.get("topic")
|
|
||||||
ai_time_range = query_data.get("time_range")
|
|
||||||
selection_strategy = query_data.get("selection_strategy", "multiple")
|
|
||||||
selection_criteria = query_data.get("selection_criteria", "relevant URLs")
|
|
||||||
expected_patterns = query_data.get("expected_url_patterns", [])
|
|
||||||
estimated_count = query_data.get("estimated_result_count", request.max_results)
|
|
||||||
|
|
||||||
logger.info(f"AI optimized search query: '{search_query}'")
|
|
||||||
logger.info(f"Selection strategy: {selection_strategy}")
|
|
||||||
logger.info(f"Selection criteria: {selection_criteria}")
|
|
||||||
logger.info(f"Expected URL patterns: {expected_patterns}")
|
|
||||||
logger.info(f"Estimated result count: {estimated_count}")
|
|
||||||
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
logger.warning("Failed to parse AI response as JSON, using original query")
|
|
||||||
search_query = request.user_prompt
|
|
||||||
ai_country = None
|
|
||||||
ai_language = None
|
|
||||||
ai_topic = None
|
|
||||||
ai_time_range = None
|
|
||||||
selection_strategy = "multiple"
|
|
||||||
|
|
||||||
# Perform the web search with AI-determined parameters
|
|
||||||
search_kwargs = {
|
|
||||||
"query": search_query,
|
|
||||||
"max_results": request.max_results,
|
|
||||||
"search_depth": request.options.search_depth,
|
|
||||||
"auto_parameters": False # Use explicit parameters
|
|
||||||
}
|
|
||||||
|
|
||||||
# Add parameters only if they have valid values
|
|
||||||
def _normalizeCountry(c: Optional[str]) -> Optional[str]:
|
|
||||||
if not c:
|
|
||||||
return None
|
|
||||||
s = str(c).strip()
|
|
||||||
if not s or s.lower() in ['null', 'none', 'undefined']:
|
|
||||||
return None
|
|
||||||
# Map common codes to full English names when easy to do without extra deps
|
|
||||||
mapping = {
|
|
||||||
'ch': 'Switzerland', 'che': 'Switzerland',
|
|
||||||
'de': 'Germany', 'ger': 'Germany', 'deu': 'Germany',
|
|
||||||
'at': 'Austria', 'aut': 'Austria',
|
|
||||||
'us': 'United States', 'usa': 'United States', 'uni ted states': 'United States',
|
|
||||||
'uk': 'United Kingdom', 'gb': 'United Kingdom', 'gbr': 'United Kingdom'
|
|
||||||
}
|
|
||||||
key = s.lower()
|
|
||||||
if key in mapping:
|
|
||||||
return mapping[key]
|
|
||||||
# If looks like full name, capitalize first letter only (Tavily accepts English names)
|
|
||||||
return s
|
|
||||||
|
|
||||||
norm_ai_country = _normalizeCountry(ai_country)
|
|
||||||
norm_req_country = _normalizeCountry(request.options.country)
|
|
||||||
if norm_ai_country:
|
|
||||||
search_kwargs["country"] = norm_ai_country
|
|
||||||
elif norm_req_country:
|
|
||||||
search_kwargs["country"] = norm_req_country
|
|
||||||
|
|
||||||
if ai_language and ai_language not in ['null', '', 'none', 'undefined']:
|
|
||||||
search_kwargs["language"] = ai_language
|
|
||||||
elif request.options.language and request.options.language not in ['null', '', 'none', 'undefined']:
|
|
||||||
search_kwargs["language"] = request.options.language
|
|
||||||
|
|
||||||
if ai_topic and ai_topic in ['general', 'news', 'academic']:
|
|
||||||
search_kwargs["topic"] = ai_topic
|
|
||||||
elif request.options.topic and request.options.topic in ['general', 'news', 'academic']:
|
|
||||||
search_kwargs["topic"] = request.options.topic
|
|
||||||
|
|
||||||
if ai_time_range and ai_time_range in ['d', 'w', 'm', 'y']:
|
|
||||||
search_kwargs["time_range"] = ai_time_range
|
|
||||||
elif request.options.time_range and request.options.time_range in ['d', 'w', 'm', 'y']:
|
|
||||||
search_kwargs["time_range"] = request.options.time_range
|
|
||||||
|
|
||||||
# Constrain by expected domains if provided by AI
|
|
||||||
try:
|
|
||||||
include_domains = []
|
|
||||||
for p in expected_patterns or []:
|
|
||||||
if not isinstance(p, str):
|
|
||||||
continue
|
|
||||||
# Extract bare domain from pattern or URL
|
|
||||||
import re
|
|
||||||
m = re.search(r"(?:https?://)?([^/\s]+)", p.strip())
|
|
||||||
if m:
|
|
||||||
domain = m.group(1).lower()
|
|
||||||
# strip leading www.
|
|
||||||
if domain.startswith('www.'):
|
|
||||||
domain = domain[4:]
|
|
||||||
include_domains.append(domain)
|
|
||||||
# Deduplicate
|
|
||||||
if include_domains:
|
|
||||||
seen = set()
|
|
||||||
uniq = []
|
|
||||||
for d in include_domains:
|
|
||||||
if d not in seen:
|
|
||||||
seen.add(d)
|
|
||||||
uniq.append(d)
|
|
||||||
search_kwargs["include_domains"] = uniq
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Log the parameters being used
|
|
||||||
logger.info(f"Search parameters: country={search_kwargs.get('country', 'not_set')}, language={search_kwargs.get('language', 'not_set')}, topic={search_kwargs.get('topic', 'not_set')}, time_range={search_kwargs.get('time_range', 'not_set')}, include_domains={search_kwargs.get('include_domains', [])}")
|
|
||||||
|
|
||||||
search_results = await self.aiObjects.search_websites(**search_kwargs)
|
|
||||||
|
|
||||||
logger.debug(f"Web search returned {len(search_results)} results:")
|
|
||||||
for i, result in enumerate(search_results, 1):
|
|
||||||
logger.debug(f" {i}. {result.url} - {result.title}")
|
|
||||||
|
|
||||||
# Deduplicate while preserving order
|
|
||||||
seen = set()
|
|
||||||
search_urls = []
|
|
||||||
for r in search_results:
|
|
||||||
u = str(r.url)
|
|
||||||
if u not in seen:
|
|
||||||
seen.add(u)
|
|
||||||
search_urls.append(u)
|
|
||||||
|
|
||||||
logger.info(f"After initial deduplication: {len(search_urls)} unique URLs from {len(search_results)} search results")
|
|
||||||
|
|
||||||
if not search_urls:
|
|
||||||
logger.error("No relevant websites found")
|
|
||||||
return WebResearchResult(success=False, error="No relevant websites found")
|
|
||||||
|
|
||||||
# Now use AI to determine the main URLs based on user's intention
|
|
||||||
logger.info(f"AI selecting main URLs from {len(search_urls)} search results based on user intent")
|
|
||||||
|
|
||||||
# Create a prompt for AI to identify main URLs based on user's intention
|
|
||||||
ai_prompt = f"""
|
|
||||||
Select the most relevant URLs from these search results:
|
|
||||||
|
|
||||||
{chr(10).join([f"{i+1}. {url}" for i, url in enumerate(search_urls)])}
|
|
||||||
|
|
||||||
Return only the URLs that are most relevant for the user's query.
|
|
||||||
One URL per line.
|
|
||||||
"""
|
|
||||||
# Create AI call request
|
|
||||||
ai_request = AiCallRequest(
|
|
||||||
prompt=ai_prompt,
|
|
||||||
options=AiCallOptions()
|
|
||||||
)
|
|
||||||
|
|
||||||
# Write web research URL selection prompt to debug file
|
|
||||||
self.services.utils.writeDebugFile(ai_prompt, "web_research_url_selection_prompt")
|
|
||||||
|
|
||||||
ai_response_obj = await self.aiObjects.call(ai_request)
|
|
||||||
ai_response = ai_response_obj.content
|
|
||||||
|
|
||||||
# Write web research URL selection response to debug file
|
|
||||||
self.services.utils.writeDebugFile(ai_response, "web_research_url_selection_response")
|
|
||||||
logger.debug(f"AI response for main URL selection: {ai_response}")
|
|
||||||
|
|
||||||
# Parse AI response to extract URLs
|
|
||||||
websites = []
|
|
||||||
for line in ai_response.strip().split('\n'):
|
|
||||||
line = line.strip()
|
|
||||||
if line and ('http://' in line or 'https://' in line):
|
|
||||||
# Extract URL from the line
|
|
||||||
for word in line.split():
|
|
||||||
if word.startswith('http://') or word.startswith('https://'):
|
|
||||||
websites.append(word.rstrip('.,;'))
|
|
||||||
break
|
|
||||||
|
|
||||||
if not websites:
|
|
||||||
logger.warning("AI did not identify any main URLs, using first few search results")
|
|
||||||
websites = search_urls[:3] # Fallback to first 3 search results
|
|
||||||
|
|
||||||
# Deduplicate while preserving order
|
|
||||||
seen = set()
|
|
||||||
unique_websites = []
|
|
||||||
for url in websites:
|
|
||||||
if url not in seen:
|
|
||||||
seen.add(url)
|
|
||||||
unique_websites.append(url)
|
|
||||||
|
|
||||||
websites = unique_websites
|
|
||||||
logger.info(f"After AI selection deduplication: {len(websites)} unique URLs from {len(websites)} AI-selected URLs")
|
|
||||||
|
|
||||||
logger.info(f"AI selected {len(websites)} main URLs (after deduplication):")
|
|
||||||
for i, url in enumerate(websites, 1):
|
|
||||||
logger.info(f" {i}. {url}")
|
|
||||||
|
|
||||||
# Step 2: Smart website selection using AI interface
|
|
||||||
logger.info(f"=== STEP 2: FILTERED URL LIST BY USER PROMPT'S INTENTION ===")
|
|
||||||
logger.info(f"AI analyzing {len(websites)} URLs for relevance to: '{request.user_prompt}'")
|
|
||||||
|
|
||||||
selectedWebsites, aiResponse = await self.aiObjects.selectRelevantWebsites(websites, request.user_prompt)
|
|
||||||
|
|
||||||
logger.debug(f"AI Response: {aiResponse}")
|
|
||||||
logger.debug(f"AI selected {len(selectedWebsites)} most relevant URLs:")
|
|
||||||
for i, url in enumerate(selectedWebsites, 1):
|
|
||||||
logger.debug(f" {i}. {url}")
|
|
||||||
|
|
||||||
# Show which were filtered out
|
|
||||||
filtered_out = [url for url in websites if url not in selectedWebsites]
|
|
||||||
if filtered_out:
|
|
||||||
logger.debug(f"Filtered out {len(filtered_out)} less relevant URLs:")
|
|
||||||
for i, url in enumerate(filtered_out, 1):
|
|
||||||
logger.debug(f" {i}. {url}")
|
|
||||||
|
|
||||||
# Step 3+4+5: Recursive crawling with configurable depth
|
|
||||||
# Get configuration parameters
|
|
||||||
max_depth = int(APP_CONFIG.get("Web_Research_MAX_DEPTH", "2"))
|
|
||||||
max_links_per_domain = int(APP_CONFIG.get("Web_Research_MAX_LINKS_PER_DOMAIN", "4"))
|
|
||||||
crawl_timeout_minutes = int(APP_CONFIG.get("Web_Research_CRAWL_TIMEOUT_MINUTES", "10"))
|
|
||||||
crawl_timeout_seconds = crawl_timeout_minutes * 60
|
|
||||||
|
|
||||||
# Use the configured max_depth or the request's pages_search_depth, whichever is smaller
|
|
||||||
effective_depth = min(max_depth, request.options.pages_search_depth)
|
|
||||||
|
|
||||||
logger.info(f"=== STEP 3+4+5: RECURSIVE CRAWLING (DEPTH {effective_depth}) ===")
|
|
||||||
logger.info(f"Starting recursive crawl of {len(selectedWebsites)} main websites...")
|
|
||||||
logger.info(f"Search depth: {effective_depth} levels (max configured: {max_depth})")
|
|
||||||
logger.info(f"Max links per domain: {max_links_per_domain}")
|
|
||||||
logger.info(f"Crawl timeout: {crawl_timeout_minutes} minutes")
|
|
||||||
|
|
||||||
# Use recursive crawling with URL index to avoid duplicates
|
|
||||||
import asyncio
|
|
||||||
try:
|
|
||||||
allContent = await asyncio.wait_for(
|
|
||||||
self.aiObjects.crawlRecursively(
|
|
||||||
urls=selectedWebsites,
|
|
||||||
max_depth=effective_depth,
|
|
||||||
extract_depth=request.options.extract_depth,
|
|
||||||
max_per_domain=max_links_per_domain,
|
|
||||||
global_processed_urls=global_processed_urls
|
|
||||||
),
|
|
||||||
timeout=crawl_timeout_seconds
|
|
||||||
)
|
|
||||||
logger.info(f"Crawling completed within timeout: {len(allContent)} pages crawled")
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
logger.warning(f"Crawling timed out after {crawl_timeout_minutes} minutes, using partial results")
|
|
||||||
# crawlRecursively now handles timeouts gracefully and returns partial results
|
|
||||||
# Try to get the partial results that were collected
|
|
||||||
allContent = {}
|
|
||||||
|
|
||||||
if not allContent:
|
|
||||||
logger.error("Could not extract content from any websites")
|
|
||||||
return WebResearchResult(success=False, error="Could not extract content from any websites")
|
|
||||||
|
|
||||||
logger.info(f"=== WEB RESEARCH COMPLETED ===")
|
|
||||||
logger.info(f"Successfully crawled {len(allContent)} URLs total")
|
|
||||||
logger.info(f"Crawl depth: {effective_depth} levels")
|
|
||||||
|
|
||||||
# Create simple result with raw content
|
|
||||||
sources = [{"title": url, "url": url} for url in selectedWebsites]
|
|
||||||
|
|
||||||
# Get all additional links (all URLs except main ones)
|
|
||||||
additional_links = [url for url in allContent.keys() if url not in selectedWebsites]
|
|
||||||
|
|
||||||
# Combine all content into a single result
|
|
||||||
combinedContent = ""
|
|
||||||
for url, content in allContent.items():
|
|
||||||
combinedContent += f"\n\n=== {url} ===\n{content}\n"
|
|
||||||
|
|
||||||
# Create simplified document structure
|
|
||||||
document = {
|
|
||||||
"documentName": f"webResearch_{request.user_prompt[:50]}.json",
|
|
||||||
"documentData": {
|
|
||||||
"user_prompt": request.user_prompt,
|
|
||||||
"analysis_result": combinedContent,
|
|
||||||
"sources": sources,
|
|
||||||
"additional_links": additional_links,
|
|
||||||
"metadata": {
|
|
||||||
"websites_analyzed": len(allContent),
|
|
||||||
"additional_links_found": len(additional_links),
|
|
||||||
"crawl_depth": effective_depth,
|
|
||||||
"max_configured_depth": max_depth,
|
|
||||||
"max_links_per_domain": max_links_per_domain,
|
|
||||||
"crawl_timeout_minutes": crawl_timeout_minutes,
|
|
||||||
"total_urls_crawled": len(allContent),
|
|
||||||
"main_urls": len(selectedWebsites),
|
|
||||||
"additional_urls": len(additional_links)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"mimeType": "application/json"
|
|
||||||
}
|
|
||||||
|
|
||||||
return WebResearchResult(
|
|
||||||
success=True,
|
|
||||||
documents=[document]
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in web research: {str(e)}")
|
|
||||||
return WebResearchResult(success=False, error=str(e))
|
|
||||||
|
|
@@ -10,7 +10,7 @@ from datetime import datetime, UTC
 from modules.workflows.methods.methodBase import MethodBase, action
 from modules.datamodels.datamodelChat import ActionResult
-from modules.datamodels.datamodelAi import AiCallOptions
+from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
 from modules.datamodels.datamodelChat import ChatDocument
 from modules.aicore.aicorePluginTavily import WebResearchRequest
@@ -28,6 +28,7 @@ class MethodAi(MethodBase):
         """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
         return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")

     @action
     async def process(self, parameters: Dict[str, Any]) -> ActionResult:
         """
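For reference, the timestamp format produced above; the enclosing method name is not visible in this hunk, so the call is shown stand-alone.

from datetime import datetime, UTC

stamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
print(f"web_search_{stamp}.json")  # e.g. web_search_20250101-093000.json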
@ -161,93 +162,512 @@ class MethodAi(MethodBase):
|
||||||
error=str(e)
|
error=str(e)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
+    @action
+    async def webSearch(self, parameters: Dict[str, Any]) -> ActionResult:
+        """
+        GENERAL:
+        - Purpose: Search the web and return a list of relevant URLs only.
+        - Input requirements: searchPrompt (required); optional maxResults, timeRange, country, language.
+        - Output format: JSON with search results and URLs.
+
+        Parameters:
+        - searchPrompt (str, required): Natural language search prompt describing what to search for.
+        - maxResults (int, optional): Maximum number of search results. Default: 5.
+        - timeRange (str, optional): d | w | m | y for time filtering.
+        - country (str, optional): Country name for localized results.
+        - language (str, optional): Language code (e.g., de, en, fr).
+        """
+        try:
+            searchPrompt = parameters.get("searchPrompt")
+            if not searchPrompt:
+                return ActionResult.isFailure(error="Search prompt is required")
+
+            # Extract optional parameters
+            maxResults = parameters.get("maxResults", 5)
+            timeRange = parameters.get("timeRange")
+            country = parameters.get("country")
+            language = parameters.get("language")
+
+            # Build AI call options for web search
+            options = AiCallOptions(
+                operationType=OperationTypeEnum.WEB_SEARCH,
+                resultFormat="json"
+            )
+
+            # Create unified prompt JSON that both Tavily and Perplexity can understand
+            promptData = {
+                "searchPrompt": searchPrompt,
+                "maxResults": maxResults,
+                "timeRange": timeRange,
+                "country": country,
+                "language": language,
+                "instructions": "Search the web and return a JSON response with a 'results' array containing objects with 'title', 'url', and optionally 'content' fields. Focus on finding relevant URLs for the search prompt."
+            }
+
+            import json
+            prompt = json.dumps(promptData, indent=2)
+
+            # Call AI service through unified path
+            result = await self.services.ai.callAiDocuments(
+                prompt=prompt,
+                documents=None,
+                options=options,
+                outputFormat="json"
+            )
+
+            # Process result to ensure consistent format
+            processedResult = self._processWebSearchResult(result)
+
+            # Create meaningful filename
+            meaningfulName = self._generateMeaningfulFileName(
+                base_name="web_search",
+                extension="json",
+                action_name="search"
+            )
+
+            from modules.datamodels.datamodelChat import ActionDocument
+            actionDocument = ActionDocument(
+                documentName=meaningfulName,
+                documentData=processedResult,
+                mimeType="application/json"
+            )
+
+            return ActionResult.isSuccess(documents=[actionDocument])
+
+        except Exception as e:
+            logger.error(f"Error in web search: {str(e)}")
+            return ActionResult.isFailure(error=str(e))
+
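Taken together, webSearch is a thin wrapper: it validates searchPrompt, folds the remaining parameters into the unified prompt JSON, and hands that string to callAiDocuments. A minimal sketch of the payload one call would produce, using the field names from the code above; the parameter values themselves are illustrative and not taken from the repository:

    import json

    # Illustrative action parameters; names follow the docstring above.
    parameters = {
        "searchPrompt": "open source vector databases",
        "maxResults": 3,
        "timeRange": "m",
        "language": "en",
    }

    # The same wrapping the action performs before calling self.services.ai.callAiDocuments(...).
    promptData = {
        "searchPrompt": parameters["searchPrompt"],
        "maxResults": parameters.get("maxResults", 5),
        "timeRange": parameters.get("timeRange"),
        "country": parameters.get("country"),
        "language": parameters.get("language"),
        "instructions": "Search the web and return a JSON response with a 'results' array ...",
    }
    print(json.dumps(promptData, indent=2))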
+    def _processWebSearchResult(self, result: str) -> str:
+        """
+        Process web search result to ensure consistent JSON format with URL list.
+        Both Tavily and Perplexity now return proper JSON format.
+        """
+        try:
+            import json
+            data = json.loads(result)
+
+            # If it's already a proper search result format, return as-is
+            if isinstance(data, dict) and "results" in data:
+                return result
+
+            # If it's a different JSON format, try to extract URLs
+            if isinstance(data, dict):
+                # Look for URL patterns in the JSON
+                urls = self._extractUrlsFromJson(data)
+                if urls:
+                    processedData = {
+                        "query": data.get("query", "web search"),
+                        "results": [{"title": f"Result {i+1}", "url": url} for i, url in enumerate(urls)],
+                        "total_count": len(urls)
+                    }
+                    return json.dumps(processedData, indent=2)
+
+            # No URLs found, return original result in a structured format
+            processedData = {
+                "query": "web search",
+                "results": [],
+                "total_count": 0,
+                "raw_response": result
+            }
+            return json.dumps(processedData, indent=2)
+
+        except Exception as e:
+            logger.warning(f"Error processing web search result: {str(e)}")
+            # Return original result wrapped in error format
+            errorData = {
+                "query": "web search",
+                "results": [],
+                "total_count": 0,
+                "error": f"Failed to process result: {str(e)}",
+                "raw_response": result
+            }
+            return json.dumps(errorData, indent=2)
+
+    def _extractUrlsFromJson(self, data: Dict[str, Any]) -> List[str]:
+        """Extract URLs from JSON data structure."""
+        urls = []
+
+        def _extractFromValue(value):
+            if isinstance(value, str):
+                # Check if it's a URL
+                if value.startswith(('http://', 'https://')):
+                    urls.append(value)
+            elif isinstance(value, dict):
+                for v in value.values():
+                    _extractFromValue(v)
+            elif isinstance(value, list):
+                for item in value:
+                    _extractFromValue(item)
+
+        _extractFromValue(data)
+        return list(set(urls))  # Remove duplicates
+
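_extractUrlsFromJson walks arbitrarily nested dicts and lists and collects every string that starts with http:// or https://; note that the set() used for de-duplication does not preserve the original ordering. A standalone sketch of the same traversal on sample data, mirroring the helper above:

    from typing import Any, List

    def extract_urls(data: Any) -> List[str]:
        # Recursive walk over dicts, lists, and strings, as in _extractUrlsFromJson above.
        urls: List[str] = []

        def walk(value: Any) -> None:
            if isinstance(value, str):
                if value.startswith(("http://", "https://")):
                    urls.append(value)
            elif isinstance(value, dict):
                for v in value.values():
                    walk(v)
            elif isinstance(value, list):
                for item in value:
                    walk(item)

        walk(data)
        return list(set(urls))  # order is not preserved

    sample = {"answer": "see sources", "sources": [{"url": "https://example.org/a"}, "https://example.org/b"]}
    print(extract_urls(sample))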
+    @action
+    async def webCrawl(self, parameters: Dict[str, Any]) -> ActionResult:
+        """
+        GENERAL:
+        - Purpose: Extract content from specific URLs.
+        - Input requirements: urls (required); optional extractDepth, format.
+        - Output format: JSON with extracted content from URLs.
+
+        Parameters:
+        - urls (list, required): List of URLs to crawl and extract content from.
+        - extractDepth (str, optional): basic | advanced. Default: advanced.
+        - format (str, optional): markdown | html | text. Default: markdown.
+        """
+        try:
+            urls = parameters.get("urls")
+            if not urls or not isinstance(urls, list):
+                return ActionResult.isFailure(error="URLs list is required")
+
+            # Extract optional parameters
+            extractDepth = parameters.get("extractDepth", "advanced")
+            formatType = parameters.get("format", "markdown")
+
+            # Build AI call options for web crawling
+            options = AiCallOptions(
+                operationType=OperationTypeEnum.WEB_CRAWL,
+                resultFormat="json"
+            )
+
+            # Create unified prompt JSON for web crawling
+            promptData = {
+                "urls": urls,
+                "extractDepth": extractDepth,
+                "format": formatType,
+                "instructions": "Extract content from the provided URLs and return a JSON response with 'results' array containing objects with 'url', 'title', 'content', and 'extractedAt' fields."
+            }
+
+            import json
+            prompt = json.dumps(promptData, indent=2)
+
+            # Call AI service through unified path
+            result = await self.services.ai.callAiDocuments(
+                prompt=prompt,
+                documents=None,
+                options=options,
+                outputFormat="json"
+            )
+
+            # Create meaningful filename
+            meaningfulName = self._generateMeaningfulFileName(
+                base_name="web_crawl",
+                extension="json",
+                action_name="crawl"
+            )
+
+            from modules.datamodels.datamodelChat import ActionDocument
+            actionDocument = ActionDocument(
+                documentName=meaningfulName,
+                documentData=result,
+                mimeType="application/json"
+            )
+
+            return ActionResult.isSuccess(documents=[actionDocument])
+
+        except Exception as e:
+            logger.error(f"Error in web crawl: {str(e)}")
+            return ActionResult.isFailure(error=str(e))
+
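webCrawl stores whatever string callAiDocuments returns as an application/json document, so the only schema is the one requested in the 'instructions' field of the prompt. For orientation, a sketch of the shape those instructions ask for; the example values are invented:

    import json

    # Result shape requested by the webCrawl instructions above (values are illustrative).
    expected = {
        "results": [
            {
                "url": "https://example.org/docs",
                "title": "Example documentation page",
                "content": "# Example documentation page\n...",
                "extractedAt": "2025-01-01T00:00:00Z",
            }
        ]
    }
    print(json.dumps(expected, indent=2))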
     @action
     async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
         """
         GENERAL:
-        - Purpose: Web research and information gathering with basic analysis and sources.
-        - Input requirements: user_prompt (required); optional urls, max_results, max_pages, search_depth, extract_depth, pages_search_depth, country, time_range, topic, language.
-        - Output format: JSON with results and sources.
+        - Purpose: Comprehensive web research combining search and content extraction.
+        - Input requirements: researchPrompt (required); optional maxResults, urls, timeRange, country, language.
+        - Output format: JSON with research results, sources, and analysis.
 
         Parameters:
-        - user_prompt (str, required): Research question or topic.
-        - urls (list, optional): Specific URLs to crawl.
-        - max_results (int, optional): Max search results. Default: 5.
-        - max_pages (int, optional): Max pages to crawl per site. Default: 5.
-        - extract_depth (str, optional): basic | advanced. Default: advanced.
-        - search_depth (int, optional): Crawl depth level - how many times to follow sublinks of a page. Default: 2.
-        - country (str, optional): Full English country name (ISO-3166; map codes via pycountry/i18n-iso-countries).
-        - time_range (str, optional): d | w | m | y.
-        - topic (str, optional): general | news | academic.
+        - researchPrompt (str, required): Natural language research prompt describing what to research.
+        - maxResults (int, optional): Maximum search results. Default: 5.
+        - urls (list, optional): Specific URLs to include in research.
+        - timeRange (str, optional): d | w | m | y for time filtering.
+        - country (str, optional): Country name for localized results.
         - language (str, optional): Language code (e.g., de, en, fr).
         """
         try:
-            user_prompt = parameters.get("user_prompt")
+            researchPrompt = parameters.get("researchPrompt")
+            if not researchPrompt:
+                return ActionResult.isFailure(error="Research prompt is required")
+
+            # Extract optional parameters
+            maxResults = parameters.get("maxResults", 5)
             urls = parameters.get("urls")
-            max_results = parameters.get("max_results", 5)
-            max_pages = parameters.get("max_pages", 5)
-            extract_depth = parameters.get("extract_depth", "advanced")
-            search_depth = parameters.get("pages_search_depth", 2)
+            timeRange = parameters.get("timeRange")
             country = parameters.get("country")
-            time_range = parameters.get("time_range")
-            topic = parameters.get("topic")
             language = parameters.get("language")
 
-            if not user_prompt:
-                return ActionResult.isFailure(
-                    error="Search query is required"
-                )
-
-            # Build WebResearchRequest (simplified dataclass)
-            request = WebResearchRequest(
-                user_prompt=user_prompt,
-                urls=urls,
-                max_results=max_results,
-                max_pages=max_pages,
-                search_depth=search_depth,
-                extract_depth=extract_depth,
-                country=country,
-                time_range=time_range,
-                topic=topic,
-                language=language
-            )
-
-            # Call web research service
-            logger.info(f"Performing comprehensive web research for: {user_prompt}")
-            logger.info(f"Max results: {max_results}, Max pages: {max_pages}")
-            if urls:
-                logger.info(f"Using provided URLs: {len(urls)}")
-
-            result = await self.services.ai.webResearch(request)
-
-            if not result.success:
-                return ActionResult.isFailure(error=result.error)
-
-            # Convert WebResearchResult to ActionResult format
-            documents = []
-            for doc in result.documents:
-                documents.append({
-                    "documentName": doc.documentName,
-                    "documentData": {
-                        "user_prompt": doc.documentData.user_prompt,
-                        "websites_analyzed": doc.documentData.websites_analyzed,
-                        "additional_links_found": doc.documentData.additional_links_found,
-                        "analysis_result": doc.documentData.analysis_result,
-                        "sources": [{"title": s.title, "url": str(s.url)} for s in doc.documentData.sources],
-                        "additional_links": doc.documentData.additional_links,
-                        "debug_info": doc.documentData.debug_info
-                    },
-                    "mimeType": doc.mimeType
-                })
-
-            # Return result in the standard ActionResult format
-            return ActionResult.isSuccess(
-                documents=documents
-            )
+            # Build AI call options for web research
+            options = AiCallOptions(
+                operationType=OperationTypeEnum.WEB_RESEARCH,
+                resultFormat="json"
+            )
+
+            # Create unified prompt JSON for web research
+            promptData = {
+                "researchPrompt": researchPrompt,
+                "maxResults": maxResults,
+                "urls": urls,
+                "timeRange": timeRange,
+                "country": country,
+                "language": language,
+                "instructions": "Conduct comprehensive web research and return a JSON response with 'results' array containing objects with 'title', 'url', 'content', and 'analysis' fields. Provide detailed analysis and insights."
+            }
+
+            import json
+            prompt = json.dumps(promptData, indent=2)
+
+            # Call AI service through unified path
+            result = await self.services.ai.callAiDocuments(
+                prompt=prompt,
+                documents=None,
+                options=options,
+                outputFormat="json"
+            )
+
+            # Create meaningful filename
+            meaningfulName = self._generateMeaningfulFileName(
+                base_name="web_research",
+                extension="json",
+                action_name="research"
+            )
+
+            from modules.datamodels.datamodelChat import ActionDocument
+            actionDocument = ActionDocument(
+                documentName=meaningfulName,
+                documentData=result,
+                mimeType="application/json"
+            )
+
+            return ActionResult.isSuccess(documents=[actionDocument])
 
         except Exception as e:
             logger.error(f"Error in web research: {str(e)}")
-            return ActionResult.isFailure(
-                error=str(e)
-            )
+            return ActionResult.isFailure(error=str(e))
 
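The substance of the webResearch change is the call site: the removed code built a WebResearchRequest and went through the dedicated research service, while the added code serializes the same intent as a unified prompt JSON and routes it through callAiDocuments like the other web operations. A small runnable sketch contrasting the two payloads; field names come from the code above, the values are illustrative:

    import json

    researchPrompt = "compare vector databases for RAG workloads"  # illustrative topic

    # Fields of the removed WebResearchRequest path (a representative subset).
    oldRequest = {
        "user_prompt": researchPrompt,
        "max_results": 5,
        "max_pages": 5,
        "search_depth": 2,
        "extract_depth": "advanced",
    }

    # Unified prompt JSON built by the added code.
    newPrompt = {
        "researchPrompt": researchPrompt,
        "maxResults": 5,
        "urls": None,
        "timeRange": None,
        "country": None,
        "language": None,
        "instructions": "Conduct comprehensive web research and return a JSON response ...",
    }

    print(json.dumps(oldRequest, indent=2))
    print(json.dumps(newPrompt, indent=2))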
+    @action
+    async def webQuestions(self, parameters: Dict[str, Any]) -> ActionResult:
+        """
+        GENERAL:
+        - Purpose: Answer questions using web research and AI analysis.
+        - Input requirements: question (required); optional context, maxResults, timeRange, country, language.
+        - Output format: JSON with question answer and supporting sources.
+
+        Parameters:
+        - question (str, required): Question to be answered using web research.
+        - context (str, optional): Additional context for the question.
+        - maxResults (int, optional): Maximum search results. Default: 5.
+        - timeRange (str, optional): d | w | m | y for time filtering.
+        - country (str, optional): Country name for localized results.
+        - language (str, optional): Language code (e.g., de, en, fr).
+        """
+        try:
+            question = parameters.get("question")
+            if not question:
+                return ActionResult.isFailure(error="Question is required")
+
+            # Extract optional parameters
+            context = parameters.get("context", "")
+            maxResults = parameters.get("maxResults", 5)
+            timeRange = parameters.get("timeRange")
+            country = parameters.get("country")
+            language = parameters.get("language")
+
+            # Build AI call options for web questions
+            options = AiCallOptions(
+                operationType=OperationTypeEnum.WEB_QUESTIONS,
+                resultFormat="json"
+            )
+
+            # Create unified prompt JSON for web questions
+            promptData = {
+                "question": question,
+                "context": context,
+                "maxResults": maxResults,
+                "timeRange": timeRange,
+                "country": country,
+                "language": language,
+                "instructions": "Answer the question using web research and return a JSON response with 'answer', 'sources' array containing objects with 'title', 'url', 'content', and 'relevance' fields."
+            }
+
+            import json
+            prompt = json.dumps(promptData, indent=2)
+
+            # Call AI service through unified path
+            result = await self.services.ai.callAiDocuments(
+                prompt=prompt,
+                documents=None,
+                options=options,
+                outputFormat="json"
+            )
+
+            # Create meaningful filename
+            meaningfulName = self._generateMeaningfulFileName(
+                base_name="web_questions",
+                extension="json",
+                action_name="questions"
+            )
+
+            from modules.datamodels.datamodelChat import ActionDocument
+            actionDocument = ActionDocument(
+                documentName=meaningfulName,
+                documentData=result,
+                mimeType="application/json"
+            )
+
+            return ActionResult.isSuccess(documents=[actionDocument])
+
+        except Exception as e:
+            logger.error(f"Error in web questions: {str(e)}")
+            return ActionResult.isFailure(error=str(e))
+
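webQuestions stores whatever JSON string the connector returns directly in the ActionDocument, so a consumer still has to parse it against the shape named in the instructions field. A defensive sketch of that parsing; the sample payload is invented for illustration:

    import json

    raw = '{"answer": "42", "sources": [{"title": "Example", "url": "https://example.org", "relevance": 0.9}]}'

    try:
        data = json.loads(raw)
        answer = data.get("answer", "")
        sources = data.get("sources", [])
    except json.JSONDecodeError:
        # Connectors are only asked, not forced, to return JSON, so fall back gracefully.
        answer, sources = raw, []

    print(answer, [s.get("url") for s in sources])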
+    @action
+    async def webNews(self, parameters: Dict[str, Any]) -> ActionResult:
+        """
+        GENERAL:
+        - Purpose: Search and analyze news articles on specific topics.
+        - Input requirements: newsPrompt (required); optional maxResults, timeRange, country, language.
+        - Output format: JSON with news articles, summaries, and analysis.
+
+        Parameters:
+        - newsPrompt (str, required): Natural language prompt describing what news to search for.
+        - maxResults (int, optional): Maximum news articles. Default: 5.
+        - timeRange (str, optional): d | w | m | y for time filtering. Default: w.
+        - country (str, optional): Country name for localized news.
+        - language (str, optional): Language code (e.g., de, en, fr).
+        """
+        try:
+            newsPrompt = parameters.get("newsPrompt")
+            if not newsPrompt:
+                return ActionResult.isFailure(error="News prompt is required")
+
+            # Extract optional parameters
+            maxResults = parameters.get("maxResults", 5)
+            timeRange = parameters.get("timeRange", "w") # Default to week
+            country = parameters.get("country")
+            language = parameters.get("language")
+
+            # Build AI call options for web news
+            options = AiCallOptions(
+                operationType=OperationTypeEnum.WEB_NEWS,
+                resultFormat="json"
+            )
+
+            # Create unified prompt JSON for web news
+            promptData = {
+                "newsPrompt": newsPrompt,
+                "maxResults": maxResults,
+                "timeRange": timeRange,
+                "country": country,
+                "language": language,
+                "instructions": "Find and analyze recent news articles and return a JSON response with 'articles' array containing objects with 'title', 'url', 'content', 'date', 'source', and 'summary' fields."
+            }
+
+            import json
+            prompt = json.dumps(promptData, indent=2)
+
+            # Call AI service through unified path
+            result = await self.services.ai.callAiDocuments(
+                prompt=prompt,
+                documents=None,
+                options=options,
+                outputFormat="json"
+            )
+
+            # Create meaningful filename
+            meaningfulName = self._generateMeaningfulFileName(
+                base_name="web_news",
+                extension="json",
+                action_name="news"
+            )
+
+            from modules.datamodels.datamodelChat import ActionDocument
+            actionDocument = ActionDocument(
+                documentName=meaningfulName,
+                documentData=result,
+                mimeType="application/json"
+            )
+
+            return ActionResult.isSuccess(documents=[actionDocument])
+
+        except Exception as e:
+            logger.error(f"Error in web news: {str(e)}")
+            return ActionResult.isFailure(error=str(e))
+
+    @action
+    async def generateImage(self, parameters: Dict[str, Any]) -> ActionResult:
+        """
+        GENERAL:
+        - Purpose: Generate images using AI based on text prompts.
+        - Input requirements: prompt (required); optional size, quality, style.
+        - Output format: Base64 encoded image data.
+
+        Parameters:
+        - prompt (str, required): Text description of the image to generate.
+        - size (str, optional): Image size. Options: 1024x1024, 1792x1024, 1024x1792. Default: 1024x1024.
+        - quality (str, optional): Image quality. Options: standard, hd. Default: standard.
+        - style (str, optional): Image style. Options: vivid, natural. Default: vivid.
+        """
+        try:
+            prompt = parameters.get("prompt")
+            if not prompt:
+                return ActionResult.isFailure(error="Image prompt is required")
+
+            # Extract optional parameters
+            size = parameters.get("size", "1024x1024")
+            quality = parameters.get("quality", "standard")
+            style = parameters.get("style", "vivid")
+
+            # Build AI call options for image generation
+            options = AiCallOptions(
+                operationType=OperationTypeEnum.IMAGE_GENERATE,
+                resultFormat="base64"
+            )
+
+            # Create unified prompt JSON for image generation
+            promptData = {
+                "prompt": prompt,
+                "size": size,
+                "quality": quality,
+                "style": style,
+                "instructions": "Generate an image based on the prompt and return the base64 encoded image data."
+            }
+
+            import json
+            promptJson = json.dumps(promptData, indent=2)
+
+            # Call AI service through unified path
+            result = await self.services.ai.callAiDocuments(
+                prompt=promptJson,
+                documents=None,
+                options=options,
+                outputFormat="base64"
+            )
+
+            # Create meaningful filename
+            meaningfulName = self._generateMeaningfulFileName(
+                base_name="generated_image",
+                extension="png",
+                action_name="generate"
+            )
+
+            from modules.datamodels.datamodelChat import ActionDocument
+            actionDocument = ActionDocument(
+                documentName=meaningfulName,
+                documentData=result,
+                mimeType="image/png"
+            )
+
+            return ActionResult.isSuccess(documents=[actionDocument])
+
+        except Exception as e:
+            logger.error(f"Error in image generation: {str(e)}")
+            return ActionResult.isFailure(error=str(e))
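generateImage expects callAiDocuments to hand back base64 image data and stores it as a PNG ActionDocument. Assuming the returned value is a raw base64 string, writing it to disk would look roughly like the sketch below; the function and variable names are illustrative, not part of the repository API:

    import base64

    def save_png(base64_data: str, path: str) -> None:
        # Decode the base64 payload produced by the image connector and write it as a PNG file.
        with open(path, "wb") as handle:
            handle.write(base64.b64decode(base64_data))

    # save_png(result, "generated_image.png")  # 'result' as returned by callAiDocuments above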