ai models ready for web and txt

This commit is contained in:
ValueOn AG 2025-10-26 18:17:17 +01:00
parent 72e0687826
commit 2489719c62
7 changed files with 577 additions and 232 deletions

View file

@@ -57,7 +57,7 @@ class AiPerplexity(BaseConnectorAi):
connectorType="perplexity",
apiUrl="https://api.perplexity.ai/chat/completions",
temperature=0.2,
- maxTokens=4000,
+ maxTokens=24000, # Increased for detailed web crawl responses (Perplexity supports up to 25k)
contextLength=32000,
costPer1kTokensInput=0.005,
costPer1kTokensOutput=0.005,
@@ -80,12 +80,12 @@ class AiPerplexity(BaseConnectorAi):
connectorType="perplexity",
apiUrl="https://api.perplexity.ai/chat/completions",
temperature=0.2,
- maxTokens=4000,
+ maxTokens=24000, # Increased for detailed web crawl responses (Perplexity supports up to 25k)
contextLength=32000,
costPer1kTokensInput=0.01,
costPer1kTokensOutput=0.01,
speedRating=6, # Slower due to AI analysis
- qualityRating=10, # Best AI analysis quality
+ qualityRating=9, # Best AI analysis quality
# capabilities removed (not used in business logic)
functionCall=self._routeWebOperation,
priority=PriorityEnum.QUALITY,
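# Illustrative sketch (not part of the commit): a minimal direct call against the configured
# endpoint, showing where temperature and the raised maxTokens end up in the request.
# The Bearer-token header and the OpenAI-style message schema are assumptions based on
# Perplexity's public chat/completions API; verify against the current API docs.
import httpx

async def perplexityExample(apiKey: str) -> str:
    payload = {
        "model": "sonar",
        "messages": [{"role": "user", "content": "https://www.valueon.ch: Who works in this company?"}],
        "temperature": 0.2,
        "max_tokens": 24000,  # matches the raised maxTokens above
    }
    async with httpx.AsyncClient(timeout=120) as client:
        response = await client.post(
            "https://api.perplexity.ai/chat/completions",
            headers={"Authorization": f"Bearer {apiKey}"},
            json=payload,
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]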
@@ -217,6 +217,42 @@ class AiPerplexity(BaseConnectorAi):
# Fallback to basic call
return await self.callAiBasic(modelCall)
def _getDepthInstructions(self, maxDepth: int) -> str:
"""
Map maxDepth (numeric) to instructional text for LLM.
Args:
maxDepth: 1 (fast/overview), 2 (general/standard), 3 (deep/comprehensive)
Returns:
Instructional text for the LLM
"""
depthMap = {
1: "Basic overview - extract main content from the main page only",
2: "Standard crawl - extract content from main page and linked pages (2 levels deep)",
3: "Deep crawl - comprehensively extract content from main page and all accessible linked pages (3+ levels deep)"
}
return depthMap.get(maxDepth, depthMap[2])
def _getWidthInstructions(self, maxWidth: int) -> str:
"""
Map maxWidth (numeric) to instructional text for LLM.
Args:
maxWidth: Number of pages to crawl at each level (default: 10)
Returns:
Instructional text for the LLM
"""
if maxWidth <= 5:
return f"Focused crawl - limit to {maxWidth} most relevant pages per level"
elif maxWidth <= 15:
return f"Standard breadth - crawl up to {maxWidth} pages per level"
elif maxWidth <= 30:
return f"Wide crawl - crawl up to {maxWidth} pages per level, prioritize quality"
else:
return f"Extensive crawl - crawl up to {maxWidth} pages per level, comprehensive coverage"
async def webSearch(self, modelCall: AiModelCall) -> AiModelResponse:
"""
WEB_SEARCH operation - returns list of URLs based on search query.
@@ -253,8 +289,6 @@ class AiPerplexity(BaseConnectorAi):
Return a JSON array of {webSearchPrompt.maxNumberPages} most relevant URLs.
{'' if not countryName else f'Focus on results from {countryName}.'}
- {'' if not webSearchPrompt.timeRange else f'Limit to results from the last {webSearchPrompt.timeRange}'}
- {'' if not webSearchPrompt.language else f'Return results in {webSearchPrompt.language} language'}
Return ONLY a JSON array of URLs, no additional text:
[
@@ -293,6 +327,15 @@ Return ONLY a JSON array of URLs, no additional text:
"""
WEB_CRAWL operation - crawls ONE URL and returns content.
+ Perplexity API Parameters Used:
+ - messages: The prompt containing URL and instruction
+ - max_tokens: Maximum response length
+ - max_results: Number of search results (1-20, default: 10)
+ - temperature: Response randomness (not web search specific)
+ Pagination: Perplexity does NOT return paginated responses.
+ A single response contains all results within max_tokens limit.
Args:
modelCall: AiModelCall with AiCallPromptWebCrawl as prompt
@@ -316,55 +359,68 @@ Return ONLY a JSON array of URLs, no additional text:
webCrawlPrompt = AiCallPromptWebCrawl(**promptData)
- # Build crawl request for Perplexity - ONE URL
- crawlPrompt = f"""Crawl and extract content from this URL based on the instruction:
- INSTRUCTION: '{webCrawlPrompt.instruction}'
- URL to crawl (maxDepth={webCrawlPrompt.maxDepth}):
- {webCrawlPrompt.url}
- Extract and return the relevant content based on the instruction.
- Return as JSON object with this structure:
- {{
-     "url": "{webCrawlPrompt.url}",
-     "title": "Page title",
-     "content": "Extracted content relevant to the instruction"
- }}
- Return ONLY valid JSON, no additional text."""
+ # Match playground prompt style: just URL + question
+ # This allows Perplexity to return detailed multi-source results
+ crawlPrompt = f"{webCrawlPrompt.url}: {webCrawlPrompt.instruction}"
+ # Build payload with optional Perplexity parameters
+ # Note: max_tokens_per_page may not be supported by chat/completions endpoint
+ # The playground Python SDK might use a different internal API
+ maxResults = min(webCrawlPrompt.maxWidth or 10, 20) # Max 20 results
payload = {
    "model": model.name,
    "messages": [{"role": "user", "content": crawlPrompt}],
    "temperature": temperature,
-   "max_tokens": maxTokens
+   "max_tokens": maxTokens, # Use model's configured maxTokens (24000)
+   "max_results": maxResults,
+   "return_citations": True # Request citations explicitly
}
+ logger.info(f"Perplexity crawl payload: model={model.name}, prompt_length={len(crawlPrompt)}, max_tokens={maxTokens}, max_results={maxResults}")
response = await self.httpClient.post(model.apiUrl, json=payload)
if response.status_code != 200:
    raise HTTPException(status_code=500, detail=f"Perplexity Web Crawl API error: {response.text}")
apiResponse = response.json()
+ # Extract the main content
content = apiResponse["choices"][0]["message"]["content"]
- # Parse JSON content and ensure it's a single object
- import json
- try:
-     parsedContent = json.loads(content)
-     # Ensure it's a single object, not an array
-     if isinstance(parsedContent, list):
-         parsedContent = parsedContent[0] if parsedContent else {}
- except:
-     # If not JSON, create structured response
-     parsedContent = {"url": webCrawlPrompt.url, "title": "", "content": content}
- # Return as JSON string
+ # Check for citations or search results in the response
+ citations = apiResponse.get("citations", [])
+ searchResults = apiResponse.get("search_results", [])
+ # Log what we found
+ if citations:
+     logger.info(f"Found {len(citations)} citations in response")
+ if searchResults:
+     logger.info(f"Found {len(searchResults)} search results in response")
+ logger.debug(f"API response keys: {list(apiResponse.keys())}")
+ # Build comprehensive response with citations if available
+ import json
+ responseData = {
+     "content": content,
+     "citations": citations if citations else [],
+     "search_results": searchResults if searchResults else []
+ }
+ # Return comprehensive response
return AiModelResponse(
-   content=json.dumps(parsedContent, indent=2),
+   content=json.dumps(responseData, indent=2) if (citations or searchResults) else content,
    success=True,
    modelId=model.name,
-   metadata={"response_id": apiResponse.get("id", ""), "operation": "WEB_CRAWL", "url": webCrawlPrompt.url}
+   metadata={
+       "response_id": apiResponse.get("id", ""),
+       "operation": "WEB_CRAWL",
+       "url": webCrawlPrompt.url,
+       "actualPromptSent": crawlPrompt,
+       "has_citations": len(citations) > 0,
+       "has_search_results": len(searchResults) > 0
+   }
)
except Exception as e:

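# Illustrative sketch (not from the commit): how a caller might unpack the WEB_CRAWL
# response built above. When citations or search_results are present, the content field
# is a JSON object with "content", "citations" and "search_results" keys; otherwise it
# is the raw text returned by Perplexity. The helper name is an assumption for illustration.
import json

def parseCrawlResponse(responseContent: str) -> dict:
    # Try the structured form first, fall back to plain text
    try:
        data = json.loads(responseContent)
        if isinstance(data, dict) and "content" in data:
            return {
                "text": data.get("content", ""),
                "citations": data.get("citations", []),
                "searchResults": data.get("search_results", []),
            }
    except json.JSONDecodeError:
        pass
    return {"text": responseContent, "citations": [], "searchResults": []}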
View file

@@ -27,7 +27,8 @@ class WebCrawlResult:
content: str
title: Optional[str] = None
- class ConnectorWeb(BaseConnectorAi):
+ class AiTavily(BaseConnectorAi):
"""Tavily web search connector."""
def __init__(self):
@@ -42,7 +43,36 @@ class ConnectorWeb(BaseConnectorAi):
self.webSearchMaxResults: int = 20
# Initialize client if API key is available
self._initializeClient()
def getModels(self) -> List[AiModel]:
"""Get all available Tavily models."""
return [
AiModel(
name="tavily-search",
displayName="Tavily Search & Research",
connectorType="tavily",
apiUrl="https://api.tavily.com",
temperature=0.0, # Web search doesn't use temperature
maxTokens=0, # Web search doesn't use tokens
contextLength=0,
costPer1kTokensInput=0.0,
costPer1kTokensOutput=0.0,
speedRating=8, # Good speed for search and extract
qualityRating=9, # Excellent quality for web research
# capabilities removed (not used in business logic)
functionCall=self._routeWebOperation,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.BASIC,
operationTypes=createOperationTypeRatings(
(OperationTypeEnum.WEB_SEARCH, 9),
(OperationTypeEnum.WEB_CRAWL, 10)
),
version="tavily-search",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: 0.008 # Simple flat rate
)
]
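# Illustrative sketch (not part of the commit): how a caller might consume the model
# definition above. The flat-rate pricing lambda ignores its arguments, and selection
# logic can rank connectors by their WEB_CRAWL rating. The attribute names on the rating
# objects (operationType, rating) and the helper below are assumptions for illustration.
def describeModel(model) -> str:
    price = model.calculatePriceUsd(processingTime=1.2, bytesSent=512, bytesReceived=20480)  # always 0.008
    crawlRating = max(
        (r.rating for r in (model.operationTypes or []) if r.operationType == OperationTypeEnum.WEB_CRAWL),
        default=0,
    )
    return f"{model.displayName}: WEB_CRAWL rating {crawlRating}, flat price {price} USD per call"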
def _initializeClient(self): def _initializeClient(self):
"""Initialize the Tavily client if API key is available.""" """Initialize the Tavily client if API key is available."""
try: try:
@@ -206,34 +236,6 @@ class ConnectorWeb(BaseConnectorAi):
return filteredResults
def getModels(self) -> List[AiModel]:
"""Get all available Tavily models."""
return [
AiModel(
name="tavily-search",
displayName="Tavily Search & Research",
connectorType="tavily",
apiUrl="https://api.tavily.com",
temperature=0.0, # Web search doesn't use temperature
maxTokens=0, # Web search doesn't use tokens
contextLength=0,
costPer1kTokensInput=0.0,
costPer1kTokensOutput=0.0,
speedRating=8, # Good speed for search and extract
qualityRating=9, # Excellent quality for web research
# capabilities removed (not used in business logic)
functionCall=self._routeWebOperation,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.BASIC,
operationTypes=createOperationTypeRatings(
(OperationTypeEnum.WEB_SEARCH, 9),
(OperationTypeEnum.WEB_CRAWL, 8)
),
version="tavily-search",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: 0.008 # Simple flat rate
)
]
@classmethod @classmethod
async def create(cls): async def create(cls):
apiKey = APP_CONFIG.get("Connector_AiTavily_API_SECRET") apiKey = APP_CONFIG.get("Connector_AiTavily_API_SECRET")
@@ -273,10 +275,9 @@ class ConnectorWeb(BaseConnectorAi):
topic: str | None = None,
includeDomains: list[str] | None = None,
excludeDomains: list[str] | None = None,
- language: str | None = None,
country: str | None = None,
- includeAnswer: bool | None = None,
- includeRawContent: bool | None = None,
+ includeAnswer: str | None = None,
+ includeRawContent: str | None = None,
) -> list[WebSearchResult]:
"""Calls the Tavily API to perform a web search."""
# Make sure maxResults is within the allowed range (use cached values)
@@ -298,8 +299,6 @@ class ConnectorWeb(BaseConnectorAi):
kwargs["include_domains"] = includeDomains
if excludeDomains is not None:
    kwargs["exclude_domains"] = excludeDomains
- if language is not None:
-     kwargs["language"] = language
if country is not None:
    kwargs["country"] = country
if includeAnswer is not None:
@@ -307,7 +306,8 @@ class ConnectorWeb(BaseConnectorAi):
if includeRawContent is not None:
    kwargs["include_raw_content"] = includeRawContent
- logger.debug(f"Tavily.search kwargs: {kwargs}")
+ # Log the final API call parameters for comparison
+ logger.info(f"Tavily API call parameters: {kwargs}")
# Ensure client is initialized
if self.client is None:
@@ -316,7 +316,11 @@ class ConnectorWeb(BaseConnectorAi):
raise ValueError("Tavily client not initialized. Please check API key configuration.")
response = await self.client.search(**kwargs)
+ # Return all results without score filtering
+ # Tavily's scoring is already applied by the API
+ logger.info(f"Tavily returned {len(response.get('results', []))} results")
return [
    WebSearchResult(
        title=result["title"],
@@ -328,69 +332,77 @@ class ConnectorWeb(BaseConnectorAi):
async def _crawl(
    self,
-   urls: list,
-   extractDepth: str | None = None,
-   format: str | None = None,
+   url: str,
+   instructions: str | None = None,
+   limit: int = 20,
+   maxDepth: int = 2,
+   maxBreadth: int = 40,
) -> list[WebCrawlResult]:
-   """Calls the Tavily API to extract text content from URLs with retry logic."""
+   """Calls the Tavily API to crawl ONE URL with link following and retry logic."""
    maxRetries = self.crawlMaxRetries
    retryDelay = self.crawlRetryDelay
    timeout = self.crawlTimeout
-   logger.debug(f"Starting crawl of {len(urls)} URLs: {urls}")
-   logger.debug(f"Crawl settings: extractDepth={extractDepth}, format={format}, timeout={timeout}s")
+   logger.debug(f"Starting crawl of URL: {url}")
+   logger.debug(f"Crawl settings: instructions={instructions}, limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
    for attempt in range(maxRetries + 1):
        try:
            logger.debug(f"Crawl attempt {attempt + 1}/{maxRetries + 1}")
-           # Use asyncio.wait_for for timeout
-           # Build kwargs for extract
-           kwargsExtract: dict = {"urls": urls}
-           kwargsExtract["extract_depth"] = extractDepth or "advanced"
-           kwargsExtract["format"] = format or "markdown" # Use markdown to get HTML structure
-           logger.debug(f"Sending request to Tavily with kwargs: {kwargsExtract}")
            # Ensure client is initialized
            if self.client is None:
                self._initializeClient()
                if self.client is None:
                    raise ValueError("Tavily client not initialized. Please check API key configuration.")
+           logger.debug(f"Crawling URL: {url}")
+           # Build kwargs for crawl
+           kwargsCrawl: dict = {"url": url}
+           if instructions:
+               kwargsCrawl["instructions"] = instructions
+           if limit:
+               kwargsCrawl["limit"] = limit
+           if maxDepth:
+               kwargsCrawl["max_depth"] = maxDepth
+           if maxBreadth:
+               kwargsCrawl["max_breadth"] = maxBreadth
+           logger.debug(f"Sending request to Tavily with kwargs: {kwargsCrawl}")
            response = await asyncio.wait_for(
-               self.client.extract(**kwargsExtract),
+               self.client.crawl(**kwargsCrawl),
                timeout=timeout
            )
-           logger.debug(f"Tavily response received: {list(response.keys())}")
-           # Debug: Log what Tavily actually returns
-           if "results" in response and response["results"]:
-               logger.debug(f"Tavily returned {len(response['results'])} results")
-               logger.debug(f"First result keys: {list(response['results'][0].keys())}")
-               logger.debug(f"First result has raw_content: {'raw_content' in response['results'][0]}")
-               # Log each result
-               for i, result in enumerate(response["results"]):
-                   logger.debug(f"Result {i+1}: URL={result.get('url', 'N/A')}, content_length={len(result.get('raw_content', result.get('content', '')))}")
-           else:
-               logger.warning(f"Tavily returned no results in response: {response}")
-           results = [
-               WebCrawlResult(
-                   url=result["url"],
-                   content=result.get("raw_content", result.get("content", "")), # Try raw_content first, fallback to content
-                   title=result.get("title", "") # Extract title if available
-               )
-               for result in response["results"]
-           ]
-           logger.debug(f"Crawl successful: extracted {len(results)} results")
+           logger.debug(f"Tavily response received: {type(response)}")
+           # Parse response - could be dict with results or list
+           if isinstance(response, dict) and "results" in response:
+               pageResults = response["results"]
+           elif isinstance(response, list):
+               pageResults = response
+           else:
+               logger.warning(f"Unexpected response format: {type(response)}")
+               pageResults = []
+           logger.debug(f"Got {len(pageResults)} pages from crawl")
+           # Convert to WebCrawlResult format
+           results = []
+           for result in pageResults:
+               results.append(WebCrawlResult(
+                   url=result.get("url", url),
+                   content=result.get("raw_content", result.get("content", "")),
+                   title=result.get("title", "")
+               ))
+           logger.debug(f"Crawl successful: extracted {len(results)} pages from URL")
            return results
        except asyncio.TimeoutError:
-           logger.warning(f"Crawl attempt {attempt + 1} timed out after {timeout} seconds for URLs: {urls}")
+           logger.warning(f"Crawl attempt {attempt + 1} timed out after {timeout} seconds for URL: {url}")
            if attempt < maxRetries:
                logger.info(f"Retrying in {retryDelay} seconds...")
                await asyncio.sleep(retryDelay)
@@ -398,21 +410,20 @@ class ConnectorWeb(BaseConnectorAi):
raise Exception(f"Crawl failed after {maxRetries + 1} attempts due to timeout")
except Exception as e:
-   logger.warning(f"Crawl attempt {attempt + 1} failed for URLs {urls}: {str(e)}")
+   logger.warning(f"Crawl attempt {attempt + 1} failed for URL {url}: {str(e)}")
    logger.debug(f"Full error details: {type(e).__name__}: {str(e)}")
    # Check if it's a validation error and log more details
    if "validation" in str(e).lower():
        logger.debug(f"URL validation failed. Checking URL format:")
-       for i, url in enumerate(urls):
-           logger.debug(f"  URL {i+1}: '{url}' (length: {len(url)})")
-           # Check for common URL issues
-           if ' ' in url:
-               logger.debug(f"  WARNING: URL contains spaces!")
-           if not url.startswith(('http://', 'https://')):
-               logger.debug(f"  WARNING: URL doesn't start with http/https!")
-           if len(url) > 2000:
-               logger.debug(f"  WARNING: URL is very long ({len(url)} chars)")
+       logger.debug(f"  URL: '{url}' (length: {len(url)})")
+       # Check for common URL issues
+       if ' ' in url:
+           logger.debug(f"  WARNING: URL contains spaces!")
+       if not url.startswith(('http://', 'https://')):
+           logger.debug(f"  WARNING: URL doesn't start with http/https!")
+       if len(url) > 2000:
+           logger.debug(f"  WARNING: URL is very long ({len(url)} chars)")
    if attempt < maxRetries:
        logger.info(f"Retrying in {retryDelay} seconds...")
@@ -468,15 +479,15 @@ class ConnectorWeb(BaseConnectorAi):
if countryName:
    countryName = self._convertIsoCodeToCountryName(countryName)
- # Perform search
+ # Perform search - use exact parameters from prompt
+ # NOTE: timeRange parameter causes generic results, so we don't use it
searchResults = await self._search(
    query=webSearchPrompt.instruction,
    maxResults=webSearchPrompt.maxNumberPages,
-   timeRange=webSearchPrompt.timeRange,
+   timeRange=None, # Not used - causes generic results
    country=countryName,
-   language=webSearchPrompt.language,
-   includeAnswer=False,
-   includeRawContent=False
+   includeAnswer="basic",
+   includeRawContent="text"
)
# Extract URLs from results
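# Illustrative sketch (not from the commit): roughly what the underlying Tavily SDK call
# looks like once _search() has assembled its kwargs. Parameter names mirror the kwargs
# built above; treat the exact SDK signature as an assumption and check the tavily-python
# documentation for your installed version.
from tavily import AsyncTavilyClient

async def searchExample(apiKey: str) -> list[str]:
    client = AsyncTavilyClient(api_key=apiKey)
    response = await client.search(
        query="ValueOn AG Switzerland",   # webSearchPrompt.instruction
        max_results=10,                   # webSearchPrompt.maxNumberPages
        country="switzerland",            # converted from the ISO-2 code
        include_answer="basic",
        include_raw_content="text",
    )
    # The connector keeps only the URLs of the returned results
    return [result["url"] for result in response.get("results", [])]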
@@ -500,13 +511,13 @@ class ConnectorWeb(BaseConnectorAi):
async def webCrawl(self, modelCall: AiModelCall) -> "AiModelResponse":
"""
- WEB_CRAWL operation - crawls one URL using Tavily.
+ WEB_CRAWL operation - crawls one URL using Tavily with link following.
Args:
    modelCall: AiModelCall with AiCallPromptWebCrawl as prompt
Returns:
-   AiModelResponse with crawl results as JSON
+   AiModelResponse with crawl results as JSON (may include multiple pages)
"""
try:
    # Extract parameters
@@ -517,37 +528,49 @@ class ConnectorWeb(BaseConnectorAi):
# Create Pydantic model
webCrawlPrompt = AiCallPromptWebCrawl(**promptData)
- # Perform crawl for ONE URL
- # Note: _crawl expects a list, so we wrap the single URL in a list
+ # Perform crawl for ONE URL with link following
+ # Use maxWidth as limit, maxDepth as maxDepth, and calculate maxBreadth
crawlResults = await self._crawl(
-   urls=[webCrawlPrompt.url],
-   extractDepth="advanced" if webCrawlPrompt.maxDepth > 2 else "basic",
-   format="markdown"
+   url=webCrawlPrompt.url,
+   instructions=webCrawlPrompt.instruction,
+   limit=webCrawlPrompt.maxWidth or 20, # maxWidth controls number of pages
+   maxDepth=webCrawlPrompt.maxDepth or 2,
+   maxBreadth=webCrawlPrompt.maxWidth or 40 # Use same as limit for breadth
)
- # Format result for single URL - consistent with Perplexity format
+ # If we got multiple pages from the crawl, we need to format them differently
+ # Return the first result for backwards compatibility, but include total page count
if crawlResults and len(crawlResults) > 0:
-   firstResult = crawlResults[0]
+   # Get all pages content
+   allContent = ""
+   for i, result in enumerate(crawlResults, 1):
+       pageHeader = f"\n{'='*60}\nPAGE {i}: {result.url}\n{'='*60}\n"
+       if result.title:
+           allContent += f"{pageHeader}Title: {result.title}\n\n"
+       allContent += f"{result.content}\n"
    resultData = {
-       "url": firstResult.url,
-       "title": firstResult.title if firstResult.title else "Content",
-       "content": firstResult.content
+       "url": webCrawlPrompt.url,
+       "title": crawlResults[0].title if crawlResults[0].title else "Content",
+       "content": allContent,
+       "pagesCrawled": len(crawlResults),
+       "pageUrls": [result.url for result in crawlResults]
    }
else:
-   resultData = {"url": webCrawlPrompt.url, "title": "", "content": "", "error": "No content extracted"}
+   resultData = {"url": webCrawlPrompt.url, "title": "", "content": "", "error": "No content extracted", "pagesCrawled": 0}
- # Return as JSON - same format as Perplexity
+ # Return as JSON - same format as Perplexity but with multiple pages content
import json
return AiModelResponse(
    content=json.dumps(resultData, indent=2),
    success=True,
-   metadata={"operation": "WEB_CRAWL", "url": webCrawlPrompt.url}
+   metadata={"operation": "WEB_CRAWL", "url": webCrawlPrompt.url, "pagesCrawled": len(crawlResults) if crawlResults else 0}
)
except Exception as e:
    logger.error(f"Error in Tavily web crawl: {str(e)}")
    import json
-   errorResult = {"error": str(e), "url": ""}
+   errorResult = {"error": str(e), "url": webCrawlPrompt.url if 'webCrawlPrompt' in locals() else ""}
    return AiModelResponse(
        content=json.dumps(errorResult, indent=2),
        success=False,

View file

@@ -200,7 +200,6 @@ class AiCallPromptWebSearch(BaseModel):
instruction: str = Field(description="Search instruction/query for finding relevant URLs")
country: Optional[str] = Field(default=None, description="Two-digit country code (lowercase, e.g., ch, us, de, fr)")
maxNumberPages: Optional[int] = Field(default=10, description="Maximum number of pages to search (default: 10)")
- timeRange: Optional[str] = Field(default=None, description="Time range filter (d, w, m, y)")
language: Optional[str] = Field(default=None, description="Language code (lowercase, e.g., de, en, fr)")
researchDepth: Optional[str] = Field(default="general", description="Research depth: fast (maxDepth=1), general (maxDepth=2), deep (maxDepth=3)")

View file

@@ -186,12 +186,13 @@ class CountryCodes:
Get Tavily-compatible country name from ISO-2 code.
Args:
-   isoCode: ISO-2 country code (e.g., "CH", "US")
+   isoCode: ISO-2 country code (e.g., "CH", "ch", "US", "us")
Returns:
    Country name in lowercase as required by Tavily (e.g., "switzerland", "united states")
"""
- isoCodeUpper = isoCode.upper()
+ # Convert to uppercase for lookup
+ isoCodeUpper = isoCode.upper() if isoCode else ""
mapping = cls._COUNTRY_MAP.get(isoCodeUpper)
return mapping[0] if mapping else isoCode
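# Illustrative sketch (not from the commit): a standalone version of the lookup above,
# showing why the uppercase conversion makes the method case-insensitive. The map
# entries here are assumptions; the real _COUNTRY_MAP lives in the CountryCodes class.
_COUNTRY_MAP_EXAMPLE = {
    "CH": ("switzerland",),
    "US": ("united states",),
}

def getTavilyCountryNameExample(isoCode: str) -> str:
    isoCodeUpper = isoCode.upper() if isoCode else ""
    mapping = _COUNTRY_MAP_EXAMPLE.get(isoCodeUpper)
    # Fall back to the raw input when the code is unknown
    return mapping[0] if mapping else isoCode

# getTavilyCountryNameExample("ch") == getTavilyCountryNameExample("CH") == "switzerland"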

View file

@@ -11,7 +11,7 @@ from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiC
logger = logging.getLogger(__name__)
- class WebcrawlService:
+ class WebService:
"""Service for web search and crawling operations."""
def __init__(self, services):
@@ -56,7 +56,6 @@ class WebcrawlService:
extractedUrls = analysisResult.get("urls", [])
needsSearch = analysisResult.get("needsSearch", True) # Default to True
maxNumberPages = analysisResult.get("maxNumberPages", 10)
- timeRange = analysisResult.get("timeRange")
countryCode = analysisResult.get("country", country)
languageCode = analysisResult.get("language", language)
finalResearchDepth = analysisResult.get("researchDepth", researchDepth)
@@ -77,7 +76,6 @@ class WebcrawlService:
searchUrls = await self._performWebSearch(
    instruction=instruction,
    maxNumberPages=maxNumberPages - len(allUrls),
-   timeRange=timeRange,
    country=countryCode,
    language=languageCode
)
@@ -153,10 +151,9 @@ Extract and provide a JSON response with:
2. urls: List of URLs found in the prompt text
3. needsSearch: true if web search is needed to identify URLs to crawl, false if only crawling of provided URLs is wanted
4. maxNumberPages: Recommended number of URLs to crawl (based on research scope, typical: 2-20)
- 5. timeRange: Time range if mentioned (d, w, m, y, or null)
- 6. country: Country code if specified (2-digit lowercase, e.g., ch, us, de)
- 7. language: Language code if specified (lowercase, e.g., de, en, fr)
- 8. researchDepth: Research depth based on instruction complexity - "fast" (quick overview, maxDepth=1), "general" (standard research, maxDepth=2), or "deep" (comprehensive research, maxDepth=3)
+ 5. country: Country code if identified in the prompt (2-digit lowercase, e.g., ch, us, de)
+ 6. language: Language identified from the prompt (lowercase, e.g., de, en, fr)
+ 7. researchDepth: Research depth based on instruction complexity - "fast" (quick overview, maxDepth=1), "general" (standard research, maxDepth=2), or "deep" (comprehensive research, maxDepth=3)
Return ONLY valid JSON, no additional text:
{{
@@ -164,7 +161,6 @@ Return ONLY valid JSON, no additional text:
"urls": ["url1", "url2"],
"needsSearch": true,
"maxNumberPages": 10,
- "timeRange": null,
"country": "ch",
"language": "de",
"researchDepth": "general"
@@ -188,7 +184,6 @@ Return ONLY valid JSON, no additional text:
"urls": [],
"needsSearch": True,
"maxNumberPages": 10,
- "timeRange": None,
"country": country,
"language": language,
"researchDepth": researchDepth
@@ -198,7 +193,6 @@ Return ONLY valid JSON, no additional text:
self,
instruction: str,
maxNumberPages: int,
- timeRange: Optional[str],
country: Optional[str],
language: Optional[str]
) -> List[str]:
@@ -209,7 +203,6 @@ Return ONLY valid JSON, no additional text:
instruction=instruction,
country=country,
maxNumberPages=maxNumberPages,
- timeRange=timeRange,
language=language
)
searchPrompt = searchPromptModel.model_dump_json(exclude_none=True, indent=2)
@@ -269,7 +262,7 @@ Return ONLY valid JSON, no additional text:
instruction=instruction,
url=url, # Single URL
maxDepth=maxDepth,
- maxWidth=10
+ maxWidth=50
)
crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)
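# Illustrative sketch (not from the commit): what the serialized crawl prompt roughly
# looks like for one URL with the new default maxWidth=50. The URL and instruction
# values are placeholders for illustration.
crawlPromptModelExample = AiCallPromptWebCrawl(
    instruction="Summarize the company's services",
    url="https://example.com",
    maxDepth=2,
    maxWidth=50,
)
crawlPromptExample = crawlPromptModelExample.model_dump_json(exclude_none=True, indent=2)
# crawlPromptExample is a JSON string such as:
# {
#   "instruction": "Summarize the company's services",
#   "url": "https://example.com",
#   "maxDepth": 2,
#   "maxWidth": 50
# }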

View file

@@ -170,7 +170,7 @@ class MethodAi(MethodBase):
- Output format: JSON with research results including URLs and content.
Parameters:
- - prompt (str, required): Natural language research instruction, including time range if relevant.
+ - prompt (str, required): Natural language research instruction.
- list(url) (list, optional): Specific URLs to crawl, if needed.
- country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
- language (str, optional): Language code (lowercase, e.g., de, en, fr).
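# Illustrative sketch (not part of the commit): an example argument set for the research
# method described above. The dict shape and key names are assumptions for illustration;
# the real call goes through MethodAi and the service layer.
researchArgs = {
    "prompt": "Research what ValueOn in Switzerland does and who works there. Return as JSON.",
    "list(url)": ["https://www.valueon.ch"],  # optional: specific URLs to crawl
    "country": "ch",
    "language": "de",
}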

View file

@@ -1,6 +1,9 @@
#!/usr/bin/env python3
"""
- AI Models Test - Tests all available AI models individually
+ AI Models Test - Tests WEB_CRAWL functionality on all models that support it
+ This script tests all models that have WEB_CRAWL capability, validates that
+ they can crawl specific URLs and return content, and analyzes the quality of results.
"""
import asyncio
@@ -53,9 +56,18 @@ class AIModelsTester:
async def initialize(self):
    """Initialize the AI service."""
-   # Set logging level to INFO to reduce noise
+   # Set logging level to DEBUG for detailed output
    import logging
-   logging.getLogger().setLevel(logging.INFO)
+   logging.getLogger().setLevel(logging.DEBUG)
+   # Initialize the model registry with all connectors
+   from modules.aicore.aicoreModelRegistry import modelRegistry
+   from modules.aicore.aicorePluginTavily import AiTavily
+   from modules.aicore.aicorePluginPerplexity import AiPerplexity
+   # Register web connectors that support WEB_CRAWL
+   modelRegistry.registerConnector(AiTavily())
+   modelRegistry.registerConnector(AiPerplexity())
    # The AI service needs to be recreated with proper initialization
    from modules.services.serviceAi.mainServiceAi import AiService
@@ -86,27 +98,53 @@ class AIModelsTester:
print(f"📁 Results will be saved to: {self.modelTestDir}")
async def testModel(self, modelName: str) -> Dict[str, Any]:
-   """Test a specific AI model with a simple prompt."""
+   """Test a specific AI model with WEB_CRAWL operation."""
    print(f"\n{'='*60}")
    print(f"TESTING MODEL: {modelName}")
+   print(f"OPERATION TYPE: WEB_CRAWL")
    print(f"{'='*60}")
-   # Use same prompt for all web models
-   import json
-   if "tavily" in modelName.lower() or "perplexity" in modelName.lower() or "llama" in modelName.lower() or "sonar" in modelName.lower() or "mistral" in modelName.lower():
-       # All web models use the same JSON formatted prompt
-       # Country format: Use full name for Tavily (Switzerland), Perplexity converts ISO codes to names
-       testPrompt = json.dumps({
-           "prompt": "Research, what ValueOn company in switzerland does and who works there? Return as JSON.",
-           "maxResults": 5,
-           "timeRange": "y",
-           "country": "CH", # ISO-2 code, Perplexity will convert to "Switzerland"
-           "format": "json"
-       }, indent=2)
-   else:
-       # Fallback for other models
-       testPrompt = "Generate a comprehensive analysis of the current state of artificial intelligence. Return as JSON."
+   # CRAWL CONFIGURATION
+   # Deep and Broad Web Crawl Example:
+   # - maxDepth: 3 (deep) - follows links up to 3 levels from starting page
+   #   - Level 1: Starting page
+   #   - Level 2: Pages linked from starting page
+   #   - Level 3: Pages linked from Level 2 pages
+   # - maxWidth: 50 (broad) - crawls up to 50 pages at each depth level
+   # This results in potential maximum of ~1,250 pages (if 50 links exist at each level)
+   #
+   # Common configurations:
+   # - Fast/Overview: maxDepth=1, maxWidth=5 (shallow, focused)
+   # - General/Standard: maxDepth=2, maxWidth=10 (balanced)
+   # - Deep and Broad: maxDepth=3, maxWidth=50 (comprehensive)
+   CRAWL_DEPTH = 3 # Deep crawl: follows links 3 levels deep
+   CRAWL_WIDTH = 50 # Broad crawl: up to 50 pages per level
+   print(f"Crawl Configuration:")
+   print(f"  - Depth: {CRAWL_DEPTH} levels (deep)")
+   print(f"  - Width: {CRAWL_WIDTH} pages per level (broad)")
+   print(f"  - Theoretical max: {CRAWL_WIDTH ** min(CRAWL_DEPTH, 3)} pages")
+   # Use WEB_CRAWL specific prompt format
+   from modules.datamodels.datamodelAi import AiCallPromptWebCrawl
+   # Test with simple prompt like playground example
+   simplePrompt = f"https://www.valueon.ch: Who works in this company?"
+   # But keep structured format for now to match our API
+   testPrompt = json.dumps({
+       "instruction": "Who works in this company?",
+       "url": "https://www.valueon.ch",
+       "maxDepth": CRAWL_DEPTH,
+       "maxWidth": CRAWL_WIDTH
+   }, indent=2)
+   print(f"Simple prompt (playground style): {simplePrompt}")
+   # For Tavily models, test direct API call for better link following
+   if "tavily" in modelName.lower():
+       return await self._testTavilyDirect(modelName, CRAWL_DEPTH, CRAWL_WIDTH)
print(f"Test prompt: {testPrompt}")
print(f"Prompt length: {len(testPrompt)} characters") print(f"Prompt length: {len(testPrompt)} characters")
@@ -114,17 +152,11 @@ class AIModelsTester:
startTime = asyncio.get_event_loop().time()
try:
-   # Create options to force this specific model
-   if "internal" in modelName.lower():
-       options = AiCallOptions(
-           operationType=OperationTypeEnum.DATA_EXTRACT,
-           preferredModel=modelName
-       )
-   else:
-       options = AiCallOptions(
-           operationType=OperationTypeEnum.DATA_GENERATE,
-           preferredModel=modelName
-       )
+   # Create options for WEB_CRAWL operation
+   options = AiCallOptions(
+       operationType=OperationTypeEnum.WEB_CRAWL,
+       preferredModel=modelName
+   )
    # Call the AI service DIRECTLY through the model's functionCall
    # This tests the actual model, not the document generation pipeline
@@ -140,29 +172,14 @@ class AIModelsTester:
import base64 import base64
import os import os
# Prepare messages and options based on model type # For WEB_CRAWL models, use normal functionCall with structured prompt
if "vision" in modelName.lower(): messages = [{"role": "user", "content": testPrompt}]
# For vision models, skip for now since they require special handling modelCall = AiModelCall(
print(f"⚠️ Skipping vision model {modelName} - requires special image handling") messages=messages,
return { model=model,
"modelName": modelName, options=options
"status": "SKIPPED", )
"processingTime": 0.0, response = await model.functionCall(modelCall)
"responseLength": 0,
"responseType": "skipped",
"hasContent": False,
"error": "Vision model requires special image handling",
"fullResponse": "Skipped - vision model requires special image handling"
}
else:
# For other models, use normal functionCall
messages = [{"role": "user", "content": testPrompt}]
modelCall = AiModelCall(
messages=messages,
model=model,
options=options
)
response = await model.functionCall(modelCall)
endTime = asyncio.get_event_loop().time() endTime = asyncio.get_event_loop().time()
processingTime = endTime - startTime processingTime = endTime - startTime
@@ -185,6 +202,10 @@ class AIModelsTester:
"bytesReceived": len(response.content.encode('utf-8')) if response.content else 0 "bytesReceived": len(response.content.encode('utf-8')) if response.content else 0
} }
# Extract actual prompt sent if available in metadata
if hasattr(response, 'metadata') and response.metadata:
result["actualPromptSent"] = response.metadata.get("actualPromptSent", "N/A")
# Try to parse content as JSON # Try to parse content as JSON
if response.content: if response.content:
try: try:
@@ -289,9 +310,16 @@ class AIModelsTester:
print(f"📄 Response length: {len(str(response))} characters") print(f"📄 Response length: {len(str(response))} characters")
print(f"📄 Response preview: {result['responsePreview']}") print(f"📄 Response preview: {result['responsePreview']}")
# Save text response for all models # Add prompt to result for logging
if result.get("status") == "SUCCESS": result["testPrompt"] = testPrompt
self._saveTextResponse(modelName, result) result["crawlConfig"] = {
"depth": CRAWL_DEPTH,
"width": CRAWL_WIDTH
}
# For WEB_CRAWL, also validate that content was extracted
if result.get("status") == "SUCCESS" and result.get("fullResponse"):
self._validateCrawlResponse(modelName, result)
except Exception as e: except Exception as e:
endTime = asyncio.get_event_loop().time() endTime = asyncio.get_event_loop().time()
@@ -304,13 +332,22 @@ class AIModelsTester:
"responseLength": 0, "responseLength": 0,
"responseType": "exception", "responseType": "exception",
"hasContent": False, "hasContent": False,
"error": str(e) "error": str(e),
"testPrompt": testPrompt,
"crawlConfig": {
"depth": CRAWL_DEPTH,
"width": CRAWL_WIDTH
}
} }
print(f"💥 EXCEPTION - {str(e)}") print(f"💥 EXCEPTION - {str(e)}")
self.testResults.append(result) self.testResults.append(result)
# Save text response even for exceptions to log the prompt
if result.get("status") in ["SUCCESS", "EXCEPTION", "ERROR"]:
self._saveTextResponse(modelName, result)
# Save individual model result immediately # Save individual model result immediately
self._saveIndividualModelResult(modelName, result) self._saveIndividualModelResult(modelName, result)
@@ -378,6 +415,19 @@ class AIModelsTester:
if not content: if not content:
content = result.get("responsePreview", "No content available") content = result.get("responsePreview", "No content available")
# If there's an error, include it in the content
if result.get("error"):
content = f"ERROR: {result.get('error')}\n\n{content}"
# Get prompt and config for logging
config = result.get("crawlConfig", {})
crawlDepth = config.get("depth", "N/A")
crawlWidth = config.get("width", "N/A")
# Get both the original JSON prompt and the actual prompt sent
originalPrompt = result.get("testPrompt", "N/A")
actualPromptSent = result.get("actualPromptSent", "N/A")
# Add metadata header # Add metadata header
metadata = f"""Model: {modelName} metadata = f"""Model: {modelName}
Test Time: {timestamp} Test Time: {timestamp}
@@ -385,6 +435,23 @@ Status: {result.get('status', 'Unknown')}
Processing Time: {result.get('processingTime', 0):.2f}s Processing Time: {result.get('processingTime', 0):.2f}s
Response Length: {result.get('responseLength', 0)} characters Response Length: {result.get('responseLength', 0)} characters
Is Valid JSON: {result.get('isValidJson', False)} Is Valid JSON: {result.get('isValidJson', False)}
Test Method: {result.get('testMethod', 'standard')}
Pages Crawled: {result.get('pagesCrawled', 'N/A')}
Crawled URL: {result.get('crawledUrl', 'N/A')}
Has URL: {result.get('hasUrl', 'N/A')}
Has Title: {result.get('hasTitle', 'N/A')}
Has Content: {result.get('hasContent', 'N/A')}
Content Length: {result.get('contentLength', 'N/A')} characters
--- CRAWL CONFIGURATION ---
Depth: {crawlDepth}
Width: {crawlWidth}
--- ORIGINAL JSON PROMPT (input) ---
{originalPrompt}
--- ACTUAL PROMPT SENT TO API (EXACT) ---
{actualPromptSent}
--- RESPONSE CONTENT --- --- RESPONSE CONTENT ---
{content} {content}
@@ -400,6 +467,174 @@ Is Valid JSON: {result.get('isValidJson', False)}
print(f"❌ Error saving text response: {str(e)}") print(f"❌ Error saving text response: {str(e)}")
result["textSaveError"] = str(e) result["textSaveError"] = str(e)
def _validateCrawlResponse(self, modelName: str, result: Dict[str, Any]):
"""Validate that the WEB_CRAWL response contains crawled content."""
try:
content = result.get("fullResponse", "")
# Try to parse as JSON
crawledData = {}
try:
parsed = json.loads(content)
if isinstance(parsed, dict):
crawledData = parsed
except:
pass
# Check for expected fields: url, title, content
hasUrl = bool(crawledData.get("url"))
hasTitle = bool(crawledData.get("title"))
hasContent = bool(crawledData.get("content"))
contentLength = len(crawledData.get("content", ""))
result["hasUrl"] = hasUrl
result["hasTitle"] = hasTitle
result["hasContent"] = hasContent
result["contentLength"] = contentLength
result["crawledUrl"] = crawledData.get("url", "")
if hasUrl and hasContent:
print(f"✅ Successfully crawled content from URL: {crawledData.get('url', 'unknown')}")
print(f" Content length: {contentLength} characters")
print(f" Title: {crawledData.get('title', 'N/A')}")
else:
print(f"⚠️ Incomplete crawl response - URL: {hasUrl}, Content: {hasContent}")
except Exception as e:
print(f"❌ Error validating crawl response: {str(e)}")
result["crawlValidationError"] = str(e)
async def _testTavilyDirect(self, modelName: str, crawlDepth: int = 3, crawlWidth: int = 50) -> Dict[str, Any]:
"""Test Tavily API directly using the crawl() method with better link following."""
print(f"\n{'='*60}")
print(f"TESTING TAVILY DIRECT API (crawl method)")
print(f"{'='*60}")
startTime = asyncio.get_event_loop().time()
try:
from tavily import AsyncTavilyClient
from modules.shared.configuration import APP_CONFIG
apiKey = APP_CONFIG.get("Connector_AiTavily_API_SECRET")
if not apiKey:
raise Exception("Tavily API key not found")
client = AsyncTavilyClient(api_key=apiKey)
# Map our configuration to Tavily parameters
# maxWidth -> limit (pages per level)
# maxDepth -> max_depth (link following depth)
# max_breadth = maxWidth (breadth of crawl at each level)
tavilyLimit = crawlWidth
tavilyMaxDepth = crawlDepth
tavilyMaxBreadth = crawlWidth
print(f"Calling Tavily API with crawl() method...")
print(f"URL: https://www.valueon.ch")
print(f"Instructions: Who works in this company?")
print(f"Limit: {tavilyLimit} pages per level")
print(f"Max depth: {tavilyMaxDepth} (follows links {tavilyMaxDepth} levels deep)")
print(f"Max breadth: {tavilyMaxBreadth} (up to {tavilyMaxBreadth} pages at each level)")
print(f"Deep and Broad Crawl Configuration Active")
response = await client.crawl(
url="https://www.valueon.ch",
instructions="Who works in this company?",
limit=tavilyLimit,
max_depth=tavilyMaxDepth,
max_breadth=tavilyMaxBreadth
)
endTime = asyncio.get_event_loop().time()
processingTime = endTime - startTime
# Analyze response
contentLength = 0
pagesCrawled = 0
fullContent = ""
if isinstance(response, dict):
# Check if it has results
if "results" in response:
results = response["results"]
pagesCrawled = len(results)
content_parts = []
for result in results:
url = result.get("url", "")
title = result.get("title", "")
content = result.get("raw_content", result.get("content", ""))
content_parts.append(f"URL: {url}\nTitle: {title}\nContent: {content}\n{'='*60}\n")
contentLength += len(content)
fullContent = "\n".join(content_parts)
else:
fullContent = json.dumps(response, indent=2)
contentLength = len(fullContent)
elif isinstance(response, list):
pagesCrawled = len(response)
content_parts = []
for item in response:
if isinstance(item, dict):
url = item.get("url", "")
title = item.get("title", "")
content = item.get("raw_content", item.get("content", ""))
content_parts.append(f"URL: {url}\nTitle: {title}\nContent: {content}\n{'='*60}\n")
contentLength += len(content)
fullContent = "\n".join(content_parts)
else:
fullContent = str(response)
contentLength = len(fullContent)
result = {
"modelName": modelName,
"status": "SUCCESS",
"processingTime": round(processingTime, 2),
"responseLength": contentLength,
"responseType": "TavilyDirectAPI",
"hasContent": True,
"error": None,
"modelUsed": modelName,
"priceUsd": 0.0,
"bytesSent": 0,
"bytesReceived": contentLength,
"isValidJson": True,
"fullResponse": fullContent,
"pagesCrawled": pagesCrawled,
"testMethod": "direct_api_crawl"
}
print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
print(f"📄 Pages crawled: {pagesCrawled}")
print(f"📄 Total content length: {contentLength} characters")
# Save the response
self._saveTextResponse(modelName, result)
self._validateCrawlResponse(modelName, result)
self._saveIndividualModelResult(modelName, result)
self.testResults.append(result)
return result
except Exception as e:
endTime = asyncio.get_event_loop().time()
processingTime = endTime - startTime
result = {
"modelName": modelName,
"status": "EXCEPTION",
"processingTime": round(processingTime, 2),
"responseLength": 0,
"responseType": "exception",
"hasContent": False,
"error": str(e)
}
print(f"💥 EXCEPTION - {str(e)}")
self.testResults.append(result)
return result
def _saveIndividualModelResult(self, modelName: str, result: Dict[str, Any]): def _saveIndividualModelResult(self, modelName: str, result: Dict[str, Any]):
"""Save individual model test result to file.""" """Save individual model test result to file."""
try: try:
@@ -425,22 +660,30 @@ Is Valid JSON: {result.get('isValidJson', False)}
print(f"❌ Error saving individual result: {str(e)}")
def getAllAvailableModels(self) -> List[str]:
-   """Get all available model names."""
-   # Hardcoded list of known models - same approach as test_ai_behavior.py
-   return [
-       # "claude-3-5-sonnet-20241022", # Skipped - text model, test later
-       # "claude-3-5-sonnet-20241022-vision", # Skipped - requires image input
-       # "gpt-4o", # Skipped - text model, test later
-       # "gpt-3.5-turbo", # Skipped - text model, test later
-       # "gpt-4o-vision", # Skipped - requires image input
-       # "dall-e-3", # Skipped - image generation, test later
-       "sonar", # Perplexity web model
-       "sonar-pro", # Perplexity web model
-       "tavily-search", # Tavily web model (unified research)
-       # "internal-extractor", # Skipped - internal model, test later
-       # "internal-generator", # Skipped - internal model, test later
-       # "internal-renderer" # Skipped - internal model, test later
-   ]
+   """Get all available model names that support WEB_CRAWL."""
+   from modules.aicore.aicoreModelRegistry import modelRegistry
+   from modules.datamodels.datamodelAi import OperationTypeEnum
+   # Get all models from registry
+   allModels = modelRegistry.getAvailableModels()
+   # Filter models that support WEB_CRAWL
+   webCrawlModels = []
+   for model in allModels:
+       if model.operationTypes and any(
+           ot.operationType == OperationTypeEnum.WEB_CRAWL
+           for ot in model.operationTypes
+       ): # Include both Tavily and Perplexity models
+           webCrawlModels.append(model.name)
+   # Filter to only "sonar" model for testing
+   webCrawlModels = [m for m in webCrawlModels if m == "sonar"]
+   print(f"Found {len(webCrawlModels)} models that support WEB_CRAWL (filtered to sonar):")
+   for modelName in webCrawlModels:
+       print(f"  - {modelName}")
+   return webCrawlModels
def saveTestResults(self): def saveTestResults(self):
"""Save detailed test results to file.""" """Save detailed test results to file."""
@@ -508,6 +751,15 @@ Is Valid JSON: {result.get('isValidJson', False)}
if result.get("isValidJson") is not None: if result.get("isValidJson") is not None:
print(f" Valid JSON: {'Yes' if result['isValidJson'] else 'No'}") print(f" Valid JSON: {'Yes' if result['isValidJson'] else 'No'}")
if result.get("crawledUrl"):
print(f" Crawled URL: {result['crawledUrl']}")
if result.get("contentLength") is not None:
print(f" Content length: {result['contentLength']} characters")
if result.get("pagesCrawled") is not None:
print(f" Pages crawled: {result['pagesCrawled']}")
if result["error"]: if result["error"]:
print(f" Error: {result['error']}") print(f" Error: {result['error']}")
@@ -525,12 +777,32 @@ Is Valid JSON: {result.get('isValidJson', False)}
print(f"{'='*80}") print(f"{'='*80}")
print(f"🚀 Fastest model: {fastest['modelName']} ({fastest['processingTime']}s)") print(f"🚀 Fastest model: {fastest['modelName']} ({fastest['processingTime']}s)")
print(f"🐌 Slowest model: {slowest['modelName']} ({slowest['processingTime']}s)") print(f"🐌 Slowest model: {slowest['modelName']} ({slowest['processingTime']}s)")
# Find models with most content
modelsWithContent = [r for r in successfulResults if r.get("contentLength", 0) > 0]
if modelsWithContent:
mostContent = max(modelsWithContent, key=lambda x: x.get("contentLength", 0))
totalContent = sum(r.get("contentLength", 0) for r in modelsWithContent)
avgContent = totalContent / len(modelsWithContent)
print(f"📄 Model with most content: {mostContent['modelName']} ({mostContent.get('contentLength', 0)} chars)")
print(f"📊 Average content per model: {avgContent:.0f} characters")
print(f"📊 Total content crawled across all models: {totalContent} characters")
# Find models with most pages crawled (for Tavily direct API)
modelsWithPages = [r for r in successfulResults if r.get("pagesCrawled", 0) > 0]
if modelsWithPages:
mostPages = max(modelsWithPages, key=lambda x: x.get("pagesCrawled", 0))
totalPages = sum(r.get("pagesCrawled", 0) for r in modelsWithPages)
avgPages = totalPages / len(modelsWithPages)
print(f"🔍 Model with most pages crawled: {mostPages['modelName']} ({mostPages.get('pagesCrawled', 0)} pages)")
print(f"📊 Average pages per model: {avgPages:.1f} pages")
print(f"📊 Total pages crawled across all models: {totalPages} pages")
async def main():
-   """Run AI models testing."""
+   """Run AI models testing for WEB_CRAWL operation."""
    tester = AIModelsTester()
-   print("Starting AI Models Testing...")
+   print("Starting AI Models Testing for WEB_CRAWL...")
    print("Initializing AI service...")
    await tester.initialize()
@@ -542,8 +814,9 @@ async def main():
    print(f"  {i}. {model}")
    print(f"\n{'='*80}")
-   print("STARTING INDIVIDUAL MODEL TESTS")
+   print("STARTING WEB_CRAWL TESTS")
    print(f"{'='*80}")
+   print("Testing each model's ability to crawl URLs and return content...")
    print("Press Enter after each model test to continue to the next one...")
# Test each model individually # Test each model individually