# Source file: gateway/modules/aicore/aicorePluginTavily.py
# Last modified: 2026-01-17 02:17:58 +01:00
# (796 lines, 35 KiB, Python)
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Tavily web search class.
"""
import logging
import asyncio
import re
from dataclasses import dataclass
from typing import Optional, List, Dict
from tavily import AsyncTavilyClient
from modules.shared.configuration import APP_CONFIG
from modules.aicore.aicoreBase import BaseConnectorAi
from modules.datamodels.datamodelAi import AiModel, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelCall, AiModelResponse, createOperationTypeRatings, AiCallPromptWebSearch, AiCallPromptWebCrawl
from modules.datamodels.datamodelTools import CountryCodes
logger = logging.getLogger(__name__)
@dataclass
class WebSearchResult:
    """A single hit returned by the Tavily search API.

    Instances are built in AiTavily._search from the raw API response.
    """
    # Page title as reported by Tavily (may be an empty string).
    title: str
    # Result URL, already cleaned via AiTavily._cleanUrl.
    url: str
    # Extracted page text ("raw_content", falling back to "content"), if any.
    rawContent: Optional[str] = None
@dataclass
class WebCrawlResult:
    """One crawled page produced by the Tavily crawl API.

    Instances are built in AiTavily._crawl from the raw API response.
    """
    # URL of the crawled page (falls back to the requested URL on parse errors).
    url: str
    # Extracted page text; empty string when extraction failed.
    content: str
    # Page title when Tavily provides one.
    title: Optional[str] = None
class AiTavily(BaseConnectorAi):
"""Tavily web search connector."""
def __init__(self):
    """Initialize defaults and eagerly attempt to build the Tavily client."""
    super().__init__()
    # Async API client; remains None when no API key is configured.
    self.client: Optional[AsyncTavilyClient] = None
    # Crawl behaviour defaults, cached on the instance at construction time
    # (timeout in seconds, retry count, delay between retries in seconds).
    self.crawlTimeout: int = 30
    self.crawlMaxRetries: int = 3
    self.crawlRetryDelay: int = 2
    # Allowed bounds for the search max_results parameter (camelCase per
    # project style).
    self.webSearchMinResults: int = 1
    self.webSearchMaxResults: int = 20
    # Build the client now if an API key is available in configuration.
    self._initializeClient()
def getModels(self) -> List[AiModel]:
    """Return the list of AiModel descriptors exposed by this connector."""
    def flatRate(processingTime, bytesSent, bytesReceived):
        # Tavily bills per request rather than per token, so the price is a
        # flat estimate independent of time or traffic.
        return 0.008
    searchModel = AiModel(
        name="tavily-search",
        displayName="Tavily Search & Research",
        connectorType="tavily",
        apiUrl="https://api.tavily.com",
        temperature=0.0,  # web search does not use temperature
        maxTokens=0,      # web search does not use tokens
        contextLength=0,
        costPer1kTokensInput=0.0,
        costPer1kTokensOutput=0.0,
        speedRating=8,    # good speed for search and extract
        qualityRating=9,  # excellent quality for web research
        functionCall=self._routeWebOperation,
        priority=PriorityEnum.BALANCED,
        processingMode=ProcessingModeEnum.BASIC,
        operationTypes=createOperationTypeRatings(
            (OperationTypeEnum.WEB_SEARCH_DATA, 9),
            (OperationTypeEnum.WEB_CRAWL, 10)
        ),
        version="tavily-search",
        calculatePriceUsd=flatRate,
    )
    return [searchModel]
def _initializeClient(self):
    """Build the AsyncTavilyClient when an API key is configured.

    Never raises: on a missing key or construction failure, self.client
    stays None and the problem is logged.
    """
    try:
        apiKey = APP_CONFIG.get("Connector_AiTavily_API_SECRET")
        if not apiKey:
            logger.warning("Tavily API key not found, client not initialized")
            return
        self.client = AsyncTavilyClient(api_key=apiKey)
        logger.info("Tavily client initialized successfully")
    except Exception as e:
        logger.error(f"Failed to initialize Tavily client: {str(e)}")
def getConnectorType(self) -> str:
    """Return the unique identifier string for this connector type."""
    return "tavily"
def _convertIsoCodeToCountryName(self, isoCode: str) -> str:
    """Translate an ISO-2 country code into the country name Tavily expects.

    Thin wrapper around the centralized CountryCodes mapping so all
    connectors share one lookup table.
    """
    return CountryCodes.getForTavily(isoCode)
def _extractUrlsFromPrompt(self, prompt: str) -> List[str]:
"""Extract URLs from a text prompt using regex."""
if not prompt:
return []
# URL regex pattern - matches http/https URLs
urlPattern = r'https?://(?:[-\w.])+(?:[:\d]+)?(?:/(?:[\w/_.])*(?:\?(?:[\w&=%.])*)?(?:#(?:[\w.])*)?)?'
urls = re.findall(urlPattern, prompt)
# Remove duplicates while preserving order
seen = set()
uniqueUrls = []
for url in urls:
if url not in seen:
seen.add(url)
uniqueUrls.append(url)
return uniqueUrls
def _normalizeUrl(self, url: str) -> str:
"""
Normalize URL for better deduplication.
Removes common variations that represent the same content.
"""
if not url:
return url
# Remove trailing slashes
url = url.rstrip('/')
# Remove common query parameters that don't affect content
import urllib.parse
parsed = urllib.parse.urlparse(url)
# Remove common tracking parameters
queryParams = urllib.parse.parse_qs(parsed.query)
filteredParams = {}
for key, values in queryParams.items():
# Keep important parameters, remove tracking ones
if key.lower() not in ['utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
'fbclid', 'gclid', 'ref', 'source', 'campaign']:
filteredParams[key] = values
# Rebuild query string
filteredQuery = urllib.parse.urlencode(filteredParams, doseq=True)
# Reconstruct URL
normalized = urllib.parse.urlunparse((
parsed.scheme,
parsed.netloc,
parsed.path,
parsed.params,
filteredQuery,
parsed.fragment
))
return normalized
def _calculateRelevanceScore(self, result: WebSearchResult, queryWords: set) -> float:
"""
Calculate relevance score for a search result.
Higher score means more relevant to the query.
"""
score = 0.0
# Title relevance (most important)
titleWords = set(result.title.lower().split())
titleMatches = len(queryWords.intersection(titleWords))
score += titleMatches * 3.0 # Weight title matches heavily
# URL relevance
urlWords = set(result.url.lower().split('/'))
urlMatches = len(queryWords.intersection(urlWords))
score += urlMatches * 1.5
# Content relevance (if available)
if hasattr(result, 'rawContent') and result.rawContent:
contentWords = set(result.rawContent.lower().split())
contentMatches = len(queryWords.intersection(contentWords))
score += contentMatches * 0.1 # Lower weight for content matches
# Domain authority bonus (simple heuristic)
domain = result.url.split('/')[2] if '/' in result.url else result.url
if any(authDomain in domain.lower() for authDomain in
['wikipedia.org', 'github.com', 'stackoverflow.com', 'reddit.com', 'medium.com']):
score += 1.0
# Penalty for very long URLs (often less relevant)
if len(result.url) > 100:
score -= 0.5
return score
def _intelligentUrlFiltering(self, searchResults: List[WebSearchResult], query: str, maxResults: int) -> List[WebSearchResult]:
    """Deduplicate search results and keep the most relevant ones.

    Args:
        searchResults: Raw search results from Tavily
        query: Original search query for relevance scoring
        maxResults: Maximum number of results to return

    Returns:
        Filtered and deduplicated list of search results, ordered by
        descending relevance score.
    """
    if not searchResults:
        return []
    # Pass 1: drop duplicates, comparing normalized URLs and keeping the
    # first occurrence of each.
    seenUrls = set()
    uniqueResults = []
    for candidate in searchResults:
        canonical = self._normalizeUrl(candidate.url)
        if canonical in seenUrls:
            continue
        seenUrls.add(canonical)
        uniqueResults.append(candidate)
    logger.info(f"After basic deduplication: {len(uniqueResults)} unique URLs from {len(searchResults)} original")
    # Pass 2: rank the survivors by relevance to the query terms. Python's
    # sort is stable, so ties keep their original order.
    queryWords = set(query.lower().split())
    rankedResults = sorted(
        uniqueResults,
        key=lambda candidate: self._calculateRelevanceScore(candidate, queryWords),
        reverse=True,
    )
    # Pass 3: cap at the requested number of results.
    filteredResults = rankedResults[:maxResults]
    logger.info(f"After intelligent filtering: {len(filteredResults)} results selected from {len(uniqueResults)} unique")
    return filteredResults
@classmethod
async def create(cls):
    """Async factory: build a fully configured AiTavily instance.

    Reads the API key and crawl/search limits from APP_CONFIG, caching
    them on the instance so hot paths avoid repeated config lookups.

    Returns:
        A ready-to-use AiTavily instance.

    Raises:
        ValueError: If Connector_AiTavily_API_SECRET is not configured.
    """
    apiKey = APP_CONFIG.get("Connector_AiTavily_API_SECRET")
    if not apiKey:
        raise ValueError("Tavily API key not configured. Please set Connector_AiTavily_API_SECRET in config.ini")
    # BUGFIX: __init__ takes no arguments, so the previous
    # cls(client=..., crawlTimeout=..., ...) call always raised TypeError.
    # Build a default instance first, then overlay the configured values.
    instance = cls()
    instance.client = AsyncTavilyClient(api_key=apiKey)
    # Load and cache web crawl related configuration.
    instance.crawlTimeout = int(APP_CONFIG.get("Web_Crawl_TIMEOUT", "30"))
    instance.crawlMaxRetries = int(APP_CONFIG.get("Web_Crawl_MAX_RETRIES", "3"))
    instance.crawlRetryDelay = int(APP_CONFIG.get("Web_Crawl_RETRY_DELAY", "2"))
    # Load and cache web search result bounds.
    instance.webSearchMinResults = int(APP_CONFIG.get("Web_Search_MIN_RESULTS", "1"))
    instance.webSearchMaxResults = int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20"))
    return instance
# NOTE: the web-operation methods below (_routeWebOperation, webSearch, webCrawl) follow the standardized AiModelCall/AiModelResponse pattern.
def _cleanUrl(self, url: str) -> str:
"""Clean URL by removing extra text that might be appended."""
import re
# Extract just the URL part, removing any extra text after it
urlMatch = re.match(r'(https?://[^\s,]+)', url)
if urlMatch:
return urlMatch.group(1)
return url
async def _search(
    self,
    query: str,
    maxResults: int,
    searchDepth: str | None = None,
    timeRange: str | None = None,
    topic: str | None = None,
    includeDomains: list[str] | None = None,
    excludeDomains: list[str] | None = None,
    country: str | None = None,
    includeAnswer: str | None = None,
    includeRawContent: str | None = None,
) -> list[WebSearchResult]:
    """Run a Tavily web search and return the parsed results.

    Parameters mirror the Tavily search API; options left as None are
    omitted from the request entirely so the API never sees values it
    might reject.

    Raises:
        ValueError: If maxResults is outside the configured bounds or the
            client cannot be initialized.
    """
    # Enforce the cached min/max result bounds before touching the API.
    if not (self.webSearchMinResults <= maxResults <= self.webSearchMaxResults):
        raise ValueError(f"maxResults must be between {self.webSearchMinResults} and {self.webSearchMaxResults}")
    # Assemble the request, adding only options the caller provided.
    # includeDomains additionally requires a non-empty list; mapping it to
    # None when empty folds that rule into the shared None-check below.
    kwargs: dict = {"query": query, "max_results": maxResults}
    optionalParams = [
        ("search_depth", searchDepth),
        ("time_range", timeRange),
        ("topic", topic),
        ("include_domains", includeDomains if includeDomains else None),
        ("exclude_domains", excludeDomains),
        ("country", country),
        ("include_answer", includeAnswer),
        ("include_raw_content", includeRawContent),
    ]
    for apiName, value in optionalParams:
        if value is not None:
            kwargs[apiName] = value
    # Log the final API call parameters for comparison.
    logger.info(f"Tavily API call parameters: {kwargs}")
    # Lazily (re)build the client; fail loudly when that is impossible.
    if self.client is None:
        self._initializeClient()
    if self.client is None:
        raise ValueError("Tavily client not initialized. Please check API key configuration.")
    response = await self.client.search(**kwargs)
    # Tavily already applies its own scoring, so no extra filtering here.
    resultList = response.get('results', [])
    logger.info(f"Tavily returned {len(resultList)} results")
    # Report how many results carry extracted text.
    withContent = sum(1 for item in resultList if item.get("raw_content"))
    logger.info(f"Tavily results with raw_content: {withContent}/{len(resultList)}")
    # Debug aid: inspect the first result's shape to diagnose missing content.
    if resultList:
        firstResult = resultList[0]
        logger.debug(f"First result keys: {list(firstResult.keys())}")
        raw_content = firstResult.get('raw_content') or ''
        logger.debug(f"First result has raw_content: {'raw_content' in firstResult}, content length: {len(raw_content)}")
    return [
        WebSearchResult(
            title=item.get("title", ""),
            url=self._cleanUrl(item.get("url", "")),
            rawContent=item.get("raw_content") or item.get("content") or ""
        )
        for item in response["results"]
    ]
async def _crawl(
    self,
    url: str,
    instructions: str | None = None,
    limit: int = 20,
    maxDepth: int = 2,
    maxBreadth: int = 40,
) -> list[WebCrawlResult]:
    """Calls the Tavily API to crawl ONE URL with link following and retry logic.

    Args:
        url: The single root URL to crawl.
        instructions: Optional natural-language crawl guidance passed to Tavily.
        limit: Maximum number of pages to return.
        maxDepth: Maximum link-following depth from the root URL.
        maxBreadth: Maximum links followed per page.

    Returns:
        A list of WebCrawlResult, one per extracted page (possibly empty).

    Raises:
        Exception: When all retry attempts fail (timeout or API error).
    """
    # Retry/timeout settings cached on the instance at construction time.
    maxRetries = self.crawlMaxRetries
    retryDelay = self.crawlRetryDelay
    timeout = self.crawlTimeout
    logger.info(f"Starting crawl of URL: {url}")
    logger.info(f"Crawl settings: instructions={instructions[:100] if instructions else None}..., limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
    # One initial attempt plus maxRetries retries.
    for attempt in range(maxRetries + 1):
        try:
            logger.debug(f"Crawl attempt {attempt + 1}/{maxRetries + 1}")
            # Ensure client is initialized (lazy re-init if it was never built).
            if self.client is None:
                self._initializeClient()
            if self.client is None:
                raise ValueError("Tavily client not initialized. Please check API key configuration.")
            logger.debug(f"Crawling URL: {url}")
            # Build kwargs for crawl; falsy values (None/0) are omitted so the
            # API only receives explicitly-set options.
            kwargsCrawl: dict = {"url": url}
            if instructions:
                kwargsCrawl["instructions"] = instructions
            if limit:
                kwargsCrawl["limit"] = limit
            if maxDepth:
                kwargsCrawl["max_depth"] = maxDepth
            if maxBreadth:
                kwargsCrawl["max_breadth"] = maxBreadth
            logger.info(f"Sending request to Tavily API with parameters: {kwargsCrawl}")
            # Hard timeout around the API call; a slow crawl triggers a retry.
            response = await asyncio.wait_for(
                self.client.crawl(**kwargsCrawl),
                timeout=timeout
            )
            logger.debug(f"Tavily response received: {type(response)}")
            # Parse response - could be dict with results or list. The shape
            # varies across Tavily responses, so probe known keys defensively.
            if isinstance(response, dict):
                if "results" in response:
                    pageResults = response["results"]
                    logger.debug(f"Found 'results' key in response dict with {len(pageResults)} items")
                else:
                    logger.warning(f"Response dict keys: {list(response.keys())}")
                    # Check for other possible keys
                    if "pages" in response:
                        pageResults = response["pages"]
                        logger.debug(f"Found 'pages' key with {len(pageResults)} items")
                    elif "content" in response:
                        # Single page result: wrap the whole dict as one page.
                        pageResults = [response]
                        logger.debug("Found 'content' key, treating as single page result")
                    else:
                        logger.warning(f"Unexpected response dict structure: {list(response.keys())}")
                        pageResults = []
            elif isinstance(response, list):
                pageResults = response
                logger.debug(f"Response is a list with {len(pageResults)} items")
            else:
                logger.warning(f"Unexpected response format: {type(response)}, value: {str(response)[:200]}")
                pageResults = []
            logger.info(f"Got {len(pageResults)} pages from crawl for URL: {url}")
            # Diagnostic dump when the crawl produced nothing, to surface API
            # errors or unexpected payload shapes in the logs.
            if len(pageResults) == 0:
                logger.warning(f"Tavily crawl returned 0 pages for URL: {url}. Response structure: {type(response)}")
                if isinstance(response, dict):
                    logger.warning(f"Response keys: {list(response.keys())}")
                    # Log all values to debug (not just first 3)
                    for key, value in response.items():
                        value_str = str(value)
                        if len(value_str) > 200:
                            value_str = value_str[:200] + "..."
                        logger.warning(f" {key}: {type(value)} - {value_str}")
                    # Check for error messages in response
                    if "error" in response:
                        logger.error(f"Tavily API error in response: {response.get('error')}")
                    if "message" in response:
                        logger.warning(f"Tavily API message: {response.get('message')}")
                elif isinstance(response, str):
                    logger.warning(f"Tavily returned string response (first 500 chars): {response[:500]}")
                else:
                    logger.warning(f"Unexpected response type: {type(response)}, value: {str(response)[:500]}")
            # Convert to WebCrawlResult format with error handling. Each page
            # may be a dict or an object, so both access styles are tried.
            results = []
            for idx, result in enumerate(pageResults):
                try:
                    # Safely extract fields
                    result_url = result.get("url") if isinstance(result, dict) else (getattr(result, "url", None) if hasattr(result, "url") else url)
                    result_content = ""
                    if isinstance(result, dict):
                        result_content = result.get("raw_content") or result.get("content") or ""
                    elif hasattr(result, "raw_content"):
                        result_content = result.raw_content or ""
                    elif hasattr(result, "content"):
                        result_content = result.content or ""
                    result_title = ""
                    if isinstance(result, dict):
                        result_title = result.get("title", "")
                    elif hasattr(result, "title"):
                        result_title = result.title or ""
                    results.append(WebCrawlResult(
                        url=result_url or url,
                        content=result_content,
                        title=result_title
                    ))
                except Exception as resultError:
                    logger.warning(f"Error processing crawl result {idx}: {resultError}")
                    # Try to create a minimal result with at least the URL
                    try:
                        if isinstance(result, dict) and result.get("url"):
                            results.append(WebCrawlResult(
                                url=result.get("url", url),
                                content="",
                                title=""
                            ))
                    except Exception:
                        logger.error(f"Failed to create minimal result for crawl result {idx}")
                    continue
            logger.debug(f"Crawl successful: extracted {len(results)} pages from URL")
            return results
        except asyncio.TimeoutError:
            logger.warning(f"Crawl attempt {attempt + 1} timed out after {timeout} seconds for URL: {url}")
            if attempt < maxRetries:
                logger.info(f"Retrying in {retryDelay} seconds...")
                await asyncio.sleep(retryDelay)
            else:
                raise Exception(f"Crawl failed after {maxRetries + 1} attempts due to timeout")
        except Exception as e:
            logger.warning(f"Crawl attempt {attempt + 1} failed for URL {url}: {str(e)}")
            logger.debug(f"Full error details: {type(e).__name__}: {str(e)}", exc_info=True)
            # Check if it's a validation error and log more details
            if "validation" in str(e).lower():
                logger.debug(f"URL validation failed. Checking URL format:")
                logger.debug(f" URL: '{url}' (length: {len(url)})")
                # Check for common URL issues
                if ' ' in url:
                    logger.debug(f" WARNING: URL contains spaces!")
                if not url.startswith(('http://', 'https://')):
                    logger.debug(f" WARNING: URL doesn't start with http/https!")
                if len(url) > 2000:
                    logger.debug(f" WARNING: URL is very long ({len(url)} chars)")
            # Log API-specific errors by substring-matching the message.
            error_str = str(e).lower()
            if "rate limit" in error_str or "429" in error_str:
                logger.error(f"Tavily API rate limit hit for URL: {url}")
            elif "401" in error_str or "unauthorized" in error_str:
                logger.error(f"Tavily API authentication failed for URL: {url}")
            elif "404" in error_str or "not found" in error_str:
                logger.warning(f"URL not found (404) for: {url}")
            elif "timeout" in error_str:
                logger.warning(f"Timeout error for URL: {url}")
            if attempt < maxRetries:
                logger.info(f"Retrying in {retryDelay} seconds...")
                await asyncio.sleep(retryDelay)
            else:
                logger.error(f"Crawl failed after {maxRetries + 1} attempts for URL: {url}")
                raise Exception(f"Crawl failed after {maxRetries + 1} attempts: {str(e)}")
async def _routeWebOperation(self, modelCall: AiModelCall) -> "AiModelResponse":
    """
    Route web operation based on operation type.

    Args:
        modelCall: AiModelCall with messages and options

    Returns:
        AiModelResponse from the matching handler, or a failure response
        for unsupported operation types.
    """
    operationType = modelCall.options.operationType
    # Dispatch table mapping operation types to their handlers.
    handlers = {
        OperationTypeEnum.WEB_SEARCH_DATA: self.webSearch,
        OperationTypeEnum.WEB_CRAWL: self.webCrawl,
    }
    handler = handlers.get(operationType)
    if handler is None:
        # Unsupported operation type
        return AiModelResponse(
            content="",
            success=False,
            error=f"Unsupported operation type: {operationType}"
        )
    return await handler(modelCall)
async def webSearch(self, modelCall: AiModelCall) -> "AiModelResponse":
    """
    WEB_SEARCH_DATA operation - returns list of URLs using Tavily search.

    The user message content must be a JSON-encoded AiCallPromptWebSearch.
    On failure, returns success=False with an error payload instead of
    raising, so callers always get an AiModelResponse.

    Args:
        modelCall: AiModelCall with AiCallPromptWebSearch as prompt

    Returns:
        AiModelResponse with JSON list of URLs
    """
    try:
        # Extract parameters - find user message (not system message)
        promptContent = ""
        if modelCall.messages:
            for msg in modelCall.messages:
                if msg.get("role") == "user":
                    promptContent = msg.get("content", "")
                    break
            # Fallback to first message if no user message found
            if not promptContent and len(modelCall.messages) > 0:
                promptContent = modelCall.messages[0].get("content", "")
        if not promptContent or not promptContent.strip():
            raise ValueError("Empty prompt content received for web search")
        import json
        try:
            promptData = json.loads(promptContent)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse prompt content as JSON: {promptContent[:200]}")
            raise ValueError(f"Invalid JSON in prompt content: {str(e)}")
        # Create Pydantic model (validates the prompt fields).
        webSearchPrompt = AiCallPromptWebSearch(**promptData)
        # Convert ISO country code to country name for Tavily
        countryName = webSearchPrompt.country
        if countryName:
            countryName = self._convertIsoCodeToCountryName(countryName)
        # Perform search - use exact parameters from prompt
        # NOTE: timeRange parameter causes generic results, so we don't use it
        searchResults = await self._search(
            query=webSearchPrompt.instruction,
            maxResults=webSearchPrompt.maxNumberPages,
            timeRange=None,  # Not used - causes generic results
            country=countryName,
            includeAnswer="basic",
            includeRawContent="text"
        )
        # Extract URLs and content from results with error handling. Each
        # per-result failure is logged and skipped, never fatal.
        urls = []
        results_with_content = []
        content_count = 0
        try:
            for result in searchResults:
                try:
                    # Safely extract URL
                    url = result.url if hasattr(result, 'url') and result.url else ""
                    if url:
                        urls.append(url)
                    # Safely extract content. NOTE(review): WebSearchResult
                    # only defines rawContent; the 'content' fallback is
                    # defensive and never fires for that type.
                    content = ""
                    if hasattr(result, 'rawContent'):
                        content = result.rawContent or ""
                    if not content and hasattr(result, 'content'):
                        content = result.content or ""
                    if content:
                        content_count += 1
                    # Safely extract title
                    title = result.title if hasattr(result, 'title') and result.title else ""
                    # NOTE(review): 'score' is absent on WebSearchResult, so
                    # the getattr default of 0 is always used here.
                    results_with_content.append({
                        "url": url,
                        "title": title,
                        "content": content,
                        "score": getattr(result, 'score', 0)
                    })
                except Exception as resultError:
                    logger.warning(f"Error processing individual search result: {resultError}")
                    # Continue processing other results
                    continue
            logger.info(f"Tavily search: {len(urls)} URLs, {content_count} with content, {len(results_with_content)} total results")
            if content_count == 0:
                logger.warning("Tavily search returned no content - results may need crawling")
        except Exception as extractionError:
            logger.error(f"Error extracting URLs and content from search results: {extractionError}")
            # Try to recover at least URLs
            try:
                urls = [result.url for result in searchResults if hasattr(result, 'url') and result.url]
                logger.info(f"Recovered {len(urls)} URLs after extraction error")
            except Exception:
                logger.error("Failed to recover any URLs from search results")
        # Return both URLs and full results in JSON for direct extraction
        # Format: {"urls": [...], "results": [...]}
        import json
        response_data = {
            "urls": urls,
            "results": results_with_content
        }
        return AiModelResponse(
            content=json.dumps(response_data, indent=2),
            success=True,
            metadata={
                "total_urls": len(urls),
                "operation": "WEB_SEARCH_DATA",
                "results_with_content": results_with_content  # Also in metadata for compatibility
            }
        )
    except Exception as e:
        logger.error(f"Error in Tavily web search: {str(e)}", exc_info=True)
        import json
        # Return error response with empty results
        error_response = {
            "urls": [],
            "results": [],
            "error": str(e)
        }
        return AiModelResponse(
            content=json.dumps(error_response, indent=2),
            success=False,
            error=str(e)
        )
async def webCrawl(self, modelCall: AiModelCall) -> "AiModelResponse":
    """
    WEB_CRAWL operation - crawls one URL using Tavily with link following.

    The user message content must be a JSON-encoded AiCallPromptWebCrawl.
    All crawled pages are concatenated into one content string separated
    by per-page headers. On failure, returns success=False with an error
    payload instead of raising.

    Args:
        modelCall: AiModelCall with AiCallPromptWebCrawl as prompt

    Returns:
        AiModelResponse with crawl results as JSON (may include multiple pages)
    """
    try:
        # Extract parameters - find user message (not system message)
        promptContent = ""
        if modelCall.messages:
            for msg in modelCall.messages:
                if msg.get("role") == "user":
                    promptContent = msg.get("content", "")
                    break
            # Fallback to first message if no user message found
            if not promptContent and len(modelCall.messages) > 0:
                promptContent = modelCall.messages[0].get("content", "")
        if not promptContent or not promptContent.strip():
            raise ValueError("Empty prompt content received for web crawl")
        import json
        try:
            promptData = json.loads(promptContent)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse prompt content as JSON: {promptContent[:200]}")
            raise ValueError(f"Invalid JSON in prompt content: {str(e)}")
        # Create Pydantic model (validates the prompt fields).
        webCrawlPrompt = AiCallPromptWebCrawl(**promptData)
        # Perform crawl for ONE URL with link following.
        # Use maxWidth as limit, maxDepth as maxDepth, and calculate maxBreadth
        crawlResults = await self._crawl(
            url=webCrawlPrompt.url,
            instructions=webCrawlPrompt.instruction,
            limit=webCrawlPrompt.maxWidth or 20,  # maxWidth controls number of pages
            maxDepth=webCrawlPrompt.maxDepth or 2,
            maxBreadth=webCrawlPrompt.maxWidth or 40  # Use same as limit for breadth
        )
        # Format multiple pages from the crawl into a single response
        if crawlResults and len(crawlResults) > 0:
            # Get all pages content with error handling; a bad page is
            # logged and skipped rather than aborting the whole crawl.
            allContent = ""
            pageUrls = []
            for i, result in enumerate(crawlResults, 1):
                try:
                    pageHeader = f"\n{'='*60}\nPAGE {i}: {result.url}\n{'='*60}\n"
                    if result.title:
                        allContent += f"{pageHeader}Title: {result.title}\n\n"
                    else:
                        allContent += f"{pageHeader}\n"
                    allContent += f"{result.content or ''}\n"
                    pageUrls.append(result.url)
                except Exception as pageError:
                    logger.warning(f"Error formatting page {i} from crawl: {pageError}")
                    # Try to add at least the URL
                    try:
                        pageUrls.append(result.url if hasattr(result, 'url') and result.url else webCrawlPrompt.url)
                    except Exception:
                        pass
            resultData = {
                "url": webCrawlPrompt.url,
                "title": crawlResults[0].title if crawlResults and crawlResults[0].title else "Content",
                "content": allContent,
                "pagesCrawled": len(crawlResults),
                "pageUrls": pageUrls
            }
            logger.info(f"Crawl successful: {len(crawlResults)} pages extracted from {webCrawlPrompt.url}")
        else:
            logger.warning(f"Crawl returned no results for URL: {webCrawlPrompt.url}")
            resultData = {
                "url": webCrawlPrompt.url,
                "title": "",
                "content": "",
                "error": "No content extracted - Tavily crawl returned 0 pages",
                "pagesCrawled": 0,
                "pageUrls": []
            }
        # Return as JSON - same format as Perplexity but with multiple pages content
        import json
        return AiModelResponse(
            content=json.dumps(resultData, indent=2),
            success=True,
            metadata={"operation": "WEB_CRAWL", "url": webCrawlPrompt.url, "pagesCrawled": len(crawlResults) if crawlResults else 0}
        )
    except Exception as e:
        logger.error(f"Error in Tavily web crawl: {str(e)}", exc_info=True)
        import json
        # The prompt may have failed validation before webCrawlPrompt existed,
        # so only use its URL when it was actually bound.
        crawl_url = webCrawlPrompt.url if 'webCrawlPrompt' in locals() else ""
        errorResult = {
            "url": crawl_url,
            "title": "",
            "content": "",
            "error": str(e),
            "pagesCrawled": 0,
            "pageUrls": []
        }
        return AiModelResponse(
            content=json.dumps(errorResult, indent=2),
            success=False,
            error=str(e)
        )