610 lines
25 KiB
Python
610 lines
25 KiB
Python
"""Tavily web search class.
|
|
"""
|
|
|
|
import logging
|
|
import asyncio
|
|
import re
|
|
from dataclasses import dataclass
|
|
from typing import Optional, List, Dict
|
|
from tavily import AsyncTavilyClient
|
|
from modules.shared.configuration import APP_CONFIG
|
|
from modules.aicore.aicoreBase import BaseConnectorAi
|
|
from modules.datamodels.datamodelAi import AiModel, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelCall, AiModelResponse, createOperationTypeRatings, AiCallPromptWebSearch, AiCallPromptWebCrawl
|
|
from modules.datamodels.datamodelTools import CountryCodes
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
@dataclass
|
|
class WebSearchResult:
|
|
title: str
|
|
url: str
|
|
rawContent: Optional[str] = None
|
|
|
|
@dataclass
|
|
class WebCrawlResult:
|
|
url: str
|
|
content: str
|
|
title: Optional[str] = None
|
|
|
|
|
|
class AiTavily(BaseConnectorAi):
|
|
"""Tavily web search connector."""
|
|
|
|
def __init__(self):
|
|
super().__init__()
|
|
self.client: Optional[AsyncTavilyClient] = None
|
|
# Cached settings loaded at initialization time
|
|
self.crawlTimeout: int = 30
|
|
self.crawlMaxRetries: int = 3
|
|
self.crawlRetryDelay: int = 2
|
|
# Cached web search constraints (camelCase per project style)
|
|
self.webSearchMinResults: int = 1
|
|
self.webSearchMaxResults: int = 20
|
|
# Initialize client if API key is available
|
|
self._initializeClient()
|
|
|
|
|
|
def getModels(self) -> List[AiModel]:
|
|
"""Get all available Tavily models."""
|
|
return [
|
|
AiModel(
|
|
name="tavily-search",
|
|
displayName="Tavily Search & Research",
|
|
connectorType="tavily",
|
|
apiUrl="https://api.tavily.com",
|
|
temperature=0.0, # Web search doesn't use temperature
|
|
maxTokens=0, # Web search doesn't use tokens
|
|
contextLength=0,
|
|
costPer1kTokensInput=0.0,
|
|
costPer1kTokensOutput=0.0,
|
|
speedRating=8, # Good speed for search and extract
|
|
qualityRating=9, # Excellent quality for web research
|
|
# capabilities removed (not used in business logic)
|
|
functionCall=self._routeWebOperation,
|
|
priority=PriorityEnum.BALANCED,
|
|
processingMode=ProcessingModeEnum.BASIC,
|
|
operationTypes=createOperationTypeRatings(
|
|
(OperationTypeEnum.WEB_SEARCH, 9),
|
|
(OperationTypeEnum.WEB_CRAWL, 10)
|
|
),
|
|
version="tavily-search",
|
|
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: 0.008 # Simple flat rate
|
|
)
|
|
]
|
|
|
|
def _initializeClient(self):
|
|
"""Initialize the Tavily client if API key is available."""
|
|
try:
|
|
apiKey = APP_CONFIG.get("Connector_AiTavily_API_SECRET")
|
|
if apiKey:
|
|
self.client = AsyncTavilyClient(api_key=apiKey)
|
|
logger.info("Tavily client initialized successfully")
|
|
else:
|
|
logger.warning("Tavily API key not found, client not initialized")
|
|
except Exception as e:
|
|
logger.error(f"Failed to initialize Tavily client: {str(e)}")
|
|
|
|
def getConnectorType(self) -> str:
|
|
"""Get the connector type identifier."""
|
|
return "tavily"
|
|
|
|
def _convertIsoCodeToCountryName(self, isoCode: str) -> str:
|
|
"""
|
|
Convert ISO-2 country code to Tavily country name.
|
|
Uses centralized CountryCodes mapping.
|
|
"""
|
|
return CountryCodes.getForTavily(isoCode)
|
|
|
|
def _extractUrlsFromPrompt(self, prompt: str) -> List[str]:
|
|
"""Extract URLs from a text prompt using regex."""
|
|
if not prompt:
|
|
return []
|
|
|
|
# URL regex pattern - matches http/https URLs
|
|
urlPattern = r'https?://(?:[-\w.])+(?:[:\d]+)?(?:/(?:[\w/_.])*(?:\?(?:[\w&=%.])*)?(?:#(?:[\w.])*)?)?'
|
|
urls = re.findall(urlPattern, prompt)
|
|
|
|
# Remove duplicates while preserving order
|
|
seen = set()
|
|
uniqueUrls = []
|
|
for url in urls:
|
|
if url not in seen:
|
|
seen.add(url)
|
|
uniqueUrls.append(url)
|
|
|
|
return uniqueUrls
|
|
|
|
def _normalizeUrl(self, url: str) -> str:
|
|
"""
|
|
Normalize URL for better deduplication.
|
|
Removes common variations that represent the same content.
|
|
"""
|
|
if not url:
|
|
return url
|
|
|
|
# Remove trailing slashes
|
|
url = url.rstrip('/')
|
|
|
|
# Remove common query parameters that don't affect content
|
|
import urllib.parse
|
|
parsed = urllib.parse.urlparse(url)
|
|
|
|
# Remove common tracking parameters
|
|
queryParams = urllib.parse.parse_qs(parsed.query)
|
|
filteredParams = {}
|
|
|
|
for key, values in queryParams.items():
|
|
# Keep important parameters, remove tracking ones
|
|
if key.lower() not in ['utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
|
|
'fbclid', 'gclid', 'ref', 'source', 'campaign']:
|
|
filteredParams[key] = values
|
|
|
|
# Rebuild query string
|
|
filteredQuery = urllib.parse.urlencode(filteredParams, doseq=True)
|
|
|
|
# Reconstruct URL
|
|
normalized = urllib.parse.urlunparse((
|
|
parsed.scheme,
|
|
parsed.netloc,
|
|
parsed.path,
|
|
parsed.params,
|
|
filteredQuery,
|
|
parsed.fragment
|
|
))
|
|
|
|
return normalized
|
|
|
|
def _calculateRelevanceScore(self, result: WebSearchResult, queryWords: set) -> float:
|
|
"""
|
|
Calculate relevance score for a search result.
|
|
Higher score means more relevant to the query.
|
|
"""
|
|
score = 0.0
|
|
|
|
# Title relevance (most important)
|
|
titleWords = set(result.title.lower().split())
|
|
titleMatches = len(queryWords.intersection(titleWords))
|
|
score += titleMatches * 3.0 # Weight title matches heavily
|
|
|
|
# URL relevance
|
|
urlWords = set(result.url.lower().split('/'))
|
|
urlMatches = len(queryWords.intersection(urlWords))
|
|
score += urlMatches * 1.5
|
|
|
|
# Content relevance (if available)
|
|
if hasattr(result, 'rawContent') and result.rawContent:
|
|
contentWords = set(result.rawContent.lower().split())
|
|
contentMatches = len(queryWords.intersection(contentWords))
|
|
score += contentMatches * 0.1 # Lower weight for content matches
|
|
|
|
# Domain authority bonus (simple heuristic)
|
|
domain = result.url.split('/')[2] if '/' in result.url else result.url
|
|
if any(authDomain in domain.lower() for authDomain in
|
|
['wikipedia.org', 'github.com', 'stackoverflow.com', 'reddit.com', 'medium.com']):
|
|
score += 1.0
|
|
|
|
# Penalty for very long URLs (often less relevant)
|
|
if len(result.url) > 100:
|
|
score -= 0.5
|
|
|
|
return score
|
|
|
|
def _intelligentUrlFiltering(self, searchResults: List[WebSearchResult], query: str, maxResults: int) -> List[WebSearchResult]:
|
|
"""
|
|
Intelligent URL filtering with de-duplication and relevance scoring.
|
|
|
|
Args:
|
|
searchResults: Raw search results from Tavily
|
|
query: Original search query for relevance scoring
|
|
maxResults: Maximum number of results to return
|
|
|
|
Returns:
|
|
Filtered and deduplicated list of search results
|
|
"""
|
|
if not searchResults:
|
|
return []
|
|
|
|
# Step 1: Basic de-duplication by URL
|
|
seenUrls = set()
|
|
uniqueResults = []
|
|
|
|
for result in searchResults:
|
|
# Normalize URL for better deduplication
|
|
normalizedUrl = self._normalizeUrl(result.url)
|
|
if normalizedUrl not in seenUrls:
|
|
seenUrls.add(normalizedUrl)
|
|
uniqueResults.append(result)
|
|
|
|
logger.info(f"After basic deduplication: {len(uniqueResults)} unique URLs from {len(searchResults)} original")
|
|
|
|
# Step 2: Relevance scoring and filtering
|
|
scoredResults = []
|
|
queryWords = set(query.lower().split())
|
|
|
|
for result in uniqueResults:
|
|
score = self._calculateRelevanceScore(result, queryWords)
|
|
scoredResults.append((score, result))
|
|
|
|
# Step 3: Sort by relevance score (higher is better)
|
|
scoredResults.sort(key=lambda x: x[0], reverse=True)
|
|
|
|
# Step 4: Take top results
|
|
filteredResults = [result for score, result in scoredResults[:maxResults]]
|
|
|
|
logger.info(f"After intelligent filtering: {len(filteredResults)} results selected from {len(uniqueResults)} unique")
|
|
|
|
return filteredResults
|
|
|
|
@classmethod
|
|
async def create(cls):
|
|
apiKey = APP_CONFIG.get("Connector_AiTavily_API_SECRET")
|
|
if not apiKey:
|
|
raise ValueError("Tavily API key not configured. Please set Connector_AiTavily_API_SECRET in config.ini")
|
|
# Load and cache web crawl related configuration
|
|
crawlTimeout = int(APP_CONFIG.get("Web_Crawl_TIMEOUT", "30"))
|
|
crawlMaxRetries = int(APP_CONFIG.get("Web_Crawl_MAX_RETRIES", "3"))
|
|
crawlRetryDelay = int(APP_CONFIG.get("Web_Crawl_RETRY_DELAY", "2"))
|
|
return cls(
|
|
client=AsyncTavilyClient(api_key=apiKey),
|
|
crawlTimeout=crawlTimeout,
|
|
crawlMaxRetries=crawlMaxRetries,
|
|
crawlRetryDelay=crawlRetryDelay,
|
|
webSearchMinResults=int(APP_CONFIG.get("Web_Search_MIN_RESULTS", "1")),
|
|
webSearchMaxResults=int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20")),
|
|
)
|
|
|
|
# Standardized method using AiModelCall/AiModelResponse pattern
|
|
|
|
|
|
def _cleanUrl(self, url: str) -> str:
|
|
"""Clean URL by removing extra text that might be appended."""
|
|
import re
|
|
# Extract just the URL part, removing any extra text after it
|
|
urlMatch = re.match(r'(https?://[^\s,]+)', url)
|
|
if urlMatch:
|
|
return urlMatch.group(1)
|
|
return url
|
|
|
|
async def _search(
|
|
self,
|
|
query: str,
|
|
maxResults: int,
|
|
searchDepth: str | None = None,
|
|
timeRange: str | None = None,
|
|
topic: str | None = None,
|
|
includeDomains: list[str] | None = None,
|
|
excludeDomains: list[str] | None = None,
|
|
country: str | None = None,
|
|
includeAnswer: str | None = None,
|
|
includeRawContent: str | None = None,
|
|
) -> list[WebSearchResult]:
|
|
"""Calls the Tavily API to perform a web search."""
|
|
# Make sure maxResults is within the allowed range (use cached values)
|
|
minResults = self.webSearchMinResults
|
|
maxAllowedResults = self.webSearchMaxResults
|
|
if maxResults < minResults or maxResults > maxAllowedResults:
|
|
raise ValueError(f"maxResults must be between {minResults} and {maxAllowedResults}")
|
|
|
|
# Perform actual API call
|
|
# Build kwargs only for provided options to avoid API rejections
|
|
kwargs: dict = {"query": query, "max_results": maxResults}
|
|
if searchDepth is not None:
|
|
kwargs["search_depth"] = searchDepth
|
|
if timeRange is not None:
|
|
kwargs["time_range"] = timeRange
|
|
if topic is not None:
|
|
kwargs["topic"] = topic
|
|
if includeDomains is not None and len(includeDomains) > 0:
|
|
kwargs["include_domains"] = includeDomains
|
|
if excludeDomains is not None:
|
|
kwargs["exclude_domains"] = excludeDomains
|
|
if country is not None:
|
|
kwargs["country"] = country
|
|
if includeAnswer is not None:
|
|
kwargs["include_answer"] = includeAnswer
|
|
if includeRawContent is not None:
|
|
kwargs["include_raw_content"] = includeRawContent
|
|
|
|
# Log the final API call parameters for comparison
|
|
logger.info(f"Tavily API call parameters: {kwargs}")
|
|
|
|
# Ensure client is initialized
|
|
if self.client is None:
|
|
self._initializeClient()
|
|
if self.client is None:
|
|
raise ValueError("Tavily client not initialized. Please check API key configuration.")
|
|
|
|
response = await self.client.search(**kwargs)
|
|
|
|
# Return all results without score filtering
|
|
# Tavily's scoring is already applied by the API
|
|
logger.info(f"Tavily returned {len(response.get('results', []))} results")
|
|
|
|
return [
|
|
WebSearchResult(
|
|
title=result["title"],
|
|
url=self._cleanUrl(result["url"]),
|
|
rawContent=result.get("raw_content")
|
|
)
|
|
for result in response["results"]
|
|
]
|
|
|
|
async def _crawl(
|
|
self,
|
|
url: str,
|
|
instructions: str | None = None,
|
|
limit: int = 20,
|
|
maxDepth: int = 2,
|
|
maxBreadth: int = 40,
|
|
) -> list[WebCrawlResult]:
|
|
"""Calls the Tavily API to crawl ONE URL with link following and retry logic."""
|
|
maxRetries = self.crawlMaxRetries
|
|
retryDelay = self.crawlRetryDelay
|
|
timeout = self.crawlTimeout
|
|
|
|
logger.debug(f"Starting crawl of URL: {url}")
|
|
logger.debug(f"Crawl settings: instructions={instructions}, limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
|
|
|
|
for attempt in range(maxRetries + 1):
|
|
try:
|
|
logger.debug(f"Crawl attempt {attempt + 1}/{maxRetries + 1}")
|
|
|
|
# Ensure client is initialized
|
|
if self.client is None:
|
|
self._initializeClient()
|
|
if self.client is None:
|
|
raise ValueError("Tavily client not initialized. Please check API key configuration.")
|
|
|
|
logger.debug(f"Crawling URL: {url}")
|
|
|
|
# Build kwargs for crawl
|
|
kwargsCrawl: dict = {"url": url}
|
|
if instructions:
|
|
kwargsCrawl["instructions"] = instructions
|
|
if limit:
|
|
kwargsCrawl["limit"] = limit
|
|
if maxDepth:
|
|
kwargsCrawl["max_depth"] = maxDepth
|
|
if maxBreadth:
|
|
kwargsCrawl["max_breadth"] = maxBreadth
|
|
|
|
logger.debug(f"Sending request to Tavily with kwargs: {kwargsCrawl}")
|
|
|
|
response = await asyncio.wait_for(
|
|
self.client.crawl(**kwargsCrawl),
|
|
timeout=timeout
|
|
)
|
|
|
|
logger.debug(f"Tavily response received: {type(response)}")
|
|
|
|
# Parse response - could be dict with results or list
|
|
if isinstance(response, dict) and "results" in response:
|
|
pageResults = response["results"]
|
|
elif isinstance(response, list):
|
|
pageResults = response
|
|
else:
|
|
logger.warning(f"Unexpected response format: {type(response)}")
|
|
pageResults = []
|
|
|
|
logger.debug(f"Got {len(pageResults)} pages from crawl")
|
|
|
|
# Convert to WebCrawlResult format
|
|
results = []
|
|
for result in pageResults:
|
|
results.append(WebCrawlResult(
|
|
url=result.get("url", url),
|
|
content=result.get("raw_content", result.get("content", "")),
|
|
title=result.get("title", "")
|
|
))
|
|
|
|
logger.debug(f"Crawl successful: extracted {len(results)} pages from URL")
|
|
return results
|
|
|
|
except asyncio.TimeoutError:
|
|
logger.warning(f"Crawl attempt {attempt + 1} timed out after {timeout} seconds for URL: {url}")
|
|
if attempt < maxRetries:
|
|
logger.info(f"Retrying in {retryDelay} seconds...")
|
|
await asyncio.sleep(retryDelay)
|
|
else:
|
|
raise Exception(f"Crawl failed after {maxRetries + 1} attempts due to timeout")
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Crawl attempt {attempt + 1} failed for URL {url}: {str(e)}")
|
|
logger.debug(f"Full error details: {type(e).__name__}: {str(e)}")
|
|
|
|
# Check if it's a validation error and log more details
|
|
if "validation" in str(e).lower():
|
|
logger.debug(f"URL validation failed. Checking URL format:")
|
|
logger.debug(f" URL: '{url}' (length: {len(url)})")
|
|
# Check for common URL issues
|
|
if ' ' in url:
|
|
logger.debug(f" WARNING: URL contains spaces!")
|
|
if not url.startswith(('http://', 'https://')):
|
|
logger.debug(f" WARNING: URL doesn't start with http/https!")
|
|
if len(url) > 2000:
|
|
logger.debug(f" WARNING: URL is very long ({len(url)} chars)")
|
|
|
|
if attempt < maxRetries:
|
|
logger.info(f"Retrying in {retryDelay} seconds...")
|
|
await asyncio.sleep(retryDelay)
|
|
else:
|
|
raise Exception(f"Crawl failed after {maxRetries + 1} attempts: {str(e)}")
|
|
|
|
async def _routeWebOperation(self, modelCall: AiModelCall) -> "AiModelResponse":
|
|
"""
|
|
Route web operation based on operation type.
|
|
|
|
Args:
|
|
modelCall: AiModelCall with messages and options
|
|
|
|
Returns:
|
|
AiModelResponse based on operation type
|
|
"""
|
|
operationType = modelCall.options.operationType
|
|
|
|
if operationType == OperationTypeEnum.WEB_SEARCH:
|
|
return await self.webSearch(modelCall)
|
|
elif operationType == OperationTypeEnum.WEB_CRAWL:
|
|
return await self.webCrawl(modelCall)
|
|
else:
|
|
# Unsupported operation type
|
|
return AiModelResponse(
|
|
content="",
|
|
success=False,
|
|
error=f"Unsupported operation type: {operationType}"
|
|
)
|
|
|
|
async def webSearch(self, modelCall: AiModelCall) -> "AiModelResponse":
|
|
"""
|
|
WEB_SEARCH operation - returns list of URLs using Tavily search.
|
|
|
|
Args:
|
|
modelCall: AiModelCall with AiCallPromptWebSearch as prompt
|
|
|
|
Returns:
|
|
AiModelResponse with JSON list of URLs
|
|
"""
|
|
try:
|
|
# Extract parameters - find user message (not system message)
|
|
promptContent = ""
|
|
if modelCall.messages:
|
|
for msg in modelCall.messages:
|
|
if msg.get("role") == "user":
|
|
promptContent = msg.get("content", "")
|
|
break
|
|
# Fallback to first message if no user message found
|
|
if not promptContent and len(modelCall.messages) > 0:
|
|
promptContent = modelCall.messages[0].get("content", "")
|
|
|
|
if not promptContent or not promptContent.strip():
|
|
raise ValueError("Empty prompt content received for web search")
|
|
|
|
import json
|
|
try:
|
|
promptData = json.loads(promptContent)
|
|
except json.JSONDecodeError as e:
|
|
logger.error(f"Failed to parse prompt content as JSON: {promptContent[:200]}")
|
|
raise ValueError(f"Invalid JSON in prompt content: {str(e)}")
|
|
|
|
# Create Pydantic model
|
|
webSearchPrompt = AiCallPromptWebSearch(**promptData)
|
|
|
|
# Convert ISO country code to country name for Tavily
|
|
countryName = webSearchPrompt.country
|
|
if countryName:
|
|
countryName = self._convertIsoCodeToCountryName(countryName)
|
|
|
|
# Perform search - use exact parameters from prompt
|
|
# NOTE: timeRange parameter causes generic results, so we don't use it
|
|
searchResults = await self._search(
|
|
query=webSearchPrompt.instruction,
|
|
maxResults=webSearchPrompt.maxNumberPages,
|
|
timeRange=None, # Not used - causes generic results
|
|
country=countryName,
|
|
includeAnswer="basic",
|
|
includeRawContent="text"
|
|
)
|
|
|
|
# Extract URLs from results
|
|
urls = [result.url for result in searchResults]
|
|
|
|
# Return as JSON array
|
|
import json
|
|
return AiModelResponse(
|
|
content=json.dumps(urls, indent=2),
|
|
success=True,
|
|
metadata={"total_urls": len(urls), "operation": "WEB_SEARCH"}
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error in Tavily web search: {str(e)}")
|
|
return AiModelResponse(
|
|
content="[]",
|
|
success=False,
|
|
error=str(e)
|
|
)
|
|
|
|
async def webCrawl(self, modelCall: AiModelCall) -> "AiModelResponse":
|
|
"""
|
|
WEB_CRAWL operation - crawls one URL using Tavily with link following.
|
|
|
|
Args:
|
|
modelCall: AiModelCall with AiCallPromptWebCrawl as prompt
|
|
|
|
Returns:
|
|
AiModelResponse with crawl results as JSON (may include multiple pages)
|
|
"""
|
|
try:
|
|
# Extract parameters - find user message (not system message)
|
|
promptContent = ""
|
|
if modelCall.messages:
|
|
for msg in modelCall.messages:
|
|
if msg.get("role") == "user":
|
|
promptContent = msg.get("content", "")
|
|
break
|
|
# Fallback to first message if no user message found
|
|
if not promptContent and len(modelCall.messages) > 0:
|
|
promptContent = modelCall.messages[0].get("content", "")
|
|
|
|
if not promptContent or not promptContent.strip():
|
|
raise ValueError("Empty prompt content received for web crawl")
|
|
|
|
import json
|
|
try:
|
|
promptData = json.loads(promptContent)
|
|
except json.JSONDecodeError as e:
|
|
logger.error(f"Failed to parse prompt content as JSON: {promptContent[:200]}")
|
|
raise ValueError(f"Invalid JSON in prompt content: {str(e)}")
|
|
|
|
# Create Pydantic model
|
|
webCrawlPrompt = AiCallPromptWebCrawl(**promptData)
|
|
|
|
# Perform crawl for ONE URL with link following
|
|
# Use maxWidth as limit, maxDepth as maxDepth, and calculate maxBreadth
|
|
crawlResults = await self._crawl(
|
|
url=webCrawlPrompt.url,
|
|
instructions=webCrawlPrompt.instruction,
|
|
limit=webCrawlPrompt.maxWidth or 20, # maxWidth controls number of pages
|
|
maxDepth=webCrawlPrompt.maxDepth or 2,
|
|
maxBreadth=webCrawlPrompt.maxWidth or 40 # Use same as limit for breadth
|
|
)
|
|
|
|
# If we got multiple pages from the crawl, we need to format them differently
|
|
# Return the first result for backwards compatibility, but include total page count
|
|
if crawlResults and len(crawlResults) > 0:
|
|
# Get all pages content
|
|
allContent = ""
|
|
for i, result in enumerate(crawlResults, 1):
|
|
pageHeader = f"\n{'='*60}\nPAGE {i}: {result.url}\n{'='*60}\n"
|
|
if result.title:
|
|
allContent += f"{pageHeader}Title: {result.title}\n\n"
|
|
allContent += f"{result.content}\n"
|
|
|
|
resultData = {
|
|
"url": webCrawlPrompt.url,
|
|
"title": crawlResults[0].title if crawlResults[0].title else "Content",
|
|
"content": allContent,
|
|
"pagesCrawled": len(crawlResults),
|
|
"pageUrls": [result.url for result in crawlResults]
|
|
}
|
|
else:
|
|
resultData = {"url": webCrawlPrompt.url, "title": "", "content": "", "error": "No content extracted", "pagesCrawled": 0}
|
|
|
|
# Return as JSON - same format as Perplexity but with multiple pages content
|
|
import json
|
|
return AiModelResponse(
|
|
content=json.dumps(resultData, indent=2),
|
|
success=True,
|
|
metadata={"operation": "WEB_CRAWL", "url": webCrawlPrompt.url, "pagesCrawled": len(crawlResults) if crawlResults else 0}
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error in Tavily web crawl: {str(e)}")
|
|
import json
|
|
errorResult = {"error": str(e), "url": webCrawlPrompt.url if 'webCrawlPrompt' in locals() else ""}
|
|
return AiModelResponse(
|
|
content=json.dumps(errorResult, indent=2),
|
|
success=False,
|
|
error=str(e)
|
|
)
|