fix web service: maxWidth parameter chain
parent e41411e5aa
commit fae4bde059

2 changed files with 16 additions and 5 deletions
@@ -345,8 +345,8 @@ class AiTavily(BaseConnectorAi):
         retryDelay = self.crawlRetryDelay
         timeout = self.crawlTimeout
 
-        logger.debug(f"Starting crawl of URL: {url}")
-        logger.debug(f"Crawl settings: instructions={instructions}, limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
+        logger.info(f"Starting crawl of URL: {url}")
+        logger.info(f"Crawl settings: instructions={instructions[:100] if instructions else None}..., limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
 
         for attempt in range(maxRetries + 1):
             try:
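The `instructions[:100]` slice in the new log line keeps long prompts from flooding a single log entry. A minimal standalone sketch of that guard (the logger name and helper function are illustrative, not from the repo):

import logging
from typing import Optional

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("crawl")

def logCrawlSettings(instructions: Optional[str], limit: int, maxDepth: int) -> None:
    # Truncate long instructions so one log line stays readable; None passes
    # through unchanged, mirroring the conditional expression in the diff.
    preview = instructions[:100] if instructions else None
    logger.info(f"Crawl settings: instructions={preview}..., limit={limit}, maxDepth={maxDepth}")

logCrawlSettings("Collect pricing pages and product specs. " * 5, limit=50, maxDepth=2)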
@@ -371,7 +371,7 @@ class AiTavily(BaseConnectorAi):
                 if maxBreadth:
                     kwargsCrawl["max_breadth"] = maxBreadth
 
-                logger.debug(f"Sending request to Tavily with kwargs: {kwargsCrawl}")
+                logger.info(f"Sending request to Tavily API with parameters: {kwargsCrawl}")
 
                 response = await asyncio.wait_for(
                     self.client.crawl(**kwargsCrawl),
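The unchanged context around this hunk (`for attempt in range(maxRetries + 1)`, `asyncio.wait_for`) shows the Tavily call is wrapped in a retry loop with a per-attempt timeout. A hedged sketch of that pattern, with a stand-in coroutine in place of the real client:

import asyncio

async def fakeCrawl(**kwargs):
    # Stand-in for self.client.crawl(**kwargsCrawl).
    await asyncio.sleep(0.1)
    return {"results": [], "requested": kwargs}

async def crawlWithRetries(maxRetries: int = 2, timeout: float = 30.0,
                           retryDelay: float = 1.0, **kwargsCrawl):
    # One initial attempt plus maxRetries retries, matching range(maxRetries + 1).
    for attempt in range(maxRetries + 1):
        try:
            return await asyncio.wait_for(fakeCrawl(**kwargsCrawl), timeout=timeout)
        except asyncio.TimeoutError:
            if attempt == maxRetries:
                raise
            await asyncio.sleep(retryDelay)

print(asyncio.run(crawlWithRetries(url="https://example.com", max_breadth=10)))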
@@ -115,9 +115,14 @@ class WebService:
         if not validatedUrls:
             return {"error": "No URLs found to crawl"}
 
-        # Step 4: Translate researchDepth to maxDepth
+        # Step 4: Translate researchDepth to maxDepth and maxWidth
         depthMap = {"fast": 1, "general": 2, "deep": 3}
         maxDepth = depthMap.get(finalResearchDepth.lower(), 2)
+        # Scale maxWidth based on research depth: fast=5, general=10, deep=20 pages per level
+        widthMap = {"fast": 5, "general": 10, "deep": 20}
+        maxWidth = widthMap.get(finalResearchDepth.lower(), 10)
 
+        logger.info(f"Research depth settings: depth={finalResearchDepth}, maxDepth={maxDepth}, maxWidth={maxWidth}")
+
         # Step 5: Crawl all URLs with hierarchical logging
         if operationId:
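This hunk is the heart of the fix: `researchDepth` now drives both `maxDepth` and `maxWidth`, and `dict.get` with a default means an unrecognized depth string falls back to the "general" tier. A self-contained sketch of that mapping (`resolveCrawlBudget` is a hypothetical name, not from the repo):

depthMap = {"fast": 1, "general": 2, "deep": 3}
widthMap = {"fast": 5, "general": 10, "deep": 20}

def resolveCrawlBudget(researchDepth: str) -> tuple:
    # Unknown values fall back to the "general" tier: 2 levels deep, 10 pages per level.
    key = researchDepth.lower()
    return depthMap.get(key, 2), widthMap.get(key, 10)

assert resolveCrawlBudget("DEEP") == (3, 20)
assert resolveCrawlBudget("unknown") == (2, 10)  # graceful fallback, as in the service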
@@ -131,6 +136,7 @@ class WebService:
                 instruction=instruction,
                 urls=validatedUrls,
                 maxDepth=maxDepth,
+                maxWidth=maxWidth,  # Pass maxWidth to crawl function
                 parentOperationId=parentOperationId
             )
 
@@ -440,6 +446,7 @@ Return ONLY valid JSON, no additional text:
         instruction: str,
         urls: List[str],
         maxDepth: int = 2,
+        maxWidth: int = 10,
         parentOperationId: Optional[str] = None
     ) -> List[Dict[str, Any]]:
         """Perform web crawl on list of URLs - crawls URLs in parallel for better performance."""
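The remaining hunks thread the new parameter through every hop of the call chain, each signature taking a default of 10 so existing callers keep working. A condensed sketch of the threading pattern (function bodies are stand-ins, not the service's real logic):

import asyncio
from typing import Any, Dict, List, Optional

async def crawlSingleUrl(url: str, maxDepth: int, maxWidth: int = 10) -> Dict[str, Any]:
    # Innermost consumer: must use the forwarded value, never a hardcoded constant.
    return {"url": url, "maxDepth": maxDepth, "maxWidth": maxWidth}

async def performWebCrawl(
    instruction: str,
    urls: List[str],
    maxDepth: int = 2,
    maxWidth: int = 10,  # added parameter; default keeps existing call sites working
    parentOperationId: Optional[str] = None,
) -> List[Dict[str, Any]]:
    results = []
    for url in urls:
        # Forward maxWidth at every hop; dropping it at any level reintroduces the bug.
        results.append(await crawlSingleUrl(url, maxDepth=maxDepth, maxWidth=maxWidth))
    return results

print(asyncio.run(performWebCrawl("summarize", ["https://example.com"], maxWidth=20)))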
@@ -452,6 +459,7 @@ Return ONLY valid JSON, no additional text:
                 totalUrls=len(urls),
                 instruction=instruction,
                 maxDepth=maxDepth,
+                maxWidth=maxWidth,  # Pass maxWidth to single URL crawl
                 parentOperationId=parentOperationId
             )
             crawlTasks.append(task)
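`crawlTasks.append(task)` in the context implies the per-URL crawls are scheduled as concurrent tasks and awaited together. A minimal sketch of that fan-out, assuming `asyncio.gather` collects the results (the real code may aggregate differently):

import asyncio
from typing import List

async def crawlOne(url: str, maxWidth: int) -> str:
    await asyncio.sleep(0.05)  # simulate network I/O
    return f"{url} (width {maxWidth})"

async def crawlAll(urls: List[str], maxWidth: int) -> List[str]:
    crawlTasks = []
    for url in urls:
        # create_task schedules each crawl immediately, so the URLs run concurrently.
        task = asyncio.create_task(crawlOne(url, maxWidth=maxWidth))
        crawlTasks.append(task)
    # gather preserves input order and propagates the first exception by default.
    return await asyncio.gather(*crawlTasks)

print(asyncio.run(crawlAll(["https://a.example", "https://b.example"], maxWidth=10)))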
@@ -479,6 +487,7 @@ Return ONLY valid JSON, no additional text:
         totalUrls: int,
         instruction: str,
         maxDepth: int,
+        maxWidth: int = 10,
         parentOperationId: Optional[str] = None
     ) -> List[Dict[str, Any]]:
         """
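Worth noting: the default here is 10, matching the "general" tier, while the value previously hardcoded in the prompt model (next hunk) was 5. Every caller touched by this commit passes `maxWidth` explicitly, so the default only matters for any caller that omits it, and for those the effective width doubles.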
@@ -517,11 +526,13 @@ Return ONLY valid JSON, no additional text:
             self.services.chat.progressLogUpdate(urlOperationId, 0.3, "Initiating crawl")
 
             # Build crawl prompt model for single URL
+            # maxWidth is passed from performWebResearch based on researchDepth
+            logger.info(f"Crawling URL {urlIndex + 1}/{totalUrls} with maxDepth={maxDepth}, maxWidth={maxWidth}")
             crawlPromptModel = AiCallPromptWebCrawl(
                 instruction=instruction,
                 url=url,  # Single URL
                 maxDepth=maxDepth,
-                maxWidth=5  # Default: 5 pages per level
+                maxWidth=maxWidth  # Scaled based on researchDepth: fast=5, general=10, deep=20
             )
             crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)
 
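`model_dump_json(exclude_none=True, indent=2)` is the Pydantic v2 serialization API, so `AiCallPromptWebCrawl` is presumably a Pydantic model. A stand-in with assumed fields shows what the serialized prompt would look like:

from typing import Optional
from pydantic import BaseModel

class CrawlPrompt(BaseModel):
    # Stand-in for AiCallPromptWebCrawl; these field names are assumptions.
    instruction: str
    url: str
    maxDepth: int = 2
    maxWidth: int = 10
    limit: Optional[int] = None

prompt = CrawlPrompt(instruction="summarize pricing", url="https://example.com", maxWidth=20)
# exclude_none drops the unset limit field; indent=2 pretty-prints the JSON payload.
print(prompt.model_dump_json(exclude_none=True, indent=2))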