Fix web service: pass maxWidth through the crawl parameter chain
This commit is contained in:
parent
e41411e5aa
commit
fae4bde059
2 changed files with 16 additions and 5 deletions
|
|
@ -345,8 +345,8 @@ class AiTavily(BaseConnectorAi):
|
|||
retryDelay = self.crawlRetryDelay
|
||||
timeout = self.crawlTimeout
|
||||
|
||||
logger.debug(f"Starting crawl of URL: {url}")
|
||||
logger.debug(f"Crawl settings: instructions={instructions}, limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
|
||||
logger.info(f"Starting crawl of URL: {url}")
|
||||
logger.info(f"Crawl settings: instructions={instructions[:100] if instructions else None}..., limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
|
||||
|
||||
for attempt in range(maxRetries + 1):
|
||||
try:
|
||||
|
|
@ -371,7 +371,7 @@ class AiTavily(BaseConnectorAi):
|
|||
if maxBreadth:
|
||||
kwargsCrawl["max_breadth"] = maxBreadth
|
||||
|
||||
logger.debug(f"Sending request to Tavily with kwargs: {kwargsCrawl}")
|
||||
logger.info(f"Sending request to Tavily API with parameters: {kwargsCrawl}")
|
||||
|
||||
response = await asyncio.wait_for(
|
||||
self.client.crawl(**kwargsCrawl),
|
||||
|
|
|
|||
|
|
@ -115,9 +115,14 @@ class WebService:
|
|||
if not validatedUrls:
|
||||
return {"error": "No URLs found to crawl"}
|
||||
|
||||
# Step 4: Translate researchDepth to maxDepth
|
||||
# Step 4: Translate researchDepth to maxDepth and maxWidth
|
||||
depthMap = {"fast": 1, "general": 2, "deep": 3}
|
||||
maxDepth = depthMap.get(finalResearchDepth.lower(), 2)
|
||||
# Scale maxWidth based on research depth: fast=5, general=10, deep=20 pages per level
|
||||
widthMap = {"fast": 5, "general": 10, "deep": 20}
|
||||
maxWidth = widthMap.get(finalResearchDepth.lower(), 10)
|
||||
|
||||
logger.info(f"Research depth settings: depth={finalResearchDepth}, maxDepth={maxDepth}, maxWidth={maxWidth}")
|
||||
|
||||
# Step 5: Crawl all URLs with hierarchical logging
|
||||
if operationId:
|
||||
|
|
@ -131,6 +136,7 @@ class WebService:
|
|||
instruction=instruction,
|
||||
urls=validatedUrls,
|
||||
maxDepth=maxDepth,
|
||||
maxWidth=maxWidth, # Pass maxWidth to crawl function
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
|
|
@ -440,6 +446,7 @@ Return ONLY valid JSON, no additional text:
|
|||
instruction: str,
|
||||
urls: List[str],
|
||||
maxDepth: int = 2,
|
||||
maxWidth: int = 10,
|
||||
parentOperationId: Optional[str] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Perform web crawl on list of URLs - crawls URLs in parallel for better performance."""
|
||||
|
|
@ -452,6 +459,7 @@ Return ONLY valid JSON, no additional text:
|
|||
totalUrls=len(urls),
|
||||
instruction=instruction,
|
||||
maxDepth=maxDepth,
|
||||
maxWidth=maxWidth, # Pass maxWidth to single URL crawl
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
crawlTasks.append(task)
|
||||
|
|
@ -479,6 +487,7 @@ Return ONLY valid JSON, no additional text:
|
|||
totalUrls: int,
|
||||
instruction: str,
|
||||
maxDepth: int,
|
||||
maxWidth: int = 10,
|
||||
parentOperationId: Optional[str] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
|
|
@ -517,11 +526,13 @@ Return ONLY valid JSON, no additional text:
|
|||
self.services.chat.progressLogUpdate(urlOperationId, 0.3, "Initiating crawl")
|
||||
|
||||
# Build crawl prompt model for single URL
|
||||
# maxWidth is passed from performWebResearch based on researchDepth
|
||||
logger.info(f"Crawling URL {urlIndex + 1}/{totalUrls} with maxDepth={maxDepth}, maxWidth={maxWidth}")
|
||||
crawlPromptModel = AiCallPromptWebCrawl(
|
||||
instruction=instruction,
|
||||
url=url, # Single URL
|
||||
maxDepth=maxDepth,
|
||||
maxWidth=5 # Default: 5 pages per level
|
||||
maxWidth=maxWidth # Scaled based on researchDepth: fast=5, general=10, deep=20
|
||||
)
|
||||
crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue