fix web service: pass maxWidth parameter through the crawl chain

This commit is contained in:
ValueOn AG 2026-01-11 12:38:07 +01:00
parent e41411e5aa
commit fae4bde059
2 changed files with 16 additions and 5 deletions

View file

@@ -345,8 +345,8 @@ class AiTavily(BaseConnectorAi):
retryDelay = self.crawlRetryDelay
timeout = self.crawlTimeout
logger.debug(f"Starting crawl of URL: {url}")
logger.debug(f"Crawl settings: instructions={instructions}, limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
logger.info(f"Starting crawl of URL: {url}")
logger.info(f"Crawl settings: instructions={instructions[:100] if instructions else None}..., limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
for attempt in range(maxRetries + 1):
try:
@@ -371,7 +371,7 @@ class AiTavily(BaseConnectorAi):
if maxBreadth:
kwargsCrawl["max_breadth"] = maxBreadth
logger.debug(f"Sending request to Tavily with kwargs: {kwargsCrawl}")
logger.info(f"Sending request to Tavily API with parameters: {kwargsCrawl}")
response = await asyncio.wait_for(
self.client.crawl(**kwargsCrawl),

View file

@@ -115,9 +115,14 @@ class WebService:
if not validatedUrls:
return {"error": "No URLs found to crawl"}
# Step 4: Translate researchDepth to maxDepth
# Step 4: Translate researchDepth to maxDepth and maxWidth
depthMap = {"fast": 1, "general": 2, "deep": 3}
maxDepth = depthMap.get(finalResearchDepth.lower(), 2)
# Scale maxWidth based on research depth: fast=5, general=10, deep=20 pages per level
widthMap = {"fast": 5, "general": 10, "deep": 20}
maxWidth = widthMap.get(finalResearchDepth.lower(), 10)
logger.info(f"Research depth settings: depth={finalResearchDepth}, maxDepth={maxDepth}, maxWidth={maxWidth}")
# Step 5: Crawl all URLs with hierarchical logging
if operationId:
@@ -131,6 +136,7 @@ class WebService:
instruction=instruction,
urls=validatedUrls,
maxDepth=maxDepth,
maxWidth=maxWidth, # Pass maxWidth to crawl function
parentOperationId=parentOperationId
)
@@ -440,6 +446,7 @@ Return ONLY valid JSON, no additional text:
instruction: str,
urls: List[str],
maxDepth: int = 2,
maxWidth: int = 10,
parentOperationId: Optional[str] = None
) -> List[Dict[str, Any]]:
"""Perform web crawl on list of URLs - crawls URLs in parallel for better performance."""
@@ -452,6 +459,7 @@ Return ONLY valid JSON, no additional text:
totalUrls=len(urls),
instruction=instruction,
maxDepth=maxDepth,
maxWidth=maxWidth, # Pass maxWidth to single URL crawl
parentOperationId=parentOperationId
)
crawlTasks.append(task)
@@ -479,6 +487,7 @@ Return ONLY valid JSON, no additional text:
totalUrls: int,
instruction: str,
maxDepth: int,
maxWidth: int = 10,
parentOperationId: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
@@ -517,11 +526,13 @@ Return ONLY valid JSON, no additional text:
self.services.chat.progressLogUpdate(urlOperationId, 0.3, "Initiating crawl")
# Build crawl prompt model for single URL
# maxWidth is passed from performWebResearch based on researchDepth
logger.info(f"Crawling URL {urlIndex + 1}/{totalUrls} with maxDepth={maxDepth}, maxWidth={maxWidth}")
crawlPromptModel = AiCallPromptWebCrawl(
instruction=instruction,
url=url, # Single URL
maxDepth=maxDepth,
maxWidth=5 # Default: 5 pages per level
maxWidth=maxWidth # Scaled based on researchDepth: fast=5, general=10, deep=20
)
crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)