From fae4bde059aa840d1987a3a28e480de82a4e5aad Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sun, 11 Jan 2026 12:38:07 +0100
Subject: [PATCH] fix web service: propagate maxWidth through the crawl
 parameter chain

---
 modules/aicore/aicorePluginTavily.py          |  6 +++---
 modules/services/serviceWeb/mainServiceWeb.py | 15 +++++++++++++--
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/modules/aicore/aicorePluginTavily.py b/modules/aicore/aicorePluginTavily.py
index 90718683..a9237cf2 100644
--- a/modules/aicore/aicorePluginTavily.py
+++ b/modules/aicore/aicorePluginTavily.py
@@ -345,8 +345,8 @@ class AiTavily(BaseConnectorAi):
         retryDelay = self.crawlRetryDelay
         timeout = self.crawlTimeout
 
-        logger.debug(f"Starting crawl of URL: {url}")
-        logger.debug(f"Crawl settings: instructions={instructions}, limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
+        logger.info(f"Starting crawl of URL: {url}")
+        logger.info(f"Crawl settings: instructions={instructions[:100] if instructions else None}..., limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
 
         for attempt in range(maxRetries + 1):
             try:
@@ -371,7 +371,7 @@ class AiTavily(BaseConnectorAi):
             if maxBreadth:
                 kwargsCrawl["max_breadth"] = maxBreadth
 
-            logger.debug(f"Sending request to Tavily with kwargs: {kwargsCrawl}")
+            logger.info(f"Sending request to Tavily API with parameters: {kwargsCrawl}")
 
             response = await asyncio.wait_for(
                 self.client.crawl(**kwargsCrawl),
diff --git a/modules/services/serviceWeb/mainServiceWeb.py b/modules/services/serviceWeb/mainServiceWeb.py
index 469ca6ae..4faced9c 100644
--- a/modules/services/serviceWeb/mainServiceWeb.py
+++ b/modules/services/serviceWeb/mainServiceWeb.py
@@ -115,9 +115,14 @@ class WebService:
         if not validatedUrls:
             return {"error": "No URLs found to crawl"}
 
-        # Step 4: Translate researchDepth to maxDepth
+        # Step 4: Translate researchDepth to maxDepth and maxWidth
         depthMap = {"fast": 1, "general": 2, "deep": 3}
         maxDepth = depthMap.get(finalResearchDepth.lower(), 2)
+        # Scale maxWidth based on research depth: fast=5, general=10, deep=20 pages per level
+        widthMap = {"fast": 5, "general": 10, "deep": 20}
+        maxWidth = widthMap.get(finalResearchDepth.lower(), 10)
+
+        logger.info(f"Research depth settings: depth={finalResearchDepth}, maxDepth={maxDepth}, maxWidth={maxWidth}")
 
         # Step 5: Crawl all URLs with hierarchical logging
         if operationId:
@@ -131,6 +136,7 @@ class WebService:
             instruction=instruction,
             urls=validatedUrls,
             maxDepth=maxDepth,
+            maxWidth=maxWidth,  # Pass maxWidth to crawl function
             parentOperationId=parentOperationId
         )
 
@@ -440,6 +446,7 @@ Return ONLY valid JSON, no additional text:
         instruction: str,
         urls: List[str],
         maxDepth: int = 2,
+        maxWidth: int = 10,
         parentOperationId: Optional[str] = None
     ) -> List[Dict[str, Any]]:
         """Perform web crawl on list of URLs - crawls URLs in parallel for better performance."""
@@ -452,6 +459,7 @@ Return ONLY valid JSON, no additional text:
                 totalUrls=len(urls),
                 instruction=instruction,
                 maxDepth=maxDepth,
+                maxWidth=maxWidth,  # Pass maxWidth to single URL crawl
                 parentOperationId=parentOperationId
             )
             crawlTasks.append(task)
@@ -479,6 +487,7 @@ Return ONLY valid JSON, no additional text:
         totalUrls: int,
         instruction: str,
         maxDepth: int,
+        maxWidth: int = 10,
         parentOperationId: Optional[str] = None
     ) -> List[Dict[str, Any]]:
         """
@@ -517,11 +526,13 @@ Return ONLY valid JSON, no additional text:
             self.services.chat.progressLogUpdate(urlOperationId, 0.3, "Initiating crawl")
 
         # Build crawl prompt model for single URL
+        # maxWidth is passed from performWebResearch based on researchDepth
+        logger.info(f"Crawling URL {urlIndex + 1}/{totalUrls} with maxDepth={maxDepth}, maxWidth={maxWidth}")
         crawlPromptModel = AiCallPromptWebCrawl(
             instruction=instruction,
             url=url,  # Single URL
             maxDepth=maxDepth,
-            maxWidth=5  # Default: 5 pages per level
+            maxWidth=maxWidth  # Scaled based on researchDepth: fast=5, general=10, deep=20
        )
 
        crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)
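
---

Note: a minimal standalone sketch of the researchDepth -> (maxDepth, maxWidth)
mapping that this patch threads through the crawl chain. The helper name
resolveCrawlLimits is hypothetical and does not exist in the codebase; the
maps and fallback values mirror the patched code in mainServiceWeb.py.

    from typing import Tuple

    def resolveCrawlLimits(researchDepth: str) -> Tuple[int, int]:
        """Hypothetical helper mirroring the patched mapping (illustration only)."""
        depthMap = {"fast": 1, "general": 2, "deep": 3}    # crawl levels
        widthMap = {"fast": 5, "general": 10, "deep": 20}  # pages per level
        key = researchDepth.lower()
        # Unrecognized depths fall back to the "general" defaults, as in the patch
        return depthMap.get(key, 2), widthMap.get(key, 10)

    # Example: "deep" research crawls up to 3 levels, 20 pages per level;
    # anything unrecognized gets the "general" defaults (2, 10).
    assert resolveCrawlLimits("deep") == (3, 20)
    assert resolveCrawlLimits("Fast") == (1, 5)
    assert resolveCrawlLimits("unknown") == (2, 10)

Keeping both maps keyed on the same lowercased depth string means a single
researchDepth value consistently controls how deep and how wide a crawl goes,
which is the parameter chain the patch repairs.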