Merge pull request #83 from valueonag/feat/foundation-customer-use-cases

Feat/foundation customer use cases
Patrick Motsch 2026-01-11 13:08:41 +01:00 committed by GitHub
commit b2a3d4afc8
3 changed files with 49 additions and 12 deletions

@@ -345,8 +345,8 @@ class AiTavily(BaseConnectorAi):
retryDelay = self.crawlRetryDelay
timeout = self.crawlTimeout
logger.debug(f"Starting crawl of URL: {url}")
logger.debug(f"Crawl settings: instructions={instructions}, limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
logger.info(f"Starting crawl of URL: {url}")
logger.info(f"Crawl settings: instructions={instructions[:100] if instructions else None}..., limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
for attempt in range(maxRetries + 1):
try:
@@ -371,7 +371,7 @@ class AiTavily(BaseConnectorAi):
if maxBreadth:
kwargsCrawl["max_breadth"] = maxBreadth
logger.debug(f"Sending request to Tavily with kwargs: {kwargsCrawl}")
logger.info(f"Sending request to Tavily API with parameters: {kwargsCrawl}")
response = await asyncio.wait_for(
self.client.crawl(**kwargsCrawl),

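The hunk above raises the crawl lifecycle messages from debug to info and truncates the free-form instructions before logging them. A minimal sketch of that pattern, assuming a standard library logger and hypothetical parameter values (the helper name is illustrative, not part of the connector):

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ai_tavily")

def logCrawlStart(url, instructions, limit, maxDepth, maxBreadth, timeout):
    # Lifecycle messages now go out at INFO so they are visible at the default level.
    logger.info(f"Starting crawl of URL: {url}")
    # Long instructions are cut to 100 characters; None stays readable as "None".
    logger.info(
        f"Crawl settings: instructions={instructions[:100] if instructions else None}..., "
        f"limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s"
    )

# Hypothetical values, for illustration only.
logCrawlStart("https://example.com", "Summarize the pricing pages. " * 10, 25, 2, 10, 60)
```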
@@ -115,9 +115,14 @@ class WebService:
if not validatedUrls:
return {"error": "No URLs found to crawl"}
# Step 4: Translate researchDepth to maxDepth
# Step 4: Translate researchDepth to maxDepth and maxWidth
depthMap = {"fast": 1, "general": 2, "deep": 3}
maxDepth = depthMap.get(finalResearchDepth.lower(), 2)
# Scale maxWidth based on research depth: fast=5, general=10, deep=20 pages per level
widthMap = {"fast": 5, "general": 10, "deep": 20}
maxWidth = widthMap.get(finalResearchDepth.lower(), 10)
logger.info(f"Research depth settings: depth={finalResearchDepth}, maxDepth={maxDepth}, maxWidth={maxWidth}")
# Step 5: Crawl all URLs with hierarchical logging
if operationId:
@@ -131,6 +136,7 @@ class WebService:
instruction=instruction,
urls=validatedUrls,
maxDepth=maxDepth,
maxWidth=maxWidth, # Pass maxWidth to crawl function
parentOperationId=parentOperationId
)
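Step 4 above turns the requested researchDepth into both a crawl depth and a per-level page budget. A self-contained sketch of that mapping, with `resolveCrawlLimits` as a hypothetical helper name:

```python
# Profiles introduced by this change: crawl depth and pages per level (maxWidth).
DEPTH_MAP = {"fast": 1, "general": 2, "deep": 3}
WIDTH_MAP = {"fast": 5, "general": 10, "deep": 20}

def resolveCrawlLimits(researchDepth: str) -> tuple[int, int]:
    # Unknown values fall back to the "general" profile (2, 10),
    # mirroring the .get() defaults used in the diff.
    key = researchDepth.lower()
    return DEPTH_MAP.get(key, 2), WIDTH_MAP.get(key, 10)

print(resolveCrawlLimits("deep"))     # -> (3, 20)
print(resolveCrawlLimits("unknown"))  # -> (2, 10)
```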
@@ -440,6 +446,7 @@ Return ONLY valid JSON, no additional text:
instruction: str,
urls: List[str],
maxDepth: int = 2,
maxWidth: int = 10,
parentOperationId: Optional[str] = None
) -> List[Dict[str, Any]]:
"""Perform web crawl on list of URLs - crawls URLs in parallel for better performance."""
@@ -452,6 +459,7 @@ Return ONLY valid JSON, no additional text:
totalUrls=len(urls),
instruction=instruction,
maxDepth=maxDepth,
maxWidth=maxWidth, # Pass maxWidth to single URL crawl
parentOperationId=parentOperationId
)
crawlTasks.append(task)
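The crawl helper builds one coroutine per URL and collects them in crawlTasks; the hunks do not show how the tasks are awaited, but the docstring states the URLs are crawled in parallel. A sketch of that fan-out pattern, assuming asyncio.gather and a stubbed per-URL crawl:

```python
import asyncio
from typing import Any, Dict, List

async def crawlOneUrl(url: str, urlIndex: int, totalUrls: int,
                      instruction: str, maxDepth: int, maxWidth: int = 10) -> List[Dict[str, Any]]:
    # Stub standing in for the real per-URL crawl; it only echoes its settings.
    await asyncio.sleep(0)  # yield control, as a real network call would
    return [{"url": url, "maxDepth": maxDepth, "maxWidth": maxWidth}]

async def crawlAll(instruction: str, urls: List[str], maxDepth: int, maxWidth: int):
    crawlTasks = [
        crawlOneUrl(url=u, urlIndex=i, totalUrls=len(urls),
                    instruction=instruction, maxDepth=maxDepth, maxWidth=maxWidth)
        for i, u in enumerate(urls)
    ]
    # All URLs are crawled concurrently; gather preserves input order.
    perUrlResults = await asyncio.gather(*crawlTasks)
    return [item for result in perUrlResults for item in result]

print(asyncio.run(crawlAll("Collect docs", ["https://a.example", "https://b.example"], 2, 10)))
```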
@@ -479,6 +487,7 @@ Return ONLY valid JSON, no additional text:
totalUrls: int,
instruction: str,
maxDepth: int,
maxWidth: int = 10,
parentOperationId: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
@@ -517,11 +526,13 @@ Return ONLY valid JSON, no additional text:
self.services.chat.progressLogUpdate(urlOperationId, 0.3, "Initiating crawl")
# Build crawl prompt model for single URL
# maxWidth is passed from performWebResearch based on researchDepth
logger.info(f"Crawling URL {urlIndex + 1}/{totalUrls} with maxDepth={maxDepth}, maxWidth={maxWidth}")
crawlPromptModel = AiCallPromptWebCrawl(
instruction=instruction,
url=url, # Single URL
maxDepth=maxDepth,
maxWidth=5 # Default: 5 pages per level
maxWidth=maxWidth # Scaled based on researchDepth: fast=5, general=10, deep=20
)
crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)

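Finally, the scaled maxWidth replaces the hard-coded 5 in the per-URL crawl prompt. The model_dump_json call suggests AiCallPromptWebCrawl is a Pydantic model; a sketch with a stand-in model carrying only the fields visible at this call site (the real definition lives elsewhere in the repository and may differ):

```python
from pydantic import BaseModel

class AiCallPromptWebCrawl(BaseModel):
    # Stand-in with only the fields used above; the real model may carry more.
    instruction: str
    url: str
    maxDepth: int = 2
    maxWidth: int = 10

crawlPromptModel = AiCallPromptWebCrawl(
    instruction="Collect the product documentation",
    url="https://example.com/docs",
    maxDepth=3,
    maxWidth=20,  # deep research profile: 20 pages per level
)
print(crawlPromptModel.model_dump_json(exclude_none=True, indent=2))
```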
@@ -779,36 +779,62 @@ class ContentValidator:
actionHistoryContext = f"\n\n=== ACTION HISTORY ===\n" + "\n".join(f"- {entry}" for entry in historyEntries)
actionHistoryContext += "\n\nIMPORTANT: This shows the complete workflow that produced the documents. For process-oriented criteria (e.g., 'internet search performed'), check ACTION HISTORY first. Document metadata may only reflect the LAST action, not the entire workflow."
# Build document index context (all documents delivered in current round)
# Build document index context (all documents delivered in current round AND past rounds)
# CRITICAL: Include past rounds so validator can see files produced in previous rounds
# This prevents endless loops when files are produced in multiple sets across rounds
documentIndexContext = ""
if context and self.services and hasattr(self.services, 'chat') and hasattr(self.services, 'workflow') and self.services.workflow:
try:
documentIndex = self.services.chat.getAvailableDocuments(self.services.workflow)
if documentIndex and documentIndex.strip() and documentIndex != "No documents available":
# Extract only "Current round documents" section if present
# Extract BOTH "Current round documents" AND "Past rounds documents" sections
lines = documentIndex.split('\n')
currentRoundSection = []
pastRoundsSection = []
inCurrentRound = False
inPastRounds = False
for line in lines:
if "Current round documents:" in line:
inCurrentRound = True
inPastRounds = False
currentRoundSection.append(line)
elif "Past rounds documents:" in line:
inCurrentRound = False
inPastRounds = True
pastRoundsSection.append(line)
elif "AVAILABLE_CONNECTIONS_INDEX:" in line:
# End of document sections
break
elif inCurrentRound:
if line.strip().startswith("- docList:") or line.strip().startswith(" - docItem:") or line.strip().startswith("- docItem:"):
currentRoundSection.append(line)
elif line.strip() == "":
# Empty line is okay, continue
continue
elif "Past rounds documents:" in line or "AVAILABLE_CONNECTIONS_INDEX:" in line:
# End of current round section
break
else:
# Still in current round section
currentRoundSection.append(line)
elif inPastRounds:
if line.strip().startswith("- docList:") or line.strip().startswith(" - docItem:") or line.strip().startswith("- docItem:"):
pastRoundsSection.append(line)
elif line.strip() == "":
# Empty line is okay, continue
continue
else:
# Still in past rounds section
pastRoundsSection.append(line)
# Build context with both sections
sections = []
if currentRoundSection:
documentIndexContext = "\n\n=== ALL DOCUMENTS DELIVERED IN CURRENT ROUND ===\n" + "\n".join(currentRoundSection)
documentIndexContext += "\n\nIMPORTANT: This shows ALL documents that have been delivered in the current round, not just the ones being validated in this step. Use this to check if all required formats/documents are present across the entire round."
sections.append("=== ALL DOCUMENTS DELIVERED IN CURRENT ROUND ===\n" + "\n".join(currentRoundSection))
if pastRoundsSection:
sections.append("=== ALL DOCUMENTS DELIVERED IN PAST ROUNDS ===\n" + "\n".join(pastRoundsSection))
if sections:
documentIndexContext = "\n\n" + "\n\n".join(sections)
documentIndexContext += "\n\nIMPORTANT: This shows ALL documents that have been delivered in the current round AND past rounds, not just the ones being validated in this step. Use this to check if all required formats/documents are present across ALL rounds. Files produced in previous rounds (e.g., html+css in round 1, python in round 2) should all be visible here."
except Exception as e:
logger.warning(f"Error extracting document index for validation: {str(e)}")
# Continue without document index - not critical
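The extraction logic added to ContentValidator is a small state machine over the rendered document index: it now keeps both the current-round and past-rounds sections and stops at the connections index. A self-contained sketch of the same walk, run against a hypothetical index string:

```python
def splitDocumentIndex(documentIndex: str) -> tuple[list[str], list[str]]:
    """Collect the current-round and past-rounds document lines from the index text."""
    currentRoundSection: list[str] = []
    pastRoundsSection: list[str] = []
    inCurrentRound = inPastRounds = False
    for line in documentIndex.split("\n"):
        if "Current round documents:" in line:
            inCurrentRound, inPastRounds = True, False
            currentRoundSection.append(line)
        elif "Past rounds documents:" in line:
            inCurrentRound, inPastRounds = False, True
            pastRoundsSection.append(line)
        elif "AVAILABLE_CONNECTIONS_INDEX:" in line:
            break  # everything after this marker is unrelated to documents
        elif inCurrentRound and line.strip():
            currentRoundSection.append(line)
        elif inPastRounds and line.strip():
            pastRoundsSection.append(line)
    return currentRoundSection, pastRoundsSection

# Hypothetical index text, shaped like the sections the validator expects.
index = (
    "Current round documents:\n- docList: report\n - docItem: summary.md\n\n"
    "Past rounds documents:\n- docList: website\n - docItem: index.html\n\n"
    "AVAILABLE_CONNECTIONS_INDEX:\n- web"
)
current, past = splitDocumentIndex(index)
print("\n".join(current))
print("\n".join(past))
```

Keeping past rounds visible is what breaks the endless-loop case described in the comments: a criterion satisfied by a round-1 artifact (for example html+css delivered earlier) no longer looks unmet when round 2 only produces python files.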