From e41411e5aa9cf82122bbe467de00bf0b2e140e58 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sun, 11 Jan 2026 00:02:24 +0100
Subject: [PATCH 1/2] fix: include documents from all rounds in validator prompt
---
.../processing/adaptive/contentValidator.py | 40 +++++++++++++++----
1 file changed, 33 insertions(+), 7 deletions(-)
diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py
index 369399cd..fe17572f 100644
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@@ -779,36 +779,62 @@ class ContentValidator:
actionHistoryContext = f"\n\n=== ACTION HISTORY ===\n" + "\n".join(f"- {entry}" for entry in historyEntries)
actionHistoryContext += "\n\nIMPORTANT: This shows the complete workflow that produced the documents. For process-oriented criteria (e.g., 'internet search performed'), check ACTION HISTORY first. Document metadata may only reflect the LAST action, not the entire workflow."
- # Build document index context (all documents delivered in current round)
+ # Build document index context (all documents delivered in current round AND past rounds)
+ # CRITICAL: Include past rounds so validator can see files produced in previous rounds
+ # This prevents endless loops when files are produced in multiple sets across rounds
documentIndexContext = ""
if context and self.services and hasattr(self.services, 'chat') and hasattr(self.services, 'workflow') and self.services.workflow:
try:
documentIndex = self.services.chat.getAvailableDocuments(self.services.workflow)
if documentIndex and documentIndex.strip() and documentIndex != "No documents available":
- # Extract only "Current round documents" section if present
+ # Extract BOTH "Current round documents" AND "Past rounds documents" sections
lines = documentIndex.split('\n')
currentRoundSection = []
+ pastRoundsSection = []
inCurrentRound = False
+ inPastRounds = False
+
for line in lines:
if "Current round documents:" in line:
inCurrentRound = True
+ inPastRounds = False
currentRoundSection.append(line)
+ elif "Past rounds documents:" in line:
+ inCurrentRound = False
+ inPastRounds = True
+ pastRoundsSection.append(line)
+ elif "AVAILABLE_CONNECTIONS_INDEX:" in line:
+ # End of document sections
+ break
elif inCurrentRound:
if line.strip().startswith("- docList:") or line.strip().startswith(" - docItem:") or line.strip().startswith("- docItem:"):
currentRoundSection.append(line)
elif line.strip() == "":
# Empty line is okay, continue
continue
- elif "Past rounds documents:" in line or "AVAILABLE_CONNECTIONS_INDEX:" in line:
- # End of current round section
- break
else:
# Still in current round section
currentRoundSection.append(line)
+ elif inPastRounds:
+ if line.strip().startswith("- docList:") or line.strip().startswith(" - docItem:") or line.strip().startswith("- docItem:"):
+ pastRoundsSection.append(line)
+ elif line.strip() == "":
+ # Empty line is okay, continue
+ continue
+ else:
+ # Still in past rounds section
+ pastRoundsSection.append(line)
+ # Build context with both sections
+ sections = []
if currentRoundSection:
- documentIndexContext = "\n\n=== ALL DOCUMENTS DELIVERED IN CURRENT ROUND ===\n" + "\n".join(currentRoundSection)
- documentIndexContext += "\n\nIMPORTANT: This shows ALL documents that have been delivered in the current round, not just the ones being validated in this step. Use this to check if all required formats/documents are present across the entire round."
+ sections.append("=== ALL DOCUMENTS DELIVERED IN CURRENT ROUND ===\n" + "\n".join(currentRoundSection))
+ if pastRoundsSection:
+ sections.append("=== ALL DOCUMENTS DELIVERED IN PAST ROUNDS ===\n" + "\n".join(pastRoundsSection))
+
+ if sections:
+ documentIndexContext = "\n\n" + "\n\n".join(sections)
+ documentIndexContext += "\n\nIMPORTANT: This shows ALL documents that have been delivered in the current round AND past rounds, not just the ones being validated in this step. Use this to check if all required formats/documents are present across ALL rounds. Files produced in previous rounds (e.g., html+css in round 1, python in round 2) should all be visible here."
except Exception as e:
logger.warning(f"Error extracting document index for validation: {str(e)}")
# Continue without document index - not critical
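The hunk above interleaves the two section collectors with the validator's context plumbing; the core change reduces to the split sketched below. This is a minimal stand-alone sketch, not the committed code: only the three header literals and the break on AVAILABLE_CONNECTIONS_INDEX come from the patch, the splitDocumentIndex helper and the sample index string are assumptions for illustration, and the redundant docList/docItem prefix branches are collapsed into a single "keep non-empty lines" rule.

# Sketch of the section split added in this patch, runnable stand-alone.
# Assumption: the index returned by getAvailableDocuments is a plain-text block
# whose sections are introduced by the literal headers checked below.
def splitDocumentIndex(documentIndex: str) -> dict:
    """Collect 'Current round documents' and 'Past rounds documents' lines."""
    sections = {"current": [], "past": []}
    active = None
    for line in documentIndex.split("\n"):
        if "Current round documents:" in line:
            active = "current"
            sections[active].append(line)
        elif "Past rounds documents:" in line:
            active = "past"
            sections[active].append(line)
        elif "AVAILABLE_CONNECTIONS_INDEX:" in line:
            break  # connections index marks the end of the document sections
        elif active and line.strip():
            sections[active].append(line)
    return sections

if __name__ == "__main__":
    sample = (
        "Current round documents:\n"
        "- docItem: report.pdf\n"
        "\n"
        "Past rounds documents:\n"
        "- docItem: index.html\n"
        "- docItem: style.css\n"
        "AVAILABLE_CONNECTIONS_INDEX:\n"
        "- connection: db\n"
    )
    parts = splitDocumentIndex(sample)
    print(parts["current"])  # ['Current round documents:', '- docItem: report.pdf']
    print(parts["past"])     # ['Past rounds documents:', '- docItem: index.html', '- docItem: style.css']

Breaking only on AVAILABLE_CONNECTIONS_INDEX, instead of also breaking on the past-rounds header as the old code did, is what lets the validator see files produced in earlier rounds.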
From fae4bde059aa840d1987a3a28e480de82a4e5aad Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sun, 11 Jan 2026 12:38:07 +0100
Subject: [PATCH 2/2] fix web service: pass maxWidth through the crawl parameter chain
---
modules/aicore/aicorePluginTavily.py | 6 +++---
modules/services/serviceWeb/mainServiceWeb.py | 15 +++++++++++++--
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/modules/aicore/aicorePluginTavily.py b/modules/aicore/aicorePluginTavily.py
index 90718683..a9237cf2 100644
--- a/modules/aicore/aicorePluginTavily.py
+++ b/modules/aicore/aicorePluginTavily.py
@@ -345,8 +345,8 @@ class AiTavily(BaseConnectorAi):
retryDelay = self.crawlRetryDelay
timeout = self.crawlTimeout
- logger.debug(f"Starting crawl of URL: {url}")
- logger.debug(f"Crawl settings: instructions={instructions}, limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
+ logger.info(f"Starting crawl of URL: {url}")
+ logger.info(f"Crawl settings: instructions={instructions[:100] if instructions else None}..., limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
for attempt in range(maxRetries + 1):
try:
@@ -371,7 +371,7 @@ class AiTavily(BaseConnectorAi):
if maxBreadth:
kwargsCrawl["max_breadth"] = maxBreadth
- logger.debug(f"Sending request to Tavily with kwargs: {kwargsCrawl}")
+ logger.info(f"Sending request to Tavily API with parameters: {kwargsCrawl}")
response = await asyncio.wait_for(
self.client.crawl(**kwargsCrawl),
diff --git a/modules/services/serviceWeb/mainServiceWeb.py b/modules/services/serviceWeb/mainServiceWeb.py
index 469ca6ae..4faced9c 100644
--- a/modules/services/serviceWeb/mainServiceWeb.py
+++ b/modules/services/serviceWeb/mainServiceWeb.py
@@ -115,9 +115,14 @@ class WebService:
if not validatedUrls:
return {"error": "No URLs found to crawl"}
- # Step 4: Translate researchDepth to maxDepth
+ # Step 4: Translate researchDepth to maxDepth and maxWidth
depthMap = {"fast": 1, "general": 2, "deep": 3}
maxDepth = depthMap.get(finalResearchDepth.lower(), 2)
+ # Scale maxWidth based on research depth: fast=5, general=10, deep=20 pages per level
+ widthMap = {"fast": 5, "general": 10, "deep": 20}
+ maxWidth = widthMap.get(finalResearchDepth.lower(), 10)
+
+ logger.info(f"Research depth settings: depth={finalResearchDepth}, maxDepth={maxDepth}, maxWidth={maxWidth}")
# Step 5: Crawl all URLs with hierarchical logging
if operationId:
@@ -131,6 +136,7 @@ class WebService:
instruction=instruction,
urls=validatedUrls,
maxDepth=maxDepth,
+ maxWidth=maxWidth, # Pass maxWidth to crawl function
parentOperationId=parentOperationId
)
@@ -440,6 +446,7 @@ Return ONLY valid JSON, no additional text:
instruction: str,
urls: List[str],
maxDepth: int = 2,
+ maxWidth: int = 10,
parentOperationId: Optional[str] = None
) -> List[Dict[str, Any]]:
"""Perform web crawl on list of URLs - crawls URLs in parallel for better performance."""
@@ -452,6 +459,7 @@ Return ONLY valid JSON, no additional text:
totalUrls=len(urls),
instruction=instruction,
maxDepth=maxDepth,
+ maxWidth=maxWidth, # Pass maxWidth to single URL crawl
parentOperationId=parentOperationId
)
crawlTasks.append(task)
@@ -479,6 +487,7 @@ Return ONLY valid JSON, no additional text:
totalUrls: int,
instruction: str,
maxDepth: int,
+ maxWidth: int = 10,
parentOperationId: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
@@ -517,11 +526,13 @@ Return ONLY valid JSON, no additional text:
self.services.chat.progressLogUpdate(urlOperationId, 0.3, "Initiating crawl")
# Build crawl prompt model for single URL
+ # maxWidth is passed from performWebResearch based on researchDepth
+ logger.info(f"Crawling URL {urlIndex + 1}/{totalUrls} with maxDepth={maxDepth}, maxWidth={maxWidth}")
crawlPromptModel = AiCallPromptWebCrawl(
instruction=instruction,
url=url, # Single URL
maxDepth=maxDepth,
- maxWidth=5 # Default: 5 pages per level
+ maxWidth=maxWidth # Scaled based on researchDepth: fast=5, general=10, deep=20
)
crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)
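Before this patch the single-URL crawl hard-coded maxWidth=5, so researchDepth only ever influenced crawl depth; the hunks above thread maxWidth from performWebResearch through the list-crawl and single-URL helpers into AiCallPromptWebCrawl. The mapping itself is small enough to isolate. A minimal sketch, assuming nothing beyond the two dictionaries and defaults shown in the hunks; resolveCrawlLimits is a hypothetical helper name used only to illustrate the chain.

# Sketch of the researchDepth -> (maxDepth, maxWidth) translation fixed here.
# The mapping values and defaults are taken from the patch; the helper itself
# is illustrative and does not exist in the repository.
from typing import Tuple

depthMap = {"fast": 1, "general": 2, "deep": 3}    # levels followed from the seed URL
widthMap = {"fast": 5, "general": 10, "deep": 20}  # pages fetched per level

def resolveCrawlLimits(researchDepth: str) -> Tuple[int, int]:
    """Translate a researchDepth keyword into (maxDepth, maxWidth)."""
    key = researchDepth.lower()
    return depthMap.get(key, 2), widthMap.get(key, 10)

if __name__ == "__main__":
    for depth in ("fast", "general", "deep", "unknown"):
        print(depth, resolveCrawlLimits(depth))
    # fast (1, 5), general (2, 10), deep (3, 20), unknown falls back to (2, 10)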