Merge pull request #83 from valueonag/feat/foundation-customer-use-cases
Feat/foundation customer use cases
Commit b2a3d4afc8
3 changed files with 49 additions and 12 deletions

@@ -345,8 +345,8 @@ class AiTavily(BaseConnectorAi):
         retryDelay = self.crawlRetryDelay
         timeout = self.crawlTimeout
 
-        logger.debug(f"Starting crawl of URL: {url}")
-        logger.debug(f"Crawl settings: instructions={instructions}, limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
+        logger.info(f"Starting crawl of URL: {url}")
+        logger.info(f"Crawl settings: instructions={instructions[:100] if instructions else None}..., limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s")
 
         for attempt in range(maxRetries + 1):
             try:
@@ -371,7 +371,7 @@ class AiTavily(BaseConnectorAi):
                 if maxBreadth:
                     kwargsCrawl["max_breadth"] = maxBreadth
 
-                logger.debug(f"Sending request to Tavily with kwargs: {kwargsCrawl}")
+                logger.info(f"Sending request to Tavily API with parameters: {kwargsCrawl}")
 
                 response = await asyncio.wait_for(
                     self.client.crawl(**kwargsCrawl),
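
For context, this call sits inside the retry loop shown in the previous hunk. Below is a minimal sketch of that pattern, assuming a Tavily-style async client; the wrapper function name is illustrative, and maxRetries, retryDelay and timeout stand in for the instance settings referenced above.

    import asyncio
    import logging

    logger = logging.getLogger(__name__)

    async def crawlWithRetry(client, kwargsCrawl: dict, maxRetries: int, retryDelay: float, timeout: float):
        """Illustrative only: bound each crawl attempt with a timeout and back off between retries."""
        for attempt in range(maxRetries + 1):
            try:
                # asyncio.wait_for cancels the crawl coroutine if it exceeds the timeout
                return await asyncio.wait_for(client.crawl(**kwargsCrawl), timeout=timeout)
            except asyncio.TimeoutError:
                if attempt == maxRetries:
                    raise
                logger.info(f"Crawl attempt {attempt + 1} timed out, retrying in {retryDelay}s")
                await asyncio.sleep(retryDelay)
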
@@ -115,9 +115,14 @@ class WebService:
         if not validatedUrls:
             return {"error": "No URLs found to crawl"}
 
-        # Step 4: Translate researchDepth to maxDepth
+        # Step 4: Translate researchDepth to maxDepth and maxWidth
         depthMap = {"fast": 1, "general": 2, "deep": 3}
         maxDepth = depthMap.get(finalResearchDepth.lower(), 2)
+        # Scale maxWidth based on research depth: fast=5, general=10, deep=20 pages per level
+        widthMap = {"fast": 5, "general": 10, "deep": 20}
+        maxWidth = widthMap.get(finalResearchDepth.lower(), 10)
+
+        logger.info(f"Research depth settings: depth={finalResearchDepth}, maxDepth={maxDepth}, maxWidth={maxWidth}")
 
         # Step 5: Crawl all URLs with hierarchical logging
         if operationId:
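
To illustrate the lookup behaviour of the two maps above: the dictionaries and fallback defaults are taken directly from the hunk, while wrapping them in a standalone helper is only for the example.

    def resolveCrawlBounds(researchDepth: str) -> tuple:
        # Same tables and fallbacks as in the hunk above
        depthMap = {"fast": 1, "general": 2, "deep": 3}
        widthMap = {"fast": 5, "general": 10, "deep": 20}
        key = researchDepth.lower()
        # Unrecognised values fall back to the "general" profile
        return depthMap.get(key, 2), widthMap.get(key, 10)

    print(resolveCrawlBounds("deep"))     # (3, 20)
    print(resolveCrawlBounds("unknown"))  # (2, 10)
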
@@ -131,6 +136,7 @@ class WebService:
                 instruction=instruction,
                 urls=validatedUrls,
                 maxDepth=maxDepth,
+                maxWidth=maxWidth, # Pass maxWidth to crawl function
                 parentOperationId=parentOperationId
             )
@@ -440,6 +446,7 @@ Return ONLY valid JSON, no additional text:
         instruction: str,
         urls: List[str],
         maxDepth: int = 2,
+        maxWidth: int = 10,
         parentOperationId: Optional[str] = None
     ) -> List[Dict[str, Any]]:
         """Perform web crawl on list of URLs - crawls URLs in parallel for better performance."""
@@ -452,6 +459,7 @@ Return ONLY valid JSON, no additional text:
                 totalUrls=len(urls),
                 instruction=instruction,
                 maxDepth=maxDepth,
+                maxWidth=maxWidth, # Pass maxWidth to single URL crawl
                 parentOperationId=parentOperationId
             )
             crawlTasks.append(task)
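
The task fan-out that consumes crawlTasks is not shown in the hunk. A minimal sketch of the parallel pattern the docstring describes is below; the helper names are placeholders, not the project's actual functions.

    import asyncio

    async def crawlSingleUrl(url: str, maxDepth: int, maxWidth: int) -> dict:
        # Placeholder for the real per-URL crawl; returns a result dict
        return {"url": url, "maxDepth": maxDepth, "maxWidth": maxWidth}

    async def crawlAll(urls: list, maxDepth: int, maxWidth: int) -> list:
        # One task per URL so the crawls run concurrently
        crawlTasks = [crawlSingleUrl(u, maxDepth, maxWidth) for u in urls]
        return await asyncio.gather(*crawlTasks, return_exceptions=True)

    results = asyncio.run(crawlAll(["https://example.com"], maxDepth=2, maxWidth=10))
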
@@ -479,6 +487,7 @@ Return ONLY valid JSON, no additional text:
         totalUrls: int,
         instruction: str,
         maxDepth: int,
+        maxWidth: int = 10,
         parentOperationId: Optional[str] = None
     ) -> List[Dict[str, Any]]:
         """
@@ -517,11 +526,13 @@ Return ONLY valid JSON, no additional text:
             self.services.chat.progressLogUpdate(urlOperationId, 0.3, "Initiating crawl")
 
             # Build crawl prompt model for single URL
+            # maxWidth is passed from performWebResearch based on researchDepth
+            logger.info(f"Crawling URL {urlIndex + 1}/{totalUrls} with maxDepth={maxDepth}, maxWidth={maxWidth}")
             crawlPromptModel = AiCallPromptWebCrawl(
                 instruction=instruction,
                 url=url, # Single URL
                 maxDepth=maxDepth,
-                maxWidth=5 # Default: 5 pages per level
+                maxWidth=maxWidth # Scaled based on researchDepth: fast=5, general=10, deep=20
             )
             crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)
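
model_dump_json(exclude_none=True, indent=2) is the pydantic v2 serialization API. A minimal stand-in for the prompt model is sketched below; AiCallPromptWebCrawl itself is project code, so the field set and the assumption that it is a pydantic BaseModel are inferred from the kwargs in the hunk.

    from typing import Optional
    from pydantic import BaseModel

    class CrawlPromptSketch(BaseModel):
        # Hypothetical stand-in for AiCallPromptWebCrawl
        instruction: str
        url: str
        maxDepth: int = 2
        maxWidth: int = 10
        limit: Optional[int] = None

    prompt = CrawlPromptSketch(instruction="Summarise pricing pages", url="https://example.com")
    # exclude_none drops unset optional fields from the JSON payload
    print(prompt.model_dump_json(exclude_none=True, indent=2))
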
@@ -779,36 +779,62 @@ class ContentValidator:
             actionHistoryContext = f"\n\n=== ACTION HISTORY ===\n" + "\n".join(f"- {entry}" for entry in historyEntries)
             actionHistoryContext += "\n\nIMPORTANT: This shows the complete workflow that produced the documents. For process-oriented criteria (e.g., 'internet search performed'), check ACTION HISTORY first. Document metadata may only reflect the LAST action, not the entire workflow."
 
-        # Build document index context (all documents delivered in current round)
+        # Build document index context (all documents delivered in current round AND past rounds)
+        # CRITICAL: Include past rounds so validator can see files produced in previous rounds
+        # This prevents endless loops when files are produced in multiple sets across rounds
         documentIndexContext = ""
         if context and self.services and hasattr(self.services, 'chat') and hasattr(self.services, 'workflow') and self.services.workflow:
             try:
                 documentIndex = self.services.chat.getAvailableDocuments(self.services.workflow)
                 if documentIndex and documentIndex.strip() and documentIndex != "No documents available":
-                    # Extract only "Current round documents" section if present
+                    # Extract BOTH "Current round documents" AND "Past rounds documents" sections
                     lines = documentIndex.split('\n')
                     currentRoundSection = []
+                    pastRoundsSection = []
                     inCurrentRound = False
+                    inPastRounds = False
 
                     for line in lines:
                         if "Current round documents:" in line:
                             inCurrentRound = True
+                            inPastRounds = False
                             currentRoundSection.append(line)
+                        elif "Past rounds documents:" in line:
+                            inCurrentRound = False
+                            inPastRounds = True
+                            pastRoundsSection.append(line)
+                        elif "AVAILABLE_CONNECTIONS_INDEX:" in line:
+                            # End of document sections
+                            break
                         elif inCurrentRound:
                             if line.strip().startswith("- docList:") or line.strip().startswith(" - docItem:") or line.strip().startswith("- docItem:"):
                                 currentRoundSection.append(line)
                             elif line.strip() == "":
                                 # Empty line is okay, continue
                                 continue
-                            elif "Past rounds documents:" in line or "AVAILABLE_CONNECTIONS_INDEX:" in line:
-                                # End of current round section
-                                break
                             else:
                                 # Still in current round section
                                 currentRoundSection.append(line)
+                        elif inPastRounds:
+                            if line.strip().startswith("- docList:") or line.strip().startswith(" - docItem:") or line.strip().startswith("- docItem:"):
+                                pastRoundsSection.append(line)
+                            elif line.strip() == "":
+                                # Empty line is okay, continue
+                                continue
+                            else:
+                                # Still in past rounds section
+                                pastRoundsSection.append(line)
+
+                    # Build context with both sections
+                    sections = []
                     if currentRoundSection:
-                        documentIndexContext = "\n\n=== ALL DOCUMENTS DELIVERED IN CURRENT ROUND ===\n" + "\n".join(currentRoundSection)
-                        documentIndexContext += "\n\nIMPORTANT: This shows ALL documents that have been delivered in the current round, not just the ones being validated in this step. Use this to check if all required formats/documents are present across the entire round."
+                        sections.append("=== ALL DOCUMENTS DELIVERED IN CURRENT ROUND ===\n" + "\n".join(currentRoundSection))
+                    if pastRoundsSection:
+                        sections.append("=== ALL DOCUMENTS DELIVERED IN PAST ROUNDS ===\n" + "\n".join(pastRoundsSection))
+
+                    if sections:
+                        documentIndexContext = "\n\n" + "\n\n".join(sections)
+                        documentIndexContext += "\n\nIMPORTANT: This shows ALL documents that have been delivered in the current round AND past rounds, not just the ones being validated in this step. Use this to check if all required formats/documents are present across ALL rounds. Files produced in previous rounds (e.g., html+css in round 1, python in round 2) should all be visible here."
             except Exception as e:
                 logger.warning(f"Error extracting document index for validation: {str(e)}")
                 # Continue without document index - not critical
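
The extraction logic above can be hard to follow inside the diff, so here is a condensed, standalone sketch of the same state machine. The section labels match the hunk; the helper function itself is illustrative and not part of the PR.

    def splitDocumentIndex(documentIndex: str) -> dict:
        """Illustrative: collect 'Current round' and 'Past rounds' sections, stop at the connections index."""
        sections = {"current": [], "past": []}
        active = None
        for line in documentIndex.split('\n'):
            if "Current round documents:" in line:
                active = "current"
            elif "Past rounds documents:" in line:
                active = "past"
            elif "AVAILABLE_CONNECTIONS_INDEX:" in line:
                break  # end of document sections
            elif active is None or line.strip() == "":
                continue
            sections[active].append(line)
        return sections

Applied to the string returned by getAvailableDocuments, this yields the two blocks that are then joined into documentIndexContext.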