117 lines
4.6 KiB
Python
117 lines
4.6 KiB
Python
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
Web Research action for AI operations.
Web research with two-step process: search for URLs, then crawl content.
"""
|
|
|
|
import logging
|
|
import time
|
|
import re
|
|
from typing import Dict, Any
|
|
from modules.workflows.methods.methodBase import action
|
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
def _cleanSuggestedFilename(suggestedFilename: Any) -> str:
    """Return a safe '<name>.json' derived from the suggestion, or '' if it is unusable."""
    # Anything that is not a string (None, dict, number, ...) cannot be cleaned.
    if not isinstance(suggestedFilename, str):
        return ""

    # Drop surrounding whitespace/quotes and flatten line breaks.
    cleaned = suggestedFilename.strip().strip('"\'')
    cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()

    # The extension is appended below, so remove one that is already present.
    if cleaned.lower().endswith('.json'):
        cleaned = cleaned[:-5]

    # Accept only short names made of letters, digits, '_' and '-'.
    if cleaned and len(cleaned) <= 60 and re.fullmatch(r'[a-zA-Z0-9_\-]+', cleaned):
        return f"{cleaned}.json"
    return ""


@action
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Web research with two-step process: search for URLs, then crawl content.
    - Input requirements: prompt (required); optional urlList, country, language, researchDepth.
    - Output format: JSON with research results including URLs and content.

    Parameters:
    - prompt (str, required): Natural language research instruction.
    - urlList (list, optional): Specific URLs to crawl, if needed.
    - country (str, optional): Two-letter country code (lowercase, e.g., ch, us, de).
    - language (str, optional): Language code (lowercase, e.g., de, en, fr).
    - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
    """
    # NOTE(review): the docstring layout above is kept as-is on purpose; it is
    # presumably consumed by the action framework - confirm before restructuring.
    #
    # Besides the documented parameters, an optional 'parentOperationId' entry is
    # read from `parameters` and forwarded to the progress logger.
    #
    # Returns ActionResult.isSuccess with a single JSON ActionDocument holding the
    # research result, or ActionResult.isFailure when the prompt is missing or
    # any step raises.

    # Bound before the try block so the except handler can always tell whether
    # progress tracking has been started.
    operationId = None
    try:
        prompt = parameters.get("prompt")
        if not prompt:
            return ActionResult.isFailure(error="Research prompt is required")

        # Build a unique operation id for progress tracking.
        timestamp = int(time.time())
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{timestamp}"
        operationId = f"web_research_{workflowId}_{timestamp}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "Web Research",
            "Searching and Crawling",
            "Extracting URLs and Content",
            parentOperationId=parameters.get('parentOperationId')
        )

        # Call web service - the service handles all AI intention analysis and processing
        result = await self.services.web.performWebResearch(
            prompt=prompt,
            urls=parameters.get("urlList", []),
            country=parameters.get("country"),
            language=parameters.get("language"),
            researchDepth=parameters.get("researchDepth", "general"),
            operationId=operationId
        )

        # Prefer the filename suggested in the research result; fall back to a
        # generic generated name when it is missing or fails validation.
        meaningfulName = _cleanSuggestedFilename(result.get("suggested_filename"))
        if not meaningfulName:
            meaningfulName = self._generateMeaningfulFileName(
                base_name="web_research",
                extension="json",
                action_name="research"
            )

        validationMetadata = {
            "actionType": "ai.webResearch",
            "prompt": prompt,
            "urlList": parameters.get("urlList", []),
            "country": parameters.get("country"),
            "language": parameters.get("language"),
            "researchDepth": parameters.get("researchDepth", "general"),
            "resultFormat": "json"
        }
        actionDocument = ActionDocument(
            documentName=meaningfulName,
            documentData=result,
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        # Report success only once the result document is fully built, so a
        # failure in post-processing is not preceded by a "success" finish.
        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=[actionDocument])

    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception("Error in web research: %s", e)
        if operationId is not None:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                # Best effort: a failing progress logger must not mask the
                # original error returned to the caller.
                logger.debug("Could not finish progress log for %s", operationId, exc_info=True)
        return ActionResult.isFailure(error=str(e))
|
|
|