# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Web Research action for AI operations.
Web research with two-step process: search for URLs, then crawl content.
"""

import logging
import time
import re
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument

logger = logging.getLogger(__name__)


# NOTE(review): the docstring below keeps its "GENERAL / Parameters" layout on
# purpose; it is presumably consumed by the @action framework - confirm before
# restructuring it.
#
# Returns an ActionResult:
#   - success: carries one ActionDocument (mimeType "application/json") whose
#     documentData is the value returned by services.web.performWebResearch.
#   - failure: carries an error string (missing prompt, or str() of any
#     exception raised while running the research).
@action
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Web research with two-step process: search for URLs, then crawl content.
    - Input requirements: prompt (required); optional urlList, country, language, researchDepth.
    - Output format: JSON with research results including URLs and content.

    Parameters:
    - prompt (str, required): Natural language research instruction.
    - urlList (list, optional): Specific URLs to crawl, if needed.
    - country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
    - language (str, optional): Language code (lowercase, e.g., de, en, fr).
    - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
    """
    # Bound before the try block so the error handler can tell whether
    # progress tracking was ever started (it is not when we fail early).
    operationId = None
    try:
        prompt = parameters.get("prompt")
        if not prompt:
            return ActionResult.isFailure(error="Research prompt is required")

        # Read each optional parameter once; the same values feed both the
        # service call and the validation metadata.
        urlList = parameters.get("urlList", [])
        country = parameters.get("country")
        language = parameters.get("language")
        researchDepth = parameters.get("researchDepth", "general")

        # Init progress logger
        workflowId = (
            self.services.workflow.id
            if self.services.workflow
            else f"no-workflow-{int(time.time())}"
        )
        operationId = f"web_research_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Web Research",
            "Searching and Crawling",
            "Extracting URLs and Content",
            parentOperationId=parentOperationId
        )

        # Call webcrawl service - service handles all AI intention analysis and processing
        result = await self.services.web.performWebResearch(
            prompt=prompt,
            urls=urlList,
            country=country,
            language=language,
            researchDepth=researchDepth,
            operationId=operationId
        )

        # Complete progress tracking
        # NOTE(review): if anything below raises, the handler reports the same
        # operation as finished a second time (with failure) - confirm that the
        # progress log tolerates this.
        self.services.chat.progressLogFinish(operationId, True)

        # Get meaningful filename from research result (generated by intent analyzer)
        meaningfulName = None
        suggestedFilename = result.get("suggested_filename")
        # A non-string suggestion must not fail an otherwise successful
        # research run; it simply falls through to the generated name.
        if suggestedFilename and isinstance(suggestedFilename, str):
            # Clean and validate filename
            cleaned = suggestedFilename.strip().strip('"\'')
            cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()
            # Ensure it doesn't already have extension
            if cleaned.lower().endswith('.json'):
                cleaned = cleaned[:-5]
            # Validate: should be reasonable length and contain only safe characters
            if cleaned and len(cleaned) <= 60 and re.match(r'^[a-zA-Z0-9_\-]+$', cleaned):
                meaningfulName = f"{cleaned}.json"

        if meaningfulName is None:
            # Fallback to generic meaningful filename
            meaningfulName = self._generateMeaningfulFileName(
                base_name="web_research",
                extension="json",
                action_name="research"
            )

        validationMetadata = {
            "actionType": "ai.webResearch",
            "prompt": prompt,
            "urlList": urlList,
            "country": country,
            "language": language,
            "researchDepth": researchDepth,
            "resultFormat": "json"
        }

        actionDocument = ActionDocument(
            documentName=meaningfulName,
            documentData=result,
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[actionDocument])

    except Exception as e:
        logger.error(f"Error in web research: {str(e)}")
        # Only close the progress entry if it could have been opened; closing
        # it is best-effort and must never mask the original error.
        if operationId is not None:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                logger.warning(f"Could not finish progress log for {operationId}: {finishError}")
        return ActionResult.isFailure(error=str(e))