# gateway/modules/workflows/methods/methodAi/actions/webResearch.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

import logging
import time
import re
import json
from typing import Dict, Any
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.serviceCenter import ServiceCenterContext, getService, can_access_service

logger = logging.getLogger(__name__)

async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """Run a web research request and return its result as a JSON document.

    The action checks the caller's permission for the "web" service (when an
    RBAC object is available on ``self.services``), delegates the research to
    the web service obtained from the service center, and wraps the returned
    result in a single ``ActionDocument`` with mime type ``application/json``.

    Args:
        parameters: Action parameters. Keys read by this action:
            ``prompt`` (required), ``urlList`` (defaults to ``[]``),
            ``country``, ``language``, ``researchDepth`` (defaults to
            ``"general"``) and ``parentOperationId`` (forwarded to the
            progress log).

    Returns:
        A successful ``ActionResult`` carrying one document, or a failure
        ``ActionResult`` when the prompt is missing, access is denied, or any
        exception is raised while processing.
    """
    operationId = None
    try:
        prompt = parameters.get("prompt")
        if not prompt:
            return ActionResult.isFailure(error="Research prompt is required")

        mandateId = getattr(self.services, "mandateId", None)
        featureInstanceId = getattr(self.services, "featureInstanceId", None)

        # RBAC: check service-level permission (skipped when no RBAC object is configured)
        rbac = getattr(self.services, "rbac", None)
        if rbac and not can_access_service(
            self.services.user,
            rbac,
            "web",
            mandate_id=mandateId,
            feature_instance_id=featureInstanceId,
        ):
            return ActionResult.isFailure(error="Permission denied: Web research service")

        # Build context for service center
        workflow = self.services.workflow
        context = ServiceCenterContext(
            user=self.services.user,
            mandate_id=mandateId,
            feature_instance_id=featureInstanceId,
            workflow_id=workflow.id if workflow else None,
            workflow=workflow,
        )
        web_service = getService("web", context, legacy_hub=self.services)

        # Sample the clock once so both timestamps embedded in the id agree
        timestamp = int(time.time())
        workflowId = workflow.id if workflow else f"no-workflow-{timestamp}"
        operationId = f"web_research_{workflowId}_{timestamp}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "Web Research",
            "Searching and Crawling",
            "Extracting URLs and Content",
            parentOperationId=parameters.get('parentOperationId')
        )

        # Call webcrawl service - service handles all AI intention analysis and processing
        result = await web_service.performWebResearch(
            prompt=prompt,
            urls=parameters.get("urlList", []),
            country=parameters.get("country"),
            language=parameters.get("language"),
            researchDepth=parameters.get("researchDepth", "general"),
            operationId=operationId
        )

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)

        # Prefer the filename suggested in the research result. The result is
        # only queried when it is a dict, because non-dict results are passed
        # through as document data further below and have no ``.get``.
        suggestedFilename = result.get("suggested_filename") if isinstance(result, dict) else None
        meaningfulName = None
        if isinstance(suggestedFilename, str):
            # Clean: drop surrounding whitespace/quotes and flatten line breaks
            cleaned = suggestedFilename.strip().strip('"\'')
            cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()
            # Ensure it doesn't already have the extension we append below
            if cleaned.lower().endswith('.json'):
                cleaned = cleaned[:-5]
            # Validate: reasonable length and only safe characters (non-empty by the `+`)
            if len(cleaned) <= 60 and re.fullmatch(r'[a-zA-Z0-9_\-]+', cleaned):
                meaningfulName = f"{cleaned}.json"

        if meaningfulName is None:
            # Fallback to generic meaningful filename
            meaningfulName = self._generateMeaningfulFileName(
                base_name="web_research",
                extension="json",
                action_name="research"
            )

        validationMetadata = {
            "actionType": "ai.webResearch",
            "prompt": prompt,
            "urlList": parameters.get("urlList", []),
            "country": parameters.get("country"),
            "language": parameters.get("language"),
            "researchDepth": parameters.get("researchDepth", "general"),
            "resultFormat": "json"
        }
        # Dict results are serialized to JSON text; anything else is passed through unchanged
        documentData = json.dumps(result, ensure_ascii=False) if isinstance(result, dict) else result
        actionDocument = ActionDocument(
            documentName=meaningfulName,
            documentData=documentData,
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[actionDocument])

    except Exception as e:
        # Boundary handler: log with traceback and report the failure to the caller
        logger.exception("Error in web research: %s", e)
        if operationId:
            # Best effort: a failure to close the progress log must not mask the original error
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                logger.debug("Could not finish progress log %s: %s", operationId, finishError)
        return ActionResult.isFailure(error=str(e))