123 lines
4.9 KiB
Python
123 lines
4.9 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
|
|
import logging
|
|
import time
|
|
import re
|
|
import json
|
|
from typing import Dict, Any
|
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
from modules.serviceCenter import ServiceCenterContext, getService, can_access_service
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
# A suggested filename stem is accepted only if it consists of these characters.
_SAFE_FILENAME_PATTERN = re.compile(r'^[a-zA-Z0-9_\-]+$')
# Longest accepted filename stem (the ".json" suffix is not counted).
_MAX_FILENAME_STEM_LENGTH = 60


def _cleanSuggestedFilename(suggested: Any) -> str:
    """Return ``"<stem>.json"`` for a usable suggested filename, else ``""``.

    The value is stripped of surrounding whitespace and quotes, line breaks
    are replaced by spaces, and a trailing ``.json`` (any case) is removed.
    The remaining stem must be non-empty, at most 60 characters long and
    consist only of ASCII letters, digits, ``_`` and ``-``.

    Non-string input (including ``None``) is treated as "no usable name".
    """
    if not isinstance(suggested, str):
        return ""

    stem = suggested.strip().strip('"\'')
    stem = stem.replace('\n', ' ').replace('\r', ' ').strip()
    if stem.lower().endswith('.json'):
        stem = stem[:-5]

    if stem and len(stem) <= _MAX_FILENAME_STEM_LENGTH and _SAFE_FILENAME_PATTERN.match(stem):
        return f"{stem}.json"
    return ""


async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """Run a web research request and return its result as a JSON document.

    Keys read from ``parameters``:
        prompt: Research prompt. Required; a missing or empty value yields a
            failure result without calling the web service.
        urlList: Forwarded to the web service as ``urls`` (default ``[]``).
        country: Forwarded to the web service unchanged.
        language: Forwarded to the web service unchanged.
        researchDepth: Forwarded to the web service (default ``"general"``).
        parentOperationId: Passed to the progress log as the parent operation.

    Returns:
        ``ActionResult.isSuccess`` with a single ``ActionDocument`` (mime type
        ``application/json``) on success. ``ActionResult.isFailure`` when the
        prompt is missing, when the RBAC check denies access to the "web"
        service, or when any exception is raised; in the last case the error
        text is ``str(exception)``.
    """
    # Stays None until progress tracking is about to start, so the error
    # handler only closes a progress entry that may actually exist.
    operationId = None
    try:
        prompt = parameters.get("prompt")
        if not prompt:
            return ActionResult.isFailure(error="Research prompt is required")

        mandateId = getattr(self.services, "mandateId", None)
        featureInstanceId = getattr(self.services, "featureInstanceId", None)

        # RBAC: the service-level check only runs when the service hub
        # exposes a truthy "rbac" attribute.
        rbac = getattr(self.services, "rbac", None)
        if rbac and not can_access_service(
            self.services.user,
            rbac,
            "web",
            mandate_id=mandateId,
            feature_instance_id=featureInstanceId,
        ):
            return ActionResult.isFailure(error="Permission denied: Web research service")

        # Build context for service center
        workflow = self.services.workflow
        context = ServiceCenterContext(
            user=self.services.user,
            mandate_id=mandateId,
            feature_instance_id=featureInstanceId,
            workflow_id=workflow.id if workflow else None,
            workflow=workflow,
        )
        web_service = getService("web", context, legacy_hub=self.services)

        # Read the clock once so both parts of the operation id agree.
        timestamp = int(time.time())
        workflowId = workflow.id if workflow else f"no-workflow-{timestamp}"
        operationId = f"web_research_{workflowId}_{timestamp}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "Web Research",
            "Searching and Crawling",
            "Extracting URLs and Content",
            parentOperationId=parameters.get('parentOperationId'),
        )

        # The web service performs the actual research; this action only
        # forwards the parameters and wraps the outcome.
        result = await web_service.performWebResearch(
            prompt=prompt,
            urls=parameters.get("urlList", []),
            country=parameters.get("country"),
            language=parameters.get("language"),
            researchDepth=parameters.get("researchDepth", "general"),
            operationId=operationId,
        )

        # The result is not guaranteed to be a dict (see documentData below),
        # so only ask for a suggested filename when it offers a ``get``.
        resultGet = getattr(result, "get", None)
        suggestedFilename = resultGet("suggested_filename") if callable(resultGet) else None

        # Prefer the validated suggestion; otherwise use the generic name.
        meaningfulName = _cleanSuggestedFilename(suggestedFilename) or self._generateMeaningfulFileName(
            base_name="web_research",
            extension="json",
            action_name="research",
        )

        validationMetadata = {
            "actionType": "ai.webResearch",
            "prompt": prompt,
            "urlList": parameters.get("urlList", []),
            "country": parameters.get("country"),
            "language": parameters.get("language"),
            "researchDepth": parameters.get("researchDepth", "general"),
            "resultFormat": "json",
        }
        # Dict results are serialized to JSON text; anything else is passed
        # through unchanged.
        documentData = json.dumps(result, ensure_ascii=False) if isinstance(result, dict) else result
        actionDocument = ActionDocument(
            documentName=meaningfulName,
            documentData=documentData,
            mimeType="application/json",
            validationMetadata=validationMetadata,
        )

        # Report success only after the document has been built, so an error
        # during post-processing is not logged as success and then failure.
        self.services.chat.progressLogFinish(operationId, True)
        return ActionResult.isSuccess(documents=[actionDocument])

    except Exception as e:
        # Boundary handler: the action reports failures through its result
        # instead of raising. logger.exception keeps the traceback.
        logger.exception("Error in web research: %s", e)
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                # Best effort: a broken progress log must not mask the
                # original error, but it should not vanish silently either.
                logger.warning(
                    "Could not mark web research progress %s as failed",
                    operationId,
                    exc_info=True,
                )
        return ActionResult.isFailure(error=str(e))
|
|
|