164 lines
6.5 KiB
Python
164 lines
6.5 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
|
|
import logging
|
|
import time
|
|
import re
|
|
import json
|
|
from typing import Dict, Any
|
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
from modules.serviceCenter import ServiceCenterContext, getService, can_access_service
|
|
from modules.serviceCenter.services.serviceSubscription.mainServiceSubscription import SubscriptionInactiveException
|
|
from modules.serviceCenter.services.serviceBilling.mainServiceBilling import BillingContextError
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
def _build_research_prompt(parameters: Dict[str, Any]) -> str:
    """Assemble the final research prompt from prompt + optional context/documentList.

    The sections are emitted in a fixed order, separated by blank lines:
    the serialized context (prefixed with ``Kontext:``), the concatenated
    document texts (prefixed with ``Dokumente:``), then the user prompt.
    Empty sections are omitted.

    Args:
        parameters: Action parameters. The keys read here are ``prompt``,
            ``context`` and ``documentList``. ``documentList`` may be a list of
            document dicts, or a dict holding that list under ``documents`` or
            ``items``; any other value is ignored.

    Returns:
        The combined prompt, or an empty string when no section has content.
    """
    # Imported locally, as in the original code (presumably to avoid an
    # import cycle -- TODO confirm).
    from modules.workflows.methods.methodAi._common import serialize_context

    base_prompt = (parameters.get("prompt") or "").strip()
    context_val = serialize_context(parameters.get("context"))
    doc_list = parameters.get("documentList")

    # Normalise the accepted documentList shapes to a plain iterable. The
    # trailing ``or []`` guards against an explicit ``None`` under "items",
    # which would otherwise make the loop below raise TypeError.
    if isinstance(doc_list, dict):
        docs = doc_list.get("documents") or doc_list.get("items") or []
    elif isinstance(doc_list, list):
        docs = doc_list
    else:
        docs = []

    doc_texts: list[str] = []
    for doc in docs:
        if not isinstance(doc, dict):
            continue
        text = doc.get("documentData") or doc.get("text") or doc.get("content") or ""
        if not isinstance(text, str):
            continue
        stripped = text.strip()
        # Skip whitespace-only documents so they cannot yield an empty
        # "Dokumente:" section.
        if stripped:
            doc_texts.append(stripped)

    parts: list[str] = []
    if context_val:
        parts.append(f"Kontext:\n{context_val}")
    if doc_texts:
        parts.append("Dokumente:\n" + "\n---\n".join(doc_texts))
    parts.append(base_prompt)

    return "\n\n".join(p for p in parts if p)
|
|
|
|
|
|
def _sanitize_suggested_filename(suggested: Any) -> str:
    """Return a safe ``<name>.json`` filename built from *suggested*, or ``""`` if unusable."""
    if not isinstance(suggested, str):
        return ""
    cleaned = suggested.strip().strip('"\'')
    cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()
    # Drop an existing extension so it is not appended twice.
    if cleaned.lower().endswith('.json'):
        cleaned = cleaned[:-5]
    # Accept only short names made of letters, digits, underscore and hyphen.
    if cleaned and len(cleaned) <= 60 and re.match(r'^[a-zA-Z0-9_\-]+$', cleaned):
        return f"{cleaned}.json"
    return ""


def _finish_research_progress_as_failed(action: Any, operationId: Any) -> None:
    """Best-effort: mark the progress entry as failed without masking the original error."""
    if not operationId:
        return
    try:
        action.services.chat.progressLogFinish(operationId, False)
    except Exception:
        # Deliberately non-fatal: the caller is already handling another error.
        logger.warning(
            "Could not finish progress log for operation %s", operationId, exc_info=True
        )


async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """Run a web research request and return its result as a JSON document.

    Args:
        parameters: Action parameters. ``prompt``, ``context`` and
            ``documentList`` are combined into the research prompt; ``urlList``,
            ``country``, ``language`` and ``researchDepth`` (default
            ``"general"``) are forwarded to the web service;
            ``parentOperationId`` is passed to the progress log.

    Returns:
        A success ``ActionResult`` carrying one ``ActionDocument`` with the
        research result, or a failure ``ActionResult`` when the prompt is
        empty, access to the ``"web"`` service is denied, or an unexpected
        error occurs.

    Raises:
        SubscriptionInactiveException, BillingContextError: re-raised unchanged
            after the progress entry has been marked as failed.
    """
    operationId = None
    try:
        prompt = _build_research_prompt(parameters)
        if not prompt:
            return ActionResult.isFailure(error="Research prompt is required")

        services = self.services
        mandateId = getattr(services, "mandateId", None)
        featureInstanceId = getattr(services, "featureInstanceId", None)

        # RBAC: check service-level permission (skipped when no rbac is attached).
        rbac = getattr(services, "rbac", None)
        if rbac and not can_access_service(
            services.user,
            rbac,
            "web",
            mandate_id=mandateId,
            feature_instance_id=featureInstanceId,
        ):
            return ActionResult.isFailure(error="Permission denied: Web research service")

        # Build context for the service center and resolve the web service.
        workflow = services.workflow
        context = ServiceCenterContext(
            user=services.user,
            mandate_id=mandateId,
            feature_instance_id=featureInstanceId,
            workflow_id=workflow.id if workflow else None,
            workflow=workflow,
        )
        web_service = getService("web", context)

        # Derive an operation id for progress tracking.
        workflowId = workflow.id if workflow else f"no-workflow-{int(time.time())}"
        operationId = f"web_research_{workflowId}_{int(time.time())}"

        services.chat.progressLogStart(
            operationId,
            "Web Research",
            "Searching and Crawling",
            "Extracting URLs and Content",
            parentOperationId=parameters.get('parentOperationId'),
        )

        urlList = parameters.get("urlList", [])
        country = parameters.get("country")
        language = parameters.get("language")
        researchDepth = parameters.get("researchDepth", "general")

        # The web service performs the actual research for the given prompt.
        result = await web_service.performWebResearch(
            prompt=prompt,
            urls=urlList,
            country=country,
            language=language,
            researchDepth=researchDepth,
            operationId=operationId,
        )

        services.chat.progressLogFinish(operationId, True)

        # Prefer the filename suggested in the result. Only dict results can
        # carry one; the serialization below also allows non-dict results, so
        # guard here instead of failing a research run that already succeeded.
        suggestedFilename = result.get("suggested_filename") if isinstance(result, dict) else None
        meaningfulName = _sanitize_suggested_filename(suggestedFilename)
        if not meaningfulName:
            # Fallback to generic meaningful filename
            meaningfulName = self._generateMeaningfulFileName(
                base_name="web_research",
                extension="json",
                action_name="research",
            )

        validationMetadata = {
            "actionType": "ai.webResearch",
            "prompt": prompt,
            "urlList": urlList,
            "country": country,
            "language": language,
            "researchDepth": researchDepth,
            "resultFormat": "json",
        }
        documentData = json.dumps(result, ensure_ascii=False) if isinstance(result, dict) else result
        actionDocument = ActionDocument(
            documentName=meaningfulName,
            documentData=documentData,
            mimeType="application/json",
            validationMetadata=validationMetadata,
        )

        return ActionResult.isSuccess(documents=[actionDocument])

    except (SubscriptionInactiveException, BillingContextError):
        # Subscription/billing problems are propagated to the caller unchanged.
        _finish_research_progress_as_failed(self, operationId)
        raise
    except Exception as e:
        # logger.exception keeps the traceback alongside the original message.
        logger.exception("Error in web research: %s", e)
        _finish_research_progress_as_failed(self, operationId)
        return ActionResult.isFailure(error=str(e))
|
|
|