# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import logging
import time
import re
import json
from typing import Dict, Any, Optional

from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.serviceCenter import ServiceCenterContext, getService, can_access_service
from modules.serviceCenter.services.serviceSubscription.mainServiceSubscription import SubscriptionInactiveException
from modules.serviceCenter.services.serviceBilling.mainServiceBilling import BillingContextError

logger = logging.getLogger(__name__)

# A suggested filename is only accepted when it consists of these characters
# and is at most _MAX_FILENAME_LENGTH characters long (extension excluded).
_SAFE_FILENAME_PATTERN = re.compile(r'^[a-zA-Z0-9_\-]+$')
_MAX_FILENAME_LENGTH = 60


def _build_research_prompt(parameters: Dict[str, Any]) -> str:
    """Assemble the final research prompt from prompt + optional context/documentList.

    The result is built from up to three sections, joined by blank lines:

    1. ``"Kontext:"`` followed by ``parameters["context"]`` when that is a
       non-blank string.
    2. ``"Dokumente:"`` followed by the texts found in
       ``parameters["documentList"]``, separated by ``---`` lines. The list may
       be a plain list of dicts or a dict holding the list under ``documents``
       or ``items``; each dict contributes the first truthy value among
       ``documentData``, ``text`` and ``content`` when it is a non-blank string.
    3. The stripped ``parameters["prompt"]``.

    Empty sections are omitted, so the return value is ``""`` only when none of
    the three sections has content.
    """
    base_prompt = (parameters.get("prompt") or "").strip()
    context_val = parameters.get("context")
    doc_list = parameters.get("documentList")

    parts: list[str] = []

    # Prepend context string if provided
    if isinstance(context_val, str) and context_val.strip():
        parts.append(f"Kontext:\n{context_val.strip()}")

    # Resolve the iterable of document entries; anything that is neither a
    # dict nor a list contributes no documents.
    if isinstance(doc_list, dict):
        docs = doc_list.get("documents") or doc_list.get("items") or []
    elif isinstance(doc_list, list):
        docs = doc_list
    else:
        docs = []

    doc_texts = []
    for d in docs:
        if not isinstance(d, dict):
            continue
        text = d.get("documentData") or d.get("text") or d.get("content") or ""
        # Skip non-string payloads and whitespace-only texts so that they do
        # not produce an empty "Dokumente:" section or empty separators.
        if isinstance(text, str) and text.strip():
            doc_texts.append(text.strip())

    if doc_texts:
        parts.append("Dokumente:\n" + "\n---\n".join(doc_texts))

    parts.append(base_prompt)
    return "\n\n".join(p for p in parts if p)


def _clean_suggested_filename(suggested: Any) -> Optional[str]:
    """Return ``"<name>.json"`` for a usable suggested filename, else ``None``.

    The suggestion is stripped of surrounding whitespace and quotes, line
    breaks are replaced by spaces, and a trailing ``.json`` is removed. The
    remaining name is accepted only if it is non-empty, at most
    ``_MAX_FILENAME_LENGTH`` characters long and matches
    ``_SAFE_FILENAME_PATTERN``.
    """
    if not suggested or not isinstance(suggested, str):
        return None

    cleaned = suggested.strip().strip('"\'')
    cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()

    # Ensure it doesn't already have the extension we are about to append
    if cleaned.lower().endswith('.json'):
        cleaned = cleaned[:-5]

    if cleaned and len(cleaned) <= _MAX_FILENAME_LENGTH and _SAFE_FILENAME_PATTERN.match(cleaned):
        return f"{cleaned}.json"
    return None


def _mark_progress_failed(self, operationId: Optional[str]) -> None:
    """Best-effort: finish the progress log for ``operationId`` as failed.

    Does nothing when no operation was started. Any error raised while closing
    the progress log is logged at debug level and otherwise ignored, so that it
    never masks the error that is being handled by the caller.
    """
    if not operationId:
        return
    try:
        self.services.chat.progressLogFinish(operationId, False)
    except Exception:
        logger.debug("Could not finish progress log for %s", operationId, exc_info=True)


async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """Run a web research through the "web" service and return it as a JSON document.

    Reads from ``parameters``: ``prompt``, ``context``, ``documentList`` (all
    combined into the research prompt), ``urlList``, ``country``, ``language``,
    ``researchDepth`` (default ``"general"``) and ``parentOperationId``.

    Returns:
        ``ActionResult.isSuccess`` carrying one ``ActionDocument`` with MIME
        type ``application/json``; ``ActionResult.isFailure`` when the
        assembled prompt is empty, when the service-level permission check
        fails, or when any other exception occurs (its message is returned as
        the error).

    Raises:
        SubscriptionInactiveException, BillingContextError: re-raised after the
        progress log has been finished as failed.
    """
    operationId = None
    try:
        prompt = _build_research_prompt(parameters)
        if not prompt:
            return ActionResult.isFailure(error="Research prompt is required")

        # RBAC: Check service-level permission (skipped when no rbac is set)
        rbac = getattr(self.services, "rbac", None)
        if rbac and not can_access_service(
            self.services.user,
            rbac,
            "web",
            mandate_id=getattr(self.services, "mandateId", None),
            feature_instance_id=getattr(self.services, "featureInstanceId", None),
        ):
            return ActionResult.isFailure(error="Permission denied: Web research service")

        # Build context for service center
        context = ServiceCenterContext(
            user=self.services.user,
            mandate_id=getattr(self.services, "mandateId", None),
            feature_instance_id=getattr(self.services, "featureInstanceId", None),
            workflow_id=self.services.workflow.id if self.services.workflow else None,
            workflow=self.services.workflow,
        )
        web_service = getService("web", context)

        # Derive an operation id for progress tracking; without a workflow a
        # timestamp-based placeholder is used instead of the workflow id.
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"web_research_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Web Research",
            "Searching and Crawling",
            "Extracting URLs and Content",
            parentOperationId=parentOperationId
        )

        # Delegate the actual research to the web service
        result = await web_service.performWebResearch(
            prompt=prompt,
            urls=parameters.get("urlList", []),
            country=parameters.get("country"),
            language=parameters.get("language"),
            researchDepth=parameters.get("researchDepth", "general"),
            operationId=operationId
        )

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)

        # Prefer the filename suggested in the research result. The result is
        # only queried when it is a dict, consistent with the serialization
        # below which also tolerates non-dict results.
        suggestedFilename = result.get("suggested_filename") if isinstance(result, dict) else None
        meaningfulName = _clean_suggested_filename(suggestedFilename)
        if meaningfulName is None:
            # Fallback to generic meaningful filename
            meaningfulName = self._generateMeaningfulFileName(
                base_name="web_research",
                extension="json",
                action_name="research"
            )

        validationMetadata = {
            "actionType": "ai.webResearch",
            "prompt": prompt,
            "urlList": parameters.get("urlList", []),
            "country": parameters.get("country"),
            "language": parameters.get("language"),
            "researchDepth": parameters.get("researchDepth", "general"),
            "resultFormat": "json"
        }

        # Dict results are serialized to JSON text; anything else is passed on unchanged
        documentData = json.dumps(result, ensure_ascii=False) if isinstance(result, dict) else result
        actionDocument = ActionDocument(
            documentName=meaningfulName,
            documentData=documentData,
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[actionDocument])

    except (SubscriptionInactiveException, BillingContextError):
        # These must reach the caller unchanged; only close the progress log.
        _mark_progress_failed(self, operationId)
        raise
    except Exception as e:
        logger.exception("Error in web research: %s", e)
        _mark_progress_failed(self, operationId)
        return ActionResult.isFailure(error=str(e))