refactored web research to be generic and in 2 steps: url, crawl
This commit is contained in:
parent
e8c3052176
commit
72e0687826
9 changed files with 1079 additions and 2169 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -21,11 +21,8 @@ class OperationTypeEnum(str, Enum):
|
||||||
IMAGE_GENERATE = "imageGenerate"
|
IMAGE_GENERATE = "imageGenerate"
|
||||||
|
|
||||||
# Web Operations
|
# Web Operations
|
||||||
WEB_SEARCH = "webSearch" # Returns list of URLs only
|
WEB_SEARCH = "webSearch" # Returns list of URLs only
|
||||||
WEB_CRAWL = "webCrawl" # Returns content from given URLs
|
WEB_CRAWL = "webCrawl" # Web crawl for a given URL
|
||||||
WEB_RESEARCH = "webResearch" # WEB_SEARCH + WEB_CRAWL combined (scrape function)
|
|
||||||
WEB_QUESTIONS = "webQuestions" # Question-answering web research
|
|
||||||
WEB_NEWS = "webNews" # News-specific web research
|
|
||||||
|
|
||||||
|
|
||||||
# Operation Type Rating - Helper class for capability ratings
|
# Operation Type Rating - Helper class for capability ratings
|
||||||
|
|
@ -49,8 +46,8 @@ def createOperationTypeRatings(*ratings: Tuple[OperationTypeEnum, int]) -> List[
|
||||||
Usage:
|
Usage:
|
||||||
operationTypes = createOperationTypeRatings(
|
operationTypes = createOperationTypeRatings(
|
||||||
(OperationTypeEnum.DATA_ANALYSE, 8),
|
(OperationTypeEnum.DATA_ANALYSE, 8),
|
||||||
(OperationTypeEnum.WEB_RESEARCH, 10),
|
(OperationTypeEnum.WEB_SEARCH, 10),
|
||||||
(OperationTypeEnum.WEB_NEWS, 7)
|
(OperationTypeEnum.WEB_CRAWL, 9)
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
return [OperationTypeRating(operationType=ot, rating=rating) for ot, rating in ratings]
|
return [OperationTypeRating(operationType=ot, rating=rating) for ot, rating in ratings]
|
||||||
|
|
@ -195,3 +192,42 @@ class AiModelResponse(BaseModel):
|
||||||
class Config:
|
class Config:
|
||||||
arbitraryTypesAllowed = True
|
arbitraryTypesAllowed = True
|
||||||
|
|
||||||
|
|
||||||
|
# Structured prompt models for specialized operations
|
||||||
|
class AiCallPromptWebSearch(BaseModel):
    """Structured prompt format for WEB_SEARCH operation - returns list of URLs."""

    # Only `instruction` is required; every other field has a default.
    instruction: str = Field(description="Search instruction/query for finding relevant URLs")
    country: Optional[str] = Field(default=None, description="Two-digit country code (lowercase, e.g., ch, us, de, fr)")
    maxNumberPages: Optional[int] = Field(default=10, description="Maximum number of pages to search (default: 10)")
    timeRange: Optional[str] = Field(default=None, description="Time range filter (d, w, m, y)")
    language: Optional[str] = Field(default=None, description="Language code (lowercase, e.g., de, en, fr)")
    researchDepth: Optional[str] = Field(default="general", description="Research depth: fast (maxDepth=1), general (maxDepth=2), deep (maxDepth=3)")

    # NOTE: the former empty inner `Config` class was dropped. It configured
    # nothing, and class-based config is deprecated in pydantic v2.
|
||||||
|
|
||||||
|
|
||||||
|
class AiCallPromptWebCrawl(BaseModel):
    """Structured prompt format for WEB_CRAWL operation - crawls ONE specific URL and returns content."""

    # `instruction` and `url` are required; depth and width have defaults.
    instruction: str = Field(description="Instruction for what content to extract from URL")
    url: str = Field(description="Single URL to crawl")
    maxDepth: Optional[int] = Field(default=2, description="Maximum number of hops from starting page (default: 2)")
    maxWidth: Optional[int] = Field(default=10, description="Maximum pages to crawl per level (default: 10)")

    # NOTE: the former empty inner `Config` class was dropped. It configured
    # nothing, and class-based config is deprecated in pydantic v2.
|
||||||
|
|
||||||
|
|
||||||
|
class AiCallPromptImage(BaseModel):
    """Structured prompt format for image generation."""

    # Only `prompt` is required; size, quality and style have defaults.
    prompt: str = Field(description="Text description of the image to generate")
    size: Optional[str] = Field(default="1024x1024", description="Image size (1024x1024, 1792x1024, 1024x1792)")
    quality: Optional[str] = Field(default="standard", description="Image quality (standard, hd)")
    style: Optional[str] = Field(default="vivid", description="Image style (vivid, natural)")

    # NOTE: the former empty inner `Config` class was dropped. It configured
    # nothing, and class-based config is deprecated in pydantic v2.
|
||||||
|
|
||||||
|
|
|
||||||
225
modules/datamodels/datamodelTools.py
Normal file
225
modules/datamodels/datamodelTools.py
Normal file
|
|
@ -0,0 +1,225 @@
|
||||||
|
"""
|
||||||
|
Utility data models and classes for common tools and mappings.
|
||||||
|
"""
|
||||||
|
|
||||||
|
class CountryCodes:
    """
    Centralized country code mapping for different services.

    Maps ISO-2 country codes to service-specific country names.
    Each service may have different requirements for country names.

    Lookups are case-insensitive and ignore surrounding whitespace. A value
    that cannot be resolved (unknown code, or a non-string such as None) is
    returned unchanged instead of raising, so callers can pass optional
    country settings straight through.
    """

    # Mapping: ISO-2 code -> (Tavily country name, Perplexity country name)
    _COUNTRY_MAP = {
        "AF": ("afghanistan", "Afghanistan"),
        "AL": ("albania", "Albania"),
        "DZ": ("algeria", "Algeria"),
        "AD": ("andorra", "Andorra"),
        "AO": ("angola", "Angola"),
        "AR": ("argentina", "Argentina"),
        "AM": ("armenia", "Armenia"),
        "AU": ("australia", "Australia"),
        "AT": ("austria", "Austria"),
        "AZ": ("azerbaijan", "Azerbaijan"),
        "BS": ("bahamas", "Bahamas"),
        "BH": ("bahrain", "Bahrain"),
        "BD": ("bangladesh", "Bangladesh"),
        "BB": ("barbados", "Barbados"),
        "BY": ("belarus", "Belarus"),
        "BE": ("belgium", "Belgium"),
        "BZ": ("belize", "Belize"),
        "BJ": ("benin", "Benin"),
        "BT": ("bhutan", "Bhutan"),
        "BO": ("bolivia", "Bolivia"),
        "BA": ("bosnia and herzegovina", "Bosnia and Herzegovina"),
        "BW": ("botswana", "Botswana"),
        "BR": ("brazil", "Brazil"),
        "BN": ("brunei", "Brunei"),
        "BG": ("bulgaria", "Bulgaria"),
        "BF": ("burkina faso", "Burkina Faso"),
        "BI": ("burundi", "Burundi"),
        "KH": ("cambodia", "Cambodia"),
        "CM": ("cameroon", "Cameroon"),
        "CA": ("canada", "Canada"),
        "CV": ("cape verde", "Cape Verde"),
        "CF": ("central african republic", "Central African Republic"),
        "TD": ("chad", "Chad"),
        "CL": ("chile", "Chile"),
        "CN": ("china", "China"),
        "CO": ("colombia", "Colombia"),
        "KM": ("comoros", "Comoros"),
        "CG": ("congo", "Congo"),
        "CR": ("costa rica", "Costa Rica"),
        "HR": ("croatia", "Croatia"),
        "CU": ("cuba", "Cuba"),
        "CY": ("cyprus", "Cyprus"),
        "CZ": ("czech republic", "Czech Republic"),
        "DK": ("denmark", "Denmark"),
        "DJ": ("djibouti", "Djibouti"),
        "DO": ("dominican republic", "Dominican Republic"),
        "EC": ("ecuador", "Ecuador"),
        "EG": ("egypt", "Egypt"),
        "SV": ("el salvador", "El Salvador"),
        "GQ": ("equatorial guinea", "Equatorial Guinea"),
        "ER": ("eritrea", "Eritrea"),
        "EE": ("estonia", "Estonia"),
        "ET": ("ethiopia", "Ethiopia"),
        "FJ": ("fiji", "Fiji"),
        "FI": ("finland", "Finland"),
        "FR": ("france", "France"),
        "GA": ("gabon", "Gabon"),
        "GM": ("gambia", "Gambia"),
        "GE": ("georgia", "Georgia"),
        "DE": ("germany", "Germany"),
        "GH": ("ghana", "Ghana"),
        "GR": ("greece", "Greece"),
        "GT": ("guatemala", "Guatemala"),
        "GN": ("guinea", "Guinea"),
        "HT": ("haiti", "Haiti"),
        "HN": ("honduras", "Honduras"),
        "HU": ("hungary", "Hungary"),
        "IS": ("iceland", "Iceland"),
        "IN": ("india", "India"),
        "ID": ("indonesia", "Indonesia"),
        "IR": ("iran", "Iran"),
        "IQ": ("iraq", "Iraq"),
        "IE": ("ireland", "Ireland"),
        "IL": ("israel", "Israel"),
        "IT": ("italy", "Italy"),
        "JM": ("jamaica", "Jamaica"),
        "JP": ("japan", "Japan"),
        "JO": ("jordan", "Jordan"),
        "KZ": ("kazakhstan", "Kazakhstan"),
        "KE": ("kenya", "Kenya"),
        "KW": ("kuwait", "Kuwait"),
        "KG": ("kyrgyzstan", "Kyrgyzstan"),
        "LV": ("latvia", "Latvia"),
        "LB": ("lebanon", "Lebanon"),
        "LS": ("lesotho", "Lesotho"),
        "LR": ("liberia", "Liberia"),
        "LY": ("libya", "Libya"),
        "LI": ("liechtenstein", "Liechtenstein"),
        "LT": ("lithuania", "Lithuania"),
        "LU": ("luxembourg", "Luxembourg"),
        "MG": ("madagascar", "Madagascar"),
        "MW": ("malawi", "Malawi"),
        "MY": ("malaysia", "Malaysia"),
        "MV": ("maldives", "Maldives"),
        "ML": ("mali", "Mali"),
        "MT": ("malta", "Malta"),
        "MR": ("mauritania", "Mauritania"),
        "MU": ("mauritius", "Mauritius"),
        "MX": ("mexico", "Mexico"),
        "MD": ("moldova", "Moldova"),
        "MC": ("monaco", "Monaco"),
        "MN": ("mongolia", "Mongolia"),
        "ME": ("montenegro", "Montenegro"),
        "MA": ("morocco", "Morocco"),
        "MZ": ("mozambique", "Mozambique"),
        "MM": ("myanmar", "Myanmar"),
        "NA": ("namibia", "Namibia"),
        "NP": ("nepal", "Nepal"),
        "NL": ("netherlands", "Netherlands"),
        "NZ": ("new zealand", "New Zealand"),
        "NI": ("nicaragua", "Nicaragua"),
        "NE": ("niger", "Niger"),
        "NG": ("nigeria", "Nigeria"),
        "KP": ("north korea", "North Korea"),
        "MK": ("north macedonia", "North Macedonia"),
        "NO": ("norway", "Norway"),
        "OM": ("oman", "Oman"),
        "PK": ("pakistan", "Pakistan"),
        "PA": ("panama", "Panama"),
        "PG": ("papua new guinea", "Papua New Guinea"),
        "PY": ("paraguay", "Paraguay"),
        "PE": ("peru", "Peru"),
        "PH": ("philippines", "Philippines"),
        "PL": ("poland", "Poland"),
        "PT": ("portugal", "Portugal"),
        "QA": ("qatar", "Qatar"),
        "RO": ("romania", "Romania"),
        "RU": ("russia", "Russia"),
        "RW": ("rwanda", "Rwanda"),
        "SA": ("saudi arabia", "Saudi Arabia"),
        "SN": ("senegal", "Senegal"),
        "RS": ("serbia", "Serbia"),
        "SG": ("singapore", "Singapore"),
        "SK": ("slovakia", "Slovakia"),
        "SI": ("slovenia", "Slovenia"),
        "SO": ("somalia", "Somalia"),
        "ZA": ("south africa", "South Africa"),
        "KR": ("south korea", "South Korea"),
        "SS": ("south sudan", "South Sudan"),
        "ES": ("spain", "Spain"),
        "LK": ("sri lanka", "Sri Lanka"),
        "SD": ("sudan", "Sudan"),
        "SE": ("sweden", "Sweden"),
        "CH": ("switzerland", "Switzerland"),
        "SY": ("syria", "Syria"),
        "TW": ("taiwan", "Taiwan"),
        "TJ": ("tajikistan", "Tajikistan"),
        "TZ": ("tanzania", "Tanzania"),
        "TH": ("thailand", "Thailand"),
        "TG": ("togo", "Togo"),
        "TT": ("trinidad and tobago", "Trinidad and Tobago"),
        "TN": ("tunisia", "Tunisia"),
        "TR": ("turkey", "Turkey"),
        "TM": ("turkmenistan", "Turkmenistan"),
        "UG": ("uganda", "Uganda"),
        "UA": ("ukraine", "Ukraine"),
        "AE": ("united arab emirates", "United Arab Emirates"),
        "GB": ("united kingdom", "United Kingdom"),
        "US": ("united states", "United States"),
        "UY": ("uruguay", "Uruguay"),
        "UZ": ("uzbekistan", "Uzbekistan"),
        "VE": ("venezuela", "Venezuela"),
        "VN": ("vietnam", "Vietnam"),
        "YE": ("yemen", "Yemen"),
        "ZM": ("zambia", "Zambia"),
        "ZW": ("zimbabwe", "Zimbabwe"),
    }

    # Positions of the service-specific names inside each _COUNTRY_MAP tuple.
    _TAVILY_INDEX = 0
    _PERPLEXITY_INDEX = 1

    @classmethod
    def _lookup(cls, isoCode, nameIndex):
        """Return the name at `nameIndex` for `isoCode`, or `isoCode` itself if unresolved."""
        if not isinstance(isoCode, str):
            # None / non-string input: nothing to translate, pass it through.
            return isoCode
        names = cls._COUNTRY_MAP.get(isoCode.strip().upper())
        return names[nameIndex] if names else isoCode

    @classmethod
    def getForTavily(cls, isoCode: str) -> str:
        """
        Get Tavily-compatible country name from ISO-2 code.

        Args:
            isoCode: ISO-2 country code (e.g., "CH", "US"); case and
                surrounding whitespace are ignored

        Returns:
            Country name in lowercase as required by Tavily (e.g., "switzerland", "united states").
            The input is returned unchanged when it is not a known code.
        """
        return cls._lookup(isoCode, cls._TAVILY_INDEX)

    @classmethod
    def getForPerplexity(cls, isoCode: str) -> str:
        """
        Get Perplexity-compatible country name from ISO-2 code.

        Args:
            isoCode: ISO-2 country code (e.g., "CH", "US"); case and
                surrounding whitespace are ignored

        Returns:
            Full country name as required by Perplexity (e.g., "Switzerland", "United States").
            The input is returned unchanged when it is not a known code.
        """
        return cls._lookup(isoCode, cls._PERPLEXITY_INDEX)

    @classmethod
    def isValid(cls, isoCode: str) -> bool:
        """
        Check if ISO-2 code is valid.

        Args:
            isoCode: ISO-2 country code to check

        Returns:
            True if the code is in the mapping table, False otherwise
            (including for None or other non-string input)
        """
        return isinstance(isoCode, str) and isoCode.strip().upper() in cls._COUNTRY_MAP
|
||||||
|
|
||||||
|
|
@ -81,6 +81,9 @@ class Services:
|
||||||
from .serviceUtils.mainServiceUtils import UtilsService
|
from .serviceUtils.mainServiceUtils import UtilsService
|
||||||
self.utils = PublicService(UtilsService(self))
|
self.utils = PublicService(UtilsService(self))
|
||||||
|
|
||||||
|
from .serviceWeb.mainServiceWeb import WebService
|
||||||
|
self.web = PublicService(WebService(self))
|
||||||
|
|
||||||
|
|
||||||
def getInterface(user: User, workflow: ChatWorkflow) -> Services:
    """Create a Services instance for the given user and workflow."""
    serviceCenter = Services(user, workflow)
    return serviceCenter
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@ from typing import Dict, Any, List, Optional, Union
|
||||||
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
|
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
|
||||||
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
||||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
||||||
from modules.aicore.aicorePluginTavily import WebResearchRequest, WebResearchResult
|
|
||||||
from modules.interfaces.interfaceAiObjects import AiObjects
|
from modules.interfaces.interfaceAiObjects import AiObjects
|
||||||
from modules.services.serviceAi.subCoreAi import SubCoreAi
|
from modules.services.serviceAi.subCoreAi import SubCoreAi
|
||||||
from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
|
from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
|
||||||
|
|
|
||||||
314
modules/services/serviceWeb/mainServiceWeb.py
Normal file
314
modules/services/serviceWeb/mainServiceWeb.py
Normal file
|
|
@ -0,0 +1,314 @@
|
||||||
|
"""
|
||||||
|
Web crawl service for handling web research operations.
|
||||||
|
Manages the two-step process: WEB_SEARCH then WEB_CRAWL.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List, Optional
|
||||||
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptWebSearch, AiCallPromptWebCrawl
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class WebcrawlService:
    """Service for web search and crawling operations."""

    # Translation of the researchDepth keyword into the crawl maxDepth.
    _DEPTH_MAP = {"fast": 1, "general": 2, "deep": 3}
    _DEFAULT_MAX_DEPTH = 2
    _DEFAULT_MAX_PAGES = 10

    def __init__(self, services):
        """Initialize webcrawl service with service center access."""
        self.services = services

    # ------------------------------------------------------------------
    # Pure helpers (no I/O) for sanitising AI-produced data
    # ------------------------------------------------------------------

    @staticmethod
    def _toPositiveInt(value, default: int) -> int:
        """Coerce `value` to an int > 0, falling back to `default` when that is not possible."""
        try:
            number = int(value)
        except (TypeError, ValueError, OverflowError):
            return default
        return number if number > 0 else default

    @staticmethod
    def _mergeUrls(*urlLists) -> List[str]:
        """Concatenate URL lists, dropping non-strings, blanks and duplicates (first occurrence wins)."""
        merged = {}
        for urlList in urlLists:
            if not isinstance(urlList, (list, tuple)):
                continue
            for url in urlList:
                if isinstance(url, str) and url.strip():
                    merged.setdefault(url.strip(), None)
        return list(merged)

    @staticmethod
    def _extractUrls(searchData) -> List[str]:
        """
        Pull URLs out of a search response.

        Accepts {"urls": [...]}, {"results": [...]} or a bare list; entries may
        be plain URL strings or dicts carrying a "url" key. Anything else is
        ignored, so an unexpected shape yields an empty list instead of raising.
        """
        if isinstance(searchData, dict):
            candidates = searchData["urls"] if "urls" in searchData else searchData.get("results")
        else:
            candidates = searchData
        if not isinstance(candidates, (list, tuple)):
            return []

        urls = []
        for item in candidates:
            url = item.get("url") if isinstance(item, dict) else item
            if isinstance(url, str) and url:
                urls.append(url)
        return urls

    @staticmethod
    def _parseJsonObject(response) -> Dict[str, Any]:
        """
        Parse an AI response into a dict.

        Models frequently wrap JSON in Markdown fences or add a sentence around
        it, so when the raw text is not valid JSON the outermost {...} span is
        tried as well.

        Raises:
            ValueError: if no JSON object can be obtained (json.JSONDecodeError
                is a ValueError subclass).
        """
        if isinstance(response, dict):
            return response
        if not isinstance(response, str):
            raise ValueError("AI response is neither a JSON string nor an object")

        try:
            parsed = json.loads(response)
        except ValueError:
            start, end = response.find("{"), response.rfind("}")
            if start == -1 or end <= start:
                raise
            parsed = json.loads(response[start:end + 1])

        if not isinstance(parsed, dict):
            raise ValueError("AI response is valid JSON but not an object")
        return parsed

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    async def performWebResearch(
        self,
        prompt: str,
        urls: List[str],
        country: Optional[str],
        language: Optional[str],
        researchDepth: str = "general",
        operationId: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Perform web research:
        1. Use AI to analyze prompt and extract parameters + URLs
        2. Call WEB_SEARCH to get URLs (if needed)
        3. Combine URLs and filter to maxNumberPages
        4. Call WEB_CRAWL for each URL
        5. Return consolidated result

        Args:
            prompt: Natural language research prompt
            urls: Optional list of URLs provided by user
            country: Optional country code
            language: Optional language code
            researchDepth: "fast", "general" or "deep"; used when the AI
                analysis does not supply a depth. Unknown values crawl with
                the default depth of 2.
            operationId: Operation ID for progress tracking

        Returns:
            Consolidated research results as dictionary with the keys
            "instruction", "urls_crawled", "total_urls", "results" and
            "total_results", or {"error": ...} when there is nothing to crawl.

        Raises:
            Exception: any error from the analysis/progress steps is logged
                and re-raised.
        """
        try:
            # Step 1: AI intention analysis - extract URLs and parameters from prompt
            self.services.workflow.progressLogUpdate(operationId, 0.1, "Analyzing research intent")

            analysisResult = await self._analyzeResearchIntent(prompt, urls, country, language, researchDepth)

            # The analysis is model output: keys can be missing OR present with
            # null / wrongly typed values, so every value is validated and
            # falls back to the caller-supplied setting.
            instruction = analysisResult.get("instruction")
            if not isinstance(instruction, str) or not instruction.strip():
                instruction = prompt
            extractedUrls = self._extractUrls(analysisResult.get("urls"))
            needsSearch = analysisResult.get("needsSearch")
            needsSearch = True if needsSearch is None else bool(needsSearch)  # Default to True
            maxNumberPages = self._toPositiveInt(analysisResult.get("maxNumberPages"), self._DEFAULT_MAX_PAGES)
            timeRange = analysisResult.get("timeRange")
            countryCode = analysisResult.get("country") or country
            languageCode = analysisResult.get("language") or language
            finalResearchDepth = analysisResult.get("researchDepth") or researchDepth

            logger.info(f"AI Analysis: instruction='{instruction[:100]}...', urls={len(extractedUrls)}, needsSearch={needsSearch}, maxNumberPages={maxNumberPages}, researchDepth={finalResearchDepth}")

            # Combine URLs (from user + from prompt extraction). The user's
            # URLs are shown to the AI, which may echo them back, so duplicates
            # are removed to avoid crawling a page twice.
            allUrls = self._mergeUrls(urls, extractedUrls)

            # Step 2: Search for URLs if needed (based on needsSearch flag)
            if needsSearch and len(allUrls) < maxNumberPages:
                self.services.workflow.progressLogUpdate(operationId, 0.3, "Searching for URLs")

                searchUrls = await self._performWebSearch(
                    instruction=instruction,
                    maxNumberPages=maxNumberPages - len(allUrls),
                    timeRange=timeRange,
                    country=countryCode,
                    language=languageCode
                )

                # Add search URLs to the list
                allUrls = self._mergeUrls(allUrls, searchUrls)

            self.services.workflow.progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")

            # Step 3: Filter to maxNumberPages (simple cut, no intelligent filtering)
            if len(allUrls) > maxNumberPages:
                allUrls = allUrls[:maxNumberPages]
                logger.info(f"Limited URLs to {maxNumberPages}")

            if not allUrls:
                return {"error": "No URLs found to crawl"}

            # Step 4: Translate researchDepth to maxDepth
            depthKey = finalResearchDepth.lower() if isinstance(finalResearchDepth, str) else ""
            maxDepth = self._DEPTH_MAP.get(depthKey, self._DEFAULT_MAX_DEPTH)

            # Step 5: Crawl all URLs
            self.services.workflow.progressLogUpdate(operationId, 0.6, f"Crawling {len(allUrls)} URLs")

            crawlResult = await self._performWebCrawl(
                instruction=instruction,
                urls=allUrls,
                maxDepth=maxDepth
            )

            self.services.workflow.progressLogUpdate(operationId, 0.9, "Consolidating results")

            # Return consolidated result
            return {
                "instruction": instruction,
                "urls_crawled": allUrls,
                "total_urls": len(allUrls),
                "results": crawlResult,
                "total_results": len(crawlResult) if isinstance(crawlResult, list) else 1
            }

        except Exception as e:
            logger.error(f"Error in web research: {str(e)}")
            raise

    # ------------------------------------------------------------------
    # Individual steps
    # ------------------------------------------------------------------

    async def _analyzeResearchIntent(
        self,
        prompt: str,
        urls: List[str],
        country: Optional[str],
        language: Optional[str],
        researchDepth: str = "general"
    ) -> Dict[str, Any]:
        """
        Use AI to analyze prompt and extract:
        - URLs from the prompt text
        - Research instruction
        - maxNumberPages, timeRange, country, language from context

        Never raises: if the AI call fails or its answer cannot be parsed into
        a JSON object, a default analysis built from the arguments is returned
        (search enabled, 10 pages, no extracted URLs).
        """
        # Build analysis prompt for AI
        analysisPrompt = f"""Analyze this web research request and extract structured information.

RESEARCH REQUEST:
{prompt}

USER PROVIDED:
- URLs: {json.dumps(urls) if urls else "None"}
- Country: {country or "Not specified"}
- Language: {language or "Not specified"}

Extract and provide a JSON response with:
1. instruction: The core research instruction (cleaned prompt without URLs)
2. urls: List of URLs found in the prompt text
3. needsSearch: true if web search is needed to identify url's to crawl, false if only crawling of provided URLs is wanted
4. maxNumberPages: Recommended number of URLs to crawl (based on research scope, typical: 2-20)
5. timeRange: Time range if mentioned (d, w, m, y, or null)
6. country: Country code if specified (2-digit lowercase, e.g., ch, us, de)
7. language: Language code if specified (lowercase, e.g., de, en, fr)
8. researchDepth: Research depth based on instruction complexity - "fast" (quick overview, maxDepth=1), "general" (standard research, maxDepth=2), or "deep" (comprehensive research, maxDepth=3)

Return ONLY valid JSON, no additional text:
{{
    "instruction": "cleaned research instruction",
    "urls": ["url1", "url2"],
    "needsSearch": true,
    "maxNumberPages": 10,
    "timeRange": null,
    "country": "ch",
    "language": "de",
    "researchDepth": "general"
}}"""

        try:
            # Call AI planning to analyze intent
            analysisJson = await self.services.ai.callAiPlanning(analysisPrompt)

            # Parse JSON response (tolerates code fences / surrounding text)
            result = self._parseJsonObject(analysisJson)

            logger.info(f"Intent analysis result: {result}")
            return result

        except Exception as e:
            logger.warning(f"Error in AI intent analysis: {str(e)}")
            # Fallback to basic extraction
            return {
                "instruction": prompt,
                "urls": [],
                "needsSearch": True,
                "maxNumberPages": 10,
                "timeRange": None,
                "country": country,
                "language": language,
                "researchDepth": researchDepth
            }

    async def _performWebSearch(
        self,
        instruction: str,
        maxNumberPages: int,
        timeRange: Optional[str],
        country: Optional[str],
        language: Optional[str]
    ) -> List[str]:
        """
        Perform web search to find URLs.

        Best effort: any failure is logged and an empty list is returned so
        research can continue with the URLs that are already known.
        """
        try:
            # Build search prompt model
            searchPromptModel = AiCallPromptWebSearch(
                instruction=instruction,
                country=country,
                maxNumberPages=maxNumberPages,
                timeRange=timeRange,
                language=language
            )
            searchPrompt = searchPromptModel.model_dump_json(exclude_none=True, indent=2)

            # Call AI with WEB_SEARCH operation
            searchOptions = AiCallOptions(
                operationType=OperationTypeEnum.WEB_SEARCH,
                resultFormat="json"
            )

            searchResult = await self.services.ai.callAiDocuments(
                prompt=searchPrompt,
                documents=None,
                options=searchOptions,
                outputFormat="json"
            )

            # Parse and extract URLs
            searchData = json.loads(searchResult) if isinstance(searchResult, str) else searchResult
            urls = self._extractUrls(searchData)

            logger.info(f"Web search returned {len(urls)} URLs")
            return urls

        except Exception as e:
            logger.error(f"Error in web search: {str(e)}")
            return []

    async def _performWebCrawl(
        self,
        instruction: str,
        urls: List[str],
        maxDepth: int = 2
    ) -> List[Dict[str, Any]]:
        """
        Perform web crawl on list of URLs - calls plugin for each URL individually.

        URLs are crawled one after another. A failure for one URL is recorded
        as {"url": ..., "error": ...} and does not stop the remaining crawls.
        """
        crawlResults = []

        # Loop over each URL and crawl one at a time
        for url in urls:
            try:
                logger.info(f"Crawling URL: {url}")

                # Build crawl prompt model for single URL
                crawlPromptModel = AiCallPromptWebCrawl(
                    instruction=instruction,
                    url=url,  # Single URL
                    maxDepth=maxDepth,
                    maxWidth=10
                )
                crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)

                # Call AI with WEB_CRAWL operation
                crawlOptions = AiCallOptions(
                    operationType=OperationTypeEnum.WEB_CRAWL,
                    resultFormat="json"
                )

                crawlResult = await self.services.ai.callAiDocuments(
                    prompt=crawlPrompt,
                    documents=None,
                    options=crawlOptions,
                    outputFormat="json"
                )

                # Parse crawl result
                if isinstance(crawlResult, str):
                    try:
                        crawlData = json.loads(crawlResult)
                    except ValueError:
                        # Not JSON: keep the raw text as this URL's content.
                        crawlData = {"url": url, "content": crawlResult}
                else:
                    crawlData = crawlResult

                # Ensure it's a list of results
                if isinstance(crawlData, list):
                    crawlResults.extend(crawlData)
                elif isinstance(crawlData, dict):
                    nested = crawlData.get("results")
                    if isinstance(nested, list):
                        crawlResults.extend(nested)
                    else:
                        crawlResults.append(crawlData)
                else:
                    crawlResults.append({"url": url, "content": str(crawlData)})

            except Exception as e:
                logger.error(f"Error crawling URL {url}: {str(e)}")
                crawlResults.append({"url": url, "error": str(e)})

        return crawlResults


# The service registry imports this class as `WebService`
# (`from .serviceWeb.mainServiceWeb import WebService`); keep both names valid.
WebService = WebcrawlService
|
||||||
|
|
||||||
|
|
@ -10,9 +10,7 @@ from datetime import datetime, UTC
|
||||||
|
|
||||||
from modules.workflows.methods.methodBase import MethodBase, action
|
from modules.workflows.methods.methodBase import MethodBase, action
|
||||||
from modules.datamodels.datamodelChat import ActionResult
|
from modules.datamodels.datamodelChat import ActionResult
|
||||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptImage
|
||||||
from modules.datamodels.datamodelChat import ChatDocument
|
|
||||||
from modules.aicore.aicorePluginTavily import WebResearchRequest
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -163,272 +161,50 @@ class MethodAi(MethodBase):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@action
async def webSearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Search the web and return a list of relevant URLs only.
    - Input requirements: searchPrompt (required); optional maxResults, timeRange, country, language.
    - Output format: JSON with search results and URLs.

    Parameters:
    - searchPrompt (str, required): Natural language search prompt describing what to search for.
    - maxResults (int, optional): Maximum number of search results. Default: 5.
    - timeRange (str, optional): d | w | m | y for time filtering.
    - country (str, optional): Country name for localized results.
    - language (str, optional): Language code (e.g., de, en, fr).
    """
    # NOTE(review): the docstring above is kept word for word; it looks like
    # the action framework may read it at runtime - confirm before rewording.
    try:
        query = parameters.get("searchPrompt")
        if not query:
            return ActionResult.isFailure(error="Search prompt is required")

        # WEB_SEARCH call options with a JSON result format
        callOptions = AiCallOptions(
            operationType=OperationTypeEnum.WEB_SEARCH,
            resultFormat="json"
        )

        # Unified prompt payload; optional settings stay None when absent.
        # Key insertion order is the order in the serialized prompt.
        payload = {"searchPrompt": query}
        payload["maxResults"] = parameters.get("maxResults", 5)
        for optionalKey in ("timeRange", "country", "language"):
            payload[optionalKey] = parameters.get(optionalKey)
        payload["instructions"] = "Search the web and return a JSON response with a 'results' array containing objects with 'title', 'url', and optionally 'content' fields. Focus on finding relevant URLs for the search prompt."

        import json
        serializedPrompt = json.dumps(payload, indent=2)

        # Call AI service through unified path
        rawResult = await self.services.ai.callAiDocuments(
            prompt=serializedPrompt,
            documents=None,
            options=callOptions,
            outputFormat="json"
        )

        # Normalise the answer into the expected search-result JSON
        normalized = self._processWebSearchResult(rawResult)

        fileName = self._generateMeaningfulFileName(
            base_name="web_search",
            extension="json",
            action_name="search"
        )

        from modules.datamodels.datamodelChat import ActionDocument
        document = ActionDocument(
            documentName=fileName,
            documentData=normalized,
            mimeType="application/json"
        )
        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        # Any failure is reported as a failed ActionResult instead of raising
        logger.error(f"Error in web search: {str(e)}")
        return ActionResult.isFailure(error=str(e))
|
|
||||||
|
|
||||||
def _processWebSearchResult(self, result: str) -> str:
    """
    Normalise a raw web-search response into a JSON string with a 'results' list.

    Behaviour, in order:
    - JSON object that already has a 'results' key: the input string is
      returned unchanged.
    - Any other JSON object: every http(s) URL found inside it is turned into
      a {"title", "url"} entry.
    - Anything else that parses (or an object without URLs): an empty result
      set is returned with the raw input under 'raw_response'.
    - Any exception (e.g. input is not valid JSON): a warning is logged and an
      empty result set carrying 'error' and 'raw_response' is returned.
    """
    try:
        import json

        data = json.loads(result)

        if isinstance(data, dict):
            # Already in the expected shape: hand the original text back untouched.
            if "results" in data:
                return result

            # Different object layout: harvest whatever URLs it contains.
            urls = self._extractUrlsFromJson(data)
            if urls:
                entries = []
                for i, url in enumerate(urls):
                    entries.append({"title": f"Result {i+1}", "url": url})
                return json.dumps(
                    {
                        "query": data.get("query", "web search"),
                        "results": entries,
                        "total_count": len(urls),
                    },
                    indent=2,
                )

        # Nothing usable was found; keep the raw payload for later inspection.
        emptyPayload = {
            "query": "web search",
            "results": [],
            "total_count": 0,
            "raw_response": result,
        }
        return json.dumps(emptyPayload, indent=2)

    except Exception as e:
        logger.warning(f"Error processing web search result: {str(e)}")
        # Same empty shape as above, plus the reason the processing failed.
        failurePayload = {
            "query": "web search",
            "results": [],
            "total_count": 0,
            "error": f"Failed to process result: {str(e)}",
            "raw_response": result,
        }
        return json.dumps(failurePayload, indent=2)
|
|
||||||
|
|
||||||
def _extractUrlsFromJson(self, data: Dict[str, Any]) -> List[str]:
    """
    Collect every http(s) URL string found anywhere inside a JSON-like structure.

    Dict values and list items are walked recursively (dict keys are not
    inspected). A string counts as a URL when it starts with 'http://' or
    'https://'. Non-string scalars are ignored.

    Returns:
        The unique URLs in first-seen (depth-first) order, so repeated calls on
        the same input always give the same list.
    """
    urls = []

    def _extractFromValue(value):
        if isinstance(value, str):
            # Only absolute web URLs are of interest.
            if value.startswith(('http://', 'https://')):
                urls.append(value)
        elif isinstance(value, dict):
            for v in value.values():
                _extractFromValue(v)
        elif isinstance(value, list):
            for item in value:
                _extractFromValue(item)

    _extractFromValue(data)
    # dict.fromkeys de-duplicates while keeping insertion order; a plain set()
    # would return the URLs in an arbitrary, run-dependent order.
    return list(dict.fromkeys(urls))
|
|
||||||
|
|
||||||
|
|
||||||
@action
async def webCrawl(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Extract content from specific URLs.
    - Input requirements: urls (required); optional extractDepth, format.
    - Output format: JSON with extracted content from URLs.

    Parameters:
    - urls (list, required): List of URLs to crawl and extract content from.
    - extractDepth (str, optional): basic | advanced. Default: advanced.
    - format (str, optional): markdown | html | text. Default: markdown.
    """
    try:
        import json

        # Guard clause: a non-empty list of URLs is mandatory.
        targetUrls = parameters.get("urls")
        if not (targetUrls and isinstance(targetUrls, list)):
            return ActionResult.isFailure(error="URLs list is required")

        # Optional tuning knobs with their documented defaults.
        depth = parameters.get("extractDepth", "advanced")
        contentFormat = parameters.get("format", "markdown")

        # The operation type marks this AI call as a web crawl.
        callOptions = AiCallOptions(
            operationType=OperationTypeEnum.WEB_CRAWL,
            resultFormat="json"
        )

        # The request travels to the AI service as a JSON-encoded prompt.
        requestJson = json.dumps(
            {
                "urls": targetUrls,
                "extractDepth": depth,
                "format": contentFormat,
                "instructions": "Extract content from the provided URLs and return a JSON response with 'results' array containing objects with 'url', 'title', 'content', and 'extractedAt' fields."
            },
            indent=2
        )

        crawlOutput = await self.services.ai.callAiDocuments(
            prompt=requestJson,
            documents=None,
            options=callOptions,
            outputFormat="json"
        )

        fileName = self._generateMeaningfulFileName(
            base_name="web_crawl",
            extension="json",
            action_name="crawl"
        )

        from modules.datamodels.datamodelChat import ActionDocument

        # The service output is attached as-is, without post-processing.
        crawlDocument = ActionDocument(
            documentName=fileName,
            documentData=crawlOutput,
            mimeType="application/json"
        )
        return ActionResult.isSuccess(documents=[crawlDocument])

    except Exception as e:
        # Any failure is logged and reported as a failed action result.
        logger.error(f"Error in web crawl: {str(e)}")
        return ActionResult.isFailure(error=str(e))
|
|
||||||
|
|
||||||
|
|
||||||
@action
|
@action
|
||||||
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
"""
|
||||||
GENERAL:
|
GENERAL:
|
||||||
- Purpose: Comprehensive web research combining search and content extraction.
|
- Purpose: Web research with two-step process: search for URLs, then crawl content.
|
||||||
- Input requirements: researchPrompt (required); optional maxResults, urls, timeRange, country, language.
|
- Input requirements: prompt (required); optional list(url), country, language, researchDepth.
|
||||||
- Output format: JSON with research results, sources, and analysis.
|
- Output format: JSON with research results including URLs and content.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
- researchPrompt (str, required): Natural language research prompt describing what to research.
|
- prompt (str, required): Natural language research instruction, including time range if relevant.
|
||||||
- maxResults (int, optional): Maximum search results. Default: 5.
|
- list(url) (list, optional): Specific URLs to crawl, if needed.
|
||||||
- urls (list, optional): Specific URLs to include in research.
|
- country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
|
||||||
- timeRange (str, optional): d | w | m | y for time filtering.
|
- language (str, optional): Language code (lowercase, e.g., de, en, fr).
|
||||||
- country (str, optional): Country name for localized results.
|
- researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
|
||||||
- language (str, optional): Language code (e.g., de, en, fr).
|
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
researchPrompt = parameters.get("researchPrompt")
|
prompt = parameters.get("prompt")
|
||||||
if not researchPrompt:
|
if not prompt:
|
||||||
return ActionResult.isFailure(error="Research prompt is required")
|
return ActionResult.isFailure(error="Research prompt is required")
|
||||||
|
|
||||||
# Extract optional parameters
|
# Init progress logger
|
||||||
maxResults = parameters.get("maxResults", 5)
|
operationId = f"web_research_{self.services.currentWorkflow.id}_{int(time.time())}"
|
||||||
urls = parameters.get("urls")
|
|
||||||
timeRange = parameters.get("timeRange")
|
|
||||||
country = parameters.get("country")
|
|
||||||
language = parameters.get("language")
|
|
||||||
|
|
||||||
# Build AI call options for web research
|
# Start progress tracking
|
||||||
options = AiCallOptions(
|
self.services.workflow.progressLogStart(
|
||||||
operationType=OperationTypeEnum.WEB_RESEARCH,
|
operationId,
|
||||||
resultFormat="json"
|
"Web Research",
|
||||||
|
"Searching and Crawling",
|
||||||
|
"Extracting URLs and Content"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create unified prompt JSON for web research
|
# Call webcrawl service - service handles all AI intention analysis and processing
|
||||||
promptData = {
|
result = await self.services.web.performWebResearch(
|
||||||
"researchPrompt": researchPrompt,
|
|
||||||
"maxResults": maxResults,
|
|
||||||
"urls": urls,
|
|
||||||
"timeRange": timeRange,
|
|
||||||
"country": country,
|
|
||||||
"language": language,
|
|
||||||
"instructions": "Conduct comprehensive web research and return a JSON response with 'results' array containing objects with 'title', 'url', 'content', and 'analysis' fields. Provide detailed analysis and insights."
|
|
||||||
}
|
|
||||||
|
|
||||||
import json
|
|
||||||
prompt = json.dumps(promptData, indent=2)
|
|
||||||
|
|
||||||
# Call AI service through unified path
|
|
||||||
result = await self.services.ai.callAiDocuments(
|
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
documents=None,
|
urls=parameters.get("list(url)", []),
|
||||||
options=options,
|
country=parameters.get("country"),
|
||||||
outputFormat="json"
|
language=parameters.get("language"),
|
||||||
|
researchDepth=parameters.get("researchDepth", "general"),
|
||||||
|
operationId=operationId
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Complete progress tracking
|
||||||
|
self.services.workflow.progressLogFinish(operationId, True)
|
||||||
|
|
||||||
# Create meaningful filename
|
# Create meaningful filename
|
||||||
meaningfulName = self._generateMeaningfulFileName(
|
meaningfulName = self._generateMeaningfulFileName(
|
||||||
base_name="web_research",
|
base_name="web_research",
|
||||||
|
|
@ -447,157 +223,10 @@ class MethodAi(MethodBase):
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in web research: {str(e)}")
|
logger.error(f"Error in web research: {str(e)}")
|
||||||
return ActionResult.isFailure(error=str(e))
|
try:
|
||||||
|
self.services.workflow.progressLogFinish(operationId, False)
|
||||||
|
except:
|
||||||
@action
|
pass
|
||||||
async def webQuestions(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Answer questions using web research and AI analysis.
    - Input requirements: question (required); optional context, maxResults, timeRange, country, language.
    - Output format: JSON with question answer and supporting sources.

    Parameters:
    - question (str, required): Question to be answered using web research.
    - context (str, optional): Additional context for the question.
    - maxResults (int, optional): Maximum search results. Default: 5.
    - timeRange (str, optional): d | w | m | y for time filtering.
    - country (str, optional): Country name for localized results.
    - language (str, optional): Language code (e.g., de, en, fr).
    """
    try:
        import json

        # Guard clause: nothing to do without a question.
        userQuestion = parameters.get("question")
        if not userQuestion:
            return ActionResult.isFailure(error="Question is required")

        # Optional inputs; unset filters are passed through as None.
        extraContext = parameters.get("context", "")
        resultLimit = parameters.get("maxResults", 5)
        timeFilter = parameters.get("timeRange")
        countryFilter = parameters.get("country")
        languageFilter = parameters.get("language")

        # The operation type marks this AI call as web question answering.
        callOptions = AiCallOptions(
            operationType=OperationTypeEnum.WEB_QUESTIONS,
            resultFormat="json"
        )

        # The request travels to the AI service as a JSON-encoded prompt.
        requestJson = json.dumps(
            {
                "question": userQuestion,
                "context": extraContext,
                "maxResults": resultLimit,
                "timeRange": timeFilter,
                "country": countryFilter,
                "language": languageFilter,
                "instructions": "Answer the question using web research and return a JSON response with 'answer', 'sources' array containing objects with 'title', 'url', 'content', and 'relevance' fields."
            },
            indent=2
        )

        answerOutput = await self.services.ai.callAiDocuments(
            prompt=requestJson,
            documents=None,
            options=callOptions,
            outputFormat="json"
        )

        fileName = self._generateMeaningfulFileName(
            base_name="web_questions",
            extension="json",
            action_name="questions"
        )

        from modules.datamodels.datamodelChat import ActionDocument

        # The service output is attached as-is, without post-processing.
        answerDocument = ActionDocument(
            documentName=fileName,
            documentData=answerOutput,
            mimeType="application/json"
        )
        return ActionResult.isSuccess(documents=[answerDocument])

    except Exception as e:
        # Any failure is logged and reported as a failed action result.
        logger.error(f"Error in web questions: {str(e)}")
        return ActionResult.isFailure(error=str(e))
|
|
||||||
|
|
||||||
|
|
||||||
@action
async def webNews(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Search and analyze news articles on specific topics.
    - Input requirements: newsPrompt (required); optional maxResults, timeRange, country, language.
    - Output format: JSON with news articles, summaries, and analysis.

    Parameters:
    - newsPrompt (str, required): Natural language prompt describing what news to search for.
    - maxResults (int, optional): Maximum news articles. Default: 5.
    - timeRange (str, optional): d | w | m | y for time filtering. Default: w.
    - country (str, optional): Country name for localized news.
    - language (str, optional): Language code (e.g., de, en, fr).
    """
    try:
        import json

        # Guard clause: nothing to do without a news prompt.
        newsQuery = parameters.get("newsPrompt")
        if not newsQuery:
            return ActionResult.isFailure(error="News prompt is required")

        # Optional inputs; the time range falls back to one week ("w").
        articleLimit = parameters.get("maxResults", 5)
        timeFilter = parameters.get("timeRange", "w")
        countryFilter = parameters.get("country")
        languageFilter = parameters.get("language")

        # The operation type marks this AI call as a news search.
        callOptions = AiCallOptions(
            operationType=OperationTypeEnum.WEB_NEWS,
            resultFormat="json"
        )

        # The request travels to the AI service as a JSON-encoded prompt.
        requestJson = json.dumps(
            {
                "newsPrompt": newsQuery,
                "maxResults": articleLimit,
                "timeRange": timeFilter,
                "country": countryFilter,
                "language": languageFilter,
                "instructions": "Find and analyze recent news articles and return a JSON response with 'articles' array containing objects with 'title', 'url', 'content', 'date', 'source', and 'summary' fields."
            },
            indent=2
        )

        newsOutput = await self.services.ai.callAiDocuments(
            prompt=requestJson,
            documents=None,
            options=callOptions,
            outputFormat="json"
        )

        fileName = self._generateMeaningfulFileName(
            base_name="web_news",
            extension="json",
            action_name="news"
        )

        from modules.datamodels.datamodelChat import ActionDocument

        # The service output is attached as-is, without post-processing.
        newsDocument = ActionDocument(
            documentName=fileName,
            documentData=newsOutput,
            mimeType="application/json"
        )
        return ActionResult.isSuccess(documents=[newsDocument])

    except Exception as e:
        # Any failure is logged and reported as a failed action result.
        logger.error(f"Error in web news: {str(e)}")
        return ActionResult.isFailure(error=str(e))
|
return ActionResult.isFailure(error=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -631,17 +260,16 @@ class MethodAi(MethodBase):
|
||||||
resultFormat="base64"
|
resultFormat="base64"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create unified prompt JSON for image generation
|
# Create structured prompt using Pydantic model
|
||||||
promptData = {
|
promptModel = AiCallPromptImage(
|
||||||
"prompt": prompt,
|
prompt=prompt,
|
||||||
"size": size,
|
size=size,
|
||||||
"quality": quality,
|
quality=quality,
|
||||||
"style": style,
|
style=style
|
||||||
"instructions": "Generate an image based on the prompt and return the base64 encoded image data."
|
)
|
||||||
}
|
|
||||||
|
|
||||||
import json
|
# Convert to JSON string for prompt
|
||||||
promptJson = json.dumps(promptData, indent=2)
|
promptJson = promptModel.model_dump_json(exclude_none=True, indent=2)
|
||||||
|
|
||||||
# Call AI service through unified path
|
# Call AI service through unified path
|
||||||
result = await self.services.ai.callAiDocuments(
|
result = await self.services.ai.callAiDocuments(
|
||||||
|
|
|
||||||
|
|
@ -91,26 +91,18 @@ class AIModelsTester:
|
||||||
print(f"TESTING MODEL: {modelName}")
|
print(f"TESTING MODEL: {modelName}")
|
||||||
print(f"{'='*60}")
|
print(f"{'='*60}")
|
||||||
|
|
||||||
# Choose test prompt based on model type - Web models get JSON formatted prompts
|
# Use same prompt for all web models
|
||||||
import json
|
import json
|
||||||
|
|
||||||
if "tavily" in modelName.lower():
|
if "tavily" in modelName.lower() or "perplexity" in modelName.lower() or "llama" in modelName.lower() or "sonar" in modelName.lower() or "mistral" in modelName.lower():
|
||||||
# Tavily models get web search prompt in JSON format (from methodAi.py)
|
# All web models use the same JSON formatted prompt
|
||||||
|
# Country format: Use full name for Tavily (Switzerland), Perplexity converts ISO codes to names
|
||||||
testPrompt = json.dumps({
|
testPrompt = json.dumps({
|
||||||
"searchPrompt": "Search for recent news about artificial intelligence developments in 2024. Return the top 3 results as JSON with fields: title, url, snippet.",
|
"prompt": "Research, what ValueOn company in switzerland does and who works there? Return as JSON.",
|
||||||
"maxResults": 3,
|
|
||||||
"timeRange": "y",
|
|
||||||
"country": "United States",
|
|
||||||
"instructions": "Search the web and return a JSON response with a 'results' array containing objects with 'title', 'url', and optionally 'content' fields. Focus on finding relevant URLs for the search prompt."
|
|
||||||
}, indent=2)
|
|
||||||
elif "perplexity" in modelName.lower() or "llama" in modelName.lower() or "sonar" in modelName.lower() or "mistral" in modelName.lower():
|
|
||||||
# Perplexity models get web research prompt in JSON format (from methodAi.py)
|
|
||||||
testPrompt = json.dumps({
|
|
||||||
"researchPrompt": "Research the latest trends in renewable energy technology. Provide a comprehensive overview with key developments, companies involved, and future prospects. Return as JSON.",
|
|
||||||
"maxResults": 5,
|
"maxResults": 5,
|
||||||
"timeRange": "y",
|
"timeRange": "y",
|
||||||
"country": "United States",
|
"country": "CH", # ISO-2 code, Perplexity will convert to "Switzerland"
|
||||||
"instructions": "Conduct comprehensive web research and return a JSON response with 'results' array containing objects with 'title', 'url', 'content', and 'analysis' fields. Provide detailed analysis and insights."
|
"format": "json"
|
||||||
}, indent=2)
|
}, indent=2)
|
||||||
else:
|
else:
|
||||||
# Fallback for other models
|
# Fallback for other models
|
||||||
|
|
@ -444,9 +436,7 @@ Is Valid JSON: {result.get('isValidJson', False)}
|
||||||
# "dall-e-3", # Skipped - image generation, test later
|
# "dall-e-3", # Skipped - image generation, test later
|
||||||
"sonar", # Perplexity web model
|
"sonar", # Perplexity web model
|
||||||
"sonar-pro", # Perplexity web model
|
"sonar-pro", # Perplexity web model
|
||||||
"tavily-search", # Tavily web model
|
"tavily-search", # Tavily web model (unified research)
|
||||||
"tavily-extract", # Tavily web model
|
|
||||||
"tavily-search-extract", # Tavily web model
|
|
||||||
# "internal-extractor", # Skipped - internal model, test later
|
# "internal-extractor", # Skipped - internal model, test later
|
||||||
# "internal-generator", # Skipped - internal model, test later
|
# "internal-generator", # Skipped - internal model, test later
|
||||||
# "internal-renderer" # Skipped - internal model, test later
|
# "internal-renderer" # Skipped - internal model, test later
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue