refactored web research to be generic and two-step: url search, then crawl
parent e8c3052176 · commit 72e0687826
9 changed files with 1079 additions and 2169 deletions
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -21,11 +21,8 @@ class OperationTypeEnum(str, Enum):
     IMAGE_GENERATE = "imageGenerate"

     # Web Operations
-    WEB_SEARCH = "webSearch"  # Returns list of URLs only
-    WEB_CRAWL = "webCrawl"  # Returns content from given URLs
-    WEB_RESEARCH = "webResearch"  # WEB_SEARCH + WEB_CRAWL combined (scrape function)
-    WEB_QUESTIONS = "webQuestions"  # Question-answering web research
-    WEB_NEWS = "webNews"  # News-specific web research
+    WEB_SEARCH = "webSearch"  # Returns list of URLs only
+    WEB_CRAWL = "webCrawl"  # Web crawl for a given URL


 # Operation Type Rating - Helper class for capability ratings
@@ -49,8 +46,8 @@ def createOperationTypeRatings(*ratings: Tuple[OperationTypeEnum, int]) -> List[
     Usage:
         operationTypes = createOperationTypeRatings(
             (OperationTypeEnum.DATA_ANALYSE, 8),
-            (OperationTypeEnum.WEB_RESEARCH, 10),
-            (OperationTypeEnum.WEB_NEWS, 7)
+            (OperationTypeEnum.WEB_SEARCH, 10),
+            (OperationTypeEnum.WEB_CRAWL, 9)
         )
     """
     return [OperationTypeRating(operationType=ot, rating=rating) for ot, rating in ratings]
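For context, a minimal sketch of what the helper returns under the new operation types (illustrative only, using the models defined in this file):

    # Illustrative - result of the updated usage example above:
    operationTypes = createOperationTypeRatings(
        (OperationTypeEnum.WEB_SEARCH, 10),
        (OperationTypeEnum.WEB_CRAWL, 9)
    )
    # -> [OperationTypeRating(operationType=OperationTypeEnum.WEB_SEARCH, rating=10),
    #     OperationTypeRating(operationType=OperationTypeEnum.WEB_CRAWL, rating=9)]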
@@ -195,3 +192,42 @@ class AiModelResponse(BaseModel):
     class Config:
         arbitraryTypesAllowed = True
+
+
+# Structured prompt models for specialized operations
+class AiCallPromptWebSearch(BaseModel):
+    """Structured prompt format for WEB_SEARCH operation - returns list of URLs."""
+
+    instruction: str = Field(description="Search instruction/query for finding relevant URLs")
+    country: Optional[str] = Field(default=None, description="Two-letter country code (lowercase, e.g., ch, us, de, fr)")
+    maxNumberPages: Optional[int] = Field(default=10, description="Maximum number of pages to search (default: 10)")
+    timeRange: Optional[str] = Field(default=None, description="Time range filter (d, w, m, y)")
+    language: Optional[str] = Field(default=None, description="Language code (lowercase, e.g., de, en, fr)")
+    researchDepth: Optional[str] = Field(default="general", description="Research depth: fast (maxDepth=1), general (maxDepth=2), deep (maxDepth=3)")
+
+    class Config:
+        pass
+
+
+class AiCallPromptWebCrawl(BaseModel):
+    """Structured prompt format for WEB_CRAWL operation - crawls ONE specific URL and returns content."""
+
+    instruction: str = Field(description="Instruction for what content to extract from the URL")
+    url: str = Field(description="Single URL to crawl")
+    maxDepth: Optional[int] = Field(default=2, description="Maximum number of hops from the starting page (default: 2)")
+    maxWidth: Optional[int] = Field(default=10, description="Maximum pages to crawl per level (default: 10)")
+
+    class Config:
+        pass
+
+
+class AiCallPromptImage(BaseModel):
+    """Structured prompt format for image generation."""
+
+    prompt: str = Field(description="Text description of the image to generate")
+    size: Optional[str] = Field(default="1024x1024", description="Image size (1024x1024, 1792x1024, 1024x1792)")
+    quality: Optional[str] = Field(default="standard", description="Image quality (standard, hd)")
+    style: Optional[str] = Field(default="vivid", description="Image style (vivid, natural)")
+
+    class Config:
+        pass
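A minimal sketch of how these models are meant to be serialized into provider prompts (model names as defined above; the surrounding call is illustrative):

    # Illustrative: build a structured WEB_SEARCH prompt and serialize it.
    searchPrompt = AiCallPromptWebSearch(
        instruction="Find recent articles on battery recycling",
        country="ch",
        maxNumberPages=5,
        timeRange="m",
        language="de",
    )
    promptJson = searchPrompt.model_dump_json(exclude_none=True, indent=2)
    # promptJson is then used as the prompt of an AI call with
    # operationType=OperationTypeEnum.WEB_SEARCH (see mainServiceWeb.py below).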
modules/datamodels/datamodelTools.py (new file, 225 lines)
@@ -0,0 +1,225 @@
"""
Utility data models and classes for common tools and mappings.
"""


class CountryCodes:
    """
    Centralized country code mapping for different services.

    Maps ISO-2 country codes to service-specific country names.
    Each service may have different requirements for country names.
    """

    # Mapping: ISO-2 code -> (Tavily country name, Perplexity country name)
    _COUNTRY_MAP = {
        "AF": ("afghanistan", "Afghanistan"),
        "AL": ("albania", "Albania"),
        "DZ": ("algeria", "Algeria"),
        "AD": ("andorra", "Andorra"),
        "AO": ("angola", "Angola"),
        "AR": ("argentina", "Argentina"),
        "AM": ("armenia", "Armenia"),
        "AU": ("australia", "Australia"),
        "AT": ("austria", "Austria"),
        "AZ": ("azerbaijan", "Azerbaijan"),
        "BS": ("bahamas", "Bahamas"),
        "BH": ("bahrain", "Bahrain"),
        "BD": ("bangladesh", "Bangladesh"),
        "BB": ("barbados", "Barbados"),
        "BY": ("belarus", "Belarus"),
        "BE": ("belgium", "Belgium"),
        "BZ": ("belize", "Belize"),
        "BJ": ("benin", "Benin"),
        "BT": ("bhutan", "Bhutan"),
        "BO": ("bolivia", "Bolivia"),
        "BA": ("bosnia and herzegovina", "Bosnia and Herzegovina"),
        "BW": ("botswana", "Botswana"),
        "BR": ("brazil", "Brazil"),
        "BN": ("brunei", "Brunei"),
        "BG": ("bulgaria", "Bulgaria"),
        "BF": ("burkina faso", "Burkina Faso"),
        "BI": ("burundi", "Burundi"),
        "KH": ("cambodia", "Cambodia"),
        "CM": ("cameroon", "Cameroon"),
        "CA": ("canada", "Canada"),
        "CV": ("cape verde", "Cape Verde"),
        "CF": ("central african republic", "Central African Republic"),
        "TD": ("chad", "Chad"),
        "CL": ("chile", "Chile"),
        "CN": ("china", "China"),
        "CO": ("colombia", "Colombia"),
        "KM": ("comoros", "Comoros"),
        "CG": ("congo", "Congo"),
        "CR": ("costa rica", "Costa Rica"),
        "HR": ("croatia", "Croatia"),
        "CU": ("cuba", "Cuba"),
        "CY": ("cyprus", "Cyprus"),
        "CZ": ("czech republic", "Czech Republic"),
        "DK": ("denmark", "Denmark"),
        "DJ": ("djibouti", "Djibouti"),
        "DO": ("dominican republic", "Dominican Republic"),
        "EC": ("ecuador", "Ecuador"),
        "EG": ("egypt", "Egypt"),
        "SV": ("el salvador", "El Salvador"),
        "GQ": ("equatorial guinea", "Equatorial Guinea"),
        "ER": ("eritrea", "Eritrea"),
        "EE": ("estonia", "Estonia"),
        "ET": ("ethiopia", "Ethiopia"),
        "FJ": ("fiji", "Fiji"),
        "FI": ("finland", "Finland"),
        "FR": ("france", "France"),
        "GA": ("gabon", "Gabon"),
        "GM": ("gambia", "Gambia"),
        "GE": ("georgia", "Georgia"),
        "DE": ("germany", "Germany"),
        "GH": ("ghana", "Ghana"),
        "GR": ("greece", "Greece"),
        "GT": ("guatemala", "Guatemala"),
        "GN": ("guinea", "Guinea"),
        "HT": ("haiti", "Haiti"),
        "HN": ("honduras", "Honduras"),
        "HU": ("hungary", "Hungary"),
        "IS": ("iceland", "Iceland"),
        "IN": ("india", "India"),
        "ID": ("indonesia", "Indonesia"),
        "IR": ("iran", "Iran"),
        "IQ": ("iraq", "Iraq"),
        "IE": ("ireland", "Ireland"),
        "IL": ("israel", "Israel"),
        "IT": ("italy", "Italy"),
        "JM": ("jamaica", "Jamaica"),
        "JP": ("japan", "Japan"),
        "JO": ("jordan", "Jordan"),
        "KZ": ("kazakhstan", "Kazakhstan"),
        "KE": ("kenya", "Kenya"),
        "KW": ("kuwait", "Kuwait"),
        "KG": ("kyrgyzstan", "Kyrgyzstan"),
        "LV": ("latvia", "Latvia"),
        "LB": ("lebanon", "Lebanon"),
        "LS": ("lesotho", "Lesotho"),
        "LR": ("liberia", "Liberia"),
        "LY": ("libya", "Libya"),
        "LI": ("liechtenstein", "Liechtenstein"),
        "LT": ("lithuania", "Lithuania"),
        "LU": ("luxembourg", "Luxembourg"),
        "MG": ("madagascar", "Madagascar"),
        "MW": ("malawi", "Malawi"),
        "MY": ("malaysia", "Malaysia"),
        "MV": ("maldives", "Maldives"),
        "ML": ("mali", "Mali"),
        "MT": ("malta", "Malta"),
        "MR": ("mauritania", "Mauritania"),
        "MU": ("mauritius", "Mauritius"),
        "MX": ("mexico", "Mexico"),
        "MD": ("moldova", "Moldova"),
        "MC": ("monaco", "Monaco"),
        "MN": ("mongolia", "Mongolia"),
        "ME": ("montenegro", "Montenegro"),
        "MA": ("morocco", "Morocco"),
        "MZ": ("mozambique", "Mozambique"),
        "MM": ("myanmar", "Myanmar"),
        "NA": ("namibia", "Namibia"),
        "NP": ("nepal", "Nepal"),
        "NL": ("netherlands", "Netherlands"),
        "NZ": ("new zealand", "New Zealand"),
        "NI": ("nicaragua", "Nicaragua"),
        "NE": ("niger", "Niger"),
        "NG": ("nigeria", "Nigeria"),
        "KP": ("north korea", "North Korea"),
        "MK": ("north macedonia", "North Macedonia"),
        "NO": ("norway", "Norway"),
        "OM": ("oman", "Oman"),
        "PK": ("pakistan", "Pakistan"),
        "PA": ("panama", "Panama"),
        "PG": ("papua new guinea", "Papua New Guinea"),
        "PY": ("paraguay", "Paraguay"),
        "PE": ("peru", "Peru"),
        "PH": ("philippines", "Philippines"),
        "PL": ("poland", "Poland"),
        "PT": ("portugal", "Portugal"),
        "QA": ("qatar", "Qatar"),
        "RO": ("romania", "Romania"),
        "RU": ("russia", "Russia"),
        "RW": ("rwanda", "Rwanda"),
        "SA": ("saudi arabia", "Saudi Arabia"),
        "SN": ("senegal", "Senegal"),
        "RS": ("serbia", "Serbia"),
        "SG": ("singapore", "Singapore"),
        "SK": ("slovakia", "Slovakia"),
        "SI": ("slovenia", "Slovenia"),
        "SO": ("somalia", "Somalia"),
        "ZA": ("south africa", "South Africa"),
        "KR": ("south korea", "South Korea"),
        "SS": ("south sudan", "South Sudan"),
        "ES": ("spain", "Spain"),
        "LK": ("sri lanka", "Sri Lanka"),
        "SD": ("sudan", "Sudan"),
        "SE": ("sweden", "Sweden"),
        "CH": ("switzerland", "Switzerland"),
        "SY": ("syria", "Syria"),
        "TW": ("taiwan", "Taiwan"),
        "TJ": ("tajikistan", "Tajikistan"),
        "TZ": ("tanzania", "Tanzania"),
        "TH": ("thailand", "Thailand"),
        "TG": ("togo", "Togo"),
        "TT": ("trinidad and tobago", "Trinidad and Tobago"),
        "TN": ("tunisia", "Tunisia"),
        "TR": ("turkey", "Turkey"),
        "TM": ("turkmenistan", "Turkmenistan"),
        "UG": ("uganda", "Uganda"),
        "UA": ("ukraine", "Ukraine"),
        "AE": ("united arab emirates", "United Arab Emirates"),
        "GB": ("united kingdom", "United Kingdom"),
        "US": ("united states", "United States"),
        "UY": ("uruguay", "Uruguay"),
        "UZ": ("uzbekistan", "Uzbekistan"),
        "VE": ("venezuela", "Venezuela"),
        "VN": ("vietnam", "Vietnam"),
        "YE": ("yemen", "Yemen"),
        "ZM": ("zambia", "Zambia"),
        "ZW": ("zimbabwe", "Zimbabwe"),
    }

    @classmethod
    def getForTavily(cls, isoCode: str) -> str:
        """
        Get Tavily-compatible country name from ISO-2 code.

        Args:
            isoCode: ISO-2 country code (e.g., "CH", "US")

        Returns:
            Country name in lowercase as required by Tavily (e.g., "switzerland", "united states")
        """
        isoCodeUpper = isoCode.upper()
        mapping = cls._COUNTRY_MAP.get(isoCodeUpper)
        return mapping[0] if mapping else isoCode

    @classmethod
    def getForPerplexity(cls, isoCode: str) -> str:
        """
        Get Perplexity-compatible country name from ISO-2 code.

        Args:
            isoCode: ISO-2 country code (e.g., "CH", "US")

        Returns:
            Full country name as required by Perplexity (e.g., "Switzerland", "United States")
        """
        isoCodeUpper = isoCode.upper()
        mapping = cls._COUNTRY_MAP.get(isoCodeUpper)
        return mapping[1] if mapping else isoCode

    @classmethod
    def isValid(cls, isoCode: str) -> bool:
        """
        Check if an ISO-2 code is valid.

        Args:
            isoCode: ISO-2 country code to check

        Returns:
            True if valid, False otherwise
        """
        return isoCode.upper() in cls._COUNTRY_MAP
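A short usage sketch of the mapping (illustrative call sites, not part of the commit):

    # Illustrative: translating one ISO-2 code for each provider.
    CountryCodes.getForTavily("CH")       # -> "switzerland" (lowercase, Tavily style)
    CountryCodes.getForPerplexity("ch")   # -> "Switzerland" (full name, Perplexity style)
    CountryCodes.isValid("XX")            # -> False; unknown codes fall through unchanged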
@@ -81,6 +81,9 @@ class Services:
         from .serviceUtils.mainServiceUtils import UtilsService
         self.utils = PublicService(UtilsService(self))

+        from .serviceWeb.mainServiceWeb import WebService
+        self.web = PublicService(WebService(self))
+

 def getInterface(user: User, workflow: ChatWorkflow) -> Services:
     return Services(user, workflow)
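With that registration in place, callers reach the new service through the interface. A hedged sketch, assuming PublicService forwards attribute access as the other services do, and running inside an async context (argument values are hypothetical):

    # Illustrative wiring, assuming an existing User and ChatWorkflow:
    services = getInterface(user, workflow)
    result = await services.web.performWebResearch(
        prompt="Summarize recent EU AI regulation",
        urls=[],
        country="ch",
        language="de",
        researchDepth="general",
        operationId="web_research_demo",
    )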
@@ -3,7 +3,6 @@ from typing import Dict, Any, List, Optional, Union
 from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
 from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
 from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
-from modules.aicore.aicorePluginTavily import WebResearchRequest, WebResearchResult
 from modules.interfaces.interfaceAiObjects import AiObjects
 from modules.services.serviceAi.subCoreAi import SubCoreAi
 from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
modules/services/serviceWeb/mainServiceWeb.py (new file, 314 lines)
@@ -0,0 +1,314 @@
"""
Web crawl service for handling web research operations.
Manages the two-step process: WEB_SEARCH, then WEB_CRAWL.
"""

import json
import logging
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptWebSearch, AiCallPromptWebCrawl

logger = logging.getLogger(__name__)


class WebService:
    """Service for web search and crawling operations."""

    def __init__(self, services):
        """Initialize the web service with service center access."""
        self.services = services

    async def performWebResearch(
        self,
        prompt: str,
        urls: List[str],
        country: Optional[str],
        language: Optional[str],
        researchDepth: str = "general",
        operationId: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Perform web research as a two-phase (search, then crawl) pipeline:
        1. Use AI to analyze the prompt and extract parameters + URLs
        2. Call WEB_SEARCH to get URLs (if needed)
        3. Combine URLs and filter to maxNumberPages
        4. Call WEB_CRAWL for each URL
        5. Return a consolidated result

        Args:
            prompt: Natural language research prompt
            urls: Optional list of URLs provided by the user
            country: Optional country code
            language: Optional language code
            researchDepth: Research depth ("fast", "general", or "deep")
            operationId: Operation ID for progress tracking

        Returns:
            Consolidated research results as a dictionary
        """
        try:
            # Step 1: AI intention analysis - extract URLs and parameters from the prompt
            self.services.workflow.progressLogUpdate(operationId, 0.1, "Analyzing research intent")

            analysisResult = await self._analyzeResearchIntent(prompt, urls, country, language, researchDepth)

            # Extract parameters from the AI analysis
            instruction = analysisResult.get("instruction", prompt)
            extractedUrls = analysisResult.get("urls", [])
            needsSearch = analysisResult.get("needsSearch", True)  # Default to True
            maxNumberPages = analysisResult.get("maxNumberPages", 10)
            timeRange = analysisResult.get("timeRange")
            countryCode = analysisResult.get("country", country)
            languageCode = analysisResult.get("language", language)
            finalResearchDepth = analysisResult.get("researchDepth", researchDepth)

            logger.info(f"AI Analysis: instruction='{instruction[:100]}...', urls={len(extractedUrls)}, needsSearch={needsSearch}, maxNumberPages={maxNumberPages}, researchDepth={finalResearchDepth}")

            # Combine URLs (from the user + from prompt extraction)
            allUrls = []
            if urls:
                allUrls.extend(urls)
            if extractedUrls:
                allUrls.extend(extractedUrls)

            # Step 2: Search for URLs if needed (based on the needsSearch flag)
            if needsSearch and (not allUrls or len(allUrls) < maxNumberPages):
                self.services.workflow.progressLogUpdate(operationId, 0.3, "Searching for URLs")

                searchUrls = await self._performWebSearch(
                    instruction=instruction,
                    maxNumberPages=maxNumberPages - len(allUrls),
                    timeRange=timeRange,
                    country=countryCode,
                    language=languageCode
                )

                # Add search URLs to the list
                allUrls.extend(searchUrls)

            self.services.workflow.progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")

            # Step 3: Filter to maxNumberPages (simple cut, no intelligent filtering)
            if len(allUrls) > maxNumberPages:
                allUrls = allUrls[:maxNumberPages]
                logger.info(f"Limited URLs to {maxNumberPages}")

            if not allUrls:
                return {"error": "No URLs found to crawl"}

            # Step 4: Translate researchDepth to maxDepth
            depthMap = {"fast": 1, "general": 2, "deep": 3}
            maxDepth = depthMap.get(finalResearchDepth.lower(), 2)

            # Step 5: Crawl all URLs
            self.services.workflow.progressLogUpdate(operationId, 0.6, f"Crawling {len(allUrls)} URLs")

            crawlResult = await self._performWebCrawl(
                instruction=instruction,
                urls=allUrls,
                maxDepth=maxDepth
            )

            self.services.workflow.progressLogUpdate(operationId, 0.9, "Consolidating results")

            # Return the consolidated result
            return {
                "instruction": instruction,
                "urls_crawled": allUrls,
                "total_urls": len(allUrls),
                "results": crawlResult,
                "total_results": len(crawlResult) if isinstance(crawlResult, list) else 1
            }

        except Exception as e:
            logger.error(f"Error in web research: {str(e)}")
            raise

    async def _analyzeResearchIntent(
        self,
        prompt: str,
        urls: List[str],
        country: Optional[str],
        language: Optional[str],
        researchDepth: str = "general"
    ) -> Dict[str, Any]:
        """
        Use AI to analyze the prompt and extract:
        - URLs from the prompt text
        - The research instruction
        - maxNumberPages, timeRange, country, language from context
        """
        # Build the analysis prompt for the AI
        analysisPrompt = f"""Analyze this web research request and extract structured information.

RESEARCH REQUEST:
{prompt}

USER PROVIDED:
- URLs: {json.dumps(urls) if urls else "None"}
- Country: {country or "Not specified"}
- Language: {language or "Not specified"}

Extract and provide a JSON response with:
1. instruction: The core research instruction (cleaned prompt without URLs)
2. urls: List of URLs found in the prompt text
3. needsSearch: true if a web search is needed to identify URLs to crawl, false if only crawling of the provided URLs is wanted
4. maxNumberPages: Recommended number of URLs to crawl (based on research scope, typical: 2-20)
5. timeRange: Time range if mentioned (d, w, m, y, or null)
6. country: Country code if specified (two-letter lowercase, e.g., ch, us, de)
7. language: Language code if specified (lowercase, e.g., de, en, fr)
8. researchDepth: Research depth based on instruction complexity - "fast" (quick overview, maxDepth=1), "general" (standard research, maxDepth=2), or "deep" (comprehensive research, maxDepth=3)

Return ONLY valid JSON, no additional text:
{{
    "instruction": "cleaned research instruction",
    "urls": ["url1", "url2"],
    "needsSearch": true,
    "maxNumberPages": 10,
    "timeRange": null,
    "country": "ch",
    "language": "de",
    "researchDepth": "general"
}}"""

        try:
            # Call AI planning to analyze intent
            analysisJson = await self.services.ai.callAiPlanning(analysisPrompt)

            # Parse the JSON response
            result = json.loads(analysisJson)

            logger.info(f"Intent analysis result: {result}")
            return result

        except Exception as e:
            logger.warning(f"Error in AI intent analysis: {str(e)}")
            # Fall back to basic extraction
            return {
                "instruction": prompt,
                "urls": [],
                "needsSearch": True,
                "maxNumberPages": 10,
                "timeRange": None,
                "country": country,
                "language": language,
                "researchDepth": researchDepth
            }

    async def _performWebSearch(
        self,
        instruction: str,
        maxNumberPages: int,
        timeRange: Optional[str],
        country: Optional[str],
        language: Optional[str]
    ) -> List[str]:
        """Perform a web search to find URLs."""
        try:
            # Build the search prompt model
            searchPromptModel = AiCallPromptWebSearch(
                instruction=instruction,
                country=country,
                maxNumberPages=maxNumberPages,
                timeRange=timeRange,
                language=language
            )
            searchPrompt = searchPromptModel.model_dump_json(exclude_none=True, indent=2)

            # Call AI with the WEB_SEARCH operation
            searchOptions = AiCallOptions(
                operationType=OperationTypeEnum.WEB_SEARCH,
                resultFormat="json"
            )

            searchResult = await self.services.ai.callAiDocuments(
                prompt=searchPrompt,
                documents=None,
                options=searchOptions,
                outputFormat="json"
            )

            # Parse the result and extract URLs
            if isinstance(searchResult, str):
                searchData = json.loads(searchResult)
            else:
                searchData = searchResult

            # Extract URLs from the response
            urls = []
            if isinstance(searchData, dict):
                if "urls" in searchData:
                    urls = searchData["urls"]
                elif "results" in searchData:
                    urls = [r.get("url") for r in searchData["results"] if r.get("url")]
            elif isinstance(searchData, list):
                urls = [item.get("url") for item in searchData if item.get("url")]

            logger.info(f"Web search returned {len(urls)} URLs")
            return urls

        except Exception as e:
            logger.error(f"Error in web search: {str(e)}")
            return []

    async def _performWebCrawl(
        self,
        instruction: str,
        urls: List[str],
        maxDepth: int = 2
    ) -> List[Dict[str, Any]]:
        """Perform a web crawl over a list of URLs - calls the plugin for each URL individually."""
        crawlResults = []

        # Loop over the URLs and crawl one at a time
        for url in urls:
            try:
                logger.info(f"Crawling URL: {url}")

                # Build the crawl prompt model for a single URL
                crawlPromptModel = AiCallPromptWebCrawl(
                    instruction=instruction,
                    url=url,  # Single URL
                    maxDepth=maxDepth,
                    maxWidth=10
                )
                crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)

                # Call AI with the WEB_CRAWL operation
                crawlOptions = AiCallOptions(
                    operationType=OperationTypeEnum.WEB_CRAWL,
                    resultFormat="json"
                )

                crawlResult = await self.services.ai.callAiDocuments(
                    prompt=crawlPrompt,
                    documents=None,
                    options=crawlOptions,
                    outputFormat="json"
                )

                # Parse the crawl result
                if isinstance(crawlResult, str):
                    try:
                        crawlData = json.loads(crawlResult)
                    except json.JSONDecodeError:
                        crawlData = {"url": url, "content": crawlResult}
                else:
                    crawlData = crawlResult

                # Ensure a flat list of results
                if isinstance(crawlData, list):
                    crawlResults.extend(crawlData)
                elif isinstance(crawlData, dict):
                    if "results" in crawlData:
                        crawlResults.extend(crawlData["results"])
                    else:
                        crawlResults.append(crawlData)
                else:
                    crawlResults.append({"url": url, "content": str(crawlData)})

            except Exception as e:
                logger.error(f"Error crawling URL {url}: {str(e)}")
                crawlResults.append({"url": url, "error": str(e)})

        return crawlResults
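To make the pipeline contract concrete, a hypothetical trace through this service (all values invented for illustration):

    # Hypothetical intent-analysis output driving the pipeline:
    analysisResult = {
        "instruction": "Find out what ValueOn in Switzerland does",
        "urls": ["https://example.com/valueon"],  # extracted from the prompt
        "needsSearch": True,                      # still short of maxNumberPages
        "maxNumberPages": 5,
        "timeRange": None,
        "country": "ch",
        "language": "de",
        "researchDepth": "deep",
    }
    # "deep" maps to maxDepth=3 via depthMap = {"fast": 1, "general": 2, "deep": 3};
    # the service then searches for up to 4 more URLs, truncates the combined list
    # to 5, and crawls each URL with AiCallPromptWebCrawl(maxDepth=3, maxWidth=10).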
@@ -10,9 +10,7 @@ from datetime import datetime, UTC

 from modules.workflows.methods.methodBase import MethodBase, action
 from modules.datamodels.datamodelChat import ActionResult
-from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
-from modules.datamodels.datamodelChat import ChatDocument
-from modules.aicore.aicorePluginTavily import WebResearchRequest
+from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptImage

 logger = logging.getLogger(__name__)
@@ -163,272 +161,50 @@ class MethodAi(MethodBase):
         )


-    @action
-    async def webSearch(self, parameters: Dict[str, Any]) -> ActionResult:
-        """
-        GENERAL:
-        - Purpose: Search the web and return a list of relevant URLs only.
-        - Input requirements: searchPrompt (required); optional maxResults, timeRange, country, language.
-        - Output format: JSON with search results and URLs.
-
-        Parameters:
-        - searchPrompt (str, required): Natural language search prompt describing what to search for.
-        - maxResults (int, optional): Maximum number of search results. Default: 5.
-        - timeRange (str, optional): d | w | m | y for time filtering.
-        - country (str, optional): Country name for localized results.
-        - language (str, optional): Language code (e.g., de, en, fr).
-        """
-        try:
-            searchPrompt = parameters.get("searchPrompt")
-            if not searchPrompt:
-                return ActionResult.isFailure(error="Search prompt is required")
-
-            # Extract optional parameters
-            maxResults = parameters.get("maxResults", 5)
-            timeRange = parameters.get("timeRange")
-            country = parameters.get("country")
-            language = parameters.get("language")
-
-            # Build AI call options for web search
-            options = AiCallOptions(
-                operationType=OperationTypeEnum.WEB_SEARCH,
-                resultFormat="json"
-            )
-
-            # Create unified prompt JSON that both Tavily and Perplexity can understand
-            promptData = {
-                "searchPrompt": searchPrompt,
-                "maxResults": maxResults,
-                "timeRange": timeRange,
-                "country": country,
-                "language": language,
-                "instructions": "Search the web and return a JSON response with a 'results' array containing objects with 'title', 'url', and optionally 'content' fields. Focus on finding relevant URLs for the search prompt."
-            }
-
-            import json
-            prompt = json.dumps(promptData, indent=2)
-
-            # Call AI service through unified path
-            result = await self.services.ai.callAiDocuments(
-                prompt=prompt,
-                documents=None,
-                options=options,
-                outputFormat="json"
-            )
-
-            # Process result to ensure consistent format
-            processedResult = self._processWebSearchResult(result)
-
-            # Create meaningful filename
-            meaningfulName = self._generateMeaningfulFileName(
-                base_name="web_search",
-                extension="json",
-                action_name="search"
-            )
-
-            from modules.datamodels.datamodelChat import ActionDocument
-            actionDocument = ActionDocument(
-                documentName=meaningfulName,
-                documentData=processedResult,
-                mimeType="application/json"
-            )
-
-            return ActionResult.isSuccess(documents=[actionDocument])
-
-        except Exception as e:
-            logger.error(f"Error in web search: {str(e)}")
-            return ActionResult.isFailure(error=str(e))
-
-    def _processWebSearchResult(self, result: str) -> str:
-        """
-        Process web search result to ensure consistent JSON format with URL list.
-        Both Tavily and Perplexity now return proper JSON format.
-        """
-        try:
-            import json
-            data = json.loads(result)
-
-            # If it's already a proper search result format, return as-is
-            if isinstance(data, dict) and "results" in data:
-                return result
-
-            # If it's a different JSON format, try to extract URLs
-            if isinstance(data, dict):
-                # Look for URL patterns in the JSON
-                urls = self._extractUrlsFromJson(data)
-                if urls:
-                    processedData = {
-                        "query": data.get("query", "web search"),
-                        "results": [{"title": f"Result {i+1}", "url": url} for i, url in enumerate(urls)],
-                        "total_count": len(urls)
-                    }
-                    return json.dumps(processedData, indent=2)
-
-            # No URLs found, return original result in a structured format
-            processedData = {
-                "query": "web search",
-                "results": [],
-                "total_count": 0,
-                "raw_response": result
-            }
-            return json.dumps(processedData, indent=2)
-
-        except Exception as e:
-            logger.warning(f"Error processing web search result: {str(e)}")
-            # Return original result wrapped in error format
-            errorData = {
-                "query": "web search",
-                "results": [],
-                "total_count": 0,
-                "error": f"Failed to process result: {str(e)}",
-                "raw_response": result
-            }
-            return json.dumps(errorData, indent=2)
-
-    def _extractUrlsFromJson(self, data: Dict[str, Any]) -> List[str]:
-        """Extract URLs from JSON data structure."""
-        urls = []
-
-        def _extractFromValue(value):
-            if isinstance(value, str):
-                # Check if it's a URL
-                if value.startswith(('http://', 'https://')):
-                    urls.append(value)
-            elif isinstance(value, dict):
-                for v in value.values():
-                    _extractFromValue(v)
-            elif isinstance(value, list):
-                for item in value:
-                    _extractFromValue(item)
-
-        _extractFromValue(data)
-        return list(set(urls))  # Remove duplicates
-
-
-    @action
-    async def webCrawl(self, parameters: Dict[str, Any]) -> ActionResult:
-        """
-        GENERAL:
-        - Purpose: Extract content from specific URLs.
-        - Input requirements: urls (required); optional extractDepth, format.
-        - Output format: JSON with extracted content from URLs.
-
-        Parameters:
-        - urls (list, required): List of URLs to crawl and extract content from.
-        - extractDepth (str, optional): basic | advanced. Default: advanced.
-        - format (str, optional): markdown | html | text. Default: markdown.
-        """
-        try:
-            urls = parameters.get("urls")
-            if not urls or not isinstance(urls, list):
-                return ActionResult.isFailure(error="URLs list is required")
-
-            # Extract optional parameters
-            extractDepth = parameters.get("extractDepth", "advanced")
-            formatType = parameters.get("format", "markdown")
-
-            # Build AI call options for web crawling
-            options = AiCallOptions(
-                operationType=OperationTypeEnum.WEB_CRAWL,
-                resultFormat="json"
-            )
-
-            # Create unified prompt JSON for web crawling
-            promptData = {
-                "urls": urls,
-                "extractDepth": extractDepth,
-                "format": formatType,
-                "instructions": "Extract content from the provided URLs and return a JSON response with 'results' array containing objects with 'url', 'title', 'content', and 'extractedAt' fields."
-            }
-
-            import json
-            prompt = json.dumps(promptData, indent=2)
-
-            # Call AI service through unified path
-            result = await self.services.ai.callAiDocuments(
-                prompt=prompt,
-                documents=None,
-                options=options,
-                outputFormat="json"
-            )
-
-            # Create meaningful filename
-            meaningfulName = self._generateMeaningfulFileName(
-                base_name="web_crawl",
-                extension="json",
-                action_name="crawl"
-            )
-
-            from modules.datamodels.datamodelChat import ActionDocument
-            actionDocument = ActionDocument(
-                documentName=meaningfulName,
-                documentData=result,
-                mimeType="application/json"
-            )
-
-            return ActionResult.isSuccess(documents=[actionDocument])
-
-        except Exception as e:
-            logger.error(f"Error in web crawl: {str(e)}")
-            return ActionResult.isFailure(error=str(e))
-
-
     @action
     async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
         """
         GENERAL:
-        - Purpose: Comprehensive web research combining search and content extraction.
-        - Input requirements: researchPrompt (required); optional maxResults, urls, timeRange, country, language.
-        - Output format: JSON with research results, sources, and analysis.
+        - Purpose: Web research with a two-step process: search for URLs, then crawl content.
+        - Input requirements: prompt (required); optional list(url), country, language, researchDepth.
+        - Output format: JSON with research results including URLs and content.

         Parameters:
-        - researchPrompt (str, required): Natural language research prompt describing what to research.
-        - maxResults (int, optional): Maximum search results. Default: 5.
-        - urls (list, optional): Specific URLs to include in research.
-        - timeRange (str, optional): d | w | m | y for time filtering.
-        - country (str, optional): Country name for localized results.
-        - language (str, optional): Language code (e.g., de, en, fr).
+        - prompt (str, required): Natural language research instruction, including time range if relevant.
+        - list(url) (list, optional): Specific URLs to crawl, if needed.
+        - country (str, optional): Two-letter country code (lowercase, e.g., ch, us, de).
+        - language (str, optional): Language code (lowercase, e.g., de, en, fr).
+        - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
         """
         try:
-            researchPrompt = parameters.get("researchPrompt")
-            if not researchPrompt:
+            prompt = parameters.get("prompt")
+            if not prompt:
                 return ActionResult.isFailure(error="Research prompt is required")

-            # Extract optional parameters
-            maxResults = parameters.get("maxResults", 5)
-            urls = parameters.get("urls")
-            timeRange = parameters.get("timeRange")
-            country = parameters.get("country")
-            language = parameters.get("language")
+            # Init progress logger
+            operationId = f"web_research_{self.services.currentWorkflow.id}_{int(time.time())}"

-            # Build AI call options for web research
-            options = AiCallOptions(
-                operationType=OperationTypeEnum.WEB_RESEARCH,
-                resultFormat="json"
+            # Start progress tracking
+            self.services.workflow.progressLogStart(
+                operationId,
+                "Web Research",
+                "Searching and Crawling",
+                "Extracting URLs and Content"
             )

-            # Create unified prompt JSON for web research
-            promptData = {
-                "researchPrompt": researchPrompt,
-                "maxResults": maxResults,
-                "urls": urls,
-                "timeRange": timeRange,
-                "country": country,
-                "language": language,
-                "instructions": "Conduct comprehensive web research and return a JSON response with 'results' array containing objects with 'title', 'url', 'content', and 'analysis' fields. Provide detailed analysis and insights."
-            }
-
-            import json
-            prompt = json.dumps(promptData, indent=2)
-
-            # Call AI service through unified path
-            result = await self.services.ai.callAiDocuments(
+            # Call the web service - it handles all AI intention analysis and processing
+            result = await self.services.web.performWebResearch(
                 prompt=prompt,
-                documents=None,
-                options=options,
-                outputFormat="json"
+                urls=parameters.get("list(url)", []),
+                country=parameters.get("country"),
+                language=parameters.get("language"),
+                researchDepth=parameters.get("researchDepth", "general"),
+                operationId=operationId
             )

+            # Complete progress tracking
+            self.services.workflow.progressLogFinish(operationId, True)
+
             # Create meaningful filename
             meaningfulName = self._generateMeaningfulFileName(
                 base_name="web_research",
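As a reading aid, a hypothetical invocation under the new parameter contract (parameter names from the docstring above; values invented):

    # Illustrative parameters for the reworked webResearch action:
    parameters = {
        "prompt": "What does ValueOn in Switzerland do? Focus on the last year.",
        "list(url)": ["https://example.com/about"],  # optional seed URLs
        "country": "ch",
        "language": "de",
        "researchDepth": "deep",
    }
    result = await methodAi.webResearch(parameters)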
@@ -447,157 +223,10 @@ class MethodAi(MethodBase):

         except Exception as e:
             logger.error(f"Error in web research: {str(e)}")
-            return ActionResult.isFailure(error=str(e))
-
-
-    @action
-    async def webQuestions(self, parameters: Dict[str, Any]) -> ActionResult:
-        """
-        GENERAL:
-        - Purpose: Answer questions using web research and AI analysis.
-        - Input requirements: question (required); optional context, maxResults, timeRange, country, language.
-        - Output format: JSON with question answer and supporting sources.
-
-        Parameters:
-        - question (str, required): Question to be answered using web research.
-        - context (str, optional): Additional context for the question.
-        - maxResults (int, optional): Maximum search results. Default: 5.
-        - timeRange (str, optional): d | w | m | y for time filtering.
-        - country (str, optional): Country name for localized results.
-        - language (str, optional): Language code (e.g., de, en, fr).
-        """
-        try:
-            question = parameters.get("question")
-            if not question:
-                return ActionResult.isFailure(error="Question is required")
-
-            # Extract optional parameters
-            context = parameters.get("context", "")
-            maxResults = parameters.get("maxResults", 5)
-            timeRange = parameters.get("timeRange")
-            country = parameters.get("country")
-            language = parameters.get("language")
-
-            # Build AI call options for web questions
-            options = AiCallOptions(
-                operationType=OperationTypeEnum.WEB_QUESTIONS,
-                resultFormat="json"
-            )
-
-            # Create unified prompt JSON for web questions
-            promptData = {
-                "question": question,
-                "context": context,
-                "maxResults": maxResults,
-                "timeRange": timeRange,
-                "country": country,
-                "language": language,
-                "instructions": "Answer the question using web research and return a JSON response with 'answer', 'sources' array containing objects with 'title', 'url', 'content', and 'relevance' fields."
-            }
-
-            import json
-            prompt = json.dumps(promptData, indent=2)
-
-            # Call AI service through unified path
-            result = await self.services.ai.callAiDocuments(
-                prompt=prompt,
-                documents=None,
-                options=options,
-                outputFormat="json"
-            )
-
-            # Create meaningful filename
-            meaningfulName = self._generateMeaningfulFileName(
-                base_name="web_questions",
-                extension="json",
-                action_name="questions"
-            )
-
-            from modules.datamodels.datamodelChat import ActionDocument
-            actionDocument = ActionDocument(
-                documentName=meaningfulName,
-                documentData=result,
-                mimeType="application/json"
-            )
-
-            return ActionResult.isSuccess(documents=[actionDocument])
-
-        except Exception as e:
-            logger.error(f"Error in web questions: {str(e)}")
-            return ActionResult.isFailure(error=str(e))
-
-
-    @action
-    async def webNews(self, parameters: Dict[str, Any]) -> ActionResult:
-        """
-        GENERAL:
-        - Purpose: Search and analyze news articles on specific topics.
-        - Input requirements: newsPrompt (required); optional maxResults, timeRange, country, language.
-        - Output format: JSON with news articles, summaries, and analysis.
-
-        Parameters:
-        - newsPrompt (str, required): Natural language prompt describing what news to search for.
-        - maxResults (int, optional): Maximum news articles. Default: 5.
-        - timeRange (str, optional): d | w | m | y for time filtering. Default: w.
-        - country (str, optional): Country name for localized news.
-        - language (str, optional): Language code (e.g., de, en, fr).
-        """
-        try:
-            newsPrompt = parameters.get("newsPrompt")
-            if not newsPrompt:
-                return ActionResult.isFailure(error="News prompt is required")
-
-            # Extract optional parameters
-            maxResults = parameters.get("maxResults", 5)
-            timeRange = parameters.get("timeRange", "w")  # Default to week
-            country = parameters.get("country")
-            language = parameters.get("language")
-
-            # Build AI call options for web news
-            options = AiCallOptions(
-                operationType=OperationTypeEnum.WEB_NEWS,
-                resultFormat="json"
-            )
-
-            # Create unified prompt JSON for web news
-            promptData = {
-                "newsPrompt": newsPrompt,
-                "maxResults": maxResults,
-                "timeRange": timeRange,
-                "country": country,
-                "language": language,
-                "instructions": "Find and analyze recent news articles and return a JSON response with 'articles' array containing objects with 'title', 'url', 'content', 'date', 'source', and 'summary' fields."
-            }
-
-            import json
-            prompt = json.dumps(promptData, indent=2)
-
-            # Call AI service through unified path
-            result = await self.services.ai.callAiDocuments(
-                prompt=prompt,
-                documents=None,
-                options=options,
-                outputFormat="json"
-            )
-
-            # Create meaningful filename
-            meaningfulName = self._generateMeaningfulFileName(
-                base_name="web_news",
-                extension="json",
-                action_name="news"
-            )
-
-            from modules.datamodels.datamodelChat import ActionDocument
-            actionDocument = ActionDocument(
-                documentName=meaningfulName,
-                documentData=result,
-                mimeType="application/json"
-            )
-
-            return ActionResult.isSuccess(documents=[actionDocument])
-
-        except Exception as e:
-            logger.error(f"Error in web news: {str(e)}")
+            try:
+                self.services.workflow.progressLogFinish(operationId, False)
+            except Exception:
+                pass
             return ActionResult.isFailure(error=str(e))
@@ -631,17 +260,16 @@ class MethodAi(MethodBase):
                 resultFormat="base64"
             )

-            # Create unified prompt JSON for image generation
-            promptData = {
-                "prompt": prompt,
-                "size": size,
-                "quality": quality,
-                "style": style,
-                "instructions": "Generate an image based on the prompt and return the base64 encoded image data."
-            }
+            # Create structured prompt using the Pydantic model
+            promptModel = AiCallPromptImage(
+                prompt=prompt,
+                size=size,
+                quality=quality,
+                style=style
+            )

-            import json
-            promptJson = json.dumps(promptData, indent=2)
+            # Convert to a JSON string for the prompt
+            promptJson = promptModel.model_dump_json(exclude_none=True, indent=2)

             # Call AI service through unified path
             result = await self.services.ai.callAiDocuments(
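For reference, a sketch of what the serialized image prompt would look like with the defaults from AiCallPromptImage (output shape assumed from pydantic's model_dump_json; the example prompt is invented):

    # Illustrative: AiCallPromptImage(prompt="a lighthouse at dawn").model_dump_json(
    #     exclude_none=True, indent=2) would yield roughly:
    # {
    #   "prompt": "a lighthouse at dawn",
    #   "size": "1024x1024",
    #   "quality": "standard",
    #   "style": "vivid"
    # }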
@@ -91,26 +91,18 @@ class AIModelsTester:
         print(f"TESTING MODEL: {modelName}")
         print(f"{'='*60}")

-        # Choose test prompt based on model type - Web models get JSON formatted prompts
+        # Use the same prompt for all web models
         import json

-        if "tavily" in modelName.lower():
-            # Tavily models get web search prompt in JSON format (from methodAi.py)
+        if "tavily" in modelName.lower() or "perplexity" in modelName.lower() or "llama" in modelName.lower() or "sonar" in modelName.lower() or "mistral" in modelName.lower():
+            # All web models use the same JSON formatted prompt
+            # Country format: use the full name for Tavily (Switzerland); Perplexity converts ISO codes to names
             testPrompt = json.dumps({
-                "searchPrompt": "Search for recent news about artificial intelligence developments in 2024. Return the top 3 results as JSON with fields: title, url, snippet.",
-                "maxResults": 3,
-                "timeRange": "y",
-                "country": "United States",
-                "instructions": "Search the web and return a JSON response with a 'results' array containing objects with 'title', 'url', and optionally 'content' fields. Focus on finding relevant URLs for the search prompt."
-            }, indent=2)
-        elif "perplexity" in modelName.lower() or "llama" in modelName.lower() or "sonar" in modelName.lower() or "mistral" in modelName.lower():
-            # Perplexity models get web research prompt in JSON format (from methodAi.py)
-            testPrompt = json.dumps({
-                "researchPrompt": "Research the latest trends in renewable energy technology. Provide a comprehensive overview with key developments, companies involved, and future prospects. Return as JSON.",
+                "prompt": "Research what the ValueOn company in Switzerland does and who works there. Return as JSON.",
                 "maxResults": 5,
                 "timeRange": "y",
-                "country": "United States",
-                "instructions": "Conduct comprehensive web research and return a JSON response with 'results' array containing objects with 'title', 'url', 'content', and 'analysis' fields. Provide detailed analysis and insights."
+                "country": "CH",  # ISO-2 code, Perplexity will convert to "Switzerland"
+                "format": "json"
             }, indent=2)
         else:
             # Fallback for other models
@@ -444,9 +436,7 @@ Is Valid JSON: {result.get('isValidJson', False)}
         # "dall-e-3",  # Skipped - image generation, test later
         "sonar",  # Perplexity web model
         "sonar-pro",  # Perplexity web model
-        "tavily-search",  # Tavily web model
-        "tavily-extract",  # Tavily web model
-        "tavily-search-extract",  # Tavily web model
+        "tavily-search",  # Tavily web model (unified research)
         # "internal-extractor",  # Skipped - internal model, test later
         # "internal-generator",  # Skipped - internal model, test later
         # "internal-renderer"  # Skipped - internal model, test later