"""Web-related modules""" from abc import ABC, abstractmethod from pydantic import BaseModel, Field, HttpUrl from typing import List, Optional, Literal, Dict, Any from modules.shared.configuration import APP_CONFIG from modules.datamodels.datamodelWorkflow import ActionDocument, ActionResult WEB_SEARCH_MAX_QUERY_LENGTH: int = int(APP_CONFIG.get("Web_Search_MAX_QUERY_LENGTH", "400")) WEB_SEARCH_MAX_RESULTS: int = int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20")) WEB_SEARCH_MIN_RESULTS: int = int(APP_CONFIG.get("Web_Search_MIN_RESULTS", "1")) class WebResearchOptions(BaseModel): """Advanced options for web research workflow""" max_pages: int = Field(default=10, ge=1, le=50, description="Maximum pages to crawl") search_depth: Literal["basic", "advanced"] = Field(default="basic", description="Tavily search depth") extract_depth: Literal["basic", "advanced"] = Field(default="advanced", description="Tavily extract depth") format: Literal["text", "markdown"] = Field(default="markdown", description="Content format") return_report: bool = Field(default=True, description="Return formatted report or raw data") pages_search_depth: int = Field(default=1, ge=1, le=5, description="How deep to crawl: 1=main pages only, 2=main+sub-pages, 3=main+sub+sub-sub, etc.") country: Optional[str] = Field(default=None, description="Country code for search bias") time_range: Optional[Literal["d", "w", "m", "y"]] = Field(default=None, description="Time range for search") topic: Optional[Literal["general", "news", "academic"]] = Field(default=None, description="Search topic") language: Optional[str] = Field(default=None, description="Language code") include_answer: Optional[bool] = Field(default=None, description="Include AI answer") include_raw_content: Optional[bool] = Field(default=None, description="Include raw content") class WebResearchRequest(BaseModel): """Main web research request""" search_query: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH, description="User's 
research question") urls: Optional[List[str]] = Field(default=None, description="Specific URLs to crawl (optional)") max_results: int = Field(default=5, ge=1, le=WEB_SEARCH_MAX_RESULTS, description="Max search results") options: WebResearchOptions = Field(default_factory=WebResearchOptions, description="Advanced options") class WebSearchResultItem(BaseModel): """Individual search result""" title: str url: HttpUrl raw_content: Optional[str] = Field(default=None, description="Raw HTML content") class WebCrawlResultItem(BaseModel): """Individual crawl result""" url: HttpUrl content: str class WebResearchDocumentData(BaseModel): """Complete web research results""" search_query: str websites_analyzed: int additional_links_found: int analysis_result: str sources: List[WebSearchResultItem] additional_links: List[str] individual_content: Optional[Dict[str, str]] = None # URL -> content mapping debug_info: Optional[Dict[str, Any]] = None class WebResearchActionDocument(ActionDocument): documentData: WebResearchDocumentData class WebResearchActionResult(ActionResult): documents: List[WebResearchActionDocument] = Field(default_factory=list) class WebResearchBase(ABC): @abstractmethod async def web_research(self, request: WebResearchRequest) -> WebResearchActionResult: ... 
# Legacy models for connector compatibility


class WebSearchDocumentData(BaseModel):
    """Search results document data"""

    query: str
    results: List[WebSearchResultItem]
    total_count: int


class WebSearchActionDocument(ActionDocument):
    """ActionDocument carrying search results."""

    documentData: WebSearchDocumentData


class WebSearchActionResult(ActionResult):
    """ActionResult whose documents are search documents."""

    documents: List[WebSearchActionDocument] = Field(default_factory=list)


class WebCrawlDocumentData(BaseModel):
    """Crawl results document data"""

    urls: List[HttpUrl]
    results: List[WebCrawlResultItem]
    total_count: int


class WebCrawlActionDocument(ActionDocument):
    """ActionDocument carrying crawl results."""

    documentData: WebCrawlDocumentData


class WebCrawlActionResult(ActionResult):
    """ActionResult whose documents are crawl documents."""

    documents: List[WebCrawlActionDocument] = Field(default_factory=list)


class WebScrapeDocumentData(WebSearchDocumentData):
    """Scrape results document data.

    Inherits query/results/total_count from WebSearchDocumentData — the fields
    were previously duplicated verbatim; the resulting schema is identical.
    """


class WebScrapeActionDocument(ActionDocument):
    """ActionDocument carrying scrape results."""

    documentData: WebScrapeDocumentData


class WebScrapeActionResult(ActionResult):
    """ActionResult whose documents are scrape documents."""

    documents: List[WebScrapeActionDocument] = Field(default_factory=list)


class WebSearchRequest(BaseModel):
    """Search request for Tavily"""

    query: str
    max_results: int = 5
    search_depth: Optional[Literal["basic", "advanced"]] = None
    time_range: Optional[Literal["d", "w", "m", "y"]] = None
    topic: Optional[Literal["general", "news", "academic"]] = None
    include_domains: Optional[List[str]] = None
    exclude_domains: Optional[List[str]] = None
    language: Optional[str] = None
    include_answer: Optional[bool] = None
    include_raw_content: Optional[bool] = None
    auto_parameters: Optional[bool] = None
    country: Optional[str] = None


class WebCrawlRequest(BaseModel):
    """Crawl request for Tavily"""

    urls: List[HttpUrl]
    extract_depth: Optional[Literal["basic", "advanced"]] = None
    format: Optional[Literal["text", "markdown"]] = None


class WebScrapeRequest(WebSearchRequest):
    """Scrape request for Tavily.

    Extends WebSearchRequest with extract options — every search field was
    previously duplicated verbatim; inheriting keeps the two models in sync
    and yields an identical schema (inherited fields first, then these two).
    """

    extract_depth: Optional[Literal["basic", "advanced"]] = None
    format: Optional[Literal["text", "markdown"]] = None


class WebScrapeResultItem(BaseModel):
    """Individual scrape result"""

    url: HttpUrl
    content: str