"""Shared request/result models and abstract base classes for web access (search, crawl, scrape)."""

from abc import ABC, abstractmethod
from typing import List, Literal, Optional

from pydantic import BaseModel, Field, HttpUrl

from modules.interfaces.interfaceChatModel import ActionDocument, ActionResult
from modules.shared.configuration import APP_CONFIG


# Configuration loading functions


def _int_setting(key: str, fallback: str) -> int:
    """Look up ``key`` via ``APP_CONFIG.get`` with ``fallback`` as default and convert it to int."""
    return int(APP_CONFIG.get(key, fallback))


def get_web_search_max_query_length() -> int:
    """Return the configured upper bound for the query length (fallback "400")."""
    return _int_setting("Web_Search_MAX_QUERY_LENGTH", "400")


def get_web_search_max_results() -> int:
    """Return the configured upper bound for the number of results (fallback "20")."""
    return _int_setting("Web_Search_MAX_RESULTS", "20")


def get_web_search_min_results() -> int:
    """Return the configured lower bound for the number of results (fallback "1")."""
    return _int_setting("Web_Search_MIN_RESULTS", "1")


# --- Web search ---
# query -> list of URLs


class WebSearchRequest(BaseModel):
    # Input of a web search. The bounds below are obtained from the
    # configuration getters while this class body is being evaluated.
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    max_results: int = Field(
        ge=get_web_search_min_results(),
        le=get_web_search_max_results(),
    )

    # Tavily tuning options; all optional and unset (None) by default.
    search_depth: Optional[Literal["basic", "advanced"]] = None
    time_range: Optional[Literal["d", "w", "m", "y"]] = Field(
        None, description="Limit results to last day/week/month/year"
    )
    topic: Optional[Literal["general", "news", "academic"]] = None
    include_domains: Optional[List[str]] = None
    exclude_domains: Optional[List[str]] = None
    language: Optional[str] = Field(
        None, description="ISO language code like 'en', 'de'"
    )
    include_answer: Optional[bool] = None
    include_raw_content: Optional[bool] = None


class WebSearchResultItem(BaseModel):
    """Individual search result"""

    title: str
    url: HttpUrl


class WebSearchDocumentData(BaseModel):
    """Complete search results document"""

    # Same length bounds as WebSearchRequest.query.
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    results: List[WebSearchResultItem]
    total_count: int


class WebSearchActionDocument(ActionDocument):
    # Declares documentData with the search-specific payload type.
    documentData: WebSearchDocumentData


class WebSearchActionResult(ActionResult):
    # Declares documents as a list of search documents; empty list by default.
    documents: List[WebSearchActionDocument] = Field(default_factory=list)


class WebSearchBase(ABC):
    """Abstract interface that web search implementations must fulfil."""

    @abstractmethod
    async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult:
        """Run the search described by ``request`` and return the result documents."""


# --- Web crawl ---
# list of URLs -> list of extracted content, one entry per URL


class WebCrawlRequest(BaseModel):
    # Input of a crawl: the URLs whose content should be extracted.
    urls: List[HttpUrl]

    # Tavily extract options; both optional and unset (None) by default.
    extract_depth: Optional[Literal["basic", "advanced"]] = None
    format: Optional[Literal["text", "markdown"]] = None


class WebCrawlResultItem(BaseModel):
    """Individual crawl result"""

    url: HttpUrl
    content: str


class WebCrawlDocumentData(BaseModel):
    """Complete crawl results document"""

    urls: List[HttpUrl]
    results: List[WebCrawlResultItem]
    total_count: int


class WebCrawlActionDocument(ActionDocument):
    # Declares documentData with the crawl-specific payload type (required field).
    documentData: WebCrawlDocumentData = Field(
        description="The data extracted from crawled URLs"
    )


class WebCrawlActionResult(ActionResult):
    # Declares documents as a list of crawl documents; empty list by default.
    documents: List[WebCrawlActionDocument] = Field(default_factory=list)


class WebCrawlBase(ABC):
    """Abstract interface that web crawl implementations must fulfil."""

    @abstractmethod
    async def crawl_urls(self, request: WebCrawlRequest) -> WebCrawlActionResult:
        """Crawl the URLs listed in ``request`` and return the result documents."""

# --- Web scrape ---
# scrape -> list of extracted text; combines web search and crawl in one step


class WebScrapeRequest(BaseModel):
    # Input of a scrape. Carries the same fields as WebSearchRequest plus the
    # extract options of WebCrawlRequest. The bounds below are obtained from
    # the configuration getters while this class body is being evaluated.
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    max_results: int = Field(
        ge=get_web_search_min_results(),
        le=get_web_search_max_results(),
    )

    # Pass-through search options; all optional and unset (None) by default.
    search_depth: Optional[Literal["basic", "advanced"]] = None
    time_range: Optional[Literal["d", "w", "m", "y"]] = None
    topic: Optional[Literal["general", "news", "academic"]] = None
    include_domains: Optional[List[str]] = None
    exclude_domains: Optional[List[str]] = None
    language: Optional[str] = None
    include_answer: Optional[bool] = None
    include_raw_content: Optional[bool] = None

    # Extract options; both optional and unset (None) by default.
    extract_depth: Optional[Literal["basic", "advanced"]] = None
    format: Optional[Literal["text", "markdown"]] = None


class WebScrapeResultItem(BaseModel):
    """Individual scrape result"""

    url: HttpUrl
    content: str


class WebScrapeDocumentData(BaseModel):
    """Complete scrape results document"""

    # Same length bounds as WebScrapeRequest.query.
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    results: List[WebScrapeResultItem]
    total_count: int


class WebScrapeActionDocument(ActionDocument):
    # Declares documentData with the scrape-specific payload type (required field).
    documentData: WebScrapeDocumentData = Field(
        description="The data extracted from scraped URLs"
    )


class WebScrapeActionResult(ActionResult):
    # Declares documents as a list of scrape documents; empty list by default.
    documents: List[WebScrapeActionDocument] = Field(default_factory=list)


class WebScrapeBase(ABC):
    """Abstract interface that web scrape implementations must fulfil."""

    @abstractmethod
    async def scrape(self, request: WebScrapeRequest) -> WebScrapeActionResult:
        """Search and extract as described by ``request`` and return the result documents."""