"""Base class for web classes.""" from abc import ABC, abstractmethod from modules.interfaces.interfaceChatModel import ActionDocument, ActionResult from pydantic import BaseModel, Field, HttpUrl from typing import List from modules.shared.configuration import APP_CONFIG # Configuration loading functions def get_web_search_max_query_length() -> int: """Get maximum query length from configuration""" return int(APP_CONFIG.get("Web_Search_MAX_QUERY_LENGTH", "400")) def get_web_search_max_results() -> int: """Get maximum search results from configuration""" return int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20")) def get_web_search_min_results() -> int: """Get minimum search results from configuration""" return int(APP_CONFIG.get("Web_Search_MIN_RESULTS", "1")) # --- Web search --- # query -> list of URLs class WebSearchRequest(BaseModel): query: str = Field(min_length=1, max_length=get_web_search_max_query_length()) max_results: int = Field(ge=get_web_search_min_results(), le=get_web_search_max_results()) class WebSearchResultItem(BaseModel): """Individual search result""" title: str url: HttpUrl class WebSearchDocumentData(BaseModel): """Complete search results document""" query: str = Field(min_length=1, max_length=get_web_search_max_query_length()) results: List[WebSearchResultItem] total_count: int class WebSearchActionDocument(ActionDocument): documentData: WebSearchDocumentData class WebSearchActionResult(ActionResult): documents: List[WebSearchActionDocument] = Field(default_factory=list) class WebSearchBase(ABC): @abstractmethod async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult: ... 
# --- Web crawl ----------------------------------------------------------------
# list of URLs -> list of extracted HTML content

class WebCrawlRequest(BaseModel):
    """Input for a crawl: the URLs whose content should be fetched."""

    urls: List[HttpUrl]


class WebCrawlResultItem(BaseModel):
    """Individual crawl result"""

    url: HttpUrl
    content: str


class WebCrawlDocumentData(BaseModel):
    """Complete crawl results document"""

    urls: List[HttpUrl]
    results: List[WebCrawlResultItem]
    total_count: int


class WebCrawlActionDocument(ActionDocument):
    """Action document carrying a complete set of crawl results."""

    documentData: WebCrawlDocumentData = Field(
        description="The data extracted from crawled URLs"
    )


class WebCrawlActionResult(ActionResult):
    """Action result wrapping zero or more crawl documents."""

    documents: List[WebCrawlActionDocument] = Field(default_factory=list)


class WebCrawlBase(ABC):
    """Interface for crawlers that turn a list of URLs into extracted content."""

    @abstractmethod
    async def crawl_urls(self, request: WebCrawlRequest) -> WebCrawlActionResult:
        ...


# --- Web scrape ---------------------------------------------------------------
# scrape -> list of extracted text; combines web search and crawl in one step

class WebScrapeRequest(BaseModel):
    """Input for a scrape: a query plus a bounded result count.

    Reuses the web-search limits from configuration, since scrape is
    search + crawl in a single step.
    """

    query: str = Field(
        min_length=1,
        max_length=get_web_search_max_query_length(),
    )
    max_results: int = Field(
        ge=get_web_search_min_results(),
        le=get_web_search_max_results(),
    )


class WebScrapeResultItem(BaseModel):
    """Individual scrape result"""

    url: HttpUrl
    content: str


class WebScrapeDocumentData(BaseModel):
    """Complete scrape results document"""

    query: str = Field(
        min_length=1,
        max_length=get_web_search_max_query_length(),
    )
    results: List[WebScrapeResultItem]
    total_count: int


class WebScrapeActionDocument(ActionDocument):
    """Action document carrying a complete set of scrape results."""

    documentData: WebScrapeDocumentData = Field(
        description="The data extracted from scraped URLs"
    )


class WebScrapeActionResult(ActionResult):
    """Action result wrapping zero or more scrape documents."""

    documents: List[WebScrapeActionDocument] = Field(default_factory=list)


class WebScrapeBase(ABC):
    """Interface for scrapers that resolve a query into extracted content."""

    @abstractmethod
    async def scrape(self, request: WebScrapeRequest) -> WebScrapeActionResult:
        ...