# gateway/modules/interfaces/interfaceWebModel.py
# Last modified: 2025-09-02 18:58:30 +02:00

"""Base class for web classes."""
from abc import ABC, abstractmethod
from modules.interfaces.interfaceChatModel import ActionDocument, ActionResult
from pydantic import BaseModel, Field, HttpUrl
from typing import List
from modules.shared.configuration import APP_CONFIG
# Configuration loading functions
def get_web_search_max_query_length() -> int:
    """Return the maximum allowed web-search query length from configuration.

    Falls back to the default (400) when the configured value is missing or
    not a valid integer. Without the fallback, a malformed config entry would
    raise ValueError at import time, because this getter is evaluated inside
    Field(...) constraints at class-definition time.
    """
    try:
        return int(APP_CONFIG.get("Web_Search_MAX_QUERY_LENGTH", "400"))
    except (TypeError, ValueError):
        # Malformed configuration value; use the documented default.
        return 400
def get_web_search_max_results() -> int:
    """Return the maximum number of web-search results from configuration.

    Falls back to the default (20) when the configured value is missing or
    not a valid integer. Without the fallback, a malformed config entry would
    raise ValueError at import time, because this getter is evaluated inside
    Field(...) constraints at class-definition time.
    """
    try:
        return int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20"))
    except (TypeError, ValueError):
        # Malformed configuration value; use the documented default.
        return 20
def get_web_search_min_results() -> int:
    """Return the minimum number of web-search results from configuration.

    Falls back to the default (1) when the configured value is missing or
    not a valid integer. Without the fallback, a malformed config entry would
    raise ValueError at import time, because this getter is evaluated inside
    Field(...) constraints at class-definition time.
    """
    try:
        return int(APP_CONFIG.get("Web_Search_MIN_RESULTS", "1"))
    except (TypeError, ValueError):
        # Malformed configuration value; use the documented default.
        return 1
# --- Web search ---
# query -> list of URLs
class WebSearchRequest(BaseModel):
    """Request payload for a web search: a query string plus a result cap."""

    # NOTE: constraint bounds are read from configuration once, when this
    # class body is executed at import time — not per request.
    query: str = Field(max_length=get_web_search_max_query_length(), min_length=1)
    max_results: int = Field(le=get_web_search_max_results(), ge=get_web_search_min_results())
class WebSearchResultItem(BaseModel):
    """Individual search result"""
    # Title of the result.
    title: str
    # Result link; validated as a URL by pydantic's HttpUrl type.
    url: HttpUrl
class WebSearchDocumentData(BaseModel):
    """Complete search results document"""

    # The originating query, constrained the same way as WebSearchRequest.query.
    query: str = Field(max_length=get_web_search_max_query_length(), min_length=1)
    # Individual search hits.
    results: List[WebSearchResultItem]
    # Result count; not validated against len(results) here.
    total_count: int
class WebSearchActionDocument(ActionDocument):
    """Action document carrying a web-search results payload."""
    # Structured search results held by this document.
    documentData: WebSearchDocumentData
class WebSearchActionResult(ActionResult):
    """Action result holding zero or more web-search documents."""
    # default_factory gives each instance its own list (no shared mutable default).
    documents: List[WebSearchActionDocument] = Field(default_factory=list)
class WebSearchBase(ABC):
    """Abstract interface for web-search backends."""

    @abstractmethod
    async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult:
        """Resolve *request* into a WebSearchActionResult of matching URLs."""
# --- Web crawl ---
# list of URLs -> list of extracted HTML content
class WebCrawlRequest(BaseModel):
    """Request payload for a crawl: the URLs whose content should be fetched."""
    # URLs to crawl; each is validated by pydantic's HttpUrl type.
    urls: List[HttpUrl]
class WebCrawlResultItem(BaseModel):
    """Individual crawl result"""
    # The crawled URL.
    url: HttpUrl
    # Extracted content for that URL (HTML, per the module-level comment).
    content: str
class WebCrawlDocumentData(BaseModel):
    """Complete crawl results document"""
    # The URLs that were requested for crawling.
    urls: List[HttpUrl]
    # Per-URL crawl results.
    results: List[WebCrawlResultItem]
    # Result count; not validated against len(results) here.
    total_count: int
class WebCrawlActionDocument(ActionDocument):
    """Action document carrying a web-crawl results payload."""

    documentData: WebCrawlDocumentData = Field(description="The data extracted from crawled URLs")
class WebCrawlActionResult(ActionResult):
    """Action result holding zero or more web-crawl documents."""
    # default_factory gives each instance its own list (no shared mutable default).
    documents: List[WebCrawlActionDocument] = Field(default_factory=list)
class WebCrawlBase(ABC):
    """Abstract interface for web-crawl backends."""

    @abstractmethod
    async def crawl_urls(self, request: WebCrawlRequest) -> WebCrawlActionResult:
        """Fetch and extract content for every URL in *request*."""
# --- Web scrape ---
# scrape -> list of extracted text; combines web search and crawl in one step
class WebScrapeRequest(BaseModel):
    """Request payload for a scrape (search + crawl in one step)."""

    # NOTE: constraint bounds are read from configuration once, when this
    # class body is executed at import time — not per request.
    query: str = Field(max_length=get_web_search_max_query_length(), min_length=1)
    max_results: int = Field(le=get_web_search_max_results(), ge=get_web_search_min_results())
class WebScrapeResultItem(BaseModel):
    """Individual scrape result"""
    # The scraped URL.
    url: HttpUrl
    # Text extracted from that URL.
    content: str
class WebScrapeDocumentData(BaseModel):
    """Complete scrape results document"""

    # The originating query, constrained the same way as WebScrapeRequest.query.
    query: str = Field(max_length=get_web_search_max_query_length(), min_length=1)
    # Per-URL scrape results.
    results: List[WebScrapeResultItem]
    # Result count; not validated against len(results) here.
    total_count: int
class WebScrapeActionDocument(ActionDocument):
    """Action document carrying a web-scrape results payload."""

    documentData: WebScrapeDocumentData = Field(description="The data extracted from scraped URLs")
class WebScrapeActionResult(ActionResult):
    """Action result holding zero or more web-scrape documents."""
    # default_factory gives each instance its own list (no shared mutable default).
    documents: List[WebScrapeActionDocument] = Field(default_factory=list)
class WebScrapeBase(ABC):
    """Abstract interface for web-scrape backends (search + crawl combined)."""

    @abstractmethod
    async def scrape(self, request: WebScrapeRequest) -> WebScrapeActionResult:
        """Search for *request.query* and extract text from the resulting URLs."""