From dfd76c7d11738649c8a79458f79f3a40e7acbbc9 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Mon, 25 Aug 2025 17:24:33 +0200 Subject: [PATCH 01/17] feat: add web search abstraction --- .../methods/web/web_search/web_search_base.py | 31 ++++++++ .../web/web_search/web_search_tavily.py | 70 +++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 modules/methods/web/web_search/web_search_base.py create mode 100644 modules/methods/web/web_search/web_search_tavily.py diff --git a/modules/methods/web/web_search/web_search_base.py b/modules/methods/web/web_search/web_search_base.py new file mode 100644 index 00000000..d655bfd3 --- /dev/null +++ b/modules/methods/web/web_search/web_search_base.py @@ -0,0 +1,31 @@ +"""Base class for web search classes.""" + +from abc import ABC, abstractmethod +from modules.interfaces.interfaceChatModel import ActionDocument, ActionResult + + +from pydantic import BaseModel, Field +from typing import List + + +class WebSearchRequest(BaseModel): + query: str + max_results: int + + +class WebSearchDocumentData(BaseModel): + title: str + url: str + + +class WebSearchActionDocument(ActionDocument): + documentData: List[WebSearchDocumentData] + + +class WebSearchActionResult(ActionResult): + documents: List[WebSearchActionDocument] = Field(default_factory=list) + + +class WebSearchBase(ABC): + @abstractmethod + async def __call__(self, request: WebSearchRequest) -> WebSearchActionResult: ... diff --git a/modules/methods/web/web_search/web_search_tavily.py b/modules/methods/web/web_search/web_search_tavily.py new file mode 100644 index 00000000..dcbea35c --- /dev/null +++ b/modules/methods/web/web_search/web_search_tavily.py @@ -0,0 +1,70 @@ +"""Tavily web search class.""" + +import os +from dataclasses import dataclass +from web_search_base import ( + WebSearchBase, + WebSearchRequest, + WebSearchActionResult, + WebSearchActionDocument, + WebSearchDocumentData, +) + +# from modules.interfaces.interfaceChatModel import ActionResult, ActionDocument +from tavily import AsyncTavilyClient +from modules.shared.timezoneUtils import get_utc_timestamp + + +@dataclass +class WebSearchTavily(WebSearchBase): + client: AsyncTavilyClient = None + + @classmethod + async def create(cls): + return cls(client=AsyncTavilyClient(api_key=os.getenv("TAVILY_API_KEY"))) + + async def __call__(self, request: WebSearchRequest) -> WebSearchActionResult: + """Handles the web search request.""" + # Step 1: Search + try: + search_results = await self._search(request.query, request.max_results) + except Exception as e: + return WebSearchActionResult(success=False, error=str(e)) + + # Step 2: Build ActionResult + try: + result = self._build_action_result(search_results) + except Exception as e: + return WebSearchActionResult(success=False, error=str(e)) + + return result + + async def _search(self, query: str, max_results: int) -> WebSearchActionResult: + """Calls the Tavily API to perform a web search.""" + # Make sure max_results is within the allowed range + if max_results < 0 or max_results > 20: + raise ValueError("max_results must be between 0 and 20") + + # Perform actual API call + response = await self.client.search(query=query, max_results=max_results) + return response["results"] + + def _build_action_result(self, search_results: list) -> WebSearchActionResult: + """Builds the ActionResult from the search results.""" + documents = [] + for result in search_results: + document_name = f"web_search_{get_utc_timestamp()}.txt" + document_data = WebSearchDocumentData( + title=result["title"], url=result["url"] + ) + mime_type = "text/plain" + doc = WebSearchActionDocument( + documentName=document_name, + documentData=document_data, + mimeType=mime_type, + ) + documents.append(doc) + + return WebSearchActionResult( + success=True, documents=documents, resultLabel="web_search_results" + ) From b37cd502cd7e3137bef5b062991448f9b4f218ff Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Mon, 25 Aug 2025 17:25:11 +0200 Subject: [PATCH 02/17] chore: add tavily requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index adf8d3c3..e6397aa6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,6 +42,7 @@ requests==2.31.0 chardet>=5.0.0 # Für Zeichensatzerkennung bei Webinhalten aiohttp>=3.8.0 # Required for SharePoint operations (async HTTP) selenium>=4.15.0 # Required for web automation and JavaScript-heavy pages +tavily-python==0.7.11 # Tavily SDK ## Image Processing Pillow>=10.0.0 # Für Bildverarbeitung (als PIL importiert) From d4b846c5980d6448b0ef41ea4c6ecbf67a74f22f Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Fri, 29 Aug 2025 15:35:14 +0200 Subject: [PATCH 03/17] chore: restructure web search w/ tests --- modules/__init__.py | 0 modules/connectors/connector_tavily.py | 70 +++++++++++++++++++++ modules/interfaces/interface_web_model.py | 49 +++++++++++++++ modules/interfaces/interface_web_objects.py | 24 +++++++ modules/methods/method_web.py | 43 +++++++++++++ pytest.ini | 11 ++++ requirements.txt | 4 ++ tests/__init__.py | 1 + tests/connectors/__init__.py | 0 tests/connectors/test_connector_tavily.py | 39 ++++++++++++ tests/methods/__init__.py | 0 tests/methods/test_method_web.py | 36 +++++++++++ 12 files changed, 277 insertions(+) create mode 100644 modules/__init__.py create mode 100644 modules/connectors/connector_tavily.py create mode 100644 modules/interfaces/interface_web_model.py create mode 100644 modules/interfaces/interface_web_objects.py create mode 100644 modules/methods/method_web.py create mode 100644 pytest.ini create mode 100644 tests/__init__.py create mode 100644 tests/connectors/__init__.py create mode 100644 tests/connectors/test_connector_tavily.py create mode 100644 tests/methods/__init__.py create mode 100644 tests/methods/test_method_web.py diff --git a/modules/__init__.py b/modules/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modules/connectors/connector_tavily.py b/modules/connectors/connector_tavily.py new file mode 100644 index 00000000..4f57fb94 --- /dev/null +++ b/modules/connectors/connector_tavily.py @@ -0,0 +1,70 @@ +"""Tavily web search class.""" + +import os +from dataclasses import dataclass +from modules.interfaces.interface_web_model import ( + WebSearchBase, + WebSearchRequest, + WebSearchActionResult, + WebSearchActionDocument, + WebSearchDocumentData, +) + +# from modules.interfaces.interfaceChatModel import ActionResult, ActionDocument +from tavily import AsyncTavilyClient +from modules.shared.timezoneUtils import get_utc_timestamp + + +@dataclass +class ConnectorTavily(WebSearchBase): + client: AsyncTavilyClient = None + + @classmethod + async def create(cls): + return cls(client=AsyncTavilyClient(api_key=os.getenv("TAVILY_API_KEY"))) + + async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult: + """Handles the web search request.""" + # Step 1: Search + try: + search_results = await self._search(request.query, request.max_results) + except Exception as e: + return WebSearchActionResult(success=False, error=str(e)) + + # Step 2: Build ActionResult + try: + result = self._build_action_result(search_results) + except Exception as e: + return WebSearchActionResult(success=False, error=str(e)) + + return result + + async def _search(self, query: str, max_results: int) -> WebSearchActionResult: + """Calls the Tavily API to perform a web search.""" + # Make sure max_results is within the allowed range + if max_results < 0 or max_results > 20: + raise ValueError("max_results must be between 0 and 20") + + # Perform actual API call + response = await self.client.search(query=query, max_results=max_results) + return response["results"] + + def _build_action_result(self, search_results: list) -> WebSearchActionResult: + """Builds the ActionResult from the search results.""" + documents = [] + for result in search_results: + document_name = f"web_search_{get_utc_timestamp()}.txt" + document_data = WebSearchDocumentData( + title=result["title"], url=result["url"] + ) + mime_type = "application/json" + doc = WebSearchActionDocument( + documentName=document_name, + documentData=document_data, + mimeType=mime_type, + ) + documents.append(doc) + + return WebSearchActionResult( + success=True, documents=documents, resultLabel="web_search_results" + ) diff --git a/modules/interfaces/interface_web_model.py b/modules/interfaces/interface_web_model.py new file mode 100644 index 00000000..8dc01fc8 --- /dev/null +++ b/modules/interfaces/interface_web_model.py @@ -0,0 +1,49 @@ +"""Base class for web classes.""" + +from abc import ABC, abstractmethod +from modules.interfaces.interfaceChatModel import ActionDocument, ActionResult + + +from pydantic import BaseModel, Field +from typing import List + + +# --- Web search --- + +# query -> list of URLs + + +class WebSearchRequest(BaseModel): + query: str + max_results: int + + +class WebSearchDocumentData(BaseModel): + title: str + url: str + + +class WebSearchActionDocument(ActionDocument): + documentData: WebSearchDocumentData + + +class WebSearchActionResult(ActionResult): + documents: List[WebSearchActionDocument] = Field(default_factory=list) + + +class WebSearchBase(ABC): + @abstractmethod + async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult: ... + + +# --- Web crawl --- + +# list of URLs -> list of extracted HTML content + +# TODO + +# --- Web query --- + +# query -> list of extracted text + +# TODO diff --git a/modules/interfaces/interface_web_objects.py b/modules/interfaces/interface_web_objects.py new file mode 100644 index 00000000..b38db6a3 --- /dev/null +++ b/modules/interfaces/interface_web_objects.py @@ -0,0 +1,24 @@ +from modules.interfaces.interface_web_model import ( + WebSearchActionResult, + WebSearchRequest, +) + +from dataclasses import dataclass +from modules.connectors.connector_tavily import ConnectorTavily + + +@dataclass +class WebInterface: + connector_tavily: ConnectorTavily = None + + @classmethod + async def create(cls) -> "WebInterface": + connector_tavily = await ConnectorTavily.create() + + return WebInterface(connector_tavily=connector_tavily) + + async def search( + self, web_search_request: WebSearchRequest + ) -> WebSearchActionResult: + # NOTE: Add connectors here + return await self.connector_tavily.search_urls(web_search_request) diff --git a/modules/methods/method_web.py b/modules/methods/method_web.py new file mode 100644 index 00000000..27b82ba5 --- /dev/null +++ b/modules/methods/method_web.py @@ -0,0 +1,43 @@ +import logging +from typing import Any, Dict +from modules.chat.methodBase import MethodBase, action +from modules.interfaces.interfaceChatModel import ActionResult +from modules.interfaces.interface_web_objects import WebInterface +from modules.interfaces.interface_web_model import WebSearchRequest + + +logger = logging.getLogger(__name__) + + +class MethodWeb(MethodBase): + """Web method implementation for web operations.""" + + def __init__(self, serviceCenter: Any): + super().__init__(serviceCenter) + + @action + async def search(self, parameters: Dict[str, Any]) -> ActionResult: + """ + Perform a web search and output a .txt file with a plain list of URLs (one per line). + + Parameters: + query (str): Search query to perform + maxResults (int, optional): Maximum number of results (default: 10) + """ + # TODO: Fix docstrings - do we need that format for parsing? + + try: + # Prepare request data + web_search_request = WebSearchRequest( + query=parameters.get("query"), + max_results=parameters.get("maxResults", 10), + ) + + # Perform request + web_interface = await WebInterface.create() + web_search_result = await web_interface.search(web_search_request) + + return web_search_result + + except Exception as e: + return ActionResult(success=False, error=str(e)) diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..b606fa5f --- /dev/null +++ b/pytest.ini @@ -0,0 +1,11 @@ +[pytest] +testpaths = tests +python_paths = . +addopts = -v --tb=short +python_files = test_*.py +python_classes = Test* +python_functions = test_* +log_file = logs/test_logs.log +log_file_level = INFO +log_file_format = %(asctime)s %(levelname)s %(message)s +log_file_date_format = %Y-%m-%d %H:%M:%S \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e6397aa6..75bd81b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -68,3 +68,7 @@ PyPDF2>=3.0.0 PyMuPDF>=1.20.0 beautifulsoup4>=4.11.0 chardet>=4.0.0 # For encoding detection + +## Testing Dependencies +pytest>=8.0.0 +pytest-asyncio>=0.21.0 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..4ede8e6d --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# noqa diff --git a/tests/connectors/__init__.py b/tests/connectors/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/connectors/test_connector_tavily.py b/tests/connectors/test_connector_tavily.py new file mode 100644 index 00000000..54ea382c --- /dev/null +++ b/tests/connectors/test_connector_tavily.py @@ -0,0 +1,39 @@ +"""Tests for Tavliy web search.""" + +import pytest +import logging + +from modules.interfaces.interfaceChatModel import ActionResult +from modules.interfaces.interface_web_model import WebSearchRequest +from modules.connectors.connector_tavily import ConnectorTavily + +logger = logging.getLogger(__name__) + + +@pytest.mark.asyncio +async def test_tavily_connector_search_test_live_api(): + logger.info("Testing Tavliy connector with live API calls") + + # Test request + request = WebSearchRequest(query="How old is the Earth?", max_results=5) + + # Tavily instance + connector_tavily = await ConnectorTavily.create() + + # Search test + action_result = await connector_tavily.search_urls(request=request) + + # Check results + assert isinstance(action_result, ActionResult) + + logger.info("=" * 20) + logger.info(f"Action result success status: {action_result.success}") + logger.info(f"Action result error: {action_result.error}") + logger.info(f"Action result label: {action_result.resultLabel}") + + logger.info("Documents:") + for doc in action_result.documents: + logger.info("-" * 10) + logger.info(f" - Document Name: {doc.documentName}") + logger.info(f" - Document Mime Type: {doc.mimeType}") + logger.info(f" - Document Data: {doc.documentData}") diff --git a/tests/methods/__init__.py b/tests/methods/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/methods/test_method_web.py b/tests/methods/test_method_web.py new file mode 100644 index 00000000..078d2902 --- /dev/null +++ b/tests/methods/test_method_web.py @@ -0,0 +1,36 @@ +"""Tests for method web.py""" + +import logging + +import pytest +from modules.methods.method_web import MethodWeb + +logger = logging.getLogger(__name__) + + +@pytest.mark.asyncio +async def test_method_web_search_live(): + """Tests method web search with live API calls.""" + + method_web = MethodWeb(serviceCenter=None) + + # Actual request + action_result = await method_web.search( + {"query": "How old is the earth", "maxResults": 5} + ) + + # Evaluate results + assert action_result.success + assert len(action_result.documents) > 0 + + logger.info("=" * 20) + logger.info(f"Action result success status: {action_result.success}") + logger.info(f"Action result error: {action_result.error}") + logger.info(f"Action result label: {action_result.resultLabel}") + + logger.info("Documents:") + for doc in action_result.documents: + logger.info("-" * 10) + logger.info(f" - Document Name: {doc.documentName}") + logger.info(f" - Document Mime Type: {doc.mimeType}") + logger.info(f" - Document Data: {doc.documentData}") From 6b05ad206727961f3e7a815f42523cb210bc60cd Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Fri, 29 Aug 2025 20:50:40 +0200 Subject: [PATCH 04/17] chore: add method web test w/ patched tavily api response --- modules/connectors/connector_tavily.py | 7 ++++ tests/connectors/test_connector_tavily.py | 1 + tests/fixtures/__init__.py | 0 tests/fixtures/tavily_responses.py | 47 +++++++++++++++++++++++ tests/methods/test_method_web.py | 36 +++++++++++++++++ 5 files changed, 91 insertions(+) create mode 100644 tests/fixtures/__init__.py create mode 100644 tests/fixtures/tavily_responses.py diff --git a/modules/connectors/connector_tavily.py b/modules/connectors/connector_tavily.py index 4f57fb94..bcb38e3f 100644 --- a/modules/connectors/connector_tavily.py +++ b/modules/connectors/connector_tavily.py @@ -1,5 +1,6 @@ """Tavily web search class.""" +import logging import os from dataclasses import dataclass from modules.interfaces.interface_web_model import ( @@ -15,6 +16,9 @@ from tavily import AsyncTavilyClient from modules.shared.timezoneUtils import get_utc_timestamp +logger = logging.getLogger(__name__) + + @dataclass class ConnectorTavily(WebSearchBase): client: AsyncTavilyClient = None @@ -47,6 +51,9 @@ class ConnectorTavily(WebSearchBase): # Perform actual API call response = await self.client.search(query=query, max_results=max_results) + + logger.info(f"Tavily API response:\n{response}") + return response["results"] def _build_action_result(self, search_results: list) -> WebSearchActionResult: diff --git a/tests/connectors/test_connector_tavily.py b/tests/connectors/test_connector_tavily.py index 54ea382c..3b23f69a 100644 --- a/tests/connectors/test_connector_tavily.py +++ b/tests/connectors/test_connector_tavily.py @@ -11,6 +11,7 @@ logger = logging.getLogger(__name__) @pytest.mark.asyncio +@pytest.mark.expensive async def test_tavily_connector_search_test_live_api(): logger.info("Testing Tavliy connector with live API calls") diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/fixtures/tavily_responses.py b/tests/fixtures/tavily_responses.py new file mode 100644 index 00000000..789f62f0 --- /dev/null +++ b/tests/fixtures/tavily_responses.py @@ -0,0 +1,47 @@ +"""Sample tavily responses for patching responses in tests.""" + +RESPONSE_HOW_OLD_IS_EARTH_NO_ANSWER = { + "query": "How old is the earth", + "follow_up_questions": None, + "answer": None, + "images": [], + "results": [ + { + "url": "https://en.wikipedia.org/wiki/Age_of_Earth", + "title": "Age of Earth - Wikipedia", + "content": 'Scientific dating of the age of Earth The **age of Earth** is estimated to be 4.54 ± 0.05 billion years. In 1862, the physicist William Thomson, 1st Baron Kelvin published calculations that fixed the age of Earth at between 20 million and 400 million years. This suggested that it might be possible to measure the age of Earth by determining the relative proportions of radioactive materials in geological samples. Holmes published *The Age of the Earth, an Introduction to Geological Ideas* in 1927 in which he presented a range of 1.6 to 3.0 billion years. "The age of the Earth and the invention of geological time".', + "score": 0.8775715, + "raw_content": None, + }, + { + "url": "https://answersingenesis.org/age-of-the-earth/how-old-earth/?srsltid=AfmBOorqG4wgNP3fQ457C11mdj7kVx0IcByShaqH3wwc1VivvrqvJnCF", + "title": "How Old Is the Earth? | Answers in Genesis", + "content": "If you ask this question of most scientifically literate people, they will answer that the earth is about 4.54 billion years old.", + "score": 0.8703443, + "raw_content": None, + }, + { + "url": "https://sites.nd.edu/james-applewhite/2020/03/22/age-of-our-earth/", + "title": "Age of Our Earth: 6000 or 4.5 billion years old? - Notre Dame Sites", + "content": "If the Earth is only 6,000 years old, why does radiometric dating techniques used by geologists suggest the age is around much older? Each technique demonstrates the earth is much older than 6,000 years old and when combined with the various different techniques of relative dating using rock strata and formations, it becomes apparent that we have solid scientific evidence that the earth is much older than what AIG thinks. With this, as they try to discount radiometric dating as evidence since we were not around back then, they invalidate their own argument as they suggest that we should accept the words of the Bible as evidence.", + "score": 0.7975099, + "raw_content": None, + }, + { + "url": "https://www.tomorrowsworld.org/magazines/2013/march-april/how-old-is-the-earth", + "title": "How Old Is the Earth? | Tomorrow's World", + "content": "Was it billions of years ago—close to the scientists' estimate of a 4.5 billion-year-old Earth? Or was it earlier or later? On these details, the Bible is", + "score": 0.78944516, + "raw_content": None, + }, + { + "url": "https://www.planetary.org/articles/how-old-is-the-earth", + "title": "How old is the Earth? | The Planetary Society", + "content": "Skip to main content Community Account Renew Search * Become A Member * Renew Back To Main Menu Learn how our members and community are changing the worlds. Back To Main Menu * ### The Planetary Report Back To Main Menu + Become A Member + Action Center + Renew Membership Back To Main Menu Back To Main Menu + Become A Member + Renew Membership * Take Action * Member Community * Account Center * Search Public Education Specialist, The Planetary Society Along with other planets, the Earth was born in the early days of the Solar System, which first started forming about 4.6 billion years ago. thanks to techniques including radiometric dating of rocks and minerals,", + "score": 0.7756902, + "raw_content": None, + }, + ], + "response_time": 0.96, + "request_id": "3c36cccd-0918-49fd-bd1c-23c62ba7ec2d", +} diff --git a/tests/methods/test_method_web.py b/tests/methods/test_method_web.py index 078d2902..9c0b7671 100644 --- a/tests/methods/test_method_web.py +++ b/tests/methods/test_method_web.py @@ -3,12 +3,15 @@ import logging import pytest +from unittest.mock import patch from modules.methods.method_web import MethodWeb +from tests.fixtures.tavily_responses import RESPONSE_HOW_OLD_IS_EARTH_NO_ANSWER logger = logging.getLogger(__name__) @pytest.mark.asyncio +@pytest.mark.expensive async def test_method_web_search_live(): """Tests method web search with live API calls.""" @@ -34,3 +37,36 @@ async def test_method_web_search_live(): logger.info(f" - Document Name: {doc.documentName}") logger.info(f" - Document Mime Type: {doc.mimeType}") logger.info(f" - Document Data: {doc.documentData}") + + +@pytest.mark.asyncio +async def test_method_web_search_dummy(): + """Tests method web search with dummy response data - no external API calls.""" + + method_web = MethodWeb(serviceCenter=None) + + # Mock the Tavily API response + with patch( + "tavily.AsyncTavilyClient.search", + return_value=RESPONSE_HOW_OLD_IS_EARTH_NO_ANSWER, + ) as mock_client: + action_result = await method_web.search( + {"query": "How old is the earth", "maxResults": 5} + ) + mock_client.assert_called_once() + + # Evaluate results + assert action_result.success + assert len(action_result.documents) > 0 + + logger.info("=" * 20) + logger.info(f"Action result success status: {action_result.success}") + logger.info(f"Action result error: {action_result.error}") + logger.info(f"Action result label: {action_result.resultLabel}") + + logger.info("Documents:") + for doc in action_result.documents: + logger.info("-" * 10) + logger.info(f" - Document Name: {doc.documentName}") + logger.info(f" - Document Mime Type: {doc.mimeType}") + logger.info(f" - Document Data: {doc.documentData}") From 181f55359b2399e33cb85ef3aca484b6013fff07 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Fri, 29 Aug 2025 20:51:11 +0200 Subject: [PATCH 05/17] chore: exclude real api calling tests by default --- pytest.ini | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pytest.ini b/pytest.ini index b606fa5f..e3d8c35e 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,11 +1,13 @@ [pytest] testpaths = tests python_paths = . -addopts = -v --tb=short python_files = test_*.py python_classes = Test* python_functions = test_* log_file = logs/test_logs.log log_file_level = INFO log_file_format = %(asctime)s %(levelname)s %(message)s -log_file_date_format = %Y-%m-%d %H:%M:%S \ No newline at end of file +log_file_date_format = %Y-%m-%d %H:%M:%S +# Only run non-expensive tests by default, verbose log, short traceback +# Use 'pytest -m ""' to run ALL tests. +addopts = -v --tb=short -m 'not expensive' From 31177063dee00d7c734de8f7c66c7ba5d94c0ffc Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Mon, 1 Sep 2025 09:49:57 +0200 Subject: [PATCH 06/17] feat: add web crawl connector; interface --- modules/connectors/connector_tavily.py | 61 +++++++++++++++++++++-- modules/interfaces/interface_web_model.py | 29 +++++++++-- 2 files changed, 82 insertions(+), 8 deletions(-) diff --git a/modules/connectors/connector_tavily.py b/modules/connectors/connector_tavily.py index bcb38e3f..783fea8c 100644 --- a/modules/connectors/connector_tavily.py +++ b/modules/connectors/connector_tavily.py @@ -4,11 +4,16 @@ import logging import os from dataclasses import dataclass from modules.interfaces.interface_web_model import ( + WebCrawlBase, + WebCrawlDocumentData, + WebCrawlRequest, WebSearchBase, WebSearchRequest, WebSearchActionResult, WebSearchActionDocument, WebSearchDocumentData, + WebCrawlActionDocument, + WebCrawlActionResult, ) # from modules.interfaces.interfaceChatModel import ActionResult, ActionDocument @@ -20,7 +25,7 @@ logger = logging.getLogger(__name__) @dataclass -class ConnectorTavily(WebSearchBase): +class ConnectorTavily(WebSearchBase, WebCrawlBase): client: AsyncTavilyClient = None @classmethod @@ -28,7 +33,10 @@ class ConnectorTavily(WebSearchBase): return cls(client=AsyncTavilyClient(api_key=os.getenv("TAVILY_API_KEY"))) async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult: - """Handles the web search request.""" + """Handles the web search request. + + Takes a query and returns a list of URLs. + """ # Step 1: Search try: search_results = await self._search(request.query, request.max_results) @@ -37,12 +45,28 @@ class ConnectorTavily(WebSearchBase): # Step 2: Build ActionResult try: - result = self._build_action_result(search_results) + result = self._build_search_action_result(search_results) except Exception as e: return WebSearchActionResult(success=False, error=str(e)) return result + async def crawl_urls(self, request: WebCrawlRequest) -> WebCrawlActionResult: + """Crawls the given URLs and returns the extracted text content.""" + # Step 1: Crawl + try: + crawl_results = await self._crawl(request.urls) + except Exception as e: + return WebCrawlActionResult(success=False, error=str(e)) + + # Step 2: Build ActionResult + try: + result = self._build_crawl_action_result(crawl_results) + except Exception as e: + return WebCrawlActionResult(success=False, error=str(e)) + + return result + async def _search(self, query: str, max_results: int) -> WebSearchActionResult: """Calls the Tavily API to perform a web search.""" # Make sure max_results is within the allowed range @@ -56,7 +80,9 @@ class ConnectorTavily(WebSearchBase): return response["results"] - def _build_action_result(self, search_results: list) -> WebSearchActionResult: + def _build_search_action_result( + self, search_results: list + ) -> WebSearchActionResult: """Builds the ActionResult from the search results.""" documents = [] for result in search_results: @@ -75,3 +101,30 @@ class ConnectorTavily(WebSearchBase): return WebSearchActionResult( success=True, documents=documents, resultLabel="web_search_results" ) + + async def _crawl(self, urls: list) -> list[str]: + """Calls the Tavily API to extract text content from URLs.""" + response = await self.client.extract( + urls=urls, extract_depth="advanced", format="text" + ) + return response["results"] + + def _build_crawl_action_result(self, crawl_results: list) -> WebCrawlActionResult: + """Builds the ActionResult from the crawl results.""" + documents = [] + for result in crawl_results: + document_name = f"web_crawl_{get_utc_timestamp()}.txt" + doc_data = WebCrawlDocumentData( + url=result["url"], content=result["raw_content"] + ) + mime_type = "application/json" + doc = WebCrawlActionDocument( + documentName=document_name, + documentData=doc_data, + mimeType=mime_type, + ) + documents.append(doc) + + return WebCrawlActionResult( + success=True, documents=documents, resultLabel="web_crawl_results" + ) diff --git a/modules/interfaces/interface_web_model.py b/modules/interfaces/interface_web_model.py index 8dc01fc8..0a258623 100644 --- a/modules/interfaces/interface_web_model.py +++ b/modules/interfaces/interface_web_model.py @@ -40,10 +40,31 @@ class WebSearchBase(ABC): # list of URLs -> list of extracted HTML content -# TODO + +class WebCrawlRequest(BaseModel): + urls: List[str] + + +class WebCrawlDocumentData(BaseModel): + url: str + content: str + + +class WebCrawlActionDocument(ActionDocument): + documentData: WebCrawlDocumentData = Field( + description="The data extracted from a single crawled URL" + ) + + +class WebCrawlActionResult(ActionResult): + documents: List[WebCrawlActionDocument] = Field(default_factory=list) + + +class WebCrawlBase(ABC): + @abstractmethod + async def crawl_urls(self, request: WebCrawlRequest) -> WebCrawlActionResult: ... + # --- Web query --- -# query -> list of extracted text - -# TODO +# query -> list of extracted text; combines web search and crawl in one step From 0816e7c45cb64a6014204aac3c17a40c6444eed5 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Mon, 1 Sep 2025 10:15:10 +0200 Subject: [PATCH 07/17] feat: finish implement web interface w/ tavily connector (untested) --- modules/connectors/connector_tavily.py | 84 ++++++++++++++++++--- modules/interfaces/interface_web_model.py | 29 ++++++- modules/interfaces/interface_web_objects.py | 14 ++++ 3 files changed, 116 insertions(+), 11 deletions(-) diff --git a/modules/connectors/connector_tavily.py b/modules/connectors/connector_tavily.py index 783fea8c..786dc1f1 100644 --- a/modules/connectors/connector_tavily.py +++ b/modules/connectors/connector_tavily.py @@ -7,6 +7,11 @@ from modules.interfaces.interface_web_model import ( WebCrawlBase, WebCrawlDocumentData, WebCrawlRequest, + WebScrapeActionDocument, + WebScrapeActionResult, + WebScrapeBase, + WebScrapeDocumentData, + WebScrapeRequest, WebSearchBase, WebSearchRequest, WebSearchActionResult, @@ -25,7 +30,19 @@ logger = logging.getLogger(__name__) @dataclass -class ConnectorTavily(WebSearchBase, WebCrawlBase): +class TavilySearchResult: + title: str + url: str + + +@dataclass +class TavilyCrawlResult: + url: str + content: str + + +@dataclass +class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase): client: AsyncTavilyClient = None @classmethod @@ -67,7 +84,30 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase): return result - async def _search(self, query: str, max_results: int) -> WebSearchActionResult: + async def scrape(self, request: WebScrapeRequest) -> WebScrapeActionResult: + """Turns a query in a list of urls with extracted content.""" + # Step 1: Search + try: + search_results = await self._search(request.query, request.max_results) + except Exception as e: + return WebScrapeActionResult(success=False, error=str(e)) + + # Step 2: Crawl + try: + urls = [result.url for result in search_results] + crawl_results = await self._crawl(urls) + except Exception as e: + return WebScrapeActionResult(success=False, error=str(e)) + + # Step 3: Build ActionResult + try: + result = self._build_scrape_action_result(crawl_results) + except Exception as e: + return WebScrapeActionResult(success=False, error=str(e)) + + return result + + async def _search(self, query: str, max_results: int) -> list[TavilySearchResult]: """Calls the Tavily API to perform a web search.""" # Make sure max_results is within the allowed range if max_results < 0 or max_results > 20: @@ -78,18 +118,19 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase): logger.info(f"Tavily API response:\n{response}") - return response["results"] + return [ + TavilySearchResult(title=result["title"], url=result["url"]) + for result in response["results"] + ] def _build_search_action_result( - self, search_results: list + self, search_results: list[TavilySearchResult] ) -> WebSearchActionResult: """Builds the ActionResult from the search results.""" documents = [] for result in search_results: document_name = f"web_search_{get_utc_timestamp()}.txt" - document_data = WebSearchDocumentData( - title=result["title"], url=result["url"] - ) + document_data = WebSearchDocumentData(title=result.title, url=result.url) mime_type = "application/json" doc = WebSearchActionDocument( documentName=document_name, @@ -107,9 +148,14 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase): response = await self.client.extract( urls=urls, extract_depth="advanced", format="text" ) - return response["results"] + return [ + TavilyCrawlResult(url=result["url"], content=result["raw_content"]) + for result in response["results"] + ] - def _build_crawl_action_result(self, crawl_results: list) -> WebCrawlActionResult: + def _build_crawl_action_result( + self, crawl_results: list[TavilyCrawlResult] + ) -> WebCrawlActionResult: """Builds the ActionResult from the crawl results.""" documents = [] for result in crawl_results: @@ -128,3 +174,23 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase): return WebCrawlActionResult( success=True, documents=documents, resultLabel="web_crawl_results" ) + + def _build_scrape_action_result( + self, crawl_results: list[TavilyCrawlResult] + ) -> WebScrapeActionResult: + """Builds the ActionResult from the scrape results.""" + documents = [] + for result in crawl_results: + document_name = f"web_scrape_{get_utc_timestamp()}.txt" + doc_data = WebScrapeDocumentData(url=result.url, content=result.content) + mime_type = "application/json" + doc = WebScrapeActionDocument( + documentName=document_name, + documentData=doc_data, + mimeType=mime_type, + ) + documents.append(doc) + + return WebScrapeActionResult( + success=True, documents=documents, resultLabel="web_scrape_results" + ) diff --git a/modules/interfaces/interface_web_model.py b/modules/interfaces/interface_web_model.py index 0a258623..389cd7ed 100644 --- a/modules/interfaces/interface_web_model.py +++ b/modules/interfaces/interface_web_model.py @@ -65,6 +65,31 @@ class WebCrawlBase(ABC): async def crawl_urls(self, request: WebCrawlRequest) -> WebCrawlActionResult: ... -# --- Web query --- +# --- Web scrape --- -# query -> list of extracted text; combines web search and crawl in one step +# scrape -> list of extracted text; combines web search and crawl in one step + + +class WebScrapeRequest(BaseModel): + query: str + max_results: int + + +class WebScrapeDocumentData(BaseModel): + url: str + content: str + + +class WebScrapeActionDocument(ActionDocument): + documentData: WebScrapeDocumentData = Field( + description="The data extracted from a single scraped URL" + ) + + +class WebScrapeActionResult(ActionResult): + documents: List[WebScrapeActionDocument] = Field(default_factory=list) + + +class WebScrapeBase(ABC): + @abstractmethod + async def scrape(self, request: WebScrapeRequest) -> WebScrapeActionResult: ... diff --git a/modules/interfaces/interface_web_objects.py b/modules/interfaces/interface_web_objects.py index b38db6a3..0ea43bd7 100644 --- a/modules/interfaces/interface_web_objects.py +++ b/modules/interfaces/interface_web_objects.py @@ -1,6 +1,10 @@ from modules.interfaces.interface_web_model import ( + WebCrawlActionResult, WebSearchActionResult, WebSearchRequest, + WebCrawlRequest, + WebScrapeActionResult, + WebScrapeRequest, ) from dataclasses import dataclass @@ -22,3 +26,13 @@ class WebInterface: ) -> WebSearchActionResult: # NOTE: Add connectors here return await self.connector_tavily.search_urls(web_search_request) + + async def crawl(self, web_crawl_request: WebCrawlRequest) -> WebCrawlActionResult: + # NOTE: Add connectors here + return await self.connector_tavily.crawl_urls(web_crawl_request) + + async def scrape( + self, web_scrape_request: WebScrapeRequest + ) -> WebScrapeActionResult: + # NOTE: Add connectors here + return await self.connector_tavily.scrape(web_scrape_request) From 4c3592d7d9aaa7c811a5afa54283a75ecec14ce8 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Mon, 1 Sep 2025 11:21:37 +0200 Subject: [PATCH 08/17] feat: switch to single file approach --- modules/connectors/connector_tavily.py | 115 +++++++++------- modules/interfaces/interface_web_model.py | 42 +++++- modules/methods/method_web.py | 160 +++++++++++++++++++++- 3 files changed, 261 insertions(+), 56 deletions(-) diff --git a/modules/connectors/connector_tavily.py b/modules/connectors/connector_tavily.py index 786dc1f1..af802790 100644 --- a/modules/connectors/connector_tavily.py +++ b/modules/connectors/connector_tavily.py @@ -7,16 +7,19 @@ from modules.interfaces.interface_web_model import ( WebCrawlBase, WebCrawlDocumentData, WebCrawlRequest, + WebCrawlResultItem, WebScrapeActionDocument, WebScrapeActionResult, WebScrapeBase, WebScrapeDocumentData, WebScrapeRequest, + WebScrapeResultItem, WebSearchBase, WebSearchRequest, WebSearchActionResult, WebSearchActionDocument, WebSearchDocumentData, + WebSearchResultItem, WebCrawlActionDocument, WebCrawlActionResult, ) @@ -62,7 +65,7 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase): # Step 2: Build ActionResult try: - result = self._build_search_action_result(search_results) + result = self._build_search_action_result(search_results, request.query) except Exception as e: return WebSearchActionResult(success=False, error=str(e)) @@ -78,7 +81,7 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase): # Step 2: Build ActionResult try: - result = self._build_crawl_action_result(crawl_results) + result = self._build_crawl_action_result(crawl_results, request.urls) except Exception as e: return WebCrawlActionResult(success=False, error=str(e)) @@ -101,7 +104,7 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase): # Step 3: Build ActionResult try: - result = self._build_scrape_action_result(crawl_results) + result = self._build_scrape_action_result(crawl_results, request.query) except Exception as e: return WebScrapeActionResult(success=False, error=str(e)) @@ -124,26 +127,32 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase): ] def _build_search_action_result( - self, search_results: list[TavilySearchResult] + self, search_results: list[TavilySearchResult], query: str = "" ) -> WebSearchActionResult: """Builds the ActionResult from the search results.""" - documents = [] - for result in search_results: - document_name = f"web_search_{get_utc_timestamp()}.txt" - document_data = WebSearchDocumentData(title=result.title, url=result.url) - mime_type = "application/json" - doc = WebSearchActionDocument( - documentName=document_name, - documentData=document_data, - mimeType=mime_type, - ) - documents.append(doc) + # Convert to result items + result_items = [ + WebSearchResultItem(title=result.title, url=result.url) + for result in search_results + ] - return WebSearchActionResult( - success=True, documents=documents, resultLabel="web_search_results" + # Create document data with all results + document_data = WebSearchDocumentData( + query=query, results=result_items, total_count=len(result_items) ) - async def _crawl(self, urls: list) -> list[str]: + # Create single document + document = WebSearchActionDocument( + documentName=f"web_search_results_{get_utc_timestamp()}.json", + documentData=document_data, + mimeType="application/json", + ) + + return WebSearchActionResult( + success=True, documents=[document], resultLabel="web_search_results" + ) + + async def _crawl(self, urls: list) -> list[TavilyCrawlResult]: """Calls the Tavily API to extract text content from URLs.""" response = await self.client.extract( urls=urls, extract_depth="advanced", format="text" @@ -154,43 +163,57 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase): ] def _build_crawl_action_result( - self, crawl_results: list[TavilyCrawlResult] + self, crawl_results: list[TavilyCrawlResult], urls: list[str] = None ) -> WebCrawlActionResult: """Builds the ActionResult from the crawl results.""" - documents = [] - for result in crawl_results: - document_name = f"web_crawl_{get_utc_timestamp()}.txt" - doc_data = WebCrawlDocumentData( - url=result["url"], content=result["raw_content"] - ) - mime_type = "application/json" - doc = WebCrawlActionDocument( - documentName=document_name, - documentData=doc_data, - mimeType=mime_type, - ) - documents.append(doc) + # Convert to result items + result_items = [ + WebCrawlResultItem(url=result.url, content=result.content) + for result in crawl_results + ] + + # Create document data with all results + document_data = WebCrawlDocumentData( + urls=urls or [result.url for result in crawl_results], + results=result_items, + total_count=len(result_items), + ) + + # Create single document + document = WebCrawlActionDocument( + documentName=f"web_crawl_results_{get_utc_timestamp()}.json", + documentData=document_data, + mimeType="application/json", + ) return WebCrawlActionResult( - success=True, documents=documents, resultLabel="web_crawl_results" + success=True, documents=[document], resultLabel="web_crawl_results" ) def _build_scrape_action_result( - self, crawl_results: list[TavilyCrawlResult] + self, crawl_results: list[TavilyCrawlResult], query: str = "" ) -> WebScrapeActionResult: """Builds the ActionResult from the scrape results.""" - documents = [] - for result in crawl_results: - document_name = f"web_scrape_{get_utc_timestamp()}.txt" - doc_data = WebScrapeDocumentData(url=result.url, content=result.content) - mime_type = "application/json" - doc = WebScrapeActionDocument( - documentName=document_name, - documentData=doc_data, - mimeType=mime_type, - ) - documents.append(doc) + # Convert to result items + result_items = [ + WebScrapeResultItem(url=result.url, content=result.content) + for result in crawl_results + ] + + # Create document data with all results + document_data = WebScrapeDocumentData( + query=query, + results=result_items, + total_count=len(result_items), + ) + + # Create single document + document = WebScrapeActionDocument( + documentName=f"web_scrape_results_{get_utc_timestamp()}.json", + documentData=document_data, + mimeType="application/json", + ) return WebScrapeActionResult( - success=True, documents=documents, resultLabel="web_scrape_results" + success=True, documents=[document], resultLabel="web_scrape_results" ) diff --git a/modules/interfaces/interface_web_model.py b/modules/interfaces/interface_web_model.py index 389cd7ed..86f19e08 100644 --- a/modules/interfaces/interface_web_model.py +++ b/modules/interfaces/interface_web_model.py @@ -2,8 +2,6 @@ from abc import ABC, abstractmethod from modules.interfaces.interfaceChatModel import ActionDocument, ActionResult - - from pydantic import BaseModel, Field from typing import List @@ -18,11 +16,21 @@ class WebSearchRequest(BaseModel): max_results: int -class WebSearchDocumentData(BaseModel): +class WebSearchResultItem(BaseModel): + """Individual search result""" + title: str url: str +class WebSearchDocumentData(BaseModel): + """Complete search results document""" + + query: str + results: List[WebSearchResultItem] + total_count: int + + class WebSearchActionDocument(ActionDocument): documentData: WebSearchDocumentData @@ -45,14 +53,24 @@ class WebCrawlRequest(BaseModel): urls: List[str] -class WebCrawlDocumentData(BaseModel): +class WebCrawlResultItem(BaseModel): + """Individual crawl result""" + url: str content: str +class WebCrawlDocumentData(BaseModel): + """Complete crawl results document""" + + urls: List[str] + results: List[WebCrawlResultItem] + total_count: int + + class WebCrawlActionDocument(ActionDocument): documentData: WebCrawlDocumentData = Field( - description="The data extracted from a single crawled URL" + description="The data extracted from crawled URLs" ) @@ -75,14 +93,24 @@ class WebScrapeRequest(BaseModel): max_results: int -class WebScrapeDocumentData(BaseModel): +class WebScrapeResultItem(BaseModel): + """Individual scrape result""" + url: str content: str +class WebScrapeDocumentData(BaseModel): + """Complete scrape results document""" + + query: str + results: List[WebScrapeResultItem] + total_count: int + + class WebScrapeActionDocument(ActionDocument): documentData: WebScrapeDocumentData = Field( - description="The data extracted from a single scraped URL" + description="The data extracted from scraped URLs" ) diff --git a/modules/methods/method_web.py b/modules/methods/method_web.py index 27b82ba5..ccb0f185 100644 --- a/modules/methods/method_web.py +++ b/modules/methods/method_web.py @@ -3,7 +3,11 @@ from typing import Any, Dict from modules.chat.methodBase import MethodBase, action from modules.interfaces.interfaceChatModel import ActionResult from modules.interfaces.interface_web_objects import WebInterface -from modules.interfaces.interface_web_model import WebSearchRequest +from modules.interfaces.interface_web_model import ( + WebSearchRequest, + WebCrawlRequest, + WebScrapeRequest, +) logger = logging.getLogger(__name__) @@ -14,11 +18,14 @@ class MethodWeb(MethodBase): def __init__(self, serviceCenter: Any): super().__init__(serviceCenter) + self.name = "web" + self.description = "Web search, crawling, and scraping operations using Tavily" @action async def search(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Perform a web search and output a .txt file with a plain list of URLs (one per line). + """Perform a web search and outputs a .json file with a list of found URLs. + + Each result contains "title" and "url". Parameters: query (str): Search query to perform @@ -41,3 +48,150 @@ class MethodWeb(MethodBase): except Exception as e: return ActionResult(success=False, error=str(e)) + + @action + async def crawl(self, parameters: Dict[str, Any]) -> ActionResult: + """Crawls a list of URLs and extracts information from them. + + Parameters: + document (str): Document reference containing URL list from search results + expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description + """ + try: + document_ref = parameters.get("document") + + if not document_ref: + return ActionResult( + success=False, error="No document reference provided." + ) + + # Resolve document reference to ChatDocument objects + chat_documents = self.service.getChatDocumentsFromDocumentList( + [document_ref] + ) + + if not chat_documents: + return ActionResult( + success=False, + error=f"No documents found for reference: {document_ref}", + ) + + # Get the first document (search results) + search_doc = chat_documents[0] + + # Get file data using the service center + file_data = self.service.getFileData(search_doc.fileId) + if not file_data: + return ActionResult( + success=False, error="Could not retrieve file data for document" + ) + + content = file_data.decode("utf-8") + + # Parse JSON to extract URLs from search results + import json + + try: + # The document structure from WebSearchActionResult + search_data = json.loads(content) + + # Extract URLs from the search results structure + urls = [] + if isinstance(search_data, dict): + # Handle the document structure: documentData contains the actual search results + doc_data = search_data.get("documentData", search_data) + if "results" in doc_data and isinstance(doc_data["results"], list): + urls = [ + result["url"] + for result in doc_data["results"] + if isinstance(result, dict) and "url" in result + ] + elif "urls" in doc_data and isinstance(doc_data["urls"], list): + # Fallback: if URLs are stored directly in a 'urls' field + urls = [url for url in doc_data["urls"] if isinstance(url, str)] + + # Fallback: try to parse as plain text with regex (for backward compatibility) + if not urls: + logger.warning( + "Could not extract URLs from JSON structure, trying plain text parsing" + ) + import re + + urls = re.split(r"[\n,;]+", content) + urls = [ + u.strip() + for u in urls + if u.strip() + and ( + u.strip().startswith("http://") + or u.strip().startswith("https://") + ) + ] + + except json.JSONDecodeError: + # Fallback to plain text parsing if JSON parsing fails + logger.warning("Document is not valid JSON, trying plain text parsing") + import re + + urls = re.split(r"[\n,;]+", content) + urls = [ + u.strip() + for u in urls + if u.strip() + and ( + u.strip().startswith("http://") + or u.strip().startswith("https://") + ) + ] + + if not urls: + return ActionResult( + success=False, error="No valid URLs found in the document." + ) + + logger.info(f"Extracted {len(urls)} URLs from document: {urls}") + + # Prepare request data + web_crawl_request = WebCrawlRequest(urls=urls) + + # Perform request + web_interface = await WebInterface.create() + web_crawl_result = await web_interface.crawl(web_crawl_request) + + return web_crawl_result + + except Exception as e: + logger.error(f"Error in crawl method: {str(e)}") + return ActionResult(success=False, error=str(e)) + + @action + async def scrape(self, parameters: Dict[str, Any]) -> ActionResult: + """Scrapes web content by searching for URLs and then extracting their content. + + Combines search and crawl operations in one step. + + Parameters: + query (str): Search query to perform + maxResults (int, optional): Maximum number of results (default: 10) + """ + try: + query = parameters.get("query") + max_results = parameters.get("maxResults", 10) + + if not query: + return ActionResult(success=False, error="Search query is required") + + # Prepare request data + web_scrape_request = WebScrapeRequest( + query=query, + max_results=max_results, + ) + + # Perform request + web_interface = await WebInterface.create() + web_scrape_result = await web_interface.scrape(web_scrape_request) + + return web_scrape_result + + except Exception as e: + return ActionResult(success=False, error=str(e)) From cfc83a7f42d990a391ba6a3d469f13ce089892e4 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Mon, 1 Sep 2025 15:36:32 +0200 Subject: [PATCH 09/17] chore: add tests --- modules/connectors/connector_tavily.py | 6 +- tests/connectors/test_connector_tavily.py | 72 +++++++- tests/fixtures/tavily_responses.py | 26 ++- tests/methods/test_method_web.py | 196 ++++++++++++++++++++-- 4 files changed, 286 insertions(+), 14 deletions(-) diff --git a/modules/connectors/connector_tavily.py b/modules/connectors/connector_tavily.py index af802790..8a05e781 100644 --- a/modules/connectors/connector_tavily.py +++ b/modules/connectors/connector_tavily.py @@ -119,7 +119,7 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase): # Perform actual API call response = await self.client.search(query=query, max_results=max_results) - logger.info(f"Tavily API response:\n{response}") + logger.info(f"Tavily API search response:\n{response}") return [ TavilySearchResult(title=result["title"], url=result["url"]) @@ -157,6 +157,10 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase): response = await self.client.extract( urls=urls, extract_depth="advanced", format="text" ) + + # Log the result + logger.info(f"Tavily API extract (crawl) response:\n{response}") + return [ TavilyCrawlResult(url=result["url"], content=result["raw_content"]) for result in response["results"] diff --git a/tests/connectors/test_connector_tavily.py b/tests/connectors/test_connector_tavily.py index 3b23f69a..81ce64c3 100644 --- a/tests/connectors/test_connector_tavily.py +++ b/tests/connectors/test_connector_tavily.py @@ -4,7 +4,11 @@ import pytest import logging from modules.interfaces.interfaceChatModel import ActionResult -from modules.interfaces.interface_web_model import WebSearchRequest +from modules.interfaces.interface_web_model import ( + WebSearchRequest, + WebCrawlRequest, + WebScrapeRequest, +) from modules.connectors.connector_tavily import ConnectorTavily logger = logging.getLogger(__name__) @@ -13,7 +17,7 @@ logger = logging.getLogger(__name__) @pytest.mark.asyncio @pytest.mark.expensive async def test_tavily_connector_search_test_live_api(): - logger.info("Testing Tavliy connector with live API calls") + logger.info("Testing Tavliy connector search with live API calls") # Test request request = WebSearchRequest(query="How old is the Earth?", max_results=5) @@ -38,3 +42,67 @@ async def test_tavily_connector_search_test_live_api(): logger.info(f" - Document Name: {doc.documentName}") logger.info(f" - Document Mime Type: {doc.mimeType}") logger.info(f" - Document Data: {doc.documentData}") + + +@pytest.mark.asyncio +@pytest.mark.expensive +async def test_tavily_connector_crawl_test_live_api(): + logger.info("Testing Tavily connector crawl with live API calls") + + # Test request + urls = [ + "https://en.wikipedia.org/wiki/Earth", + "https://valueon.ch", + ] + request = WebCrawlRequest(urls=urls) + + # Tavily instance + connector_tavily = await ConnectorTavily.create() + + # Crawl test + action_result = await connector_tavily.crawl_urls(request=request) + + # Check results + assert isinstance(action_result, ActionResult) + + logger.info("=" * 20) + logger.info(f"Action result success status: {action_result.success}") + logger.info(f"Action result error: {action_result.error}") + logger.info(f"Action result label: {action_result.resultLabel}") + + logger.info("Documents:") + for doc in action_result.documents: + logger.info("-" * 10) + logger.info(f" - Document Name: {doc.documentName}") + logger.info(f" - Document Mime Type: {doc.mimeType}") + logger.info(f" - Document Data: {doc.documentData}") + + +@pytest.mark.asyncio +@pytest.mark.expensive +async def test_tavily_connector_scrape_test_live_api(): + logger.info("Testing Tavily connector scrape with live API calls") + + # Test request with query + request = WebScrapeRequest(query="How old is the Earth?", max_results=3) + + # Tavily instance + connector_tavily = await ConnectorTavily.create() + + # Scrape test + action_result = await connector_tavily.scrape(request=request) + + # Check results + assert isinstance(action_result, ActionResult) + + logger.info("=" * 20) + logger.info(f"Action result success status: {action_result.success}") + logger.info(f"Action result error: {action_result.error}") + logger.info(f"Action result label: {action_result.resultLabel}") + + logger.info("Documents:") + for doc in action_result.documents: + logger.info("-" * 10) + logger.info(f" - Document Name: {doc.documentName}") + logger.info(f" - Document Mime Type: {doc.mimeType}") + logger.info(f" - Document Data: {doc.documentData}") diff --git a/tests/fixtures/tavily_responses.py b/tests/fixtures/tavily_responses.py index 789f62f0..ab94d353 100644 --- a/tests/fixtures/tavily_responses.py +++ b/tests/fixtures/tavily_responses.py @@ -1,6 +1,6 @@ """Sample tavily responses for patching responses in tests.""" -RESPONSE_HOW_OLD_IS_EARTH_NO_ANSWER = { +RESPONSE_SEARCH_HOW_OLD_IS_EARTH_NO_ANSWER = { "query": "How old is the earth", "follow_up_questions": None, "answer": None, @@ -45,3 +45,27 @@ RESPONSE_HOW_OLD_IS_EARTH_NO_ANSWER = { "response_time": 0.96, "request_id": "3c36cccd-0918-49fd-bd1c-23c62ba7ec2d", } + + +RESPONSE_EXTRACT_HOW_OLD_IS_EARTH_NO_ANSWER = { + "results": [ + { + "url": "https://en.wikipedia.org/wiki/Age_of_Earth", + "raw_content": 'Jump to content\nAge of Earth\n\nAfrikaans\nالعربية\nAzərbaycanca\nবাংলা\nБеларуская\nБългарски\nCatalà\nČeština\nDansk\nΕλληνικά\nEspañol\nEsperanto\nEuskara\nفارسی\nFrançais\nGalego\n한국어\nՀայերեն\nBahasa Indonesia\nItaliano\nעברית\nҚазақша\nLatina\nLëtzebuergesch\nLietuvių\nBahasa Melayu\nNederlands\n日本語\nپښتو\nPortuguês\nRomnă\nРусский\nSimple English\nSlovenčina\nSlovenščina\nСрпски / srpski\nSrpskohrvatski / српскохрватски\nSvenska\nTürkçe\nУкраїнська\nاردو\nTiếng Việt\n文言\nYorùbá\n粵語\n中文\n\nEdit links\nFrom Wikipedia, the free encyclopedia\nScientific dating of the age of Earth\nThe age of Earth is estimated to be 4.54 ± 0.05 billion years. This age represents the final stages of Earth\'s accretion and planetary differentiation. Age estimates are based on evidence from radiometric age-dating of meteoritic material—consistent with the radiometric ages of the oldest-known terrestrial material and lunar samples—and astrophysical accretion models consistent with observations of planet formation in protoplanetary disks.\nFollowing the development of radiometric dating in the early 20th century, measurements of lead in uranium-rich minerals showed that some were in excess of a billion years old. The oldest such minerals analyzed to date—small crystals of zircon from the Jack Hills of Western Australia—are at least 4.404 billion years old. Calcium–aluminium-rich inclusions—the oldest known solid constituents within meteorites that are formed within the Solar System—are 4.5673 ± 0.00016 billion years old giving a lower limit for the age of the Solar System.\nIt is hypothesized that the accretion of Earth began soon after the formation of the calcium-aluminium-rich inclusions. Because the duration of this accretion process is not yet adequately constrained—predictions from different accretion models range from around 30 million to 100 million years—the difference between the age of Earth and of the oldest rocks is difficult to determine. It can also be difficult to determine the exact age of the oldest rocks on Earth, exposed at the surface, as they are aggregates of minerals of possibly different ages.\nDevelopment of modern geologic concepts\n| | | |\n --- \n| Life timeline | | |\n| This box: view talk edit | | |\n| −4500 — – — – −4000 — – — – −3500 — – — – −3000 — – — – −2500 — – — – −2000 — – — – −1500 — – — – −1000 — – — – −500 — – — – 0 — | Water Single-celled life Photosynthesis Multicellular life Plants Arthropods Molluscs Flowers Dinosaurs Mammals Birds Primates Hadean Archean Proterozoic Phanerozoic | | | | --- | | ← | Earth formed | | | | --- | | ← | Earliest water | | | | --- | | ← | LUCA | | | | --- | | ← | Earliest fossils | | | | --- | | ← | Atmospheric oxygen | | | | --- | | ← | Sexual reproduction | | | | --- | | ← | Earliest fungi | | | | --- | | ← | Neoproterozoic oxygenation event | | | | --- | | ← | Ediacaran biota | | | | --- | | ← | Cambrian explosion | | | | --- | | ← | Earliest tetrapods | | | | --- | | ← | Earliest hominoid | |\n| (million years ago) | | |\nMain article: History of geology\nFurther information: Relative dating\nStudies of strata—the layering of rocks and soil—gave naturalists an appreciation that Earth may have been through many changes during its existence. These layers often contained fossilized remains of unknown creatures, leading some to interpret a progression of organisms from layer to layer.\nNicolas Steno in the 17th century was one of the first naturalists to appreciate the connection between fossil remains and strata. His observations led him to formulate important stratigraphic concepts (i.e., the "law of superposition" and the "principle of original horizontality"). In the 1790s, William Smith hypothesized that if two layers of rock at widely differing locations contained similar fossils, then it was very plausible that the layers were the same age. Smith\'s nephew and student, John Phillips, later calculated by such means that Earth was about 96 million years old.\nIn the mid-18th century, the naturalist Mikhail Lomonosov suggested that Earth had been created separately from, and several hundred thousand years before, the rest of the universe.[citation needed] Lomonosov\'s ideas were mostly speculative.[citation needed] In 1779 the Comte du Buffon tried to obtain a value for the age of Earth using an experiment: he created a small globe that resembled Earth in composition and then measured its rate of cooling. This led him to estimate that Earth was about 75,000 years old. Even earlier, in 1687, in his Principia, the mathematician and physicist Isaac Newton was the first to calculate the age of the Earth by experiment, coming to a conclusion of 50,000 years.\nOther naturalists used these hypotheses to construct a history of Earth, though their timelines were inexact as they did not know how long it took to lay down stratigraphic layers. In 1830, geologist Charles Lyell, developing ideas found in James Hutton\'s works, popularized the concept that the features of Earth were in perpetual change, eroding and reforming continuously, and the rate of this change was roughly constant. This was a challenge to the traditional view, which saw the history of Earth as dominated by intermittent catastrophes. Many naturalists were influenced by Lyell to become "uniformitarians" who believed that changes were constant and uniform.[citation needed]\nEarly calculations\nFurther information: William Thomson, 1st Baron Kelvin § Age of the Earth: geology\nIn 1862, the physicist William Thomson, 1st Baron Kelvin published calculations that fixed the age of Earth at between 20 million and 400 million years. He assumed that Earth had formed as a completely molten object, and determined the amount of time it would take for the near-surface temperature gradient to decrease to its present value. His calculations did not account for heat produced via radioactive decay (a then unknown process) or, more significantly, convection inside Earth, which allows the temperature in the upper mantle to remain high much longer, maintaining a high thermal gradient in the crust much longer. Even more constraining were Thomson\'s estimates of the age of the Sun, which were based on estimates of its thermal output and a theory that the Sun obtains its energy from gravitational collapse; Thomson estimated that the Sun is about 20 million years old.\nGeologists such as Lyell had difficulty accepting such a short age for Earth. For biologists, even 100 million years seemed much too short to be plausible. In Charles Darwin\'s theory of evolution, the process of random heritable variation with cumulative selection requires great durations of time, and Darwin stated that Thomson\'s estimates did not appear to provide enough time. According to modern biology, the total evolutionary history from the beginning of life to today has taken place since 3.5 to 3.8 billion years ago, the amount of time which passed since the last universal ancestor of all living organisms as shown by geological dating.\nIn a lecture in 1869, Darwin\'s great advocate, Thomas Henry Huxley, attacked Thomson\'s calculations, suggesting they appeared precise in themselves but were based on faulty assumptions. The physicist Hermann von Helmholtz (in 1856) and astronomer Simon Newcomb (in 1892) contributed their own calculations of 22 and 18 million years, respectively, to the debate: they independently calculated the amount of time it would take for the Sun to condense down to its current diameter and brightness from the nebula of gas and dust from which it was born. Their values were consistent with Thomson\'s calculations. However, they assumed that the Sun was only glowing from the heat of its gravitational contraction. The process of solar nuclear fusion was not yet known to science.\nIn 1892, Thomson was ennobled as Lord Kelvin in appreciation of his many scientific accomplishments. In 1895 John Perry challenged Kelvin\'s figure on the basis of his assumptions on conductivity, and Oliver Heaviside entered the dialogue, considering it "a vehicle to display the ability of his operator method to solve problems of astonishing complexity." Other scientists backed up Kelvin\'s figures. Darwin\'s son, the astronomer George H. Darwin, proposed that Earth and Moon had broken apart in their early days when they were both molten. He calculated the amount of time it would have taken for tidal friction to give Earth its current 24-hour day. His value of 56 million years was additional evidence that Thomson was on the right track. The last estimate Kelvin gave, in 1897, was: "that it was more than 20 and less than 40 million year old, and probably much nearer 20 than 40". In 1899 and 1900, John Joly calculated the rate at which the oceans should have accumulated salt from erosion processes and determined that the oceans were about 80 to 100 million years old.\nRadiometric dating\nMain article: Radiometric dating\nOverview\nBy their chemical nature, rock minerals contain certain elements and not others; but in rocks containing radioactive isotopes, the process of radioactive decay generates exotic elements over time. By measuring the concentration of the stable end product of the decay, coupled with knowledge of the half life and initial concentration of the decaying element, the age of the rock can be calculated. Typical radioactive end products are argon from decay of potassium-40, and lead from decay of uranium and thorium. If the rock becomes molten, as happens in Earth\'s mantle, such nonradioactive end products typically escape or are redistributed. Thus the age of the oldest terrestrial rock gives a minimum for the age of Earth, assuming that no rock has been intact for longer than Earth itself.\nConvective mantle and radioactivity\nThe discovery of radioactivity introduced another factor in the calculation. After Henri Becquerel\'s initial discovery in 1896, Marie and Pierre Curie discovered the radioactive elements polonium and radium in 1898; and in 1903, Pierre Curie and Albert Laborde announced that radium produces enough heat to melt its own weight in ice in less than an hour. Geologists quickly realized that this upset the assumptions underlying most calculations of the age of Earth. These had assumed that the original heat of Earth and the Sun had dissipated steadily into space, but radioactive decay meant that this heat had been continually replenished. George Darwin and John Joly were the first to point this out, in 1903.\nInvention of radiometric dating\nRadioactivity, which had overthrown the old calculations, yielded a bonus by providing a basis for new calculations, in the form of radiometric dating.\nErnest Rutherford and Frederick Soddy jointly had continued their work on radioactive materials and concluded that radioactivity was caused by a spontaneous transmutation of atomic elements. In radioactive decay, an element breaks down into another, lighter element, releasing alpha, beta, or gamma radiation in the process. They also determined that a particular isotope of a radioactive element decays into another element at a distinctive rate. This rate is given in terms of a "half-life", or the amount of time it takes half of a mass of that radioactive material to break down into its "decay product".\nSome radioactive materials have short half-lives; some have long half-lives. Uranium and thorium have long half-lives and so persist in Earth\'s crust, but radioactive elements with short half-lives have generally disappeared. This suggested that it might be possible to measure the age of Earth by determining the relative proportions of radioactive materials in geological samples. In reality, radioactive elements do not always decay into nonradioactive ("stable") elements directly, instead, decaying into other radioactive elements that have their own half-lives and so on, until they reach a stable element. These "decay chains", such as the uranium-radium and thorium series, were known within a few years of the discovery of radioactivity and provided a basis for constructing techniques of radiometric dating.\nThe pioneers of radioactivity were chemist Bertram B. Boltwood and physicist Rutherford. Boltwood had conducted studies of radioactive materials as a consultant, and when Rutherford lectured at Yale in 1904, Boltwood was inspired to describe the relationships between elements in various decay series. Late in 1904, Rutherford took the first step toward radiometric dating by suggesting that the alpha particles released by radioactive decay could be trapped in a rocky material as helium atoms. At the time, Rutherford was only guessing at the relationship between alpha particles and helium atoms, but he would prove the connection four years later.\nSoddy and Sir William Ramsay had just determined the rate at which radium produces alpha particles, and Rutherford proposed that he could determine the age of a rock sample by measuring its concentration of helium. He dated a rock in his possession to an age of 40 million years by this technique. Rutherford wrote of addressing a meeting of the Royal Institution in 1904:\n\nI came into the room, which was half dark, and presently spotted Lord Kelvin in the audience and realized that I was in trouble at the last part of my speech dealing with the age of the Earth, where my views conflicted with his. To my relief, Kelvin fell fast asleep, but as I came to the important point, I saw the old bird sit up, open an eye, and cock a baleful glance at me! Then a sudden inspiration came, and I said, "Lord Kelvin had limited the age of the Earth, provided no new source was discovered. That prophetic utterance refers to what we are now considering tonight, radium!" Behold! the old boy beamed upon me.\n\nRutherford assumed that the rate of decay of radium as determined by Ramsay and Soddy was accurate and that helium did not escape from the sample over time. Rutherford\'s scheme was inaccurate, but it was a useful first step. Boltwood focused on the end products of decay series. In 1905, he suggested that lead was the final stable product of the decay of radium. It was already known that radium was an intermediate product of the decay of uranium. Rutherford joined in, outlining a decay process in which radium emitted five alpha particles through various intermediate products to end up with lead, and speculated that the radium–lead decay chain could be used to date rock samples. Boltwood did the legwork and by the end of 1905 had provided dates for 26 separate rock samples, ranging from 92 to 570 million years. He did not publish these results, which was fortunate because they were flawed by measurement errors and poor estimates of the half-life of radium. Boltwood refined his work and finally published the results in 1907.\nBoltwood\'s paper pointed out that samples taken from comparable layers of strata had similar lead-to-uranium ratios, and that samples from older layers had a higher proportion of lead, except where there was evidence that lead had leached out of the sample. His studies were flawed by the fact that the decay series of thorium was not understood, which led to incorrect results for samples that contained both uranium and thorium. However, his calculations were far more accurate than any that had been performed to that time. Refinements in the technique would later give ages for Boltwood\'s 26 samples of 410 million to 2.2 billion years.\nArthur Holmes establishes radiometric dating\nAlthough Boltwood published his paper in a prominent geological journal, the geological community had little interest in radioactivity.[citation needed] Boltwood gave up work on radiometric dating and went on to investigate other decay series. Rutherford remained mildly curious about the issue of the age of Earth but did little work on it.\nRobert Strutt tinkered with Rutherford\'s helium method until 1910 and then ceased. However, Strutt\'s student Arthur Holmes became interested in radiometric dating and continued to work on it after everyone else had given up. Holmes focused on lead dating because he regarded the helium method as unpromising. He performed measurements on rock samples and concluded in 1911 that the oldest (a sample from Ceylon) was about 1.6 billion years old. These calculations were not particularly trustworthy. For example, he assumed that the samples had contained only uranium and no lead when they were formed.\nMore important research was published in 1913. It showed that elements generally exist in multiple variants with different masses, or "isotopes". In the 1930s, isotopes would be shown to have nuclei with differing numbers of the neutral particles known as "neutrons". In that same year, other research was published establishing the rules for radioactive decay, allowing more precise identification of decay series.\nMany geologists felt these new discoveries made radiometric dating so complicated as to be worthless.[citation needed] Holmes felt that they gave him tools to improve his techniques, and he plodded ahead with his research, publishing before and after the First World War. His work was generally ignored until the 1920s, though in 1917 Joseph Barrell, a professor of geology at Yale, redrew geological history as it was understood at the time to conform to Holmes\'s findings in radiometric dating. Barrell\'s research determined that the layers of strata had not all been laid down at the same rate, and so current rates of geological change could not be used to provide accurate timelines of the history of Earth.[citation needed]\nHolmes\' persistence finally began to pay off in 1921, when the speakers at the yearly meeting of the British Association for the Advancement of Science came to a rough consensus that Earth was a few billion years old and that radiometric dating was credible. Holmes published The Age of the Earth, an Introduction to Geological Ideas in 1927 in which he presented a range of 1.6 to 3.0 billion years. No great push to embrace radiometric dating followed, however, and the die-hards in the geological community stubbornly resisted. They had never cared for attempts by physicists to intrude in their domain, and had successfully ignored them so far. The growing weight of evidence finally tilted the balance in 1931, when the National Research Council of the US National Academy of Sciences decided to resolve the question of the age of Earth by appointing a committee to investigate.\nHolmes, being one of the few people who was trained in radiometric dating techniques, was a committee member and in fact wrote most of the final report. Thus, Holmes\' report concluded that radioactive dating was the only reliable means of pinning down a geologic time scale. Questions of bias were deflected by the great and exacting detail of the report. It described the methods used, the care with which measurements were made, and their error bars and limitations.[citation needed]\nModern radiometric dating\nRadiometric dating continues to be the predominant way scientists date geologic time scales. Techniques for radioactive dating have been tested and fine-tuned on an ongoing basis since the 1960s. Forty or so different dating techniques have been utilized to date, working on a wide variety of materials. Dates for the same sample using these different techniques are in very close agreement on the age of the material.[citation needed] Possible contamination problems do exist, but they have been studied and dealt with by careful investigation, leading to sample preparation procedures being minimized to limit the chance of contamination.[citation needed]\nUse of meteorites\nAn age of 4.55 ± 0.07 billion years, very close to today\'s accepted age, was determined by Clair Cameron Patterson using uranium–lead isotope dating (specifically lead–lead dating) on several meteorites including the Canyon Diablo meteorite and published in 1956. The quoted age of Earth is derived, in part, from the Canyon Diablo meteorite for several important reasons and is built upon a modern understanding of cosmochemistry built up over decades of research.\nMost geological samples from Earth are unable to give a direct date of the formation of Earth from the solar nebula because Earth has undergone differentiation into the core, mantle, and crust, and this has then undergone a long history of mixing and unmixing of these sample reservoirs by plate tectonics, weathering and hydrothermal circulation.\nAll of these processes may adversely affect isotopic dating mechanisms because the sample cannot always be assumed to have remained as a closed system, by which it is meant that either the parent or daughter nuclide (a species of atom characterised by the number of neutrons and protons an atom contains) or an intermediate daughter nuclide may have been partially removed from the sample, which will skew the resulting isotopic date. To mitigate this effect it is usual to date several minerals in the same sample, to provide an isochron. Alternatively, more than one dating system may be used on a sample to check the date.\nSome meteorites are furthermore considered to represent the primitive material from which the accreting solar disk was formed. Some have behaved as closed systems (for some isotopic systems) soon after the solar disk and the planets formed.[citation needed] To date, these assumptions are supported by much scientific observation and repeated isotopic dates, and it is certainly a more robust hypothesis than that which assumes a terrestrial rock has retained its original composition.\nNevertheless, ancient Archaean lead ores of galena have been used to date the formation of Earth as these represent the earliest formed lead-only minerals on the planet and record the earliest homogeneous lead–lead isotope systems on the planet. These have returned age dates of 4.54 billion years with a precision of as little as 1% margin for error.\nStatistics for several meteorites that have undergone isochron dating are as follows:\n| 1. St. Severin (ordinary chondrite) | | | |\n --- --- |\n| | 1. | Pb-Pb isochron | 4.543 ± 0.019 billion years |\n| | 2. | Sm-Nd isochron | 4.55 ± 0.33 billion years |\n| | 3. | Rb-Sr isochron | 4.51 ± 0.15 billion years |\n| | 4. | Re-Os isochron | 4.68 ± 0.15 billion years |\n| 2. Juvinas (basaltic achondrite) | | | |\n| | 1. | Pb-Pb isochron | 4.556 ± 0.012 billion years |\n| | 2. | Pb-Pb isochron | 4.540 ± 0.001 billion years |\n| | 3. | Sm-Nd isochron | 4.56 ± 0.08 billion years |\n| | 4. | Rb-Sr isochron | 4.50 ± 0.07 billion years |\n| 3. Allende (carbonaceous chondrite) | | | |\n| | 1. | Pb-Pb isochron | 4.553 ± 0.004 billion years |\n| | 2. | Ar-Ar age spectrum | 4.52 ± 0.02 billion years |\n| | 3. | Ar-Ar age spectrum | 4.55 ± 0.03 billion years |\n| | 4. | Ar-Ar age spectrum | 4.56 ± 0.05 billion years |\nCanyon Diablo meteorite\nFurther information: Age of the Solar System and Canyon Diablo (meteorite)\nThe Canyon Diablo meteorite was used because it is both large and representative of a particularly rare type of meteorite that contains sulfide minerals (particularly troilite, FeS), metallic nickel-iron alloys, plus silicate minerals. This is important because the presence of the three mineral phases allows investigation of isotopic dates using samples that provide a great separation in concentrations between parent and daughter nuclides. This is particularly true of uranium and lead. Lead is strongly chalcophilic and is found in the sulfide at a much greater concentration than in the silicate, versus uranium. Because of this segregation in the parent and daughter nuclides during the formation of the meteorite, this allowed a much more precise date of the formation of the solar disk and hence the planets than ever before.\nThe age determined from the Canyon Diablo meteorite has been confirmed by hundreds of other age determinations, from both terrestrial samples and other meteorites. The meteorite samples, however, show a spread from 4.53 to 4.58 billion years ago. This is interpreted as the duration of formation of the solar nebula and its collapse into the solar disk to form the Sun and the planets. This 50 million year time span allows for accretion of the planets from the original solar dust and meteorites.\nThe Moon, as another extraterrestrial body that has not undergone plate tectonics and that has no atmosphere, provides quite precise age dates from the samples returned from the Apollo missions. Rocks returned from the Moon have been dated at a maximum of 4.51 billion years old. Martian meteorites that have landed upon Earth have also been dated to around 4.5 billion years old by lead–lead dating. Lunar samples, since they have not been disturbed by weathering, plate tectonics or material moved by organisms, can also provide dating by direct electron microscope examination of cosmic ray tracks. The accumulation of dislocations generated by high energy cosmic ray particle impacts provides another confirmation of the isotopic dates. Cosmic ray dating is only useful on material that has not been melted, since melting erases the crystalline structure of the material, and wipes away the tracks left by the particles.\nSee also\n\nWorld portal\n\nAge of the universe\n\nCreation myth\nGeochronology\nHistory of Earth\nNatural history\nOldest dated rocks\nTimeline of natural history\n\nReferences\n\n^ "Age of the Earth". U.S. Geological Survey. 1997. Archived from the original on 23 December 2005. Retrieved 2006-01-10.\n^ Dalrymple, G. Brent (2001). "The age of the Earth in the twentieth century: a problem (mostly) solved". Special Publications, Geological Society of London. 190 (1): 205–221. Bibcode:2001GSLSP.190..205D. doi:10.1144/GSL.SP.2001.190.01.14. S2CID 130092094.\n^ Manhesa, Gérard; Allègre, Claude J.; Dupréa, Bernard & Hamelin, Bruno (1980). "Lead isotope study of basic-ultrabasic layered complexes: Speculations about the age of the earth and primitive mantle characteristics". Earth and Planetary Science Letters. 47 (3): 370–382. Bibcode:1980E&PSL..47..370M. doi:10.1016/0012-821X(80)90024-2.\n^ Braterman, Paul S. (2013). "How Science Figured Out the Age of Earth". Scientific American. Archived from the original on 2016-04-12.\n^ a b Mezger, K.; Schönbächler, M.; Bouvier, A. (2020-03-04). "Accretion of the Earth—Missing Components?". Space Science Reviews. 216 (2): 27. doi:10.1007/s11214-020-00649-y. hdl:20.500.11850/405628. ISSN 1572-9672.\n^ Hedman, Matthew (2007). "9: Meteorites and the Age of the Solar System". The Age of Everything. University of Chicago Press. pp. 142–162. ISBN 9780226322940. Archived from the original on 2018-02-14.\n^ a b Wilde, S. A.; Valley, J. W.; Peck, W. H.; Graham C. M. (2001-01-11). "Evidence from detrital zircons for the existence of continental crust and oceans on the Earth 4.4 Gyr ago". Nature. 409 (6817): 175–178. Bibcode:2001Natur.409..175W. doi:10.1038/35051550. PMID 11196637. S2CID 4319774.\n^ Barboni, Melanie; Boehnke, Patrick; Keller, Brenhin; Kohl, Issaku E.; Schoene, Blair; Young, Edward D.; McKeegan, Kevin D. (2017-01-06). "Early formation of the Moon 4.51 billion years ago". Science Advances. 3 (1): e1602365. Bibcode:2017SciA....3E2365B. doi:10.1126/sciadv.1602365. ISSN 2375-2548. PMC 5226643. PMID 28097222.\n^ Halliday, Alex N.; Canup, Robin M. (2022-11-29). "The accretion of planet Earth". Nature Reviews Earth & Environment. 4 (1): 19–35. doi:10.1038/s43017-022-00370-0. ISSN 2662-138X.\n^ Pfalzner, S; Davies, M B; Gounelle, M; Johansen, A; Münker, C; Lacerda, P; Zwart, S Portegies; Testi, L; Trieloff, M; Veras, D (2015-06-01). "The formation of the solar system". Physica Scripta. 90 (6): 068001. arXiv:1501.03101. doi:10.1088/0031-8949/90/6/068001. ISSN 0031-8949.\n^ a b c Boltwood, B. B. (1907). "On the ultimate disintegration products of the radio-active elements. Part II. The disintegration products of uranium". American Journal of Science. 23 (134): 77–88. doi:10.2475/ajs.s4-23.134.78. S2CID 131688682.\n For the abstract, see: Chemical Abstracts Service, American Chemical Society (1907). Chemical Abstracts. New York, London: American Chemical Society. p. 817. Retrieved 2008-12-19.\n^ Valley, John W.; Peck, William H.; Kin, Elizabeth M. (1999). "Zircons Are Forever" (PDF). The Outcrop, Geology Alumni Newsletter. University of Wisconsin-Madison. pp. 34–35. Archived (PDF) from the original on 2009-02-26. Retrieved 2008-12-22.\n^ Wyche, S.; Nelson, D. R.; Riganti, A. (2004). "4350–3130 Ma detrital zircons in the Southern Cross Granite–Greenstone Terrane, Western Australia: implications for the early evolution of the Yilgarn Craton". Australian Journal of Earth Sciences. 51 (1): 31–45. Bibcode:2004AuJES..51...31W. doi:10.1046/j.1400-0952.2003.01042.x.\n^ Amelin, Yuri; Kaltenbach, Angela; Iizuka, Tsuyoshi; Stirling, Claudine H.; Ireland, Trevor R.; Petaev, Michail; Jacobsen, Stein B. (2010-12-01). "U–Pb chronology of the Solar System\'s oldest solids with variable 238U/235U". Earth and Planetary Science Letters. 300 (3): 343–350. doi:10.1016/j.epsl.2010.10.015. hdl:1885/21305. ISSN 0012-821X.\n^ Connelly, James N.; Bizzarro, Martin; Krot, Alexander N.; Nordlund, Åke; Wielandt, Daniel; Ivanova, Marina A. (2012-11-02). "The Absolute Chronology and Thermal Processing of Solids in the Solar Protoplanetary Disk". Science. 338 (6107): 651–655. doi:10.1126/science.1226919.\n^ Sossi, Paolo A.; Stotz, Ingo L.; Jacobson, Seth A.; Morbidelli, Alessandro; O’Neill, Hugh St C. (2022-07-07). "Stochastic accretion of the Earth". Nature Astronomy. 6 (8): 951–960. doi:10.1038/s41550-022-01702-2. ISSN 2397-3366. PMC 7613298.\n^ Lyell, Charles, Sir (1866). Elements of Geology; or, The Ancient Changes of the Earth and its Inhabitants as Illustrated by Geological Monuments (Sixth ed.). New York: D. Appleton and company. Retrieved 2008-12-19.{{cite book}}: CS1 maint: multiple names: authors list (link)\n^ a b Stiebing, William H. (1994). Uncovering the Past. Oxford University Press US. ISBN 978-0-19-508921-9.\n^ a b Brookfield, Michael E. (2004). Principles of Stratigraphy. Blackwell Publishing. p. 116. ISBN 978-1-4051-1164-5.\n^ Fuller, J. G. C. M. (2007-07-17). "Smith\'s other debt, John Strachey, William Smith and the strata of England 1719–1801". Geoscientist. The Geological Society. Archived from the original on 24 November 2008. Retrieved 2008-12-19.\n^ Burchfield, Joe D. (1998). "The age of the Earth and the invention of geological time". Geological Society, London, Special Publications. 143 (1): 137–143. Bibcode:1998GSLSP.143..137B. CiteSeerX 10.1.1.557.2702. doi:10.1144/GSL.SP.1998.143.01.12. S2CID 129443412.\n^ BUFFON, GEORGES LOUIS LECLERC (2022). HISTOIRE NATURELLE, GA (C)NA (C)RALE ET PARTICULIARE, : introduction a l\'histoire... des mina (c)raux (classic reprint). [S.l.]: FORGOTTEN BOOKS. ISBN 978-0-265-92735-9. OCLC 1354275595.\n^ Merrill, Ronald T. (2010). Our Magnetic Earth: The Science of Geomagnetism. Chicago: University of Chicago Press. p. 86. ISBN 978-0-226-52053-7.\n^ Simms, D. L. (2004). "Newton\'s Contribution to the Science of Heat". Annals of Science. 61 (1): 33–77. doi:10.1080/00033790210123810. ISSN 0003-3790.\n^ a b England, P.; Molnar, P.; Righter, F. (January 2007). "John Perry\'s neglected critique of Kelvin\'s age for the Earth: A missed opportunity in geodynamics". GSA Today. 17 (1): 4–9. Bibcode:2007GSAT...17R...4E. doi:10.1130/GSAT01701A.1.\n^ Dalrymple (1994) pp. 14–17, 38\n^ Burchfield, Joe D. (1990-05-15). Lord Kelvin and the Age of the Earth. University of Chicago Press. pp. 69 ff. ISBN 9780226080437. Archived from the original on 2018-02-14.\n^ Stacey, Frank D. (2000). "Kelvin\'s age of the Earth paradox revisited". Journal of Geophysical Research. 105 (B6): 13155–13158. Bibcode:2000JGR...10513155S. doi:10.1029/2000JB900028.\n^ Origin of Species, Charles Darwin, 1872 edition, page 286\n^ Borenstein, Seth (November 13, 2013). "Oldest fossil found: Meet your microbial mom". Excite. Yonkers, NY: Mindspark Interactive Network. Associated Press. Archived from the original on June 29, 2015. Retrieved 2015-03-02.)\n^ a b c Dalrymple (1994) pp. 14–17\n^ Paul J. Nahin (1985) Oliver Heaviside, Fractional Operators, and the Age of the Earth, IEEE Transactions on Education E-28(2): 94–104, link from IEEE Explore\n^ Dalrymple (1994) pp. 14, 43\n^ a b c Nichols, Gary (2009). "21.2 Radiometric Dating". Sedimentology and Stratigraphy. John Wiley & Sons. pp. 325–327. ISBN 978-1405193795.\n^ Henri Becquerel (1896). "Sur les radiations émises par phosphorescence". Comptes Rendus. 122: 420–421.\n^ Comptes Rendus 122: 420 (1896), translated by Carmen Giunta. Accessed 12 April 2021.\n^ Henri Becquerel (1896). "Sur les radiations invisibles émises par les corps phosphorescents". Comptes Rendus. 122: 501–503.\n^ Comptes Rendus 122: 501–503 (1896), translated by Carmen Giunta. Accessed 12 April 2021.\n^ Curie, Pierre; Curie, Marie & Bémont, Gustave (1898). "Sur une nouvelle substance fortement radio-active, contenue dans la pechblende (On a new, strongly radioactive substance contained in pitchblende)". Comptes Rendus. 127: 1215–1217. Archived from the original on 6 August 2009. Retrieved 12 April 2021.\n^ Curie, Pierre; Laborde, Albert (1903). "Sur la chaleur dégagée spontanément par les sels de radium". Comptes Rendus. 136: 673–675.\n^ Joly, John (1909). Radioactivity and Geology: An Account of the Influence of Radioactive Energy on Terrestrial History (1st ed.). London, UK: Archibald Constable & Co., ltd. p. 36. Reprinted by BookSurge Publishing (2004) ISBN 1-4021-3577-7.\n^ Rutherford, E. (1906). Radioactive Transformations. London: Charles Scribner\'s Sons. Reprinted by Juniper Grove (2007) ISBN 978-1-60355-054-3.\n^ Eve, Arthur Stewart (1939). Rutherford: Being the life and letters of the Rt. Hon. Lord Rutherford, O. M.. Cambridge: Cambridge University Press.\n^ Dalrymple (1994) p. 74\n^ The Age of the Earth Debate Badash, L Scientific American 1989 esp p95 Archived 2016-11-05 at the Wayback Machine\n^ Dalrymple (1994) pp. 77–78\n^ Patterson, Claire (1956). "Age of meteorites and the earth" (PDF). Geochimica et Cosmochimica Acta. 10 (4): 230–237. Bibcode:1956GeCoA..10..230P. doi:10.1016/0016-7037(56)90036-9. Archived (PDF) from the original on 2010-06-21. Retrieved 2009-07-07.\n^ Carlson, R. W.; Tera, F. (December 1–3, 1998). "Lead–Lead Constraints on the Timescale of Early Planetary Differentiation" (PDF). Conference Proceedings, Origin of the Earth and Moon. Houston, Texas: Lunar and Planetary Institute. p. 6. Archived (PDF) from the original on 16 December 2008. Retrieved 2008-12-22.\n^ Dalrymple (1994) pp. 310–341\n^ Dalrymple, Brent G. (2004). Ancient Earth, Ancient Skies: The Age of the Earth and Its Cosmic Surroundings. Stanford University Press. pp. 147, 169. ISBN 978-0-8047-4933-6.\n^ Terada, K.; Sano, Y. (May 20–24, 2001). "In-situ ion microprobe U-Pb dating of phosphates in H-chondrites" (PDF). Proceedings, Eleventh Annual V. M. Goldschmidt Conference. Hot Springs, Virginia: Lunar and Planetary Institute. Bibcode:2001eag..conf.3306T. Archived (PDF) from the original on 16 December 2008. Retrieved 2008-12-22.\n\nBibliography\n\nDalrymple, G. Brent (1994-02-01). The Age of the Earth. Stanford University Press. ISBN 978-0-8047-2331-2.\n\nFurther reading\n\nBaadsgaard, H.; Lerbekmo, J.F.; Wijbrans, J.R., 1993. Multimethod radiometric age for a bentonite near the top of the Baculites reesidei Zone of southwestern Saskatchewan (Campanian-Maastrichtian stage boundary?). Canadian Journal of Earth Sciences, v.30, p. 769–775.\nBaadsgaard, H. and Lerbekmo, J.F., 1988. A radiometric age for the Cretaceous-Tertiary boundary based on K-Ar, Rb-Sr, and U-Pb ages of bentonites from Alberta, Saskatchewan, and Montana. Canadian Journal of Earth Sciences, v.25, p. 1088–1097.\nEberth, D.A. and Braman, D., 1990. Stratigraphy, sedimentology, and vertebrate paleontology of the Judith River Formation (Campanian) near Muddy Lake, west-central Saskatchewan. Bulletin of Canadian Petroleum Geology, v.38, no.4, p. 387–406.\nGoodwin, M.B. and Deino, A.L., 1989. The first radiometric ages from the Judith River Formation (Upper Cretaceous), Hill County, Montana. Canadian Journal of Earth Sciences, v.26, p. 1384–1391.\nGradstein, F. M.; Agterberg, F.P.; Ogg, J.G.; Hardenbol, J.; van Veen, P.; Thierry, J. and Zehui Huang., 1995. A Triassic, Jurassic and Cretaceous time scale. IN: Bergren, W. A.; Kent, D.V.; Aubry, M-P. and Hardenbol, J. (eds.), Geochronology, Time Scales, and Global Stratigraphic Correlation. Society of Economic Paleontologists and Mineralogists, Special Publication No. 54, p. 95–126.\nHarland, W.B., Cox, A.V.; Llewellyn, P.G.; Pickton, C.A.G.; Smith, A.G.; and Walters, R., 1982. A Geologic Time Scale: 1982 edition. Cambridge University Press: Cambridge, 131p.\nHarland, W.B.; Armstrong, R.L.; Cox, A.V.; Craig, L.E.; Smith, A.G.; Smith, D.G., 1990. A Geologic Time Scale, 1989 edition. Cambridge University Press: Cambridge, p. 1–263. ISBN 0-521-38765-5\nHarper, C.W. Jr (1980). "Relative age inference in paleontology". Lethaia. 13 (3): 239–248. Bibcode:1980Letha..13..239H. doi:10.1111/j.1502-3931.1980.tb00638.x.\nObradovich, J.D., 1993. A Cretaceous time scale. IN: Caldwell, W.G.E. and Kauffman, E.G. (eds.). Evolution of the Western Interior Basin. Geological Association of Canada, Special Paper 39, p. 379–396.\nPalmer, Allison R (1983). "The Decade of North American Geology 1983 Geologic Time Scale". Geology. 11 (9): 503–504. Bibcode:1983Geo....11..503P. doi:10.1130/0091-7613(1983)11<503:tdonag>2.0.co;2.\nPowell, James Lawrence, 2001, Mysteries of Terra Firma: the Age and Evolution of the Earth, Simon & Schuster, ISBN 0-684-87282-X\n\nExternal links\n\nThe Age of the Earth by Chris Stassen (TalkOrigins.org)\nUSGS preface on the Age of the Earth\nNASA exposition on the age of Martian meteorites\nAgeing the Earth on In Our Time at the BBC\nPre-1900 Non-Religious Estimates of the Age of the Earth\n\n| | |\n --- |\n| Outline History | |\n| Atmosphere | Atmosphere of Earth Prebiotic atmosphere Troposphere Stratosphere Mesosphere Thermosphere Exosphere Weather |\n| Climate | Climate system Energy balance Climate change Climate variability and change Climatology Paleoclimatology |\n| Continents | Africa Antarctica Asia Australia Europe North America South America |\n| Culture and society | List of sovereign states + dependent territories In culture Earth Day Flag Symbol World economy Etymology World history Time zones World |\n| Environment | Biome Biosphere Biogeochemical cycles Ecology Ecosystem Human impact on the environment Evolutionary history of life Nature |\n| Geodesy | Cartography + Computer cartography Earth\'s orbit Geodetic astronomy Geomatics Gravity Navigation Remote Sensing Geopositioning Virtual globe |\n| Geophysics | Earth structure Fluid dynamics Geomagnetism Magnetosphere Mineral physics Seismology Plate tectonics Signal processing Tomography |\n| Geology | Age of Earth Earth science Extremes on Earth Future Geological history + Geologic time scale Geologic record History of Earth |\n| Oceans | Antarctic/Southern Ocean Arctic Ocean Atlantic Ocean Indian Ocean Pacific Ocean Oceanography |\n| Planetary science | The Moon Evolution of the Solar System Geology of solar terrestrial planets Location in the Universe Solar System |\n| | |\n| Authority control databases | |\n --- |\n| National | United States Israel |\n| Other | Yale LUX |\nRetrieved from "\nCategories:\n\nGeochronology\nHistory of Earth science\nGeology theories\n\nHidden categories:\n\nCS1 maint: multiple names: authors list\nWebarchive template wayback links\nArticles with short description\nShort description is different from Wikidata\nWikipedia pages semi-protected against vandalism\nAll articles with unsourced statements\nArticles with unsourced statements from February 2023\nArticles with unsourced statements from March 2015', + "images": [], + }, + { + "url": "https://www.planetary.org/articles/how-old-is-the-earth", + "raw_content": "Skip to main content\nCommunity Account Renew Search\nJoin\n\nBecome A Member\nRenew\nGift Membership\nKids Membership\nOther Ways to Give\n\nDonate\nJoin\nDonate\nBack To Main Menu\n\nWhat We Do\n\nExplore Worlds\n\nFind Life\nDefend Earth\n\nHow We Work\n\nEducation & Public Outreach\n\nSpace Policy & Advocacy\nScience & Technology\nGlobal Collaboration\n\nOur Results\n\nOur Impact\nLearn how our members and community are changing the worlds.\n + LightSail\nOur citizen-funded spacecraft successfully demonstrated solar sailing for CubeSats.\n\nBack To Main Menu\n\nSpace Topics\n\nPlanets & Other Worlds\n\nSpace Missions\nNight Sky\nSpace Policy\nFor Kids\n\nLearn\n\nArticles\n\nPlanetary Radio\nSpace Images\nVideos\nCourses\n\nThe Planetary Report\n\n#### Solar Maximum\nOur dynamic star's reach throughout the Solar System.\nBack To Main Menu\n\nGet Involved\n\nBecome A Member\nMembership programs for explorers of all ages.\n + Email Signup\nGet updates and weekly tools to learn, share, and advocate for space exploration.\n + Action Center\nVolunteer as a space advocate.\n ### Support Our Mission\n\nRenew Membership\n\nSociety Projects\nTravel\nOther Ways to Give\nStore\n\nThe Planetary Fund\n\nAccelerate progress in our three core enterprises — Explore Worlds, Find Life, and Defend Earth. You can support the entire fund, or designate a core enterprise of your choice.\nGive Today\nBack To Main Menu\n\nAbout Us\n\nOverview\n\nStrategic Framework\nNews & Press\nCareers\nContact Us\nOur Story\n\nThe Planetary Society\n\n#### Our Vision\nKnow the Cosmos and our place within it.\n#### Our Mission\nEmpowering the world's citizens to advance space science and exploration.\nBack To Main Menu\n\nMembership\n\nBecome A Member\n\nRenew Membership\nGift Membership\nKids Membership\nOther Ways to Give\n\nContact Us\n\nOur Work\n\nExplore Space\nTake Action\nAbout\nMembership\nMember Community\nAccount Center\n\nSearch\n\n“Exploration is in our nature.” - Carl Sagan\n\nHow old is the Earth?\nWritten by\nKate Howells\nPublic Education Specialist, The Planetary Society \nNovember 14, 2023\nThe Earth is thought to be about 4.54 billion years old. Along with other planets, the Earth was born in the early days of the Solar System, which first started forming about 4.6 billion years ago.\nHow did the Earth form?\nThe Solar System formed about 4.6 billion years ago from material in a massive, rotating cloud of gas and dust called the solar nebula. Gravity caused this cloud to collapse in on itself, spin, and flatten into a disk shape. Most of the material in that cloud was pulled toward the center, forming the protostar that would eventually become our Sun. The rest of the material began to come together into clumps called planetesimals. These in turn gradually came together with other planetesimals, forming larger bodies called protoplanets. Earth started as one of these protoplanets, likely about 4.5 billion years ago.\nThe Earth’s history\nAs the proto-Earth grew, heavier elements within it began to sink toward the center, forming the core, and lighter elements rose to the surface. This process, called differentiation, likely took place over tens of millions of years.\nDuring these early stages a Mars-sized protoplanet, often referred to as Theia, collided with the young Earth, ejecting material from both protoplanets into space. Some of this material fell back to Earth, but some of the material eventually coalesced in orbit around Earth to form the Moon.\nThe Earth continued to experience impacts throughout its early life, though none as dramatic as the collision with Theia. During a period called the Late Heavy Bombardment, which likely happened between 4.1 and 3.8 billion years ago, there was an increased rate of asteroid and comet impacts in the inner Solar System. The Late Heavy Bombardment had major geological consequences, including causing Earth’s crust to melt and differentiate and shaping the early atmosphere and oceans. Although geological activity has erased the craters from this time on Earth, they are preserved on the Moon. These are some of the craters you can see from Earth.\nBy about 4.3 billion years ago, the Earth's surface had cooled enough for water vapor in the atmosphere to condense on the surface, leading to the formation of oceans. Volcanic activity, which was more widespread at the time, released gasses that shaped the early atmosphere. Life emerged around 3.5 to 4 billion years ago in the form of simple, single-celled organisms.\nThe Earth has probably been as we know it today — with recognizable continents, oceans, a hospitable climate, and diverse life — for the past few hundred million years. But it continues to evolve through its own gradual tectonic and volcanic activity, and through the more rapid effects of climate change.\nHow do scientists determine the age of the Earth?\nScientists have been able to piece together our planet’s timeline\nthanks to techniques including radiometric dating of rocks and minerals,\nexamining layers of sedimentary rock, and studying the Earth's magnetic\nfield.\nThe most precise method is radiometric dating,\nwhich measures the decay of radioactive isotopes in rocks. Because\ngeologists know how long these isotopes take to decay, they can\ndetermine a rock’s age by looking at the ratio of parent (pre-decay) and\ndaughter (post-decay) isotopes in a sample.\nOne challenge with dating the Earth via rocks is that most of the\noriginal rocks that formed on our planet at the earliest stages of its\ncreation have likely been recycled into the mantle since then. Because\nof this, geologists also learn about the history of the Solar System by\nstudying rocks from beyond Earth, including meteorites that were formed\nbillions of years before falling to Earth, meteorites of Earth material\nthat have been found on the Moon,\nand asteroids that have coasted through space undisturbed for billions\nof years without undergoing any major composition-altering change. The\nasteroid Bennu,\nfor example, is thought to have formed in the first 10 million years of\nthe Solar System’s history. By studying the samples returned to Earth\nby the OSIRIS-REx mission, scientists can learn a lot about the early Solar System.\nEarth's First Line of Defense\nSupport the team of astronomers defending Earth with a gift today.\nDonate", + "images": [], + }, + { + "url": "https://answersingenesis.org/age-of-the-earth/how-old-earth/?srsltid=AfmBOoqSX0LqvRa1nZM5V8YjVoWspP8t9WHAhFQRrQUEVoHW8DukYZf4", + "raw_content": "Published Time: Sept. 1, 2018, 6 a.m.\nHow Old Is the Earth? | Answers in Genesis \n\nInternet Explorer is no longer supported. Try downloading another browser like Chrome or Firefox.\n\nCart\nAccount\nUnited States / English\n\nIf you already have an account, Sign in.\nView Cart\n×\nUnited States / English\n\nAnswers\nStore\nEvents\nVideos\nKids\nEducation\nDonate\n\nSubscribe\n\nAnswers in Genesis\nAnswers\nScience\nGeology\nAge of the Earth\nHow Old Is the Earth?\n\nHow Old Is the Earth?\nIs the earth 4.54 billion years old?\nby Dr. Danny R. Faulkner on September 1, 2018; last featured November 26, 2023\nFeatured in Answers Magazine\nAudio Version\nShare\n\nWatch the video on YouTube.\nIf you ask this question of most scientifically literate people, they will answer that the earth is about 4.54 billion years old. But if you ask biblically literate people, many will answer that the earth is little more than 6,000 years old. Why the huge difference? We look at the same world but come to different conclusions because our worldviews are different.\n Latest Answers -------------- Stay up to date each week with top articles, blogs, news, videos, and more. Sign Up Now\nTo fully understand the issue, we must look beyond this earth. Literally. Let me explain.\nThe Biblical Date\nHow do we arrive at the biblical date? The genealogies of Genesis 5 and 11 make it clear that Abraham lived about 2,000 years after creation. And we know from chronologies found elsewhere in the Bible that Abraham lived about 2,000 years before the birth of Jesus Christ. Furthermore, we know that Jesus’ ministry was about 2,000 years ago. Summing these lengths of time, we get about 6,000 years (technically just a little more).\nWhat Most Scientists Think\nHow do many scientists arrive at the age of 4.54 billion years? They rely on radiometric dating, though the story is a bit more complicated than it sounds. Some rocks contain trace amounts of radioactive atoms. Those radioactive atoms decay into stable atoms over time. By knowing the decay rate and measuring the amount of both kinds of atoms in a rock, scientists can compute the amount of time it took to produce the stable atoms.\nSome assumptions are involved, however. Were some of the stable atoms present in the rock to begin with? Did some of either type of atom leave or enter the rock during the time being measured for decay? To make matters worse, measuring the age of a rock by different kinds of radioactive atoms (such as uranium or rubidium) often yields very different ages. There are many examples of such discordant ages.\nBut even if we accept these ages as correct, there are many other assumptions that cause even more problems. You see, we never find rocks on earth that date back 4.54 billion years. The earth is a very dynamic place, with volcanic eruptions and tectonic plate movements that constantly recycle old rocks into new rocks. When rocks are recycled this way, it is believed that their radiometric dates are reset.\nSo we wouldn’t expect to find the original “primordial” rocks on earth. Instead, scientists must look to other bodies in the solar system that are less active geologically. The search for primordial rocks was one of the scientific reasons we sent men to the moon a half-century ago. Scientists thought that since the moon has far less geological activity than the earth, its rocks would be older.\nIndeed, the moon’s rocks generally yield old radiometric dates, but even they don’t yield dates of 4.54 billion years. Why?\nWhile the moon is far less active than the earth, that hasn’t always been the case. Most scientists think that the moon was very active early in its history. So while moon rocks have relatively old radiometric dates, they aren’t primordial either. To find truly primordial rocks, planetary scientists think they must look at meteorites, debris that has fallen onto the earth’s surface from somewhere else in the solar system. The 4.54-billion-year age of the earth comes from radiometric dating of meteorites.\nHow can they know these are the earliest rocks? They have a theory that the whole solar system formed at the same time, around 4.54 billion years ago. This means the sun and planets would be about the same age. Material that didn’t become part of the sun supposedly coalesced into larger and larger pieces in outer space, eventually forming planets and their satellites, or moons. But many of the pieces never formed into planets or satellites. Fragments of these pieces are thought to be the origin of meteorites. Since meteorites didn’t form into planets, they must have avoided the geological process that reset radiometric ages on earth. This is particularly true of the carbonaceous chondrites.\nBut note all the unproven assumptions. Evolutionary assumptions at that.\nThe Real Agenda\nFor decades, scientists who believe the earth is billions of years old have said that radiometric dates are their reason for believing so. But this hasn’t always been the case. The methods for radiometric dating were developed only a hundred years ago. Prior to that, many scientists already believed the earth was billions of years old, not based upon radiometric dates but the assumption that modern life evolved from nonlife. Evolutionists recognize that we can’t see planets and life evolving before our eyes. They say it requires great time; so the earth must be very old.\nThere is a good lesson here. In the late 1800s, many scientists concluded that the earth must be at least 100 million years old because that was considered the minimum time necessary for evolution to account for the earth’s biology and geology. The need for time drives the claims of ancient dates.\nLord Kelvin, one of the most significant scientists of the 1800s, tested the then-popular age of 100 million years and produced two quantitative tests that showed the earth and sun could be no more than about one-third of this age. Yet his evolutionary colleagues persisted in their belief despite Lord Kelvin’s objections. Since then, many critics have noted that his objections have been explained to their satisfaction. But that misses the point. Many of Kelvin’s colleagues believed in great age despite the evidence, not because of it.\nToday many scientists continue to believe in a 4.54-billion-year-old earth, which evolution requires. They will continue to choose to believe that age, even though solid scientific reasons are available to doubt those dates.\nOur Job\nIt is the job of creation scientists to reevaluate scientific claims using their biblical worldview. Scientists have found many evidences that the earth is far younger than 4.54 billion years—even as young as 6,000 years—but these are usually swept under the rug.\n\nScientists have found many evidences that the earth is far younger than 4.54 billion years, but these are usually swept under the rug.\n\nCreationist literature (including this magazine) is filled with examples. They include the composition of the earth’s atmosphere and seawater, which would be much different if helium had been escaping the atmosphere and salt had been accumulating in the ocean for millions of years. Also, the moon’s tidal interaction with the earth is causing the moon to spiral outward, which limits how long it has been in orbit. (Just go online and search for “evidences of a young earth” for details and more examples.)\nMost importantly, we need to point people to the importance of starting in the right place—God’s Word—when interpreting the evidence.\nDr. Danny R. Faulkner joined the staff of Answers in Genesis after more than 26 years as professor of physics and astronomy at the University of South Carolina Lancaster. He has written numerous articles in astronomical journals, and he is the author of Universe by Design.\nRelated Videos\nWhy Shouldn’t Christians Accept Millions of Years?\n\nAnswers Magazine\nSeptember–October 2018\n\nEven as skepticism spreads around the globe, the creation movement is flourishing. Meet some of the new generation of creation scientists.\nBrowse IssueSubscribe\nRecommended Resources\n\nUniverse by Design$18.99 \nThe Heavens Declare Set$39.99 Sale \n\nAge of the Earth$12.99 \n\nScience\n\nWhat Is Science?\nAstronomy\nBiology\nChemistry\nEnvironmental Science\nFossils\nGenetics\nGeology\nHuman Body\nMathematics\nPhysics\n\nNewsletter\nGet the latest answers emailed to you.\nBy submitting this form, you accept our Privacy Policy and will be given an opportunity to receive emails from Answers in Genesis regarding our latest news, resources, and events.\nThank You!\nThank you for signing up to receive email newsletters from Answers in Genesis.\nYou can also sign up for our free print newsletter (US only).\nFinish your subscription\nYou're almost done! Please follow the instructions we emailed you in order to finish subscribing.\nYou can also sign up for our free print newsletter (US only).\nWhoops!\nYour newsletter signup did not work out. Please refresh the page and try again.\nSupport the creation/gospel message by donating or getting involved! \n\nAnswers in Genesis is an apologetics ministry, dedicated to helping Christians defend their faith and proclaim the good news of Jesus Christ.\nLearn more\n\nCustomer Service 800.778.3390\nAvailable Monday–Friday | 9 AM–5 PM ET\n© 2025 Answers in Genesis", + "images": [], + }, + ], + "failed_results": [], + "response_time": 5.16, + "request_id": "81c3ce85-3014-4a81-b342-fd9f2a9fba32", +} diff --git a/tests/methods/test_method_web.py b/tests/methods/test_method_web.py index 9c0b7671..2d9305a2 100644 --- a/tests/methods/test_method_web.py +++ b/tests/methods/test_method_web.py @@ -1,11 +1,15 @@ """Tests for method web.py""" +import json import logging import pytest from unittest.mock import patch from modules.methods.method_web import MethodWeb -from tests.fixtures.tavily_responses import RESPONSE_HOW_OLD_IS_EARTH_NO_ANSWER +from tests.fixtures.tavily_responses import ( + RESPONSE_SEARCH_HOW_OLD_IS_EARTH_NO_ANSWER, + RESPONSE_EXTRACT_HOW_OLD_IS_EARTH_NO_ANSWER, +) logger = logging.getLogger(__name__) @@ -15,6 +19,9 @@ logger = logging.getLogger(__name__) async def test_method_web_search_live(): """Tests method web search with live API calls.""" + logger.info("=" * 50) + logger.info("==> Test: Method Web Search Live") + method_web = MethodWeb(serviceCenter=None) # Actual request @@ -26,29 +33,30 @@ async def test_method_web_search_live(): assert action_result.success assert len(action_result.documents) > 0 - logger.info("=" * 20) logger.info(f"Action result success status: {action_result.success}") logger.info(f"Action result error: {action_result.error}") logger.info(f"Action result label: {action_result.resultLabel}") logger.info("Documents:") for doc in action_result.documents: - logger.info("-" * 10) logger.info(f" - Document Name: {doc.documentName}") - logger.info(f" - Document Mime Type: {doc.mimeType}") - logger.info(f" - Document Data: {doc.documentData}") + logger.info(f" --> Document Mime Type: {doc.mimeType}") + logger.info(f" --> Document Data: {doc.documentData}") @pytest.mark.asyncio async def test_method_web_search_dummy(): """Tests method web search with dummy response data - no external API calls.""" + logger.info("=" * 50) + logger.info("==> Test: Method Web Search Dummy") + method_web = MethodWeb(serviceCenter=None) # Mock the Tavily API response with patch( "tavily.AsyncTavilyClient.search", - return_value=RESPONSE_HOW_OLD_IS_EARTH_NO_ANSWER, + return_value=RESPONSE_SEARCH_HOW_OLD_IS_EARTH_NO_ANSWER, ) as mock_client: action_result = await method_web.search( {"query": "How old is the earth", "maxResults": 5} @@ -59,14 +67,182 @@ async def test_method_web_search_dummy(): assert action_result.success assert len(action_result.documents) > 0 - logger.info("=" * 20) logger.info(f"Action result success status: {action_result.success}") logger.info(f"Action result error: {action_result.error}") logger.info(f"Action result label: {action_result.resultLabel}") logger.info("Documents:") for doc in action_result.documents: - logger.info("-" * 10) logger.info(f" - Document Name: {doc.documentName}") - logger.info(f" - Document Mime Type: {doc.mimeType}") - logger.info(f" - Document Data: {doc.documentData}") + logger.info(f" --> Document Mime Type: {doc.mimeType}") + logger.info(f" --> Document Data: {doc.documentData}") + + +@pytest.mark.asyncio +@pytest.mark.expensive +async def test_method_web_crawl_live(): + """Tests method web crawl with live API calls.""" + + logger.info("=" * 50) + logger.info("==> Test: Method Web Crawl Live") + + method_web = MethodWeb(serviceCenter=None) + + # Create mock document data with URLs from search results + search_results_json = { + "documentData": { + "results": [ + {"url": "https://en.wikipedia.org/wiki/Age_of_Earth"}, + {"url": "https://www.planetary.org/articles/how-old-is-the-earth"}, + ] + } + } + + # Mock the service center methods + with patch.object(method_web, "service") as mock_service: + mock_service.getChatDocumentsFromDocumentList.return_value = [ + type("MockDoc", (), {"fileId": "test-file-id"})() + ] + mock_service.getFileData.return_value = json.dumps(search_results_json).encode( + "utf-8" + ) + + # Actual request + action_result = await method_web.crawl({"document": "test-document-ref"}) + + # Evaluate results + assert action_result.success + assert len(action_result.documents) > 0 + + logger.info(f"Action result success status: {action_result.success}") + logger.info(f"Action result error: {action_result.error}") + logger.info(f"Action result label: {action_result.resultLabel}") + + logger.info("Documents:") + for doc in action_result.documents: + logger.info(f" - Document Name: {doc.documentName}") + logger.info(f" --> Document Mime Type: {doc.mimeType}") + logger.info(f" --> Document Data: {doc.documentData}") + + +@pytest.mark.asyncio +async def test_method_web_crawl_dummy(): + """Tests method web crawl with dummy response data - no external API calls.""" + + logger.info("=" * 50) + logger.info("==> Test: Method Web Crawl Dummy") + + method_web = MethodWeb(serviceCenter=None) + + # Create mock document data with URLs from search results + search_results_json = { + "documentData": { + "results": [ + {"url": "https://en.wikipedia.org/wiki/Age_of_Earth"}, + {"url": "https://www.planetary.org/articles/how-old-is-the-earth"}, + ] + } + } + + # Mock both the service center and Tavily API + with ( + patch.object(method_web, "service") as mock_service, + patch( + "tavily.AsyncTavilyClient.extract", + return_value=RESPONSE_EXTRACT_HOW_OLD_IS_EARTH_NO_ANSWER, + ) as mock_client, + ): + mock_service.getChatDocumentsFromDocumentList.return_value = [ + type("MockDoc", (), {"fileId": "test-file-id"})() + ] + mock_service.getFileData.return_value = json.dumps(search_results_json).encode( + "utf-8" + ) + + action_result = await method_web.crawl({"document": "test-document-ref"}) + mock_client.assert_called_once() + + # Evaluate results + assert action_result.success + assert len(action_result.documents) > 0 + + logger.info(f"Action result success status: {action_result.success}") + logger.info(f"Action result error: {action_result.error}") + logger.info(f"Action result label: {action_result.resultLabel}") + + logger.info("Documents:") + for doc in action_result.documents: + logger.info(f" - Document Name: {doc.documentName}") + logger.info(f" --> Document Mime Type: {doc.mimeType}") + logger.info(f" --> Document Data: {doc.documentData}") + + +@pytest.mark.asyncio +@pytest.mark.expensive +async def test_method_web_scrape_live(): + """Tests method web scrape with live API calls.""" + + logger.info("=" * 50) + logger.info("==> Test: Method Web Scrape Live") + + method_web = MethodWeb(serviceCenter=None) + + # Actual request + action_result = await method_web.scrape( + {"query": "How old is the earth", "maxResults": 3} + ) + + # Evaluate results + assert action_result.success + assert len(action_result.documents) > 0 + + logger.info(f"Action result success status: {action_result.success}") + logger.info(f"Action result error: {action_result.error}") + logger.info(f"Action result label: {action_result.resultLabel}") + + logger.info("Documents:") + for doc in action_result.documents: + logger.info(f" - Document Name: {doc.documentName}") + logger.info(f" --> Document Mime Type: {doc.mimeType}") + logger.info(f" --> Document Data: {doc.documentData}") + + +@pytest.mark.asyncio +async def test_method_web_scrape_dummy(): + """Tests method web scrape with dummy response data - no external API calls.""" + + logger.info("=" * 50) + logger.info("==> Test: Method Web Scrape Dummy") + + method_web = MethodWeb(serviceCenter=None) + + # Mock both Tavily API responses (search + extract) + with ( + patch( + "tavily.AsyncTavilyClient.search", + return_value=RESPONSE_SEARCH_HOW_OLD_IS_EARTH_NO_ANSWER, + ) as mock_search, + patch( + "tavily.AsyncTavilyClient.extract", + return_value=RESPONSE_EXTRACT_HOW_OLD_IS_EARTH_NO_ANSWER, + ) as mock_extract, + ): + action_result = await method_web.scrape( + {"query": "How old is the earth", "maxResults": 3} + ) + mock_search.assert_called_once() + mock_extract.assert_called_once() + + # Evaluate results + assert action_result.success + assert len(action_result.documents) > 0 + + logger.info(f"Action result success status: {action_result.success}") + logger.info(f"Action result error: {action_result.error}") + logger.info(f"Action result label: {action_result.resultLabel}") + + logger.info("Documents:") + for doc in action_result.documents: + logger.info(f" - Document Name: {doc.documentName}") + logger.info(f" --> Document Mime Type: {doc.mimeType}") + logger.info(f" --> Document Data: {doc.documentData}") From 017b7f1ca04d5be5397a9479eebd266d0b151741 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Mon, 1 Sep 2025 16:06:29 +0200 Subject: [PATCH 10/17] chore: type checking; input validation for web interface --- modules/interfaces/interface_web_model.py | 24 ++++++++++----------- modules/interfaces/interface_web_objects.py | 12 +++++++++-- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/modules/interfaces/interface_web_model.py b/modules/interfaces/interface_web_model.py index 86f19e08..bb4a82e0 100644 --- a/modules/interfaces/interface_web_model.py +++ b/modules/interfaces/interface_web_model.py @@ -2,7 +2,7 @@ from abc import ABC, abstractmethod from modules.interfaces.interfaceChatModel import ActionDocument, ActionResult -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, HttpUrl from typing import List @@ -12,21 +12,21 @@ from typing import List class WebSearchRequest(BaseModel): - query: str - max_results: int + query: str = Field(min_length=1, max_length=400) + max_results: int = Field(ge=1, le=20) class WebSearchResultItem(BaseModel): """Individual search result""" title: str - url: str + url: HttpUrl class WebSearchDocumentData(BaseModel): """Complete search results document""" - query: str + query: str = Field(min_length=1, max_length=400) results: List[WebSearchResultItem] total_count: int @@ -50,20 +50,20 @@ class WebSearchBase(ABC): class WebCrawlRequest(BaseModel): - urls: List[str] + urls: List[HttpUrl] class WebCrawlResultItem(BaseModel): """Individual crawl result""" - url: str + url: HttpUrl content: str class WebCrawlDocumentData(BaseModel): """Complete crawl results document""" - urls: List[str] + urls: List[HttpUrl] results: List[WebCrawlResultItem] total_count: int @@ -89,21 +89,21 @@ class WebCrawlBase(ABC): class WebScrapeRequest(BaseModel): - query: str - max_results: int + query: str = Field(min_length=1, max_length=400) + max_results: int = Field(ge=1, le=20) class WebScrapeResultItem(BaseModel): """Individual scrape result""" - url: str + url: HttpUrl content: str class WebScrapeDocumentData(BaseModel): """Complete scrape results document""" - query: str + query: str = Field(min_length=1, max_length=400) results: List[WebScrapeResultItem] total_count: int diff --git a/modules/interfaces/interface_web_objects.py b/modules/interfaces/interface_web_objects.py index 0ea43bd7..f348f0bd 100644 --- a/modules/interfaces/interface_web_objects.py +++ b/modules/interfaces/interface_web_objects.py @@ -1,3 +1,4 @@ +from typing import Optional from modules.interfaces.interface_web_model import ( WebCrawlActionResult, WebSearchActionResult, @@ -11,9 +12,16 @@ from dataclasses import dataclass from modules.connectors.connector_tavily import ConnectorTavily -@dataclass +@dataclass(slots=True) class WebInterface: - connector_tavily: ConnectorTavily = None + connector_tavily: ConnectorTavily + + def __post_init__(self) -> None: + if self.connector_tavily is None: + raise TypeError( + "connector_tavily must be provided. " + "Use `await WebInterface.create()` or pass a ConnectorTavily." + ) @classmethod async def create(cls) -> "WebInterface": From 91aff56e1c192b5770733943fbf69c18b7dd7ebe Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 2 Sep 2025 11:47:39 +0200 Subject: [PATCH 11/17] messageing aligned --- modules/chat/handling/handlingTasks.py | 315 +++++-------------------- modules/chat/handling/promptFactory.py | 10 +- modules/chat/serviceCenter.py | 102 +++++++- modules/methods/methodDocument.py | 24 +- modules/routes/routeWorkflows.py | 9 - modules/workflow/managerWorkflow.py | 10 +- notes/changelog.txt | 3 +- 7 files changed, 177 insertions(+), 296 deletions(-) diff --git a/modules/chat/handling/handlingTasks.py b/modules/chat/handling/handlingTasks.py index 290fdf0a..6138932b 100644 --- a/modules/chat/handling/handlingTasks.py +++ b/modules/chat/handling/handlingTasks.py @@ -192,7 +192,8 @@ class HandlingTasks: task_plan = TaskPlan( overview=task_plan_dict.get('overview', ''), - tasks=tasks + tasks=tasks, + userMessage=task_plan_dict.get('userMessage', '') ) # Set workflow totals for progress tracking @@ -217,24 +218,19 @@ class HandlingTasks: """Create a chat message containing the task plan with user-friendly messages""" try: # Build task plan summary - task_summary = f"📋 **Task Plan Generated**\n\n" - task_summary += f"**Overview:** {task_plan.overview}\n\n" - task_summary += f"**Total Tasks:** {len(task_plan.tasks)}\n\n" - - # Add each task with its user message - for i, task in enumerate(task_plan.tasks): - task_summary += f"**Task {i+1}:** {task.objective}\n" - if task.userMessage: - task_summary += f" 💬 {task.userMessage}\n" - if task.success_criteria: - criteria_str = ', '.join(task.success_criteria) - task_summary += f" ✅ Success Criteria: {criteria_str}\n" - task_summary += "\n" - + task_summary = f"📋 **Task Plan**\n\n" + # Get overall user message from task plan if available overall_message = task_plan.userMessage if overall_message: - task_summary += f"**Plan Summary:** {overall_message}\n\n" + task_summary += f"{overall_message}\n\n" + + # Add each task with its user message + for i, task in enumerate(task_plan.tasks): + if task.userMessage: + task_summary += f"💬 {task.userMessage}\n" + task_summary += "\n" + # Create workflow message message_data = { @@ -269,76 +265,6 @@ class HandlingTasks: except Exception as e: logger.error(f"Error creating task plan message: {str(e)}") - async def createDocumentContextMessage(self, documents: List, workflow): - """Create a chat message with document context and workflow labeling""" - try: - # Get current workflow context and stats - workflow_context = self.service.getWorkflowContext() - workflow_stats = self.service.getWorkflowStats() - - # Create a simple document context message without AI dependency - message_text = f"📄 **Document Context**\n\n" - message_text += f"**Total Documents:** {len(documents)}\n\n" - - # Add workflow context information - current_round = workflow_context.get('currentRound', 0) - current_task = workflow_context.get('currentTask', 0) - total_tasks = workflow_stats.get('totalTasks', 0) - current_action = workflow_context.get('currentAction', 0) - total_actions = workflow_stats.get('totalActions', 0) - - message_text += f"**Workflow Context:**\n" - message_text += f"- Round: {current_round}\n" - if total_tasks > 0: - message_text += f"- Task: {current_task}/{total_tasks}\n" - else: - message_text += f"- Task: {current_task}\n" - if total_actions > 0: - message_text += f"- Action: {current_action}/{total_actions}\n" - else: - message_text += f"- Action: {current_action}\n" - message_text += f"- Status: {workflow_stats.get('workflowStatus', 'unknown')}\n\n" - - # Add document list - if documents: - message_text += "**Available Documents:**\n" - for i, doc in enumerate(documents[:5]): # Show first 5 documents - message_text += f"- {doc.fileName if hasattr(doc, 'fileName') else f'Document {i+1}'}\n" - if len(documents) > 5: - message_text += f"- ... and {len(documents) - 5} more documents\n" - message_text += "\n" - - message_text += "Document context information is available for processing." - - # Create workflow message - message_data = { - "workflowId": workflow.id, - "role": "assistant", - "message": message_text, - "status": "step", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": get_utc_timestamp(), - "documentsLabel": "document_context", - "documents": [], # Empty documents for context message - # Add workflow context fields - "roundNumber": workflow_context.get('currentRound', 0), - "taskNumber": workflow_context.get('currentTask', 0), - "actionNumber": workflow_context.get('currentAction', 0), - # Add progress status - "taskProgress": "pending", - "actionProgress": "pending" - } - - message = self.chatInterface.createWorkflowMessage(message_data) - if message: - workflow.messages.append(message) - logger.info(f"Document context message created with {len(documents)} documents") - else: - logger.error("Failed to create document context message") - - except Exception as e: - logger.error(f"Error creating document context message: {str(e)}") - async def generateTaskActions(self, task_step, workflow, previous_results=None, enhanced_context=None) -> List[TaskAction]: """Generate actions for a given task step.""" try: @@ -546,25 +472,13 @@ class HandlingTasks: # Create database log entry for task start in format expected by frontend if task_index is not None: - if total_tasks is not None: - self.chatInterface.createWorkflowLog({ - "workflowId": workflow.id, - "message": f"Executing task {task_index}/{total_tasks}", - "type": "info" - }) - else: - self.chatInterface.createWorkflowLog({ - "workflowId": workflow.id, - "message": f"Executing task {task_index}/?", - "type": "info" - }) - + # Create a task start message for the user task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index) task_start_message = { "workflowId": workflow.id, "role": "assistant", - "message": f"🚀 Starting Task {task_progress}\n\nObjective: {task_step.objective}", + "message": f"🚀 **Task {task_progress}**", "status": "step", "sequenceNr": len(workflow.messages) + 1, "publishedAt": get_utc_timestamp(), @@ -617,11 +531,6 @@ class HandlingTasks: logger.error("No actions defined for task step, aborting task execution") break - # Create document context message if documents are available - available_docs = self.service.getAvailableDocuments(workflow) - if available_docs: - await self.createDocumentContextMessage(available_docs, workflow) - action_results = [] for action_idx, action in enumerate(actions): # Check workflow status before each action execution @@ -639,18 +548,11 @@ class HandlingTasks: # Log action start in format expected by frontend logger.info(f"Task {task_index} - Starting action {action_number}/{total_actions}") - # Create database log entry for action start - self.chatInterface.createWorkflowLog({ - "workflowId": workflow.id, - "message": f"Task {task_index} - Starting action {action_number}/{total_actions}", - "type": "info" - }) - # Create an action start message for the user action_start_message = { "workflowId": workflow.id, "role": "assistant", - "message": f"⚡ Task {task_index} - Action {action_number}/{total_actions}\n\nMethod: {action.execMethod}.{action.execAction}", + "message": f"⚡ **Action {action_number}/{total_actions}** (Method {action.execMethod}.{action.execAction})", "status": "step", "sequenceNr": len(workflow.messages) + 1, "publishedAt": get_utc_timestamp(), @@ -694,34 +596,19 @@ class HandlingTasks: if success: logger.info(f"=== TASK {task_index or '?'} COMPLETED SUCCESSFULLY: {task_step.objective} ===") - # Create database log entry for task completion - if total_tasks is not None: - self.chatInterface.createWorkflowLog({ - "workflowId": workflow.id, - "message": f"🎯 Task {task_index}/{total_tasks} completed", - "type": "success" - }) - else: - self.chatInterface.createWorkflowLog({ - "workflowId": workflow.id, - "message": f"🎯 Task {task_index}/? completed", - "type": "success" - }) - # Create a task completion message for the user task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index) # Enhanced completion message with criteria details - completion_message = f"🎯 Task {task_progress} Completed Successfully!\n\nObjective: {task_step.objective}\n\nFeedback: {feedback or 'Task completed successfully'}" + completion_message = f"🎯 **Task {task_progress}**\n\n✅ {feedback or 'Task completed successfully'}" # Add criteria status if available if hasattr(review_result, 'met_criteria') and review_result.met_criteria: - completion_message += f"\n\n✅ **Success Criteria Met:**\n" for criterion in review_result.met_criteria: - completion_message += f"• {criterion}\n" + completion_message += f"\n• {criterion}\n" if hasattr(review_result, 'quality_score'): - completion_message += f"\n📊 **Quality Score:** {review_result.quality_score}/10" + completion_message += f"\n📊 Score {review_result.quality_score}/10" task_completion_message = { "workflowId": workflow.id, @@ -740,10 +627,6 @@ class HandlingTasks: "taskProgress": "success" } - # Add user-friendly message if available - if task_step.userMessage: - task_completion_message["message"] += f"\n\n💬 {task_step.userMessage}" - message = self.chatInterface.createWorkflowMessage(task_completion_message) if message: workflow.messages.append(message) @@ -824,7 +707,7 @@ class HandlingTasks: retry_message = { "workflowId": workflow.id, "role": "assistant", - "message": f"🔄 Task {task_index} requires retry: {review_result.improvements}", + "message": f"🔄 **Task {task_index}** needs retry: {review_result.improvements}", "status": "step", "sequenceNr": len(workflow.messages) + 1, "publishedAt": get_utc_timestamp(), @@ -843,19 +726,19 @@ class HandlingTasks: continue else: logger.error(f"=== TASK {task_index or '?'} FAILED: {task_step.objective} after {attempt+1} attempts ===") - + task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index) + # Create user-facing error message for task failure - error_message = f"❌ Task {task_index or '?'} - '{task_step.objective}' failed after {attempt+1} attempts\n\n" - error_message += f"Objective: {task_step.objective}\n\n" + error_message = f"**Task {task_progress}**\n\n❌ '{task_step.objective}' {attempt+1}x failed\n\n" # Add specific error details if available if review_result and hasattr(review_result, 'reason') and review_result.reason: - error_message += f"Reason: {review_result.reason}\n\n" + error_message += f"{review_result.reason}\n\n" # Add criteria progress information if available if retry_context and hasattr(retry_context, 'criteria_progress'): progress = retry_context.criteria_progress - error_message += f"📊 **Progress Summary:**\n" + error_message += f"📊 **Details**\n" if progress.get('met_criteria'): error_message += f"✅ Met criteria: {', '.join(progress['met_criteria'])}\n" if progress.get('unmet_criteria'): @@ -908,19 +791,18 @@ class HandlingTasks: logger.error(f"=== TASK {task_index or '?'} FAILED AFTER ALL RETRIES: {task_step.objective} ===") # Create user-facing error message for task failure - error_message = f"❌ Task {task_index or '?'} - '{task_step.objective}' failed after all retries\n\n" - error_message += f"Objective: {task_step.objective}\n\n" + error_message = f"**Task {task_index or '?'}**\n\n❌ '{task_step.objective}' failed after all retries\n\n" + error_message += f"{task_step.objective}\n\n" # Add specific error details if available if retry_context and hasattr(retry_context, 'previous_review_result') and retry_context.previous_review_result: reason = retry_context.previous_review_result.get('reason', '') if reason and reason != "Task failed after all retries.": - error_message += f"Reason: {reason}\n\n" + error_message += f"{reason}\n\n" # Add retry information error_message += f"Retries attempted: {retry_context.retry_count if retry_context else 'Unknown'}\n" - error_message += f"Status: Task failed permanently\n\n" - error_message += "Please check the connection and try again, or contact support if the issue persists." + error_message += f"Status: Task failed permanently" # Create workflow message for user message_data = { @@ -1170,7 +1052,8 @@ class HandlingTasks: processingTime=createdAction.get("processingTime"), timestamp=float(createdAction.get("timestamp", get_utc_timestamp())), result=createdAction.get("result"), - resultDocuments=createdAction.get("resultDocuments", []) + resultDocuments=createdAction.get("resultDocuments", []), + userMessage=createdAction.get("userMessage") ) except Exception as e: @@ -1241,20 +1124,6 @@ class HandlingTasks: # Log action results logger.info(f"Action completed successfully") - # Create database log entry for action completion - if total_actions is not None: - self.chatInterface.createWorkflowLog({ - "workflowId": workflow.id, - "message": f"✅ Task {task_num} - Action {action_num}/{total_actions} completed", - "type": "success" - }) - else: - self.chatInterface.createWorkflowLog({ - "workflowId": workflow.id, - "message": f"✅ Task {task_num} - Action {action_num}/? completed", - "type": "success" - }) - if created_documents: logger.info(f"Output documents ({len(created_documents)}):") for i, doc in enumerate(created_documents): @@ -1276,19 +1145,12 @@ class HandlingTasks: await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index) # Create database log entry for action failure - if total_actions is not None: - self.chatInterface.createWorkflowLog({ - "workflowId": workflow.id, - "message": f"❌ Task {task_num} - Action {action_num}/{total_actions} failed: {result.error}", - "type": "error" - }) - else: - self.chatInterface.createWorkflowLog({ - "workflowId": workflow.id, - "message": f"❌ Task {task_num} - Action {action_num}/? failed: {result.error}", - "type": "error" - }) - + self.chatInterface.createWorkflowLog({ + "workflowId": workflow.id, + "message": f"❌ **Task {task_num}**\n\n❌ **Action {action_num}/{total_actions}** failed: {result.error}", + "type": "error" + }) + # Log action summary logger.info(f"=== TASK {task_num} ACTION {action_num} COMPLETED ===") @@ -1336,89 +1198,25 @@ class HandlingTasks: # Create a more meaningful message that includes task context task_objective = task_step.objective if task_step else 'Unknown task' - + + # Add comprehensive workflow context + current_round = workflow_context.get('currentRound', 0) + current_task = workflow_context.get('currentTask', 0) + total_tasks = workflow_stats.get('totalTasks', 0) + current_action = workflow_context.get('currentAction', 0) + total_actions = workflow_stats.get('totalActions', 0) + # Build a user-friendly message based on success/failure if result.success: - if created_documents and len(created_documents) > 0: - doc_names = [doc.fileName for doc in created_documents[:3]] - if len(created_documents) > 3: - doc_names.append(f"... and {len(created_documents) - 3} more") - - # Enhanced message with workflow context - message_text = f"✅ **Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} Completed**\n\n" - message_text += f"**Objective:** {task_objective}\n\n" - message_text += f"**Generated {len(created_documents)} document(s):** {', '.join(doc_names)}\n\n" - message_text += f"**Result Label:** {result_label}\n" - - # Add comprehensive workflow context - current_round = workflow_context.get('currentRound', 0) - current_task = workflow_context.get('currentTask', 0) - total_tasks = workflow_stats.get('totalTasks', 0) - current_action = workflow_context.get('currentAction', 0) - total_actions = workflow_stats.get('totalActions', 0) - - message_text += f"**Workflow Context:**\n" - message_text += f"- Round: {current_round}\n" - if total_tasks > 0: - message_text += f"- Task: {current_task}/{total_tasks}\n" - else: - message_text += f"- Task: {current_task}\n" - if total_actions > 0: - message_text += f"- Action: {current_action}/{total_actions}\n" - else: - message_text += f"- Action: {current_action}\n" - message_text += f"- Status: {workflow_stats.get('workflowStatus', 'unknown')}" - else: - message_text = f"✅ **Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} Completed**\n\n" - message_text += f"**Objective:** {task_objective}\n\n" - message_text += "**Action executed successfully**\n\n" - message_text += f"**Result Label:** {result_label}\n" - - # Add comprehensive workflow context - current_round = workflow_context.get('currentRound', 0) - current_task = workflow_context.get('currentTask', 0) - total_tasks = workflow_stats.get('totalTasks', 0) - current_action = workflow_context.get('currentAction', 0) - total_actions = workflow_stats.get('totalActions', 0) - - message_text += f"**Workflow Context:**\n" - message_text += f"- Round: {current_round}\n" - if total_tasks > 0: - message_text += f"- Task: {current_task}/{total_tasks}\n" - else: - message_text += f"- Task: {current_task}\n" - if total_actions > 0: - message_text += f"- Action: {current_action}/{total_actions}\n" - else: - message_text += f"- Action: {current_action}\n" - message_text += f"- Status: {workflow_stats.get('workflowStats', 'unknown')}" + message_text = f"**Action {current_action}/{total_actions} ({action.execMethod}.{action.execAction})**\n\n" + message_text += f"✅ {task_objective}\n\n" else: # ⚠️ FAILURE MESSAGE - Show error details to user error_details = result.error if result.error else "Unknown error occurred" - message_text = f"❌ **Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} Failed**\n\n" - message_text += f"**Objective:** {task_objective}\n\n" - message_text += f"**Error:** {error_details}\n\n" - message_text += f"**Result Label:** {result_label}\n" - - # Add comprehensive workflow context - current_round = workflow_context.get('currentRound', 0) - current_task = workflow_context.get('currentTask', 0) - total_tasks = workflow_stats.get('totalTasks', 0) - current_action = workflow_context.get('currentAction', 0) - total_actions = workflow_stats.get('totalActions', 0) - - message_text += f"**Workflow Context:**\n" - message_text += f"- Round: {current_round}\n" - if total_tasks > 0: - message_text += f"- Task: {current_task}/{total_tasks}\n" - else: - message_text += f"- Task: {current_task}\n" - if total_actions > 0: - message_text += f"- Action: {current_action}/{total_actions}\n" - message_text += f"- Action: {current_action}\n" - message_text += f"- Status: {workflow_stats.get('workflowStatus', 'unknown')}\n\n" - message_text += "Please check the connection and try again." - + message_text = f"**Action {current_action}/{total_actions} ({action.execMethod}.{action.execAction})**\n\n" + message_text += f"❌ {task_objective}\n\n" + message_text += f"{error_details}\n\n" + message_data = { "workflowId": workflow.id, "role": "assistant", @@ -1432,19 +1230,12 @@ class HandlingTasks: "documentsLabel": result_label, "documents": created_documents, # Add workflow context fields - extract from result_label to match document reference - "roundNumber": workflow_context.get('currentRound', 0), - "taskNumber": task_index, - "actionNumber": self._extractActionNumberFromLabel(result_label) if result_label else workflow_context.get('currentAction', 0), + "roundNumber": current_round, + "taskNumber": current_task, + "actionNumber": current_action, "actionProgress": "success" if result.success else "fail" } - # Add user-friendly message if available - if action.userMessage: - if result.success: - message_data["message"] += f"\n\n💬 {action.userMessage}" - else: - message_data["message"] += f"\n\n💬 Action was intended to: {action.userMessage}" - # Add debugging for error messages if not result.success: logger.info(f"Creating ERROR message: {message_text}") diff --git a/modules/chat/handling/promptFactory.py b/modules/chat/handling/promptFactory.py index 884606e4..4587cce4 100644 --- a/modules/chat/handling/promptFactory.py +++ b/modules/chat/handling/promptFactory.py @@ -34,8 +34,8 @@ INSTRUCTIONS: 3. Focus on business outcomes, not technical operations 4. Each task should produce meaningful, usable outputs 5. Ensure proper handover between tasks using result labels -6. Generate user-friendly messages for each task in the user's language ({user_language}) -7. Detect the language of the user request and include it in languageUserDetected +6. Detect the language of the user request and include it in languageUserDetected +7. Generate user-friendly messages for each task in the user's request language 8. Return a JSON object with the exact structure shown below TASK GROUPING PRINCIPLES: @@ -63,15 +63,15 @@ TASK PLANNING PRINCIPLES: - Keep tasks at a meaningful level of abstraction - Each task should produce results that can be used by subsequent tasks - Ensure clear dependencies and handovers between tasks -- Provide clear, actionable user messages in the user's language ({user_language}) +- Provide clear, actionable user messages in the user's request language - Group related activities to minimize task fragmentation - Only create multiple tasks when dealing with truly different, independent objectives REQUIRED JSON STRUCTURE: {{ "overview": "Brief description of the overall plan", - "userMessage": "User-friendly message explaining the task plan in {user_language}", "languageUserDetected": "en", // Language code detected from user request (en, de, fr, it, es, etc.) + "userMessage": "User-friendly message explaining the task plan in user's request language", "tasks": [ {{ "id": "task_1", @@ -79,7 +79,7 @@ REQUIRED JSON STRUCTURE: "dependencies": ["task_0"], // IDs of tasks that must complete first "success_criteria": ["criteria1", "criteria2"], "estimated_complexity": "low|medium|high", - "userMessage": "User-friendly message explaining what this task will accomplish in {user_language}" + "userMessage": "User-friendly message explaining what this task will accomplish in user's request language" }} ] }} diff --git a/modules/chat/serviceCenter.py b/modules/chat/serviceCenter.py index 9a37030c..13545001 100644 --- a/modules/chat/serviceCenter.py +++ b/modules/chat/serviceCenter.py @@ -350,16 +350,19 @@ class ServiceCenter: doc_exchange = None if message.documents: if message.actionId and message.documentsLabel: - # Use new document label format + # Validate that we use the same label as in the message + validated_label = self._validateDocumentLabelConsistency(message) + + # Use the message's actual documentsLabel doc_refs = [] for doc in message.documents: doc_ref = self.getDocumentReferenceFromChatDocument(doc, message) doc_refs.append(doc_ref) - doc_exchange = DocumentExchange( - documentsLabel=message.documentsLabel, + doc_exchange = DocumentExchange( + documentsLabel=validated_label, documents=doc_refs - ) + ) else: # Generate new labels for documents without explicit labels doc_refs = [] @@ -444,8 +447,21 @@ class ServiceCenter: if document_list["chat"]: context += "CURRENT ROUND DOCUMENTS:\n" for exchange in document_list["chat"]: - # Generate docList reference for the exchange (using message ID) - doc_list_ref = f"docList:{exchange.documentsLabel}" + # Generate docList reference for the exchange (using message ID and label) + # Find the message that corresponds to this exchange + message_id = None + for message in self.workflow.messages: + if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange.documentsLabel: + message_id = message.id + break + + if message_id: + doc_list_ref = f"docList:{message_id}:{exchange.documentsLabel}" + else: + # Fallback to label-only format if message ID not found + doc_list_ref = f"docList:{exchange.documentsLabel}" + + logger.debug(f"Using document label for action planning: {exchange.documentsLabel} (message_id: {message_id})") context += f"- {doc_list_ref} contains:\n" # Generate docItem references for each document in the list for doc_ref in exchange.documents: @@ -460,8 +476,21 @@ class ServiceCenter: if document_list["history"]: context += "WORKFLOW HISTORY DOCUMENTS:\n" for exchange in document_list["history"]: - # Generate docList reference for the exchange (using message ID) - doc_list_ref = f"docList:{exchange.documentsLabel}" + # Generate docList reference for the exchange (using message ID and label) + # Find the message that corresponds to this exchange + message_id = None + for message in self.workflow.messages: + if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange.documentsLabel: + message_id = message.id + break + + if message_id: + doc_list_ref = f"docList:{message_id}:{exchange.documentsLabel}" + else: + # Fallback to label-only format if message ID not found + doc_list_ref = f"docList:{exchange.documentsLabel}" + + logger.debug(f"Using history document label for action planning: {exchange.documentsLabel} (message_id: {message_id})") context += f"- {doc_list_ref} contains:\n" # Generate docItem references for each document in the list for doc_ref in exchange.documents: @@ -481,6 +510,16 @@ class ServiceCenter: logger.error(f"Error generating enhanced document context: {str(e)}") return "NO DOCUMENTS AVAILABLE - Error generating document context." + def _validateDocumentLabelConsistency(self, message) -> str: + """Validate that the document label used for references matches the message's actual label""" + if not hasattr(message, 'documentsLabel') or not message.documentsLabel: + logger.debug(f"Message {message.id} has no documentsLabel, returning None") + return None + + # Simply return the message's actual documentsLabel - no correction, just validation + logger.debug(f"Using message's documentsLabel for references: '{message.documentsLabel}'") + return message.documentsLabel + def _extractDocumentInfoFromReference(self, doc_ref: str) -> Dict[str, str]: """Extract document information from reference string""" try: @@ -569,19 +608,56 @@ class ServiceCenter: if message.documents: for doc in message.documents: if doc.id == doc_id: + doc_name = getattr(doc, 'fileName', 'unknown') + logger.debug(f"Found docItem reference {doc_ref}: {doc_name}") all_documents.append(doc) break elif doc_ref.startswith("docList:"): - # docList::