#!/usr/bin/env python3 """ Test the integrated web research functionality. This tests the complete workflow from methodAi.py through to the services. """ import asyncio import logging import sys from pathlib import Path # Add the gateway directory to the Python path gateway_dir = Path(__file__).parent sys.path.insert(0, str(gateway_dir)) # Import the required modules from modules.services.serviceAi.mainServiceAi import AiService from modules.datamodels.datamodelWeb import WebResearchRequest, WebResearchOptions # Configure logging with more detailed output logging.basicConfig( level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=[ logging.StreamHandler(sys.stdout) ] ) logger = logging.getLogger(__name__) # Set the main service logger to DEBUG level # Configure logging for the main service and connectors main_service_logger = logging.getLogger('modules.services.serviceAi.mainServiceAi') main_service_logger.setLevel(logging.DEBUG) tavily_logger = logging.getLogger('modules.connectors.connectorAiTavily') tavily_logger.setLevel(logging.DEBUG) interface_logger = logging.getLogger('modules.interfaces.interfaceAiObjects') interface_logger.setLevel(logging.DEBUG) async def test_web_research_integration(): """Test the integrated web research functionality.""" try: print("=" * 60) print("WEB RESEARCH INTEGRATION TEST") print("=" * 60) # Config sanity check (non-verbose) try: from modules.shared.configuration import APP_CONFIG env_type = APP_CONFIG.get('APP_ENV_TYPE') print(f"Environment: {env_type}") except Exception as e: print(f"Configuration loading failed: {e}") return # Initialize the AI service print("Initializing AI service...") try: ai_service = await asyncio.wait_for(AiService.create(), timeout=30.0) print("AI service initialized") except asyncio.TimeoutError: print("AI service initialization timed out after 30 seconds") return except Exception as e: print(f"AI service initialization failed: {e}") import traceback traceback.print_exc() return # Test 1: Basic web research print("\n" + "="*60) print("TEST 1: Basic Web Research") print("="*60) request = WebResearchRequest( search_query="Kannst Du mir eine Liste machen, welche Grundstücke aktuell im Kanton Zürich verkauft werden?", # search_query="Erstelle mir ein Firmenprofil von ValueOn AG in der Schweiz", max_results=10, options=WebResearchOptions( max_pages=10, search_depth="basic", extract_depth="advanced", format="markdown", return_report=True, pages_search_depth=2 # Test with depth 2: main pages + sub-pages ) ) print("Starting web research call...") try: result = await ai_service.webResearch(request) except Exception as e: print(f"Web research failed: {e}") import traceback traceback.print_exc() return if result.success: print("Web research completed successfully") doc = result.documents[0].documentData print(f"Websites analyzed: {doc.websites_analyzed}") print(f"Additional links found: {doc.additional_links_found}") # Show main URLs and sub-URLs for visibility print("Main URLs (sources):") for i, src in enumerate(doc.sources, 1): try: print(f" {i}. {src.title} - {src.url}") except Exception: print(f" {i}. {src}") print("Sub-URLs (additional links):") for i, link in enumerate(doc.additional_links, 1): print(f" {i}. {link}") print(f"Analysis result length: {len(doc.analysis_result)} characters") # Save result to file output_file = gateway_dir / "test_web_integration_result.md" with open(output_file, 'w', encoding='utf-8') as f: f.write("# Web Research Integration Test Result\n\n") f.write(f"**Query:** {request.search_query}\n\n") f.write(f"**Websites Analyzed:** {doc.websites_analyzed}\n") f.write(f"**Additional Links Found:** {doc.additional_links_found}\n\n") f.write("## Analysis Result\n\n") f.write(doc.analysis_result) f.write("\n\n## Sources\n\n") for source in doc.sources: try: f.write(f"- [{source.title}]({source.url})\n") except Exception: f.write(f"- {source}\n") f.write("\n## Additional Links\n\n") for link in doc.additional_links: f.write(f"- {link}\n") print(f"Result saved to: {output_file}") # Save individual website content files print(f"\nSaving individual website content files...") from datetime import datetime import os timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") test_dir = f"test_web_content_{timestamp}" os.makedirs(test_dir, exist_ok=True) website_files = [] # Save main URLs for i, src in enumerate(doc.sources, 1): try: url = src.url title = src.title except Exception: url = str(src) title = "Unknown" # Create safe filename from URL safe_filename = url.replace('https://', '').replace('http://', '').replace('/', '_').replace(':', '_') safe_filename = ''.join(c for c in safe_filename if c.isalnum() or c in '._-') safe_filename = safe_filename[:100] # Limit length filename = f"{test_dir}/main_url_{i:03d}_{safe_filename}.txt" with open(filename, 'w', encoding='utf-8') as f: f.write(f"Title: {title}\n") f.write(f"URL: {url}\n") f.write(f"Type: Main URL\n") f.write(f"Extracted: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") f.write("="*80 + "\n\n") # Write actual content if available if hasattr(doc, 'individual_content') and doc.individual_content and url in doc.individual_content: f.write(doc.individual_content[url]) else: f.write("Content not available in individual_content field.\n") f.write("The combined analysis is available in the main result file.\n") website_files.append(filename) print(f" Created: {filename}") # Save additional links for i, link in enumerate(doc.additional_links, 1): # Create safe filename from URL safe_filename = link.replace('https://', '').replace('http://', '').replace('/', '_').replace(':', '_') safe_filename = ''.join(c for c in safe_filename if c.isalnum() or c in '._-') safe_filename = safe_filename[:100] # Limit length filename = f"{test_dir}/additional_link_{i:03d}_{safe_filename}.txt" with open(filename, 'w', encoding='utf-8') as f: f.write(f"URL: {link}\n") f.write(f"Type: Additional Link\n") f.write(f"Extracted: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") f.write("="*80 + "\n\n") # Write actual content if available if hasattr(doc, 'individual_content') and doc.individual_content and link in doc.individual_content: f.write(doc.individual_content[link]) else: f.write("Content not available in individual_content field.\n") f.write("The combined analysis is available in the main result file.\n") website_files.append(filename) print(f" Created: {filename}") print(f"\nAll files saved to directory: {test_dir}") print(f"Main result file: {output_file}") print(f"Individual website files: {len(website_files)} files") else: print(f"Web research failed: {result.error}") print("\nIntegration test completed!") except Exception as e: print(f"Integration test failed: {str(e)}") import traceback traceback.print_exc() if __name__ == "__main__": # Add timeout to the entire test try: asyncio.run(asyncio.wait_for(test_web_research_integration(), timeout=600.0)) except asyncio.TimeoutError: print("Test timed out after 600 seconds - likely hanging during processing") except Exception as e: print(f"Test failed with error: {e}") import traceback traceback.print_exc()