gateway/test_ai_with_documents.py
2025-10-03 01:41:50 +02:00

214 lines
8 KiB
Python

#!/usr/bin/env python3
"""
Test AI calls with document processing.
This tests the AI service's ability to process documents and generate responses.
"""
import asyncio
import logging
import sys
from pathlib import Path
# Add the gateway directory to the Python path
gateway_dir = Path(__file__).parent
sys.path.insert(0, str(gateway_dir))
# Import the required modules
from modules.services.serviceAi.mainServiceAi import AiService
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationType
# Route all log output to stdout with the root logger at DEBUG so the
# service internals are visible while the test runs.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
    handlers=[logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger(__name__)

# Loggers of the AI service and of the extraction service/pipeline; each is
# pinned to DEBUG explicitly below.
ai_service_logger = logging.getLogger('modules.services.serviceAi.mainServiceAi')
extraction_logger = logging.getLogger('modules.services.serviceExtraction.mainServiceExtraction')
pipeline_logger = logging.getLogger('modules.services.serviceExtraction.subPipeline')
for _service_logger in (ai_service_logger, extraction_logger, pipeline_logger):
    _service_logger.setLevel(logging.DEBUG)
async def test_ai_with_documents():
    """Run an AI call against the stored web research document.

    The function works through these steps and prints progress to stdout:

    1. Load ``APP_CONFIG`` and print the configured environment type.
    2. Create the ``AiService`` (aborting after 30 seconds).
    3. Read ``test_web_integration_result.md`` located next to this script.
    4. Call ``ai_service.callAi`` with a ``ChatDocument`` describing that
       file, while ``getInterface`` of the DB component interface module is
       patched so ``getFileData`` serves the file content from memory.
    5. Print the response and write it to ``test_ai_document_response.md``
       next to this script.

    Failures of type ``Exception`` are reported on stdout and end the test
    early; they are not re-raised. The temporary file created for the run is
    removed in every case once it has been created.

    Returns:
        None.
    """
    # Local imports keep this block self-contained within the script.
    import os
    import tempfile
    import traceback
    from unittest.mock import patch

    try:
        print("=" * 60)
        print("AI WITH DOCUMENTS TEST")
        print("=" * 60)

        # Config sanity check
        try:
            from modules.shared.configuration import APP_CONFIG
            env_type = APP_CONFIG.get('APP_ENV_TYPE')
            print(f"Environment: {env_type}")
        except Exception as e:
            print(f"Configuration loading failed: {e}")
            return

        # Initialize the AI service
        print("Initializing AI service...")
        try:
            ai_service = await asyncio.wait_for(AiService.create(), timeout=30.0)
            print("AI service initialized")
        except asyncio.TimeoutError:
            print("AI service initialization timed out after 30 seconds")
            return
        except Exception as e:
            print(f"AI service initialization failed: {e}")
            traceback.print_exc()
            return

        # Test 1: AI call with web research result document
        print("\n" + "=" * 60)
        print("TEST 1: AI Call with Web Research Document")
        print("=" * 60)

        # Read the web research result file
        web_result_file = gateway_dir / "test_web_integration_result.md"
        if not web_result_file.exists():
            print(f"Web research result file not found: {web_result_file}")
            print("Please run test_web_integration.py first to generate the document.")
            return

        print(f"Reading web research result from: {web_result_file}")
        web_content = web_result_file.read_text(encoding='utf-8')
        print(f"Document size: {len(web_content)} characters")

        # Create a temporary file to simulate document storage for testing.
        # Everything after its creation runs under try/finally so the file
        # is removed on every exit path, not only after the AI call.
        print("Creating temporary document file for testing...")
        temp_file_path = None
        try:
            with tempfile.NamedTemporaryFile(
                mode='w', suffix='.md', delete=False, encoding='utf-8'
            ) as temp_file:
                # Record the path before writing so a failed write is
                # still cleaned up.
                temp_file_path = temp_file.name
                temp_file.write(web_content)
            print(f"Temporary file created: {temp_file_path}")

            # Create ChatDocument describing the document
            web_document = ChatDocument(
                messageId="test_message_001",
                fileId="temp_file_001",
                fileName="web_research_result.md",
                fileSize=len(web_content),
                mimeType="text/markdown"
            )

            # Mock the database interface to return our file content
            print("Setting up mock database interface...")

            def mock_get_file_data(file_id):
                """Return the document bytes for the test file id, else None."""
                if file_id == "temp_file_001":
                    return web_content.encode('utf-8')
                return None

            # Create AI call options - process full content without compression
            ai_options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                compressPrompt=False,  # Don't compress the prompt
                compressContext=False,  # Process full content, don't compress
                processDocumentsIndividually=False,
                maxContextBytes=None,  # No artificial limit - let the engine handle it
                safetyMargin=0.1
            )

            # Test prompt
            prompt = "Erstelle aus den Daten der Internetsuche eine Liste der Grundstücke, welche zum Verkauf ausgeschrieben sind"
            print(f"Prompt: {prompt}")
            print("Processing document with AI...")

            try:
                print(f"Original prompt length: {len(prompt)} characters")
                print(f"Web content length: {len(web_content)} characters")
                print(f"Document file: {web_document.fileName}")

                # Patch the interface factory so getFileData is answered from
                # memory. NOTE(review): this only takes effect if the service
                # resolves getInterface through this module attribute - confirm.
                with patch('modules.interfaces.interfaceDbComponentObjects.getInterface') as mock_get_interface:
                    mock_interface = mock_get_interface.return_value
                    mock_interface.getFileData = mock_get_file_data
                    print("Calling AI with document processing...")
                    result = await ai_service.callAi(
                        prompt=prompt,
                        documents=[web_document],  # Use the document, not embedded content
                        options=ai_options
                    )

                print("AI processing completed successfully!")
                print(f"Result length: {len(result)} characters")
                print("\n" + "=" * 60)
                print("AI RESPONSE:")
                print("=" * 60)
                print(result)

                # Save the AI response to a file
                output_file = gateway_dir / "test_ai_document_response.md"
                with open(output_file, 'w', encoding='utf-8') as f:
                    f.write("# AI Analysis of Web Research Data\n\n")
                    f.write(f"**Prompt:** {prompt}\n\n")
                    f.write("**Source Document:** web_research_result.md\n")
                    f.write(f"**Document Size:** {len(web_content)} characters\n\n")
                    f.write("## AI Response\n\n")
                    f.write(result)
                print(f"\nAI response saved to: {output_file}")
            except Exception as e:
                print(f"AI processing failed: {e}")
                traceback.print_exc()
                return
        finally:
            # Clean up temporary file (success and failure paths alike)
            if temp_file_path is not None:
                try:
                    os.unlink(temp_file_path)
                    print(f"Cleaned up temporary file: {temp_file_path}")
                except OSError as e:
                    print(f"Warning: Could not clean up temporary file: {e}")

        # Test 2: Skipped as requested
        print("\n" + "=" * 60)
        print("TEST 2: SKIPPED (Multi-Document Test)")
        print("=" * 60)
        print("Multi-document test skipped as requested.")
        print("\nAI with documents test completed!")
    except Exception as e:
        print(f"Test failed: {str(e)}")
        traceback.print_exc()
if __name__ == "__main__":
    # Script entry point: the whole test run is capped at 300 seconds, and
    # any failure is reported on stdout instead of propagating.
    try:
        bounded_run = asyncio.wait_for(test_ai_with_documents(), timeout=300.0)
        asyncio.run(bounded_run)
    except asyncio.TimeoutError:
        print("Test timed out after 300 seconds")
    except Exception as err:
        import traceback

        print(f"Test failed with error: {err}")
        traceback.print_exc()