# gateway/test_ai_with_documents.py

#!/usr/bin/env python3
"""
Test AI calls with document processing.
This tests the AI service's ability to process documents and generate responses.
"""

import asyncio
import logging
import sys
from pathlib import Path

# Put this script's own directory at the front of the import search path so the
# `modules.*` imports below can be found when the file is run directly as a
# script (presumably `modules/` sits next to this file - verify against the
# repository layout). This must run before those imports.
# `gateway_dir` is also reused later to locate the test's input and output files.
gateway_dir = Path(__file__).parent
sys.path.insert(0, str(gateway_dir))

# Import the required modules
from modules.services.serviceAi.mainServiceAi import AiService
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationType

# Root logging: emit everything from DEBUG upwards to stdout, one timestamped
# line per record.
_stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[_stdout_handler],
)
logger = logging.getLogger(__name__)

# Loggers of the AI service and of the extraction service / pipeline; each is
# explicitly pinned to DEBUG so their detailed output shows up during the test.
ai_service_logger = logging.getLogger('modules.services.serviceAi.mainServiceAi')
extraction_logger = logging.getLogger('modules.services.serviceExtraction.mainServiceExtraction')
pipeline_logger = logging.getLogger('modules.services.serviceExtraction.subPipeline')

for _debug_logger in (ai_service_logger, extraction_logger, pipeline_logger):
    _debug_logger.setLevel(logging.DEBUG)

def _remove_temp_file(temp_file_path):
    """Delete the temporary document file; report, but never raise, on failure."""
    import os

    try:
        os.unlink(temp_file_path)
        print(f"Cleaned up temporary file: {temp_file_path}")
    except OSError as e:
        # Best-effort cleanup: a leftover temp file must not fail the test run.
        print(f"Warning: Could not clean up temporary file: {e}")


def _write_response_report(output_file, prompt, document_name, document_chars, result):
    """Write the prompt, source-document details and AI response as Markdown."""
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("# AI Analysis of Web Research Data\n\n")
        f.write(f"**Prompt:** {prompt}\n\n")
        f.write(f"**Source Document:** {document_name}\n")
        f.write(f"**Document Size:** {document_chars} characters\n\n")
        f.write("## AI Response\n\n")
        f.write(result)


async def test_ai_with_documents():
    """Run a manual end-to-end check of an AI call that includes a document.

    Steps performed:
      1. Load ``APP_CONFIG`` and print the configured environment type.
      2. Create the ``AiService`` (bounded to 30 seconds).
      3. Read ``test_web_integration_result.md`` from the gateway directory,
         wrap it in a ``ChatDocument`` and call ``ai_service.callAi`` while the
         database interface lookup is patched to serve the file content.
      4. Print the response and save it to ``test_ai_document_response.md``.

    Every failure is reported on stdout (with a traceback where one is
    available) and the coroutine simply returns; nothing is raised to the
    caller except cancellation. The function always returns ``None``.
    """
    import tempfile
    import traceback
    from unittest.mock import patch

    try:
        print("=" * 60)
        print("AI WITH DOCUMENTS TEST")
        print("=" * 60)

        # Config sanity check
        try:
            from modules.shared.configuration import APP_CONFIG
            env_type = APP_CONFIG.get('APP_ENV_TYPE')
            print(f"Environment: {env_type}")
        except Exception as e:
            print(f"Configuration loading failed: {e}")
            return

        # Initialize the AI service
        print("Initializing AI service...")
        try:
            ai_service = await asyncio.wait_for(AiService.create(), timeout=30.0)
            print("AI service initialized")
        except asyncio.TimeoutError:
            print("AI service initialization timed out after 30 seconds")
            return
        except Exception as e:
            print(f"AI service initialization failed: {e}")
            traceback.print_exc()
            return

        # Test 1: AI call with web research result document
        print("\n" + "="*60)
        print("TEST 1: AI Call with Web Research Document")
        print("="*60)

        # Read the web research result file
        web_result_file = gateway_dir / "test_web_integration_result.md"
        if not web_result_file.exists():
            print(f"Web research result file not found: {web_result_file}")
            print("Please run test_web_integration.py first to generate the document.")
            return

        print(f"Reading web research result from: {web_result_file}")
        web_content = web_result_file.read_text(encoding='utf-8')
        print(f"Document size: {len(web_content)} characters")

        # Create a temporary file to simulate document storage for testing
        print("Creating temporary document file for testing...")
        with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False, encoding='utf-8') as temp_file:
            temp_file.write(web_content)
            temp_file_path = temp_file.name

        # From here on the temp file exists on disk (delete=False), so every
        # exit path - success, early return, unexpected exception, or
        # cancellation by the overall timeout in the script entry point - must
        # remove it. The finally clause below guarantees that.
        try:
            print(f"Temporary file created: {temp_file_path}")

            # Create ChatDocument describing the document under test
            # NOTE(review): fileSize is the character count, while the mocked
            # storage below returns UTF-8 bytes; the two differ for non-ASCII
            # content - confirm which unit ChatDocument expects.
            web_document = ChatDocument(
                messageId="test_message_001",
                fileId="temp_file_001",
                fileName="web_research_result.md",
                fileSize=len(web_content),
                mimeType="text/markdown"
            )

            # Mock the database interface to return our file content
            print("Setting up mock database interface...")

            def mock_get_file_data(file_id):
                if file_id == "temp_file_001":
                    return web_content.encode('utf-8')
                return None

            # Create AI call options - process full content without compression
            ai_options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                compressPrompt=False,  # Don't compress the prompt
                compressContext=False,  # Process full content, don't compress
                processDocumentsIndividually=False,
                maxContextBytes=None,  # No artificial limit - let the engine handle it
                safetyMargin=0.1
            )

            # Test prompt
            prompt = "Erstelle aus den Daten der Internetsuche eine Liste der Grundstücke, welche zum Verkauf ausgeschrieben sind"

            print(f"Prompt: {prompt}")
            print("Processing document with AI...")

            try:
                print(f"Original prompt length: {len(prompt)} characters")
                print(f"Web content length: {len(web_content)} characters")
                print(f"Document file: {web_document.fileName}")

                # Patch the database interface lookup so the document's
                # content is served from memory, then call the AI with it.
                with patch('modules.interfaces.interfaceDbComponentObjects.getInterface') as mock_get_interface:
                    mock_interface = mock_get_interface.return_value
                    mock_interface.getFileData = mock_get_file_data

                    print("Calling AI with document processing...")
                    result = await ai_service.callAi(
                        prompt=prompt,
                        documents=[web_document],  # Use the document, not embedded content
                        options=ai_options
                    )

                print("AI processing completed successfully!")
                print(f"Result length: {len(result)} characters")
                print("\n" + "="*60)
                print("AI RESPONSE:")
                print("="*60)
                print(result)

                # Save the AI response to a file
                output_file = gateway_dir / "test_ai_document_response.md"
                _write_response_report(
                    output_file,
                    prompt,
                    web_document.fileName,
                    len(web_content),
                    result,
                )
                print(f"\nAI response saved to: {output_file}")

            except Exception as e:
                print(f"AI processing failed: {e}")
                traceback.print_exc()
                return
        finally:
            _remove_temp_file(temp_file_path)

        # Test 2: Skipped as requested
        print("\n" + "="*60)
        print("TEST 2: SKIPPED (Multi-Document Test)")
        print("="*60)
        print("Multi-document test skipped as requested.")

        print("\nAI with documents test completed!")

    except Exception as e:
        print(f"Test failed: {str(e)}")
        traceback.print_exc()

if __name__ == "__main__":
    # Script entry point: run the whole test under a single five-minute cap so
    # a stalled step cannot hang the process; any failure is printed, not raised.
    try:
        test_run = test_ai_with_documents()
        asyncio.run(asyncio.wait_for(test_run, timeout=300.0))
    except asyncio.TimeoutError:
        print("Test timed out after 300 seconds")
    except Exception as failure:
        import traceback
        print(f"Test failed with error: {failure}")
        traceback.print_exc()