# gateway/test_real_document_generation.py

#!/usr/bin/env python3
"""
Real Document Generation Test
Tests the generateReport action with REAL AI processing (no mocking)
"""

import asyncio
import sys
from pathlib import Path
import tempfile
import os

# Add the gateway directory to the Python path
gateway_dir = Path(__file__).parent
sys.path.insert(0, str(gateway_dir))

# Add the modules path to sys.path for imports
modules_path = gateway_dir / 'modules'
sys.path.insert(0, str(modules_path))

from modules.workflows.methods.methodDocument import MethodDocument
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelUam import User
from modules.interfaces.interfaceDbComponentObjects import getInterface

def _decode_report_payload(content):
    """Turn a generated report payload into the bytes to store on disk.

    Args:
        content: Payload taken from the action result, expected to be a
            base64 string (``bytes`` are accepted as well).

    Returns:
        A ``(payload, error)`` tuple. ``error`` is ``None`` when ``content``
        was valid base64 and ``payload`` holds the decoded bytes. Otherwise
        ``error`` is the decoding exception and ``payload`` is the raw
        content (UTF-8 encoded when it is a string).
    """
    import base64

    is_binary = isinstance(content, (bytes, bytearray))
    try:
        # Whitespace (e.g. wrapped lines) is harmless around base64 data, so it
        # is removed before strict validation. Without validate=True the
        # decoder silently drops every non-alphabet character, which lets
        # ordinary text "decode" into garbage instead of failing.
        compact = b"".join(content.split()) if is_binary else "".join(content.split())
        return base64.b64decode(compact, validate=True), None
    except (ValueError, TypeError) as exc:  # binascii.Error is a ValueError
        raw = bytes(content) if is_binary else content.encode('utf-8')
        return raw, exc


def _save_report_outputs(result, test_parameters, output_dir):
    """Report on a generateReport result and persist its first document.

    Writes ``test_real_generated_report.docx`` (decoded payload, or the raw
    text when the payload is not base64) and
    ``test_real_generated_report_content.txt`` (summary plus a preview of the
    first 2000 characters) into ``output_dir``.

    Args:
        result: Object returned by ``generateReport``; only its ``success``,
            ``documents`` and ``error`` attributes are inspected.
        test_parameters: Parameter dict passed to the action; ``title`` and
            ``outputFormat`` are echoed into the text summary.
        output_dir: ``pathlib.Path`` of the directory receiving both files.
    """
    if not (hasattr(result, 'success') and result.success):
        print(f"❌ Generation failed: {result}")
        if hasattr(result, 'error'):
            print(f"Error: {result.error}")
        return

    print(f"✅ Success: {result.success}")

    if not (hasattr(result, 'documents') and result.documents):
        print("❌ No documents found in result")
        print(f"Result structure: {result}")
        return

    document = result.documents[0]

    # documentData is expected to be a dict whose 'result' entry carries the payload.
    document_data = getattr(document, 'documentData', None)
    if isinstance(document_data, dict):
        # "or ''" guards against an explicit None stored under 'result'.
        content = document_data.get('result', '') or ''
    else:
        content = ''

    mime_type = getattr(document, 'mimeType', 'application/pdf')
    file_name = getattr(document, 'documentName', 'test_report.pdf')

    print(f"📄 Content length: {len(content)} characters")
    print(f"📋 MIME type: {mime_type}")
    print(f"📁 File name: {file_name}")

    # Decode first, then write: the file is only opened once the bytes to
    # store are known.
    output_file = output_dir / "test_real_generated_report.docx"
    payload, decode_error = _decode_report_payload(content)
    with open(output_file, 'wb') as f:
        f.write(payload)
    if decode_error is None:
        print(f"📁 Generated DOCX saved to: {output_file}")
        print(f"📊 DOCX file size: {len(payload)} bytes")
    else:
        print(f"⚠️  Could not decode base64 content: {decode_error}")
        print(f"📁 Content saved as text to: {output_file}")

    # Also save a text version for inspection
    text_output_file = output_dir / "test_real_generated_report_content.txt"
    with open(text_output_file, 'w', encoding='utf-8') as f:
        f.write("Generated Report Content\n")
        f.write("======================\n\n")
        f.write(f"Title: {test_parameters['title']}\n")
        f.write(f"Format: {test_parameters['outputFormat']}\n")
        f.write(f"MIME Type: {mime_type}\n")
        f.write(f"File Name: {file_name}\n")
        f.write(f"Content Length: {len(content)} characters\n\n")
        f.write("Content Preview (first 2000 characters):\n")
        f.write("-" * 50 + "\n")
        f.write(content[:2000])
        if len(content) > 2000:
            f.write("\n... (content truncated)")

    print(f"📄 Content preview saved to: {text_output_file}")


async def test_real_document_generation():
    """Run the generateReport action end-to-end with real (unmocked) AI processing.

    The AI, extraction, generation and workflow services are instantiated
    from the project modules; only the service center / workflow containers
    and the ``getFileData`` lookup of the DB component interface are stubbed.
    The input document is ``test_web_integration_result.md`` next to this
    script, and the outputs are written next to the script as well.

    Returns:
        None. Failures are printed to stdout (with a traceback) rather than
        raised, and the function returns early when the AI service cannot be
        created or the input file is missing.
    """
    import traceback
    from unittest.mock import patch

    print("=" * 60)
    print("REAL DOCUMENT GENERATION TEST")
    print("=" * 60)

    try:
        # Initialize real services
        print("Environment: dev")
        print("Initializing MethodDocument with REAL services...")

        # Create a real user
        real_user = User(
            id="test_user_001",
            username="testuser",
            name="Test User",
            email="test@example.com"
        )

        # Initialize real services (this will use actual AI, extraction, generation services).
        # Imported here so that an import failure is reported by the handler below.
        from modules.services.serviceAi.mainServiceAi import AiService
        from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
        from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
        from modules.services.serviceWorkflow.mainServiceWorkflow import WorkflowService

        class MockWorkflow:
            """Minimal workflow stand-in: one message wrapping the document, or none."""

            def __init__(self, document):
                self.messages = [MockMessage(document)] if document else []

        class MockMessage:
            """Minimal message stand-in exposing only a ``documents`` list."""

            def __init__(self, document):
                self.documents = [document] if document else []

        class MockServiceCenter:
            """Service-center stand-in handed to ``WorkflowService``."""

            def __init__(self, user, document):
                self.services = {}
                self.user = user
                self.workflow = MockWorkflow(document)
                self.interfaceDbChat = None  # Mock interface
                self.interfaceDbComponent = None  # Mock interface
                self.interfaceDbApp = None  # Mock interface

        # Starts with an empty workflow; the document is attached once it exists.
        mock_service_center = MockServiceCenter(real_user, None)

        class ServicesObject:
            """Attribute-style service container with tracing wrappers installed."""

            def __init__(self):
                # The AI service is created asynchronously via AiService.create()
                # after construction, so it starts out unset.
                self.ai = None
                self.extraction = ExtractionService()
                self.generation = GenerationService()
                self.workflow = WorkflowService(mock_service_center)
                self.user = real_user

                # Add debugging to extraction service calls
                original_extract = self.extraction.extractContent

                def debug_extract_content(documents, options):
                    print(f"📄 Extraction Service called with {len(documents)} documents")
                    print(f"📄 Extraction options: {options}")
                    result = original_extract(documents, options)
                    print(f"📄 Extraction result: {len(result)} extracted content objects")
                    if result:
                        for i, content in enumerate(result):
                            print(f"📄 Content {i}: {len(content.parts)} parts")
                            for j, part in enumerate(content.parts):
                                print(f"📄 Part {j}: {part.typeGroup} - {len(part.data)} chars")
                    return result

                self.extraction.extractContent = debug_extract_content

                # Override the getChatDocumentsFromDocumentList method to add debugging
                original_method = self.workflow.getChatDocumentsFromDocumentList

                def debug_getChatDocumentsFromDocumentList(documentList):
                    print(f"🔍 Debug: Looking for documents: {documentList}")
                    print("🔍 Debug: Available documents in workflow:")
                    for i, message in enumerate(mock_service_center.workflow.messages):
                        print(f"  Message {i}: {len(message.documents)} documents")
                        for j, doc in enumerate(message.documents):
                            print(f"    Document {j}: ID={doc.id}, fileId={doc.fileId}, fileName={doc.fileName}")

                    # Add detailed debugging for the docItem parsing
                    for doc_ref in documentList:
                        if doc_ref.startswith("docItem:"):
                            parts = doc_ref.split(':')
                            print(f"🔍 Debug: Parsed docItem parts: {parts}")
                            if len(parts) >= 2:
                                doc_id = parts[1]
                                print(f"🔍 Debug: Looking for doc_id: '{doc_id}'")
                                for message in mock_service_center.workflow.messages:
                                    if message.documents:
                                        for doc in message.documents:
                                            print(f"🔍 Debug: Comparing '{doc_id}' == '{doc.id}' ? {doc_id == doc.id}")
                                            if doc.id == doc_id:
                                                print("🔍 Debug: MATCH FOUND!")
                                                # Leaves only the innermost loop; later
                                                # messages are still compared.
                                                break

                    # Debug the original method's workflow reference
                    print(f"🔍 Debug: Original method workflow: {self.workflow}")
                    print(f"🔍 Debug: Original method workflow.messages: {getattr(self.workflow, 'messages', 'NO_MESSAGES_ATTR')}")

                    result = original_method(documentList)
                    print(f"🔍 Debug: Found {len(result)} documents")
                    return result

                self.workflow.getChatDocumentsFromDocumentList = debug_getChatDocumentsFromDocumentList

        real_services = ServicesObject()

        # Initialize AI service properly
        print("Initializing AI service...")
        try:
            real_services.ai = await AiService.create()
            print("✅ AI service initialized successfully")
            print(f"✅ AI service aiObjects: {real_services.ai.aiObjects}")
        except Exception as e:
            print(f"❌ AI service initialization failed: {e}")
            traceback.print_exc()
            return

        # Add debugging to AI service calls
        original_call_ai = real_services.ai.callAi

        async def debug_call_ai(prompt, documents=None, options=None):
            print(f"🤖 AI Service called with prompt: {prompt[:200]}...")
            print(f"🤖 AI Service documents: {len(documents) if documents else 0}")
            print(f"🤖 AI Service options: {options}")
            result = await original_call_ai(prompt, documents, options)
            print(f"🤖 AI Service result length: {len(result) if result else 0}")
            print(f"🤖 AI Service result preview: {result[:200] if result else 'None'}...")
            return result

        real_services.ai.callAi = debug_call_ai

        method_doc = MethodDocument(services=real_services)
        print("MethodDocument initialized with REAL services")

        # Read the web integration result file
        web_result_file = gateway_dir / "test_web_integration_result.md"
        if not web_result_file.exists():
            print(f"Web integration result file not found: {web_result_file}")
            print("Please run test_web_integration.py first to generate the document.")
            return

        print(f"Reading web integration result from: {web_result_file}")
        with open(web_result_file, 'r', encoding='utf-8') as f:
            web_content = f.read()

        print(f"Document size: {len(web_content)} characters")

        # Create the document for the workflow
        web_document = ChatDocument(
            messageId="test_message_003",
            fileId="temp_file_003",
            fileName="test_web_integration_result.md",
            fileSize=len(web_content),
            mimeType="text/markdown"
        )

        # Debug: Show the actual generated ID
        print(f"🔍 Debug: Generated document ID: {web_document.id}")

        # Update the workflow with the document
        mock_service_center.workflow = MockWorkflow(web_document)

        # Also update the workflow in the WorkflowService since it was created before we set the workflow
        real_services.workflow.workflow = mock_service_center.workflow

        # Debug: Check if the document ID changed after adding to workflow
        print(f"🔍 Debug: Document ID after adding to workflow: {web_document.id}")
        if mock_service_center.workflow.messages and mock_service_center.workflow.messages[0].documents:
            workflow_doc = mock_service_center.workflow.messages[0].documents[0]
            print(f"🔍 Debug: Workflow document ID: {workflow_doc.id}")
            print(f"🔍 Debug: Same object? {web_document is workflow_doc}")

        # Debug: Check if the document is properly set up
        print(f"🔍 Debug: Document ID: {web_document.id}")
        print(f"🔍 Debug: Document fileId: {web_document.fileId}")
        print(f"🔍 Debug: Workflow messages: {len(mock_service_center.workflow.messages)}")
        if mock_service_center.workflow.messages:
            print(f"🔍 Debug: First message documents: {len(mock_service_center.workflow.messages[0].documents)}")
            if mock_service_center.workflow.messages[0].documents:
                doc = mock_service_center.workflow.messages[0].documents[0]
                print(f"🔍 Debug: First document ID: {doc.id}")
                print(f"🔍 Debug: First document fileId: {doc.fileId}")
        else:
            print("🔍 Debug: No messages in workflow!")

        # Create a temporary file to simulate document storage for testing
        print("Creating temporary document file for testing...")

        # The file is created with delete=False, so it must be removed
        # explicitly; the finally clause below is the single cleanup point for
        # every exit path (success, failure, early return).
        temp_file_path = None
        try:
            with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False, encoding='utf-8') as temp_file:
                # Remember the path before writing so a failed write still gets cleaned up.
                temp_file_path = temp_file.name
                temp_file.write(web_content)

            print(f"Temporary file created: {temp_file_path}")

            # Mock the database interface to return our file content
            print("Setting up database interface...")

            def mock_get_file_data(file_id):
                if file_id == web_document.fileId:  # Use the actual fileId
                    return web_content.encode('utf-8')
                return None

            # Test parameters for generateReport action
            # The documentList should contain document references, not ChatDocument objects
            # Use the actual document ID from the ChatDocument (now that it's in the workflow)
            document_ref = f"docItem:{web_document.id}:{web_document.fileName}"
            test_parameters = {
                "documentList": [document_ref],
                "prompt": "Erstelle einen prägnanten 2-seitigen Immobilienbericht basierend auf den Web-Recherchedaten. Fokussiere auf die wichtigsten Erkenntnisse, Markttrends, Preisentwicklungen und Empfehlungen. Halte den Bericht auf maximal 2 Seiten kompakt und strukturiert.",
                "title": "Immobilienmarkt Kanton Zürich - Web-Recherche Analyse",
                "outputFormat": "docx",
                "includeMetadata": True
            }

            print(f"🔍 Debug: Document reference: {document_ref}")

            print("Test parameters:")
            print(f"  - Document: {web_document.fileName}")
            print(f"  - Title: {test_parameters['title']}")
            print(f"  - Output Format: {test_parameters['outputFormat']}")
            print(f"  - Include Metadata: {test_parameters['includeMetadata']}")

            try:
                # Mock only the database interface to provide the file content.
                # NOTE(review): patch() replaces the attribute on this module only;
                # code that bound the name earlier via "from ... import getInterface"
                # keeps the original function — confirm the lookup path is covered.
                with patch('modules.interfaces.interfaceDbComponentObjects.getInterface') as mock_get_interface:
                    mock_interface = mock_get_interface.return_value
                    mock_interface.getFileData = mock_get_file_data

                    print("\nCalling generateReport action with REAL AI processing...")

                    # Use the REAL generateReport method with REAL AI processing
                    result = await method_doc.generateReport(test_parameters)

                print("Document generation completed successfully!")
                print(f"Result type: {type(result)}")

                _save_report_outputs(result, test_parameters, gateway_dir)

            except Exception as e:
                print(f"❌ Document generation failed: {e}")
                traceback.print_exc()
                return
        finally:
            # Clean up temporary file
            if temp_file_path is not None:
                try:
                    os.unlink(temp_file_path)
                    print(f"🧹 Cleaned up temporary file: {temp_file_path}")
                except OSError as e:
                    print(f"⚠️  Warning: Could not clean up temporary file: {e}")

        print("\n" + "="*60)
        print("REAL DOCUMENT GENERATION TEST COMPLETED")
        print("="*60)

    except Exception as e:
        print(f"❌ Test failed: {str(e)}")
        traceback.print_exc()

if __name__ == "__main__":
    # Script entry point: the whole test run is bounded by a 600 second
    # timeout, and any failure is reported on stdout instead of propagating.
    try:
        bounded_run = asyncio.wait_for(test_real_document_generation(), timeout=600.0)
        asyncio.run(bounded_run)
    except asyncio.TimeoutError:
        print("⏰ Test timed out after 600 seconds")
    except Exception as run_error:
        import traceback
        print(f"❌ Test failed with error: {run_error}")
        traceback.print_exc()