#!/usr/bin/env python3
"""
Real Document Generation Test

Tests the generateReport action with REAL AI processing (no mocking of the
AI, extraction or generation services). Only the workflow container that
holds the input document and the database file lookup are stubbed.
"""

import asyncio
import base64
import sys
from pathlib import Path
import tempfile
import os
import traceback
from unittest.mock import patch

# Add the gateway directory to the Python path
gateway_dir = Path(__file__).parent
sys.path.insert(0, str(gateway_dir))

# Add the modules path to sys.path for imports
modules_path = gateway_dir / 'modules'
sys.path.insert(0, str(modules_path))

from modules.workflows.methods.methodDocument import MethodDocument
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelUam import User
from modules.interfaces.interfaceDbComponentObjects import getInterface

# Upper bound for the whole test run; used for both the wait and the message.
TEST_TIMEOUT_SECONDS = 600.0


def _as_text(content):
    """Return *content* as ``str`` so it can be measured, sliced and written.

    ``str`` is returned unchanged, bytes-like values are decoded as UTF-8
    (undecodable bytes are replaced), anything else goes through ``str()``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (bytes, bytearray)):
        return bytes(content).decode('utf-8', errors='replace')
    return str(content)


def _decode_report_content(content):
    """Turn the generated report payload into bytes for the output file.

    Args:
        content: The value of ``documentData['result']``; expected to be
            base64 text for DOCX output.

    Returns:
        A ``(payload, was_base64)`` tuple. ``was_base64`` is ``False`` when
        base64 decoding failed and the raw content was used instead.
    """
    try:
        return base64.b64decode(content), True
    except (ValueError, TypeError) as e:
        # binascii.Error (bad padding) is a ValueError; TypeError covers
        # values that are neither str nor bytes-like.
        print(f"⚠️ Could not decode base64 content: {e}")
    if isinstance(content, (bytes, bytearray)):
        return bytes(content), False
    return _as_text(content).encode('utf-8'), False


def _save_report_outputs(document, test_parameters):
    """Write the generated document and a text preview next to this script.

    Args:
        document: First entry of ``result.documents``; read through the
            attributes ``documentData``, ``mimeType`` and ``documentName``.
        test_parameters: The parameters passed to ``generateReport``; only
            ``title`` and ``outputFormat`` are read, for the preview header.
    """
    # ActionDocument objects have attributes, documentData is a dict
    document_data = getattr(document, 'documentData', None)
    content = document_data.get('result', '') if isinstance(document_data, dict) else ''
    content = content or ''  # a None result must not break len()/slicing below
    mime_type = getattr(document, 'mimeType', 'application/pdf')
    file_name = getattr(document, 'documentName', 'test_report.pdf')

    print(f"📄 Content length: {len(content)} characters")
    print(f"📋 MIME type: {mime_type}")
    print(f"📁 File name: {file_name}")

    # Decode BEFORE opening the file so a decode failure cannot leave a
    # truncated output behind.
    payload, was_base64 = _decode_report_content(content)
    output_file = gateway_dir / "test_real_generated_report.docx"
    output_file.write_bytes(payload)
    if was_base64:
        print(f"📁 Generated DOCX saved to: {output_file}")
        print(f"📊 DOCX file size: {len(payload)} bytes")
    else:
        print(f"📁 Content saved as text to: {output_file}")

    # Also save a text version for inspection
    preview = _as_text(content)
    text_output_file = gateway_dir / "test_real_generated_report_content.txt"
    report_lines = [
        "Generated Report Content\n",
        "======================\n\n",
        f"Title: {test_parameters['title']}\n",
        f"Format: {test_parameters['outputFormat']}\n",
        f"MIME Type: {mime_type}\n",
        f"File Name: {file_name}\n",
        f"Content Length: {len(content)} characters\n\n",
        "Content Preview (first 2000 characters):\n",
        "-" * 50 + "\n",
        preview[:2000],
    ]
    if len(preview) > 2000:
        report_lines.append("\n... (content truncated)")
    with open(text_output_file, 'w', encoding='utf-8') as f:
        f.writelines(report_lines)
    print(f"📄 Content preview saved to: {text_output_file}")


def _cleanup_temp_file(temp_file_path):
    """Best-effort removal of the temporary document file."""
    try:
        os.unlink(temp_file_path)
        print(f"🧹 Cleaned up temporary file: {temp_file_path}")
    except OSError as e:
        print(f"⚠️ Warning: Could not clean up temporary file: {e}")


async def test_real_document_generation():
    """Test generateReport with REAL AI processing.

    Builds a services object from the real extraction, generation, workflow
    and AI services (each wrapped with debug printing), loads
    ``test_web_integration_result.md`` from the gateway directory, registers
    it as a ChatDocument in a stub workflow and calls
    ``MethodDocument.generateReport``. The generated document and a text
    preview are written next to this script.

    Returns:
        None. Progress and failures are reported on stdout; exceptions are
        caught and printed with a traceback rather than propagated.
    """
    print("=" * 60)
    print("REAL DOCUMENT GENERATION TEST")
    print("=" * 60)

    temp_file_path = None
    try:
        # Initialize real services
        print("Environment: dev")
        print("Initializing MethodDocument with REAL services...")

        # Create a real user
        real_user = User(
            id="test_user_001",
            username="testuser",
            name="Test User",
            email="test@example.com",
        )

        # Initialize real services (this will use actual AI, extraction, generation services)
        from modules.services.serviceAi.mainServiceAi import AiService
        from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
        from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
        from modules.services.serviceWorkflow.mainServiceWorkflow import WorkflowService

        # Create a mock workflow with the document
        class MockWorkflow:
            def __init__(self, document):
                self.messages = [MockMessage(document)] if document else []

        class MockMessage:
            def __init__(self, document):
                self.documents = [document] if document else []

        # Create a mock service center for the workflow service
        class MockServiceCenter:
            def __init__(self, user, document):
                self.services = {}
                self.user = user
                self.workflow = MockWorkflow(document)
                self.interfaceDbChat = None  # Mock interface
                self.interfaceDbComponent = None  # Mock interface
                self.interfaceDbApp = None  # Mock interface

        mock_service_center = MockServiceCenter(real_user, None)  # Will be set later

        # Create a services object with attributes instead of a dictionary
        class ServicesObject:
            def __init__(self):
                # Note: AI service needs to be created with create() method for
                # proper initialization, so it is filled in after construction.
                self.ai = None
                self.extraction = ExtractionService()
                self.generation = GenerationService()
                self.workflow = WorkflowService(mock_service_center)
                self.user = real_user

                # Add debugging to extraction service calls
                original_extract = self.extraction.extractContent

                def debug_extract_content(documents, options):
                    print(f"📄 Extraction Service called with {len(documents)} documents")
                    print(f"📄 Extraction options: {options}")
                    result = original_extract(documents, options)
                    print(f"📄 Extraction result: {len(result)} extracted content objects")
                    if result:
                        for i, content in enumerate(result):
                            print(f"📄 Content {i}: {len(content.parts)} parts")
                            for j, part in enumerate(content.parts):
                                print(f"📄 Part {j}: {part.typeGroup} - {len(part.data)} chars")
                    return result

                self.extraction.extractContent = debug_extract_content

                # Override the getChatDocumentsFromDocumentList method to add debugging
                original_method = self.workflow.getChatDocumentsFromDocumentList

                def debug_getChatDocumentsFromDocumentList(documentList):
                    print(f"🔍 Debug: Looking for documents: {documentList}")
                    print("🔍 Debug: Available documents in workflow:")
                    for i, message in enumerate(mock_service_center.workflow.messages):
                        print(f" Message {i}: {len(message.documents)} documents")
                        for j, doc in enumerate(message.documents):
                            print(f" Document {j}: ID={doc.id}, fileId={doc.fileId}, fileName={doc.fileName}")

                    # Add detailed debugging for the docItem parsing
                    for doc_ref in documentList:
                        if not doc_ref.startswith("docItem:"):
                            continue
                        parts = doc_ref.split(':')
                        print(f"🔍 Debug: Parsed docItem parts: {parts}")
                        if len(parts) < 2:
                            continue
                        doc_id = parts[1]
                        print(f"🔍 Debug: Looking for doc_id: '{doc_id}'")
                        for message in mock_service_center.workflow.messages:
                            for doc in message.documents or []:
                                print(f"🔍 Debug: Comparing '{doc_id}' == '{doc.id}' ? {doc_id == doc.id}")
                                if doc.id == doc_id:
                                    print("🔍 Debug: MATCH FOUND!")
                                    break

                    # Debug the original method's workflow reference
                    print(f"🔍 Debug: Original method workflow: {self.workflow}")
                    print(f"🔍 Debug: Original method workflow.messages: {getattr(self.workflow, 'messages', 'NO_MESSAGES_ATTR')}")

                    result = original_method(documentList)
                    print(f"🔍 Debug: Found {len(result)} documents")
                    return result

                self.workflow.getChatDocumentsFromDocumentList = debug_getChatDocumentsFromDocumentList

        real_services = ServicesObject()

        # Initialize AI service properly
        print("Initializing AI service...")
        try:
            real_services.ai = await AiService.create()
            print("✅ AI service initialized successfully")
            print(f"✅ AI service aiObjects: {real_services.ai.aiObjects}")
        except Exception as e:
            print(f"❌ AI service initialization failed: {e}")
            traceback.print_exc()
            return

        # Add debugging to AI service calls
        original_call_ai = real_services.ai.callAi

        async def debug_call_ai(prompt, documents=None, options=None):
            print(f"🤖 AI Service called with prompt: {prompt[:200]}...")
            print(f"🤖 AI Service documents: {len(documents) if documents else 0}")
            print(f"🤖 AI Service options: {options}")
            result = await original_call_ai(prompt, documents, options)
            print(f"🤖 AI Service result length: {len(result) if result else 0}")
            print(f"🤖 AI Service result preview: {result[:200] if result else 'None'}...")
            return result

        real_services.ai.callAi = debug_call_ai

        method_doc = MethodDocument(services=real_services)
        print("MethodDocument initialized with REAL services")

        # Read the web integration result file
        web_result_file = gateway_dir / "test_web_integration_result.md"
        if not web_result_file.exists():
            print(f"Web integration result file not found: {web_result_file}")
            print("Please run test_web_integration.py first to generate the document.")
            return

        print(f"Reading web integration result from: {web_result_file}")
        with open(web_result_file, 'r', encoding='utf-8') as f:
            web_content = f.read()
        print(f"Document size: {len(web_content)} characters")

        # Create the document for the workflow
        web_document = ChatDocument(
            messageId="test_message_003",
            fileId="temp_file_003",
            fileName="test_web_integration_result.md",
            fileSize=len(web_content),
            mimeType="text/markdown",
        )

        # Debug: Show the actual generated ID
        print(f"🔍 Debug: Generated document ID: {web_document.id}")

        # Update the workflow with the document
        mock_service_center.workflow = MockWorkflow(web_document)
        # Also update the workflow in the WorkflowService since it was created
        # before we set the workflow
        real_services.workflow.workflow = mock_service_center.workflow

        workflow_messages = mock_service_center.workflow.messages

        # Debug: Check if the document ID changed after adding to workflow
        print(f"🔍 Debug: Document ID after adding to workflow: {web_document.id}")
        if workflow_messages and workflow_messages[0].documents:
            workflow_doc = workflow_messages[0].documents[0]
            print(f"🔍 Debug: Workflow document ID: {workflow_doc.id}")
            print(f"🔍 Debug: Same object? {web_document is workflow_doc}")

        # Debug: Check if the document is properly set up
        print(f"🔍 Debug: Document ID: {web_document.id}")
        print(f"🔍 Debug: Document fileId: {web_document.fileId}")
        print(f"🔍 Debug: Workflow messages: {len(workflow_messages)}")
        if workflow_messages:
            print(f"🔍 Debug: First message documents: {len(workflow_messages[0].documents)}")
            if workflow_messages[0].documents:
                doc = workflow_messages[0].documents[0]
                print(f"🔍 Debug: First document ID: {doc.id}")
                print(f"🔍 Debug: First document fileId: {doc.fileId}")
        else:
            print("🔍 Debug: No messages in workflow!")

        # Create a temporary file to simulate document storage for testing
        print("Creating temporary document file for testing...")
        with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False, encoding='utf-8') as temp_file:
            # Record the path before writing so the finally clause can remove
            # the file even if the write itself fails.
            temp_file_path = temp_file.name
            temp_file.write(web_content)
        print(f"Temporary file created: {temp_file_path}")

        # Mock the database interface to return our file content
        print("Setting up database interface...")

        def mock_get_file_data(file_id):
            if file_id == web_document.fileId:  # Use the actual fileId
                return web_content.encode('utf-8')
            return None

        # Test parameters for generateReport action
        # The documentList should contain document references, not ChatDocument objects
        # Use the actual document ID from the ChatDocument (now that it's in the workflow)
        document_ref = f"docItem:{web_document.id}:{web_document.fileName}"
        test_parameters = {
            "documentList": [document_ref],
            "prompt": (
                "Erstelle einen prägnanten 2-seitigen Immobilienbericht basierend auf den "
                "Web-Recherchedaten. Fokussiere auf die wichtigsten Erkenntnisse, Markttrends, "
                "Preisentwicklungen und Empfehlungen. Halte den Bericht auf maximal 2 Seiten "
                "kompakt und strukturiert."
            ),
            "title": "Immobilienmarkt Kanton Zürich - Web-Recherche Analyse",
            "outputFormat": "docx",
            "includeMetadata": True,
        }

        print(f"🔍 Debug: Document reference: {document_ref}")
        print("Test parameters:")
        print(f" - Document: {web_document.fileName}")
        print(f" - Title: {test_parameters['title']}")
        print(f" - Output Format: {test_parameters['outputFormat']}")
        print(f" - Include Metadata: {test_parameters['includeMetadata']}")

        try:
            # Mock only the database interface to provide the file content.
            # NOTE(review): this replaces the attribute on the defining module
            # only; modules that did `from ... import getInterface` earlier
            # keep their own reference - confirm the patch target is effective.
            with patch('modules.interfaces.interfaceDbComponentObjects.getInterface') as mock_get_interface:
                mock_interface = mock_get_interface.return_value
                mock_interface.getFileData = mock_get_file_data

                print("\nCalling generateReport action with REAL AI processing...")
                # Use the REAL generateReport method with REAL AI processing
                result = await method_doc.generateReport(test_parameters)

            print("Document generation completed successfully!")
            print(f"Result type: {type(result)}")

            if getattr(result, 'success', False):
                print(f"✅ Success: {result.success}")
                # Extract content from the ActionResult
                if getattr(result, 'documents', None):
                    _save_report_outputs(result.documents[0], test_parameters)
                else:
                    print("❌ No documents found in result")
                    print(f"Result structure: {result}")
            else:
                print(f"❌ Generation failed: {result}")
                if hasattr(result, 'error'):
                    print(f"Error: {result.error}")
        except Exception as e:
            print(f"❌ Document generation failed: {e}")
            traceback.print_exc()
            return

        print("\n" + "=" * 60)
        print("REAL DOCUMENT GENERATION TEST COMPLETED")
        print("=" * 60)

    except Exception as e:
        print(f"❌ Test failed: {str(e)}")
        traceback.print_exc()
    finally:
        # Single cleanup point: runs on success, on early return and on error.
        if temp_file_path is not None:
            _cleanup_temp_file(temp_file_path)


if __name__ == "__main__":
    # Add timeout to the entire test
    try:
        asyncio.run(asyncio.wait_for(test_real_document_generation(), timeout=TEST_TIMEOUT_SECONDS))
    except asyncio.TimeoutError:
        print(f"⏰ Test timed out after {TEST_TIMEOUT_SECONDS:.0f} seconds")
    except Exception as e:
        print(f"❌ Test failed with error: {e}")
        traceback.print_exc()