gateway/test_real_document_generation.py
2025-10-03 01:41:50 +02:00

368 lines
18 KiB
Python

#!/usr/bin/env python3
"""
Real Document Generation Test
Tests the generateReport action with REAL AI processing (no mocking)
"""
import asyncio
import sys
from pathlib import Path
import tempfile
import os
# Add the gateway directory to the Python path
gateway_dir = Path(__file__).parent
sys.path.insert(0, str(gateway_dir))
# Add the modules path to sys.path for imports
modules_path = gateway_dir / 'modules'
sys.path.insert(0, str(modules_path))
from modules.workflows.methods.methodDocument import MethodDocument
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelUam import User
from modules.interfaces.interfaceDbComponentObjects import getInterface
def _decode_report_payload(content):
    """Convert generated report content into the bytes to store on disk.

    Args:
        content: Report content as a string; base64 is expected for DOCX output.

    Returns:
        A ``(payload, error)`` tuple. ``error`` is ``None`` when ``content`` was
        valid base64 and ``payload`` is the decoded data; otherwise ``payload``
        is the UTF-8 encoded text and ``error`` is the decoding exception.
    """
    import base64

    try:
        # Remove whitespace so line-wrapped base64 is still accepted, then decode
        # strictly: without validate=True, b64decode silently drops invalid
        # characters and can turn plain text into garbage bytes.
        compact = "".join(content.split())
        return base64.b64decode(compact, validate=True), None
    except ValueError as e:  # binascii.Error is a ValueError subclass
        return content.encode('utf-8'), e


def _save_report_outputs(document, test_parameters, output_dir):
    """Write the generated report and a text preview of it into ``output_dir``.

    Args:
        document: First document of the action result; ``documentData``,
            ``mimeType`` and ``documentName`` are read when present.
        test_parameters: Parameters passed to generateReport (``title`` and
            ``outputFormat`` are echoed into the preview file).
        output_dir: Directory (``pathlib.Path``) receiving both output files.
    """
    # ActionDocument objects have attributes, documentData is a dict
    document_data = getattr(document, 'documentData', None)
    content = document_data.get('result', '') if isinstance(document_data, dict) else ''
    mime_type = getattr(document, 'mimeType', 'application/pdf')
    file_name = getattr(document, 'documentName', 'test_report.pdf')
    print(f"📄 Content length: {len(content)} characters")
    print(f"📋 MIME type: {mime_type}")
    print(f"📁 File name: {file_name}")

    # Decode before opening the file so the output is written exactly once.
    output_file = output_dir / "test_real_generated_report.docx"
    payload, decode_error = _decode_report_payload(content)
    if decode_error is not None:
        print(f"⚠️ Could not decode base64 content: {decode_error}")
    with open(output_file, 'wb') as f:
        f.write(payload)
    if decode_error is None:
        print(f"📁 Generated DOCX saved to: {output_file}")
        print(f"📊 DOCX file size: {len(payload)} bytes")
    else:
        print(f"📁 Content saved as text to: {output_file}")

    # Also save a text version for inspection
    text_output_file = output_dir / "test_real_generated_report_content.txt"
    preview_lines = [
        "Generated Report Content\n",
        "======================\n\n",
        f"Title: {test_parameters['title']}\n",
        f"Format: {test_parameters['outputFormat']}\n",
        f"MIME Type: {mime_type}\n",
        f"File Name: {file_name}\n",
        f"Content Length: {len(content)} characters\n\n",
        "Content Preview (first 2000 characters):\n",
        "-" * 50 + "\n",
        content[:2000],
    ]
    if len(content) > 2000:
        preview_lines.append("\n... (content truncated)")
    with open(text_output_file, 'w', encoding='utf-8') as f:
        f.writelines(preview_lines)
    print(f"📄 Content preview saved to: {text_output_file}")


async def test_real_document_generation():
    """Run generateReport end-to-end against the real AI/extraction/generation services.

    The workflow container and the database interface are replaced by small
    stand-ins; extraction, workflow lookup and AI calls are wrapped with
    printing proxies. The input is ``test_web_integration_result.md`` next to
    this script; outputs are written next to it as well.

    Every failure is printed (with traceback) rather than raised, and the
    coroutine always returns ``None``.
    """
    import traceback
    from unittest.mock import patch

    print("=" * 60)
    print("REAL DOCUMENT GENERATION TEST")
    print("=" * 60)
    try:
        # Initialize real services
        print("Environment: dev")
        print("Initializing MethodDocument with REAL services...")

        # Create a real user
        real_user = User(
            id="test_user_001",
            username="testuser",
            name="Test User",
            email="test@example.com"
        )

        # Service imports stay inside the try block so an import failure is
        # reported by the handler at the bottom instead of aborting the script.
        from modules.services.serviceAi.mainServiceAi import AiService
        from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
        from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
        from modules.services.serviceWorkflow.mainServiceWorkflow import WorkflowService

        class MockWorkflow:
            """Workflow stand-in holding at most one message."""

            def __init__(self, document):
                self.messages = [MockMessage(document)] if document else []

        class MockMessage:
            """Message stand-in holding at most one document."""

            def __init__(self, document):
                self.documents = [document] if document else []

        class MockServiceCenter:
            """Service-center stand-in handed to WorkflowService."""

            def __init__(self, user, document):
                self.services = {}
                self.user = user
                self.workflow = MockWorkflow(document)
                self.interfaceDbChat = None  # Mock interface
                self.interfaceDbComponent = None  # Mock interface
                self.interfaceDbApp = None  # Mock interface

        # The document is attached later, once the input file has been read.
        mock_service_center = MockServiceCenter(real_user, None)

        class ServicesObject:
            """Attribute-style services container with debug-printing wrappers."""

            def __init__(self):
                # The AI service needs its async create() factory, so it is
                # assigned after construction.
                self.ai = None
                self.extraction = ExtractionService()
                self.generation = GenerationService()
                self.workflow = WorkflowService(mock_service_center)
                self.user = real_user

                # Add debugging to extraction service calls
                # NOTE(review): the wrapper is synchronous and assumes
                # extractContent returns a sized result directly - confirm.
                original_extract = self.extraction.extractContent

                def debug_extract_content(documents, options):
                    print(f"📄 Extraction Service called with {len(documents)} documents")
                    print(f"📄 Extraction options: {options}")
                    result = original_extract(documents, options)
                    print(f"📄 Extraction result: {len(result)} extracted content objects")
                    if result:
                        for i, content in enumerate(result):
                            print(f"📄 Content {i}: {len(content.parts)} parts")
                            for j, part in enumerate(content.parts):
                                print(f"📄 Part {j}: {part.typeGroup} - {len(part.data)} chars")
                    return result

                self.extraction.extractContent = debug_extract_content

                # Override getChatDocumentsFromDocumentList to add debugging
                original_method = self.workflow.getChatDocumentsFromDocumentList

                def debug_getChatDocumentsFromDocumentList(documentList):
                    print(f"🔍 Debug: Looking for documents: {documentList}")
                    print("🔍 Debug: Available documents in workflow:")
                    for i, message in enumerate(mock_service_center.workflow.messages):
                        print(f" Message {i}: {len(message.documents)} documents")
                        for j, doc in enumerate(message.documents):
                            print(f" Document {j}: ID={doc.id}, fileId={doc.fileId}, fileName={doc.fileName}")
                    # Trace how each "docItem:<id>:<name>" reference is matched
                    for doc_ref in documentList:
                        if not doc_ref.startswith("docItem:"):
                            continue
                        parts = doc_ref.split(':')
                        print(f"🔍 Debug: Parsed docItem parts: {parts}")
                        if len(parts) < 2:
                            continue
                        doc_id = parts[1]
                        print(f"🔍 Debug: Looking for doc_id: '{doc_id}'")
                        for message in mock_service_center.workflow.messages:
                            if message.documents:
                                for doc in message.documents:
                                    print(f"🔍 Debug: Comparing '{doc_id}' == '{doc.id}' ? {doc_id == doc.id}")
                                    if doc.id == doc_id:
                                        print("🔍 Debug: MATCH FOUND!")
                                        # Leaves only this message's documents;
                                        # later messages are still scanned.
                                        break
                    # Debug the original method's workflow reference
                    print(f"🔍 Debug: Original method workflow: {self.workflow}")
                    print(f"🔍 Debug: Original method workflow.messages: {getattr(self.workflow, 'messages', 'NO_MESSAGES_ATTR')}")
                    result = original_method(documentList)
                    print(f"🔍 Debug: Found {len(result)} documents")
                    return result

                self.workflow.getChatDocumentsFromDocumentList = debug_getChatDocumentsFromDocumentList

        real_services = ServicesObject()

        # Initialize AI service properly
        print("Initializing AI service...")
        try:
            real_services.ai = await AiService.create()
            print("✅ AI service initialized successfully")
            print(f"✅ AI service aiObjects: {real_services.ai.aiObjects}")
        except Exception as e:
            print(f"❌ AI service initialization failed: {e}")
            traceback.print_exc()
            return

        # Add debugging to AI service calls
        original_call_ai = real_services.ai.callAi

        async def debug_call_ai(prompt, documents=None, options=None):
            print(f"🤖 AI Service called with prompt: {prompt[:200]}...")
            print(f"🤖 AI Service documents: {len(documents) if documents else 0}")
            print(f"🤖 AI Service options: {options}")
            result = await original_call_ai(prompt, documents, options)
            print(f"🤖 AI Service result length: {len(result) if result else 0}")
            print(f"🤖 AI Service result preview: {result[:200] if result else 'None'}...")
            return result

        real_services.ai.callAi = debug_call_ai

        method_doc = MethodDocument(services=real_services)
        print("MethodDocument initialized with REAL services")

        # Read the web integration result file
        web_result_file = gateway_dir / "test_web_integration_result.md"
        if not web_result_file.exists():
            print(f"Web integration result file not found: {web_result_file}")
            print("Please run test_web_integration.py first to generate the document.")
            return
        print(f"Reading web integration result from: {web_result_file}")
        with open(web_result_file, 'r', encoding='utf-8') as f:
            web_content = f.read()
        print(f"Document size: {len(web_content)} characters")

        # Create the document for the workflow
        web_document = ChatDocument(
            messageId="test_message_003",
            fileId="temp_file_003",
            fileName="test_web_integration_result.md",
            fileSize=len(web_content),
            mimeType="text/markdown"
        )
        # Debug: Show the actual generated ID
        print(f"🔍 Debug: Generated document ID: {web_document.id}")

        # Attach the document to the workflow; WorkflowService was created
        # before the document existed, so its workflow is updated as well.
        mock_service_center.workflow = MockWorkflow(web_document)
        real_services.workflow.workflow = mock_service_center.workflow
        messages = mock_service_center.workflow.messages

        # Debug: Check if the document ID changed after adding to workflow
        print(f"🔍 Debug: Document ID after adding to workflow: {web_document.id}")
        if messages and messages[0].documents:
            workflow_doc = messages[0].documents[0]
            print(f"🔍 Debug: Workflow document ID: {workflow_doc.id}")
            print(f"🔍 Debug: Same object? {web_document is workflow_doc}")

        # Debug: Check if the document is properly set up
        print(f"🔍 Debug: Document ID: {web_document.id}")
        print(f"🔍 Debug: Document fileId: {web_document.fileId}")
        print(f"🔍 Debug: Workflow messages: {len(messages)}")
        if messages:
            print(f"🔍 Debug: First message documents: {len(messages[0].documents)}")
            if messages[0].documents:
                doc = messages[0].documents[0]
                print(f"🔍 Debug: First document ID: {doc.id}")
                print(f"🔍 Debug: First document fileId: {doc.fileId}")
        else:
            print("🔍 Debug: No messages in workflow!")

        # Create a temporary file to simulate document storage for testing.
        # It is created with delete=False, so the finally clause below is the
        # single place that removes it, whatever happens in between.
        print("Creating temporary document file for testing...")
        temp_file_path = None
        try:
            with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False, encoding='utf-8') as temp_file:
                temp_file_path = temp_file.name
                temp_file.write(web_content)
            print(f"Temporary file created: {temp_file_path}")

            # Mock the database interface to return our file content
            print("Setting up database interface...")

            def mock_get_file_data(file_id):
                if file_id == web_document.fileId:  # Use the actual fileId
                    return web_content.encode('utf-8')
                return None

            # documentList carries document references, not ChatDocument
            # objects; the reference embeds the document's generated ID.
            document_ref = f"docItem:{web_document.id}:{web_document.fileName}"
            test_parameters = {
                "documentList": [document_ref],
                "prompt": "Erstelle einen prägnanten 2-seitigen Immobilienbericht basierend auf den Web-Recherchedaten. Fokussiere auf die wichtigsten Erkenntnisse, Markttrends, Preisentwicklungen und Empfehlungen. Halte den Bericht auf maximal 2 Seiten kompakt und strukturiert.",
                "title": "Immobilienmarkt Kanton Zürich - Web-Recherche Analyse",
                "outputFormat": "docx",
                "includeMetadata": True
            }
            print(f"🔍 Debug: Document reference: {document_ref}")
            print("Test parameters:")
            print(f" - Document: {web_document.fileName}")
            print(f" - Title: {test_parameters['title']}")
            print(f" - Output Format: {test_parameters['outputFormat']}")
            print(f" - Include Metadata: {test_parameters['includeMetadata']}")

            try:
                # Mock only the database interface to provide the file content.
                # NOTE(review): this patches the attribute on the defining
                # module; code that bound getInterface via "from ... import"
                # before this point keeps the real function - verify that the
                # code under test looks it up through the module.
                with patch('modules.interfaces.interfaceDbComponentObjects.getInterface') as mock_get_interface:
                    mock_interface = mock_get_interface.return_value
                    mock_interface.getFileData = mock_get_file_data
                    print("\nCalling generateReport action with REAL AI processing...")
                    # Use the REAL generateReport method with REAL AI processing
                    result = await method_doc.generateReport(test_parameters)

                print("Document generation completed successfully!")
                print(f"Result type: {type(result)}")
                if getattr(result, 'success', False):
                    print(f"✅ Success: {result.success}")
                    # Extract content from the ActionResult
                    documents = getattr(result, 'documents', None)
                    if documents:
                        _save_report_outputs(documents[0], test_parameters, gateway_dir)
                    else:
                        print("❌ No documents found in result")
                        print(f"Result structure: {result}")
                else:
                    print(f"❌ Generation failed: {result}")
                    if hasattr(result, 'error'):
                        print(f"Error: {result.error}")
            except Exception as e:
                print(f"❌ Document generation failed: {e}")
                traceback.print_exc()
                return
        finally:
            # Clean up temporary file (runs on success, failure and early return)
            if temp_file_path is not None:
                try:
                    os.unlink(temp_file_path)
                    print(f"🧹 Cleaned up temporary file: {temp_file_path}")
                except OSError as e:
                    print(f"⚠️ Warning: Could not clean up temporary file: {e}")

        print("\n" + "="*60)
        print("REAL DOCUMENT GENERATION TEST COMPLETED")
        print("="*60)
    except Exception as e:
        print(f"❌ Test failed: {str(e)}")
        traceback.print_exc()
if __name__ == "__main__":
    # Bound the whole run: the test talks to real AI services and could
    # otherwise hang indefinitely.
    timeout_seconds = 600.0
    try:
        asyncio.run(asyncio.wait_for(test_real_document_generation(), timeout=timeout_seconds))
    except asyncio.TimeoutError:
        print(f"⏰ Test timed out after {timeout_seconds:.0f} seconds")
        # Exit non-zero so shells and CI can tell a timeout from a clean run.
        sys.exit(1)
    except Exception as e:
        print(f"❌ Test failed with error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)