368 lines
18 KiB
Python
368 lines
18 KiB
Python
#!/usr/bin/env python3
"""
Real Document Generation Test

Exercises the generateReport action end to end with the real AI service;
only the file-storage lookup is stubbed later in the test body.
"""

import asyncio
import sys
from pathlib import Path
import tempfile
import os

# Directory containing this script, plus its ``modules`` sub-folder.
gateway_dir = Path(__file__).parent
modules_path = gateway_dir / 'modules'

# Put both locations at the front of the import search path so the
# ``modules.*`` imports below resolve. The resulting order is
# [modules_path, gateway_dir, ...previous entries].
sys.path[:0] = [str(modules_path), str(gateway_dir)]

from modules.workflows.methods.methodDocument import MethodDocument
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelUam import User
from modules.interfaces.interfaceDbComponentObjects import getInterface
async def test_real_document_generation():
    """Run generateReport against the real AI service and save the output.

    Steps performed:
      1. Build a ``User`` and a services object holding the extraction,
         generation, workflow and AI services, each wrapped with
         debug-printing shims.
      2. Read ``test_web_integration_result.md`` next to this script and
         register it as a ``ChatDocument`` in a minimal stand-in workflow.
      3. Call ``MethodDocument.generateReport`` while ``getInterface`` is
         patched so ``getFileData`` returns the markdown content.
      4. Write the first returned document to
         ``test_real_generated_report.docx`` and a text preview to
         ``test_real_generated_report_content.txt``.

    Returns:
        None. Every failure is printed (with traceback) rather than raised.
    """
    import traceback
    from unittest.mock import patch

    print("=" * 60)
    print("REAL DOCUMENT GENERATION TEST")
    print("=" * 60)

    try:
        print("Environment: dev")
        print("Initializing MethodDocument with REAL services...")

        real_user = User(
            id="test_user_001",
            username="testuser",
            name="Test User",
            email="test@example.com"
        )

        # Imported here (inside the outer try) so that an import failure is
        # reported through the "Test failed" handler below.
        from modules.services.serviceAi.mainServiceAi import AiService
        from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
        from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
        from modules.services.serviceWorkflow.mainServiceWorkflow import WorkflowService

        class MockMessage:
            """Stand-in chat message exposing only ``documents``."""

            def __init__(self, document):
                self.documents = [document] if document else []

        class MockWorkflow:
            """Stand-in workflow exposing only ``messages``."""

            def __init__(self, document):
                self.messages = [MockMessage(document)] if document else []

        class MockServiceCenter:
            """Stand-in service center handed to ``WorkflowService``."""

            def __init__(self, user, document):
                self.services = {}
                self.user = user
                self.workflow = MockWorkflow(document)
                self.interfaceDbChat = None  # Mock interface
                self.interfaceDbComponent = None  # Mock interface
                self.interfaceDbApp = None  # Mock interface

        # Starts without a document; the workflow is swapped in once the
        # ChatDocument exists.
        mock_service_center = MockServiceCenter(real_user, None)

        class ServicesObject:
            """Attribute-style container of the services MethodDocument receives."""

            def __init__(self):
                # The AI service comes from the async AiService.create()
                # factory, so it is filled in after construction.
                self.ai = None
                self.extraction = ExtractionService()
                self.generation = GenerationService()
                self.workflow = WorkflowService(mock_service_center)
                self.user = real_user

                # Wrap extractContent so every call and its result are printed.
                original_extract = self.extraction.extractContent

                def debug_extract_content(documents, options):
                    print(f"📄 Extraction Service called with {len(documents)} documents")
                    print(f"📄 Extraction options: {options}")
                    result = original_extract(documents, options)
                    print(f"📄 Extraction result: {len(result)} extracted content objects")
                    if result:
                        for i, content in enumerate(result):
                            print(f"📄 Content {i}: {len(content.parts)} parts")
                            for j, part in enumerate(content.parts):
                                print(f"📄 Part {j}: {part.typeGroup} - {len(part.data)} chars")
                    return result

                self.extraction.extractContent = debug_extract_content

                # Wrap getChatDocumentsFromDocumentList with lookup diagnostics.
                original_method = self.workflow.getChatDocumentsFromDocumentList

                def debug_getChatDocumentsFromDocumentList(documentList):
                    print(f"🔍 Debug: Looking for documents: {documentList}")
                    print("🔍 Debug: Available documents in workflow:")
                    for i, message in enumerate(mock_service_center.workflow.messages):
                        print(f" Message {i}: {len(message.documents)} documents")
                        for j, doc in enumerate(message.documents):
                            print(f" Document {j}: ID={doc.id}, fileId={doc.fileId}, fileName={doc.fileName}")

                    # Trace how each "docItem:<id>:<name>" reference is matched.
                    for doc_ref in documentList:
                        if doc_ref.startswith("docItem:"):
                            parts = doc_ref.split(':')
                            print(f"🔍 Debug: Parsed docItem parts: {parts}")
                            if len(parts) >= 2:
                                doc_id = parts[1]
                                print(f"🔍 Debug: Looking for doc_id: '{doc_id}'")
                                for message in mock_service_center.workflow.messages:
                                    if message.documents:
                                        for doc in message.documents:
                                            print(f"🔍 Debug: Comparing '{doc_id}' == '{doc.id}' ? {doc_id == doc.id}")
                                            if doc.id == doc_id:
                                                print("🔍 Debug: MATCH FOUND!")
                                                # Leaves only the innermost loop;
                                                # later messages are still scanned.
                                                break

                    print(f"🔍 Debug: Original method workflow: {self.workflow}")
                    print(f"🔍 Debug: Original method workflow.messages: {getattr(self.workflow, 'messages', 'NO_MESSAGES_ATTR')}")

                    result = original_method(documentList)
                    print(f"🔍 Debug: Found {len(result)} documents")
                    return result

                self.workflow.getChatDocumentsFromDocumentList = debug_getChatDocumentsFromDocumentList

        real_services = ServicesObject()

        print("Initializing AI service...")
        try:
            real_services.ai = await AiService.create()
            print("✅ AI service initialized successfully")
            print(f"✅ AI service aiObjects: {real_services.ai.aiObjects}")
        except Exception as e:
            print(f"❌ AI service initialization failed: {e}")
            traceback.print_exc()
            return

        # Wrap callAi so prompts and results are previewed on stdout.
        original_call_ai = real_services.ai.callAi

        async def debug_call_ai(prompt, documents=None, options=None):
            print(f"🤖 AI Service called with prompt: {prompt[:200]}...")
            print(f"🤖 AI Service documents: {len(documents) if documents else 0}")
            print(f"🤖 AI Service options: {options}")
            result = await original_call_ai(prompt, documents, options)
            print(f"🤖 AI Service result length: {len(result) if result else 0}")
            print(f"🤖 AI Service result preview: {result[:200] if result else 'None'}...")
            return result

        real_services.ai.callAi = debug_call_ai

        method_doc = MethodDocument(services=real_services)
        print("MethodDocument initialized with REAL services")

        # The input is the markdown file written by test_web_integration.py.
        web_result_file = gateway_dir / "test_web_integration_result.md"
        if not web_result_file.exists():
            print(f"Web integration result file not found: {web_result_file}")
            print("Please run test_web_integration.py first to generate the document.")
            return

        print(f"Reading web integration result from: {web_result_file}")
        with open(web_result_file, 'r', encoding='utf-8') as f:
            web_content = f.read()

        print(f"Document size: {len(web_content)} characters")

        web_document = ChatDocument(
            messageId="test_message_003",
            fileId="temp_file_003",
            fileName="test_web_integration_result.md",
            fileSize=len(web_content),
            mimeType="text/markdown"
        )

        print(f"🔍 Debug: Generated document ID: {web_document.id}")

        # Install the document in the stand-in workflow, and point the
        # WorkflowService at it too because that service was constructed
        # before the document existed.
        mock_service_center.workflow = MockWorkflow(web_document)
        real_services.workflow.workflow = mock_service_center.workflow

        print(f"🔍 Debug: Document ID after adding to workflow: {web_document.id}")
        if mock_service_center.workflow.messages and mock_service_center.workflow.messages[0].documents:
            workflow_doc = mock_service_center.workflow.messages[0].documents[0]
            print(f"🔍 Debug: Workflow document ID: {workflow_doc.id}")
            print(f"🔍 Debug: Same object? {web_document is workflow_doc}")

        print(f"🔍 Debug: Document ID: {web_document.id}")
        print(f"🔍 Debug: Document fileId: {web_document.fileId}")
        print(f"🔍 Debug: Workflow messages: {len(mock_service_center.workflow.messages)}")
        if mock_service_center.workflow.messages:
            print(f"🔍 Debug: First message documents: {len(mock_service_center.workflow.messages[0].documents)}")
            if mock_service_center.workflow.messages[0].documents:
                doc = mock_service_center.workflow.messages[0].documents[0]
                print(f"🔍 Debug: First document ID: {doc.id}")
                print(f"🔍 Debug: First document fileId: {doc.fileId}")
        else:
            print("🔍 Debug: No messages in workflow!")

        print("Creating temporary document file for testing...")

        # delete=False keeps the file on disk after the ``with`` block, so it
        # must be removed explicitly (see the ``finally`` clause below).
        with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False, encoding='utf-8') as temp_file:
            temp_file.write(web_content)
            temp_file_path = temp_file.name

        print(f"Temporary file created: {temp_file_path}")

        print("Setting up database interface...")

        def mock_get_file_data(file_id):
            """Return the markdown bytes for our document's fileId, else None."""
            if file_id == web_document.fileId:
                return web_content.encode('utf-8')
            return None

        # documentList carries string references, not ChatDocument objects.
        document_ref = f"docItem:{web_document.id}:{web_document.fileName}"
        test_parameters = {
            "documentList": [document_ref],
            "prompt": "Erstelle einen prägnanten 2-seitigen Immobilienbericht basierend auf den Web-Recherchedaten. Fokussiere auf die wichtigsten Erkenntnisse, Markttrends, Preisentwicklungen und Empfehlungen. Halte den Bericht auf maximal 2 Seiten kompakt und strukturiert.",
            "title": "Immobilienmarkt Kanton Zürich - Web-Recherche Analyse",
            "outputFormat": "docx",
            "includeMetadata": True
        }

        print(f"🔍 Debug: Document reference: {document_ref}")

        print("Test parameters:")
        print(f" - Document: {web_document.fileName}")
        print(f" - Title: {test_parameters['title']}")
        print(f" - Output Format: {test_parameters['outputFormat']}")
        print(f" - Include Metadata: {test_parameters['includeMetadata']}")

        try:
            # Only the storage lookup is stubbed; the AI processing is real.
            # NOTE(review): this patches the attribute on the interface module;
            # code that already did ``from ... import getInterface`` keeps its
            # own reference - confirm the patch reaches the consumer.
            with patch('modules.interfaces.interfaceDbComponentObjects.getInterface') as mock_get_interface:
                mock_interface = mock_get_interface.return_value
                mock_interface.getFileData = mock_get_file_data

                print("\nCalling generateReport action with REAL AI processing...")
                result = await method_doc.generateReport(test_parameters)

            print("Document generation completed successfully!")
            print(f"Result type: {type(result)}")

            _save_generated_report(result, test_parameters)
        except Exception as e:
            print(f"❌ Document generation failed: {e}")
            traceback.print_exc()
            return
        finally:
            # Single cleanup point: runs on success, on failure, and on the
            # early return above.
            try:
                os.unlink(temp_file_path)
                print(f"🧹 Cleaned up temporary file: {temp_file_path}")
            except OSError as e:
                print(f"⚠️ Warning: Could not clean up temporary file: {e}")

        print("\n" + "="*60)
        print("REAL DOCUMENT GENERATION TEST COMPLETED")
        print("="*60)

    except Exception as e:
        print(f"❌ Test failed: {str(e)}")
        traceback.print_exc()


def _save_generated_report(result, test_parameters):
    """Print the outcome of generateReport and write its first document to disk.

    Args:
        result: Object returned by ``generateReport``; read via its optional
            ``success``, ``error`` and ``documents`` attributes.
        test_parameters: The parameter dict passed to ``generateReport``;
            ``title`` and ``outputFormat`` are echoed into the preview file.

    Writes ``test_real_generated_report.docx`` (decoded payload, or the raw
    text when it is not decodable base64) and
    ``test_real_generated_report_content.txt`` into ``gateway_dir``.
    """
    import base64

    if not (hasattr(result, 'success') and result.success):
        print(f"❌ Generation failed: {result}")
        if hasattr(result, 'error'):
            print(f"Error: {result.error}")
        return

    print(f"✅ Success: {result.success}")

    if not (hasattr(result, 'documents') and result.documents):
        print("❌ No documents found in result")
        print(f"Result structure: {result}")
        return

    document = result.documents[0]

    # The payload is read from documentData['result'] when documentData is a
    # dict; any other shape yields empty content.
    document_data = getattr(document, 'documentData', None)
    content = document_data.get('result', '') if isinstance(document_data, dict) else ''

    mime_type = getattr(document, 'mimeType', 'application/pdf')
    file_name = getattr(document, 'documentName', 'test_report.pdf')

    print(f"📄 Content length: {len(content)} characters")
    print(f"📋 MIME type: {mime_type}")
    print(f"📁 File name: {file_name}")

    output_file = gateway_dir / "test_real_generated_report.docx"
    with open(output_file, 'wb') as f:
        try:
            # The DOCX payload is expected to arrive base64 encoded.
            docx_bytes = base64.b64decode(content)
            f.write(docx_bytes)
            print(f"📁 Generated DOCX saved to: {output_file}")
            print(f"📊 DOCX file size: {len(docx_bytes)} bytes")
        except (ValueError, TypeError) as e:
            # binascii.Error (bad padding) is a ValueError subclass.
            print(f"⚠️ Could not decode base64 content: {e}")
            f.write(content.encode('utf-8'))
            print(f"📁 Content saved as text to: {output_file}")

    # Human-readable summary plus the first 2000 characters of the payload.
    text_output_file = gateway_dir / "test_real_generated_report_content.txt"
    with open(text_output_file, 'w', encoding='utf-8') as f:
        f.write("Generated Report Content\n")
        f.write("======================\n\n")
        f.write(f"Title: {test_parameters['title']}\n")
        f.write(f"Format: {test_parameters['outputFormat']}\n")
        f.write(f"MIME Type: {mime_type}\n")
        f.write(f"File Name: {file_name}\n")
        f.write(f"Content Length: {len(content)} characters\n\n")
        f.write("Content Preview (first 2000 characters):\n")
        f.write("-" * 50 + "\n")
        f.write(content[:2000])
        if len(content) > 2000:
            f.write("\n... (content truncated)")

    print(f"📄 Content preview saved to: {text_output_file}")
if __name__ == "__main__":
    # Upper bound for the whole run; the test awaits real AI calls.
    TEST_TIMEOUT_SECONDS = 600.0

    try:
        asyncio.run(
            asyncio.wait_for(
                test_real_document_generation(),
                timeout=TEST_TIMEOUT_SECONDS,
            )
        )
    except asyncio.TimeoutError:
        print(f"⏰ Test timed out after {TEST_TIMEOUT_SECONDS:.0f} seconds")
        # Non-zero status so a shell or CI job can detect the failure.
        sys.exit(1)
    except Exception as e:
        print(f"❌ Test failed with error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)