189 lines
7.2 KiB
Python
189 lines
7.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Simple test script for enhanced Excel processing functionality.
|
|
This script tests the DocumentExtraction class with Excel files.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import asyncio
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
# Route DEBUG-and-above records through the root logger using a
# "timestamp - level - message" layout.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)

# Make the gateway directory (three levels above this script) importable so
# the project-local `modules.*` imports used by the test can be resolved.
sys.path.append(os.path.join(os.path.dirname(__file__), *(['..'] * 3)))
|
|
|
|
# MIME type passed to DocumentExtraction for every .xlsx payload in this script.
_XLSX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"


def _log_extraction_result(result, preview_chars: int) -> None:
    """Log the item count, then label, MIME type, size and a data preview per content item."""
    logger.info(f"Generated {len(result.contents)} content items:")
    for position, item in enumerate(result.contents, start=1):
        logger.info(f" Item {position}: {item.label}")
        logger.info(f" MIME type: {item.metadata.mimeType}")
        logger.info(f" Size: {item.metadata.size} bytes")
        if item.data:
            logger.info(f" Data preview: {item.data[:preview_chars]}...")
        else:
            logger.info(" Data: None")


def _build_sample_workbook_bytes() -> bytes:
    """Create a small in-memory workbook with openpyxl and return it serialized as .xlsx bytes.

    Raises:
        ImportError: if openpyxl is not installed.
    """
    # Imported lazily so a missing openpyxl is reported by the caller instead
    # of preventing the whole script from loading.
    from io import BytesIO
    from openpyxl import Workbook

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "Test Sheet"

    # Header row followed by two data rows, written to A1:C3.
    rows = (
        ("Name", "Age", "City"),
        ("John Doe", 30, "New York"),
        ("Jane Smith", 25, "Los Angeles"),
    )
    for row_index, row in enumerate(rows, start=1):
        for column_index, value in enumerate(row, start=1):
            sheet.cell(row=row_index, column=column_index, value=value)

    # Document properties are set as well so they are part of the saved file.
    workbook.properties.title = "Test Workbook"
    workbook.properties.creator = "Test User"
    workbook.properties.subject = "Test Subject"

    logger.info("✓ Test workbook created successfully")
    logger.info(f" Title: {workbook.properties.title}")
    logger.info(f" Creator: {workbook.properties.creator}")
    logger.info(f" Subject: {workbook.properties.subject}")
    logger.info(f" Sheets: {workbook.sheetnames}")

    buffer = BytesIO()
    workbook.save(buffer)
    # getvalue() returns the full buffer regardless of the stream position.
    return buffer.getvalue()


async def _extract_xlsx(extractor, file_data: bytes, file_name: str):
    """Run `extractor.processFileData` on raw .xlsx bytes with AI processing disabled."""
    return await extractor.processFileData(
        fileData=file_data,
        fileName=file_name,
        mimeType=_XLSX_MIME_TYPE,
        base64Encoded=False,
        prompt=None,
        enableAI=False,
    )


async def test_excel_processing(test_file_path: str = "d:/temp/test-extraction/test.xlsx"):
    """Test Excel processing functionality.

    Builds a mock user and workflow, wraps them in a ServiceCenter, and runs
    DocumentExtraction on an Excel file. If ``test_file_path`` exists it is
    read and processed; otherwise a small workbook is generated in memory
    with openpyxl and processed instead. Outcomes are reported through the
    module logger; errors are logged rather than raised.

    Args:
        test_file_path: Location of an optional sample .xlsx file. Defaults to
            the path this script has always looked for.
    """
    try:
        # Project imports stay inside the try so a wrong working directory
        # produces the hint below instead of an import-time crash.
        from modules.chat.documents.documentExtraction import DocumentExtraction
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
        from modules.interfaces.interfaceChatModel import ChatWorkflow
        from datetime import datetime, UTC

        logger.info("Testing Excel processing functionality...")

        # Minimal user/workflow pair needed to construct a ServiceCenter.
        mock_user = User(
            id="test_user_001",
            username="testuser",
            email="test@example.com",
            fullName="Test User",
            language="en",
            enabled=True,
            privilege=UserPrivilege.USER,
            authenticationAuthority=AuthAuthority.LOCAL,
            mandateId="test_mandate_001",
        )

        current_time = datetime.now(UTC).isoformat()
        mock_workflow = ChatWorkflow(
            id="test_workflow_001",
            mandateId="test_mandate_001",
            status="active",
            name="Test Excel Processing Workflow",
            currentRound=1,
            lastActivity=current_time,
            startedAt=current_time,
            logs=[],
            messages=[],
            stats=None,
            tasks=[],
        )

        service_center = ServiceCenter(mock_user, mock_workflow)
        logger.info("✓ ServiceCenter created successfully")

        extractor = DocumentExtraction(service_center)
        logger.info("✓ DocumentExtraction created successfully")

        if os.path.exists(test_file_path):
            logger.info(f"Found test file: {test_file_path}")

            with open(test_file_path, 'rb') as f:
                file_data = f.read()
            logger.info(f"File size: {len(file_data)} bytes")

            logger.info("Processing Excel file...")
            result = await _extract_xlsx(
                extractor, file_data, os.path.basename(test_file_path)
            )
            logger.info("✓ Excel processing completed successfully!")
            _log_extraction_result(result, preview_chars=100)
        else:
            logger.info("No test Excel file found. Creating a simple test...")

            # Failures in this fallback are logged and do not abort the run.
            try:
                file_data = _build_sample_workbook_bytes()
                logger.info(f"Test workbook size: {len(file_data)} bytes")

                result = await _extract_xlsx(extractor, file_data, "test_workbook.xlsx")
                logger.info("✓ Test workbook processing completed successfully!")
                _log_extraction_result(result, preview_chars=200)
            except ImportError as e:
                logger.error(f"openpyxl not available: {e}")
            except Exception as e:
                # logger.exception keeps the traceback, which the plain
                # error message alone would discard.
                logger.exception("Error testing Excel functionality: %s", e)

        logger.info("Excel processing test completed!")

    except ImportError as e:
        logger.error(f"Failed to import required modules: {e}")
        logger.error("Make sure you're running this script from the gateway directory")
    except Exception as e:
        # Top-level boundary of the script: record the message and traceback.
        logger.exception("Unexpected error: %s", e)
|
|
|
|
# Script entry point: run the async Excel test to completion on a new event loop.
if __name__ == "__main__":
    excel_test = test_excel_processing()
    asyncio.run(excel_test)
|