gateway/test_excel_processing.py

189 lines
7.2 KiB
Python

#!/usr/bin/env python3
"""
Simple test script for enhanced Excel processing functionality.
This script tests the DocumentExtraction class with Excel files.
"""
import os
import sys
import asyncio
import logging
from pathlib import Path
# Emit every record from DEBUG upwards, prefixed with a timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)

# Extend the import search path with the directory three levels above this
# file, presumably so the project's `modules` package can be imported.
# NOTE(review): the original comment called this "the gateway directory";
# confirm the three-level hop matches where this script actually lives.
sys.path.append(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir)
)
# MIME type handed to the extractor for .xlsx payloads.
_XLSX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

# Workbook probed first; when it is absent a small workbook is generated in memory.
_DEFAULT_TEST_FILE_PATH = "d:/temp/test-extraction/test.xlsx"


def _log_extraction_result(result, preview_length):
    """Log a count line plus label, MIME type, size and data preview per content item."""
    logger.info(f"Generated {len(result.contents)} content items:")
    for position, content_item in enumerate(result.contents, start=1):
        logger.info(f" Item {position}: {content_item.label}")
        logger.info(f" MIME type: {content_item.metadata.mimeType}")
        logger.info(f" Size: {content_item.metadata.size} bytes")
        if content_item.data:
            logger.info(f" Data preview: {content_item.data[:preview_length]}...")
        else:
            logger.info(" Data: None")


def _build_sample_workbook_bytes():
    """Create a small sample workbook with openpyxl and return it serialized as bytes."""
    from io import BytesIO

    # Raises ImportError when openpyxl is missing; the caller reports that.
    from openpyxl import Workbook

    wb = Workbook()
    ws = wb.active
    ws.title = "Test Sheet"

    # Header row followed by two data rows.
    ws['A1'] = "Name"
    ws['B1'] = "Age"
    ws['C1'] = "City"
    ws['A2'] = "John Doe"
    ws['B2'] = 30
    ws['C2'] = "New York"
    ws['A3'] = "Jane Smith"
    ws['B3'] = 25
    ws['C3'] = "Los Angeles"

    # Document properties, set so the workbook carries some metadata too.
    wb.properties.title = "Test Workbook"
    wb.properties.creator = "Test User"
    wb.properties.subject = "Test Subject"

    logger.info("✓ Test workbook created successfully")
    logger.info(f" Title: {wb.properties.title}")
    logger.info(f" Creator: {wb.properties.creator}")
    logger.info(f" Subject: {wb.properties.subject}")
    logger.info(f" Sheets: {wb.sheetnames}")

    buffer = BytesIO()
    wb.save(buffer)
    # getvalue() returns the whole buffer regardless of the stream position.
    return buffer.getvalue()


async def _extract_xlsx(extractor, file_data, file_name):
    """Run the extractor on raw .xlsx bytes with enableAI=False and no prompt."""
    return await extractor.processFileData(
        fileData=file_data,
        fileName=file_name,
        mimeType=_XLSX_MIME_TYPE,
        base64Encoded=False,
        prompt=None,
        enableAI=False,
    )


async def test_excel_processing(test_file_path=_DEFAULT_TEST_FILE_PATH):
    """Exercise DocumentExtraction.processFileData on an Excel workbook.

    Builds a mock user and workflow, wraps them in a ServiceCenter and
    creates a DocumentExtraction from it. If ``test_file_path`` exists it is
    read and processed; otherwise a small workbook is generated in memory
    with openpyxl and processed instead. The resulting content items are
    written to the log.

    Args:
        test_file_path: Path of an existing .xlsx file to process. Defaults
            to the location the script has always probed.

    Returns:
        None. Failures are logged (with traceback) rather than raised.
    """
    # timezone.utc is the same object as datetime.UTC but exists on every
    # Python 3 version; importing it outside the try also keeps a stdlib
    # import problem from being reported as a missing project module.
    from datetime import datetime, timezone

    try:
        # Project imports are deferred so that a wrong working directory or
        # sys.path produces the friendly message below.
        from modules.chat.documents.documentExtraction import DocumentExtraction
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
        from modules.interfaces.interfaceChatModel import ChatWorkflow

        logger.info("Testing Excel processing functionality...")

        # Minimal user/workflow pair needed to construct a ServiceCenter.
        mock_user = User(
            id="test_user_001",
            username="testuser",
            email="test@example.com",
            fullName="Test User",
            language="en",
            enabled=True,
            privilege=UserPrivilege.USER,
            authenticationAuthority=AuthAuthority.LOCAL,
            mandateId="test_mandate_001",
        )
        current_time = datetime.now(timezone.utc).isoformat()
        mock_workflow = ChatWorkflow(
            id="test_workflow_001",
            mandateId="test_mandate_001",
            status="active",
            name="Test Excel Processing Workflow",
            currentRound=1,
            lastActivity=current_time,
            startedAt=current_time,
            logs=[],
            messages=[],
            stats=None,
            tasks=[],
        )
        service_center = ServiceCenter(mock_user, mock_workflow)
        logger.info("✓ ServiceCenter created successfully")

        extractor = DocumentExtraction(service_center)
        logger.info("✓ DocumentExtraction created successfully")

        if os.path.exists(test_file_path):
            logger.info(f"Found test file: {test_file_path}")
            with open(test_file_path, 'rb') as f:
                file_data = f.read()
            logger.info(f"File size: {len(file_data)} bytes")

            logger.info("Processing Excel file...")
            result = await _extract_xlsx(
                extractor, file_data, os.path.basename(test_file_path)
            )
            logger.info("✓ Excel processing completed successfully!")
            _log_extraction_result(result, preview_length=100)
        else:
            logger.info("No test Excel file found. Creating a simple test...")
            try:
                file_data = _build_sample_workbook_bytes()
                logger.info(f"Test workbook size: {len(file_data)} bytes")

                result = await _extract_xlsx(extractor, file_data, "test_workbook.xlsx")
                logger.info("✓ Test workbook processing completed successfully!")
                _log_extraction_result(result, preview_length=200)
            except ImportError as e:
                logger.error(f"openpyxl not available: {e}")
            except Exception as e:
                # logger.exception keeps the traceback, which a bare
                # logger.error would drop.
                logger.exception(f"Error testing Excel functionality: {e}")

        logger.info("Excel processing test completed!")
    except ImportError as e:
        logger.error(f"Failed to import required modules: {e}")
        logger.error("Make sure you're running this script from the gateway directory")
    except Exception as e:
        # Top-level boundary of the script: record the error with its
        # traceback through the configured logger.
        logger.exception(f"Unexpected error: {e}")
if __name__ == "__main__":
    # Executed directly as a script: run the async test to completion.
    asyncio.run(test_excel_processing())