#!/usr/bin/env python3
"""
Simple test script for enhanced Excel processing functionality.

This script tests the DocumentExtraction class with Excel files. It processes
an existing sample workbook when one is found on disk; otherwise it builds a
small workbook in memory with openpyxl and processes that instead.
"""

import os
import sys
import asyncio
import logging
from io import BytesIO
from pathlib import Path

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Add the gateway directory to the path
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..'))

# MIME type passed to DocumentExtraction for .xlsx payloads.
XLSX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

# Sample workbook looked up when the caller does not supply a path.
DEFAULT_TEST_FILE = "d:/temp/test-extraction/test.xlsx"


def _log_extraction_result(result, preview_chars):
    """Log label, MIME type, size and a data preview for every content item.

    Args:
        result: Object returned by ``processFileData``; only its ``contents``
            sequence is read here.
        preview_chars: Maximum number of leading characters of each item's
            ``data`` to include in the log line.
    """
    logger.info(f"Generated {len(result.contents)} content items:")
    for position, item in enumerate(result.contents, start=1):
        logger.info(f" Item {position}: {item.label}")
        logger.info(f" MIME type: {item.metadata.mimeType}")
        logger.info(f" Size: {item.metadata.size} bytes")
        if item.data:
            logger.info(f" Data preview: {item.data[:preview_chars]}...")
        else:
            logger.info(" Data: None")


async def _extract_and_report(extractor, file_data, filename, success_message, preview_chars):
    """Run the extractor on raw .xlsx bytes (AI disabled) and log the outcome."""
    result = await extractor.processFileData(
        fileData=file_data,
        filename=filename,
        mimeType=XLSX_MIME_TYPE,
        base64Encoded=False,
        prompt=None,
        enableAI=False,
    )
    logger.info(success_message)
    _log_extraction_result(result, preview_chars)


def _build_sample_workbook_bytes():
    """Create a three-row sample workbook with openpyxl and return it as bytes.

    Raises:
        ImportError: If openpyxl is not installed.
    """
    # Imported lazily so that a missing openpyxl is reported by the caller
    # instead of breaking the import of this script.
    from openpyxl import Workbook

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "Test Sheet"

    # Header row followed by two data rows.
    rows = (
        ("Name", "Age", "City"),
        ("John Doe", 30, "New York"),
        ("Jane Smith", 25, "Los Angeles"),
    )
    for row_number, row in enumerate(rows, start=1):
        for column_letter, value in zip("ABC", row):
            sheet[f"{column_letter}{row_number}"] = value

    # Document properties
    workbook.properties.title = "Test Workbook"
    workbook.properties.creator = "Test User"
    workbook.properties.subject = "Test Subject"

    logger.info("✓ Test workbook created successfully")
    logger.info(f" Title: {workbook.properties.title}")
    logger.info(f" Creator: {workbook.properties.creator}")
    logger.info(f" Subject: {workbook.properties.subject}")
    logger.info(f" Sheets: {workbook.sheetnames}")

    # getvalue() returns the whole buffer regardless of the stream position,
    # so no seek is required after saving.
    buffer = BytesIO()
    workbook.save(buffer)
    return buffer.getvalue()


async def test_excel_processing(test_file_path=DEFAULT_TEST_FILE):
    """Test Excel processing functionality.

    Builds a mock user and workflow, creates a ServiceCenter and a
    DocumentExtraction instance, then processes either the workbook found at
    ``test_file_path`` or, when no such file exists, a workbook generated in
    memory. Every failure is logged rather than raised, so the coroutine
    always completes and returns ``None``.

    Args:
        test_file_path: Path of an existing .xlsx file to process. Defaults to
            the original hard-coded sample location.
    """
    try:
        # Import required modules (deferred so an import failure is logged
        # with a hint instead of aborting at module load time).
        from modules.chat.documents.documentExtraction import DocumentExtraction
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
        from modules.interfaces.interfaceChatModel import ChatWorkflow
        from datetime import datetime, timezone

        logger.info("Testing Excel processing functionality...")

        # Create mock service center
        mock_user = User(
            id="test_user_001",
            username="testuser",
            email="test@example.com",
            fullName="Test User",
            language="en",
            enabled=True,
            privilege=UserPrivilege.USER,
            authenticationAuthority=AuthAuthority.LOCAL,
            mandateId="test_mandate_001",
        )

        # timezone.utc is the same object as datetime.UTC but is also
        # available on Python versions older than 3.11.
        current_time = datetime.now(timezone.utc).isoformat()
        mock_workflow = ChatWorkflow(
            id="test_workflow_001",
            mandateId="test_mandate_001",
            status="active",
            name="Test Excel Processing Workflow",
            currentRound=1,
            lastActivity=current_time,
            startedAt=current_time,
            logs=[],
            messages=[],
            stats=None,
            tasks=[],
        )

        service_center = ServiceCenter(mock_user, mock_workflow)
        logger.info("✓ ServiceCenter created successfully")

        # Create DocumentExtraction instance
        extractor = DocumentExtraction(service_center)
        logger.info("✓ DocumentExtraction created successfully")

        # Test with a sample Excel file if available. is_file() (rather than
        # exists()) keeps a directory at that path from being opened.
        sample_path = Path(test_file_path)
        if sample_path.is_file():
            logger.info(f"Found test file: {test_file_path}")

            file_data = sample_path.read_bytes()
            logger.info(f"File size: {len(file_data)} bytes")

            logger.info("Processing Excel file...")
            await _extract_and_report(
                extractor,
                file_data,
                sample_path.name,
                "✓ Excel processing completed successfully!",
                100,
            )
        else:
            logger.info("No test Excel file found. Creating a simple test...")

            try:
                file_data = _build_sample_workbook_bytes()
                logger.info(f"Test workbook size: {len(file_data)} bytes")

                await _extract_and_report(
                    extractor,
                    file_data,
                    "test_workbook.xlsx",
                    "✓ Test workbook processing completed successfully!",
                    200,
                )
            except ImportError as e:
                logger.error(f"openpyxl not available: {e}")
            except Exception as e:
                # logger.exception keeps the traceback that logger.error dropped.
                logger.exception(f"Error testing Excel functionality: {e}")

        logger.info("Excel processing test completed!")

    except ImportError as e:
        logger.error(f"Failed to import required modules: {e}")
        logger.error("Make sure you're running this script from the gateway directory")
    except Exception as e:
        # Top-level boundary of the script: log with traceback, do not re-raise.
        logger.exception(f"Unexpected error: {e}")


if __name__ == "__main__":
    asyncio.run(test_excel_processing())