#!/usr/bin/env python3
"""
Test script for the multi-file processing implementation.

This script exercises the new multi-file functionality without breaking
existing single-file processing.
"""

import asyncio
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def test_multi_file_detection():
    """Test AI-powered multi-file detection."""
    print("=== Testing Multi-File Detection ===")

    # Mock AI service for testing: always answers with a multi-file analysis.
    class MockAiService:
        async def call(self, request):
            class MockResponse:
                def __init__(self, content):
                    self.content = content

            return MockResponse(
                '{"is_multi_file": true, "strategy": "per_entity", '
                '"criteria": "customer_id", '
                '"file_naming_pattern": "{customer_name}_data.docx", '
                '"reasoning": "User wants separate files for each customer"}'
            )

    class MockAiObjects:
        def __init__(self):
            self.call = MockAiService().call

    # Import the AI service
    try:
        from modules.services.serviceAi.mainServiceAi import AiService

        # Minimal stand-ins for the service center and its utils
        class MockUtils:
            def debugLogToFile(self, message, category):
                print(f"[{category}] {message}")

        class MockServiceCenter:
            def __init__(self):
                self.utils = MockUtils()

        # Create AI service instance backed by the mock
        ai_service = AiService(MockServiceCenter())
        ai_service.aiObjects = MockAiObjects()

        # Test prompts
        test_prompts = [
            "Create one file for each customer in the document",
            "Split the data into separate files by category",
            "Generate individual files for each product",
            "Create a single report with all data",
            "Erstelle eine Datei für jeden Kunden",  # German: "Create one file per customer"
            "Créer un fichier par section",  # French: "Create one file per section"
        ]

        for prompt in test_prompts:
            print(f"\nTesting prompt: '{prompt}'")
            try:
                analysis = await ai_service._analyzePromptIntent(prompt, ai_service)
                print(f"  Analysis: {analysis}")
                if analysis.get("is_multi_file"):
                    print(f"  ✓ Detected as multi-file with strategy: {analysis.get('strategy')}")
                else:
                    print("  ✓ Detected as single-file")
            except Exception as e:
                print(f"  ✗ Error: {e}")

        print("\n=== Multi-File Detection Test Complete ===")
        return True

    except ImportError as e:
        print(f"Import error: {e}")
        print("Make sure you're running from the gateway directory")
        return False
    except Exception as e:
        print(f"Error during testing: {e}")
        return False
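
# The mock above always reports a multi-file intent, so the single-file prompts
# in test_prompts only exercise the detection plumbing, not the single-file
# branch. Below is a minimal sketch of the opposite mock, assuming the same
# response shape AiService expects; MockSingleFileAiObjects is illustrative
# and not wired into main().
class MockSingleFileAiObjects:
    """Illustrative mock whose call() always reports a single-file intent."""

    async def call(self, request):
        class MockResponse:
            def __init__(self, content):
                self.content = content

        return MockResponse(
            '{"is_multi_file": false, "strategy": null, "criteria": null, '
            '"file_naming_pattern": null, '
            '"reasoning": "User wants one consolidated report"}'
        )

# Usage mirrors the happy-path test: assign ai_service.aiObjects =
# MockSingleFileAiObjects() before calling ai_service._analyzePromptIntent(...).
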
content"}], "order": 1 } ] } multi_doc_data = { "metadata": { "title": "Test Documents", "splitStrategy": "per_entity" }, "documents": [ { "id": "doc_1", "title": "Document 1", "filename": "doc1.docx", "sections": [ { "id": "section_1", "content_type": "paragraph", "elements": [{"text": "Content 1"}], "order": 1 } ] } ] } single_valid = validate_json_document(single_doc_data) multi_valid = validate_json_document(multi_doc_data) print(f"✓ Single document validation: {'PASS' if single_valid else 'FAIL'}") print(f"✓ Multi-document validation: {'PASS' if multi_valid else 'FAIL'}") print("\n=== JSON Schema Validation Test Complete ===") return True except ImportError as e: print(f"Import error: {e}") return False except Exception as e: print(f"Error during schema testing: {e}") return False async def test_prompt_builder(): """Test adaptive prompt building.""" print("\n=== Testing Prompt Builder ===") try: from modules.services.serviceGeneration.subPromptBuilder import ( buildAdaptiveExtractionPrompt, buildGenericExtractionPrompt ) # Mock services class MockServices: def __init__(self): self.utils = MockUtils() class MockUtils: def debugLogToFile(self, message, category): print(f"[{category}] {message}") services = MockServices() # Test adaptive prompt building prompt_analysis = { "is_multi_file": True, "strategy": "per_entity", "criteria": "customer_id", "file_naming_pattern": "{customer_name}_data.docx" } adaptive_prompt = await buildAdaptiveExtractionPrompt( outputFormat="docx", userPrompt="Create one file for each customer", title="Customer Data", promptAnalysis=prompt_analysis, aiService=None, services=services ) print(f"✓ Adaptive prompt generated: {len(adaptive_prompt)} characters") print(f" Contains multi-file instructions: {'documents' in adaptive_prompt}") # Test generic prompt building generic_prompt = await buildGenericExtractionPrompt( outputFormat="docx", userPrompt="Create a single report", title="Report", aiService=None, services=services ) print(f"✓ Generic prompt generated: {len(generic_prompt)} characters") print(f" Contains single-file instructions: {'sections' in generic_prompt}") print("\n=== Prompt Builder Test Complete ===") return True except ImportError as e: print(f"Import error: {e}") return False except Exception as e: print(f"Error during prompt builder testing: {e}") return False async def main(): """Run all tests.""" print("Starting Multi-File Processing Tests...") print("=" * 50) tests = [ test_multi_file_detection, test_json_schema_validation, test_prompt_builder ] results = [] for test in tests: try: result = await test() results.append(result) except Exception as e: print(f"Test failed with exception: {e}") results.append(False) print("\n" + "=" * 50) print("Test Results Summary:") print(f" Tests run: {len(tests)}") print(f" Passed: {sum(results)}") print(f" Failed: {len(tests) - sum(results)}") if all(results): print("\n🎉 All tests passed! Multi-file processing is ready.") else: print("\n⚠️ Some tests failed. Check the implementation.") return all(results) if __name__ == "__main__": asyncio.run(main())