gateway/test_multifile_processing.py

#!/usr/bin/env python3
"""
Test script for multi-file processing implementation.
This script tests the new multi-file functionality without breaking existing single-file processing.
"""
import asyncio
import json
import logging
from typing import Dict, Any, List

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def test_multi_file_detection():
    """Test AI-powered multi-file detection."""
    print("=== Testing Multi-File Detection ===")

    # Mock AI service for testing
    class MockAiService:
        async def call(self, request):
            class MockResponse:
                def __init__(self, content):
                    self.content = content
            return MockResponse('{"is_multi_file": true, "strategy": "per_entity", "criteria": "customer_id", "file_naming_pattern": "{customer_name}_data.docx", "reasoning": "User wants separate files for each customer"}')
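    # Note: the mock above returns the same fixed multi-file analysis for every
    # prompt, so the single-file prompts below exercise the call path and
    # response parsing rather than real intent classification.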

    class MockAiObjects:
        def __init__(self):
            self.call = MockAiService().call

    # Import the AI service
    try:
        from modules.services.serviceAi.mainServiceAi import AiService

        # Create mock service center
        class MockServiceCenter:
            def __init__(self):
                self.utils = MockUtils()

        class MockUtils:
            def debugLogToFile(self, message, category):
                print(f"[{category}] {message}")

        # Create AI service instance
        ai_service = AiService(MockServiceCenter())
        ai_service.aiObjects = MockAiObjects()

        # Test prompts
        test_prompts = [
            "Create one file for each customer in the document",
            "Split the data into separate files by category",
            "Generate individual files for each product",
            "Create a single report with all data",
            "Erstelle eine Datei für jeden Kunden",  # German: "Create one file for each customer"
            "Créer un fichier par section"  # French: "Create one file per section"
        ]

        for prompt in test_prompts:
            print(f"\nTesting prompt: '{prompt}'")
            try:
                analysis = await ai_service._analyzePromptIntent(prompt, ai_service)
                print(f" Analysis: {analysis}")
                if analysis.get("is_multi_file"):
                    print(f" ✓ Detected as multi-file with strategy: {analysis.get('strategy')}")
                else:
                    print(" ✓ Detected as single-file")
            except Exception as e:
                print(f" ✗ Error: {str(e)}")

        print("\n=== Multi-File Detection Test Complete ===")
        return True

    except ImportError as e:
        print(f"Import error: {e}")
        print("Make sure you're running from the gateway directory")
        return False
    except Exception as e:
        print(f"Error during testing: {e}")
        return False


async def test_json_schema_validation():
    """Test JSON schema validation for both single and multi-file documents."""
    print("\n=== Testing JSON Schema Validation ===")

    try:
        from modules.services.serviceGeneration.subJsonSchema import (
            get_document_subJsonSchema,
            get_multi_document_subJsonSchema,
            get_adaptive_json_schema,
            validate_json_document
        )

        # Test single document schema
        single_doc_schema = get_document_subJsonSchema()
        print(f"✓ Single document schema loaded: {len(single_doc_schema)} properties")

        # Test multi-document schema
        multi_doc_schema = get_multi_document_subJsonSchema()
        print(f"✓ Multi-document schema loaded: {len(multi_doc_schema)} properties")

        # Test adaptive schema selection
        single_analysis = {"is_multi_file": False}
        multi_analysis = {"is_multi_file": True}
        single_schema = get_adaptive_json_schema(single_analysis)
        multi_schema = get_adaptive_json_schema(multi_analysis)
        print("✓ Adaptive schema selection working")
        print(f" Single-file schema type: {single_schema.get('type', 'unknown')}")
        print(f" Multi-file schema type: {multi_schema.get('type', 'unknown')}")

        # Test validation with sample data
        single_doc_data = {
            "metadata": {"title": "Test Document"},
            "sections": [
                {
                    "id": "section_1",
                    "content_type": "paragraph",
                    "elements": [{"text": "Test content"}],
                    "order": 1
                }
            ]
        }
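
        # Unlike the flat single-document shape above, the multi-document payload
        # nests one entry per output file under "documents", each with its own
        # filename and sections.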
        multi_doc_data = {
            "metadata": {
                "title": "Test Documents",
                "splitStrategy": "per_entity"
            },
            "documents": [
                {
                    "id": "doc_1",
                    "title": "Document 1",
                    "filename": "doc1.docx",
                    "sections": [
                        {
                            "id": "section_1",
                            "content_type": "paragraph",
                            "elements": [{"text": "Content 1"}],
                            "order": 1
                        }
                    ]
                }
            ]
        }

        single_valid = validate_json_document(single_doc_data)
        multi_valid = validate_json_document(multi_doc_data)
        print(f"✓ Single document validation: {'PASS' if single_valid else 'FAIL'}")
        print(f"✓ Multi-document validation: {'PASS' if multi_valid else 'FAIL'}")

        print("\n=== JSON Schema Validation Test Complete ===")
        return True

    except ImportError as e:
        print(f"Import error: {e}")
        return False
    except Exception as e:
        print(f"Error during schema testing: {e}")
        return False


async def test_prompt_builder():
    """Test adaptive prompt building."""
    print("\n=== Testing Prompt Builder ===")

    try:
        from modules.services.serviceGeneration.subPromptBuilder import (
            buildAdaptiveExtractionPrompt,
            buildGenericExtractionPrompt
        )

        # Mock services
        class MockServices:
            def __init__(self):
                self.utils = MockUtils()

        class MockUtils:
            def debugLogToFile(self, message, category):
                print(f"[{category}] {message}")

        services = MockServices()

        # Test adaptive prompt building
        prompt_analysis = {
            "is_multi_file": True,
            "strategy": "per_entity",
            "criteria": "customer_id",
            "file_naming_pattern": "{customer_name}_data.docx"
        }
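        # These keys mirror the analysis object produced by the multi-file
        # detection step (see the mock response in test_multi_file_detection).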
        adaptive_prompt = await buildAdaptiveExtractionPrompt(
            outputFormat="docx",
            userPrompt="Create one file for each customer",
            title="Customer Data",
            promptAnalysis=prompt_analysis,
            aiService=None,
            services=services
        )
        print(f"✓ Adaptive prompt generated: {len(adaptive_prompt)} characters")
        print(f" Contains multi-file instructions: {'documents' in adaptive_prompt}")

        # Test generic prompt building
        generic_prompt = await buildGenericExtractionPrompt(
            outputFormat="docx",
            userPrompt="Create a single report",
            title="Report",
            aiService=None,
            services=services
        )
        print(f"✓ Generic prompt generated: {len(generic_prompt)} characters")
        print(f" Contains single-file instructions: {'sections' in generic_prompt}")

        print("\n=== Prompt Builder Test Complete ===")
        return True

    except ImportError as e:
        print(f"Import error: {e}")
        return False
    except Exception as e:
        print(f"Error during prompt builder testing: {e}")
        return False


async def main():
    """Run all tests."""
    print("Starting Multi-File Processing Tests...")
    print("=" * 50)

    tests = [
        test_multi_file_detection,
        test_json_schema_validation,
        test_prompt_builder
    ]

    results = []
    for test in tests:
        try:
            result = await test()
            results.append(result)
        except Exception as e:
            print(f"Test failed with exception: {e}")
            results.append(False)

    print("\n" + "=" * 50)
    print("Test Results Summary:")
    print(f" Tests run: {len(tests)}")
    print(f" Passed: {sum(results)}")
    print(f" Failed: {len(tests) - sum(results)}")

    if all(results):
        print("\n🎉 All tests passed! Multi-file processing is ready.")
    else:
        print("\n⚠️ Some tests failed. Check the implementation.")

    return all(results)


if __name__ == "__main__":
    asyncio.run(main())
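    # main() returns True only if every test passed; if a non-zero exit code is
    # wanted (e.g. for CI), this call could instead be written as
    # `sys.exit(0 if asyncio.run(main()) else 1)` after adding `import sys`.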