gateway/test_multifile_processing.py

#!/usr/bin/env python3
"""
Test script for multi-file processing implementation.
This script tests the new multi-file functionality without breaking existing single-file processing.
"""
import asyncio
import json
import logging
from typing import Dict, Any, List

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def test_multi_file_detection():
    """Test AI-powered multi-file detection."""
    print("=== Testing Multi-File Detection ===")

    # Mock AI service for testing
    class MockAiService:
        async def call(self, request):
            class MockResponse:
                def __init__(self, content):
                    self.content = content
            return MockResponse('{"is_multi_file": true, "strategy": "per_entity", "criteria": "customer_id", "file_naming_pattern": "{customer_name}_data.docx", "reasoning": "User wants separate files for each customer"}')
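    # Note: the mock above returns the same fixed multi-file analysis for every
    # prompt, so the single-file prompts below exercise the call path and
    # response parsing rather than real intent classification.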

    class MockAiObjects:
        def __init__(self):
            self.call = MockAiService().call

    # Import the AI service
    try:
        from modules.services.serviceAi.mainServiceAi import AiService

        # Create mock service center
        class MockServiceCenter:
            def __init__(self):
                self.utils = MockUtils()

        class MockUtils:
            def debugLogToFile(self, message, category):
                print(f"[{category}] {message}")

        # Create AI service instance
        ai_service = AiService(MockServiceCenter())
        ai_service.aiObjects = MockAiObjects()

        # Test prompts
        test_prompts = [
            "Create one file for each customer in the document",
            "Split the data into separate files by category",
            "Generate individual files for each product",
            "Create a single report with all data",
            "Erstelle eine Datei für jeden Kunden",  # German: "Create one file for each customer"
            "Créer un fichier par section"  # French: "Create one file per section"
        ]

        for prompt in test_prompts:
            print(f"\nTesting prompt: '{prompt}'")
            try:
                analysis = await ai_service._analyzePromptIntent(prompt, ai_service)
                print(f" Analysis: {analysis}")
                if analysis.get("is_multi_file"):
                    print(f" ✓ Detected as multi-file with strategy: {analysis.get('strategy')}")
                else:
                    print(" ✓ Detected as single-file")
            except Exception as e:
                print(f" ✗ Error: {str(e)}")

        print("\n=== Multi-File Detection Test Complete ===")
        return True

    except ImportError as e:
        print(f"Import error: {e}")
        print("Make sure you're running from the gateway directory")
        return False
    except Exception as e:
        print(f"Error during testing: {e}")
        return False


async def test_json_schema_validation():
    """Test JSON schema validation for both single and multi-file documents."""
    print("\n=== Testing JSON Schema Validation ===")

    try:
        from modules.services.serviceGeneration.subJsonSchema import (
            get_document_subJsonSchema,
            get_multi_document_subJsonSchema,
            get_adaptive_json_schema,
            validate_json_document
        )

        # Test single document schema
        single_doc_schema = get_document_subJsonSchema()
        print(f"✓ Single document schema loaded: {len(single_doc_schema)} properties")

        # Test multi-document schema
        multi_doc_schema = get_multi_document_subJsonSchema()
        print(f"✓ Multi-document schema loaded: {len(multi_doc_schema)} properties")

        # Test adaptive schema selection
        single_analysis = {"is_multi_file": False}
        multi_analysis = {"is_multi_file": True}
        single_schema = get_adaptive_json_schema(single_analysis)
        multi_schema = get_adaptive_json_schema(multi_analysis)
        print("✓ Adaptive schema selection working")
        print(f" Single-file schema type: {single_schema.get('type', 'unknown')}")
        print(f" Multi-file schema type: {multi_schema.get('type', 'unknown')}")

        # Test validation with sample data
        single_doc_data = {
            "metadata": {"title": "Test Document"},
            "sections": [
                {
                    "id": "section_1",
                    "content_type": "paragraph",
                    "elements": [{"text": "Test content"}],
                    "order": 1
                }
            ]
        }
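
        # Unlike the flat single-document shape above, the multi-document payload
        # nests one entry per output file under "documents", each with its own
        # filename and sections.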
        multi_doc_data = {
            "metadata": {
                "title": "Test Documents",
                "splitStrategy": "per_entity"
            },
            "documents": [
                {
                    "id": "doc_1",
                    "title": "Document 1",
                    "filename": "doc1.docx",
                    "sections": [
                        {
                            "id": "section_1",
                            "content_type": "paragraph",
                            "elements": [{"text": "Content 1"}],
                            "order": 1
                        }
                    ]
                }
            ]
        }

        single_valid = validate_json_document(single_doc_data)
        multi_valid = validate_json_document(multi_doc_data)
        print(f"✓ Single document validation: {'PASS' if single_valid else 'FAIL'}")
        print(f"✓ Multi-document validation: {'PASS' if multi_valid else 'FAIL'}")

        print("\n=== JSON Schema Validation Test Complete ===")
        return True

    except ImportError as e:
        print(f"Import error: {e}")
        return False
    except Exception as e:
        print(f"Error during schema testing: {e}")
        return False


async def test_prompt_builder():
    """Test adaptive prompt building."""
    print("\n=== Testing Prompt Builder ===")

    try:
        from modules.services.serviceGeneration.subPromptBuilder import (
            buildAdaptiveExtractionPrompt,
            buildGenericExtractionPrompt
        )

        # Mock services
        class MockServices:
            def __init__(self):
                self.utils = MockUtils()

        class MockUtils:
            def debugLogToFile(self, message, category):
                print(f"[{category}] {message}")

        services = MockServices()

        # Test adaptive prompt building
        prompt_analysis = {
            "is_multi_file": True,
            "strategy": "per_entity",
            "criteria": "customer_id",
            "file_naming_pattern": "{customer_name}_data.docx"
        }
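        # These keys mirror the analysis object produced by the multi-file
        # detection step (see the mock response in test_multi_file_detection).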
        adaptive_prompt = await buildAdaptiveExtractionPrompt(
            outputFormat="docx",
            userPrompt="Create one file for each customer",
            title="Customer Data",
            promptAnalysis=prompt_analysis,
            aiService=None,
            services=services
        )
        print(f"✓ Adaptive prompt generated: {len(adaptive_prompt)} characters")
        print(f" Contains multi-file instructions: {'documents' in adaptive_prompt}")

        # Test generic prompt building
        generic_prompt = await buildGenericExtractionPrompt(
            outputFormat="docx",
            userPrompt="Create a single report",
            title="Report",
            aiService=None,
            services=services
        )
        print(f"✓ Generic prompt generated: {len(generic_prompt)} characters")
        print(f" Contains single-file instructions: {'sections' in generic_prompt}")

        print("\n=== Prompt Builder Test Complete ===")
        return True

    except ImportError as e:
        print(f"Import error: {e}")
        return False
    except Exception as e:
        print(f"Error during prompt builder testing: {e}")
        return False


async def main():
    """Run all tests."""
    print("Starting Multi-File Processing Tests...")
    print("=" * 50)

    tests = [
        test_multi_file_detection,
        test_json_schema_validation,
        test_prompt_builder
    ]

    results = []
    for test in tests:
        try:
            result = await test()
            results.append(result)
        except Exception as e:
            print(f"Test failed with exception: {e}")
            results.append(False)

    print("\n" + "=" * 50)
    print("Test Results Summary:")
    print(f" Tests run: {len(tests)}")
    print(f" Passed: {sum(results)}")
    print(f" Failed: {len(tests) - sum(results)}")

    if all(results):
        print("\n🎉 All tests passed! Multi-file processing is ready.")
    else:
        print("\n⚠️ Some tests failed. Check the implementation.")

    return all(results)


if __name__ == "__main__":
    asyncio.run(main())
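    # main() returns True only if every test passed; if a non-zero exit code is
    # wanted (e.g. for CI), this call could instead be written as
    # `sys.exit(0 if asyncio.run(main()) else 1)` after adding `import sys`.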