#!/usr/bin/env python3
"""
Test script for the multi-file processing implementation.

Verifies that the new multi-file functionality works without breaking the
existing single-file processing path.
"""
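
# NOTE: Run this script from the gateway directory so that the `modules`
# package imported inside each test is resolvable (the tests catch
# ImportError and print a hint).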
import asyncio
import logging
import sys

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def test_multi_file_detection():
    """Test AI-powered multi-file detection."""
    print("=== Testing Multi-File Detection ===")

    # Mock AI service for testing
    class MockAiService:
        async def call(self, request):
            class MockResponse:
                def __init__(self, content):
                    self.content = content
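
            # The canned payload below mirrors the analysis JSON that
            # _analyzePromptIntent is expected to produce: is_multi_file,
            # strategy, criteria, file_naming_pattern, and reasoning.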
            return MockResponse(
                '{"is_multi_file": true, "strategy": "per_entity", '
                '"criteria": "customer_id", '
                '"file_naming_pattern": "{customer_name}_data.docx", '
                '"reasoning": "User wants separate files for each customer"}'
            )

    class MockAiObjects:
        def __init__(self):
            self.call = MockAiService().call

    # Import the AI service
    try:
        from modules.services.serviceAi.mainServiceAi import AiService

        # Create mock service center (MockUtils is defined before it is used)
        class MockUtils:
            def debugLogToFile(self, message, category):
                print(f"[{category}] {message}")

        class MockServiceCenter:
            def __init__(self):
                self.utils = MockUtils()

        # Create AI service instance
        ai_service = AiService(MockServiceCenter())
        ai_service.aiObjects = MockAiObjects()

        # Test prompts
        test_prompts = [
            "Create one file for each customer in the document",
            "Split the data into separate files by category",
            "Generate individual files for each product",
            "Create a single report with all data",
            "Erstelle eine Datei für jeden Kunden",  # German: "Create a file for each customer"
            "Créer un fichier par section",  # French: "Create one file per section"
        ]
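
        # The loop below drives AiService._analyzePromptIntent directly (a
        # private helper); the prompt and the service instance are passed to
        # match the signature used by the implementation under test.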
        for prompt in test_prompts:
            print(f"\nTesting prompt: '{prompt}'")
            try:
                analysis = await ai_service._analyzePromptIntent(prompt, ai_service)
                print(f"  Analysis: {analysis}")

                if analysis.get("is_multi_file"):
                    print(f"  ✓ Detected as multi-file with strategy: {analysis.get('strategy')}")
                else:
                    print("  ✓ Detected as single-file")

            except Exception as e:
                print(f"  ✗ Error: {e}")

        print("\n=== Multi-File Detection Test Complete ===")
        return True
    except ImportError as e:
        print(f"Import error: {e}")
        print("Make sure you're running from the gateway directory")
        return False
    except Exception as e:
        print(f"Error during testing: {e}")
        return False


async def test_json_schema_validation():
    """Test JSON schema validation for both single- and multi-file output."""
    print("\n=== Testing JSON Schema Validation ===")

    try:
        from modules.services.serviceGeneration.subJsonSchema import (
            get_document_subJsonSchema,
            get_multi_document_subJsonSchema,
            get_adaptive_json_schema,
            validate_json_document,
        )

        # Test single document schema
        single_doc_schema = get_document_subJsonSchema()
        print(f"✓ Single document schema loaded: {len(single_doc_schema)} properties")

        # Test multi-document schema
        multi_doc_schema = get_multi_document_subJsonSchema()
        print(f"✓ Multi-document schema loaded: {len(multi_doc_schema)} properties")

        # Test adaptive schema selection
        single_analysis = {"is_multi_file": False}
        multi_analysis = {"is_multi_file": True}
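
        # get_adaptive_json_schema should return the multi-document schema
        # when is_multi_file is true and the single-document schema otherwise.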
        single_schema = get_adaptive_json_schema(single_analysis)
        multi_schema = get_adaptive_json_schema(multi_analysis)

        print("✓ Adaptive schema selection working")
        print(f"  Single-file schema type: {single_schema.get('type', 'unknown')}")
        print(f"  Multi-file schema type: {multi_schema.get('type', 'unknown')}")

        # Test validation with sample data
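        # Two sample payloads follow: a single-document payload (metadata +
        # sections) and a multi-document payload (metadata with a
        # splitStrategy plus a documents list whose entries carry their own
        # filename and sections).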
        single_doc_data = {
            "metadata": {"title": "Test Document"},
            "sections": [
                {
                    "id": "section_1",
                    "content_type": "paragraph",
                    "elements": [{"text": "Test content"}],
                    "order": 1,
                }
            ],
        }

        multi_doc_data = {
            "metadata": {
                "title": "Test Documents",
                "splitStrategy": "per_entity",
            },
            "documents": [
                {
                    "id": "doc_1",
                    "title": "Document 1",
                    "filename": "doc1.docx",
                    "sections": [
                        {
                            "id": "section_1",
                            "content_type": "paragraph",
                            "elements": [{"text": "Content 1"}],
                            "order": 1,
                        }
                    ],
                }
            ],
        }

        single_valid = validate_json_document(single_doc_data)
        multi_valid = validate_json_document(multi_doc_data)

        print(f"✓ Single document validation: {'PASS' if single_valid else 'FAIL'}")
        print(f"✓ Multi-document validation: {'PASS' if multi_valid else 'FAIL'}")

        print("\n=== JSON Schema Validation Test Complete ===")
        return True
    except ImportError as e:
        print(f"Import error: {e}")
        return False
    except Exception as e:
        print(f"Error during schema testing: {e}")
        return False


async def test_prompt_builder():
    """Test adaptive prompt building."""
    print("\n=== Testing Prompt Builder ===")

    try:
        from modules.services.serviceGeneration.subPromptBuilder import (
            buildAdaptiveExtractionPrompt,
            buildGenericExtractionPrompt,
        )

        # Mock services (MockUtils is defined before it is used)
        class MockUtils:
            def debugLogToFile(self, message, category):
                print(f"[{category}] {message}")

        class MockServices:
            def __init__(self):
                self.utils = MockUtils()

        services = MockServices()

        # Test adaptive prompt building
        prompt_analysis = {
            "is_multi_file": True,
            "strategy": "per_entity",
            "criteria": "customer_id",
            "file_naming_pattern": "{customer_name}_data.docx",
        }
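
        # prompt_analysis above mirrors the analysis produced by the
        # multi-file detection step (see test_multi_file_detection).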

        adaptive_prompt = await buildAdaptiveExtractionPrompt(
            outputFormat="docx",
            userPrompt="Create one file for each customer",
            title="Customer Data",
            promptAnalysis=prompt_analysis,
            aiService=None,
            services=services,
        )

        print(f"✓ Adaptive prompt generated: {len(adaptive_prompt)} characters")
        print(f"  Contains multi-file instructions: {'documents' in adaptive_prompt}")
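
        # The substring checks are cheap heuristics: the adaptive prompt is
        # expected to embed multi-file ('documents') instructions, while the
        # generic prompt below should describe a single 'sections' document.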

        # Test generic prompt building
        generic_prompt = await buildGenericExtractionPrompt(
            outputFormat="docx",
            userPrompt="Create a single report",
            title="Report",
            aiService=None,
            services=services,
        )

        print(f"✓ Generic prompt generated: {len(generic_prompt)} characters")
        print(f"  Contains single-file instructions: {'sections' in generic_prompt}")

        print("\n=== Prompt Builder Test Complete ===")
        return True

    except ImportError as e:
        print(f"Import error: {e}")
        return False
    except Exception as e:
        print(f"Error during prompt builder testing: {e}")
        return False


async def main():
    """Run all tests."""
    print("Starting Multi-File Processing Tests...")
    print("=" * 50)

    tests = [
        test_multi_file_detection,
        test_json_schema_validation,
        test_prompt_builder,
    ]

    results = []
    for test in tests:
        try:
            result = await test()
            results.append(result)
        except Exception as e:
            print(f"Test failed with exception: {e}")
            results.append(False)

    print("\n" + "=" * 50)
    print("Test Results Summary:")
    print(f"  Tests run: {len(tests)}")
    print(f"  Passed: {sum(results)}")
    print(f"  Failed: {len(tests) - sum(results)}")

    if all(results):
        print("\n🎉 All tests passed! Multi-file processing is ready.")
    else:
        print("\n⚠️ Some tests failed. Check the implementation.")

    return all(results)
if __name__ == "__main__":
|
|
asyncio.run(main())
|