117 lines
4.5 KiB
Python
117 lines
4.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
Test script to demonstrate enhanced extractor format support.

Shows all supported file extensions and MIME types for each extractor.
"""
|
|
|
|
import sys
|
|
import os
|
|
from pathlib import Path
|
|
|
|
# Add the gateway module to the path
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'modules'))
|
|
|
|
from modules.services.serviceExtraction.subRegistry import ExtractorRegistry
|
|
|
|
def test_extractor_formats():
    """Print a report of the formats known to an ``ExtractorRegistry``.

    The report has four parts: the per-extractor file extensions and MIME
    types returned by ``registry.getAllSupportedFormats()``, the formats each
    registered extractor declares itself, the extractor that
    ``registry.resolve()`` picks for a list of sample files, and a closing
    summary. Everything is written to stdout; nothing is returned.
    """
    print("🔍 Testing Plug-and-Play Extractor System")
    print("=" * 60)

    registry = ExtractorRegistry()
    formats = registry.getAllSupportedFormats()

    _print_format_table(
        "\n📋 Supported File Extensions by Extractor:",
        formats["extensions"],
        "(all extensions - fallback)",
    )
    _print_format_table(
        "\n📋 Supported MIME Types by Extractor:",
        formats["mime_types"],
        "(all MIME types - fallback)",
    )
    _print_individual_extractors(registry)
    _print_file_detection(registry)
    _print_summary()


# (filename, MIME type) pairs passed to ``registry.resolve()`` by the
# detection section of the report.
_SAMPLE_FILES = (
    # Document formats
    ("document.pdf", "application/pdf"),
    ("spreadsheet.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
    ("presentation.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation"),
    ("document.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
    # Text and code files
    ("readme.txt", "text/plain"),
    ("readme.md", "text/markdown"),
    ("app.log", "text/plain"),
    ("Main.java", "text/x-java-source"),
    ("script.js", "text/javascript"),
    ("component.tsx", "text/typescript"),
    ("main.py", "text/x-python"),
    ("config.yaml", "text/x-yaml"),
    ("package.json", "application/json"),
    ("data.csv", "text/csv"),
    ("config.xml", "application/xml"),
    ("webpage.html", "text/html"),
    ("styles.css", "text/css"),
    ("script.sh", "text/x-sh"),
    ("Dockerfile", "text/plain"),
    (".gitignore", "text/plain"),
    ("app.config", "text/plain"),
    ("database.sql", "text/x-sql"),
    ("schema.ddl", "application/sql"),
    # Images
    ("image.png", "image/png"),
    ("photo.jpg", "image/jpeg"),
    # Unknown
    ("unknown.xyz", "application/octet-stream"),
)

# Closing lines of the report, printed one per ``print`` call.
_SUMMARY_LINES = (
    "\n✅ Plug-and-Play extractor system test completed!",
    "\nKey improvements:",
    " • 🔌 TRUE PLUG-AND-PLAY: Just add extractor file, it auto-registers!",
    " • 📋 No more manual registration of file types",
    " • 🔍 Auto-discovery scans extractors directory",
    " • 📝 Each extractor declares its own supported formats",
    " • 🚀 Easy to add new file types - just create new extractor",
    " • 🧹 Clean, maintainable code with no redundancy",
    "\nTo add a new file type:",
    " 1. Create extractorXyz.py in extractors/ directory",
    " 2. Implement Extractor interface with getSupportedExtensions()",
    " 3. That's it! No registry changes needed!",
)


def _print_format_table(heading, formats_by_extractor, fallback_note):
    """Print one ``name → formats`` line per extractor under *heading*.

    *formats_by_extractor* maps an extractor name to a collection of strings.
    An empty collection is rendered as *fallback_note* instead of a list.
    """
    print(heading)
    print("-" * 50)
    for extractor_name, entries in formats_by_extractor.items():
        shown = ", ".join(entries) if entries else fallback_note
        print(f" {extractor_name:20} → {shown}")


def _print_individual_extractors(registry):
    """Print the extensions and MIME types each registered extractor declares."""
    print("\n🧪 Testing Individual Extractors:")
    print("-" * 50)

    # NOTE(review): ``_map`` is a private attribute of the registry; it is
    # read here because no public accessor is visible from this script.
    reported = set()
    for extractor in registry._map.values():
        # The same instance may be registered under several keys; report
        # each instance only once.
        if id(extractor) in reported:
            continue
        reported.add(id(extractor))

        # Skip entries that do not expose both format-declaration methods.
        if not (
            hasattr(extractor, 'getSupportedExtensions')
            and hasattr(extractor, 'getSupportedMimeTypes')
        ):
            continue

        extensions = extractor.getSupportedExtensions()
        mime_types = extractor.getSupportedMimeTypes()
        print(f"\n {extractor.__class__.__name__}:")
        print(f" Extensions: {extensions}")
        print(f" MIME Types: {mime_types}")


def _print_file_detection(registry):
    """Print which extractor ``registry.resolve()`` selects for each sample file."""
    print("\n🔬 Testing File Detection:")
    print("-" * 50)

    for filename, mime_type in _SAMPLE_FILES:
        extractor = registry.resolve(mime_type, filename)
        # Truthiness (not ``is None``) is kept as the original test.
        outcome = extractor.__class__.__name__ if extractor else "No extractor found"
        print(f" {filename:25} ({mime_type:50}) → {outcome}")


def _print_summary():
    """Print the closing summary and the how-to-extend instructions."""
    for line in _SUMMARY_LINES:
        print(line)
|
|
|
|
# Run the report only when this file is executed directly, not on import.
if __name__ == "__main__":
    test_extractor_formats()
|