#!/usr/bin/env python3
"""
Test script to demonstrate enhanced extractor format support.
Shows all supported file extensions and MIME types for each extractor.
"""

import sys
import os
from pathlib import Path

# Add the gateway's 'modules' directory (located next to this script) to the front of the import path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'modules'))

from modules.services.serviceExtraction.subRegistry import ExtractorRegistry

def test_extractor_formats():
    """Print a report of what the extractor registry supports.

    The report has four parts, written to stdout in this order:

    1. File extensions per extractor, from ``getAllSupportedFormats()``.
    2. MIME types per extractor, from the same call.
    3. The extensions and MIME types each registered extractor instance
       reports through ``getSupportedExtensions()`` and
       ``getSupportedMimeTypes()``.
    4. The extractor class that ``registry.resolve()`` selects for a fixed
       list of sample (filename, MIME type) pairs.

    Nothing is asserted and nothing is returned; the function only prints.
    """
    rule = "-" * 50

    def heading(title):
        # Every section starts with its title followed by the same divider.
        print(title)
        print(rule)

    print("🔍 Testing Plug-and-Play Extractor System")
    print("=" * 60)

    registry = ExtractorRegistry()
    formats = registry.getAllSupportedFormats()

    # (section title, key into `formats`, text shown when the list is empty)
    format_sections = (
        ("\n📋 Supported File Extensions by Extractor:", "extensions",
         "(all extensions - fallback)"),
        ("\n📋 Supported MIME Types by Extractor:", "mime_types",
         "(all MIME types - fallback)"),
    )
    for title, key, fallback in format_sections:
        heading(title)
        for name, supported in formats[key].items():
            print(f"  {name:20} → {', '.join(supported) if supported else fallback}")

    heading("\n🧪 Testing Individual Extractors:")
    # NOTE(review): reads the registry's non-public `_map` attribute directly.
    for _key, candidate in registry._map.items():
        declares_formats = (
            hasattr(candidate, 'getSupportedExtensions')
            and hasattr(candidate, 'getSupportedMimeTypes')
        )
        if not declares_formats:
            continue
        # Query both lists before printing anything for this extractor.
        reported_extensions = candidate.getSupportedExtensions()
        reported_mime_types = candidate.getSupportedMimeTypes()
        print(f"\n  {candidate.__class__.__name__}:")
        print(f"    Extensions: {reported_extensions}")
        print(f"    MIME Types: {reported_mime_types}")

    heading("\n🔬 Testing File Detection:")
    samples = (
        # Document formats
        ("document.pdf", "application/pdf"),
        ("spreadsheet.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
        ("presentation.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation"),
        ("document.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"),

        # Text and code files
        ("readme.txt", "text/plain"),
        ("readme.md", "text/markdown"),
        ("app.log", "text/plain"),
        ("Main.java", "text/x-java-source"),
        ("script.js", "text/javascript"),
        ("component.tsx", "text/typescript"),
        ("main.py", "text/x-python"),
        ("config.yaml", "text/x-yaml"),
        ("package.json", "application/json"),
        ("data.csv", "text/csv"),
        ("config.xml", "application/xml"),
        ("webpage.html", "text/html"),
        ("styles.css", "text/css"),
        ("script.sh", "text/x-sh"),
        ("Dockerfile", "text/plain"),
        (".gitignore", "text/plain"),
        ("app.config", "text/plain"),
        ("database.sql", "text/x-sql"),
        ("schema.ddl", "application/sql"),

        # Images
        ("image.png", "image/png"),
        ("photo.jpg", "image/jpeg"),

        # Unknown
        ("unknown.xyz", "application/octet-stream"),
    )
    for filename, mime_type in samples:
        resolved = registry.resolve(mime_type, filename)
        outcome = resolved.__class__.__name__ if resolved else "No extractor found"
        print(f"  {filename:25} ({mime_type:50}) → {outcome}")

    closing_lines = (
        "\n✅ Plug-and-Play extractor system test completed!",
        "\nKey improvements:",
        "  • 🔌 TRUE PLUG-AND-PLAY: Just add extractor file, it auto-registers!",
        "  • 📋 No more manual registration of file types",
        "  • 🔍 Auto-discovery scans extractors directory",
        "  • 📝 Each extractor declares its own supported formats",
        "  • 🚀 Easy to add new file types - just create new extractor",
        "  • 🧹 Clean, maintainable code with no redundancy",
        "\nTo add a new file type:",
        "  1. Create extractorXyz.py in extractors/ directory",
        "  2. Implement Extractor interface with getSupportedExtensions()",
        "  3. That's it! No registry changes needed!",
    )
    for line in closing_lines:
        print(line)

# Run the report only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    test_extractor_formats()