import base64
from typing import Any, Dict, List

from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor


class BinaryExtractor(Extractor):
    """
    Catch-all extractor used when a file type is not otherwise supported.

    The raw file content is not interpreted in any way: it is base64-encoded
    and returned as a single content part flagged as binary data.

    Supported formats:
    - All file types (fallback)
    - MIME types: application/octet-stream (default)
    - File extensions: All (fallback)
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Always report a match; none of the arguments are inspected."""
        return True

    def getSupportedExtensions(self) -> list[str]:
        """Return an empty list: as the fallback, no specific extensions are named."""
        return []

    def getSupportedMimeTypes(self) -> list[str]:
        """Return an empty list: as the fallback, no specific MIME types are named."""
        return []

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """
        Wrap the raw bytes in a single base64-encoded binary content part.

        Args:
            fileBytes: Raw content of the file.
            context: Extraction context; only the "mimeType" key is read.

        Returns:
            A one-element list holding a ContentPart whose data is the
            base64 text of ``fileBytes`` and whose metadata records the byte
            size together with an "Unsupported file type" warning.
        """
        # A missing or empty MIME type falls back to the generic binary type.
        resolvedMime = context.get("mimeType")
        if not resolvedMime:
            resolvedMime = "application/octet-stream"

        encodedPayload = base64.b64encode(fileBytes).decode("utf-8")
        partMetadata = {
            "size": len(fileBytes),
            "warning": "Unsupported file type",
        }

        binaryPart = ContentPart(
            id=makeId(),
            parentId=None,
            label="binary",
            typeGroup="binary",
            mimeType=resolvedMime,
            data=encodedPayload,
            metadata=partMetadata,
        )
        return [binaryPart]