from typing import Any, Dict, List
import base64
from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor


class BinaryExtractor(Extractor):
    """Extractor that wraps a file's raw bytes in one base64-encoded part.

    ``detect`` accepts every input unconditionally.
    NOTE(review): presumably registered as the fallback extractor for
    otherwise unsupported files -- confirm against the registry.
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Report a match for any file; none of the arguments are inspected."""
        return True

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """Return a single-element list holding the payload as base64 text.

        Args:
            fileBytes: Raw content of the file.
            context: Mapping consulted for a ``"mimeType"`` entry.

        Returns:
            A list with exactly one ``ContentPart`` labelled ``"binary"``.
            Its ``data`` is the base64 encoding of ``fileBytes``, and its
            metadata records the byte count and an "unsupported" warning.
        """
        # A missing or falsy MIME type falls back to the generic binary type.
        resolvedMime = context.get("mimeType")
        if not resolvedMime:
            resolvedMime = "application/octet-stream"

        partId = makeId()
        encodedPayload = base64.b64encode(fileBytes).decode("utf-8")
        partInfo = {
            "size": len(fileBytes),
            "warning": "Unsupported file type",
        }

        binaryPart = ContentPart(
            id=partId,
            parentId=None,
            label="binary",
            typeGroup="binary",
            mimeType=resolvedMime,
            data=encodedPayload,
            metadata=partInfo,
        )
        return [binaryPart]