gateway/modules/services/serviceExtraction/extractors/extractorBinary.py
2025-10-12 01:14:07 +02:00

25 lines
803 B
Python

from typing import Any, Dict, List
import base64
from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor
class BinaryExtractor(Extractor):
    """Extractor that accepts any input and emits it as one base64 part.

    NOTE(review): the "Unsupported file type" warning suggests this is the
    last-resort fallback when no specialised extractor matches -- confirm
    against the registry's ordering.
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Report a match for every file.

        None of the arguments are inspected; the result is always ``True``.
        """
        return True

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """Wrap the raw bytes in a single ``ContentPart``.

        Args:
            fileBytes: Raw file content; base64-encoded into the part's data.
            context: Extraction context. Only the ``"mimeType"`` key is read.

        Returns:
            A one-element list holding a part labelled ``"binary"`` whose
            metadata carries the byte size and an "unsupported" warning.
        """
        # A missing, None, or empty MIME type falls back to the generic one.
        resolvedMime = context.get("mimeType")
        if not resolvedMime:
            resolvedMime = "application/octet-stream"

        # The id is generated before the payload is touched, matching the
        # evaluation order of the constructor arguments.
        partId = makeId()
        encodedPayload = base64.b64encode(fileBytes).decode("utf-8")
        partMetadata = {
            "size": len(fileBytes),
            "warning": "Unsupported file type",
        }

        binaryPart = ContentPart(
            id=partId,
            parentId=None,
            label="binary",
            typeGroup="binary",
            mimeType=resolvedMime,
            data=encodedPayload,
            metadata=partMetadata,
        )
        return [binaryPart]