25 lines
800 B
Python
25 lines
800 B
Python
from typing import Any, Dict, List
|
|
import base64
|
|
|
|
from ..utils import makeId
|
|
from modules.datamodels.datamodelExtraction import ContentPart
|
|
from ..subRegistry import Extractor
|
|
|
|
|
|
class BinaryExtractor(Extractor):
    """Extractor that packages raw file bytes as one base64-encoded part.

    ``detect`` accepts every input, and ``extract`` performs no parsing: the
    bytes are base64-encoded and returned in a single ``ContentPart`` whose
    metadata carries the byte count and an "Unsupported file type" warning.
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Report that this extractor can handle the input.

        None of the arguments are inspected; the answer is always ``True``.
        """
        return True

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """Wrap ``fileBytes`` in a single binary ``ContentPart``.

        Args:
            fileBytes: Raw content to encode.
            context: Mapping read for the ``"mimeType"`` key; a missing or
                falsy value falls back to ``"application/octet-stream"``.

        Returns:
            A one-element list holding the part. Its ``data`` is the
            base64 text of ``fileBytes`` and its ``metadata`` records the
            original size in bytes together with a warning string.
        """
        resolvedMime = context.get("mimeType")
        if not resolvedMime:
            # Missing, None and empty-string values all use the generic type.
            resolvedMime = "application/octet-stream"

        partId = makeId()
        encodedPayload = base64.b64encode(fileBytes).decode("utf-8")
        partMetadata = {"size": len(fileBytes), "warning": "Unsupported file type"}

        binaryPart = ContentPart(
            id=partId,
            parentId=None,
            label="binary",
            typeGroup="binary",
            mimeType=resolvedMime,
            data=encodedPayload,
            metadata=partMetadata,
        )
        return [binaryPart]
|
|
|
|
|