47 lines
1.5 KiB
Python
47 lines
1.5 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
from typing import Any, Dict, List
|
|
import base64
|
|
|
|
from ..subUtils import makeId
|
|
from modules.datamodels.datamodelExtraction import ContentPart
|
|
from ..subRegistry import Extractor
|
|
|
|
|
|
class BinaryExtractor(Extractor):
    """
    Catch-all extractor used for file types no other extractor supports.

    The file content is not interpreted: the raw bytes are base64-encoded
    and returned as a single part flagged as binary data.

    Supported formats:
    - All file types (fallback)
    - MIME types: application/octet-stream (default)
    - File extensions: All (fallback)
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Always report a match; none of the arguments are inspected."""
        return True

    def getSupportedExtensions(self) -> list[str]:
        """Return the supported file extensions (empty list for the fallback)."""
        # No specific extensions are listed: the fallback accepts all of them.
        return []

    def getSupportedMimeTypes(self) -> list[str]:
        """Return the supported MIME types (empty list for the fallback)."""
        # No specific MIME types are listed: the fallback accepts all of them.
        return []

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """
        Wrap the raw file bytes in a single base64-encoded binary part.

        Args:
            fileBytes: Raw content of the file.
            context: Extraction context; only its "mimeType" entry is read.

        Returns:
            A one-element list holding a ContentPart whose data is the
            base64 text of fileBytes and whose metadata records
            len(fileBytes) plus an "Unsupported file type" warning.
        """
        resolvedMime = context.get("mimeType")
        if not resolvedMime:
            # Missing or empty MIME type: use the generic binary type.
            resolvedMime = "application/octet-stream"

        # Generate the id before encoding the payload.
        partId = makeId()
        encodedPayload = base64.b64encode(fileBytes).decode("utf-8")
        partMetadata = {"size": len(fileBytes), "warning": "Unsupported file type"}

        binaryPart = ContentPart(
            id=partId,
            parentId=None,
            label="binary",
            typeGroup="binary",
            mimeType=resolvedMime,
            data=encodedPayload,
            metadata=partMetadata,
        )
        return [binaryPart]
|
|
|
|
|