gateway/modules/features/aichat/serviceExtraction/extractors/extractorBinary.py
2026-01-22 17:00:29 +01:00

47 lines
1.5 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
from typing import Any, Dict, List
import base64
from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor
class BinaryExtractor(Extractor):
    """
    Catch-all extractor used when a file type is not otherwise supported.

    The raw file content is not interpreted in any way: it is base64-encoded
    and returned as a single content part flagged as binary data.

    Supported formats:
    - All file types (fallback)
    - MIME types: application/octet-stream (default)
    - File extensions: All (fallback)
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Accept every file; none of the arguments are inspected."""
        return True

    def getSupportedExtensions(self) -> list[str]:
        """Return the list of supported file extensions.

        The list is empty: as the fallback, this extractor is not bound to
        any particular extension.
        """
        noSpecificExtensions: list[str] = []
        return noSpecificExtensions

    def getSupportedMimeTypes(self) -> list[str]:
        """Return the list of supported MIME types.

        The list is empty: as the fallback, this extractor is not bound to
        any particular MIME type.
        """
        noSpecificMimeTypes: list[str] = []
        return noSpecificMimeTypes

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """Wrap the raw file content in a single binary content part.

        Args:
            fileBytes: Raw content of the file.
            context: Extraction context; only the "mimeType" entry is read.

        Returns:
            A one-element list holding a ContentPart whose data is the
            base64 text of ``fileBytes`` and whose metadata carries the
            byte size and an "Unsupported file type" warning.
        """
        # A missing or empty MIME type falls back to the generic binary type.
        resolvedMimeType = context.get("mimeType")
        if not resolvedMimeType:
            resolvedMimeType = "application/octet-stream"

        partId = makeId()
        # base64 output is plain ASCII, so decoding it to str is always safe.
        encodedPayload = base64.b64encode(fileBytes).decode("utf-8")
        partMetadata = {
            "size": len(fileBytes),
            "warning": "Unsupported file type",
        }

        binaryPart = ContentPart(
            id=partId,
            parentId=None,
            label="binary",
            typeGroup="binary",
            mimeType=resolvedMimeType,
            data=encodedPayload,
            metadata=partMetadata,
        )
        return [binaryPart]