from typing import Any, Dict, List

from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId
from ..subRegistry import Extractor


class TextExtractor(Extractor):
    """Extractor for plain-text and Markdown payloads.

    The whole payload is decoded as UTF-8 and returned as a single
    ``ContentPart`` labelled ``"main"``.
    """

    # Media types (bare ``type/subtype``, lower-case) this extractor accepts.
    _SUPPORTED_MIME_TYPES = ("text/plain", "text/markdown")

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Return True when ``mimeType`` names a supported text media type.

        Only ``mimeType`` is inspected; ``fileName`` and ``headBytes`` are
        accepted for interface compatibility and are not used.

        Media-type names are case-insensitive and may carry parameters
        (for example ``"text/plain; charset=utf-8"``), so only the lower-cased
        ``type/subtype`` portion is compared.
        """
        if not isinstance(mimeType, str):
            return False
        essence = mimeType.split(";", 1)[0].strip().lower()
        return essence in self._SUPPORTED_MIME_TYPES

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """Decode ``fileBytes`` as UTF-8 text and wrap it in one content part.

        Args:
            fileBytes: Raw bytes of the file.
            context: Extraction context; the ``"mimeType"`` entry is read and
                falls back to ``"text/plain"`` when missing or empty.

        Returns:
            A one-element list holding a ``ContentPart`` whose ``data`` is the
            decoded text and whose ``metadata["size"]`` is the byte length of
            the original input.
        """
        mimeType = context.get("mimeType") or "text/plain"
        # "utf-8-sig" drops a leading UTF-8 byte-order mark if one is present
        # (otherwise it behaves like "utf-8"), so the text does not start with
        # a stray U+FEFF. Undecodable bytes become U+FFFD instead of raising.
        data = fileBytes.decode("utf-8-sig", errors="replace")
        return [ContentPart(
            id=makeId(),
            parentId=None,
            label="main",
            typeGroup="text",
            mimeType=mimeType,
            data=data,
            # Size is reported in bytes of the raw input, not decoded characters.
            metadata={"size": len(fileBytes)},
        )]