# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
from typing import Any, Dict, List

from modules.datamodels.datamodelExtraction import ContentPart
from ..subUtils import makeId
from ..subRegistry import Extractor


class CsvExtractor(Extractor):
    """
    Extractor for CSV files.

    Supported formats:
    - MIME types: text/csv (parameters such as "; charset=utf-8" are ignored)
    - File extensions: .csv
    - Special handling: Treats as table data
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Return True when the file looks like CSV.

        A file matches when its MIME type is ``text/csv`` or its name ends
        with ``.csv`` (case-insensitive). ``headBytes`` is not inspected.

        Args:
            fileName: Name of the file; may be empty or None.
            mimeType: Declared MIME type; may be empty or None.
            headBytes: Leading bytes of the file (unused by this extractor).
        """
        # MIME types are case-insensitive and may carry parameters
        # ("text/csv; charset=utf-8"); compare only the bare type/subtype.
        bareMimeType = (mimeType or "").split(";", 1)[0].strip().lower()
        if bareMimeType == "text/csv":
            return True
        return (fileName or "").lower().endswith(".csv")

    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions."""
        return [".csv"]

    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return ["text/csv"]

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """Decode the raw CSV bytes into a single table content part.

        Args:
            fileBytes: Raw file content.
            context: Extraction context; the optional ``"mimeType"`` entry is
                used for the part and falls back to ``"text/csv"``.

        Returns:
            A one-element list holding a ``ContentPart`` with label ``"main"``
            and type group ``"table"`` whose data is the decoded text. The
            ``size`` metadata is the byte length of the original input.
        """
        mimeType = context.get("mimeType") or "text/csv"
        # "utf-8-sig" drops a leading UTF-8 BOM (common in spreadsheet
        # exports) so it does not end up attached to the first header cell;
        # input without a BOM decodes exactly as with plain "utf-8".
        # Undecodable bytes are replaced instead of raising.
        data = fileBytes.decode("utf-8-sig", errors="replace")
        return [
            ContentPart(
                id=makeId(),
                parentId=None,
                label="main",
                typeGroup="table",
                mimeType=mimeType,
                data=data,
                metadata={"size": len(fileBytes)},
            )
        ]