# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import re
from typing import Any, Dict, List

from modules.datamodels.datamodelExtraction import ContentPart

from ..subUtils import makeId
from ..subRegistry import Extractor

# Single source of truth for what this extractor accepts, shared by detect()
# and the getSupported*() accessors so the lists cannot drift apart.
_SQL_MIME_TYPES = ("text/x-sql", "application/sql")
_SQL_EXTENSIONS = (".sql", ".ddl", ".dml", ".dcl", ".tcl")

# Statement keywords reported in the metadata as "has_<keyword>" flags.
_SQL_KEYWORDS = ("SELECT", "INSERT", "UPDATE", "DELETE", "CREATE", "DROP")

# Whole-word match only: a plain substring test would flag identifiers such as
# "created_at", "updated_at" or "dropdown" as CREATE / UPDATE / DROP statements.
# The pattern is applied to upper-cased text, so no IGNORECASE flag is needed.
_SQL_KEYWORD_PATTERN = re.compile(r"\b(" + "|".join(_SQL_KEYWORDS) + r")\b")


class SqlExtractor(Extractor):
    """
    Extractor for SQL files.

    Supported formats:
    - MIME types: text/x-sql, application/sql
    - File extensions: .sql, .ddl, .dml, .dcl, .tcl
    - Special handling: Treats as structured text with SQL syntax
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Return True if the file looks like SQL.

        A file is accepted when its MIME type is one of the supported SQL MIME
        types, or when its name ends (case-insensitively) with one of the
        supported extensions. A missing/empty ``fileName`` is tolerated.
        ``headBytes`` is not inspected.
        """
        if mimeType in _SQL_MIME_TYPES:
            return True
        return (fileName or "").lower().endswith(_SQL_EXTENSIONS)

    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions."""
        # Fresh list on every call so callers may mutate the result safely.
        return list(_SQL_EXTENSIONS)

    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return list(_SQL_MIME_TYPES)

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """Decode the SQL file and return it as a single ContentPart.

        The bytes are decoded as UTF-8; undecodable sequences are replaced
        rather than raising. The returned part carries the full text plus
        metadata: byte size, line count, and one ``has_<keyword>`` flag per
        statement keyword (SELECT, INSERT, UPDATE, DELETE, CREATE, DROP).
        A flag is True only when the keyword occurs as a whole word
        (case-insensitive) somewhere in the text.

        Args:
            fileBytes: Raw file contents.
            context: Extraction context; ``"mimeType"`` is read from it and
                defaults to ``"text/x-sql"`` when absent or empty.

        Returns:
            A one-element list containing the ContentPart for the whole file.
        """
        mimeType = context.get("mimeType") or "text/x-sql"
        data = fileBytes.decode("utf-8", errors="replace")

        # Upper-case once and scan once, collecting every keyword that occurs.
        foundKeywords = {
            match.group(1)
            for match in _SQL_KEYWORD_PATTERN.finditer(data.upper())
        }

        # Add SQL-specific metadata
        metadata = {
            "size": len(fileBytes),
            "file_type": "sql",
            "line_count": len(data.splitlines()),
            "has_select": "SELECT" in foundKeywords,
            "has_insert": "INSERT" in foundKeywords,
            "has_update": "UPDATE" in foundKeywords,
            "has_delete": "DELETE" in foundKeywords,
            "has_create": "CREATE" in foundKeywords,
            "has_drop": "DROP" in foundKeywords,
        }

        return [
            ContentPart(
                id=makeId(),
                parentId=None,
                label="main",
                typeGroup="structure",
                mimeType=mimeType,
                data=data,
                metadata=metadata,
            )
        ]