58 lines
2 KiB
Python
58 lines
2 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
from typing import Any, Dict, List
|
|
|
|
from modules.datamodels.datamodelExtraction import ContentPart
|
|
from ..subUtils import makeId
|
|
from ..subRegistry import Extractor
|
|
|
|
|
|
class SqlExtractor(Extractor):
    """
    Extractor for SQL files.

    Supported formats:
    - MIME types: text/x-sql, application/sql
    - File extensions: .sql, .ddl, .dml, .dcl, .tcl
    - Special handling: Treats as structured text with SQL syntax
    """

    # Single source of truth for what this extractor accepts, shared by
    # detect(), getSupportedExtensions() and getSupportedMimeTypes() so the
    # three can never disagree. Tuples because str.endswith() needs a tuple.
    _SUPPORTED_MIME_TYPES = ("text/x-sql", "application/sql")
    _SUPPORTED_EXTENSIONS = (".sql", ".ddl", ".dml", ".dcl", ".tcl")

    # (metadata key, upper-case SQL keyword) pairs, in the order the flags
    # appear in the emitted metadata dict.
    _KEYWORD_FLAGS = (
        ("has_select", "SELECT"),
        ("has_insert", "INSERT"),
        ("has_update", "UPDATE"),
        ("has_delete", "DELETE"),
        ("has_create", "CREATE"),
        ("has_drop", "DROP"),
    )

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """
        Return True when the file looks like SQL.

        A file matches when its MIME type is one of the supported SQL MIME
        types (exact match) or its name ends with a supported extension
        (case-insensitive). A missing/empty fileName is treated as "".
        headBytes is not inspected.
        """
        if mimeType in self._SUPPORTED_MIME_TYPES:
            return True
        return (fileName or "").lower().endswith(self._SUPPORTED_EXTENSIONS)

    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions."""
        # Fresh list on every call so callers may mutate the result freely.
        return list(self._SUPPORTED_EXTENSIONS)

    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return list(self._SUPPORTED_MIME_TYPES)

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """
        Decode the SQL file and wrap it in a single ContentPart.

        Args:
            fileBytes: Raw file content; decoded as UTF-8 with undecodable
                bytes replaced rather than raising.
            context: Extraction context; only "mimeType" is read here and
                falls back to "text/x-sql" when absent or empty.

        Returns:
            A one-element list holding a ContentPart labelled "main" in the
            "structure" type group, carrying the decoded text and metadata
            (byte size, line count, and has_<keyword> flags).
        """
        mimeType = context.get("mimeType") or "text/x-sql"
        data = fileBytes.decode("utf-8", errors="replace")

        # Upper-case once and reuse it for every keyword probe instead of
        # re-copying the whole file content per keyword.
        upperData = data.upper()

        # Add SQL-specific metadata
        metadata = {
            "size": len(fileBytes),
            "file_type": "sql",
            "line_count": len(data.splitlines()),
        }
        # NOTE: plain substring heuristic - a keyword embedded in an
        # identifier, comment or string literal (e.g. "updated_at") also
        # sets the corresponding flag.
        for flagName, keyword in self._KEYWORD_FLAGS:
            metadata[flagName] = keyword in upperData

        return [ContentPart(
            id=makeId(),
            parentId=None,
            label="main",
            typeGroup="structure",
            mimeType=mimeType,
            data=data,
            metadata=metadata
        )]
|