gateway/modules/connectors/connectorDbJson.py

573 lines
No EOL
22 KiB
Python

import json
import os
from typing import List, Dict, Any, Optional, Union
import logging
from datetime import datetime
import uuid
logger = logging.getLogger(__name__)


class DatabaseConnector:
    """
    A connector for JSON-based data storage.

    Provides generic database operations without user/mandate filtering.
    Each table is a folder below ``<dbHost>/<dbDatabase>`` and each record is
    a ``<id>.json`` file inside that folder. A single JSON file named
    ``_system`` in the database folder maps table names to their "initial"
    record ID. Tables are loaded lazily and kept in in-memory caches.
    """

    # Per-table metadata file written by _saveTableMetadata. It lives next to
    # the record files, so directory scans must never treat it as a record.
    _METADATA_FILENAME = "_metadata.json"

    def __init__(self, dbHost: str, dbDatabase: str, dbUser: Optional[str] = None, dbPassword: Optional[str] = None,
                 _mandateId: Optional[str] = None, _userId: Optional[str] = None, skipInitialIdLookup: bool = False):
        """
        Create the connector and make sure the database folder exists.

        Args:
            dbHost: Base directory that contains the database folder.
            dbDatabase: Name of the database folder below ``dbHost``.
            dbUser: Stored on the instance; not used by this connector.
            dbPassword: Stored on the instance; not used by this connector.
            _mandateId: Context mandate ID stamped on created records.
            _userId: Context user ID stamped on created records.
            skipInitialIdLookup: When True, empty context IDs are not filled
                from the initial IDs registered in the system table.

        Raises:
            ValueError: If only one of ``_mandateId`` / ``_userId`` is given.
        """
        # Store the input parameters
        self.dbHost = dbHost
        self.dbDatabase = dbDatabase
        self.dbUser = dbUser
        self.dbPassword = dbPassword

        # Context parameters: either both are given or both are omitted
        if _mandateId is None and _userId is None:
            # Allow initialization with empty strings
            self._mandateId = ''
            self._userId = ''
        else:
            if _mandateId is None or _userId is None:
                raise ValueError("_mandateId and _userId must both be provided or both be None")
            self._mandateId = _mandateId
            self._userId = _userId

        # Ensure the database directory exists
        self.dbFolder = os.path.join(self.dbHost, self.dbDatabase)
        os.makedirs(self.dbFolder, exist_ok=True)

        # Caches for loaded data
        self._tablesCache = {}
        self._tableMetadataCache = {}  # Cache for table metadata (record IDs, etc.)

        # Initialize system table
        self._systemTableName = "_system"
        self._initializeSystemTable()

        # If IDs are empty and we're not skipping lookup, try to use initial IDs
        if not skipInitialIdLookup:
            self._resolveInitialIds()
        logger.debug(f"Context: _mandateId={self._mandateId}, _userId={self._userId}")

    def _resolveInitialIds(self):
        """Fill empty mandate/user context IDs from the registered initial IDs."""
        if not self._mandateId:
            initialMandateId = self.getInitialId("mandates")
            if initialMandateId is not None:
                self._mandateId = initialMandateId
                logger.info(f"Using initial _mandateId: {initialMandateId}")
        if not self._userId:
            initialUserId = self.getInitialId("users")
            if initialUserId is not None:
                self._userId = initialUserId
                logger.info(f"Using initial _userId: {initialUserId}")

    def _initializeSystemTable(self):
        """Create an empty system table file if none exists, otherwise load it."""
        systemTablePath = self._getTablePath(self._systemTableName)
        if not os.path.exists(systemTablePath):
            self._saveSystemTable({})
            logger.info(f"System table initialized in {systemTablePath}")
        else:
            # Load existing system table to ensure it's available
            self._loadSystemTable()
            logger.debug(f"Existing system table loaded from {systemTablePath}")

    def _loadSystemTable(self) -> Dict[str, str]:
        """
        Return the system table (table name -> initial ID), using the cache.

        Any read or parse problem is logged and yields an empty table.
        """
        # Special prefix avoids a collision with a regular table of the same name
        cacheKey = f"_{self._systemTableName}"
        if cacheKey in self._tablesCache:
            return self._tablesCache[cacheKey]

        systemTablePath = self._getTablePath(self._systemTableName)
        data = {}
        try:
            if os.path.exists(systemTablePath):
                with open(systemTablePath, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                if not isinstance(data, dict):
                    # Callers rely on dict semantics (.get, "in", del)
                    logger.error(
                        f"Error loading the system table: expected a JSON object, got {type(data).__name__}"
                    )
                    data = {}
        except Exception as e:
            logger.error(f"Error loading the system table: {e}")
            data = {}

        self._tablesCache[cacheKey] = data
        return data

    def _saveSystemTable(self, data: Dict[str, str]) -> bool:
        """Write the system table to disk and refresh the cache. Returns success."""
        systemTablePath = self._getTablePath(self._systemTableName)
        try:
            with open(systemTablePath, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
            # Update cache
            self._tablesCache[f"_{self._systemTableName}"] = data
            return True
        except Exception as e:
            logger.error(f"Error saving the system table: {e}")
            return False

    def _getTablePath(self, table: str) -> str:
        """Returns the full path to a table folder"""
        return os.path.join(self.dbFolder, table)

    def _getRecordPath(self, table: str, recordId: Union[str, int]) -> str:
        """Returns the full path to a record file"""
        return os.path.join(self._getTablePath(table), f"{recordId}.json")

    def _ensureTableDirectory(self, table: str) -> bool:
        """Create the table folder if needed. The system table is a file, so it is skipped."""
        if table == self._systemTableName:
            return True
        tablePath = self._getTablePath(table)
        try:
            os.makedirs(tablePath, exist_ok=True)
            return True
        except Exception as e:
            logger.error(f"Error creating table directory {tablePath}: {e}")
            return False

    def _loadTableMetadata(self, table: str) -> Dict[str, Any]:
        """
        Return ``{"recordIds": [...]}`` for a table without loading the records.

        The IDs are derived from the ``*.json`` file names in the table
        folder (sorted). The result is cached per table.
        """
        if table in self._tableMetadataCache:
            return self._tableMetadataCache[table]

        # Ensure table directory exists
        if not self._ensureTableDirectory(table):
            return {"recordIds": []}

        tablePath = self._getTablePath(table)
        metadata = {"recordIds": []}
        try:
            if os.path.isdir(tablePath):
                for filename in os.listdir(tablePath):
                    # The metadata file shares the folder and extension with
                    # the records but is not a record itself.
                    if filename.endswith('.json') and filename != self._METADATA_FILENAME:
                        metadata["recordIds"].append(filename[:-5])  # Remove .json extension
                metadata["recordIds"].sort()
            self._tableMetadataCache[table] = metadata
        except Exception as e:
            logger.error(f"Error loading table metadata for {table}: {e}")
        return metadata

    def _loadRecord(self, table: str, recordId: Union[str, int]) -> Optional[Dict[str, Any]]:
        """Load one record file; returns None if it is missing or unreadable."""
        recordPath = self._getRecordPath(table, recordId)
        try:
            if os.path.exists(recordPath):
                with open(recordPath, 'r', encoding='utf-8') as f:
                    record = json.load(f)
                # Ensure ID is a string
                if "id" in record:
                    record["id"] = str(record["id"])
                return record
        except Exception as e:
            logger.error(f"Error loading record {recordId} from table {table}: {e}")
        return None

    def _loadTable(self, table: str) -> List[Dict[str, Any]]:
        """
        Load all records of a table (cached after the first call).

        For the system table name this returns the system table dict instead
        of a record list.
        """
        # If the table is the system table, load it directly
        if table == self._systemTableName:
            return self._loadSystemTable()

        # If the table is already in the cache, use the cache
        if table in self._tablesCache:
            return self._tablesCache[table]

        # Load metadata first, then each record; unreadable records are skipped
        metadata = self._loadTableMetadata(table)
        records = []
        for recordId in metadata["recordIds"]:
            record = self._loadRecord(table, recordId)
            if record:
                records.append(record)

        self._tablesCache[table] = records
        return records

    def _saveTable(self, table: str, data: List[Dict[str, Any]]) -> bool:
        """
        Write every record in ``data`` to its own file and cache ``data``.

        Records without an ``id`` are logged and skipped. Files of records
        that are not part of ``data`` are left untouched. Returns success.
        """
        # The system table is handled specially
        if table == self._systemTableName:
            return self._saveSystemTable(data)

        tablePath = self._getTablePath(table)
        try:
            # Ensure table directory exists
            os.makedirs(tablePath, exist_ok=True)

            # Save each record as a separate file
            for record in data:
                if "id" not in record:
                    logger.error(f"Record missing ID in table {table}")
                    continue
                recordPath = self._getRecordPath(table, record["id"])
                with open(recordPath, 'w', encoding='utf-8') as f:
                    json.dump(record, f, indent=2, ensure_ascii=False)

            # Update the cache
            self._tablesCache[table] = data
            # The set of record files may have changed; force a rescan so
            # recordModify/recordDelete can see newly written records.
            self._tableMetadataCache.pop(table, None)
            logger.debug(f"Successfully saved table {table}")
            return True
        except Exception as e:
            logger.error(f"Error saving table {table}: {str(e)}")
            logger.error(f"Error type: {type(e).__name__}")
            logger.error(f"Error details: {e.__dict__ if hasattr(e, '__dict__') else 'No details available'}")
            return False

    def _applyRecordFilter(self, records: List[Dict[str, Any]], recordFilter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
        """
        Return the records whose fields equal all values in ``recordFilter``.

        Values are compared as strings. A record missing a filtered field
        does not match. An empty/None filter returns ``records`` unchanged.
        """
        if not recordFilter:
            return records
        return [
            record for record in records
            if all(
                field in record and str(record[field]) == str(value)
                for field, value in recordFilter.items()
            )
        ]

    def _registerInitialId(self, table: str, initialId: str) -> bool:
        """Register the initial ID for a table unless one is already registered."""
        try:
            systemData = self._loadSystemTable()
            if table not in systemData:
                systemData[table] = initialId
                success = self._saveSystemTable(systemData)
                if success:
                    logger.info(f"Initial ID {initialId} for table {table} registered")
                return success
            return True  # If already present, this is not an error
        except Exception as e:
            logger.error(f"Error registering the initial ID for table {table}: {e}")
            return False

    def _removeInitialId(self, table: str) -> bool:
        """Remove the initial ID of a table from the system table, if present."""
        try:
            systemData = self._loadSystemTable()
            if table in systemData:
                del systemData[table]
                success = self._saveSystemTable(systemData)
                if success:
                    logger.info(f"Initial ID for table {table} removed from system table")
                return success
            return True  # If not present, this is not an error
        except Exception as e:
            logger.error(f"Error removing initial ID for table {table}: {e}")
            return False

    def _getCurrentTimestamp(self) -> str:
        """Returns the current (local, naive) timestamp in ISO format."""
        return datetime.now().isoformat()

    def _saveTableMetadata(self, table: str, metadata: Dict[str, Any]) -> bool:
        """Write table metadata to the table's metadata file and cache it. Returns success."""
        try:
            metadataPath = os.path.join(self._getTablePath(table), self._METADATA_FILENAME)
            with open(metadataPath, 'w', encoding='utf-8') as f:
                json.dump(metadata, f, indent=2, ensure_ascii=False)
            # Update cache
            self._tableMetadataCache[table] = metadata
            return True
        except Exception as e:
            logger.error(f"Error saving metadata for table {table}: {e}")
            return False

    def updateContext(self, _mandateId: str, _userId: str) -> None:
        """
        Replace the mandate/user context and drop all caches.

        Raises:
            ValueError: If either ID is None.
        """
        if _mandateId is None or _userId is None:
            raise ValueError("_mandateId and _userId must both be provided")
        self._mandateId = _mandateId
        self._userId = _userId
        logger.info(f"Updated database context: _mandateId={self._mandateId}, _userId={self._userId}")
        # Clear cache to ensure fresh data with new context
        self._tablesCache = {}
        self._tableMetadataCache = {}

    # Public API

    def getTables(self) -> List[str]:
        """Return the names of all table folders (entries starting with '_' are excluded)."""
        tables = []
        try:
            for item in os.listdir(self.dbFolder):
                itemPath = os.path.join(self.dbFolder, item)
                if os.path.isdir(itemPath) and not item.startswith('_'):
                    tables.append(item)
        except Exception as e:
            logger.error(f"Error reading the database directory: {e}")
        return tables

    def getFields(self, table: str) -> List[str]:
        """Return the field names of the table's first record, or [] for an empty table."""
        data = self._loadTable(table)
        if not data:
            return []
        return list(data[0].keys())

    def getSchema(self, table: str, language: str = None) -> Dict[str, Dict[str, Any]]:
        """
        Return ``{field: {"type": ..., "label": ...}}`` inferred from the first record.

        ``type`` is the Python type name of the value and ``label`` is the
        field name itself. ``language`` is accepted but currently unused.
        """
        data = self._loadTable(table)
        if not data:
            return {}
        return {
            field: {"type": type(value).__name__, "label": field}
            for field, value in data[0].items()
        }

    def getRecordset(self, table: str, fieldFilter: List[str] = None, recordFilter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
        """
        Return records of a table, optionally filtered and projected.

        Args:
            table: Table name.
            fieldFilter: If a non-empty list, each returned record contains
                only these fields (fields a record lacks are omitted).
            recordFilter: Field/value pairs that must all match (string
                comparison). When it contains ``id`` only that record file is
                read instead of the whole table.

        Returns:
            A new list; the record dicts may be shared with the cache.
        """
        if recordFilter and "id" in recordFilter:
            # Only load the requested record instead of the whole table
            record = self._loadRecord(table, recordFilter["id"])
            if not record:
                return []
            records = [record]
        else:
            # Load all records if no specific ID filter
            records = self._loadTable(table)

        # Apply recordFilter if available
        if recordFilter:
            records = self._applyRecordFilter(records, recordFilter)

        # If fieldFilter is available, reduce the fields
        if fieldFilter and isinstance(fieldFilter, list):
            return [
                {field: record[field] for field in fieldFilter if field in record}
                for record in records
            ]

        # Copy so callers cannot add/remove entries of the internal cache list
        return list(records)

    def recordCreate(self, table: str, recordData: Dict[str, Any]) -> Dict[str, Any]:
        """
        Create a record file in the table and return the stored record.

        ``recordData`` is modified in place: ``id`` is generated (UUID4) or
        converted to a string, and ``_mandateId``, ``_userId``,
        ``_createdAt`` and ``_modifiedAt`` are set. Passing the ID of an
        existing record overwrites that record.

        Raises:
            ValueError: If the record could not be created; the underlying
                error is attached as ``__cause__``.
        """
        try:
            # Ensure table directory exists
            if not self._ensureTableDirectory(table):
                raise ValueError(f"Error creating table directory for {table}")

            # Load table metadata
            metadata = self._loadTableMetadata(table)

            # Generate new ID if not provided, otherwise normalize it to a string
            if "id" not in recordData:
                recordData["id"] = str(uuid.uuid4())
            else:
                recordData["id"] = str(recordData["id"])
            recordId = recordData["id"]

            # Add context fields and timestamps
            recordData["_mandateId"] = self._mandateId
            recordData["_userId"] = self._userId
            currentTime = self._getCurrentTimestamp()
            recordData["_createdAt"] = currentTime
            recordData["_modifiedAt"] = currentTime

            # Save the record first, so metadata and caches are only touched
            # once the file really exists.
            recordPath = self._getRecordPath(table, recordId)
            os.makedirs(os.path.dirname(recordPath), exist_ok=True)
            with open(recordPath, 'w', encoding='utf-8') as f:
                json.dump(recordData, f, indent=2, ensure_ascii=False)

            # Verify the record was created
            if not os.path.exists(recordPath):
                raise ValueError(f"Record file was not created at {recordPath}")

            # Update metadata without introducing duplicate IDs
            recordIds = metadata.setdefault("recordIds", [])
            if recordId not in recordIds:
                recordIds.append(recordId)
                recordIds.sort()
            if not self._saveTableMetadata(table, metadata):
                raise ValueError(f"Error saving metadata for table {table}")

            # Only patch the table cache if the table is already loaded.
            # Seeding it with just this record would hide every other record
            # on disk from later _loadTable calls.
            cachedRecords = self._tablesCache.get(table)
            if isinstance(cachedRecords, list):
                for index, cachedRecord in enumerate(cachedRecords):
                    if str(cachedRecord.get("id")) == recordId:
                        cachedRecords[index] = recordData
                        break
                else:
                    cachedRecords.append(recordData)

            return recordData
        except Exception as e:
            logger.error(f"Error creating record in table {table}: {str(e)}")
            raise ValueError(f"Error creating the record in table {table}") from e

    def recordDelete(self, table: str, recordId: str) -> bool:
        """
        Delete a record file and drop it from metadata and caches.

        If the record is the table's registered initial ID, that registration
        is removed as well.

        Returns:
            True if the record file was deleted. False if the record is
            unknown, its file was already gone, or the deletion failed.
        """
        # Record IDs are stored as strings everywhere else
        recordId = str(recordId)

        # Load metadata
        metadata = self._loadTableMetadata(table)
        if recordId not in metadata["recordIds"]:
            return False

        # Delete the record file
        recordPath = self._getRecordPath(table, recordId)
        fileExisted = os.path.exists(recordPath)
        if fileExisted:
            try:
                os.remove(recordPath)
            except Exception as e:
                # Leave metadata, caches and the initial ID untouched: the
                # record still exists.
                logger.error(f"Error deleting record file {recordPath}: {e}")
                return False

        # From here on the record no longer exists on disk, so every
        # reference to it is stale and must go.
        metadata["recordIds"] = [rid for rid in metadata["recordIds"] if rid != recordId]
        self._tableMetadataCache[table] = metadata
        if fileExisted:
            # Best effort: keep the on-disk metadata file in step with the cache
            self._saveTableMetadata(table, metadata)

        # Update table cache if it exists
        if table in self._tablesCache:
            self._tablesCache[table] = [
                r for r in self._tablesCache[table] if str(r.get("id")) != recordId
            ]

        # Check if it's an initial record
        initialId = self.getInitialId(table)
        if initialId is not None and initialId == recordId:
            self._removeInitialId(table)
            logger.info(f"Initial ID {recordId} for table {table} has been removed from the system table")

        return fileExisted

    def recordModify(self, table: str, recordId: str, recordData: Dict[str, Any]) -> Dict[str, Any]:
        """
        Merge ``recordData`` into an existing record and return the result.

        ``_modifiedAt`` is refreshed. The record ID cannot be changed.

        Raises:
            ValueError: If the table folder cannot be created, the record
                does not exist, ``recordData`` carries a different ``id``,
                or the record file cannot be written.
        """
        # Ensure table directory exists
        if not self._ensureTableDirectory(table):
            raise ValueError(f"Error creating table directory for {table}")

        # Load metadata to check if record exists
        metadata = self._loadTableMetadata(table)

        # Ensure recordId is a string
        recordId = str(recordId)
        if recordId not in metadata["recordIds"]:
            raise ValueError(f"Record with ID {recordId} not found in table {table}")

        # Prevent changing the ID
        if "id" in recordData and str(recordData["id"]) != recordId:
            raise ValueError(f"The ID of a record in table {table} cannot be changed")

        # Load existing record
        existingRecord = self._loadRecord(table, recordId)
        if not existingRecord:
            raise ValueError(f"Record with ID {recordId} not found in table {table}")

        # Update the record
        existingRecord.update(recordData)
        if "id" in recordData:
            # Keep the stored ID a string even if the caller passed e.g. an int
            existingRecord["id"] = recordId

        # Update modified timestamp
        existingRecord["_modifiedAt"] = self._getCurrentTimestamp()

        # Save the updated record
        recordPath = self._getRecordPath(table, recordId)
        try:
            with open(recordPath, 'w', encoding='utf-8') as f:
                json.dump(existingRecord, f, indent=2, ensure_ascii=False)

            # Update table cache if it exists
            if table in self._tablesCache:
                for i, record in enumerate(self._tablesCache[table]):
                    if str(record.get("id")) == recordId:
                        self._tablesCache[table][i] = existingRecord
                        break
            return existingRecord
        except Exception as e:
            logger.error(f"Error updating record file {recordPath}: {e}")
            raise ValueError(f"Error updating record in table {table}") from e

    def hasInitialId(self, table: str) -> bool:
        """Checks if an initial ID is registered for a table."""
        return table in self._loadSystemTable()

    def getInitialId(self, table: str) -> Optional[str]:
        """Returns the initial ID for a table, or None if none is registered."""
        systemData = self._loadSystemTable()
        initialId = systemData.get(table)
        logger.debug(f"Database '{self.dbDatabase}': Table: {systemData}, Initial ID for table '{table}' is {initialId}")
        if initialId is None:
            logger.debug(f"No initial ID found for table {table}")
        return initialId

    def getAllInitialIds(self) -> Dict[str, str]:
        """Returns all registered initial IDs."""
        # Return a copy to protect the cached system table
        return self._loadSystemTable().copy()