# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

import contextvars
import re
import psycopg2
import psycopg2.extras
import logging
from typing import List, Dict, Any, Optional, Union, get_origin, get_args, Type
import uuid
from pydantic import BaseModel, Field
import threading

from modules.shared.timeUtils import getUtcTimestamp
from modules.shared.configuration import APP_CONFIG
from modules.datamodels.datamodelUam import User, AccessLevel, UserPermissions
from modules.datamodels.datamodelRbac import AccessRule, AccessRuleContext

logger = logging.getLogger(__name__)

# No mapping needed - table name = Pydantic model name exactly


class SystemTable(BaseModel):
    """Data model for system table entries."""

    table_name: str = Field(
        description="Name of the table",
        json_schema_extra={
            "frontend_type": "text",
            "frontend_readonly": True,
            "frontend_required": True,
        },
    )
    initial_id: Optional[str] = Field(
        default=None,
        description="Initial ID for the table",
        json_schema_extra={
            "frontend_type": "text",
            "frontend_readonly": True,
            "frontend_required": False,
        },
    )


def _isVectorType(sqlType: str) -> bool:
    """Check if a SQL type string represents a pgvector column."""
    return sqlType.upper().startswith("VECTOR")


def _isJsonbType(fieldType) -> bool:
    """Check if a type should be stored as JSONB in PostgreSQL."""
    # Direct dict or list
    if fieldType == dict or fieldType == list:
        return True

    # Generic List[X] or Dict[X, Y]
    origin = get_origin(fieldType)
    if origin in (dict, list):
        return True

    # Direct Pydantic BaseModel subclass
    if isinstance(fieldType, type) and issubclass(fieldType, BaseModel):
        return True

    # Optional[X] - check the inner type
    if origin is Union:
        args = get_args(fieldType)
        for arg in args:
            if arg is type(None):
                continue
            # Recursively check the inner type
            if _isJsonbType(arg):
                return True

    return False


def _get_model_fields(model_class) -> Dict[str, str]:
    """Get all fields from a Pydantic model and map them to SQL types.

    Supports an explicit db_type override via json_schema_extra={"db_type": "vector(1536)"}.
    This enables pgvector columns without special-casing field names.
    """
    model_fields = model_class.model_fields

    fields = {}
    for field_name, field_info in model_fields.items():
        field_type = field_info.annotation

        # Explicit db_type override (e.g. vector columns)
        extra = field_info.json_schema_extra
        if extra and isinstance(extra, dict) and "db_type" in extra:
            fields[field_name] = extra["db_type"]
            continue

        # Unwrap Optional[X] → X (handles both typing.Union and types.UnionType)
        origin = get_origin(field_type)
        if origin is Union:
            args = [a for a in get_args(field_type) if a is not type(None)]
            if len(args) == 1:
                field_type = args[0]
        elif hasattr(field_type, "__args__") and type(None) in getattr(field_type, "__args__", ()):
            args = [a for a in field_type.__args__ if a is not type(None)]
            if len(args) == 1:
                field_type = args[0]

        if _isJsonbType(field_type):
            fields[field_name] = "JSONB"
        elif field_type is bool:
            fields[field_name] = "BOOLEAN"
        elif field_type is int:
            fields[field_name] = "INTEGER"
        elif field_type is float:
            fields[field_name] = "DOUBLE PRECISION"
        else:
            # str, NoneType, and any unmapped type default to TEXT
            fields[field_name] = "TEXT"

    return fields
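
# Example (illustration only, not part of the module API): a hypothetical model
# using the db_type override described above to obtain a pgvector column.
# Assuming an embedding dimension of 1536, _get_model_fields would produce:
#
#     class DocumentChunk(BaseModel):
#         text: str                                   # -> TEXT
#         tokenCount: int                             # -> INTEGER
#         metadata: Optional[Dict[str, Any]] = None   # -> JSONB
#         embedding: Optional[List[float]] = Field(
#             default=None,
#             json_schema_extra={"db_type": "vector(1536)"},
#         )                                           # -> vector(1536)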


def _parseRecordFields(record: Dict[str, Any], fields: Dict[str, str], context: str = "") -> None:
    """Parse record fields in-place: numeric typing, vector parsing, JSONB deserialization."""
    import json as _json

    for fieldName, fieldType in fields.items():
        if fieldName not in record:
            continue
        value = record[fieldName]

        if fieldType in ("DOUBLE PRECISION", "INTEGER") and value is not None:
            try:
                record[fieldName] = float(value) if fieldType == "DOUBLE PRECISION" else int(value)
            except (ValueError, TypeError):
                logger.warning(f"Could not convert {fieldName} to {fieldType} ({context}): {value}")

        elif _isVectorType(fieldType) and value is not None:
            if isinstance(value, str):
                try:
                    record[fieldName] = [float(v) for v in value.strip("[]").split(",")]
                except (ValueError, TypeError):
                    logger.warning(f"Could not parse vector field {fieldName} ({context})")
            elif isinstance(value, list):
                pass  # already a list

        elif fieldType == "BOOLEAN":
            record[fieldName] = bool(value) if value is not None else False

        elif fieldType == "JSONB" and value is not None:
            try:
                if isinstance(value, str):
                    record[fieldName] = _json.loads(value)
                elif not isinstance(value, (dict, list)):
                    record[fieldName] = _json.loads(str(value))
            except (_json.JSONDecodeError, TypeError, ValueError):
                logger.warning(f"Could not parse JSONB field {fieldName}, keeping as string ({context})")


def _applyJsonbDefaults(record: Dict[str, Any], fields: Dict[str, str], modelFields) -> None:
    """Replace NULL JSONB values with type-aware defaults: [] for list fields, {} for dict fields.

    Consolidates logic shared by every loader that returns full records.
    """
    for fieldName, fieldType in fields.items():
        if fieldType == "JSONB" and fieldName in record and record[fieldName] is None:
            fieldInfo = modelFields.get(fieldName)
            if not fieldInfo:
                continue
            annotation = fieldInfo.annotation
            origin = get_origin(annotation)
            if annotation is list or origin is list:
                record[fieldName] = []
            elif annotation is dict or origin is dict:
                record[fieldName] = {}


# Cache connectors by (host, database, port) to avoid duplicate inits for the same database.
# Thread safety: _connector_cache_lock protects cache access. userId is request-scoped via
# contextvars to avoid races when concurrent requests share the same connector.
_MAX_CACHED_CONNECTORS = 32
_connector_cache: Dict[tuple, "DatabaseConnector"] = {}
_connector_cache_order: List[tuple] = []  # FIFO order for eviction
_connector_cache_lock = threading.Lock()
_current_user_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
    "db_connector_user_id", default=None
)


def _get_cached_connector(
    dbHost: str,
    dbDatabase: str,
    dbUser: str = None,
    dbPassword: str = None,
    dbPort: int = None,
    userId: str = None,
) -> "DatabaseConnector":
    """Return a cached DatabaseConnector for the same (host, database, port) to avoid duplicate PostgreSQL inits.

    Uses contextvars for userId so concurrent requests sharing the same connector get correct _createdBy/_modifiedBy.
    """
    port = int(dbPort) if dbPort is not None else 5432
    key = (dbHost, dbDatabase, port)
    with _connector_cache_lock:
        if key not in _connector_cache:
            # Evict oldest if at capacity
            while len(_connector_cache) >= _MAX_CACHED_CONNECTORS and _connector_cache_order:
                oldest_key = _connector_cache_order.pop(0)
                if oldest_key in _connector_cache:
                    try:
                        _connector_cache[oldest_key].close(forceClose=True)
                    except Exception as e:
                        logger.warning(f"Error closing evicted connector: {e}")
                    del _connector_cache[oldest_key]
            _connector_cache[key] = DatabaseConnector(
                dbHost=dbHost,
                dbDatabase=dbDatabase,
                dbUser=dbUser,
                dbPassword=dbPassword,
                dbPort=dbPort,
                userId=userId,
            )
            _connector_cache[key]._isCachedShared = True
            _connector_cache_order.append(key)
        conn = _connector_cache[key]
    # Set the request-scoped userId via contextvar (avoids mutating the shared connector)
    if userId is not None:
        _current_user_id.set(userId)
    return conn
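
# Usage sketch (hedged): how a request handler might obtain a shared connector.
# The host/database/user values below are placeholders, not real configuration.
#
#     connector = _get_cached_connector(
#         dbHost="localhost",
#         dbDatabase="app_db",
#         dbUser="app_user",
#         dbPassword="secret",
#         userId="user-123",  # request-scoped via contextvar, not stored per request
#     )
#
# Concurrent requests against the same (host, database, port) reuse one instance;
# each request's userId stays isolated through _current_user_id.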


class DatabaseConnector:
    """
    A connector for PostgreSQL-based data storage.
    Provides generic database operations without user/mandate filtering.
    Uses PostgreSQL with JSONB columns for flexible data storage.
    """

    def __init__(
        self,
        dbHost: str,
        dbDatabase: str,
        dbUser: str = None,
        dbPassword: str = None,
        dbPort: int = None,
        userId: str = None,
    ):
        # Store the input parameters
        self.dbHost = dbHost
        self.dbDatabase = dbDatabase
        self.dbUser = dbUser
        self.dbPassword = dbPassword
        self.dbPort = dbPort

        # Set userId (default to empty string if None)
        self.userId = userId if userId is not None else ""

        # Initialize the database system first (creates the database if needed)
        self.connection = None
        self._isCachedShared = False
        self.initDbSystem()

        # No record caching needed - PostgreSQL handles performance

        # Thread safety
        self._lock = threading.Lock()

        # pgvector extension state
        self._vectorExtensionEnabled = False

        # Initialize the system table
        self._systemTableName = "_system"
        self._initializeSystemTable()

    def initDbSystem(self):
        """Initialize the database system - creates the database and system table."""
        try:
            # Create the database if it doesn't exist
            self._create_database_if_not_exists()

            # Create tables
            self._create_tables()

            # Establish a connection to the database
            self._connect()

            logger.info("PostgreSQL database system initialized successfully")
        except Exception as e:
            logger.error(f"FATAL ERROR: Database system initialization failed: {e}")
            raise

    def _create_database_if_not_exists(self):
        """Create the database if it doesn't exist."""
        try:
            # Use the configured user for database creation
            conn = psycopg2.connect(
                host=self.dbHost,
                port=self.dbPort,
                database="postgres",
                user=self.dbUser,
                password=self.dbPassword,
                client_encoding="utf8",
            )
            conn.autocommit = True

            with conn.cursor() as cursor:
                # Check whether the database exists
                cursor.execute(
                    "SELECT 1 FROM pg_database WHERE datname = %s", (self.dbDatabase,)
                )
                exists = cursor.fetchone()

                if not exists:
                    # Create the database with proper quoting for names with hyphens
                    quoted_db_name = f'"{self.dbDatabase}"'
                    cursor.execute(f"CREATE DATABASE {quoted_db_name}")
                    logger.info(f"Created database: {self.dbDatabase}")

            conn.close()

        except Exception as e:
            logger.error(f"FATAL ERROR: Cannot create database: {e}")
            logger.error("Database connection failed - application cannot start")
            raise RuntimeError(
                f"FATAL ERROR: Cannot create database '{self.dbDatabase}': {e}"
            )

    def _create_tables(self):
        """Create only the system table - application tables are created by interfaces."""
        try:
            # Use the configured user for table creation
            conn = psycopg2.connect(
                host=self.dbHost,
                port=self.dbPort,
                database=self.dbDatabase,
                user=self.dbUser,
                password=self.dbPassword,
                client_encoding="utf8",
            )
            conn.autocommit = True

            with conn.cursor() as cursor:
                # Create only the system table
                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS _system (
                        id SERIAL PRIMARY KEY,
                        table_name VARCHAR(255) UNIQUE NOT NULL,
                        initial_id VARCHAR(255) NOT NULL,
                        _createdAt DOUBLE PRECISION,
                        _modifiedAt DOUBLE PRECISION
                    )
                """)
            conn.close()

        except Exception as e:
            logger.error(f"FATAL ERROR: Cannot create system table: {e}")
            logger.error(
                "Database system table creation failed - application cannot start"
            )
            raise RuntimeError(f"FATAL ERROR: Cannot create system table: {e}")

    def _connect(self):
        """Establish a connection to the PostgreSQL database."""
        try:
            # Use the configured user for the main connection
            self.connection = psycopg2.connect(
                host=self.dbHost,
                port=self.dbPort,
                database=self.dbDatabase,
                user=self.dbUser,
                password=self.dbPassword,
                client_encoding="utf8",
                cursor_factory=psycopg2.extras.RealDictCursor,
            )
            self.connection.autocommit = False  # Use transactions
        except Exception as e:
            logger.error(f"Failed to connect to PostgreSQL: {e}")
            raise

    def _ensure_connection(self):
        """Ensure the database connection is alive, reconnect if necessary."""
        try:
            if self.connection is None or self.connection.closed:
                self._connect()
            else:
                # Test the connection with a simple query
                with self.connection.cursor() as cursor:
                    cursor.execute("SELECT 1")
        except Exception as e:
            logger.warning(f"Connection lost, reconnecting: {e}")
            self._connect()

    def _initializeSystemTable(self):
        """Initializes the system table if it doesn't exist yet."""
        try:
            # First ensure the system table exists
            self._ensureTableExists(SystemTable)

            with self.connection.cursor() as cursor:
                # Sanity check: the system table must be readable
                cursor.execute('SELECT COUNT(*) FROM "_system"')
                cursor.fetchone()

            self.connection.commit()
        except Exception as e:
            logger.error(f"Error initializing system table: {e}")
            self.connection.rollback()
            raise

    def _loadSystemTable(self) -> Dict[str, str]:
        """Loads the system table with the initial IDs."""
        try:
            with self.connection.cursor() as cursor:
                cursor.execute('SELECT "table_name", "initial_id" FROM "_system"')
                rows = cursor.fetchall()

            system_data = {}
            for row in rows:
                system_data[row["table_name"]] = row["initial_id"]

            return system_data
        except Exception as e:
            logger.error(f"Error loading system table: {e}")
            return {}

    def _saveSystemTable(self, data: Dict[str, str]) -> bool:
        """Saves the system table with the initial IDs."""
        try:
            with self.connection.cursor() as cursor:
                # Clear existing data
                cursor.execute('DELETE FROM "_system"')

                # Insert the new data
                for table_name, initial_id in data.items():
                    cursor.execute(
                        """
                        INSERT INTO "_system" ("table_name", "initial_id", "_modifiedAt")
                        VALUES (%s, %s, %s)
                        """,
                        (table_name, initial_id, getUtcTimestamp()),
                    )

            self.connection.commit()
            return True
        except Exception as e:
            logger.error(f"Error saving system table: {e}")
            self.connection.rollback()
            return False

    def _ensureSystemTableExists(self) -> bool:
        """Ensures the system table exists, creates it if it doesn't."""
        try:
            self._ensure_connection()

            with self.connection.cursor() as cursor:
                # Check whether the system table exists
                cursor.execute(
                    "SELECT COUNT(*) FROM pg_stat_user_tables WHERE relname = %s",
                    (self._systemTableName,),
                )
                exists = cursor.fetchone()["count"] > 0

                if not exists:
                    # Create the system table
                    cursor.execute(f"""
                        CREATE TABLE "{self._systemTableName}" (
                            "table_name" VARCHAR(255) PRIMARY KEY,
                            "initial_id" VARCHAR(255),
                            "_createdAt" DOUBLE PRECISION,
                            "_modifiedAt" DOUBLE PRECISION
                        )
                    """)
                    logger.info("System table created successfully")
                else:
                    # Check whether we need to add missing columns to the existing table
                    cursor.execute(
                        """
                        SELECT column_name FROM information_schema.columns
                        WHERE table_name = %s AND table_schema = 'public'
                        """,
                        (self._systemTableName,),
                    )
                    existing_columns = [row["column_name"] for row in cursor.fetchall()]

                    if "_modifiedAt" not in existing_columns:
                        cursor.execute(
                            f'ALTER TABLE "{self._systemTableName}" ADD COLUMN "_modifiedAt" DOUBLE PRECISION'
                        )

            return True
        except Exception as e:
            logger.error(f"Error ensuring system table exists: {e}")
            return False

    def _ensureTableExists(self, model_class: type) -> bool:
        """Ensures a table exists, creates it if it doesn't."""
        table = model_class.__name__

        if table == "SystemTable":
            # Handle the system table specially - it uses _system as the actual table name
            return self._ensureSystemTableExists()

        try:
            self._ensure_connection()

            with self.connection.cursor() as cursor:
                # Check whether the table exists via information_schema (case-insensitive)
                cursor.execute(
                    """
                    SELECT COUNT(*) FROM information_schema.tables
                    WHERE LOWER(table_name) = LOWER(%s) AND table_schema = 'public'
                    """,
                    (table,),
                )
                exists = cursor.fetchone()["count"] > 0

                if not exists:
                    # Create the table from the Pydantic model
                    self._create_table_from_model(cursor, table, model_class)
                    logger.info(
                        f"Created table '{table}' with columns from Pydantic model"
                    )
                else:
                    # Table exists: ensure all model columns are present (simple additive migration)
                    try:
                        cursor.execute(
                            """
                            SELECT column_name FROM information_schema.columns
                            WHERE LOWER(table_name) = LOWER(%s) AND table_schema = 'public'
                            """,
                            (table,),
                        )
                        existing_columns = {
                            row["column_name"] for row in cursor.fetchall()
                        }

                        # Desired columns based on the model
                        model_fields = _get_model_fields(model_class)
                        desired_columns = (
                            {"id"}
                            | set(model_fields.keys())
                            | {"_createdAt", "_modifiedAt", "_createdBy", "_modifiedBy"}
                        )

                        # Add missing columns
                        for col in sorted(desired_columns - existing_columns):
                            if col == "id":
                                continue  # primary key exists already
                            # Determine the SQL type
                            sql_type = model_fields.get(col)
                            if col in ("_createdAt", "_modifiedAt"):
                                sql_type = "DOUBLE PRECISION"
                            elif col in ("_createdBy", "_modifiedBy"):
                                sql_type = "VARCHAR(255)"
                            if not sql_type:
                                sql_type = "TEXT"
                            try:
                                cursor.execute(
                                    f'ALTER TABLE "{table}" ADD COLUMN "{col}" {sql_type}'
                                )
                                logger.info(
                                    f"Added missing column '{col}' ({sql_type}) to '{table}'"
                                )
                            except Exception as add_err:
                                logger.warning(
                                    f"Could not add column '{col}' to '{table}': {add_err}"
                                )
                    except Exception as ensure_err:
                        logger.warning(
                            f"Could not ensure columns for existing table '{table}': {ensure_err}"
                        )

            self.connection.commit()
            return True
        except Exception as e:
            logger.error(f"Error ensuring table {table} exists: {e}")
            if hasattr(self, "connection") and self.connection:
                self.connection.rollback()
            return False

    def _ensureVectorExtension(self) -> bool:
        """Enable the pgvector extension if not already enabled. Called lazily on the first vector table."""
        if self._vectorExtensionEnabled:
            return True
        try:
            self._ensure_connection()
            with self.connection.cursor() as cursor:
                cursor.execute("CREATE EXTENSION IF NOT EXISTS vector")
            self.connection.commit()
            self._vectorExtensionEnabled = True
            logger.info("pgvector extension enabled")
            return True
        except Exception as e:
            logger.error(f"Failed to enable pgvector extension: {e}")
            if hasattr(self, "connection") and self.connection:
                self.connection.rollback()
            return False

    def _create_table_from_model(self, cursor, table: str, model_class: type) -> None:
        """Create a table with columns matching the Pydantic model fields."""
        fields = _get_model_fields(model_class)

        # Enable pgvector if any field uses a vector type
        if any(_isVectorType(sqlType) for sqlType in fields.values()):
            self._ensureVectorExtension()

        # Build column definitions with quoted identifiers to preserve exact case
        columns = ['"id" VARCHAR(255) PRIMARY KEY']
        for field_name, sql_type in fields.items():
            if field_name != "id":  # Skip id, already defined
                columns.append(f'"{field_name}" {sql_type}')

        # Add metadata columns
        columns.extend(
            [
                '"_createdAt" DOUBLE PRECISION',
                '"_modifiedAt" DOUBLE PRECISION',
                '"_createdBy" VARCHAR(255)',
                '"_modifiedBy" VARCHAR(255)',
            ]
        )

        # Create the table
        sql = f'CREATE TABLE IF NOT EXISTS "{table}" ({", ".join(columns)})'
        cursor.execute(sql)

        # Create indexes for foreign-key-style columns (names ending in "Id")
        for field_name in fields:
            if field_name.endswith("Id") and field_name != "id":
                cursor.execute(
                    f'CREATE INDEX IF NOT EXISTS "idx_{table}_{field_name}" ON "{table}" ("{field_name}")'
                )
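
    # Illustrative only: for a hypothetical model with fields
    # {"name": str, "ownerId": str}, the generated DDL takes roughly this shape:
    #
    #     CREATE TABLE IF NOT EXISTS "MyModel" (
    #         "id" VARCHAR(255) PRIMARY KEY,
    #         "name" TEXT,
    #         "ownerId" TEXT,
    #         "_createdAt" DOUBLE PRECISION,
    #         "_modifiedAt" DOUBLE PRECISION,
    #         "_createdBy" VARCHAR(255),
    #         "_modifiedBy" VARCHAR(255)
    #     );
    #     CREATE INDEX IF NOT EXISTS "idx_MyModel_ownerId" ON "MyModel" ("ownerId");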

    def _save_record(
        self,
        cursor,
        table: str,
        recordId: str,
        record: Dict[str, Any],
        model_class: type,
    ) -> None:
        """Save a record to the normalized table with explicit columns."""
        # Get the columns from the Pydantic model instead of the database schema
        fields = _get_model_fields(model_class)
        columns = (
            ["id"]
            + [field for field in fields.keys() if field != "id"]
            + ["_createdAt", "_createdBy", "_modifiedAt", "_modifiedBy"]
        )

        if not columns:
            logger.error(f"No columns found for table {table}")
            return

        # Filter record data to only include columns that exist in the table
        filtered_record = {k: v for k, v in record.items() if k in columns}

        # Ensure id is set
        filtered_record["id"] = recordId

        # Prepare values in the correct order
        values = []
        for col in columns:
            value = filtered_record.get(col)

            # Handle timestamp fields - store as Unix timestamps (floats) for consistency
            if col in ["_createdAt", "_modifiedAt"] and value is not None:
                if isinstance(value, str):
                    # Try to parse the string as a timestamp
                    try:
                        value = float(value)
                    except (ValueError, TypeError):
                        pass  # Keep as string if parsing fails

            # Convert enum values to their string representation
            elif hasattr(value, "value"):
                value = value.value

            # Handle vector fields (pgvector) - convert List[float] to string
            elif col in fields and _isVectorType(fields[col]) and value is not None:
                if isinstance(value, list):
                    value = f"[{','.join(str(v) for v in value)}]"

            # Handle JSONB fields - ensure proper JSON format for PostgreSQL
            elif col in fields and fields[col] == "JSONB" and value is not None:
                import json

                if isinstance(value, (dict, list)):
                    value = json.dumps(value)
                elif isinstance(value, str):
                    try:
                        json.loads(value)
                    except (json.JSONDecodeError, TypeError):
                        value = json.dumps(value)
                elif hasattr(value, "model_dump"):
                    value = json.dumps(value.model_dump())
                else:
                    value = json.dumps(value)

            values.append(value)

        # Build an INSERT ... ON CONFLICT upsert with quoted identifiers
        col_names = ", ".join([f'"{col}"' for col in columns])
        placeholders = ", ".join(["%s"] * len(columns))
        updates = ", ".join(
            [
                f'"{col}" = EXCLUDED."{col}"'
                for col in columns[1:]
                if col not in ["_createdAt", "_createdBy"]
            ]
        )

        sql = f'INSERT INTO "{table}" ({col_names}) VALUES ({placeholders}) ON CONFLICT ("id") DO UPDATE SET {updates}'

        cursor.execute(sql, values)
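
    # Illustrative only: for a hypothetical model with a single "name" field,
    # the statement built above takes roughly this shape (values are bound
    # parameters, never interpolated):
    #
    #     INSERT INTO "MyModel" ("id", "name", "_createdAt", "_createdBy",
    #                            "_modifiedAt", "_modifiedBy")
    #     VALUES (%s, %s, %s, %s, %s, %s)
    #     ON CONFLICT ("id") DO UPDATE SET
    #         "name" = EXCLUDED."name",
    #         "_modifiedAt" = EXCLUDED."_modifiedAt",
    #         "_modifiedBy" = EXCLUDED."_modifiedBy"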

    def _loadRecord(self, model_class: type, recordId: str) -> Optional[Dict[str, Any]]:
        """Loads a single record from the normalized table."""
        table = model_class.__name__

        try:
            if not self._ensureTableExists(model_class):
                return None

            with self.connection.cursor() as cursor:
                cursor.execute(f'SELECT * FROM "{table}" WHERE "id" = %s', (recordId,))
                row = cursor.fetchone()
                if not row:
                    return None

            # Convert the row to a dict and normalize typed fields
            record = dict(row)
            fields = _get_model_fields(model_class)

            _parseRecordFields(record, fields, f"record {recordId}")

            return record
        except Exception as e:
            logger.error(f"Error loading record {recordId} from table {table}: {e}")
            return None

    def _saveRecord(
        self, model_class: type, recordId: str, record: Dict[str, Any]
    ) -> bool:
        """Saves a single record to the table."""
        table = model_class.__name__

        try:
            if not self._ensureTableExists(model_class):
                return False

            recordId = str(recordId)
            if "id" in record and str(record["id"]) != recordId:
                raise ValueError(f"Record ID mismatch: {recordId} != {record['id']}")

            # Add metadata - use the contextvar for the request-scoped userId when sharing a connector
            effective_user_id = _current_user_id.get()
            if effective_user_id is None:
                effective_user_id = self.userId
            currentTime = getUtcTimestamp()
            # Set _createdAt and _createdBy if this is a new record (no _createdAt yet)
            if "_createdAt" not in record:
                record["_createdAt"] = currentTime
                if effective_user_id:
                    record["_createdBy"] = effective_user_id
            elif "_createdBy" not in record or not record.get("_createdBy"):
                if effective_user_id:
                    record["_createdBy"] = effective_user_id
            record["_modifiedAt"] = currentTime
            if effective_user_id:
                record["_modifiedBy"] = effective_user_id

            with self.connection.cursor() as cursor:
                self._save_record(cursor, table, recordId, record, model_class)

            self.connection.commit()
            return True
        except Exception as e:
            logger.error(f"Error saving record {recordId} to table {table}: {e}")
            self.connection.rollback()
            return False

    def _loadTable(self, model_class: type) -> List[Dict[str, Any]]:
        """Loads all records from a normalized table.

        Note: for the system table this delegates to _loadSystemTable, which
        returns a {table_name: initial_id} mapping rather than a record list.
        """
        table = model_class.__name__

        if table == self._systemTableName:
            return self._loadSystemTable()

        try:
            if not self._ensureTableExists(model_class):
                return []

            with self.connection.cursor() as cursor:
                cursor.execute(f'SELECT * FROM "{table}" ORDER BY "id"')
                records = [dict(row) for row in cursor.fetchall()]

            fields = _get_model_fields(model_class)
            modelFields = model_class.model_fields
            for record in records:
                _parseRecordFields(record, fields, f"table {table}")
                # Set type-aware defaults for NULL JSONB fields
                _applyJsonbDefaults(record, fields, modelFields)

            return records
        except Exception as e:
            logger.error(f"Error loading table {table}: {e}")
            return []

    def _registerInitialId(self, table: str, initialId: str) -> bool:
        """Registers the initial ID for a table."""
        try:
            systemData = self._loadSystemTable()

            if table not in systemData:
                systemData[table] = initialId
                success = self._saveSystemTable(systemData)
                if success:
                    logger.info(f"Initial ID {initialId} for table {table} registered")
                return success
            else:
                # The table already has an initial ID registered
                logger.debug(f"Table {table} already has initial ID {systemData[table]}")
                return True
        except Exception as e:
            logger.error(f"Error registering the initial ID for table {table}: {e}")
            return False

    def _removeInitialId(self, table: str) -> bool:
        """Removes the initial ID for a table from the system table."""
        try:
            systemData = self._loadSystemTable()

            if table in systemData:
                del systemData[table]
                success = self._saveSystemTable(systemData)
                if success:
                    logger.info(
                        f"Initial ID for table {table} removed from system table"
                    )
                return success
            return True  # If not present, this is not an error
        except Exception as e:
            logger.error(f"Error removing initial ID for table {table}: {e}")
            return False

    def updateContext(self, userId: str) -> None:
        """Updates the context of the database connector.

        Sets both the instance userId and the contextvar for request-scoped use
        when the connector is shared.
        """
        if userId is None:
            raise ValueError("userId must be provided")
        self.userId = userId
        _current_user_id.set(userId)

    # Public API

    def getTables(self) -> List[str]:
        """Returns a list of all available tables."""
        tables = []

        try:
            # Ensure the connection is alive
            self._ensure_connection()

            if not self.connection or self.connection.closed:
                logger.error("Database connection is not available")
                return tables

            with self.connection.cursor() as cursor:
                cursor.execute("""
                    SELECT table_name
                    FROM information_schema.tables
                    WHERE table_schema = 'public'
                    ORDER BY table_name
                """)
                rows = cursor.fetchall()
                tables = [row["table_name"] for row in rows]
        except Exception as e:
            logger.error(f"Error reading the database {self.dbDatabase}: {e}")

        return tables

    def getFields(self, model_class: type) -> List[str]:
        """Returns a list of all fields in a table, based on its first record."""
        data = self._loadTable(model_class)

        if not data:
            return []

        return list(data[0].keys())

    def getSchema(
        self, model_class: type, language: str = None
    ) -> Dict[str, Dict[str, Any]]:
        """Returns a schema object for a table with data types and labels,
        derived from the first record."""
        data = self._loadTable(model_class)

        schema = {}

        if not data:
            return schema

        firstRecord = data[0]

        for field, value in firstRecord.items():
            dataType = type(value).__name__
            label = field

            schema[field] = {"type": dataType, "label": label}

        return schema

    def getRecordset(
        self,
        model_class: type,
        fieldFilter: List[str] = None,
        recordFilter: Dict[str, Any] = None,
    ) -> List[Dict[str, Any]]:
        """Returns a list of records from a table, filtered by criteria."""
        table = model_class.__name__

        try:
            if not self._ensureTableExists(model_class):
                return []

            # Build the WHERE clause from recordFilter
            where_conditions = []
            where_values = []

            if recordFilter:
                for field, value in recordFilter.items():
                    if value is None:
                        # Use IS NULL for None values (= NULL is always false in SQL)
                        where_conditions.append(f'"{field}" IS NULL')
                    else:
                        where_conditions.append(f'"{field}" = %s')
                        where_values.append(value)

            # Build the query
            if where_conditions:
                where_clause = " WHERE " + " AND ".join(where_conditions)
            else:
                where_clause = ""

            query = f'SELECT * FROM "{table}"{where_clause} ORDER BY "id"'

            with self.connection.cursor() as cursor:
                cursor.execute(query, where_values)
                records = [dict(row) for row in cursor.fetchall()]

            fields = _get_model_fields(model_class)
            modelFields = model_class.model_fields
            for record in records:
                _parseRecordFields(record, fields, f"table {table}")
                _applyJsonbDefaults(record, fields, modelFields)

            # If a fieldFilter is given, reduce each record to those fields
            if fieldFilter and isinstance(fieldFilter, list):
                return [
                    {field: record[field] for field in fieldFilter if field in record}
                    for record in records
                ]

            return records
        except Exception as e:
            logger.error(f"Error loading records from table {table}: {e}")
            return []

    def _buildPaginationClauses(
        self,
        model_class: type,
        pagination,
        recordFilter: Dict[str, Any] = None,
    ):
        """
        Translate PaginationParams + recordFilter into SQL clauses.
        Returns (where_clause, order_clause, limit_clause, values, count_values).
        """
        fields = _get_model_fields(model_class)
        validColumns = set(fields.keys())
        where_parts: List[str] = []
        values: List[Any] = []

        if recordFilter:
            for field, value in recordFilter.items():
                if value is None:
                    where_parts.append(f'"{field}" IS NULL')
                elif isinstance(value, list):
                    where_parts.append(f'"{field}" = ANY(%s)')
                    values.append(value)
                else:
                    where_parts.append(f'"{field}" = %s')
                    values.append(value)

        if pagination and pagination.filters:
            for key, val in pagination.filters.items():
                if key == "search" and isinstance(val, str) and val.strip():
                    term = f"%{val.strip()}%"
                    textCols = [c for c, t in fields.items() if t == "TEXT"]
                    if textCols:
                        orParts = [f'COALESCE("{c}"::TEXT, \'\') ILIKE %s' for c in textCols]
                        where_parts.append(f"({' OR '.join(orParts)})")
                        values.extend([term] * len(textCols))
                    continue
                if key not in validColumns:
                    logger.debug(f"_buildPaginationClauses: key '{key}' NOT in validColumns {list(validColumns)[:10]}")
                    continue
                colType = fields.get(key, "TEXT")
                logger.debug(f"_buildPaginationClauses: filter key='{key}' val={val!r} type(val)={type(val).__name__} colType={colType}")
                if isinstance(val, dict):
                    op = val.get("operator", "equals")
                    v = val.get("value", "")
                    if op in ("equals", "eq"):
                        if colType == "BOOLEAN":
                            where_parts.append(f'COALESCE("{key}", FALSE) = %s')
                            values.append(str(v).lower() == "true")
                        else:
                            where_parts.append(f'"{key}"::TEXT = %s')
                            values.append(str(v))
                    elif op == "contains":
                        where_parts.append(f'"{key}"::TEXT ILIKE %s')
                        values.append(f"%{v}%")
                    elif op == "startsWith":
                        where_parts.append(f'"{key}"::TEXT ILIKE %s')
                        values.append(f"{v}%")
                    elif op == "endsWith":
                        where_parts.append(f'"{key}"::TEXT ILIKE %s')
                        values.append(f"%{v}")
                    elif op in ("gt", "gte", "lt", "lte"):
                        sqlOp = {"gt": ">", "gte": ">=", "lt": "<", "lte": "<="}[op]
                        if colType in ("INTEGER", "DOUBLE PRECISION"):
                            # Compare numerically; a TEXT cast would order "10" before "9"
                            try:
                                numVal = float(v)
                            except (ValueError, TypeError):
                                numVal = None
                            if numVal is not None:
                                where_parts.append(f'"{key}" {sqlOp} %s')
                                values.append(numVal)
                            else:
                                where_parts.append(f'"{key}"::TEXT {sqlOp} %s')
                                values.append(str(v))
                        else:
                            where_parts.append(f'"{key}"::TEXT {sqlOp} %s')
                            values.append(str(v))
                    elif op == "between":
                        fromVal = v.get("from", "") if isinstance(v, dict) else ""
                        toVal = v.get("to", "") if isinstance(v, dict) else ""
                        if not fromVal and not toVal:
                            continue
                        isNumericCol = colType in ("INTEGER", "DOUBLE PRECISION")
                        isDateVal = bool(fromVal and re.match(r'^\d{4}-\d{2}-\d{2}$', str(fromVal))) or \
                                    bool(toVal and re.match(r'^\d{4}-\d{2}-\d{2}$', str(toVal)))
                        if isNumericCol and isDateVal:
                            # Date strings against numeric timestamp columns:
                            # convert to UTC Unix timestamps covering the full days
                            from datetime import datetime as _dt, timezone as _tz
                            if fromVal and toVal:
                                fromTs = _dt.strptime(str(fromVal), '%Y-%m-%d').replace(tzinfo=_tz.utc).timestamp()
                                toTs = _dt.strptime(str(toVal), '%Y-%m-%d').replace(hour=23, minute=59, second=59, tzinfo=_tz.utc).timestamp()
                                where_parts.append(f'"{key}" >= %s AND "{key}" <= %s')
                                values.extend([fromTs, toTs])
                            elif fromVal:
                                fromTs = _dt.strptime(str(fromVal), '%Y-%m-%d').replace(tzinfo=_tz.utc).timestamp()
                                where_parts.append(f'"{key}" >= %s')
                                values.append(fromTs)
                            else:
                                toTs = _dt.strptime(str(toVal), '%Y-%m-%d').replace(hour=23, minute=59, second=59, tzinfo=_tz.utc).timestamp()
                                where_parts.append(f'"{key}" <= %s')
                                values.append(toTs)
                        else:
                            if fromVal and toVal:
                                where_parts.append(f'"{key}"::TEXT >= %s AND "{key}"::TEXT <= %s')
                                values.extend([str(fromVal), str(toVal)])
                            elif fromVal:
                                where_parts.append(f'"{key}"::TEXT >= %s')
                                values.append(str(fromVal))
                            elif toVal:
                                where_parts.append(f'"{key}"::TEXT <= %s')
                                values.append(str(toVal))
                else:
                    if colType == "BOOLEAN":
                        where_parts.append(f'COALESCE("{key}", FALSE) = %s')
                        values.append(str(val).lower() == "true")
                    else:
                        where_parts.append(f'"{key}"::TEXT ILIKE %s')
                        values.append(str(val))

        where_clause = " WHERE " + " AND ".join(where_parts) if where_parts else ""
        count_values = list(values)

        orderParts: List[str] = []
        if pagination and pagination.sort:
            for sf in pagination.sort:
                if sf.field in validColumns:
                    direction = "DESC" if sf.direction.lower() == "desc" else "ASC"
                    colType = fields.get(sf.field, "TEXT")
                    if colType == "BOOLEAN":
                        orderParts.append(f'COALESCE("{sf.field}", FALSE) {direction}')
                    else:
                        orderParts.append(f'"{sf.field}" {direction} NULLS LAST')
        if not orderParts:
            orderParts.append('"id"')
        order_clause = " ORDER BY " + ", ".join(orderParts)

        limit_clause = ""
        if pagination:
            offset = (pagination.page - 1) * pagination.pageSize
            limit_clause = f" LIMIT {pagination.pageSize} OFFSET {offset}"

        return where_clause, order_clause, limit_clause, values, count_values
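
    # Example (hedged; field names are illustrative): a pagination.filters
    # payload such as
    #     {"search": "acme", "status": {"operator": "equals", "value": "open"}}
    # produces, for a model whose TEXT columns include "title" and "status",
    # WHERE parts like
    #     (COALESCE("title"::TEXT, '') ILIKE %s OR COALESCE("status"::TEXT, '') ILIKE %s)
    #     "status"::TEXT = %s
    # joined with AND, with bound values ["%acme%", "%acme%", "open"].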

    def getRecordsetPaginated(
        self,
        model_class: type,
        pagination=None,
        recordFilter: Dict[str, Any] = None,
        fieldFilter: List[str] = None,
    ) -> Dict[str, Any]:
        """
        Returns paginated records with filtering + sorting at the SQL level.
        Returns { "items": [...], "totalItems": int, "totalPages": int }.
        If pagination is None, returns all records (no LIMIT/OFFSET).
        """
        import math

        table = model_class.__name__

        try:
            if not self._ensureTableExists(model_class):
                return {"items": [], "totalItems": 0, "totalPages": 0}

            where_clause, order_clause, limit_clause, values, count_values = \
                self._buildPaginationClauses(model_class, pagination, recordFilter)

            with self.connection.cursor() as cursor:
                countSql = f'SELECT COUNT(*) FROM "{table}"{where_clause}'
                cursor.execute(countSql, count_values)
                totalItems = cursor.fetchone()["count"]

                dataSql = f'SELECT * FROM "{table}"{where_clause}{order_clause}{limit_clause}'
                cursor.execute(dataSql, values)
                records = [dict(row) for row in cursor.fetchall()]

            fields = _get_model_fields(model_class)
            modelFields = model_class.model_fields
            for record in records:
                _parseRecordFields(record, fields, f"table {table}")
                _applyJsonbDefaults(record, fields, modelFields)

            if fieldFilter and isinstance(fieldFilter, list):
                records = [{f: r[f] for f in fieldFilter if f in r} for r in records]

            pageSize = pagination.pageSize if pagination else max(totalItems, 1)
            totalPages = math.ceil(totalItems / pageSize) if totalItems > 0 else 0

            return {"items": records, "totalItems": totalItems, "totalPages": totalPages}
        except Exception as e:
            logger.error(f"Error in getRecordsetPaginated for table {table}: {e}")
            return {"items": [], "totalItems": 0, "totalPages": 0}

    def getDistinctColumnValues(
        self,
        model_class: type,
        column: str,
        pagination=None,
        recordFilter: Dict[str, Any] = None,
    ) -> List[str]:
        """
        Returns sorted distinct non-null values for a column using SQL DISTINCT.
        Applies cross-filtering (all filters except the requested column).
        """
        table = model_class.__name__
        fields = _get_model_fields(model_class)

        if column not in fields:
            return []

        try:
            if not self._ensureTableExists(model_class):
                return []

            if pagination and pagination.filters and column in pagination.filters:
                # Cross-filtering: drop the requested column's own filter
                import copy

                pagination = copy.deepcopy(pagination)
                pagination.filters.pop(column, None)

            where_clause, _, _, values, _ = \
                self._buildPaginationClauses(model_class, pagination, recordFilter)

            # Exclude NULL and empty values; chain onto an existing WHERE if present
            notEmpty = f'"{column}" IS NOT NULL AND "{column}"::TEXT != \'\''
            keyword = "AND" if where_clause else "WHERE"
            sql = (
                f'SELECT DISTINCT "{column}"::TEXT AS val FROM "{table}"{where_clause} '
                f'{keyword} {notEmpty} ORDER BY val'
            )

            with self.connection.cursor() as cursor:
                cursor.execute(sql, values)
                return [row["val"] for row in cursor.fetchall()]
        except Exception as e:
            logger.error(f"Error in getDistinctColumnValues for {table}.{column}: {e}")
            return []

    def recordCreate(
        self, model_class: type, record: Union[Dict[str, Any], BaseModel]
    ) -> Dict[str, Any]:
        """Creates a new record in a table based on the Pydantic model class."""
        # If the record is a Pydantic model, convert it to a dict
        if isinstance(record, BaseModel):
            record = record.model_dump()
        elif isinstance(record, dict):
            record = record.copy()
        else:
            raise ValueError("Record must be a Pydantic model or dictionary")

        # Ensure the record has an ID
        if "id" not in record:
            record["id"] = str(uuid.uuid4())

        # Save the record
        success = self._saveRecord(model_class, record["id"], record)
        if not success:
            table = model_class.__name__
            raise ValueError(f"Failed to save record {record['id']} to table {table}")

        # If this is the first record in the table, register it as the initial ID
        table = model_class.__name__
        existingInitialId = self.getInitialId(model_class)
        if existingInitialId is None:
            self._registerInitialId(table, record["id"])
            logger.info(f"Registered initial ID {record['id']} for table {table}")

        return record
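
    # Usage sketch (illustrative model name): records may be dicts or Pydantic
    # instances; an id is generated when absent.
    #
    #     created = connector.recordCreate(Invoice, {"amount": 99.5})
    #     created["id"]  # generated UUID string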

    def recordModify(
        self, model_class: type, recordId: str, record: Union[Dict[str, Any], BaseModel]
    ) -> Dict[str, Any]:
        """Modifies an existing record in a table based on the Pydantic model class."""
        # Load the existing record
        existingRecord = self._loadRecord(model_class, recordId)
        if not existingRecord:
            table = model_class.__name__
            raise ValueError(f"Record {recordId} not found in table {table}")

        # If the record is a Pydantic model, convert it to a dict
        if isinstance(record, BaseModel):
            record = record.model_dump()
        elif isinstance(record, dict):
            record = record.copy()
        else:
            raise ValueError("Record must be a Pydantic model or dictionary")

        # CRITICAL: Ensure we never modify the ID
        if "id" in record and str(record["id"]) != recordId:
            logger.error(
                f"Attempted to modify record ID from {recordId} to {record['id']}"
            )
            raise ValueError(
                "Cannot modify record ID - it must match the provided recordId"
            )

        # Update the existing record with the new data
        existingRecord.update(record)

        # Save the updated record
        saved = self._saveRecord(model_class, recordId, existingRecord)
        if not saved:
            table = model_class.__name__
            raise ValueError(f"Failed to save record {recordId} to table {table}")
        return existingRecord

    def recordDelete(self, model_class: type, recordId: str) -> bool:
        """Deletes a record from the table based on the Pydantic model class."""
        table = model_class.__name__

        try:
            if not self._ensureTableExists(model_class):
                return False

            with self.connection.cursor() as cursor:
                # Check whether the record exists
                cursor.execute(
                    f'SELECT "id" FROM "{table}" WHERE "id" = %s', (recordId,)
                )
                if not cursor.fetchone():
                    return False

                # Check whether it's the initial record
                initialId = self.getInitialId(model_class)
                if initialId is not None and initialId == recordId:
                    self._removeInitialId(table)
                    logger.info(
                        f"Initial ID {recordId} for table {table} has been removed from the system table"
                    )

                # Delete the record
                cursor.execute(f'DELETE FROM "{table}" WHERE "id" = %s', (recordId,))

            # No cache to update - the database handles consistency
            self.connection.commit()
            return True

        except Exception as e:
            logger.error(f"Error deleting record {recordId} from table {table}: {e}")
            self.connection.rollback()
            return False

    def getInitialId(self, model_class: type) -> Optional[str]:
        """Returns the initial ID for a table."""
        table = model_class.__name__
        systemData = self._loadSystemTable()
        return systemData.get(table)

    def semanticSearch(
        self,
        modelClass: type,
        vectorColumn: str,
        queryVector: List[float],
        limit: int = 10,
        recordFilter: Dict[str, Any] = None,
        minScore: float = None,
    ) -> List[Dict[str, Any]]:
        """Semantic search using pgvector cosine distance.

        Args:
            modelClass: Pydantic model class for the table.
            vectorColumn: Name of the vector column to search.
            queryVector: Query vector as List[float].
            limit: Maximum number of results.
            recordFilter: Additional WHERE filters (field: value).
            minScore: Minimum cosine similarity (0.0 - 1.0).

        Returns:
            List of records with an added '_score' field (cosine similarity),
            sorted by similarity descending.
        """
        table = modelClass.__name__

        try:
            if not self._ensureTableExists(modelClass):
                return []

            vectorStr = f"[{','.join(str(v) for v in queryVector)}]"

            whereConditions = []
            whereValues = []

            if recordFilter:
                for field, value in recordFilter.items():
                    if value is None:
                        whereConditions.append(f'"{field}" IS NULL')
                    elif isinstance(value, (list, tuple)):
                        if not value:
                            # An empty list can never match; short-circuit the query
                            whereConditions.append("1 = 0")
                        else:
                            whereConditions.append(f'"{field}" = ANY(%s)')
                            whereValues.append(list(value))
                    else:
                        whereConditions.append(f'"{field}" = %s')
                        whereValues.append(value)

            if minScore is not None:
                # <=> is pgvector cosine distance; 1 - distance is cosine similarity
                whereConditions.append(
                    f'1 - ("{vectorColumn}" <=> %s::vector) >= %s'
                )
                whereValues.extend([vectorStr, minScore])

            whereClause = ""
            if whereConditions:
                whereClause = " WHERE " + " AND ".join(whereConditions)

            query = (
                f'SELECT *, 1 - ("{vectorColumn}" <=> %s::vector) AS "_score" '
                f'FROM "{table}"{whereClause} '
                f'ORDER BY "{vectorColumn}" <=> %s::vector '
                f'LIMIT %s'
            )
            params = [vectorStr] + whereValues + [vectorStr, limit]

            with self.connection.cursor() as cursor:
                cursor.execute(query, params)
                records = [dict(row) for row in cursor.fetchall()]

            fields = _get_model_fields(modelClass)
            for record in records:
                _parseRecordFields(record, fields, f"semanticSearch {table}")

            return records
        except Exception as e:
            logger.error(f"Error in semantic search on {table}: {e}")
            return []
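
    # Usage sketch (hedged; the model and the embedding helper are illustrative):
    #
    #     hits = connector.semanticSearch(
    #         DocumentChunk,
    #         vectorColumn="embedding",
    #         queryVector=embedQuery("contract termination"),  # hypothetical helper
    #         limit=5,
    #         minScore=0.75,
    #     )
    #     for hit in hits:
    #         print(hit["_score"], hit["text"])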

    def close(self, forceClose: bool = False):
        """Close the database connection.

        Shared cached connectors are intentionally kept open unless forceClose=True.
        This prevents accidental shutdown from interface __del__ methods while
        other requests are still using the same cached connector instance.
        """
        if self._isCachedShared and not forceClose:
            return
        if (
            hasattr(self, "connection")
            and self.connection
            and not self.connection.closed
        ):
            self.connection.close()

    def __del__(self):
        """Cleanup method to close the connection."""
        try:
            self.close()
        except Exception:
            pass