# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Database health utilities: table statistics and orphan detection/cleanup.

All functions are intended for SysAdmin use only (access control lives in the route layer).
"""

import logging
import time
import threading
from dataclasses import dataclass, asdict
from typing import Any, Dict, List, Optional, Set

import psycopg2
import psycopg2.extras

from modules.shared.configuration import APP_CONFIG
from modules.shared.dbRegistry import _getRegisteredDatabases
from modules.shared.fkRegistry import _getFkRelationships, FkRelationship

logger = logging.getLogger(__name__)

_ORPHAN_CACHE_TTL = 300  # 5 minutes
_orphanCacheLock = threading.Lock()
_orphanCache: Optional[Dict] = None  # {"ts": float, "results": [...]}
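

# Typical call flow, as a hedged sketch only -- the importing module path and the
# database/table/column names below are illustrative assumptions, not part of this
# module's contract (only "FeatureInstance" is mentioned elsewhere in this file):
#
#     from modules.shared import dbHealth                     # path is an assumption
#
#     stats = dbHealth._getTableStats(dbFilter="coreDb")      # "coreDb" illustrative
#     orphans = dbHealth._scanOrphans()                        # cached for _ORPHAN_CACHE_TTL
#     deleted = dbHealth._cleanOrphans("coreDb", "FeatureInstance", "userId")
#
# The route layer is expected to enforce SysAdmin access before calling any of
# these helpers; nothing in this module checks permissions itself.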


# ---------------------------------------------------------------------------
# Dataclasses
# ---------------------------------------------------------------------------

@dataclass
class TableStats:
    db: str
    table: str
    estimatedRows: int
    totalSizeBytes: int
    indexSizeBytes: int
    lastVacuum: Optional[str]
    lastAnalyze: Optional[str]


@dataclass
class OrphanResult:
    sourceDb: str
    sourceTable: str
    sourceColumn: str
    targetDb: str
    targetTable: str
    targetColumn: str
    orphanCount: int
    sourceRowCount: int = 0
    targetRowCount: int = 0
    targetEmpty: bool = False
    wouldDeleteAll: bool = False


@dataclass
class OrphanRecord:
    """A single orphan source row -- the unresolved FK value plus the full row data.

    Used by the SysAdmin UI download button so the human can verify the orphan
    list before pressing "clean".
    """
    sourceDb: str
    sourceTable: str
    sourceColumn: str
    targetDb: str
    targetTable: str
    targetColumn: str
    orphanFkValue: str
    rowId: Optional[str]
    row: Dict


# ---------------------------------------------------------------------------
# Safety thresholds for cleanup
# ---------------------------------------------------------------------------

# If a single cleanup would delete more than this fraction of the source table,
# refuse without an explicit force=True. Protects against catastrophic wipes
# caused by misconfigured / empty target tables.
_MAX_CLEANUP_FRACTION = 0.5


class OrphanCleanupRefused(Exception):
    """Raised when a cleanup is refused for safety reasons (use force=True to override)."""


# ---------------------------------------------------------------------------
# Low-level DB helpers (read-only, lightweight connections)
# ---------------------------------------------------------------------------

def _getConnection(dbName: str):
    """Open a psycopg2 connection to the given registered database."""
    registeredDbs = _getRegisteredDatabases()
    configPrefix = registeredDbs.get(dbName)
    if configPrefix is None:
        raise ValueError(f"Database '{dbName}' is not registered.")

    hostKey = f"{configPrefix}_HOST" if configPrefix != "DB" else "DB_HOST"
    portKey = f"{configPrefix}_PORT" if configPrefix != "DB" else "DB_PORT"
    userKey = f"{configPrefix}_USER" if configPrefix != "DB" else "DB_USER"
    passwordKey = (
        f"{configPrefix}_PASSWORD_SECRET" if configPrefix != "DB" else "DB_PASSWORD_SECRET"
    )

    return psycopg2.connect(
        host=APP_CONFIG.get(hostKey, "localhost"),
        port=int(APP_CONFIG.get(portKey, 5432)),
        database=dbName,
        user=APP_CONFIG.get(userKey),
        password=APP_CONFIG.get(passwordKey),
        client_encoding="utf8",
        cursor_factory=psycopg2.extras.RealDictCursor,
    )


# ---------------------------------------------------------------------------
# Table statistics
# ---------------------------------------------------------------------------

def _getTableStats(dbFilter: Optional[str] = None) -> List[dict]:
    """Query pg_stat_user_tables + pg_total_relation_size for every registered DB.

    Returns a list of TableStats dicts, optionally filtered by database name.
    """
    registeredDbs = _getRegisteredDatabases()
    if dbFilter:
        registeredDbs = {k: v for k, v in registeredDbs.items() if k == dbFilter}

    results: List[dict] = []
    for dbName in sorted(registeredDbs):
        try:
            conn = _getConnection(dbName)
            try:
                with conn.cursor() as cur:
                    cur.execute("""
                        SELECT
                            s.relname AS "table",
                            s.n_live_tup AS "estimatedRows",
                            pg_total_relation_size(quote_ident(s.relname)) AS "totalSizeBytes",
                            pg_indexes_size(quote_ident(s.relname)) AS "indexSizeBytes",
                            s.last_vacuum::text AS "lastVacuum",
                            s.last_analyze::text AS "lastAnalyze"
                        FROM pg_stat_user_tables s
                        WHERE s.schemaname = 'public'
                          AND s.relname NOT LIKE '\\_%%'
                        ORDER BY s.relname
                    """)
                    for row in cur.fetchall():
                        results.append(asdict(TableStats(
                            db=dbName,
                            table=row["table"],
                            estimatedRows=row["estimatedRows"],
                            totalSizeBytes=row["totalSizeBytes"],
                            indexSizeBytes=row["indexSizeBytes"],
                            lastVacuum=row["lastVacuum"],
                            lastAnalyze=row["lastAnalyze"],
                        )))
            finally:
                conn.close()
        except Exception as e:
            logger.error(f"Failed to get table stats for {dbName}: {e}")

    return results
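

# Each entry returned by _getTableStats() is a plain dict (via asdict) with the
# TableStats field names; the concrete values in this sketch are illustrative only:
#
#     {
#         "db": "coreDb",
#         "table": "FeatureInstance",
#         "estimatedRows": 12345,
#         "totalSizeBytes": 8388608,
#         "indexSizeBytes": 1048576,
#         "lastVacuum": "2025-01-01 03:00:00+00",
#         "lastAnalyze": None,
#     }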


# ---------------------------------------------------------------------------
# Orphan scanning
# ---------------------------------------------------------------------------

def _loadParentIds(conn, tableName: str, columnName: str) -> Set[str]:
    """Load all distinct values of a column from a table (for cross-DB checks)."""
    ids: Set[str] = set()
    with conn.cursor() as cur:
        cur.execute(f'SELECT DISTINCT "{columnName}" FROM "{tableName}"')
        for row in cur.fetchall():
            val = row[columnName]
            if val is not None:
                ids.add(str(val))
    return ids


def _loadPhysicalColumns(conn, tableName: str) -> Set[str]:
    """Return the set of physical (scalar) columns present on a table.

    Used by the orphan scanner to skip FK relationships whose ``sourceColumn``
    is annotated on the Pydantic model but does NOT exist as a physical column
    -- e.g. virtual / computed fields, or fields that the database interface
    decided to fold into a JSONB blob (List/Dict typed fields). Comparing a
    JSONB array against a scalar via ``=`` always fails and would otherwise
    flag every single source row as an orphan (the user-reported "false
    positives").
    """
    cols: Set[str] = set()
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT column_name
                FROM information_schema.columns
                WHERE table_schema = 'public' AND table_name = %s
                """,
                (tableName,),
            )
            for row in cur.fetchall():
                cols.add(row["column_name"])
    except Exception:
        pass
    return cols


def _countRows(conn, tableName: str) -> int:
    """Count physical rows in a table. Returns 0 on any error."""
    try:
        with conn.cursor() as cur:
            cur.execute(f'SELECT COUNT(*) AS cnt FROM "{tableName}"')
            return int(cur.fetchone()["cnt"])
    except Exception:
        return 0


def _countNonNullSource(conn, tableName: str, columnName: str) -> int:
    """Count source rows where the FK column is non-null/non-empty."""
    try:
        with conn.cursor() as cur:
            cur.execute(f"""
                SELECT COUNT(*) AS cnt
                FROM "{tableName}"
                WHERE "{columnName}" IS NOT NULL
                  AND "{columnName}" != ''
            """)
            return int(cur.fetchone()["cnt"])
    except Exception:
        return 0


def _countOrphansSameDb(
    conn, sourceTable: str, sourceColumn: str,
    targetTable: str, targetColumn: str,
) -> int:
    """Count orphans when source and target live in the same DB."""
    with conn.cursor() as cur:
        cur.execute(f"""
            SELECT COUNT(*) AS cnt
            FROM "{sourceTable}" s
            WHERE s."{sourceColumn}" IS NOT NULL
              AND s."{sourceColumn}" != ''
              AND NOT EXISTS (
                  SELECT 1 FROM "{targetTable}" t
                  WHERE t."{targetColumn}" = s."{sourceColumn}"
              )
        """)
        return cur.fetchone()["cnt"]


def _countOrphansCrossDb(
    sourceConn, sourceTable: str, sourceColumn: str,
    parentIds: Set[str],
) -> int:
    """Count orphans when parent IDs come from a different DB."""
    if not parentIds:
        with sourceConn.cursor() as cur:
            cur.execute(f"""
                SELECT COUNT(*) AS cnt
                FROM "{sourceTable}"
                WHERE "{sourceColumn}" IS NOT NULL
                  AND "{sourceColumn}" != ''
            """)
            return cur.fetchone()["cnt"]

    with sourceConn.cursor() as cur:
        cur.execute(f"""
            SELECT COUNT(*) AS cnt
            FROM "{sourceTable}"
            WHERE "{sourceColumn}" IS NOT NULL
              AND "{sourceColumn}" != ''
              AND "{sourceColumn}" NOT IN (
                  SELECT unnest(%(ids)s::text[])
              )
        """, {"ids": list(parentIds)})
        return cur.fetchone()["cnt"]
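

# The cross-DB variant ships the parent IDs to the source database as a single
# text[] parameter and expands it with unnest(), so the statement stays the same
# size no matter how many parent IDs exist (psycopg2 adapts a Python list to a
# PostgreSQL array). A standalone sketch with illustrative table/column names:
#
#     cur.execute(
#         'SELECT COUNT(*) FROM "Orders" WHERE "customerId" NOT IN '
#         "(SELECT unnest(%(ids)s::text[]))",
#         {"ids": ["c1", "c2", "c3"]},
#     )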


def _scanOrphans(dbFilter: Optional[str] = None) -> List[dict]:
    """Scan for orphaned records across all FK relationships.

    Uses a 5-minute cache to avoid repeated heavy scans.
    """
    global _orphanCache
    with _orphanCacheLock:
        if _orphanCache and (time.time() - _orphanCache["ts"]) < _ORPHAN_CACHE_TTL:
            cached = _orphanCache["results"]
            if dbFilter:
                return [r for r in cached if r["sourceDb"] == dbFilter]
            return list(cached)

    relationships = _getFkRelationships()
    if dbFilter:
        relationships = [r for r in relationships if r.sourceDb == dbFilter]

    connCache: Dict[str, Any] = {}
    tableCache: Dict[str, Set[str]] = {}
    columnCache: Dict[str, Set[str]] = {}
    parentIdCache: Dict[str, Set[str]] = {}
    results: List[dict] = []

    def _ensureConn(dbName: str):
        if dbName not in connCache:
            connCache[dbName] = _getConnection(dbName)
        return connCache[dbName]

    def _existingTables(dbName: str) -> Set[str]:
        """Cached lookup of physically existing public tables in a DB."""
        if dbName not in tableCache:
            try:
                conn = _ensureConn(dbName)
                with conn.cursor() as cur:
                    cur.execute("""
                        SELECT table_name FROM information_schema.tables
                        WHERE table_schema = 'public'
                    """)
                    tableCache[dbName] = {row["table_name"] for row in cur.fetchall()}
            except Exception:
                tableCache[dbName] = set()
        return tableCache[dbName]

    def _existingColumns(dbName: str, tableName: str) -> Set[str]:
        cacheKey = f"{dbName}.{tableName}"
        if cacheKey not in columnCache:
            try:
                columnCache[cacheKey] = _loadPhysicalColumns(_ensureConn(dbName), tableName)
            except Exception:
                columnCache[cacheKey] = set()
        return columnCache[cacheKey]

    try:
        for rel in relationships:
            try:
                sourceTables = _existingTables(rel.sourceDb)
                if rel.sourceTable not in sourceTables:
                    continue

                if rel.sourceDb == rel.targetDb:
                    if rel.targetTable not in sourceTables:
                        continue
                else:
                    targetTables = _existingTables(rel.targetDb)
                    if rel.targetTable not in targetTables:
                        continue

                # Skip FK annotations whose source column is not a physical
                # scalar column (virtual / JSONB-resident / computed field).
                # See _loadPhysicalColumns docstring for why this matters.
                sourceColumns = _existingColumns(rel.sourceDb, rel.sourceTable)
                if rel.sourceColumn not in sourceColumns:
                    logger.debug(
                        "Skipping FK %s.%s.%s -- column not present as physical column",
                        rel.sourceDb, rel.sourceTable, rel.sourceColumn,
                    )
                    continue
                targetColumns = _existingColumns(rel.targetDb, rel.targetTable)
                if rel.targetColumn not in targetColumns:
                    logger.debug(
                        "Skipping FK %s.%s.%s -> %s.%s.%s -- target column not present",
                        rel.sourceDb, rel.sourceTable, rel.sourceColumn,
                        rel.targetDb, rel.targetTable, rel.targetColumn,
                    )
                    continue

                sourceConn = _ensureConn(rel.sourceDb)

                if rel.sourceDb == rel.targetDb:
                    targetRowCount = _countRows(sourceConn, rel.targetTable)
                    count = _countOrphansSameDb(
                        sourceConn, rel.sourceTable, rel.sourceColumn,
                        rel.targetTable, rel.targetColumn,
                    )
                else:
                    targetConn = _ensureConn(rel.targetDb)
                    targetRowCount = _countRows(targetConn, rel.targetTable)

                    parentKey = f"{rel.targetDb}.{rel.targetTable}.{rel.targetColumn}"
                    if parentKey not in parentIdCache:
                        parentIdCache[parentKey] = _loadParentIds(
                            targetConn, rel.targetTable, rel.targetColumn,
                        )

                    count = _countOrphansCrossDb(
                        sourceConn, rel.sourceTable, rel.sourceColumn,
                        parentIdCache[parentKey],
                    )

                sourceRowCount = _countNonNullSource(
                    sourceConn, rel.sourceTable, rel.sourceColumn,
                )
                wouldDeleteAll = (count > 0 and count >= sourceRowCount)
                targetEmpty = (targetRowCount == 0)

                results.append(asdict(OrphanResult(
                    sourceDb=rel.sourceDb,
                    sourceTable=rel.sourceTable,
                    sourceColumn=rel.sourceColumn,
                    targetDb=rel.targetDb,
                    targetTable=rel.targetTable,
                    targetColumn=rel.targetColumn,
                    orphanCount=count,
                    sourceRowCount=sourceRowCount,
                    targetRowCount=targetRowCount,
                    targetEmpty=targetEmpty,
                    wouldDeleteAll=wouldDeleteAll,
                )))

            except Exception as e:
                logger.warning(
                    f"Orphan scan failed for {rel.sourceDb}.{rel.sourceTable}.{rel.sourceColumn}: {e}"
                )
                for dbKey in (rel.sourceDb, rel.targetDb):
                    if dbKey in connCache:
                        try:
                            connCache[dbKey].rollback()
                        except Exception:
                            pass
    finally:
        for conn in connCache.values():
            try:
                conn.close()
            except Exception:
                pass

    with _orphanCacheLock:
        _orphanCache = {"ts": time.time(), "results": results}

    return results
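

# A hedged usage sketch for the scan results: the field names match OrphanResult,
# but the filtering policy itself is illustrative (this module does not decide
# which findings are actionable):
#
#     results = _scanOrphans()
#     actionable = [
#         r for r in results
#         if r["orphanCount"] > 0 and not r["targetEmpty"] and not r["wouldDeleteAll"]
#     ]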


# ---------------------------------------------------------------------------
# Orphan cleanup
# ---------------------------------------------------------------------------

def _cleanOrphans(db: str, table: str, column: str, force: bool = False) -> int:
    """Delete orphaned records for a single FK relationship. Returns count deleted.

    Safety guards (require force=True to override):
      - Refuses if the target table is empty (likely misconfiguration / lazy table).
      - Refuses if the cleanup would delete >= _MAX_CLEANUP_FRACTION of the source rows.

    These guards prevent catastrophic wipes (e.g. emptying FeatureInstance because
    the User table happened to be empty in the wrong DB at scan time).
    """
    relationships = _getFkRelationships()
    rel = next(
        (r for r in relationships
         if r.sourceDb == db and r.sourceTable == table and r.sourceColumn == column),
        None,
    )
    if rel is None:
        raise ValueError(f"No FK relationship found for {db}.{table}.{column}")

    conn = _getConnection(rel.sourceDb)
    targetConn = None
    try:
        if rel.sourceDb == rel.targetDb:
            targetRowCount = _countRows(conn, rel.targetTable)
            parentIds: Optional[Set[str]] = None
        else:
            targetConn = _getConnection(rel.targetDb)
            targetRowCount = _countRows(targetConn, rel.targetTable)
            parentIds = _loadParentIds(targetConn, rel.targetTable, rel.targetColumn)

        sourceRowCount = _countNonNullSource(conn, rel.sourceTable, rel.sourceColumn)

        if not force:
            if targetRowCount == 0 and sourceRowCount > 0:
                raise OrphanCleanupRefused(
                    f"Refusing cleanup: target table '{rel.targetDb}.{rel.targetTable}' "
                    f"is empty but source '{rel.sourceDb}.{rel.sourceTable}' has "
                    f"{sourceRowCount} rows with non-null '{rel.sourceColumn}'. "
                    f"This likely indicates a misconfiguration. Use force=True to override."
                )

        if rel.sourceDb == rel.targetDb:
            with conn.cursor() as cur:
                cur.execute(f"""
                    SELECT COUNT(*) AS cnt
                    FROM "{rel.sourceTable}" s
                    WHERE s."{rel.sourceColumn}" IS NOT NULL
                      AND s."{rel.sourceColumn}" != ''
                      AND NOT EXISTS (
                          SELECT 1 FROM "{rel.targetTable}" t
                          WHERE t."{rel.targetColumn}" = s."{rel.sourceColumn}"
                      )
                """)
                wouldDelete = int(cur.fetchone()["cnt"])
        else:
            if not parentIds:
                wouldDelete = sourceRowCount
            else:
                with conn.cursor() as cur:
                    cur.execute(f"""
                        SELECT COUNT(*) AS cnt
                        FROM "{rel.sourceTable}"
                        WHERE "{rel.sourceColumn}" IS NOT NULL
                          AND "{rel.sourceColumn}" != ''
                          AND "{rel.sourceColumn}" NOT IN (
                              SELECT unnest(%(ids)s::text[])
                          )
                    """, {"ids": list(parentIds)})
                    wouldDelete = int(cur.fetchone()["cnt"])

        if not force and sourceRowCount > 0:
            fraction = wouldDelete / sourceRowCount
            if fraction >= _MAX_CLEANUP_FRACTION:
                raise OrphanCleanupRefused(
                    f"Refusing cleanup: would delete {wouldDelete} of {sourceRowCount} "
                    f"non-null rows ({fraction:.0%}) from '{rel.sourceDb}.{rel.sourceTable}'. "
                    f"Threshold is {_MAX_CLEANUP_FRACTION:.0%}. Use force=True to override."
                )

        if rel.sourceDb == rel.targetDb:
            with conn.cursor() as cur:
                cur.execute(f"""
                    DELETE FROM "{rel.sourceTable}"
                    WHERE "{rel.sourceColumn}" IS NOT NULL
                      AND "{rel.sourceColumn}" != ''
                      AND NOT EXISTS (
                          SELECT 1 FROM "{rel.targetTable}" t
                          WHERE t."{rel.targetColumn}" = "{rel.sourceTable}"."{rel.sourceColumn}"
                      )
                """)
                deleted = cur.rowcount
            conn.commit()
        else:
            if not parentIds:
                with conn.cursor() as cur:
                    cur.execute(f"""
                        DELETE FROM "{rel.sourceTable}"
                        WHERE "{rel.sourceColumn}" IS NOT NULL
                          AND "{rel.sourceColumn}" != ''
                    """)
                    deleted = cur.rowcount
            else:
                with conn.cursor() as cur:
                    cur.execute(f"""
                        DELETE FROM "{rel.sourceTable}"
                        WHERE "{rel.sourceColumn}" IS NOT NULL
                          AND "{rel.sourceColumn}" != ''
                          AND "{rel.sourceColumn}" NOT IN (
                              SELECT unnest(%(ids)s::text[])
                          )
                    """, {"ids": list(parentIds)})
                    deleted = cur.rowcount
            conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        if targetConn is not None:
            try:
                targetConn.close()
            except Exception:
                pass
        conn.close()

    _invalidateOrphanCache()
    logger.info(
        f"Cleaned {deleted} orphans from {db}.{table}.{column} (force={force})"
    )
    return deleted
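

# A minimal caller-side sketch (the surrounding route/handler code is assumed,
# not part of this module): dangerous cleanups surface as OrphanCleanupRefused
# and should be reported back to the SysAdmin rather than silently forced.
#
#     try:
#         deleted = _cleanOrphans(db, table, column, force=False)
#     except OrphanCleanupRefused as refusal:
#         return {"deleted": 0, "refused": str(refusal)}   # response shape is illustrative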


def _cleanAllOrphans(force: bool = False) -> List[dict]:
    """Clean all detected orphans. Returns list of {db, table, column, deleted, [error|skipped]}.

    Safety: each individual cleanup re-validates target row counts at delete-time
    to avoid cascading wipes (e.g. one delete emptying a target table that the
    next iteration depends on). Without force=True, dangerous cleanups are skipped.
    """
    orphans = _scanOrphans()
    results = []
    for orphan in orphans:
        if orphan.get("orphanCount", 0) <= 0:
            continue
        try:
            deleted = _cleanOrphans(
                orphan["sourceDb"],
                orphan["sourceTable"],
                orphan["sourceColumn"],
                force=force,
            )
            results.append({
                "db": orphan["sourceDb"],
                "table": orphan["sourceTable"],
                "column": orphan["sourceColumn"],
                "deleted": deleted,
            })
        except OrphanCleanupRefused as e:
            logger.warning(
                f"Skipping orphan cleanup for {orphan['sourceDb']}.{orphan['sourceTable']}.{orphan['sourceColumn']}: {e}"
            )
            results.append({
                "db": orphan["sourceDb"],
                "table": orphan["sourceTable"],
                "column": orphan["sourceColumn"],
                "deleted": 0,
                "skipped": str(e),
            })
        except Exception as e:
            logger.error(
                f"Failed to clean orphans for {orphan['sourceDb']}.{orphan['sourceTable']}.{orphan['sourceColumn']}: {e}"
            )
            results.append({
                "db": orphan["sourceDb"],
                "table": orphan["sourceTable"],
                "column": orphan["sourceColumn"],
                "deleted": 0,
                "error": str(e),
            })
    return results


def _invalidateOrphanCache() -> None:
    global _orphanCache
    with _orphanCacheLock:
        _orphanCache = None


# ---------------------------------------------------------------------------
# Listing orphans (for SysAdmin "download / inspect" workflow)
# ---------------------------------------------------------------------------

def _listOrphans(
    db: str,
    table: str,
    column: str,
    limit: int = 1000,
) -> List[dict]:
    """Return up to ``limit`` actual orphan source-rows for one FK relationship.

    Each entry is ``{"orphanFkValue": str, "rowId": str|None, "row": dict}`` so
    the SysAdmin UI can present them as a download (CSV/JSON) for review before
    the destructive cleanup is triggered.
    """
    relationships = _getFkRelationships()
    rel = next(
        (r for r in relationships
         if r.sourceDb == db and r.sourceTable == table and r.sourceColumn == column),
        None,
    )
    if rel is None:
        raise ValueError(f"No FK relationship found for {db}.{table}.{column}")

    safeLimit = max(1, min(int(limit), 10000))

    sourceConn = _getConnection(rel.sourceDb)
    targetConn = None
    try:
        sourceColumns = _loadPhysicalColumns(sourceConn, rel.sourceTable)
        if rel.sourceColumn not in sourceColumns:
            return []

        if rel.sourceDb == rel.targetDb:
            targetColumns = _loadPhysicalColumns(sourceConn, rel.targetTable)
            if rel.targetColumn not in targetColumns:
                return []
            with sourceConn.cursor() as cur:
                cur.execute(f"""
                    SELECT s.*
                    FROM "{rel.sourceTable}" s
                    WHERE s."{rel.sourceColumn}" IS NOT NULL
                      AND s."{rel.sourceColumn}" != ''
                      AND NOT EXISTS (
                          SELECT 1 FROM "{rel.targetTable}" t
                          WHERE t."{rel.targetColumn}" = s."{rel.sourceColumn}"
                      )
                    LIMIT %s
                """, (safeLimit,))
                rows = cur.fetchall()
        else:
            targetConn = _getConnection(rel.targetDb)
            targetColumns = _loadPhysicalColumns(targetConn, rel.targetTable)
            if rel.targetColumn not in targetColumns:
                return []
            parentIds = _loadParentIds(targetConn, rel.targetTable, rel.targetColumn)
            with sourceConn.cursor() as cur:
                if not parentIds:
                    cur.execute(f"""
                        SELECT *
                        FROM "{rel.sourceTable}"
                        WHERE "{rel.sourceColumn}" IS NOT NULL
                          AND "{rel.sourceColumn}" != ''
                        LIMIT %s
                    """, (safeLimit,))
                else:
                    cur.execute(f"""
                        SELECT *
                        FROM "{rel.sourceTable}"
                        WHERE "{rel.sourceColumn}" IS NOT NULL
                          AND "{rel.sourceColumn}" != ''
                          AND "{rel.sourceColumn}" NOT IN (
                              SELECT unnest(%(ids)s::text[])
                          )
                        LIMIT %(lim)s
                    """, {"ids": list(parentIds), "lim": safeLimit})
                rows = cur.fetchall()
    finally:
        if targetConn is not None:
            try:
                targetConn.close()
            except Exception:
                pass
        sourceConn.close()

    out: List[dict] = []
    for row in rows:
        rowDict = {k: _jsonSafe(v) for k, v in dict(row).items()}
        out.append(asdict(OrphanRecord(
            sourceDb=rel.sourceDb,
            sourceTable=rel.sourceTable,
            sourceColumn=rel.sourceColumn,
            targetDb=rel.targetDb,
            targetTable=rel.targetTable,
            targetColumn=rel.targetColumn,
            orphanFkValue=str(rowDict.get(rel.sourceColumn, "")),
            rowId=str(rowDict.get("id")) if rowDict.get("id") is not None else None,
            row=rowDict,
        )))
    return out
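

# A hedged sketch of turning _listOrphans() output into a CSV download for the
# SysAdmin UI. The csv/io usage is standard library; the column selection is an
# assumption for illustration, not a contract of this module:
#
#     import csv, io
#
#     records = _listOrphans(db, table, column, limit=1000)
#     buf = io.StringIO()
#     writer = csv.writer(buf)
#     writer.writerow(["orphanFkValue", "rowId", "sourceTable", "targetTable"])
#     for rec in records:
#         writer.writerow([rec["orphanFkValue"], rec["rowId"],
#                          rec["sourceTable"], rec["targetTable"]])
#     csvPayload = buf.getvalue()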


def _jsonSafe(v):
    """Coerce psycopg2 row values into JSON-serialisable primitives."""
    import datetime
    import decimal
    import uuid

    if v is None or isinstance(v, (str, int, float, bool)):
        return v
    if isinstance(v, (datetime.datetime, datetime.date, datetime.time)):
        return v.isoformat()
    if isinstance(v, decimal.Decimal):
        return float(v)
    if isinstance(v, uuid.UUID):
        return str(v)
    if isinstance(v, (list, tuple)):
        return [_jsonSafe(x) for x in v]
    if isinstance(v, dict):
        return {str(k): _jsonSafe(val) for k, val in v.items()}
    if isinstance(v, (bytes, bytearray, memoryview)):
        try:
            return bytes(v).decode("utf-8", errors="replace")
        except Exception:
            return repr(v)
    return str(v)