# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Database health utilities — table statistics and orphan detection/cleanup.

All functions are intended for SysAdmin use only (access control in the route layer).
"""

import logging
import time
import threading
from dataclasses import dataclass, asdict
from typing import Dict, List, Optional, Set

import psycopg2
import psycopg2.extras

from modules.shared.configuration import APP_CONFIG
from modules.shared.dbRegistry import getRegisteredDatabases
from modules.shared.fkRegistry import getFkRelationships, FkRelationship

logger = logging.getLogger(__name__)

_ORPHAN_CACHE_TTL = 300  # 5 minutes
_orphanCacheLock = threading.Lock()
_orphanCache: Optional[Dict] = None  # {"ts": float, "results": [...]}


# ---------------------------------------------------------------------------
# Dataclasses
# ---------------------------------------------------------------------------

@dataclass
class TableStats:
    db: str
    table: str
    estimatedRows: int
    totalSizeBytes: int
    indexSizeBytes: int
    lastVacuum: Optional[str]
    lastAnalyze: Optional[str]


@dataclass
class OrphanResult:
    sourceDb: str
    sourceTable: str
    sourceColumn: str
    targetDb: str
    targetTable: str
    targetColumn: str
    orphanCount: int
    sourceRowCount: int = 0
    targetRowCount: int = 0
    targetEmpty: bool = False
    wouldDeleteAll: bool = False


@dataclass
class OrphanRecord:
    """A single orphan source-row -- includes the unresolved FK value plus the full row data.

    Used by the SysAdmin UI download button so the human can verify the orphan
    list before pressing "clean".
    """
    sourceDb: str
    sourceTable: str
    sourceColumn: str
    targetDb: str
    targetTable: str
    targetColumn: str
    orphanFkValue: str
    rowId: Optional[str]
    row: Dict


# ---------------------------------------------------------------------------
# Safety thresholds for cleanup
# ---------------------------------------------------------------------------

# If a single cleanup would delete more than this fraction of the source table,
# refuse without an explicit force=True. Protects against catastrophic wipes
# caused by misconfigured / empty target tables.
_MAX_CLEANUP_FRACTION = 0.5


class OrphanCleanupRefused(Exception):
    """Raised when a cleanup is refused for safety reasons (use force=True to override)."""


# ---------------------------------------------------------------------------
# Low-level DB helpers (read-only, lightweight connections)
# ---------------------------------------------------------------------------

def _getConnection(dbName: str):
    """Open a psycopg2 connection to the given registered database."""
    registeredDbs = getRegisteredDatabases()
    configPrefix = registeredDbs.get(dbName)
    if configPrefix is None:
        raise ValueError(f"Database '{dbName}' is not registered.")

    hostKey = f"{configPrefix}_HOST" if configPrefix != "DB" else "DB_HOST"
    portKey = f"{configPrefix}_PORT" if configPrefix != "DB" else "DB_PORT"
    userKey = f"{configPrefix}_USER" if configPrefix != "DB" else "DB_USER"
    passwordKey = (
        f"{configPrefix}_PASSWORD_SECRET" if configPrefix != "DB" else "DB_PASSWORD_SECRET"
    )

    return psycopg2.connect(
        host=APP_CONFIG.get(hostKey, "localhost"),
        port=int(APP_CONFIG.get(portKey, 5432)),
        database=dbName,
        user=APP_CONFIG.get(userKey),
        password=APP_CONFIG.get(passwordKey),
        client_encoding="utf8",
        cursor_factory=psycopg2.extras.RealDictCursor,
    )


# ---------------------------------------------------------------------------
# Table statistics
# ---------------------------------------------------------------------------

def _getTableStats(dbFilter: Optional[str] = None) -> List[dict]:
    """Query pg_stat_user_tables + pg_total_relation_size for every registered DB.

    Returns a list of TableStats dicts, optionally filtered by database name.
    """
    registeredDbs = getRegisteredDatabases()
    if dbFilter:
        registeredDbs = {k: v for k, v in registeredDbs.items() if k == dbFilter}

    results: List[dict] = []
    for dbName in sorted(registeredDbs):
        try:
            conn = _getConnection(dbName)
            try:
                with conn.cursor() as cur:
                    cur.execute("""
                        SELECT
                            s.relname                                      AS "table",
                            s.n_live_tup                                   AS "estimatedRows",
                            pg_total_relation_size(quote_ident(s.relname)) AS "totalSizeBytes",
                            pg_indexes_size(quote_ident(s.relname))        AS "indexSizeBytes",
                            s.last_vacuum::text                            AS "lastVacuum",
                            s.last_analyze::text                           AS "lastAnalyze"
                        FROM pg_stat_user_tables s
                        WHERE s.schemaname = 'public'
                          AND s.relname NOT LIKE '\\_%%'
                        ORDER BY s.relname
                    """)
                    for row in cur.fetchall():
                        results.append(asdict(TableStats(
                            db=dbName,
                            table=row["table"],
                            estimatedRows=row["estimatedRows"],
                            totalSizeBytes=row["totalSizeBytes"],
                            indexSizeBytes=row["indexSizeBytes"],
                            lastVacuum=row["lastVacuum"],
                            lastAnalyze=row["lastAnalyze"],
                        )))
            finally:
                conn.close()
        except Exception as e:
            logger.error(f"Failed to get table stats for {dbName}: {e}")

    return results


# ---------------------------------------------------------------------------
# Orphan scanning
# ---------------------------------------------------------------------------

def _loadParentIds(conn, tableName: str, columnName: str) -> Set[str]:
    """Load all distinct values of a column from a table (for cross-DB checks)."""
    ids: Set[str] = set()
    with conn.cursor() as cur:
        cur.execute(f'SELECT DISTINCT "{columnName}" FROM "{tableName}"')
        for row in cur.fetchall():
            val = row[columnName]
            if val is not None:
                ids.add(str(val))
    return ids


def _loadPhysicalColumns(conn, tableName: str) -> Set[str]:
    """Return the set of physical (scalar) columns present on a table.

    Used by the orphan scanner to skip FK relationships whose ``sourceColumn``
    is annotated on the Pydantic model but does NOT exist as a physical column
    -- e.g. virtual / computed fields, or fields that the database interface
    decided to fold into a JSONB blob (List/Dict typed fields). Comparing a
    JSONB array against a scalar via ``=`` always fails and would otherwise
    flag every single source row as an orphan (the user-reported "false
    positives").
    """
    cols: Set[str] = set()
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT column_name
                FROM information_schema.columns
                WHERE table_schema = 'public' AND table_name = %s
                """,
                (tableName,),
            )
            for row in cur.fetchall():
                cols.add(row["column_name"])
    except Exception:
        pass
    return cols


def _countRows(conn, tableName: str) -> int:
    """Count physical rows in a table. Returns 0 on any error."""
    try:
        with conn.cursor() as cur:
            cur.execute(f'SELECT COUNT(*) AS cnt FROM "{tableName}"')
            return int(cur.fetchone()["cnt"])
    except Exception:
        return 0


def _countNonNullSource(conn, tableName: str, columnName: str) -> int:
    """Count source rows where the FK column is non-null/non-empty."""
    try:
        with conn.cursor() as cur:
            cur.execute(f"""
                SELECT COUNT(*) AS cnt
                FROM "{tableName}"
                WHERE "{columnName}" IS NOT NULL
                  AND "{columnName}" != ''
            """)
            return int(cur.fetchone()["cnt"])
    except Exception:
        return 0


def _countOrphansSameDb(
    conn, sourceTable: str, sourceColumn: str,
    targetTable: str, targetColumn: str,
) -> int:
    """Count orphans when source and target live in the same DB."""
    with conn.cursor() as cur:
        cur.execute(f"""
            SELECT COUNT(*) AS cnt
            FROM "{sourceTable}" s
            WHERE s."{sourceColumn}" IS NOT NULL
              AND s."{sourceColumn}" != ''
              AND NOT EXISTS (
                  SELECT 1 FROM "{targetTable}" t
                  WHERE t."{targetColumn}" = s."{sourceColumn}"
              )
        """)
        return cur.fetchone()["cnt"]


def _countOrphansCrossDb(
    sourceConn, sourceTable: str, sourceColumn: str,
    parentIds: Set[str],
) -> int:
    """Count orphans when parent IDs come from a different DB."""
    if not parentIds:
        with sourceConn.cursor() as cur:
            cur.execute(f"""
                SELECT COUNT(*) AS cnt
                FROM "{sourceTable}"
                WHERE "{sourceColumn}" IS NOT NULL
                  AND "{sourceColumn}" != ''
            """)
            return cur.fetchone()["cnt"]

    with sourceConn.cursor() as cur:
        cur.execute(f"""
            SELECT COUNT(*) AS cnt
            FROM "{sourceTable}"
            WHERE "{sourceColumn}" IS NOT NULL
              AND "{sourceColumn}" != ''
              AND "{sourceColumn}" NOT IN (
                  SELECT unnest(%(ids)s::text[])
              )
        """, {"ids": list(parentIds)})
        return cur.fetchone()["cnt"]


def _scanOrphans(dbFilter: Optional[str] = None) -> List[dict]:
    """Scan for orphaned records across all FK relationships.

    Uses a 5-minute cache to avoid repeated heavy scans.
    """
    global _orphanCache
    with _orphanCacheLock:
        if _orphanCache and (time.time() - _orphanCache["ts"]) < _ORPHAN_CACHE_TTL:
            cached = _orphanCache["results"]
            if dbFilter:
                return [r for r in cached if r["sourceDb"] == dbFilter]
            return list(cached)

    relationships = getFkRelationships()
    if dbFilter:
        relationships = [r for r in relationships if r.sourceDb == dbFilter]

    connCache: Dict[str, any] = {}
    tableCache: Dict[str, Set[str]] = {}
    columnCache: Dict[str, Set[str]] = {}
    parentIdCache: Dict[str, Set[str]] = {}
    results: List[dict] = []

    def _ensureConn(dbName: str):
        if dbName not in connCache:
            connCache[dbName] = _getConnection(dbName)
        return connCache[dbName]

    def _existingTables(dbName: str) -> Set[str]:
        """Cached lookup of physically existing public tables in a DB."""
        if dbName not in tableCache:
            try:
                conn = _ensureConn(dbName)
                with conn.cursor() as cur:
                    cur.execute("""
                        SELECT table_name FROM information_schema.tables
                        WHERE table_schema = 'public'
                    """)
                    tableCache[dbName] = {row["table_name"] for row in cur.fetchall()}
            except Exception:
                tableCache[dbName] = set()
        return tableCache[dbName]

    def _existingColumns(dbName: str, tableName: str) -> Set[str]:
        cacheKey = f"{dbName}.{tableName}"
        if cacheKey not in columnCache:
            try:
                columnCache[cacheKey] = _loadPhysicalColumns(_ensureConn(dbName), tableName)
            except Exception:
                columnCache[cacheKey] = set()
        return columnCache[cacheKey]

    try:
        for rel in relationships:
            try:
                if rel.softFk:
                    logger.debug(
                        "Skipping soft FK %s.%s.%s -> %s.%s.%s",
                        rel.sourceDb, rel.sourceTable, rel.sourceColumn,
                        rel.targetDb, rel.targetTable, rel.targetColumn,
                    )
                    continue

                sourceTables = _existingTables(rel.sourceDb)
                if rel.sourceTable not in sourceTables:
                    continue

                if rel.sourceDb == rel.targetDb:
                    if rel.targetTable not in sourceTables:
                        continue
                else:
                    targetTables = _existingTables(rel.targetDb)
                    if rel.targetTable not in targetTables:
                        continue

                # Skip FK annotations whose source column is not a physical
                # scalar column (virtual / JSONB-resident / computed field).
                # See _loadPhysicalColumns docstring for why this matters.
                sourceColumns = _existingColumns(rel.sourceDb, rel.sourceTable)
                if rel.sourceColumn not in sourceColumns:
                    logger.debug(
                        "Skipping FK %s.%s.%s -- column not present as physical column",
                        rel.sourceDb, rel.sourceTable, rel.sourceColumn,
                    )
                    continue
                targetColumns = _existingColumns(rel.targetDb, rel.targetTable)
                if rel.targetColumn not in targetColumns:
                    logger.debug(
                        "Skipping FK %s.%s.%s -> %s.%s.%s -- target column not present",
                        rel.sourceDb, rel.sourceTable, rel.sourceColumn,
                        rel.targetDb, rel.targetTable, rel.targetColumn,
                    )
                    continue

                sourceConn = _ensureConn(rel.sourceDb)

                if rel.sourceDb == rel.targetDb:
                    targetRowCount = _countRows(sourceConn, rel.targetTable)
                    count = _countOrphansSameDb(
                        sourceConn, rel.sourceTable, rel.sourceColumn,
                        rel.targetTable, rel.targetColumn,
                    )
                else:
                    targetConn = _ensureConn(rel.targetDb)
                    targetRowCount = _countRows(targetConn, rel.targetTable)

                    parentKey = f"{rel.targetDb}.{rel.targetTable}.{rel.targetColumn}"
                    if parentKey not in parentIdCache:
                        parentIdCache[parentKey] = _loadParentIds(
                            targetConn, rel.targetTable, rel.targetColumn,
                        )

                    count = _countOrphansCrossDb(
                        sourceConn, rel.sourceTable, rel.sourceColumn,
                        parentIdCache[parentKey],
                    )

                sourceRowCount = _countNonNullSource(
                    sourceConn, rel.sourceTable, rel.sourceColumn,
                )
                wouldDeleteAll = (count > 0 and count >= sourceRowCount)
                targetEmpty = (targetRowCount == 0)

                results.append(asdict(OrphanResult(
                    sourceDb=rel.sourceDb,
                    sourceTable=rel.sourceTable,
                    sourceColumn=rel.sourceColumn,
                    targetDb=rel.targetDb,
                    targetTable=rel.targetTable,
                    targetColumn=rel.targetColumn,
                    orphanCount=count,
                    sourceRowCount=sourceRowCount,
                    targetRowCount=targetRowCount,
                    targetEmpty=targetEmpty,
                    wouldDeleteAll=wouldDeleteAll,
                )))

            except Exception as e:
                logger.warning(
                    f"Orphan scan failed for {rel.sourceDb}.{rel.sourceTable}.{rel.sourceColumn}: {e}"
                )
                for dbKey in (rel.sourceDb, rel.targetDb):
                    if dbKey in connCache:
                        try:
                            connCache[dbKey].rollback()
                        except Exception:
                            pass
    finally:
        for conn in connCache.values():
            try:
                conn.close()
            except Exception:
                pass

    with _orphanCacheLock:
        _orphanCache = {"ts": time.time(), "results": results}

    return results


# ---------------------------------------------------------------------------
# Orphan cleanup
# ---------------------------------------------------------------------------

def _cleanOrphans(db: str, table: str, column: str, force: bool = False) -> int:
    """Delete orphaned records for a single FK relationship. Returns count deleted.

    Safety guards (require force=True to override):
    - Refuses if the target table is empty (likely misconfiguration / lazy table).
    - Refuses if the cleanup would delete >= _MAX_CLEANUP_FRACTION of the source rows.

    These guards prevent catastrophic wipes (e.g. emptying FeatureInstance because
    the User table happened to be empty in the wrong DB at scan time).
    """
    relationships = getFkRelationships()
    rel = next(
        (r for r in relationships
         if r.sourceDb == db and r.sourceTable == table and r.sourceColumn == column),
        None,
    )
    if rel is None:
        raise ValueError(f"No FK relationship found for {db}.{table}.{column}")
    if rel.softFk:
        raise OrphanCleanupRefused(
            f"Refusing cleanup: {rel.sourceDb}.{rel.sourceTable}.{rel.sourceColumn} is "
            f"declared as a soft FK (sentinel / lineage reference) and is intentionally "
            f"excluded from orphan deletion."
        )

    conn = _getConnection(rel.sourceDb)
    targetConn = None
    try:
        if rel.sourceDb == rel.targetDb:
            targetRowCount = _countRows(conn, rel.targetTable)
            parentIds: Optional[Set[str]] = None
        else:
            targetConn = _getConnection(rel.targetDb)
            targetRowCount = _countRows(targetConn, rel.targetTable)
            parentIds = _loadParentIds(targetConn, rel.targetTable, rel.targetColumn)

        sourceRowCount = _countNonNullSource(conn, rel.sourceTable, rel.sourceColumn)

        if not force:
            if targetRowCount == 0 and sourceRowCount > 0:
                raise OrphanCleanupRefused(
                    f"Refusing cleanup: target table '{rel.targetDb}.{rel.targetTable}' "
                    f"is empty but source '{rel.sourceDb}.{rel.sourceTable}' has "
                    f"{sourceRowCount} rows with non-null '{rel.sourceColumn}'. "
                    f"This likely indicates a misconfiguration. Use force=True to override."
                )

        if rel.sourceDb == rel.targetDb:
            with conn.cursor() as cur:
                cur.execute(f"""
                    SELECT COUNT(*) AS cnt
                    FROM "{rel.sourceTable}" s
                    WHERE s."{rel.sourceColumn}" IS NOT NULL
                      AND s."{rel.sourceColumn}" != ''
                      AND NOT EXISTS (
                          SELECT 1 FROM "{rel.targetTable}" t
                          WHERE t."{rel.targetColumn}" = s."{rel.sourceColumn}"
                      )
                """)
                wouldDelete = int(cur.fetchone()["cnt"])
        else:
            if not parentIds:
                wouldDelete = sourceRowCount
            else:
                with conn.cursor() as cur:
                    cur.execute(f"""
                        SELECT COUNT(*) AS cnt
                        FROM "{rel.sourceTable}"
                        WHERE "{rel.sourceColumn}" IS NOT NULL
                          AND "{rel.sourceColumn}" != ''
                          AND "{rel.sourceColumn}" NOT IN (
                              SELECT unnest(%(ids)s::text[])
                          )
                    """, {"ids": list(parentIds)})
                    wouldDelete = int(cur.fetchone()["cnt"])

        if not force and sourceRowCount > 0:
            fraction = wouldDelete / sourceRowCount
            if fraction >= _MAX_CLEANUP_FRACTION:
                raise OrphanCleanupRefused(
                    f"Refusing cleanup: would delete {wouldDelete} of {sourceRowCount} "
                    f"non-null rows ({fraction:.0%}) from '{rel.sourceDb}.{rel.sourceTable}'. "
                    f"Threshold is {_MAX_CLEANUP_FRACTION:.0%}. Use force=True to override."
                )

        if rel.sourceDb == rel.targetDb:
            with conn.cursor() as cur:
                cur.execute(f"""
                    DELETE FROM "{rel.sourceTable}"
                    WHERE "{rel.sourceColumn}" IS NOT NULL
                      AND "{rel.sourceColumn}" != ''
                      AND NOT EXISTS (
                          SELECT 1 FROM "{rel.targetTable}" t
                          WHERE t."{rel.targetColumn}" = "{rel.sourceTable}"."{rel.sourceColumn}"
                      )
                """)
                deleted = cur.rowcount
            conn.commit()
        else:
            if not parentIds:
                with conn.cursor() as cur:
                    cur.execute(f"""
                        DELETE FROM "{rel.sourceTable}"
                        WHERE "{rel.sourceColumn}" IS NOT NULL
                          AND "{rel.sourceColumn}" != ''
                    """)
                    deleted = cur.rowcount
            else:
                with conn.cursor() as cur:
                    cur.execute(f"""
                        DELETE FROM "{rel.sourceTable}"
                        WHERE "{rel.sourceColumn}" IS NOT NULL
                          AND "{rel.sourceColumn}" != ''
                          AND "{rel.sourceColumn}" NOT IN (
                              SELECT unnest(%(ids)s::text[])
                          )
                    """, {"ids": list(parentIds)})
                    deleted = cur.rowcount
            conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        if targetConn is not None:
            try:
                targetConn.close()
            except Exception:
                pass
        conn.close()

    _invalidateOrphanCache()
    logger.info(
        f"Cleaned {deleted} orphans from {db}.{table}.{column} (force={force})"
    )
    return deleted


def _isUserIdFk(targetTable: str, targetColumn: str) -> bool:
    """Match the UserInDB.id reference exactly (case-insensitive on table name).

    Orphans pointing at deleted users are a distinct category: they accumulate
    naturally on every audit / billing / membership row when a user is deleted,
    and the SysAdmin typically wants to handle them separately from "real" FK
    drift. The orphan UI exposes a checkbox `excludeUserFks` that hides them
    from the list and skips them in `clean-all`; this helper keeps the rule
    in one place so frontend + clean-all + scan stay in lock-step.
    """
    return targetTable.lower() == "userindb" and targetColumn == "id"


def _cleanAllOrphans(force: bool = False, excludeUserFks: bool = False) -> List[dict]:
    """Clean all detected orphans. Returns list of {db, table, column, deleted, [error|skipped]}.

    Safety: each individual cleanup re-validates target row counts at delete-time
    to avoid cascading wipes (e.g. one delete emptying a target table that the
    next iteration depends on). Without force=True, dangerous cleanups are skipped.

    When ``excludeUserFks=True``, FK relationships pointing at ``UserInDB.id``
    are skipped entirely — those orphans (deleted-user remnants in audit /
    billing / membership tables) are typically handled by a dedicated user
    purge workflow, not by generic FK cleanup.
    """
    orphans = _scanOrphans()
    results = []
    for orphan in orphans:
        if orphan.get("orphanCount", 0) <= 0:
            continue
        if excludeUserFks and _isUserIdFk(orphan.get("targetTable", ""), orphan.get("targetColumn", "")):
            continue
        try:
            deleted = _cleanOrphans(
                orphan["sourceDb"],
                orphan["sourceTable"],
                orphan["sourceColumn"],
                force=force,
            )
            results.append({
                "db": orphan["sourceDb"],
                "table": orphan["sourceTable"],
                "column": orphan["sourceColumn"],
                "deleted": deleted,
            })
        except OrphanCleanupRefused as e:
            logger.warning(
                f"Skipping orphan cleanup for {orphan['sourceDb']}.{orphan['sourceTable']}.{orphan['sourceColumn']}: {e}"
            )
            results.append({
                "db": orphan["sourceDb"],
                "table": orphan["sourceTable"],
                "column": orphan["sourceColumn"],
                "deleted": 0,
                "skipped": str(e),
            })
        except Exception as e:
            logger.error(
                f"Failed to clean orphans for {orphan['sourceDb']}.{orphan['sourceTable']}.{orphan['sourceColumn']}: {e}"
            )
            results.append({
                "db": orphan["sourceDb"],
                "table": orphan["sourceTable"],
                "column": orphan["sourceColumn"],
                "deleted": 0,
                "error": str(e),
            })
    return results


def _invalidateOrphanCache() -> None:
    global _orphanCache
    with _orphanCacheLock:
        _orphanCache = None


# ---------------------------------------------------------------------------
# Listing orphans (for SysAdmin "download / inspect" workflow)
# ---------------------------------------------------------------------------

def _listOrphans(
    db: str,
    table: str,
    column: str,
    limit: int = 1000,
) -> List[dict]:
    """Return up to ``limit`` actual orphan source-rows for one FK relationship.

    Each entry is ``{"orphanFkValue": str, "rowId": str|None, "row": dict}`` so
    the SysAdmin UI can present them as a download (CSV/JSON) for review before
    the destructive cleanup is triggered.
    """
    relationships = getFkRelationships()
    rel = next(
        (r for r in relationships
         if r.sourceDb == db and r.sourceTable == table and r.sourceColumn == column),
        None,
    )
    if rel is None:
        raise ValueError(f"No FK relationship found for {db}.{table}.{column}")
    if rel.softFk:
        return []

    safeLimit = max(1, min(int(limit), 10000))

    sourceConn = _getConnection(rel.sourceDb)
    targetConn = None
    try:
        sourceColumns = _loadPhysicalColumns(sourceConn, rel.sourceTable)
        if rel.sourceColumn not in sourceColumns:
            return []

        if rel.sourceDb == rel.targetDb:
            targetColumns = _loadPhysicalColumns(sourceConn, rel.targetTable)
            if rel.targetColumn not in targetColumns:
                return []
            with sourceConn.cursor() as cur:
                cur.execute(f"""
                    SELECT s.*
                    FROM "{rel.sourceTable}" s
                    WHERE s."{rel.sourceColumn}" IS NOT NULL
                      AND s."{rel.sourceColumn}" != ''
                      AND NOT EXISTS (
                          SELECT 1 FROM "{rel.targetTable}" t
                          WHERE t."{rel.targetColumn}" = s."{rel.sourceColumn}"
                      )
                    LIMIT %s
                """, (safeLimit,))
                rows = cur.fetchall()
        else:
            targetConn = _getConnection(rel.targetDb)
            targetColumns = _loadPhysicalColumns(targetConn, rel.targetTable)
            if rel.targetColumn not in targetColumns:
                return []
            parentIds = _loadParentIds(targetConn, rel.targetTable, rel.targetColumn)
            with sourceConn.cursor() as cur:
                if not parentIds:
                    cur.execute(f"""
                        SELECT *
                        FROM "{rel.sourceTable}"
                        WHERE "{rel.sourceColumn}" IS NOT NULL
                          AND "{rel.sourceColumn}" != ''
                        LIMIT %s
                    """, (safeLimit,))
                else:
                    cur.execute(f"""
                        SELECT *
                        FROM "{rel.sourceTable}"
                        WHERE "{rel.sourceColumn}" IS NOT NULL
                          AND "{rel.sourceColumn}" != ''
                          AND "{rel.sourceColumn}" NOT IN (
                              SELECT unnest(%(ids)s::text[])
                          )
                        LIMIT %(lim)s
                    """, {"ids": list(parentIds), "lim": safeLimit})
                rows = cur.fetchall()
    finally:
        if targetConn is not None:
            try:
                targetConn.close()
            except Exception:
                pass
        sourceConn.close()

    out: List[dict] = []
    for row in rows:
        rowDict = {k: _jsonSafe(v) for k, v in dict(row).items()}
        out.append(asdict(OrphanRecord(
            sourceDb=rel.sourceDb,
            sourceTable=rel.sourceTable,
            sourceColumn=rel.sourceColumn,
            targetDb=rel.targetDb,
            targetTable=rel.targetTable,
            targetColumn=rel.targetColumn,
            orphanFkValue=str(rowDict.get(rel.sourceColumn, "")),
            rowId=str(rowDict.get("id")) if rowDict.get("id") is not None else None,
            row=rowDict,
        )))
    return out


def _jsonSafe(v):
    """Coerce psycopg2 row values into JSON-serialisable primitives."""
    import datetime
    import decimal
    import uuid

    if v is None or isinstance(v, (str, int, float, bool)):
        return v
    if isinstance(v, (datetime.datetime, datetime.date, datetime.time)):
        return v.isoformat()
    if isinstance(v, decimal.Decimal):
        return float(v)
    if isinstance(v, uuid.UUID):
        return str(v)
    if isinstance(v, (list, tuple)):
        return [_jsonSafe(x) for x in v]
    if isinstance(v, dict):
        return {str(k): _jsonSafe(val) for k, val in v.items()}
    if isinstance(v, (bytes, bytearray, memoryview)):
        try:
            return bytes(v).decode("utf-8", errors="replace")
        except Exception:
            return repr(v)
    return str(v)