279 lines
9.8 KiB
Python
279 lines
9.8 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""
|
|
FK-Discovery — scans the Model-Registry for `fk_target` annotations and
|
|
builds a cached list of foreign-key relationships.
|
|
|
|
Each relationship describes one directed edge:
|
|
sourceTable.sourceColumn → targetTable.targetColumn
|
|
(possibly across databases)
|
|
|
|
The table→db mapping is derived automatically from the `fk_target`
|
|
annotations themselves: every `fk_target` declares `{"db": "...", "table": "..."}`
|
|
for the *target* side. By collecting all such declarations we know which DB
|
|
each table lives in — no extra registration step needed.
|
|
|
|
Usage:
|
|
from modules.shared.fkRegistry import getFkRelationships
|
|
rels = getFkRelationships()
|
|
"""
|
|
|
|
import importlib
|
|
import logging
|
|
import os
|
|
import threading
|
|
from dataclasses import dataclass
|
|
from typing import Dict, List, Optional
|
|
|
|
from modules.datamodels.datamodelBase import MODEL_REGISTRY
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Set to True by _ensureModelsLoaded() after its import pass, so that later
# calls return immediately instead of re-scanning the datamodel directories.
_modelsLoaded = False
|
|
|
|
|
|
def _importDatamodelModules(directory: str, packageName: str) -> None:
    """Import every ``datamodel*.py`` file in *directory* as ``<packageName>.<stem>``.

    A path that is not an existing directory is skipped. A module that fails
    to import is logged at debug level and does not stop the remaining imports.
    """
    if not os.path.isdir(directory):
        return

    # Sorted so the import order does not depend on the file system's listing order.
    for fname in sorted(os.listdir(directory)):
        if not (fname.startswith("datamodel") and fname.endswith(".py")):
            continue
        modName = f"{packageName}.{fname[:-3]}"
        try:
            importlib.import_module(modName)
        except Exception as e:
            # Best effort: one broken datamodel module must not block the others.
            logger.debug(f"Could not import {modName}: {e}")


def _ensureModelsLoaded() -> None:
    """Import all datamodel modules so that __init_subclass__ fills MODEL_REGISTRY.

    In a running server the interfaces import the datamodels automatically.
    This function makes FK-Discovery work in standalone / test contexts too.

    Scans ``modules/datamodels`` and every sub-directory of ``modules/features``
    for files named ``datamodel*.py``. The scan runs only once per process;
    subsequent calls return immediately. Missing directories are skipped
    rather than raising.
    """
    global _modelsLoaded
    if _modelsLoaded:
        return

    # This file is expected at <gatewayRoot>/modules/shared/, hence three levels up.
    gatewayRoot = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

    _importDatamodelModules(
        os.path.join(gatewayRoot, "modules", "datamodels"),
        "modules.datamodels",
    )

    featuresDir = os.path.join(gatewayRoot, "modules", "features")
    if os.path.isdir(featuresDir):
        for featureDir in sorted(os.listdir(featuresDir)):
            # Plain files inside features/ are filtered out by the helper's isdir check.
            _importDatamodelModules(
                os.path.join(featuresDir, featureDir),
                f"modules.features.{featureDir}",
            )

    _modelsLoaded = True
|
|
|
|
# Held while reading or writing the module-level caches
# (_cachedRelationships and _cachedTableToDb).
_lock = threading.Lock()
|
|
# Result of FK discovery; None until getFkRelationships() stores a list,
# and reset to None by _invalidateFkCache().
_cachedRelationships: Optional[List["FkRelationship"]] = None
|
|
# Table-name → database-name mapping; None until _getTableToDbMap() stores
# one, and reset to None by _invalidateFkCache().
_cachedTableToDb: Optional[Dict[str, str]] = None
|
|
|
|
|
|
@dataclass(frozen=True)
class FkRelationship:
    """One directed foreign-key edge: source column → target column.

    Instances are frozen, so they are immutable, compare by value and are
    hashable.
    """

    # Referencing side of the edge.
    sourceDb: str
    sourceTable: str
    sourceColumn: str

    # Referenced side of the edge; may be in a different database.
    targetDb: str
    targetTable: str
    targetColumn: str

    # A soft reference may legitimately hold a sentinel / lineage value that
    # has no matching DB row (e.g. AutoWorkflow.templateSourceId can store an
    # in-code template ID like "trustee-receipt-import"). The orphan scanner
    # MUST skip soft references, otherwise it would delete valid records.
    softFk: bool = False
|
|
|
|
|
|
def _buildTableToDbMap() -> Dict[str, str]:
    """Build a {tableName → dbName} mapping for the registered models.

    Works in two passes:
      1. Every ``fk_target`` annotation names the DB of its *target* table;
         those declarations are collected first. If the same table is
         declared more than once, the declaration seen last wins.
      2. Registry entries that no ``fk_target`` points at are looked up in
         the catalogs of the registered databases.

    A failure in pass 2 is logged as a warning and the mapping collected so
    far is returned, so the result may not cover every registered model.
    """
    _ensureModelsLoaded()

    dbByTable: Dict[str, str] = {}

    # Pass 1: explicit declarations from fk_target annotations.
    for modelCls in MODEL_REGISTRY.values():
        for fieldInfo in modelCls.model_fields.values():
            schemaExtra = fieldInfo.json_schema_extra
            target = schemaExtra.get("fk_target") if isinstance(schemaExtra, dict) else None
            if not isinstance(target, dict):
                continue
            targetTable = target.get("table", "")
            targetDb = target.get("db", "")
            if targetTable and targetDb:
                dbByTable[targetTable] = targetDb

    # Pass 2: catalog lookup for everything pass 1 did not cover.
    stillUnmapped = [name for name in MODEL_REGISTRY if name not in dbByTable]
    if not stillUnmapped:
        return dbByTable

    try:
        from modules.shared.dbRegistry import getRegisteredDatabases

        _resolveUnmappedTablesFromCatalog(dbByTable, stillUnmapped, getRegisteredDatabases())
    except Exception as e:
        logger.warning(f"Could not resolve unmapped tables from catalog: {e}")

    return dbByTable
|
|
|
|
|
|
def _resolveUnmappedTablesFromCatalog(
    mapping: Dict[str, str],
    unmapped: List[str],
    registeredDbs: Dict[str, str],
) -> None:
    """Look up still-unmapped table names in each registered database's catalog.

    Args:
        mapping: Table → database mapping; updated in place for every table found.
        unmapped: Table names that have no entry in ``mapping`` yet.
        registeredDbs: Database name → configuration key prefix. Connection
            settings are read from ``APP_CONFIG`` under ``<prefix>_HOST``,
            ``<prefix>_PORT``, ``<prefix>_USER`` and ``<prefix>_PASSWORD_SECRET``.

    Databases are tried in iteration order and the loop stops as soon as no
    names are left. A database that cannot be queried is logged at debug
    level and skipped.
    """
    import psycopg2
    import psycopg2.extras
    from modules.shared.configuration import APP_CONFIG

    pending = set(unmapped)
    for dbName, configPrefix in registeredDbs.items():
        if not pending:
            break
        try:
            connection = psycopg2.connect(
                host=APP_CONFIG.get(f"{configPrefix}_HOST", "localhost"),
                port=int(APP_CONFIG.get(f"{configPrefix}_PORT", 5432)),
                database=dbName,
                user=APP_CONFIG.get(f"{configPrefix}_USER"),
                password=APP_CONFIG.get(f"{configPrefix}_PASSWORD_SECRET"),
                client_encoding="utf8",
            )
            try:
                with connection.cursor() as cur:
                    # NOTE(review): the NOT LIKE pattern presumably excludes
                    # tables whose name starts with an underscore — confirm.
                    cur.execute("""
                        SELECT table_name FROM information_schema.tables
                        WHERE table_schema = 'public'
                          AND table_name NOT LIKE '\\_%%'
                    """)
                    tablesInDb = {row[0] for row in cur.fetchall()}

                    # Snapshot the hits first so the pending set is not
                    # modified while it is being iterated.
                    found = [name for name in pending if name in tablesInDb]
                    for name in found:
                        mapping[name] = dbName
                    pending.difference_update(found)
            finally:
                connection.close()
        except Exception as e:
            logger.debug(f"Catalog query for {dbName} failed: {e}")
|
|
|
|
|
|
def _discoverFkRelationships() -> List[FkRelationship]:
    """Collect one FkRelationship per ``fk_target`` annotation in MODEL_REGISTRY.

    Models whose table has no entry in the table→db mapping are skipped, as
    are ``fk_target`` values that are not dicts or that lack a non-empty
    ``db`` or ``table``. The target column defaults to ``"id"`` and ``softFk``
    defaults to ``False``.

    Returns:
        The relationships sorted by (sourceDb, sourceTable, sourceColumn).
        Each (table, field) pair is visited once, so no pair appears twice.
    """
    dbByTable = _buildTableToDbMap()

    found: List[FkRelationship] = []
    for sourceTable, modelCls in MODEL_REGISTRY.items():
        sourceDb = dbByTable.get(sourceTable)
        if sourceDb is None:
            # Database of the source table is unknown — the edge cannot be described.
            continue

        for columnName, fieldInfo in modelCls.model_fields.items():
            schemaExtra = fieldInfo.json_schema_extra
            target = schemaExtra.get("fk_target") if isinstance(schemaExtra, dict) else None
            if not isinstance(target, dict):
                continue

            targetDb = target.get("db", "")
            targetTable = target.get("table", "")
            if not (targetDb and targetTable):
                continue

            found.append(
                FkRelationship(
                    sourceDb=sourceDb,
                    sourceTable=sourceTable,
                    sourceColumn=columnName,
                    targetDb=targetDb,
                    targetTable=targetTable,
                    targetColumn=target.get("column", "id"),
                    softFk=bool(target.get("softFk", False)),
                )
            )

    return sorted(found, key=lambda rel: (rel.sourceDb, rel.sourceTable, rel.sourceColumn))
|
|
|
|
|
|
def getFkRelationships() -> List[FkRelationship]:
    """Return the cached list of FK relationships (discovered on first call).

    The returned list is the shared cache object itself, not a copy; callers
    should treat it as read-only.

    Discovery runs without ``_lock`` held, so several threads may run it
    concurrently on a cold cache. Whichever result is stored first is kept,
    and every caller receives that same list.
    """
    global _cachedRelationships
    with _lock:
        if _cachedRelationships is not None:
            return _cachedRelationships

    rels = _discoverFkRelationships()

    with _lock:
        # Another thread may have filled the cache while discovery was running;
        # keep its list instead of overwriting it so all callers share one object.
        if _cachedRelationships is None:
            _cachedRelationships = rels
        return _cachedRelationships
|
|
|
|
|
|
def _getTableToDbMap() -> Dict[str, str]:
    """Return the cached table→db mapping (built on first call).

    The returned dict is the shared cache object itself, not a copy; callers
    should treat it as read-only.

    The mapping is built without ``_lock`` held, so several threads may build
    it concurrently on a cold cache. Whichever result is stored first is
    kept, and every caller receives that same dict.
    """
    global _cachedTableToDb
    with _lock:
        if _cachedTableToDb is not None:
            return _cachedTableToDb

    mapping = _buildTableToDbMap()

    with _lock:
        # Another thread may have filled the cache in the meantime; keep its
        # dict instead of overwriting it so all callers share one object.
        if _cachedTableToDb is None:
            _cachedTableToDb = mapping
        return _cachedTableToDb
|
|
|
|
|
|
def _invalidateFkCache() -> None:
    """Clear both caches so the next lookup re-scans (useful for testing).

    Only the relationship list and the table→db mapping are reset; the
    ``_modelsLoaded`` flag is left as it is.
    """
    global _cachedRelationships, _cachedTableToDb
    with _lock:
        _cachedRelationships = _cachedTableToDb = None
|
|
|
|
|
|
# Keys that validateFkTargets() requires in every fk_target dict.
# Additional keys are not reported by that check.
_FK_TARGET_REQUIRED_KEYS = {"db", "table", "labelField"}
|
|
|
|
|
|
def validateFkTargets() -> List[str]:
    """Check every ``fk_target`` annotation of the models in MODEL_REGISTRY.

    An ``fk_target`` must be a dict containing at least the keys ``db``,
    ``table`` and ``labelField``. Only key presence is checked, so a value
    such as ``labelField=None`` passes, and additional keys are not reported.
    Fields without an ``fk_target`` (or with ``fk_target=None``) are ignored.

    Returns:
        One message per offending field, formatted ``<table>.<field>: ...``.
        An empty list means every annotation passed.
    """
    _ensureModelsLoaded()

    problems: List[str] = []
    for tableName, modelCls in MODEL_REGISTRY.items():
        for fieldName, fieldInfo in modelCls.model_fields.items():
            schemaExtra = fieldInfo.json_schema_extra
            fkTarget = schemaExtra.get("fk_target") if isinstance(schemaExtra, dict) else None
            if fkTarget is None:
                continue

            if isinstance(fkTarget, dict):
                missing = _FK_TARGET_REQUIRED_KEYS - fkTarget.keys()
                if missing:
                    problems.append(f"{tableName}.{fieldName}: fk_target missing keys {sorted(missing)}")
            else:
                problems.append(f"{tableName}.{fieldName}: fk_target is not a dict ({type(fkTarget).__name__})")

    return problems
|