gateway/scripts/script_migrate_feature_instance_refs.py
2026-04-25 01:13:01 +02:00

213 lines
6.8 KiB
Python

#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Persistent DB migration: rewrite raw ``featureInstanceId`` UUIDs in stored
workflow graphs to typed ``FeatureInstanceRef`` envelopes.
Why
---
The runtime engine (``executeGraph``) already calls
``materializeFeatureInstanceRefs`` on every run, so legacy graphs *execute*
correctly today. The Editor however reads the persisted ``graph`` field
directly and shows whatever shape is on disk — until a workflow is saved
again it still displays the old plain-string format.
What this script does
---------------------
Walks every row of:
* ``poweron_graphicaleditor.Automation2Workflow`` (legacy ``graph`` column)
* ``poweron_graphicaleditor.AutoVersion`` (canonical ``graph`` column)
For each row, it:
1. Loads the JSONB ``graph`` column.
2. Applies :func:`materializeFeatureInstanceRefs`.
3. Persists the result if (and only if) it differs from the input.
Idempotent — re-runs are no-ops.
Usage
-----
::
python scripts/script_migrate_feature_instance_refs.py --dry-run
python scripts/script_migrate_feature_instance_refs.py
Plan: ``wiki/c-work/1-plan/2026-04-typed-action-followups.md`` (Track C1).
"""
from __future__ import annotations
import argparse
import json
import logging
import os
import sys
from pathlib import Path
from typing import Any, Dict, Iterable, List, Tuple
# --- Path bootstrap ---------------------------------------------------------
# Make the script runnable from any CWD: put the gateway package root on
# sys.path (for the `modules.*` imports below) and chdir into it so any
# relative resource paths resolve the same as in production.
_scriptPath = Path(__file__).resolve()
_gatewayPath = _scriptPath.parent.parent  # scripts/ -> gateway/
sys.path.insert(0, str(_gatewayPath))
os.chdir(str(_gatewayPath))
import psycopg2 # noqa: E402
from psycopg2.extras import Json, RealDictCursor # noqa: E402
from modules.shared.configuration import APP_CONFIG # noqa: E402
from modules.workflows.automation2.featureInstanceRefMigration import ( # noqa: E402
materializeFeatureInstanceRefs,
)
# Script-level logging: timestamps + level, INFO by default.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger("script_migrate_feature_instance_refs")

# Target database and the tables that persist workflow graphs, paired with
# their primary-key column. Table names are pre-quoted because they are
# interpolated directly into SQL (trusted, fixed list — never user input).
_DB_NAME = "poweron_graphicaleditor"
_TABLES_AND_PK: List[Tuple[str, str]] = [
    ('"Automation2Workflow"', "id"),
    ('"AutoVersion"', "id"),
]
def _connect() -> "psycopg2.extensions.connection":
    """Open a psycopg2 connection to the graphical-editor database.

    Credentials come from ``APP_CONFIG``; ``DB_PASSWORD_SECRET`` takes
    precedence over ``DB_PASSWORD``.

    Raises:
        SystemExit: when user or password configuration is missing.
    """
    host = APP_CONFIG.get("DB_HOST", "localhost")
    port = int(APP_CONFIG.get("DB_PORT", "5432"))
    user = APP_CONFIG.get("DB_USER")
    password = APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD")
    if not (user and password):
        raise SystemExit("DB_USER and DB_PASSWORD/DB_PASSWORD_SECRET must be set")
    return psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        database=_DB_NAME,
    )
def _loadGraph(value: Any) -> Dict[str, Any]:
"""psycopg2 returns JSONB as a Python dict, but legacy data may be a JSON string."""
if isinstance(value, dict):
return value
if isinstance(value, (bytes, bytearray)):
value = value.decode("utf-8", errors="replace")
if isinstance(value, str) and value.strip():
try:
return json.loads(value)
except json.JSONDecodeError:
return {}
return {}
def _countMigrations(before: Dict[str, Any], after: Dict[str, Any]) -> int:
"""Count how many ``featureInstanceId`` values were rewritten."""
if before == after:
return 0
bnodes = before.get("nodes") if isinstance(before, dict) else None
anodes = after.get("nodes") if isinstance(after, dict) else None
if not isinstance(bnodes, list) or not isinstance(anodes, list):
return 0
count = 0
for bn, an in zip(bnodes, anodes):
bp = (bn.get("parameters") or {}) if isinstance(bn, dict) else {}
ap = (an.get("parameters") or {}) if isinstance(an, dict) else {}
if bp.get("featureInstanceId") != ap.get("featureInstanceId"):
count += 1
return count
def _migrateOneTable(
    conn,
    table: str,
    pk: str,
    *,
    dryRun: bool,
) -> Dict[str, int]:
    """Rewrite the ``graph`` column of one table in place.

    Args:
        conn: Open psycopg2 connection; committed here unless ``dryRun``.
        table: Pre-quoted table name taken from ``_TABLES_AND_PK`` — trusted,
            which is why the f-string SQL below is acceptable.
        pk: Primary-key column name used in the UPDATE's WHERE clause.
        dryRun: When True, only log what would change; write nothing.

    Returns:
        Counts dict: ``scanned`` rows, ``rowsChanged`` rows written (or that
        would be), and ``fieldsRewritten`` featureInstanceId values migrated.
    """
    counts = {"scanned": 0, "rowsChanged": 0, "fieldsRewritten": 0}
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        # fetchall() materializes every row up front — acceptable for the
        # expected table sizes; NOTE(review): stream if tables grow large.
        cur.execute(f'SELECT {pk} AS pk, "graph" AS graph FROM {table}')
        rows: Iterable[Dict[str, Any]] = cur.fetchall()
        for row in rows:
            counts["scanned"] += 1
            before = _loadGraph(row.get("graph"))
            if not before:
                # Empty or unparseable graph — nothing to migrate.
                continue
            after = materializeFeatureInstanceRefs(before)
            if before == after:
                # Already in the typed-ref shape; idempotent re-run path.
                continue
            rewritten = _countMigrations(before, after)
            if rewritten == 0:
                # Graph differs but no featureInstanceId value changed — skip
                # the write. NOTE(review): this discards any *other* edits the
                # materializer may have made; confirm that is intentional.
                continue
            counts["rowsChanged"] += 1
            counts["fieldsRewritten"] += rewritten
            logger.info(
                "%s id=%s: %d featureInstanceId value(s) %s",
                table,
                row["pk"],
                rewritten,
                "would be migrated [dry-run]" if dryRun else "migrated",
            )
            if not dryRun:
                # Separate short-lived cursor for the write-back.
                with conn.cursor() as updCur:
                    updCur.execute(
                        f'UPDATE {table} SET "graph" = %s WHERE {pk} = %s',
                        (Json(after), row["pk"]),
                    )
    if not dryRun:
        # Single commit per table keeps each table's migration atomic.
        conn.commit()
    return counts
def migrate(dryRun: bool = False) -> Dict[str, Dict[str, int]]:
    """Run the migration over every tracked table.

    Opens one connection, processes each table in ``_TABLES_AND_PK``, and
    always closes the connection, even on error.

    Returns:
        Mapping of table name to its counts dict.
    """
    conn = _connect()
    try:
        return {
            tableName: _migrateOneTable(conn, tableName, pkColumn, dryRun=dryRun)
            for tableName, pkColumn in _TABLES_AND_PK
        }
    finally:
        conn.close()
def main() -> int:
    """CLI entry point: parse flags, run the migration, log a summary.

    Returns:
        Process exit code (always 0; failures raise and abort instead).
    """
    parser = argparse.ArgumentParser(
        description="Persist materializeFeatureInstanceRefs into stored workflow graphs."
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Report what would be migrated without writing back.",
    )
    dryRun = parser.parse_args().dry_run
    logger.info(
        "Starting featureInstanceRef DB migration (dry-run=%s, db=%s)",
        dryRun,
        _DB_NAME,
    )
    summary = migrate(dryRun=dryRun)
    # Accumulate grand totals while emitting one line per table.
    rowTotal = 0
    fieldTotal = 0
    for table, counts in summary.items():
        rowTotal += counts["rowsChanged"]
        fieldTotal += counts["fieldsRewritten"]
        logger.info(
            "%s: scanned=%d rowsChanged=%d fieldsRewritten=%d",
            table,
            counts["scanned"],
            counts["rowsChanged"],
            counts["fieldsRewritten"],
        )
    logger.info(
        "%s: %d row(s) %s, %d featureInstanceId value(s) total.",
        "Dry-run summary" if dryRun else "Migration summary",
        rowTotal,
        "would be updated" if dryRun else "updated",
        fieldTotal,
    )
    return 0
# Script entry point: propagate main()'s return value as the exit code.
if __name__ == "__main__":
    sys.exit(main())