#!/usr/bin/env python3 # Copyright (c) 2025 Patrick Motsch # All rights reserved. """ Persistent DB migration: rewrite raw ``featureInstanceId`` UUIDs in stored workflow graphs to typed ``FeatureInstanceRef`` envelopes. Why --- The runtime engine (``executeGraph``) already calls ``materializeFeatureInstanceRefs`` on every run, so legacy graphs *execute* correctly today. The Editor however reads the persisted ``graph`` field directly and shows whatever shape is on disk — until a workflow is saved again it still displays the old plain-string format. What this script does --------------------- Walks every row of: * ``poweron_graphicaleditor.Automation2Workflow`` (legacy ``graph`` column) * ``poweron_graphicaleditor.AutoVersion`` (canonical ``graph`` column) For each row, it: 1. Loads the JSONB ``graph`` column. 2. Applies :func:`materializeFeatureInstanceRefs`. 3. Persists the result if (and only if) it differs from the input. Idempotent — re-runs are no-ops. Usage ----- :: python scripts/script_migrate_feature_instance_refs.py --dry-run python scripts/script_migrate_feature_instance_refs.py Plan: ``wiki/c-work/1-plan/2026-04-typed-action-followups.md`` (Track C1). """ from __future__ import annotations import argparse import json import logging import os import sys from pathlib import Path from typing import Any, Dict, Iterable, List, Tuple _scriptPath = Path(__file__).resolve() _gatewayPath = _scriptPath.parent.parent sys.path.insert(0, str(_gatewayPath)) os.chdir(str(_gatewayPath)) import psycopg2 # noqa: E402 from psycopg2.extras import Json, RealDictCursor # noqa: E402 from modules.shared.configuration import APP_CONFIG # noqa: E402 from modules.workflows.automation2.featureInstanceRefMigration import ( # noqa: E402 materializeFeatureInstanceRefs, ) logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger("script_migrate_feature_instance_refs") _DB_NAME = "poweron_graphicaleditor" _TABLES_AND_PK: List[Tuple[str, str]] = [ ('"Automation2Workflow"', "id"), ('"AutoVersion"', "id"), ] def _connect() -> "psycopg2.extensions.connection": cfg = { "host": APP_CONFIG.get("DB_HOST", "localhost"), "port": int(APP_CONFIG.get("DB_PORT", "5432")), "user": APP_CONFIG.get("DB_USER"), "password": ( APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD") ), "database": _DB_NAME, } if not cfg["user"] or not cfg["password"]: raise SystemExit("DB_USER and DB_PASSWORD/DB_PASSWORD_SECRET must be set") return psycopg2.connect(**cfg) def _loadGraph(value: Any) -> Dict[str, Any]: """psycopg2 returns JSONB as a Python dict, but legacy data may be a JSON string.""" if isinstance(value, dict): return value if isinstance(value, (bytes, bytearray)): value = value.decode("utf-8", errors="replace") if isinstance(value, str) and value.strip(): try: return json.loads(value) except json.JSONDecodeError: return {} return {} def _countMigrations(before: Dict[str, Any], after: Dict[str, Any]) -> int: """Count how many ``featureInstanceId`` values were rewritten.""" if before == after: return 0 bnodes = before.get("nodes") if isinstance(before, dict) else None anodes = after.get("nodes") if isinstance(after, dict) else None if not isinstance(bnodes, list) or not isinstance(anodes, list): return 0 count = 0 for bn, an in zip(bnodes, anodes): bp = (bn.get("parameters") or {}) if isinstance(bn, dict) else {} ap = (an.get("parameters") or {}) if isinstance(an, dict) else {} if bp.get("featureInstanceId") != ap.get("featureInstanceId"): count += 1 return count def _migrateOneTable( conn, table: str, pk: str, *, dryRun: bool, ) -> Dict[str, int]: """Process one table; returns counts dict.""" counts = {"scanned": 0, "rowsChanged": 0, "fieldsRewritten": 0} with conn.cursor(cursor_factory=RealDictCursor) as cur: cur.execute(f'SELECT {pk} AS pk, "graph" AS graph FROM {table}') rows: Iterable[Dict[str, Any]] = cur.fetchall() for row in rows: counts["scanned"] += 1 before = _loadGraph(row.get("graph")) if not before: continue after = materializeFeatureInstanceRefs(before) if before == after: continue rewritten = _countMigrations(before, after) if rewritten == 0: continue counts["rowsChanged"] += 1 counts["fieldsRewritten"] += rewritten logger.info( "%s id=%s: %d featureInstanceId value(s) %s", table, row["pk"], rewritten, "would be migrated [dry-run]" if dryRun else "migrated", ) if not dryRun: with conn.cursor() as updCur: updCur.execute( f'UPDATE {table} SET "graph" = %s WHERE {pk} = %s', (Json(after), row["pk"]), ) if not dryRun: conn.commit() return counts def migrate(dryRun: bool = False) -> Dict[str, Dict[str, int]]: """Walk all tracked tables and migrate. Returns per-table counts.""" summary: Dict[str, Dict[str, int]] = {} conn = _connect() try: for table, pk in _TABLES_AND_PK: summary[table] = _migrateOneTable(conn, table, pk, dryRun=dryRun) finally: conn.close() return summary def main() -> int: parser = argparse.ArgumentParser( description="Persist materializeFeatureInstanceRefs into stored workflow graphs." ) parser.add_argument( "--dry-run", action="store_true", help="Report what would be migrated without writing back.", ) args = parser.parse_args() logger.info( "Starting featureInstanceRef DB migration (dry-run=%s, db=%s)", args.dry_run, _DB_NAME, ) summary = migrate(dryRun=args.dry_run) totalRows = sum(s["rowsChanged"] for s in summary.values()) totalFields = sum(s["fieldsRewritten"] for s in summary.values()) for table, counts in summary.items(): logger.info( "%s: scanned=%d rowsChanged=%d fieldsRewritten=%d", table, counts["scanned"], counts["rowsChanged"], counts["fieldsRewritten"], ) logger.info( "%s: %d row(s) %s, %d featureInstanceId value(s) total.", "Dry-run summary" if args.dry_run else "Migration summary", totalRows, "would be updated" if args.dry_run else "updated", totalFields, ) return 0 if __name__ == "__main__": sys.exit(main())