gateway/scripts/script_migrate_feature_instance_refs.py
2026-04-25 01:13:01 +02:00

213 lines
6.8 KiB
Python

#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Persistent DB migration: rewrite raw ``featureInstanceId`` UUIDs in stored
workflow graphs to typed ``FeatureInstanceRef`` envelopes.
Why
---
The runtime engine (``executeGraph``) already calls
``materializeFeatureInstanceRefs`` on every run, so legacy graphs *execute*
correctly today. The Editor however reads the persisted ``graph`` field
directly and shows whatever shape is on disk — until a workflow is saved
again it still displays the old plain-string format.
What this script does
---------------------
Walks every row of:
* ``poweron_graphicaleditor.Automation2Workflow`` (legacy ``graph`` column)
* ``poweron_graphicaleditor.AutoVersion`` (canonical ``graph`` column)
For each row, it:
1. Loads the JSONB ``graph`` column.
2. Applies :func:`materializeFeatureInstanceRefs`.
3. Persists the result if (and only if) it differs from the input.
Idempotent — re-runs are no-ops.
Usage
-----
::
python scripts/script_migrate_feature_instance_refs.py --dry-run
python scripts/script_migrate_feature_instance_refs.py
Plan: ``wiki/c-work/1-plan/2026-04-typed-action-followups.md`` (Track C1).
"""
from __future__ import annotations
import argparse
import json
import logging
import os
import sys
from pathlib import Path
from typing import Any, Dict, Iterable, List, Tuple
# --- Path bootstrap ---------------------------------------------------------
# Make the script runnable from any CWD: put the gateway package root on
# sys.path (for the `modules.*` imports below) and chdir into it so any
# relative resource paths resolve the same as in production.
_scriptPath = Path(__file__).resolve()
_gatewayPath = _scriptPath.parent.parent  # scripts/ -> gateway/
sys.path.insert(0, str(_gatewayPath))
os.chdir(str(_gatewayPath))
import psycopg2 # noqa: E402
from psycopg2.extras import Json, RealDictCursor # noqa: E402
from modules.shared.configuration import APP_CONFIG # noqa: E402
from modules.workflows.automation2.featureInstanceRefMigration import ( # noqa: E402
materializeFeatureInstanceRefs,
)
# Script-level logging: timestamps + level, INFO by default.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger("script_migrate_feature_instance_refs")

# Target database and the tables that persist workflow graphs, paired with
# their primary-key column. Table names are pre-quoted because they are
# interpolated directly into SQL (trusted, fixed list — never user input).
_DB_NAME = "poweron_graphicaleditor"
_TABLES_AND_PK: List[Tuple[str, str]] = [
    ('"Automation2Workflow"', "id"),
    ('"AutoVersion"', "id"),
]
def _connect() -> "psycopg2.extensions.connection":
    """Open a psycopg2 connection to the graphical-editor database.

    Credentials come from ``APP_CONFIG``; ``DB_PASSWORD_SECRET`` takes
    precedence over ``DB_PASSWORD``.

    Raises:
        SystemExit: when user or password configuration is missing.
    """
    host = APP_CONFIG.get("DB_HOST", "localhost")
    port = int(APP_CONFIG.get("DB_PORT", "5432"))
    user = APP_CONFIG.get("DB_USER")
    password = APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD")
    if not (user and password):
        raise SystemExit("DB_USER and DB_PASSWORD/DB_PASSWORD_SECRET must be set")
    return psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        database=_DB_NAME,
    )
def _loadGraph(value: Any) -> Dict[str, Any]:
"""psycopg2 returns JSONB as a Python dict, but legacy data may be a JSON string."""
if isinstance(value, dict):
return value
if isinstance(value, (bytes, bytearray)):
value = value.decode("utf-8", errors="replace")
if isinstance(value, str) and value.strip():
try:
return json.loads(value)
except json.JSONDecodeError:
return {}
return {}
def _countMigrations(before: Dict[str, Any], after: Dict[str, Any]) -> int:
"""Count how many ``featureInstanceId`` values were rewritten."""
if before == after:
return 0
bnodes = before.get("nodes") if isinstance(before, dict) else None
anodes = after.get("nodes") if isinstance(after, dict) else None
if not isinstance(bnodes, list) or not isinstance(anodes, list):
return 0
count = 0
for bn, an in zip(bnodes, anodes):
bp = (bn.get("parameters") or {}) if isinstance(bn, dict) else {}
ap = (an.get("parameters") or {}) if isinstance(an, dict) else {}
if bp.get("featureInstanceId") != ap.get("featureInstanceId"):
count += 1
return count
def _migrateOneTable(
    conn,
    table: str,
    pk: str,
    *,
    dryRun: bool,
) -> Dict[str, int]:
    """Rewrite the ``graph`` column of one table in place.

    Args:
        conn: Open psycopg2 connection; committed here unless ``dryRun``.
        table: Pre-quoted table name taken from ``_TABLES_AND_PK`` — trusted,
            which is why the f-string SQL below is acceptable.
        pk: Primary-key column name used in the UPDATE's WHERE clause.
        dryRun: When True, only log what would change; write nothing.

    Returns:
        Counts dict: ``scanned`` rows, ``rowsChanged`` rows written (or that
        would be), and ``fieldsRewritten`` featureInstanceId values migrated.
    """
    counts = {"scanned": 0, "rowsChanged": 0, "fieldsRewritten": 0}
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        # fetchall() materializes every row up front — acceptable for the
        # expected table sizes; NOTE(review): stream if tables grow large.
        cur.execute(f'SELECT {pk} AS pk, "graph" AS graph FROM {table}')
        rows: Iterable[Dict[str, Any]] = cur.fetchall()
        for row in rows:
            counts["scanned"] += 1
            before = _loadGraph(row.get("graph"))
            if not before:
                # Empty or unparseable graph — nothing to migrate.
                continue
            after = materializeFeatureInstanceRefs(before)
            if before == after:
                # Already in the typed-ref shape; idempotent re-run path.
                continue
            rewritten = _countMigrations(before, after)
            if rewritten == 0:
                # Graph differs but no featureInstanceId value changed — skip
                # the write. NOTE(review): this discards any *other* edits the
                # materializer may have made; confirm that is intentional.
                continue
            counts["rowsChanged"] += 1
            counts["fieldsRewritten"] += rewritten
            logger.info(
                "%s id=%s: %d featureInstanceId value(s) %s",
                table,
                row["pk"],
                rewritten,
                "would be migrated [dry-run]" if dryRun else "migrated",
            )
            if not dryRun:
                # Separate short-lived cursor for the write-back.
                with conn.cursor() as updCur:
                    updCur.execute(
                        f'UPDATE {table} SET "graph" = %s WHERE {pk} = %s',
                        (Json(after), row["pk"]),
                    )
    if not dryRun:
        # Single commit per table keeps each table's migration atomic.
        conn.commit()
    return counts
def migrate(dryRun: bool = False) -> Dict[str, Dict[str, int]]:
    """Run the migration over every tracked table.

    Opens one connection, processes each table in ``_TABLES_AND_PK``, and
    always closes the connection, even on error.

    Returns:
        Mapping of table name to its counts dict.
    """
    conn = _connect()
    try:
        return {
            tableName: _migrateOneTable(conn, tableName, pkColumn, dryRun=dryRun)
            for tableName, pkColumn in _TABLES_AND_PK
        }
    finally:
        conn.close()
def main() -> int:
    """CLI entry point: parse flags, run the migration, log a summary.

    Returns:
        Process exit code (always 0; failures raise and abort instead).
    """
    parser = argparse.ArgumentParser(
        description="Persist materializeFeatureInstanceRefs into stored workflow graphs."
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Report what would be migrated without writing back.",
    )
    dryRun = parser.parse_args().dry_run
    logger.info(
        "Starting featureInstanceRef DB migration (dry-run=%s, db=%s)",
        dryRun,
        _DB_NAME,
    )
    summary = migrate(dryRun=dryRun)
    # Accumulate grand totals while emitting one line per table.
    rowTotal = 0
    fieldTotal = 0
    for table, counts in summary.items():
        rowTotal += counts["rowsChanged"]
        fieldTotal += counts["fieldsRewritten"]
        logger.info(
            "%s: scanned=%d rowsChanged=%d fieldsRewritten=%d",
            table,
            counts["scanned"],
            counts["rowsChanged"],
            counts["fieldsRewritten"],
        )
    logger.info(
        "%s: %d row(s) %s, %d featureInstanceId value(s) total.",
        "Dry-run summary" if dryRun else "Migration summary",
        rowTotal,
        "would be updated" if dryRun else "updated",
        fieldTotal,
    )
    return 0
# Script entry point: propagate main()'s return value as the exit code.
if __name__ == "__main__":
    sys.exit(main())