gateway/modules/migrations/_archive/migrate_folders_to_groups.py

"""
One-time migration: Convert FileFolder tree + FileItem.folderId to table_groupings.
Archived per wiki plan 2026-05-formgenerator-tree-and-folder-recovery (Stage 1.A).
Product direction: keep FileFolder + folderId; do not run DROP migrations.
This script remains for audit / one-off data rescue only.
Run this BEFORE dropping the physical FileFolder table and FileItem.folderId column
from the database (those would be separate Alembic/SQL steps -- not part of current product path).
Usage (from gateway working directory):
python -m modules.migrations._archive.migrate_folders_to_groups [--dry-run] [--verbose]
python -m modules.migrations._archive.migrate_folders_to_groups --execute --verbose
Steps:
1. For each distinct (userId, mandateId) combination that has FileFolder records:
a. Build the full folder tree (recursive)
b. Write it as a TableGroupNode tree into table_groupings (contextKey='files/list')
merges with any existing groups rather than overwriting
c. For each FileItem with a folderId that maps into this tree,
add its id to the matching group's itemIds
2. Print a summary (rows migrated, groups created, files assigned)
3. If not --dry-run: commits the inserts/updates
NOTE: Schema changes (ALTER TABLE DROP COLUMN, DROP TABLE) are intentionally
NOT performed by this script. Run the corresponding Alembic migration
(migrations/versions/xxxx_drop_folder_columns.py) afterwards.
"""
import argparse
import json
import logging
import uuid
from typing import Optional

logger = logging.getLogger(__name__)


def _scalarRow(row):
    """Return the first column of a fetched row, for both tuple and dict cursors."""
    if row is None:
        return None
    if isinstance(row, dict):
        return next(iter(row.values()))
    return row[0]


# ── Helpers ──────────────────────────────────────────────────────────────────
def _build_tree(folders: list, parent_id: Optional[str]) -> list:
    """Recursively build TableGroupNode-compatible dicts from a flat folder list."""
    children = [f for f in folders if f.get("parentId") == parent_id]
    result = []
    for folder in children:
        node = {
            "id": str(uuid.uuid4()),
            "name": folder["name"],
            "itemIds": [],
            "subGroups": _build_tree(folders, folder["id"]),
            "meta": {"migratedFromFolderId": folder["id"]},
        }
        result.append(node)
    return result
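
# Example (illustrative data, not from the DB): the flat list
#   [{"id": "a", "name": "Docs", "parentId": None},
#    {"id": "b", "name": "2024", "parentId": "a"}]
# becomes one root node "Docs" with a single subGroup "2024"; each node gets a
# fresh uuid4 id and records its source folder id under meta.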


def _assign_files_to_nodes(nodes: list, files_by_folder: dict) -> list:
    """Recursively assign file IDs to group nodes based on folder mapping."""
    for node in nodes:
        folder_id = (node.get("meta") or {}).get("migratedFromFolderId")
        if folder_id and folder_id in files_by_folder:
            node["itemIds"] = list(files_by_folder[folder_id])
        node["subGroups"] = _assign_files_to_nodes(node.get("subGroups", []), files_by_folder)
    return nodes


def _count_items(nodes: list) -> int:
    """Count all itemIds across a group tree, including nested subGroups."""
    total = 0
    for node in nodes:
        total += len(node.get("itemIds", []))
        total += _count_items(node.get("subGroups", []))
    return total


def _now_ts() -> str:
    from modules.shared.timeUtils import getUtcTimestamp
    return getUtcTimestamp()


# ── Main migration ────────────────────────────────────────────────────────────
def run_migration(dry_run: bool = True, verbose: bool = False):
    """Main migration entry point."""
    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
    logger.info(f"Starting folder to group migration (dry_run={dry_run})")
    from modules.connectors.connectorDbPostgre import getCachedConnector
    from modules.shared.configuration import APP_CONFIG

    connector = getCachedConnector(
        dbHost=APP_CONFIG.get("DB_HOST", "_no_config_default_data"),
        dbDatabase="poweron_management",
        dbUser=APP_CONFIG.get("DB_USER"),
        dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET"),
        dbPort=int(APP_CONFIG.get("DB_PORT", 5432)),
        userId=None,
    )
    if not connector or not connector.connection:
        logger.error("Could not obtain a DB connection. Aborting.")
        return
    conn = connector.connection
    cur = conn.cursor()

    # ── 1. Check that the source tables still exist ───────────────────────────
    cur.execute("""
        SELECT EXISTS (
            SELECT 1 FROM information_schema.tables
            WHERE table_name = 'FileFolder'
        ) AS ok
    """)
    folder_table_exists = bool(_scalarRow(cur.fetchone()))
    cur.execute("""
        SELECT EXISTS (
            SELECT 1 FROM information_schema.columns
            WHERE table_name = 'FileItem' AND column_name = 'folderId'
        ) AS ok
    """)
    folder_column_exists = bool(_scalarRow(cur.fetchone()))
    if not folder_table_exists and not folder_column_exists:
        logger.info("FileFolder table and FileItem.folderId column not found — migration already applied or not needed.")
        return
    if not folder_table_exists:
        logger.warning("FileFolder table missing but FileItem.folderId column still present. Only file assignments will be migrated.")
    if not folder_column_exists:
        logger.warning("FileItem.folderId column missing but FileFolder table still present. Only group tree structure will be migrated.")

    # ── 2. Load all folders ───────────────────────────────────────────────────
    folders_by_user: dict = {}
    if folder_table_exists:
        cur.execute('SELECT "id", "name", "parentId", "sysCreatedBy", "mandateId" FROM "FileFolder"')
        for row in cur.fetchall():
            fid, fname, parent_id, user_id, mandate_id = row
            key = (str(user_id), str(mandate_id) if mandate_id else "")
            folders_by_user.setdefault(key, []).append({
                "id": fid, "name": fname, "parentId": parent_id,
            })
    logger.info(f"Loaded folders for {len(folders_by_user)} (user, mandate) combinations")

    # ── 3. Load file to folder assignments ────────────────────────────────────
    files_by_key: dict = {}
    if folder_column_exists:
        cur.execute(
            'SELECT "id", "folderId", "sysCreatedBy", "mandateId" FROM "FileItem" '
            'WHERE "folderId" IS NOT NULL AND "folderId" != \'\''
        )
        for row in cur.fetchall():
            file_id, folder_id, user_id, mandate_id = row
            key = (str(user_id), str(mandate_id) if mandate_id else "")
            files_by_key.setdefault(key, {}).setdefault(folder_id, []).append(file_id)
    total_files = sum(
        sum(len(v) for v in d.values()) for d in files_by_key.values()
    )
    logger.info(f"Found {total_files} file to folder assignments across {len(files_by_key)} (user, mandate) combos")

    # ── 4. Combine and upsert groupings ──────────────────────────────────────
    all_keys = set(folders_by_user.keys()) | set(files_by_key.keys())
    stats = {"groups_created": 0, "groupings_upserted": 0, "files_assigned": 0}
    for key in all_keys:
        user_id, mandate_id = key
        folders = folders_by_user.get(key, [])
        files_by_folder = files_by_key.get(key, {})
        # Build tree
        roots = _build_tree(folders, None)
        roots = _assign_files_to_nodes(roots, files_by_folder)
        # Handle files in unknown folders (folder no longer in tree)
        known_folder_ids = {f["id"] for f in folders}
        for folder_id, file_ids in files_by_folder.items():
            if folder_id not in known_folder_ids:
                # Orphaned files: put them in an "Orphaned" group
                roots.append({
                    "id": str(uuid.uuid4()),
                    "name": f"Orphaned (folder {folder_id[:8]}…)",
                    "itemIds": file_ids,
                    "subGroups": [],
                    "meta": {"migratedFromFolderId": folder_id, "orphaned": True},
                })
        if not roots:
            continue
        n_items = _count_items(roots)
        stats["groups_created"] += len(roots)
        stats["files_assigned"] += n_items
        context_key = "files/list"
        if verbose:
            logger.debug(f" user={user_id} mandate={mandate_id}: {len(roots)} root groups, {n_items} files")
        if not dry_run:
            # Check for existing grouping. Note: TableGrouping is looked up by
            # (userId, contextKey) only, so groups from different mandates of the
            # same user merge into one grouping row.
            cur.execute(
                'SELECT "id", "rootGroups" FROM "TableGrouping" WHERE "userId" = %s AND "contextKey" = %s',
                (user_id, context_key),
            )
            existing_row = cur.fetchone()
            if existing_row:
                existing_id, existing_raw = existing_row
                existing_roots = json.loads(existing_raw) if isinstance(existing_raw, str) else (existing_raw or [])
                # Merge: append migrated groups (avoid duplicates by migratedFromFolderId)
                existing_meta_ids = {
                    (n.get("meta") or {}).get("migratedFromFolderId")
                    for n in existing_roots
                    if (n.get("meta") or {}).get("migratedFromFolderId")
                }
                new_roots = existing_roots + [
                    r for r in roots
                    if (r.get("meta") or {}).get("migratedFromFolderId") not in existing_meta_ids
                ]
                cur.execute(
                    'UPDATE "TableGrouping" SET "rootGroups" = %s, "updatedAt" = %s WHERE "id" = %s',
                    (json.dumps(new_roots), _now_ts(), existing_id),
                )
            else:
                new_id = str(uuid.uuid4())
                cur.execute(
                    'INSERT INTO "TableGrouping" ("id", "userId", "contextKey", "rootGroups", "updatedAt") '
                    'VALUES (%s, %s, %s, %s, %s)',
                    (new_id, user_id, context_key, json.dumps(roots), _now_ts()),
                )
            stats["groupings_upserted"] += 1

    # ── 5. Summary ────────────────────────────────────────────────────────────
    if not dry_run:
        conn.commit()
        logger.info("Migration committed.")
    else:
        logger.info("DRY RUN — no changes written.")
    logger.info(
        f"Summary: groupings_upserted={stats['groupings_upserted']}, "
        f"groups_created={stats['groups_created']}, "
        f"files_assigned={stats['files_assigned']}"
    )
    logger.info(
        "Next steps (run after verifying data):\n"
        " 1. Run Alembic migration to DROP COLUMN FileItem.folderId\n"
        " 2. Run Alembic migration to DROP TABLE FileFolder"
    )
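
# A quick post-run sanity check (illustrative SQL, not part of the script; assumes
# "rootGroups" is stored as text or json castable to jsonb):
#
#   SELECT "userId", jsonb_array_length("rootGroups"::jsonb) AS root_group_count
#   FROM "TableGrouping"
#   WHERE "contextKey" = 'files/list';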

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Migrate FileFolder tree to table_groupings (archived script)")
    # --dry-run is the default; only --execute enables writes (it overrides --dry-run).
    parser.add_argument("--dry-run", action="store_true", default=True, help="Preview only, no DB writes (default)")
    parser.add_argument("--execute", action="store_true", help="Actually write to DB (disables dry-run)")
    parser.add_argument("--verbose", action="store_true", help="Show per-user details")
    args = parser.parse_args()
    dry_run = not args.execute
    run_migration(dry_run=dry_run, verbose=args.verbose)