gateway/modules/migrations/migrate_folders_to_groups.py

240 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
One-time migration: Convert FileFolder tree + FileItem.folderId → table_groupings.
Run this BEFORE dropping the physical FileFolder table and FileItem.folderId column
from the database (those are separate Alembic/SQL steps).
Usage:
python -m modules.migrations.migrate_folders_to_groups [--dry-run] [--verbose]
Steps:
1. For each distinct (userId, mandateId) combination that has FileFolder records:
a. Build the full folder tree (recursive)
b. Write it as a TableGroupNode tree into table_groupings (contextKey='files/list')
merges with any existing groups rather than overwriting
c. For each FileItem with a folderId that maps into this tree,
add its id to the matching group's itemIds
2. Print a summary (rows migrated, groups created, files assigned)
3. If not --dry-run: commits the inserts/updates
NOTE: Schema changes (ALTER TABLE DROP COLUMN, DROP TABLE) are intentionally
NOT performed by this script. Run the corresponding Alembic migration
(migrations/versions/xxxx_drop_folder_columns.py) afterwards.
"""
import argparse
import json
import logging
import uuid
from typing import Optional
logger = logging.getLogger(__name__)
# ── Helpers ──────────────────────────────────────────────────────────────────
def _build_tree(folders: list, parent_id: Optional[str]) -> list:
"""Recursively build TableGroupNode-compatible dicts from a flat folder list."""
children = [f for f in folders if f.get("parentId") == parent_id]
result = []
for folder in children:
node = {
"id": str(uuid.uuid4()),
"name": folder["name"],
"itemIds": [],
"subGroups": _build_tree(folders, folder["id"]),
"meta": {"migratedFromFolderId": folder["id"]},
}
result.append(node)
return result
def _assign_files_to_nodes(nodes: list, files_by_folder: dict) -> list:
"""Recursively assign file IDs to group nodes based on folder mapping."""
for node in nodes:
folder_id = (node.get("meta") or {}).get("migratedFromFolderId")
if folder_id and folder_id in files_by_folder:
node["itemIds"] = list(files_by_folder[folder_id])
node["subGroups"] = _assign_files_to_nodes(node.get("subGroups", []), files_by_folder)
return nodes
def _count_items(nodes: list) -> int:
total = 0
for node in nodes:
total += len(node.get("itemIds", []))
total += _count_items(node.get("subGroups", []))
return total
def _now_ts() -> str:
    """Return the current UTC timestamp string via the shared project helper."""
    # Imported lazily so that importing this module stays side-effect free.
    from modules.shared import timeUtils
    return timeUtils.getUtcTimestamp()
# ── Main migration ────────────────────────────────────────────────────────────
def run_migration(dry_run: bool = True, verbose: bool = False):
    """Migrate FileFolder trees + FileItem.folderId links into TableGrouping rows.

    For every (userId, mandateId) combination found in the legacy tables this
    builds a group tree (contextKey 'files/list'), assigns the file ids into
    the matching groups, and upserts the result into "TableGrouping".
    Schema changes (dropping the legacy table/column) are intentionally left
    to a separate Alembic migration — see the module docstring.

    Args:
        dry_run: When True (default) nothing is written or committed; the
            migration is only computed and summarised.
        verbose: Enables DEBUG logging plus one detail line per (user, mandate).
    """
    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
    logger.info(f"Starting folder→group migration (dry_run={dry_run})")
    from modules.connectors.connectorDbPostgre import getCachedConnector
    connector = getCachedConnector()
    if not connector or not connector.connection:
        logger.error("Could not obtain a DB connection. Aborting.")
        return
    conn = connector.connection
    cur = conn.cursor()
    # Close the cursor even on unexpected errors (it was leaked before).
    try:
        # ── 1. Check that the source tables still exist ───────────────────────
        cur.execute("""
            SELECT EXISTS (
                SELECT 1 FROM information_schema.tables
                WHERE table_name = 'FileFolder'
            )
        """)
        folder_table_exists = cur.fetchone()[0]
        cur.execute("""
            SELECT EXISTS (
                SELECT 1 FROM information_schema.columns
                WHERE table_name = 'FileItem' AND column_name = 'folderId'
            )
        """)
        folder_column_exists = cur.fetchone()[0]
        if not folder_table_exists and not folder_column_exists:
            logger.info("FileFolder table and FileItem.folderId column not found — migration already applied or not needed.")
            return
        if not folder_table_exists:
            logger.warning("FileFolder table missing but FileItem.folderId column still present. Only file assignments will be migrated.")
        if not folder_column_exists:
            logger.warning("FileItem.folderId column missing but FileFolder table still present. Only group tree structure will be migrated.")

        # ── 2. Load all folders, keyed by (userId, mandateId) ─────────────────
        folders_by_user: dict = {}
        if folder_table_exists:
            cur.execute('SELECT "id", "name", "parentId", "sysCreatedBy", "mandateId" FROM "FileFolder"')
            for row in cur.fetchall():
                fid, fname, parent_id, user_id, mandate_id = row
                key = (str(user_id), str(mandate_id) if mandate_id else "")
                folders_by_user.setdefault(key, []).append({
                    "id": fid, "name": fname, "parentId": parent_id,
                })
            logger.info(f"Loaded folders for {len(folders_by_user)} (user, mandate) combinations")

        # ── 3. Load file→folder assignments ───────────────────────────────────
        files_by_key: dict = {}
        if folder_column_exists:
            cur.execute(
                'SELECT "id", "folderId", "sysCreatedBy", "mandateId" FROM "FileItem" WHERE "folderId" IS NOT NULL AND "folderId" != \'\''
            )
            for row in cur.fetchall():
                file_id, folder_id, user_id, mandate_id = row
                key = (str(user_id), str(mandate_id) if mandate_id else "")
                files_by_key.setdefault(key, {}).setdefault(folder_id, []).append(file_id)
            total_files = sum(
                sum(len(v) for v in d.values()) for d in files_by_key.values()
            )
            logger.info(f"Found {total_files} file→folder assignments across {len(files_by_key)} (user, mandate) combos")

        # ── 4. Combine and upsert groupings ───────────────────────────────────
        all_keys = set(folders_by_user.keys()) | set(files_by_key.keys())
        stats = {"groups_created": 0, "groupings_upserted": 0, "files_assigned": 0}
        for key in all_keys:
            user_id, mandate_id = key
            folders = folders_by_user.get(key, [])
            files_by_folder = files_by_key.get(key, {})
            # Build the group tree, then map file ids onto it.
            roots = _build_tree(folders, None)
            roots = _assign_files_to_nodes(roots, files_by_folder)
            # Files whose folder no longer exists land in an "Orphaned" group.
            known_folder_ids = {f["id"] for f in folders}
            for folder_id, file_ids in files_by_folder.items():
                if folder_id not in known_folder_ids:
                    # str() guard: DB drivers may return UUID objects, which
                    # are not subscriptable — folder_id[:8] would raise.
                    roots.append({
                        "id": str(uuid.uuid4()),
                        "name": f"Orphaned (folder {str(folder_id)[:8]}…)",
                        "itemIds": file_ids,
                        "subGroups": [],
                        "meta": {"migratedFromFolderId": folder_id, "orphaned": True},
                    })
            if not roots:
                continue
            n_items = _count_items(roots)
            stats["groups_created"] += len(roots)
            stats["files_assigned"] += n_items
            context_key = "files/list"
            if verbose:
                logger.debug(f" user={user_id} mandate={mandate_id}: {len(roots)} root groups, {n_items} files")
            if not dry_run:
                # NOTE(review): this lookup keys on (userId, contextKey) only,
                # so multiple mandates of one user merge into a single
                # TableGrouping row — confirm that is the intended schema.
                cur.execute(
                    'SELECT "id", "rootGroups" FROM "TableGrouping" WHERE "userId" = %s AND "contextKey" = %s',
                    (user_id, context_key),
                )
                existing_row = cur.fetchone()
                if existing_row:
                    existing_id, existing_raw = existing_row
                    # rootGroups may arrive as JSON text or an already-decoded list.
                    existing_roots = json.loads(existing_raw) if isinstance(existing_raw, str) else (existing_raw or [])
                    # Merge: append migrated groups, skipping root groups whose
                    # migratedFromFolderId was already migrated earlier.
                    existing_meta_ids = {
                        (n.get("meta") or {}).get("migratedFromFolderId")
                        for n in existing_roots
                        if (n.get("meta") or {}).get("migratedFromFolderId")
                    }
                    new_roots = existing_roots + [
                        r for r in roots
                        if (r.get("meta") or {}).get("migratedFromFolderId") not in existing_meta_ids
                    ]
                    cur.execute(
                        'UPDATE "TableGrouping" SET "rootGroups" = %s, "updatedAt" = %s WHERE "id" = %s',
                        (json.dumps(new_roots), _now_ts(), existing_id),
                    )
                else:
                    new_id = str(uuid.uuid4())
                    cur.execute(
                        'INSERT INTO "TableGrouping" ("id", "userId", "contextKey", "rootGroups", "updatedAt") VALUES (%s, %s, %s, %s, %s)',
                        (new_id, user_id, context_key, json.dumps(roots), _now_ts()),
                    )
                stats["groupings_upserted"] += 1

        # ── 5. Summary ────────────────────────────────────────────────────────
        if not dry_run:
            conn.commit()
            logger.info("Migration committed.")
        else:
            logger.info("DRY RUN — no changes written.")
        logger.info(
            f"Summary: groupings_upserted={stats['groupings_upserted']}, "
            f"groups_created={stats['groups_created']}, "
            f"files_assigned={stats['files_assigned']}"
        )
        logger.info(
            "Next steps (run after verifying data):\n"
            " 1. Run Alembic migration to DROP COLUMN FileItem.folderId\n"
            " 2. Run Alembic migration to DROP TABLE FileFolder"
        )
    finally:
        cur.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Migrate FileFolder tree to table_groupings")
parser.add_argument("--dry-run", action="store_true", default=True, help="Preview only, no DB writes (default)")
parser.add_argument("--execute", action="store_true", help="Actually write to DB (disables dry-run)")
parser.add_argument("--verbose", action="store_true", help="Show per-user details")
args = parser.parse_args()
dry_run = not args.execute
run_migration(dry_run=dry_run, verbose=args.verbose)