""" One-time migration: Convert FileFolder tree + FileItem.folderId → table_groupings. Run this BEFORE dropping the physical FileFolder table and FileItem.folderId column from the database (those are separate Alembic/SQL steps). Usage: python -m modules.migrations.migrate_folders_to_groups [--dry-run] [--verbose] Steps: 1. For each distinct (userId, mandateId) combination that has FileFolder records: a. Build the full folder tree (recursive) b. Write it as a TableGroupNode tree into table_groupings (contextKey='files/list') – merges with any existing groups rather than overwriting c. For each FileItem with a folderId that maps into this tree, add its id to the matching group's itemIds 2. Print a summary (rows migrated, groups created, files assigned) 3. If not --dry-run: commits the inserts/updates NOTE: Schema changes (ALTER TABLE DROP COLUMN, DROP TABLE) are intentionally NOT performed by this script. Run the corresponding Alembic migration (migrations/versions/xxxx_drop_folder_columns.py) afterwards. """ import argparse import json import logging import uuid from typing import Optional logger = logging.getLogger(__name__) # ── Helpers ────────────────────────────────────────────────────────────────── def _build_tree(folders: list, parent_id: Optional[str]) -> list: """Recursively build TableGroupNode-compatible dicts from a flat folder list.""" children = [f for f in folders if f.get("parentId") == parent_id] result = [] for folder in children: node = { "id": str(uuid.uuid4()), "name": folder["name"], "itemIds": [], "subGroups": _build_tree(folders, folder["id"]), "meta": {"migratedFromFolderId": folder["id"]}, } result.append(node) return result def _assign_files_to_nodes(nodes: list, files_by_folder: dict) -> list: """Recursively assign file IDs to group nodes based on folder mapping.""" for node in nodes: folder_id = (node.get("meta") or {}).get("migratedFromFolderId") if folder_id and folder_id in files_by_folder: node["itemIds"] = list(files_by_folder[folder_id]) node["subGroups"] = _assign_files_to_nodes(node.get("subGroups", []), files_by_folder) return nodes def _count_items(nodes: list) -> int: total = 0 for node in nodes: total += len(node.get("itemIds", [])) total += _count_items(node.get("subGroups", [])) return total def _now_ts() -> str: from modules.shared.timeUtils import getUtcTimestamp return getUtcTimestamp() # ── Main migration ──────────────────────────────────────────────────────────── def run_migration(dry_run: bool = True, verbose: bool = False): """Main migration entry point.""" logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO) logger.info(f"Starting folder→group migration (dry_run={dry_run})") from modules.connectors.connectorDbPostgre import getCachedConnector connector = getCachedConnector() if not connector or not connector.connection: logger.error("Could not obtain a DB connection. Aborting.") return conn = connector.connection cur = conn.cursor() # ── 1. 
# ── Main migration ───────────────────────────────────────────────────────────


def run_migration(dry_run: bool = True, verbose: bool = False):
    """Main migration entry point."""
    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
    logger.info(f"Starting folder→group migration (dry_run={dry_run})")

    from modules.connectors.connectorDbPostgre import getCachedConnector

    connector = getCachedConnector()
    if not connector or not connector.connection:
        logger.error("Could not obtain a DB connection. Aborting.")
        return
    conn = connector.connection
    cur = conn.cursor()

    # ── 1. Check that the source tables still exist ──────────────────────────
    cur.execute("""
        SELECT EXISTS (
            SELECT 1 FROM information_schema.tables WHERE table_name = 'FileFolder'
        )
    """)
    folder_table_exists = cur.fetchone()[0]

    cur.execute("""
        SELECT EXISTS (
            SELECT 1 FROM information_schema.columns
            WHERE table_name = 'FileItem' AND column_name = 'folderId'
        )
    """)
    folder_column_exists = cur.fetchone()[0]

    if not folder_table_exists and not folder_column_exists:
        logger.info(
            "FileFolder table and FileItem.folderId column not found — "
            "migration already applied or not needed."
        )
        return
    if not folder_table_exists:
        logger.warning(
            "FileFolder table missing but FileItem.folderId column still present. "
            "Only file assignments will be migrated."
        )
    if not folder_column_exists:
        logger.warning(
            "FileItem.folderId column missing but FileFolder table still present. "
            "Only the group tree structure will be migrated."
        )

    # ── 2. Load all folders ──────────────────────────────────────────────────
    folders_by_user: dict = {}
    if folder_table_exists:
        cur.execute('SELECT "id", "name", "parentId", "sysCreatedBy", "mandateId" FROM "FileFolder"')
        for row in cur.fetchall():
            fid, fname, parent_id, user_id, mandate_id = row
            key = (str(user_id), str(mandate_id) if mandate_id else "")
            folders_by_user.setdefault(key, []).append({
                "id": fid,
                "name": fname,
                "parentId": parent_id,
            })
    logger.info(f"Loaded folders for {len(folders_by_user)} (user, mandate) combinations")

    # ── 3. Load file→folder assignments ──────────────────────────────────────
    files_by_key: dict = {}
    if folder_column_exists:
        cur.execute(
            'SELECT "id", "folderId", "sysCreatedBy", "mandateId" FROM "FileItem" '
            'WHERE "folderId" IS NOT NULL AND "folderId" != \'\''
        )
        for row in cur.fetchall():
            file_id, folder_id, user_id, mandate_id = row
            key = (str(user_id), str(mandate_id) if mandate_id else "")
            files_by_key.setdefault(key, {}).setdefault(folder_id, []).append(file_id)
    total_files = sum(
        sum(len(v) for v in d.values()) for d in files_by_key.values()
    )
    logger.info(f"Found {total_files} file→folder assignments across {len(files_by_key)} (user, mandate) combos")
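
    # Shape of the accumulators at this point (illustrative values, not real data):
    #   folders_by_user[("user-1", "mandate-a")] == [
    #       {"id": "f1", "name": "Contracts", "parentId": None}, ...]
    #   files_by_key[("user-1", "mandate-a")] == {"f1": ["file-1", "file-2"]}
    #
    # NOTE: the upsert below looks groupings up by (userId, contextKey) only,
    # so trees migrated for different mandates of the same user are merged
    # into one TableGrouping row; duplicate roots are skipped via
    # meta.migratedFromFolderId.
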
    # ── 4. Combine and upsert groupings ──────────────────────────────────────
    all_keys = set(folders_by_user.keys()) | set(files_by_key.keys())
    stats = {"groups_created": 0, "groupings_upserted": 0, "files_assigned": 0}

    for key in all_keys:
        user_id, mandate_id = key
        folders = folders_by_user.get(key, [])
        files_by_folder = files_by_key.get(key, {})

        # Build the tree and assign files to its nodes
        roots = _build_tree(folders, None)
        roots = _assign_files_to_nodes(roots, files_by_folder)

        # Handle files in unknown folders (folder no longer in the tree)
        known_folder_ids = {f["id"] for f in folders}
        for folder_id, file_ids in files_by_folder.items():
            if folder_id not in known_folder_ids:
                # Orphaned files: put them in an "Orphaned" group
                # (str() guards against non-string folder ids, e.g. UUID columns)
                roots.append({
                    "id": str(uuid.uuid4()),
                    "name": f"Orphaned (folder {str(folder_id)[:8]}…)",
                    "itemIds": file_ids,
                    "subGroups": [],
                    "meta": {"migratedFromFolderId": folder_id, "orphaned": True},
                })

        if not roots:
            continue

        n_items = _count_items(roots)
        stats["groups_created"] += len(roots)
        stats["files_assigned"] += n_items
        context_key = "files/list"

        if verbose:
            logger.debug(f"  user={user_id} mandate={mandate_id}: {len(roots)} root groups, {n_items} files")

        if not dry_run:
            # Check for an existing grouping
            cur.execute(
                'SELECT "id", "rootGroups" FROM "TableGrouping" WHERE "userId" = %s AND "contextKey" = %s',
                (user_id, context_key),
            )
            existing_row = cur.fetchone()
            if existing_row:
                existing_id, existing_raw = existing_row
                existing_roots = json.loads(existing_raw) if isinstance(existing_raw, str) else (existing_raw or [])
                # Merge: append migrated groups (avoid duplicates by migratedFromFolderId)
                existing_meta_ids = {
                    (n.get("meta") or {}).get("migratedFromFolderId")
                    for n in existing_roots
                    if (n.get("meta") or {}).get("migratedFromFolderId")
                }
                new_roots = existing_roots + [
                    r for r in roots
                    if (r.get("meta") or {}).get("migratedFromFolderId") not in existing_meta_ids
                ]
                cur.execute(
                    'UPDATE "TableGrouping" SET "rootGroups" = %s, "updatedAt" = %s WHERE "id" = %s',
                    (json.dumps(new_roots), _now_ts(), existing_id),
                )
            else:
                new_id = str(uuid.uuid4())
                cur.execute(
                    'INSERT INTO "TableGrouping" ("id", "userId", "contextKey", "rootGroups", "updatedAt") '
                    'VALUES (%s, %s, %s, %s, %s)',
                    (new_id, user_id, context_key, json.dumps(roots), _now_ts()),
                )
        # Counted outside the dry-run guard so the dry-run summary is meaningful
        stats["groupings_upserted"] += 1

    # ── 5. Summary ────────────────────────────────────────────────────────────
    if not dry_run:
        conn.commit()
        logger.info("Migration committed.")
    else:
        logger.info("DRY RUN — no changes written.")

    logger.info(
        f"Summary: groupings_upserted={stats['groupings_upserted']}, "
        f"groups_created={stats['groups_created']}, "
        f"files_assigned={stats['files_assigned']}"
    )
    logger.info(
        "Next steps (run after verifying data):\n"
        "  1. Run the Alembic migration to DROP COLUMN FileItem.folderId\n"
        "  2. Run the Alembic migration to DROP TABLE FileFolder"
    )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Migrate FileFolder tree to table_groupings")
    parser.add_argument("--dry-run", action="store_true", default=True,
                        help="Preview only, no DB writes (default)")
    parser.add_argument("--execute", action="store_true", help="Actually write to DB (disables dry-run)")
    parser.add_argument("--verbose", action="store_true", help="Show per-user details")
    args = parser.parse_args()

    # Dry-run is the default; --execute flips it off
    dry_run = not args.execute
    run_migration(dry_run=dry_run, verbose=args.verbose)
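
# Verification sketch (an assumption, not part of the migration: run manually
# against the same database after a non-dry run; jsonb_array_length is a
# standard Postgres function, and the cast is only needed if "rootGroups" is
# stored as json/text rather than jsonb):
#
#   SELECT "userId", jsonb_array_length("rootGroups"::jsonb) AS root_groups
#   FROM "TableGrouping"
#   WHERE "contextKey" = 'files/list';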