"""
One-time migration: Convert FileFolder tree + FileItem.folderId → table_groupings.

Run this BEFORE dropping the physical FileFolder table and FileItem.folderId column
from the database (those are separate Alembic/SQL steps).

Usage:
    python -m modules.migrations.migrate_folders_to_groups [--dry-run] [--verbose]

Steps:
    1. For each distinct (userId, mandateId) combination that has FileFolder records:
       a. Build the full folder tree (recursive)
       b. Write it as a TableGroupNode tree into table_groupings (contextKey='files/list')
          – merges with any existing groups rather than overwriting
       c. For each FileItem with a folderId that maps into this tree,
          add its id to the matching group's itemIds
    2. Print a summary (rows migrated, groups created, files assigned)
    3. If not --dry-run: commits the inserts/updates

NOTE: Schema changes (ALTER TABLE DROP COLUMN, DROP TABLE) are intentionally
NOT performed by this script. Run the corresponding Alembic migration
(migrations/versions/xxxx_drop_folder_columns.py) afterwards.
"""
# Standard-library imports only; DB connector and time helpers are imported
# lazily inside the functions that need them.
import argparse
import json
import logging
import uuid
from typing import Optional

# Module-level logger; level is configured in run_migration().
logger = logging.getLogger(__name__)


# ── Helpers ──────────────────────────────────────────────────────────────────
def _build_tree(folders: list, parent_id: Optional[str]) -> list:
|
||
"""Recursively build TableGroupNode-compatible dicts from a flat folder list."""
|
||
children = [f for f in folders if f.get("parentId") == parent_id]
|
||
result = []
|
||
for folder in children:
|
||
node = {
|
||
"id": str(uuid.uuid4()),
|
||
"name": folder["name"],
|
||
"itemIds": [],
|
||
"subGroups": _build_tree(folders, folder["id"]),
|
||
"meta": {"migratedFromFolderId": folder["id"]},
|
||
}
|
||
result.append(node)
|
||
return result
|
||
|
||
|
||
def _assign_files_to_nodes(nodes: list, files_by_folder: dict) -> list:
|
||
"""Recursively assign file IDs to group nodes based on folder mapping."""
|
||
for node in nodes:
|
||
folder_id = (node.get("meta") or {}).get("migratedFromFolderId")
|
||
if folder_id and folder_id in files_by_folder:
|
||
node["itemIds"] = list(files_by_folder[folder_id])
|
||
node["subGroups"] = _assign_files_to_nodes(node.get("subGroups", []), files_by_folder)
|
||
return nodes
|
||
|
||
|
||
def _count_items(nodes: list) -> int:
|
||
total = 0
|
||
for node in nodes:
|
||
total += len(node.get("itemIds", []))
|
||
total += _count_items(node.get("subGroups", []))
|
||
return total
|
||
|
||
|
||
def _now_ts() -> str:
    """Return the current UTC timestamp string from the shared project helper.

    Imported lazily so that merely importing this migration module does not
    pull in the shared utils package.
    """
    from modules.shared.timeUtils import getUtcTimestamp as _utc_now
    return _utc_now()
# ── Main migration ────────────────────────────────────────────────────────────

def run_migration(dry_run: bool = True, verbose: bool = False) -> None:
    """Main migration entry point.

    Converts the FileFolder tree and FileItem.folderId assignments into
    TableGrouping rows (contextKey 'files/list'), merging with any grouping
    rows that already exist rather than overwriting them.

    Args:
        dry_run: When True (default), compute and log everything but write
            and commit nothing.
        verbose: When True, enable DEBUG logging and per-(user, mandate)
            detail lines.
    """
    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
    logger.info(f"Starting folder→group migration (dry_run={dry_run})")

    # Imported lazily so the module can be loaded without a configured DB.
    from modules.connectors.connectorDbPostgre import getCachedConnector

    connector = getCachedConnector()
    if not connector or not connector.connection:
        logger.error("Could not obtain a DB connection. Aborting.")
        return

    conn = connector.connection
    cur = conn.cursor()

    # ── 1. Check that the source tables still exist ───────────────────────────
    # The script is a no-op once the Alembic drop migrations have run, and it
    # degrades gracefully when only one of the two source objects remains.
    cur.execute("""
        SELECT EXISTS (
            SELECT 1 FROM information_schema.tables
            WHERE table_name = 'FileFolder'
        )
    """)
    folder_table_exists = cur.fetchone()[0]

    cur.execute("""
        SELECT EXISTS (
            SELECT 1 FROM information_schema.columns
            WHERE table_name = 'FileItem' AND column_name = 'folderId'
        )
    """)
    folder_column_exists = cur.fetchone()[0]

    if not folder_table_exists and not folder_column_exists:
        logger.info("FileFolder table and FileItem.folderId column not found — migration already applied or not needed.")
        return

    if not folder_table_exists:
        logger.warning("FileFolder table missing but FileItem.folderId column still present. Only file assignments will be migrated.")
    if not folder_column_exists:
        logger.warning("FileItem.folderId column missing but FileFolder table still present. Only group tree structure will be migrated.")

    # ── 2. Load all folders ───────────────────────────────────────────────────
    # folders_by_user[(userId, mandateId-or-"")] -> flat folder-dict list that
    # _build_tree() later turns into a nested group tree.
    folders_by_user: dict = {}
    if folder_table_exists:
        cur.execute('SELECT "id", "name", "parentId", "sysCreatedBy", "mandateId" FROM "FileFolder"')
        for row in cur.fetchall():
            fid, fname, parent_id, user_id, mandate_id = row
            # NULL mandateId is normalized to "" so it can be part of a tuple key.
            key = (str(user_id), str(mandate_id) if mandate_id else "")
            folders_by_user.setdefault(key, []).append({
                "id": fid, "name": fname, "parentId": parent_id,
            })
        logger.info(f"Loaded folders for {len(folders_by_user)} (user, mandate) combinations")

    # ── 3. Load file→folder assignments ──────────────────────────────────────
    # files_by_key[(userId, mandateId-or-"")][folderId] -> [fileId, ...]
    files_by_key: dict = {}
    if folder_column_exists:
        cur.execute(
            'SELECT "id", "folderId", "sysCreatedBy", "mandateId" FROM "FileItem" WHERE "folderId" IS NOT NULL AND "folderId" != \'\''
        )
        for row in cur.fetchall():
            file_id, folder_id, user_id, mandate_id = row
            key = (str(user_id), str(mandate_id) if mandate_id else "")
            files_by_key.setdefault(key, {}).setdefault(folder_id, []).append(file_id)
        total_files = sum(
            sum(len(v) for v in d.values()) for d in files_by_key.values()
        )
        logger.info(f"Found {total_files} file→folder assignments across {len(files_by_key)} (user, mandate) combos")

    # ── 4. Combine and upsert groupings ──────────────────────────────────────
    # Union of both key sets: a combo may have folders with no assigned files
    # (empty groups) or file assignments whose folder rows are gone (orphans).
    all_keys = set(folders_by_user.keys()) | set(files_by_key.keys())
    stats = {"groups_created": 0, "groupings_upserted": 0, "files_assigned": 0}

    for key in all_keys:
        user_id, mandate_id = key
        folders = folders_by_user.get(key, [])
        files_by_folder = files_by_key.get(key, {})

        # Build tree
        roots = _build_tree(folders, None)
        roots = _assign_files_to_nodes(roots, files_by_folder)

        # Handle files in unknown folders (folder no longer in tree)
        known_folder_ids = {f["id"] for f in folders}
        for folder_id, file_ids in files_by_folder.items():
            if folder_id not in known_folder_ids:
                # Orphaned files: put them in an "Orphaned" group
                roots.append({
                    "id": str(uuid.uuid4()),
                    "name": f"Orphaned (folder {folder_id[:8]}…)",
                    "itemIds": file_ids,
                    "subGroups": [],
                    "meta": {"migratedFromFolderId": folder_id, "orphaned": True},
                })

        if not roots:
            continue

        n_items = _count_items(roots)
        stats["groups_created"] += len(roots)
        stats["files_assigned"] += n_items

        context_key = "files/list"
        if verbose:
            logger.debug(f" user={user_id} mandate={mandate_id}: {len(roots)} root groups, {n_items} files")

        if not dry_run:
            # Check for existing grouping
            # NOTE(review): the lookup keys only on (userId, contextKey) —
            # mandateId is not part of the WHERE clause, so trees for different
            # mandates of the same user merge into one TableGrouping row.
            # Confirm this matches the TableGrouping schema/intent.
            cur.execute(
                'SELECT "id", "rootGroups" FROM "TableGrouping" WHERE "userId" = %s AND "contextKey" = %s',
                (user_id, context_key),
            )
            existing_row = cur.fetchone()

            if existing_row:
                existing_id, existing_raw = existing_row
                # rootGroups may arrive as a JSON string or as an already-decoded
                # value depending on the column type/driver; handle both.
                existing_roots = json.loads(existing_raw) if isinstance(existing_raw, str) else (existing_raw or [])
                # Merge: append migrated groups (avoid duplicates by migratedFromFolderId)
                existing_meta_ids = {
                    (n.get("meta") or {}).get("migratedFromFolderId")
                    for n in existing_roots
                    if (n.get("meta") or {}).get("migratedFromFolderId")
                }
                new_roots = existing_roots + [
                    r for r in roots
                    if (r.get("meta") or {}).get("migratedFromFolderId") not in existing_meta_ids
                ]
                cur.execute(
                    'UPDATE "TableGrouping" SET "rootGroups" = %s, "updatedAt" = %s WHERE "id" = %s',
                    (json.dumps(new_roots), _now_ts(), existing_id),
                )
            else:
                new_id = str(uuid.uuid4())
                cur.execute(
                    'INSERT INTO "TableGrouping" ("id", "userId", "contextKey", "rootGroups", "updatedAt") VALUES (%s, %s, %s, %s, %s)',
                    (new_id, user_id, context_key, json.dumps(roots), _now_ts()),
                )
            # Counted only when a write actually happened; stays 0 in dry runs.
            stats["groupings_upserted"] += 1

    # ── 5. Summary ────────────────────────────────────────────────────────────
    if not dry_run:
        conn.commit()
        logger.info("Migration committed.")
    else:
        logger.info("DRY RUN — no changes written.")

    logger.info(
        f"Summary: groupings_upserted={stats['groupings_upserted']}, "
        f"groups_created={stats['groups_created']}, "
        f"files_assigned={stats['files_assigned']}"
    )
    logger.info(
        "Next steps (run after verifying data):\n"
        " 1. Run Alembic migration to DROP COLUMN FileItem.folderId\n"
        " 2. Run Alembic migration to DROP TABLE FileFolder"
    )
if __name__ == "__main__":
    # CLI wrapper: dry-run is the default; --execute flips it off.
    arg_parser = argparse.ArgumentParser(
        description="Migrate FileFolder tree to table_groupings"
    )
    arg_parser.add_argument(
        "--dry-run",
        action="store_true",
        default=True,
        help="Preview only, no DB writes (default)",
    )
    arg_parser.add_argument(
        "--execute",
        action="store_true",
        help="Actually write to DB (disables dry-run)",
    )
    arg_parser.add_argument(
        "--verbose",
        action="store_true",
        help="Show per-user details",
    )
    cli_args = arg_parser.parse_args()
    run_migration(dry_run=not cli_args.execute, verbose=cli_args.verbose)