gateway/modules/migration/migrateRagScopeFields.py
2026-03-29 21:55:09 +02:00

114 lines
4.2 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Migration: Backfill FileContentIndex scope fields from FileItem (Single Source of Truth).
Fixes legacy rows in poweron_knowledge where scope/mandateId/featureInstanceId
are empty or default ("personal") despite the corresponding FileItem having correct values.
Idempotent — safe to run multiple times. Uses a DB flag to skip if already completed.
"""
import logging
from modules.shared.configuration import APP_CONFIG
from modules.connectors.connectorDbPostgre import _get_cached_connector
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Value stored in the ``name`` field of the marker record that signals this
# migration has already run (looked up and written by the flag helpers below).
_MIGRATION_FLAG_KEY = "migration_rag_scope_fields_completed"
def _isMigrationCompleted(appDb) -> bool:
    """Report whether the completion marker for this migration already exists.

    The marker is a ``Mandate`` record whose ``name`` equals
    ``_MIGRATION_FLAG_KEY``.

    Args:
        appDb: Database connector exposing ``getRecordset``.

    Returns:
        True when at least one marker record is found. False when none is
        found, or when the lookup itself fails; in the failure case the
        problem is logged and the migration is treated as not yet completed.
    """
    try:
        # Imported inside the try so that an import failure is handled the
        # same way as a failed lookup.
        from modules.datamodels.datamodelUam import Mandate

        records = appDb.getRecordset(Mandate, recordFilter={"name": _MIGRATION_FLAG_KEY})
        return len(records) > 0
    except Exception as e:
        # Previously swallowed silently; surface it so a permanently failing
        # flag lookup (and the resulting repeated migration runs) is visible.
        logger.warning("Could not read migration flag, assuming not completed: %s", e)
        return False
def _setMigrationCompleted(appDb) -> None:
    """Store the completion marker for this migration.

    Creates a ``Mandate`` record named ``_MIGRATION_FLAG_KEY`` through
    ``appDb.recordCreate``. Any exception is logged as an error and not
    re-raised.

    Args:
        appDb: Database connector exposing ``recordCreate``.
    """
    try:
        from modules.datamodels.datamodelUam import Mandate

        markerFields = {
            "name": _MIGRATION_FLAG_KEY,
            "description": "RAG scope fields migration completed",
        }
        marker = Mandate(**markerFields)
        appDb.recordCreate(Mandate, marker)
    except Exception as err:
        logger.error("Could not set migration flag: %s", err)
def _readField(record, key, default=""):
    """Return ``key`` from ``record``, which may be a dict or an attribute-style object."""
    if isinstance(record, dict):
        return record.get(key, default)
    return getattr(record, key, default)


def runMigration(appDb=None) -> dict:
    """Backfill FileContentIndex scope fields from the matching FileItem rows.

    For every FileContentIndex row, the FileItem with the same id is loaded
    and its ``scope``, ``mandateId`` and ``featureInstanceId`` are compared
    with the index row. Differences are written back to the index row:

    * ``scope`` is always aligned with the FileItem (an empty value on either
      side is treated as ``"personal"``).
    * ``mandateId`` / ``featureInstanceId`` are only overwritten when the
      FileItem carries a non-empty value; an empty FileItem value never
      clears an existing index value.

    The completion flag is written only when no row update failed, so rows
    whose update raised are retried on the next run instead of being skipped
    permanently.

    Args:
        appDb: Database connector used for the completion flag. When None,
            the root interface's ``db`` is used.

    Returns:
        Dict with the keys ``total``, ``updated``, ``skipped`` and
        ``orphaned``. ``skipped`` counts rows without an id, rows that needed
        no change, and rows whose update failed. ``orphaned`` counts index
        rows without a matching FileItem. All counts are 0 when the migration
        was already completed.
    """
    from modules.datamodels.datamodelKnowledge import FileContentIndex
    from modules.datamodels.datamodelFiles import FileItem
    from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
    from modules.interfaces.interfaceDbManagement import ComponentObjects

    if appDb is None:
        from modules.interfaces.interfaceDbApp import getRootInterface
        appDb = getRootInterface().db

    if _isMigrationCompleted(appDb):
        logger.info("migrateRagScopeFields: already completed, skipping")
        return {"total": 0, "updated": 0, "skipped": 0, "orphaned": 0}

    knowDb = getKnowledgeInterface(None).db
    mgmtDb = ComponentObjects().db

    allIndexes = knowDb.getRecordset(FileContentIndex, recordFilter={})
    total = len(allIndexes)
    updated = 0
    skipped = 0
    orphaned = 0
    # Tracked separately from "skipped" only to decide whether the completion
    # flag may be set; the returned counters keep their original meaning.
    failed = 0
    logger.info("migrateRagScopeFields: processing %d FileContentIndex rows", total)

    for idx in allIndexes:
        idxId = _readField(idx, "id", None)
        if not idxId:
            skipped += 1
            continue

        # The index row id is used as the FileItem id for the lookup.
        fileItem = mgmtDb._loadRecord(FileItem, str(idxId))
        if not fileItem:
            orphaned += 1
            continue

        fiScope = _readField(fileItem, "scope") or "personal"
        fiMandateId = str(_readField(fileItem, "mandateId") or "")
        fiFeatureInstanceId = str(_readField(fileItem, "featureInstanceId") or "")

        currentScope = _readField(idx, "scope") or "personal"
        currentMandateId = str(_readField(idx, "mandateId") or "")
        currentFeatureInstanceId = str(_readField(idx, "featureInstanceId") or "")

        updates = {}
        if fiScope != currentScope:
            updates["scope"] = fiScope
        # Only propagate non-empty ids so existing index values are never blanked.
        if fiMandateId and fiMandateId != currentMandateId:
            updates["mandateId"] = fiMandateId
        if fiFeatureInstanceId and fiFeatureInstanceId != currentFeatureInstanceId:
            updates["featureInstanceId"] = fiFeatureInstanceId

        if not updates:
            skipped += 1
            continue

        try:
            knowDb.recordModify(FileContentIndex, str(idxId), updates)
        except Exception as e:
            logger.error("migrateRagScopeFields: failed to update %s: %s", idxId, e)
            skipped += 1
            failed += 1
            continue

        updated += 1
        logger.debug("migrateRagScopeFields: updated %s -> %s", idxId, updates)

    if failed:
        # Leaving the flag unset lets the next run retry the failed rows;
        # rows already fixed produce no updates on a re-run.
        logger.warning(
            "migrateRagScopeFields: %d update(s) failed, completion flag not set so the next run retries",
            failed,
        )
    else:
        _setMigrationCompleted(appDb)

    logger.info(
        "migrateRagScopeFields complete: total=%d, updated=%d, skipped=%d, orphaned=%d",
        total, updated, skipped, orphaned,
    )
    return {"total": total, "updated": updated, "skipped": skipped, "orphaned": orphaned}