platform-core/scripts/debug_rag_job_result.py
2026-05-18 07:56:53 +02:00

70 lines
2.7 KiB
Python

"""Diagnose: read a connection.bootstrap job result and print its keys.
Usage (from repo root):
python gateway/scripts/debug_rag_job_result.py
Prints the most recent SUCCESS connection.bootstrap job per UserConnection so
we can see whether the `stoppedAtLimit` key actually landed in the JSONB
`result` column. If it is missing here, the bug is in the writer (handler or
_markSuccess); if it is present here but absent in the HTTP response, the bug
is in routeRagInventory.
"""
from __future__ import annotations
import os
import sys
import json
from pathlib import Path
# Absolute, symlink-resolved path of this script file.
_HERE = Path(__file__).resolve()
# Put the directory that contains this script's folder (gateway/) first on
# sys.path; this has to happen before the `modules.*` imports that follow.
sys.path.insert(0, str(_HERE.parent.parent))
# Make that same directory the process working directory.
# NOTE(review): presumably so relative config paths resolve — confirm.
os.chdir(_HERE.parent.parent)
from modules.shared.configuration import APP_CONFIG # noqa: E402
from modules.connectors.connectorDbPostgre import getCachedConnector # noqa: E402
from modules.datamodels.datamodelBackgroundJob import BackgroundJob # noqa: E402
from modules.routes.routeRagInventory import _flattenJobResult # noqa: E402
def _main() -> None:
    """Print the newest SUCCESS ``connection.bootstrap`` job per connection.

    Opens the database connector with settings read from ``APP_CONFIG``,
    loads the ``BackgroundJob`` recordset, keeps the records whose
    ``jobType`` is ``connection.bootstrap`` and whose ``status`` is
    ``SUCCESS``, and walks them newest-first by ``createdAt``. Only the first
    job encountered for each ``payload.connectionId`` is reported; payloads
    without a ``connectionId`` key are grouped under ``"<unknown>"``.

    For every reported job the raw ``result`` keys and selected fields of
    ``_flattenJobResult(result)`` are written to stdout. Returns nothing.
    """
    connectionSettings = {
        "dbDatabase": APP_CONFIG.get("DB_DATABASE", "poweron_app"),
        "dbHost": APP_CONFIG.get("DB_HOST", "localhost"),
        "dbPort": int(APP_CONFIG.get("DB_PORT", "5432")),
        "dbUser": APP_CONFIG.get("DB_USER"),
        "dbPassword": APP_CONFIG.get("DB_PASSWORD_SECRET"),
    }
    connector = getCachedConnector(**connectionSettings)

    # Filter client-side, then order newest-first; a missing/falsy createdAt
    # is treated as 0 for ordering.
    bootstrapJobs = sorted(
        (
            job
            for job in connector.getRecordset(BackgroundJob)
            if job.get("jobType") == "connection.bootstrap"
            and job.get("status") == "SUCCESS"
        ),
        key=lambda job: job.get("createdAt") or 0,
        reverse=True,
    )
    if not bootstrapJobs:
        print("No SUCCESS connection.bootstrap jobs found.")
        return

    separator = "=" * 80
    reportedConnections: set[str] = set()
    for job in bootstrapJobs:
        payload = job.get("payload") or {}
        connectionId = payload.get("connectionId", "<unknown>")
        # The list is newest-first, so any repeat of a connection is an
        # older job and is skipped.
        if connectionId in reportedConnections:
            continue
        reportedConnections.add(connectionId)

        rawResult = job.get("result") or {}
        resultIsDict = isinstance(rawResult, dict)
        flattened = _flattenJobResult(rawResult) if resultIsDict else {}

        print(separator)
        print(f"jobId = {job.get('id')}")
        print(f"connectionId = {connectionId}")
        print(f"finishedAt = {job.get('finishedAt')}")
        rawKeys = sorted(rawResult.keys()) if resultIsDict else "N/A"
        print(f"raw keys = {rawKeys}")
        print("--- flattened (what the API will return now) ---")
        print(f" indexed = {flattened.get('indexed')}")
        print(f" skippedDuplicate= {flattened.get('skippedDuplicate')}")
        print(f" skippedPolicy = {flattened.get('skippedPolicy')}")
        print(f" stoppedAtLimit = {flattened.get('stoppedAtLimit')!r} <-- KEY CHECK")
        print(f" limits = {flattened.get('limits')}")
        print(f" bytesProcessed = {flattened.get('bytesProcessed')}")
# Script entry point: run the diagnosis only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    _main()