- connection.established/revoked callbacks from OAuth routes and connection management endpoints - KnowledgeIngestionConsumer dispatches bootstrap job (established) and synchronous purge (revoked) - FileContentIndex: add connectionId + sourceKind columns - SharePoint bootstrap with @odata.nextLink pagination and eTag-based idempotency - Outlook bootstrap treats messages as virtual documents with cleanEmailBody for HTML/quote/signature stripping - fix(rag): lower buildAgentContext minScore thresholds from 0.55/0.65/0.70 to 0.35 — previous values blocked all real matches from text-embedding-3-small - 24 new unit tests covering purge, consumer dispatch, email cleaning and both bootstrap paths
172 lines
5.5 KiB
Python
172 lines
5.5 KiB
Python
#!/usr/bin/env python3
|
|
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""Unit tests for KnowledgeIngestionConsumer event dispatch.
|
|
|
|
- `connection.established` → enqueue a `connection.bootstrap` job.
|
|
- `connection.revoked` → synchronous purge via KnowledgeObjects.
|
|
"""
|
|
|
|
import asyncio
|
|
import os
|
|
import sys
|
|
import types
|
|
|
|
# Put the directory three levels above this file at the front of sys.path so
# the absolute `modules.…` import below resolves when the test is run from
# here (presumably that directory is the project root — confirm).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
|
|
|
|
from modules.serviceCenter.services.serviceKnowledge import subConnectorIngestConsumer as consumer
|
|
|
|
|
|
def _resetRegistration(monkeypatch):
    """Patch the consumer's module-level ``_registered`` flag to ``False``.

    The change goes through ``monkeypatch`` so a test starts from the
    "not yet registered" state.
    """
    guardName = "_registered"
    monkeypatch.setattr(consumer, guardName, False)
|
|
|
|
|
|
def test_onConnectionEstablished_enqueues_bootstrap(monkeypatch):
    """``_onConnectionEstablished`` must start exactly one bootstrap job.

    ``consumer.startJob`` is swapped for a recording coroutine, the handler
    is invoked with a connection id, authority and user id, and the single
    recorded call is checked for job type, payload fields and
    ``triggeredBy``.
    """
    startedJobs = []

    async def _fakeStartJob(jobType, payload, **kwargs):
        # Record the call instead of touching the real job queue.
        startedJobs.append({"jobType": jobType, "payload": payload, "kwargs": kwargs})
        return "job-1"

    monkeypatch.setattr(consumer, "startJob", _fakeStartJob)
    consumer._onConnectionEstablished(
        connectionId="c1", authority="msft", userId="u1"
    )

    # The handler is expected to have driven the fake startJob to completion
    # before returning (presumably via asyncio.run when no loop is running —
    # confirm against the consumer). No event loop is created here on
    # purpose: a loop built after the call has nothing to drain, and closing
    # it while it is installed as the current loop would leak a closed loop
    # into later tests.
    assert len(startedJobs) == 1
    assert startedJobs[0]["jobType"] == consumer.BOOTSTRAP_JOB_TYPE
    assert startedJobs[0]["payload"]["connectionId"] == "c1"
    assert startedJobs[0]["payload"]["authority"] == "msft"
    assert startedJobs[0]["kwargs"]["triggeredBy"] == "u1"
|
|
|
|
|
|
def test_onConnectionEstablished_ignores_missing_id(monkeypatch):
    """An empty ``connectionId`` must not lead to any ``startJob`` call."""
    invocations = []

    async def _recordingStartJob(*args, **kwargs):
        # Any call at all is a failure; remember that it happened.
        invocations.append(1)
        return "x"

    monkeypatch.setattr(consumer, "startJob", _recordingStartJob)

    consumer._onConnectionEstablished(connectionId="", authority="msft")

    assert len(invocations) == 0
|
|
|
|
|
|
def test_onConnectionRevoked_runs_sync_purge(monkeypatch):
    """``_onConnectionRevoked`` must purge the index for the given connection.

    ``consumer.getKnowledgeInterface`` is replaced by a factory returning one
    shared stub; after the handler returns, the stub must have been asked to
    delete by connection id exactly once, with ``"c1"``.
    """
    purgedIds = []

    class _RecordingKnowledge:
        def deleteFileContentIndexByConnectionId(self, cid):
            purgedIds.append(cid)
            return {"indexRows": 2, "chunks": 5}

    knowledgeStub = _RecordingKnowledge()

    # The same stub instance is handed out on every lookup.
    monkeypatch.setattr(
        consumer, "getKnowledgeInterface", lambda _user=None: knowledgeStub
    )

    consumer._onConnectionRevoked(
        connectionId="c1", authority="msft", userId="u1", reason="disconnected"
    )

    assert purgedIds == ["c1"]
|
|
|
|
|
|
def test_onConnectionRevoked_ignores_missing_id(monkeypatch):
    """An empty ``connectionId`` must not trigger any purge call."""
    purgeRequests = []

    class _RecordingKnowledge:
        def deleteFileContentIndexByConnectionId(self, cid):
            purgeRequests.append(cid)
            return {"indexRows": 0, "chunks": 0}

    def _knowledgeFactory(_user=None):
        # A fresh stub per lookup; all of them record into the same list.
        return _RecordingKnowledge()

    monkeypatch.setattr(consumer, "getKnowledgeInterface", _knowledgeFactory)

    consumer._onConnectionRevoked(connectionId="")

    assert len(purgeRequests) == 0
|
|
|
|
|
|
def test_bootstrap_job_skips_non_pilot_authority(monkeypatch):
    """A bootstrap job for authority ``"google"`` is reported as skipped.

    The handler result must carry ``skipped is True`` and echo the authority.
    """
    job = {"payload": {"connectionId": "c1", "authority": "google"}}

    def _ignoreProgress(*_):
        return None

    async def _invokeHandler():
        return await consumer._bootstrapJobHandler(job, _ignoreProgress)

    outcome = asyncio.run(_invokeHandler())

    assert outcome["skipped"] is True
    assert outcome["authority"] == "google"
|
|
|
|
|
|
def test_bootstrap_job_dispatches_msft_parts(monkeypatch):
    """A ``"msft"`` bootstrap job must run both the SharePoint and Outlook parts.

    Stub modules are registered in ``sys.modules`` under the names the handler
    imports lazily; each stub counts its invocations and returns a distinct
    result that must show up under ``"sharepoint"`` / ``"outlook"``.
    """
    invocations = {"sp": 0, "ol": 0}

    async def _stubSharepoint(connectionId, progressCb=None):
        invocations["sp"] += 1
        return {"indexed": 1}

    async def _stubOutlook(connectionId, progressCb=None):
        invocations["ol"] += 1
        return {"indexed": 2}

    # (short module name, fully qualified sys.modules key, attribute, stub)
    stubSpecs = (
        (
            "subConnectorSyncSharepoint",
            "modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint",
            "bootstrapSharepoint",
            _stubSharepoint,
        ),
        (
            "subConnectorSyncOutlook",
            "modules.serviceCenter.services.serviceKnowledge.subConnectorSyncOutlook",
            "bootstrapOutlook",
            _stubOutlook,
        ),
    )
    # The stub modules must be in place before the handler runs, because the
    # handler imports subConnectorSync* lazily.
    for shortName, qualifiedName, attrName, stub in stubSpecs:
        stubModule = types.ModuleType(shortName)
        setattr(stubModule, attrName, stub)
        monkeypatch.setitem(sys.modules, qualifiedName, stubModule)

    job = {"payload": {"connectionId": "c1", "authority": "msft"}}

    async def _invokeHandler():
        return await consumer._bootstrapJobHandler(job, lambda *_: None)

    outcome = asyncio.run(_invokeHandler())

    assert invocations == {"sp": 1, "ol": 1}
    assert outcome["sharepoint"] == {"indexed": 1}
    assert outcome["outlook"] == {"indexed": 2}
|
|
|
|
|
|
if __name__ == "__main__":
    # Running this file directly only prints a hint to use pytest. The _MP
    # class below is a minimal stand-in for pytest's monkeypatch fixture; it
    # is defined here but not instantiated by this block.

    # Private marker for "value argument omitted", so that None remains a
    # legitimate value to patch in.
    _UNSET = object()

    class _MP:
        """Tiny monkeypatch look-alike that records what it overwrote."""

        def __init__(self):
            # (container, name-or-key, previous value) for every patch applied.
            self.undos = []

        def setattr(self, target, name_or_value, value=_UNSET):
            """Set ``target.<name_or_value>`` to ``value``, remembering the old value.

            Only the three-argument form is supported; calling without
            ``value`` exits with a message pointing to pytest's monkeypatch.
            """
            if value is _UNSET:
                raise SystemExit("use pytest monkeypatch in CLI")
            self.undos.append((target, name_or_value, getattr(target, name_or_value)))
            setattr(target, name_or_value, value)

        def setitem(self, mapping, key, value):
            """Set ``mapping[key]`` to ``value``, remembering the previous entry."""
            # mapping.get yields None for a key that was absent.
            self.undos.append((mapping, key, mapping.get(key)))
            mapping[key] = value

    print("Run via pytest: pytest tests/unit/services/test_knowledge_ingest_consumer.py")
|