#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap Outlook tests with a fake adapter + knowledge service.

Verifies:
- Well-known folders (inbox, sentitems) are discovered via Graph.
- Each message produces a `requestIngestion` call with
  sourceKind=outlook_message and structured contentObjects
  (header / snippet / body).
- Pagination via `@odata.nextLink` is followed.
- changeKey is forwarded as contentVersion → idempotency.
"""

import asyncio
import os
import sys
from types import SimpleNamespace

# Put the directory three levels above this file on sys.path so the
# `modules` package import below resolves when the file is run directly
# (presumably the repository root -- confirm against the project layout).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))

from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncOutlook import (
    bootstrapOutlook,
    OutlookBootstrapLimits,
    _syntheticMessageId,
    _buildContentObjects,
)


class _FakeOutlookAdapter:
    """In-memory stand-in for the Outlook adapter used by `bootstrapOutlook`.

    Answers `_graphGet` calls from canned data instead of talking to Graph:
    the well-known folder names ``inbox`` and ``sentitems`` resolve to the
    ids ``INBOX-ID`` and ``SENT-ID``, and message listings are served from
    `messages_by_folder` (keyed by folder id).

    Args:
        messages_by_folder: Mapping of folder id -> list of message dicts
            returned as the first (or only) page for that folder.
        paginated_folder: Optional folder id whose first page carries an
            ``@odata.nextLink`` pointing at a second page.
        page2: Messages returned for the follow-up (``skiptoken``) request
            of `paginated_folder`; defaults to an empty list.
    """

    def __init__(self, messages_by_folder, paginated_folder=None, page2=None):
        self._folders = {"inbox": "INBOX-ID", "sentitems": "SENT-ID"}
        self._messages = messages_by_folder
        self._paginated_folder = paginated_folder
        self._page2 = page2 or []
        # Every endpoint passed to `_graphGet`, in call order, so tests can
        # inspect which requests were made.
        self.requested_endpoints = []

    async def _graphGet(self, endpoint: str):
        """Return the canned response for `endpoint` and record the request.

        Two request shapes are recognised:

        * Folder lookup -- starts with ``me/mailFolders/`` and contains no
          ``/messages``: the last path segment is treated as the well-known
          name and resolved to ``{"id", "displayName"}``, or
          ``{"error": "not found"}`` for unknown names.
        * Message page -- contains ``me/mailFolders/<id>/messages`` for a
          known folder id: returns ``{"value": [...]}``.

        Anything else yields an empty page ``{"value": []}``.
        """
        self.requested_endpoints.append(endpoint)

        if endpoint.startswith("me/mailFolders/") and "/messages" not in endpoint:
            wellKnown = endpoint.split("/")[-1]
            fid = self._folders.get(wellKnown)
            if not fid:
                return {"error": "not found"}
            return {"id": fid, "displayName": wellKnown}

        # message page request: e.g. me/mailFolders/INBOX-ID/messages?...
        for fid, messages in self._messages.items():
            if f"me/mailFolders/{fid}/messages" in endpoint:
                page = {"value": messages}
                if fid == self._paginated_folder and "skiptoken" not in endpoint:
                    # First page of the paginated folder: advertise page 2.
                    page["@odata.nextLink"] = (
                        "https://graph.microsoft.com/v1.0/"
                        f"me/mailFolders/{fid}/messages?$skiptoken=abc"
                    )
                elif fid == self._paginated_folder and "skiptoken" in endpoint:
                    # Follow-up request: serve page 2 with no further link,
                    # which ends the pagination.
                    page = {"value": self._page2}
                return page
        return {"value": []}

    async def browse(self, path):
        """Return no entries; not called directly by the tests in this file."""
        return []


class _FakeKnowledgeService:
    """Knowledge-service double that records every ingestion job.

    Args:
        duplicateIds: Optional set of `sourceId` values for which
            `requestIngestion` reports ``status="duplicate"``; all other
            jobs are reported as ``"indexed"``.
    """

    def __init__(self, duplicateIds=None):
        # Jobs received by `requestIngestion`, in call order.
        self.calls = []
        self._duplicates = duplicateIds or set()

    async def requestIngestion(self, job):
        """Record `job` and return a result object with a canned status."""
        self.calls.append(job)
        status = "duplicate" if job.sourceId in self._duplicates else "indexed"
        return SimpleNamespace(
            jobId=job.sourceId,
            status=status,
            contentHash="h",
            fileId=job.sourceId,
            index=None,
            error=None,
        )


def _msg(mid: str, subject: str = "Hi", change: str = "ck1"):
    """Build a message dict fixture.

    Args:
        mid: Value for ``id``; also embedded in ``internetMessageId``.
        subject: Value for ``subject``.
        change: Value for ``changeKey``.

    Returns:
        A dict with fixed sender (Alice), recipient (Bob), receive time,
        preview and plain-text body, plus the three values above.
    """
    return {
        "id": mid,
        "subject": subject,
        "from": {"emailAddress": {"name": "Alice", "address": "a@x.com"}},
        "toRecipients": [{"emailAddress": {"name": "Bob", "address": "b@x.com"}}],
        "ccRecipients": [],
        "receivedDateTime": "2025-01-01T10:00:00Z",
        "bodyPreview": "Hello world",
        "body": {"contentType": "text", "content": "Hello world\nThis is the body."},
        "internetMessageId": f"<{mid}@local>",
        "hasAttachments": False,
        "changeKey": change,
    }


def _runBootstrap(adapter, knowledge, connectionId: str = "c1"):
    """Run `bootstrapOutlook` to completion against the fakes.

    Shared by the bootstrap tests so each of them only states its fixtures
    and assertions.

    Args:
        adapter: Adapter double passed as ``adapter``.
        knowledge: Knowledge-service double passed as ``knowledgeService``.
        connectionId: Connection id passed through to `bootstrapOutlook`.

    Returns:
        Whatever `bootstrapOutlook` returns (the tests index it like a dict).
    """
    connection = SimpleNamespace(mandateId="m1", userId="u1")

    async def _invoke():
        # Called inside the running loop, exactly as the tests did before
        # this helper existed.
        return await bootstrapOutlook(
            connectionId=connectionId,
            adapter=adapter,
            connection=connection,
            knowledgeService=knowledge,
            # NOTE(review): maxAgeDays=None presumably disables the age
            # cut-off so the fixed 2025-01-01 fixtures are always eligible
            # -- confirm against OutlookBootstrapLimits.
            limits=OutlookBootstrapLimits(maxAgeDays=None),
        )

    return asyncio.run(_invoke())


def test_buildContentObjects_emits_header_snippet_body():
    """`_buildContentObjects` yields header, snippet and body, in that order."""
    parts = _buildContentObjects(_msg("m1"), maxBodyChars=8000)
    ids = [p["contentObjectId"] for p in parts]
    assert ids == ["header", "snippet", "body"]

    header = parts[0]["data"]
    assert "Subject: Hi" in header
    # NOTE(review): only the display-name prefix is asserted for From/To;
    # confirm whether the address part should be checked as well.
    assert "From: Alice " in header
    assert "To: Bob " in header


def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
    """All messages from both folders are ingested with the expected job fields."""
    adapter = _FakeOutlookAdapter({
        "INBOX-ID": [_msg("m1"), _msg("m2")],
        "SENT-ID": [_msg("m3")],
    })
    knowledge = _FakeKnowledgeService()

    result = _runBootstrap(adapter, knowledge)

    assert result["indexed"] == 3
    sourceIds = {c.sourceId for c in knowledge.calls}
    assert sourceIds == {
        _syntheticMessageId("c1", "m1"),
        _syntheticMessageId("c1", "m2"),
        _syntheticMessageId("c1", "m3"),
    }
    for job in knowledge.calls:
        assert job.sourceKind == "outlook_message"
        assert job.mimeType == "message/rfc822"
        assert job.provenance["connectionId"] == "c1"
        assert job.provenance["service"] == "outlook"
        # changeKey of the fixture ("ck1") must arrive as contentVersion.
        assert job.contentVersion == "ck1"
        assert any(co["contentObjectId"] == "header" for co in job.contentObjects)


def test_bootstrap_outlook_follows_pagination():
    """Messages on the second page (behind `@odata.nextLink`) are ingested too."""
    adapter = _FakeOutlookAdapter(
        messages_by_folder={"INBOX-ID": [_msg("m1")], "SENT-ID": []},
        paginated_folder="INBOX-ID",
        page2=[_msg("m2"), _msg("m3")],
    )
    knowledge = _FakeKnowledgeService()

    result = _runBootstrap(adapter, knowledge)

    # 1 message on page one + 2 on page two.
    assert result["indexed"] == 3
    # Make the pagination explicit: page two is only served for a request
    # carrying the skiptoken from the advertised nextLink.
    assert any("skiptoken" in ep for ep in adapter.requested_endpoints)


def test_bootstrap_outlook_reports_duplicates():
    """Jobs the knowledge service flags as duplicates are counted as skipped."""
    adapter = _FakeOutlookAdapter({
        "INBOX-ID": [_msg("m1"), _msg("m2")],
        "SENT-ID": [],
    })
    duplicates = {
        _syntheticMessageId("c1", "m1"),
        _syntheticMessageId("c1", "m2"),
    }
    knowledge = _FakeKnowledgeService(duplicateIds=duplicates)

    result = _runBootstrap(adapter, knowledge)

    assert result["indexed"] == 0
    assert result["skippedDuplicate"] == 2


if __name__ == "__main__":
    # Allow running the tests without pytest.
    test_buildContentObjects_emits_header_snippet_body()
    test_bootstrap_outlook_indexes_messages_from_inbox_and_sent()
    test_bootstrap_outlook_follows_pagination()
    test_bootstrap_outlook_reports_duplicates()
    print("OK — bootstrapOutlook tests passed")