diff --git a/tests/unit/serviceKnowledge/__init__.py b/tests/unit/serviceKnowledge/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/serviceKnowledge/test_requestIngestion.py b/tests/unit/serviceKnowledge/test_requestIngestion.py deleted file mode 100644 index 595faeff..00000000 --- a/tests/unit/serviceKnowledge/test_requestIngestion.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -"""Unit tests for the P0 ingestion facade on KnowledgeService. - -Covers acceptance criteria AC4 (idempotent ingestion for unchanged content) -and hash stability. The knowledge DB interface and AI embedding service are -stubbed so the test runs without any external dependency. -""" - -from unittest.mock import MagicMock, patch - -import pytest - -from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import ( - IngestionJob, - KnowledgeService, - _computeIngestionHash, -) - - -class _StubKnowledgeDb: - """Minimal in-memory stand-in for interfaceDbKnowledge.""" - - def __init__(self): - self.index = None - self.upsertIndexCalls = 0 - self.upsertChunkCalls = 0 - - def upsertFileContentIndex(self, index): - self.index = index.model_dump() if hasattr(index, "model_dump") else dict(index) - self.upsertIndexCalls += 1 - - def upsertContentChunk(self, chunk): - self.upsertChunkCalls += 1 - - def updateFileStatus(self, fileId, status): - if self.index is not None: - self.index["status"] = status - - def getFileContentIndex(self, fileId): - return self.index - - -def _makeService(): - """Create a KnowledgeService with stubbed db and ai dependencies.""" - stubDb = _StubKnowledgeDb() - - aiService = MagicMock() - - async def _callEmbedding(texts): - return MagicMock( - errorCount=0, - content="", - metadata={"embeddings": [[0.0] * 4 for _ in texts]}, - ) - - aiService.callEmbedding = _callEmbedding - - def getService(name): - if name == "ai": - return aiService - raise KeyError(name) - - context = MagicMock() - context.user = MagicMock() - # Return a non-empty but empty-dict record so the FileItem lookup branch - # in _indexFileInternal resolves without touching a real DB. - context.interfaceDbComponent = MagicMock() - context.interfaceDbComponent.getRecordset = MagicMock(return_value=[{}]) - - with patch( - "modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge.getKnowledgeInterface", - return_value=stubDb, - ): - service = KnowledgeService(context, getService) - - return service, stubDb - - -@pytest.mark.asyncio -async def test_duplicate_skipped(): - service, db = _makeService() - job = IngestionJob( - sourceKind="file", - sourceId="file-123", - fileName="a.txt", - mimeType="text/plain", - userId="u1", - contentObjects=[ - {"contentObjectId": "c1", "contentType": "text", "data": "hello world"} - ], - ) - - first = await service.requestIngestion(job) - assert first.status == "indexed" - chunksAfterFirst = db.upsertChunkCalls - assert chunksAfterFirst >= 1 - - second = await service.requestIngestion(job) - assert second.status == "duplicate" - assert second.contentHash == first.contentHash - # No additional embedding work. - assert db.upsertChunkCalls == chunksAfterFirst - - -@pytest.mark.asyncio -async def test_reindex_on_content_change(): - service, db = _makeService() - base = IngestionJob( - sourceKind="file", - sourceId="file-123", - fileName="a.txt", - mimeType="text/plain", - userId="u1", - contentObjects=[ - {"contentObjectId": "c1", "contentType": "text", "data": "hello world"} - ], - ) - first = await service.requestIngestion(base) - assert first.status == "indexed" - chunksAfterFirst = db.upsertChunkCalls - - changed = IngestionJob( - sourceKind="file", - sourceId="file-123", - fileName="a.txt", - mimeType="text/plain", - userId="u1", - contentObjects=[ - {"contentObjectId": "c1", "contentType": "text", "data": "hello universe"} - ], - ) - second = await service.requestIngestion(changed) - assert second.status == "indexed" - assert second.contentHash != first.contentHash - assert db.upsertChunkCalls > chunksAfterFirst - - -def test_hash_stable_under_reordering(): - a = [ - {"contentObjectId": "c1", "contentType": "text", "data": "alpha"}, - {"contentObjectId": "c2", "contentType": "text", "data": "beta"}, - ] - b = list(reversed(a)) - assert _computeIngestionHash(a) == _computeIngestionHash(b) - - -def test_hash_changes_on_data_edit(): - a = [{"contentObjectId": "c1", "contentType": "text", "data": "alpha"}] - b = [{"contentObjectId": "c1", "contentType": "text", "data": "alpha!"}] - assert _computeIngestionHash(a) != _computeIngestionHash(b) - - -@pytest.mark.asyncio -async def test_get_ingestion_status_after_index(): - service, _db = _makeService() - job = IngestionJob( - sourceKind="coaching_session", - sourceId="coaching-session:abc", - fileName="session", - mimeType="application/x-coaching-session", - userId="u1", - contentObjects=[ - {"contentObjectId": "m0", "contentType": "text", "data": "User: hi"} - ], - provenance={"lane": "feature", "feature": "commcoach"}, - ) - handle = await service.requestIngestion(job) - status = service.getIngestionStatus(handle) - assert status["status"] == "indexed" - assert status["sourceKind"] == "coaching_session" - assert status["contentHash"] == handle.contentHash