# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit test: feature data sub-agent schema context is rich enough.

The sub-agent's quality of answers depends almost entirely on the schema
prompt it receives. This test guards the contract that, for every selected
table, the prompt exposes:

  * the technical table name + i18n label,
  * every selected field with its Python type, German label, description
    and FK target (when registered via Pydantic models),
  * the structural rules around date-as-unix-timestamp, no JOINs, and
    preference for period-bucketed aggregate tables.

Without that context the agent silently returns wrong numbers (e.g. summing
`TrusteeDataJournalLine.debitAmount` without a date filter when the user
asked for the closing balance per period).
"""

from __future__ import annotations

import pytest

from modules.shared import fkRegistry
from modules.serviceCenter.services.serviceAgent.featureDataAgent import (
    _buildSchemaContext,
    _buildTableSchemaBlock,
    _formatFieldLine,
    _summarizePythonType,
)


@pytest.fixture(scope="module", autouse=True)
def _ensureModels():
    """Call ``fkRegistry._ensureModelsLoaded()`` once for this test module.

    Autouse + module scope: it runs before the first test without any test
    having to request it explicitly.
    """
    fkRegistry._ensureModelsLoaded()


def _trusteeAccountBalanceObj():
    """Return the selected-table descriptor for TrusteeDataAccountBalance.

    The dict is used by the tests below as an element of ``selectedTables``
    and as the source of the table name / field list arguments.
    """
    return {
        "objectKey": "data.feature.trustee.TrusteeDataAccountBalance",
        "label": {"de": "Kontosalden", "en": "Account balances"},
        "meta": {
            "table": "TrusteeDataAccountBalance",
            "fields": [
                "id",
                "accountNumber",
                "periodYear",
                "periodMonth",
                "openingBalance",
                "debitTotal",
                "creditTotal",
                "closingBalance",
                "currency",
            ],
        },
    }


def _trusteeJournalLineObj():
    """Return the selected-table descriptor for TrusteeDataJournalLine.

    Same shape as ``_trusteeAccountBalanceObj``; note that the field list
    deliberately contains no ``bookingDate``.
    """
    return {
        "objectKey": "data.feature.trustee.TrusteeDataJournalLine",
        "label": {"de": "Buchungszeilen", "en": "Journal lines"},
        "meta": {
            "table": "TrusteeDataJournalLine",
            "fields": [
                "id",
                "journalEntryId",
                "accountNumber",
                "debitAmount",
                "creditAmount",
                "currency",
                "description",
            ],
        },
    }


def test_summarizePythonType_compactsTypingPrefix():
    """Type summaries are rendered without a ``typing.`` prefix; None -> "any"."""
    from typing import Optional, Dict, Any

    assert _summarizePythonType(str) == "str"
    assert _summarizePythonType(Optional[float]) == "Optional[float]"
    assert _summarizePythonType(Dict[str, Any]) == "Dict[str, Any]"
    assert _summarizePythonType(None) == "any"


def test_formatFieldLine_includesLabelDescriptionAndFk():
    """A formatted field line carries name, type, label and the FK target."""
    from modules.datamodels.datamodelBase import MODEL_REGISTRY

    cls = MODEL_REGISTRY.get("TrusteeDataJournalLine")
    assert cls is not None, "Trustee datamodels must be registered for this test"

    journalEntryId = cls.model_fields["journalEntryId"]
    line = _formatFieldLine("journalEntryId", journalEntryId)

    assert line.startswith("journalEntryId (str)")
    assert '"Buchung"' in line
    assert "[FK -> TrusteeDataJournalEntry.id]" in line


def test_buildTableSchemaBlock_listsAccountBalanceFields():
    """The table block names the table, its description and typed fields."""
    obj = _trusteeAccountBalanceObj()
    block = _buildTableSchemaBlock(
        obj["meta"]["table"],
        "Kontosalden",
        obj["meta"]["fields"],
    )

    assert "Table: TrusteeDataAccountBalance" in block
    assert "Description: Account balance per period" in block
    assert "closingBalance (float)" in block
    assert "periodYear (int)" in block
    assert "periodMonth (int)" in block


def test_buildTableSchemaBlock_unknownTableFallsBackToFlatFields():
    """For a table name with no registered model, a flat field list is emitted."""
    block = _buildTableSchemaBlock(
        "NoSuchTable",
        "Demo",
        ["foo", "bar"],
    )

    assert "NoSuchTable" in block
    assert "Fields: foo, bar" in block


def test_buildSchemaContext_containsRichFieldsAndKeyRules():
    """The full prompt contains both tables, labelled fields and key rules."""
    selected = [_trusteeJournalLineObj(), _trusteeAccountBalanceObj()]
    prompt = _buildSchemaContext(
        featureCode="trustee",
        instanceLabel="Demo AG",
        selectedTables=selected,
        requestLang="de",
    )

    assert "TrusteeDataJournalLine" in prompt
    assert "TrusteeDataAccountBalance" in prompt
    assert 'debitAmount (float) "Soll"' in prompt
    assert 'closingBalance (float) "Schlusssaldo"' in prompt
    assert "[FK -> TrusteeDataJournalEntry.id]" in prompt
    assert "unix timestamp" in prompt
    assert "CANNOT JOIN" in prompt
    assert "period-bucketed aggregates" in prompt


def test_buildTableSchemaBlock_journalLineHasNoBookingDate():
    """JournalLine has no bookingDate column.

    The agent must see this so it does not invent a `bookingDate` filter on
    JournalLine and instead either joins to JournalEntry or uses
    *AccountBalance for period filters.

    NOTE(review): the prompt is also asserted to contain "CANNOT JOIN", so
    "joins" here presumably means a separate lookup via `journalEntryId`
    rather than a SQL JOIN -- confirm and reword.
    """
    obj = _trusteeJournalLineObj()
    block = _buildTableSchemaBlock(
        obj["meta"]["table"],
        "Buchungszeilen",
        obj["meta"]["fields"],
    )

    assert "Table: TrusteeDataJournalLine" in block
    assert "bookingDate" not in block


def test_buildSchemaContext_forbidsSummingAggregateFields():
    """The most damaging anti-pattern in trustee queries: SUMming
    closingBalance across periods.

    Without this rule the agent reports 11 mio for an account whose real
    closing balance is 48k. The generic prompt must call this out so every
    feature benefits, not just trustee.
    """
    selected = [_trusteeAccountBalanceObj()]
    prompt = _buildSchemaContext(
        featureCode="trustee",
        instanceLabel="Demo AG",
        selectedTables=selected,
        requestLang="de",
    )

    assert "NEVER apply SUM/AVG to columns that already represent a balance" in prompt
    assert "closingBalance" in prompt


def test_buildSchemaContext_appendsTrusteeDomainHints():
    """When the feature module exposes getAgentDomainHints(), the schema
    prompt must include those hints so the sub-agent knows e.g. that 102x are
    bank accounts and periodMonth=0 is the annual total.
    """
    selected = [_trusteeAccountBalanceObj()]
    prompt = _buildSchemaContext(
        featureCode="trustee",
        instanceLabel="Demo AG",
        selectedTables=selected,
        requestLang="de",
    )

    assert "TRUSTEE DOMAIN HINTS" in prompt
    assert "102x Bank / Post" in prompt
    assert "periodMonth = 0" in prompt
    assert "ANTI-PATTERNS" in prompt
    # A single check suffices: the previous `a in prompt or b in prompt` used
    # two spellings ('LIKE \'102%\'' and "LIKE '102%'") of the same string.
    assert "LIKE '102%'" in prompt


def test_buildSchemaContext_skipsHintsForFeaturesWithoutHook():
    """Features that don't export getAgentDomainHints() should produce a
    prompt without the trailing hints block.

    Verified by using a feature code that cannot resolve to a main module
    (registry returns None).
    """
    selected = [_trusteeAccountBalanceObj()]
    prompt = _buildSchemaContext(
        featureCode="nosuchfeature",
        instanceLabel="",
        selectedTables=selected,
        requestLang="de",
    )

    assert "TRUSTEE DOMAIN HINTS" not in prompt
    assert "Keep your answer SHORT" in prompt