gateway/tests/unit/services/test_featureDataAgent_schema.py
2026-04-27 08:07:37 +02:00

185 lines
6.6 KiB
Python

# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit test: feature data sub-agent schema context is rich enough.

The sub-agent's quality of answers depends almost entirely on the schema
prompt it receives. This test guards the contract that, for every selected
table, the prompt exposes:

* the technical table name + i18n label,
* every selected field with its Python type, German label, description and
  FK target (when registered via Pydantic models),
* the structural rules around date-as-unix-timestamp, no JOINs, and
  preference for period-bucketed aggregate tables.

Without that context the agent silently returns wrong numbers (e.g. summing
`TrusteeDataJournalLine.debitAmount` without a date filter when the user
asked for the closing balance per period).
"""
from __future__ import annotations
import pytest
from modules.shared import fkRegistry
from modules.serviceCenter.services.serviceAgent.featureDataAgent import (
_buildSchemaContext,
_buildTableSchemaBlock,
_formatFieldLine,
_summarizePythonType,
)
@pytest.fixture(scope="module", autouse=True)
def _ensureModels():
    """Call ``fkRegistry._ensureModelsLoaded()`` once for this test module.

    Declared ``autouse`` with module scope, so pytest runs it a single time
    before the tests in this file without any test requesting it explicitly.
    """
    fkRegistry._ensureModelsLoaded()
def _trusteeAccountBalanceObj() -> dict:
    """Return a fresh selected-table descriptor for ``TrusteeDataAccountBalance``.

    The dict carries the object key, the de/en labels and, under ``meta``,
    the table name plus the list of selected field names. A new dict is built
    on every call, so a test may mutate its copy without affecting others.
    """
    return {
        "objectKey": "data.feature.trustee.TrusteeDataAccountBalance",
        "label": {"de": "Kontosalden", "en": "Account balances"},
        "meta": {
            "table": "TrusteeDataAccountBalance",
            "fields": [
                "id", "accountNumber", "periodYear", "periodMonth",
                "openingBalance", "debitTotal", "creditTotal",
                "closingBalance", "currency",
            ],
        },
    }
def _trusteeJournalLineObj() -> dict:
    """Return a fresh selected-table descriptor for ``TrusteeDataJournalLine``.

    The dict carries the object key, the de/en labels and, under ``meta``,
    the table name plus the list of selected field names. The field list
    deliberately contains no ``bookingDate`` entry. A new dict is built on
    every call, so a test may mutate its copy without affecting others.
    """
    return {
        "objectKey": "data.feature.trustee.TrusteeDataJournalLine",
        "label": {"de": "Buchungszeilen", "en": "Journal lines"},
        "meta": {
            "table": "TrusteeDataJournalLine",
            "fields": [
                "id", "journalEntryId", "accountNumber",
                "debitAmount", "creditAmount", "currency", "description",
            ],
        },
    }
def test_summarizePythonType_compactsTypingPrefix():
    """``_summarizePythonType`` renders annotations in a compact form.

    Plain classes use their bare name, ``typing`` generics are expected
    without a ``typing.`` prefix, and ``None`` falls back to ``"any"``.
    """
    from typing import Any, Dict, Optional

    assert _summarizePythonType(str) == "str"
    assert _summarizePythonType(Optional[float]) == "Optional[float]"
    assert _summarizePythonType(Dict[str, Any]) == "Dict[str, Any]"
    assert _summarizePythonType(None) == "any"
def test_formatFieldLine_includesLabelDescriptionAndFk():
    """The formatted line for ``journalEntryId`` shows type, label and FK target."""
    from modules.datamodels.datamodelBase import MODEL_REGISTRY

    modelCls = MODEL_REGISTRY.get("TrusteeDataJournalLine")
    assert modelCls is not None, "Trustee datamodels must be registered for this test"

    fieldInfo = modelCls.model_fields["journalEntryId"]
    line = _formatFieldLine("journalEntryId", fieldInfo)

    assert line.startswith("journalEntryId (str)")
    assert '"Buchung"' in line
    assert "[FK -> TrusteeDataJournalEntry.id]" in line
    # NOTE(review): the test name promises a description check, but nothing
    # here asserts on the description text — consider adding one.
def test_buildTableSchemaBlock_listsAccountBalanceFields():
    """The AccountBalance block names the table, its description and typed fields."""
    meta = _trusteeAccountBalanceObj()["meta"]
    block = _buildTableSchemaBlock(
        meta["table"], "Kontosalden", meta["fields"],
    )

    # Table header and description.
    assert "Table: TrusteeDataAccountBalance" in block
    assert "Description: Account balance per period" in block

    # Selected fields are listed together with their Python type.
    assert "closingBalance (float)" in block
    assert "periodYear (int)" in block
    assert "periodMonth (int)" in block
def test_buildTableSchemaBlock_unknownTableFallsBackToFlatFields():
    """A table name with no registered model still yields a block.

    The block is expected to mention the table name and to list the given
    field names as a flat, comma-separated ``Fields:`` line.
    """
    block = _buildTableSchemaBlock(
        "NoSuchTable", "Demo", ["foo", "bar"],
    )

    assert "NoSuchTable" in block
    assert "Fields: foo, bar" in block
def test_buildSchemaContext_containsRichFieldsAndKeyRules():
    """The full schema prompt carries both tables, rich field lines and key rules."""
    selected = [_trusteeJournalLineObj(), _trusteeAccountBalanceObj()]
    prompt = _buildSchemaContext(
        featureCode="trustee",
        instanceLabel="Demo AG",
        selectedTables=selected,
        requestLang="de",
    )

    # Both selected tables are present.
    assert "TrusteeDataJournalLine" in prompt
    assert "TrusteeDataAccountBalance" in prompt

    # Field lines carry the type, the German label and the FK target.
    assert 'debitAmount (float) "Soll"' in prompt
    assert 'closingBalance (float) "Schlusssaldo"' in prompt
    assert "[FK -> TrusteeDataJournalEntry.id]" in prompt

    # Structural rules the sub-agent must be told about.
    assert "unix timestamp" in prompt
    assert "CANNOT JOIN" in prompt
    assert "period-bucketed aggregates" in prompt
def test_buildTableSchemaBlock_journalLineHasNoBookingDate():
    """JournalLine has no bookingDate column.

    The agent must see this so it does not invent a `bookingDate` filter on
    JournalLine and instead either goes through JournalEntry or uses
    *AccountBalance for period filters.

    NOTE(review): this used to read "joins to JournalEntry", but the schema
    prompt checked in this file states "CANNOT JOIN"; presumably the entry
    has to be looked up in a separate query — confirm the intended wording.
    """
    meta = _trusteeJournalLineObj()["meta"]
    block = _buildTableSchemaBlock(
        meta["table"], "Buchungszeilen", meta["fields"],
    )

    assert "Table: TrusteeDataJournalLine" in block
    assert "bookingDate" not in block
def test_buildSchemaContext_forbidsSummingAggregateFields():
    """The prompt must forbid SUM/AVG over columns that are already balances.

    The most damaging anti-pattern in trustee queries: SUMming closingBalance
    across periods. Without this rule the agent reports 11 mio for an account
    whose real closing balance is 48k. The generic prompt must call this out so
    every feature benefits, not just trustee.
    """
    selected = [_trusteeAccountBalanceObj()]
    prompt = _buildSchemaContext(
        featureCode="trustee",
        instanceLabel="Demo AG",
        selectedTables=selected,
        requestLang="de",
    )

    assert "NEVER apply SUM/AVG to columns that already represent a balance" in prompt
    # NOTE(review): closingBalance is also one of the selected fields, so this
    # check presumably passes from the field listing alone and does not prove
    # that the rule text itself names the column.
    assert "closingBalance" in prompt
def test_buildSchemaContext_appendsTrusteeDomainHints():
    """Domain hints from the feature module are appended to the schema prompt.

    When the feature module exposes getAgentDomainHints(), the schema prompt
    must include those hints so the sub-agent knows e.g. that 102x are bank
    accounts and periodMonth=0 is the annual total.
    """
    selected = [_trusteeAccountBalanceObj()]
    prompt = _buildSchemaContext(
        featureCode="trustee",
        instanceLabel="Demo AG",
        selectedTables=selected,
        requestLang="de",
    )

    assert "TRUSTEE DOMAIN HINTS" in prompt
    assert "102x Bank / Post" in prompt
    assert "periodMonth = 0" in prompt
    assert "ANTI-PATTERNS" in prompt
    # The previous form `'LIKE \'102%\'' in prompt or "LIKE '102%'" in prompt`
    # compared the same string twice: both literals evaluate to LIKE '102%'.
    assert "LIKE '102%'" in prompt
def test_buildSchemaContext_skipsHintsForFeaturesWithoutHook():
    """No hints block is appended when the feature offers no hints hook.

    Features that don't export getAgentDomainHints() should produce a prompt
    without the trailing hints block. Verified by using a feature code that
    cannot resolve to a main module (registry returns None).
    """
    selected = [_trusteeAccountBalanceObj()]
    prompt = _buildSchemaContext(
        featureCode="nosuchfeature",
        instanceLabel="",
        selectedTables=selected,
        requestLang="de",
    )

    assert "TRUSTEE DOMAIN HINTS" not in prompt
    # The generic part of the prompt must still be there.
    assert "Keep your answer SHORT" in prompt