platform-core/tests/fixtures/trusteeBenchmark/loadTrusteeBenchmarkFixture.py
2026-05-16 22:55:43 +02:00

275 lines
12 KiB
Python

# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Synthetic Trustee benchmark fixture for the Feature Data Sub-Agent eval.
Builds an in-memory snapshot of one fictional Swiss KMU mandate
("Demo AG") with:
* 3 fiscal years (2023, 2024, 2025) of `TrusteeDataAccountBalance` rows
-- both annual totals (periodMonth=0) and monthly snapshots.
* 8 representative accounts spanning the main chart-of-accounts blocks
(cash, bank accounts, receivables, payables, revenue, materials and
rent as the operating expense).
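* 10 journal bookings for 2025, each expanded into one debit and one
credit `TrusteeDataJournalLine` row sharing a `journalEntryId` (no separate
`TrusteeDataJournalEntry` table is populated), so debit/credit/COUNT
aggregations have meaningful answers.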
The data is deterministic (no RNG) so a question's gold-standard answer
is stable across runs.
This module deliberately stays decoupled from the production DB pipeline
-- the harness uses :class:`FakeFeatureDataProvider` (see
``gateway/tests/eval/fakeFeatureDataProvider.py``) to serve queries
against this in-memory snapshot, mirroring the public methods of
``FeatureDataProvider`` (browseTable / queryTable / aggregateTable).
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Dict, List
# Scope identifiers of the fictional "Demo AG" mandate.
# ``buildTrusteeBenchmarkFixture`` writes them into the ``mandateId`` /
# ``featureInstanceId`` columns of every generated row and onto the returned
# ``BenchmarkFixture`` itself.
_MANDATE_ID = "m-demo-ag"
_FEATURE_INSTANCE_ID = "fi-demo-ag-trustee"
# ---------------------------------------------------------------------------
# Account master data
# ---------------------------------------------------------------------------
# One record per account; list order determines the generated ``acc-NNN`` ids.
# Every account is kept in CHF.
_ACCOUNT_MASTER: List[Dict[str, Any]] = [
    {"accountNumber": number, "label": label, "accountType": kind, "currency": "CHF"}
    for number, label, kind in (
        ("1000", "Hauptkasse", "asset"),
        ("1020", "ZKB Geschaeftskonto", "asset"),
        ("1021", "PostFinance", "asset"),
        ("1100", "Forderungen aus Lieferungen und Leistungen", "asset"),
        ("2000", "Verbindlichkeiten aus Lieferungen", "liability"),
        ("3000", "Ertrag aus Beratung", "revenue"),
        ("5400", "Materialaufwand", "expense"),
        ("6000", "Mietaufwand", "expense"),
    )
]
# Annual closing balances per (year, accountNumber) -- the canonical reference.
# Asset/expense balances are positive, liability/revenue balances are stored
# as positive numbers (sign by accountType, like most accounting systems).
# Keyed by fiscal year, then by account number. Insertion order (years
# ascending, accounts in chart order) drives the order of the generated
# balance rows and therefore their ``bal-NNNN`` ids.
_ANNUAL_CLOSING: Dict[int, Dict[str, float]] = {
    2023: {
        # assets
        "1000": 4_800.00, "1020": 132_500.00, "1021": 22_400.00, "1100": 58_200.00,
        # liability, revenue
        "2000": 41_300.00, "3000": 410_000.00,
        # expenses
        "5400": 92_000.00, "6000": 36_000.00,
    },
    2024: {
        # assets
        "1000": 5_200.00, "1020": 148_900.00, "1021": 26_750.00, "1100": 61_400.00,
        # liability, revenue
        "2000": 44_100.00, "3000": 462_500.00,
        # expenses
        "5400": 104_300.00, "6000": 39_000.00,
    },
    2025: {
        # assets
        "1000": 5_900.00, "1020": 152_400.00, "1021": 28_100.00, "1100": 66_800.00,
        # liability, revenue
        "2000": 47_900.00, "3000": 488_700.00,
        # expenses
        "5400": 112_100.00, "6000": 42_000.00,
    },
}
def _openingFromPriorYear(year: int, accountNumber: str) -> float:
    """Return the opening balance of ``accountNumber`` for fiscal ``year``.

    The opening balance is the closing balance recorded in
    ``_ANNUAL_CLOSING`` for the preceding year. The lookup is applied the
    same way to every account, whatever its account type.

    Returns:
        The prior-year closing as a float, or ``0.0`` when the preceding
        year or the account has no entry.
    """
    priorClosings = _ANNUAL_CLOSING.get(year - 1)
    if priorClosings is None:
        # No data for the preceding year (e.g. the first fixture year).
        return 0.0
    return float(priorClosings.get(accountNumber, 0.0))
def _monthlyProgression(opening: float, closing: float, month: int) -> float:
    """Interpolate an account balance at the end of ``month``.

    The balance moves in a straight line from ``opening`` to ``closing``
    over twelve months. This is not meant to be realistic, only
    deterministic, so month-end questions always have the same answer.

    Args:
        opening: Balance at the start of the year.
        closing: Balance at the end of the year.
        month: Calendar month (1-12). Any value ``<= 0`` is answered with
            the unrounded ``closing`` value.

    Returns:
        The interpolated balance rounded to two decimals, or
        ``float(closing)`` when ``month <= 0``.
    """
    start = float(opening)
    end = float(closing)
    if month > 0:
        elapsed = month / 12.0
        return round(start + (end - start) * elapsed, 2)
    return end
# ---------------------------------------------------------------------------
# Journal entries / lines -- minimal but realistic
# ---------------------------------------------------------------------------
# One booking per row below; ``debit`` / ``credit`` hold the account numbers
# that receive the amount on the respective side. List order determines the
# generated ``je-2025-NNN`` / ``jl-NNNN`` ids.
_JOURNAL_ENTRIES_2025: List[Dict[str, Any]] = [
    dict(zip(("month", "day", "reference", "description", "amount", "debit", "credit"), booking))
    for booking in (
        (3, 15, "RG-2025-0042", "Beratung Kunde ACME AG", 18_500.00, "1100", "3000"),
        (3, 22, "EK-2025-0017", "Materialeinkauf Buehler AG", 9_200.00, "5400", "2000"),
        (3, 28, "MIETE-2025-03", "Mietzins Buero Maerz", 3_000.00, "6000", "1020"),
        (4, 5, "RG-2025-0051", "Beratung Kunde Bell AG", 24_300.00, "1100", "3000"),
        (4, 18, "EK-2025-0024", "Materialeinkauf Industriebedarf", 7_800.00, "5400", "2000"),
        (6, 12, "RG-2025-0079", "Beratung Kunde Bell AG", 32_100.00, "1100", "3000"),
        (6, 30, "MIETE-2025-Q2", "Mietzins Buero Q2-Abrechnung", 3_500.00, "6000", "1020"),
        (9, 4, "RG-2025-0114", "Beratung Kunde Migros", 41_500.00, "1100", "3000"),
        (9, 25, "EK-2025-0061", "Materialeinkauf Buehler AG", 12_400.00, "5400", "2000"),
        (11, 14, "RG-2025-0188", "Beratung Kunde ACME AG", 28_700.00, "1100", "3000"),
    )
]
# ---------------------------------------------------------------------------
# Snapshot containers
# ---------------------------------------------------------------------------
@dataclass
class BenchmarkFixture:
    """Container for the in-memory rows that stand in for feature DB tables.

    ``rowsByTable`` maps a table name to its list of rows, each row being a
    plain column dict intended to be compatible with the Pydantic feature
    data models (TrusteeDataAccountBalance, etc.).
    """

    # Mandate the snapshot belongs to.
    mandateId: str
    # Feature instance the snapshot belongs to.
    featureInstanceId: str
    # Table name -> rows; a fresh empty dict per instance unless supplied.
    rowsByTable: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict)
    # Table descriptors handed to the sub-agent; a fresh empty list per
    # instance unless supplied.
    selectedTables: List[Dict[str, Any]] = field(default_factory=list)
def _buildSelectedTables() -> List[Dict[str, Any]]:
    """Return the DATA_OBJECT-shaped table descriptors for the sub-agent.

    The list is hard-coded to the three tables this fixture populates and
    is meant to have the same shape as what the catalog's
    ``getDataObjects("trustee")`` would return.

    Returns:
        A new list with one descriptor per table, each holding an
        ``objectKey``, a ``label`` (German and English) and a ``meta`` dict
        with the table name and its field names.
    """
    keyPrefix = "data.feature.trustee."
    # (table name, German label, English label, exposed fields)
    tableSpecs = (
        (
            "TrusteeDataAccount", "Kontenplan", "Chart of accounts",
            ["id", "accountNumber", "label", "accountType", "currency", "isActive"],
        ),
        (
            "TrusteeDataAccountBalance", "Kontosalden", "Account balances",
            [
                "id", "accountNumber", "periodYear", "periodMonth",
                "openingBalance", "debitTotal", "creditTotal",
                "closingBalance", "currency",
            ],
        ),
        (
            "TrusteeDataJournalLine", "Buchungszeilen", "Journal lines",
            [
                "id", "journalEntryId", "accountNumber",
                "debitAmount", "creditAmount", "currency", "description",
            ],
        ),
    )
    descriptors: List[Dict[str, Any]] = []
    for tableName, germanLabel, englishLabel, fieldNames in tableSpecs:
        descriptors.append({
            "objectKey": keyPrefix + tableName,
            "label": {"de": germanLabel, "en": englishLabel},
            "meta": {"table": tableName, "fields": fieldNames},
        })
    return descriptors
def buildTrusteeBenchmarkFixture() -> BenchmarkFixture:
    """Materialize the full in-memory benchmark snapshot.

    Three tables are populated in ``rowsByTable``:

    * ``TrusteeDataAccount`` -- one row per ``_ACCOUNT_MASTER`` record.
    * ``TrusteeDataAccountBalance`` -- for every (year, account) pair in
      ``_ANNUAL_CLOSING``: one annual row (``periodMonth=0``) followed by
      twelve monthly rows (``periodMonth=1..12``).
    * ``TrusteeDataJournalLine`` -- one debit line and one credit line per
      booking in ``_JOURNAL_ENTRIES_2025``.

    Every row also carries ``mandateId`` and ``featureInstanceId`` columns
    so the fake provider can scope rows the same way the real one does.

    Returns:
        A ``BenchmarkFixture`` holding the rows above together with the
        descriptors from ``_buildSelectedTables()``.
    """
    # Scoping columns appended at the end of every row of every table.
    scopeColumns: Dict[str, Any] = {
        "mandateId": _MANDATE_ID,
        "featureInstanceId": _FEATURE_INSTANCE_ID,
    }

    # --- Chart of accounts: ids follow the order of the master list. ---
    chartOfAccounts: List[Dict[str, Any]] = [
        {
            "id": f"acc-{position:03d}",
            "accountNumber": master["accountNumber"],
            "label": master["label"],
            "accountType": master["accountType"],
            "currency": master["currency"],
            "isActive": True,
            **scopeColumns,
        }
        for position, master in enumerate(_ACCOUNT_MASTER)
    ]

    # --- Balances: annual row first, then the twelve monthly snapshots. ---
    balances: List[Dict[str, Any]] = []

    def appendBalance(
        accountNumber: str,
        year: int,
        month: int,
        yearStart: float,
        periodEnd: float,
        increase: float,
    ) -> None:
        # Debit/credit totals are synthetic: 1.2x and 0.2x of the positive
        # movement, so they net to that movement (up to rounding). A
        # non-positive movement yields 0.0 for both.
        balances.append({
            # The running id equals the number of rows emitted so far.
            "id": f"bal-{len(balances):04d}",
            "accountNumber": accountNumber,
            "periodYear": year,
            "periodMonth": month,
            "openingBalance": yearStart,
            "debitTotal": round(increase * 1.2, 2),
            "creditTotal": round(increase * 0.2, 2),
            "closingBalance": periodEnd,
            "currency": "CHF",
            **scopeColumns,
        })

    for year, closingByAccount in _ANNUAL_CLOSING.items():
        for accountNumber, yearEnd in closingByAccount.items():
            # The prior-year closing is used as opening for every account
            # type, and it is also the opening balance of each monthly row.
            yearStart = _openingFromPriorYear(year, accountNumber)
            appendBalance(
                accountNumber, year, 0, yearStart,
                float(yearEnd), max(yearEnd - yearStart, 0.0),
            )
            for month in range(1, 13):
                monthEnd = _monthlyProgression(yearStart, yearEnd, month)
                # Movement is measured against the year's opening balance.
                increase = monthEnd - yearStart if monthEnd > yearStart else 0.0
                appendBalance(accountNumber, year, month, yearStart, monthEnd, increase)

    # --- Journal lines: a debit/credit pair per booking. ---
    journalLines: List[Dict[str, Any]] = []
    for position, booking in enumerate(_JOURNAL_ENTRIES_2025):
        amount = float(booking["amount"])
        entryId = f"je-2025-{position:03d}"
        # (key of the booked account, debit amount, credit amount);
        # the debit line gets the even id, the credit line the odd one.
        sides = (("debit", amount, 0.0), ("credit", 0.0, amount))
        for offset, (accountKey, debitAmount, creditAmount) in enumerate(sides):
            journalLines.append({
                "id": f"jl-{position * 2 + offset:04d}",
                "journalEntryId": entryId,
                "accountNumber": booking[accountKey],
                "debitAmount": debitAmount,
                "creditAmount": creditAmount,
                "currency": "CHF",
                "description": booking["description"],
                **scopeColumns,
            })

    return BenchmarkFixture(
        mandateId=_MANDATE_ID,
        featureInstanceId=_FEATURE_INSTANCE_ID,
        rowsByTable={
            "TrusteeDataAccount": chartOfAccounts,
            "TrusteeDataAccountBalance": balances,
            "TrusteeDataJournalLine": journalLines,
        },
        selectedTables=_buildSelectedTables(),
    )