# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Synthetic Trustee benchmark fixture for the Feature Data Sub-Agent eval.

Builds an in-memory snapshot of one fictional Swiss KMU mandate ("Demo AG")
with:

* 3 fiscal years (2023, 2024, 2025) of `TrusteeDataAccountBalance` rows --
  both annual totals (periodMonth=0) and monthly snapshots.
* 8 representative accounts (cash, banks, receivables, payables, revenue,
  materials, rent).
* Journal postings for fiscal year 2025, materialized as
  `TrusteeDataJournalLine` rows (one debit line and one credit line per
  posting) so debit/credit/COUNT aggregations have meaningful answers.

The data is deterministic (no RNG) so a question's gold-standard answer is
stable across runs.

This module deliberately stays decoupled from the production DB pipeline --
the harness uses :class:`FakeFeatureDataProvider` (see
``gateway/tests/eval/fakeFeatureDataProvider.py``) to serve queries against
this in-memory snapshot, mirroring the public methods of
``FeatureDataProvider`` (browseTable / queryTable / aggregateTable).
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Dict, List

# Scoping identifiers stamped onto every fixture row.
_MANDATE_ID = "m-demo-ag"
_FEATURE_INSTANCE_ID = "fi-demo-ag-trustee"


# ---------------------------------------------------------------------------
# Account master data
# ---------------------------------------------------------------------------

# One dict per account; list order determines the generated ``acc-NNN`` ids.
_ACCOUNT_MASTER: List[Dict[str, Any]] = [
    {"accountNumber": "1000", "label": "Hauptkasse", "accountType": "asset", "currency": "CHF"},
    {"accountNumber": "1020", "label": "ZKB Geschaeftskonto", "accountType": "asset", "currency": "CHF"},
    {"accountNumber": "1021", "label": "PostFinance", "accountType": "asset", "currency": "CHF"},
    {"accountNumber": "1100", "label": "Forderungen aus Lieferungen und Leistungen", "accountType": "asset", "currency": "CHF"},
    {"accountNumber": "2000", "label": "Verbindlichkeiten aus Lieferungen", "accountType": "liability", "currency": "CHF"},
    {"accountNumber": "3000", "label": "Ertrag aus Beratung", "accountType": "revenue", "currency": "CHF"},
    {"accountNumber": "5400", "label": "Materialaufwand", "accountType": "expense", "currency": "CHF"},
    {"accountNumber": "6000", "label": "Mietaufwand", "accountType": "expense", "currency": "CHF"},
]


# Annual closing balances per (year, accountNumber) -- the canonical reference.
# Asset/expense balances are positive, liability/revenue balances are stored
# as positive numbers (sign by accountType, like most accounting systems).
_ANNUAL_CLOSING: Dict[int, Dict[str, float]] = {
    2023: {
        "1000": 4_800.00,
        "1020": 132_500.00,
        "1021": 22_400.00,
        "1100": 58_200.00,
        "2000": 41_300.00,
        "3000": 410_000.00,
        "5400": 92_000.00,
        "6000": 36_000.00,
    },
    2024: {
        "1000": 5_200.00,
        "1020": 148_900.00,
        "1021": 26_750.00,
        "1100": 61_400.00,
        "2000": 44_100.00,
        "3000": 462_500.00,
        "5400": 104_300.00,
        "6000": 39_000.00,
    },
    2025: {
        "1000": 5_900.00,
        "1020": 152_400.00,
        "1021": 28_100.00,
        "1100": 66_800.00,
        "2000": 47_900.00,
        "3000": 488_700.00,
        "5400": 112_100.00,
        "6000": 42_000.00,
    },
}


def _openingFromPriorYear(year: int, accountNumber: str) -> float:
    """Return the opening balance of ``year`` for ``accountNumber``.

    The opening balance of year N is the closing balance of year N-1 as
    recorded in ``_ANNUAL_CLOSING``. When the prior year or the account is
    not present there, 0.0 is returned.
    """
    prior = year - 1
    return float(_ANNUAL_CLOSING.get(prior, {}).get(accountNumber, 0.0))


def _monthlyProgression(opening: float, closing: float, month: int) -> float:
    """Return the interpolated balance at the end of ``month``.

    Linear interpolation between ``opening`` and ``closing`` for monthly
    snapshots. Not realistic in detail but deterministic and monotonic per
    account, so questions about "Stand per Ende März" produce stable answers.

    Args:
        opening: Balance at the start of the fiscal year.
        closing: Balance at the end of the fiscal year.
        month: Period month. Values <= 0 denote the annual total and yield
            ``closing`` unchanged; values above 12 are treated as 12 so the
            result never runs past the annual closing balance.

    Returns:
        The balance rounded to 2 decimals (``closing`` as-is for month <= 0).
    """
    if month <= 0:
        return float(closing)
    # Clamp so an out-of-range month cannot extrapolate beyond year end.
    frac = min(month, 12) / 12.0
    return round(float(opening) + (float(closing) - float(opening)) * frac, 2)


# ---------------------------------------------------------------------------
# Journal entries / lines -- minimal but realistic
# ---------------------------------------------------------------------------

# One dict per posting: "debit"/"credit" hold the account numbers of the two
# sides, "amount" is the amount booked on each side.
_JOURNAL_ENTRIES_2025: List[Dict[str, Any]] = [
    {"month": 3, "day": 15, "reference": "RG-2025-0042", "description": "Beratung Kunde ACME AG",
     "amount": 18_500.00, "debit": "1100", "credit": "3000"},
    {"month": 3, "day": 22, "reference": "EK-2025-0017", "description": "Materialeinkauf Buehler AG",
     "amount": 9_200.00, "debit": "5400", "credit": "2000"},
    {"month": 3, "day": 28, "reference": "MIETE-2025-03", "description": "Mietzins Buero Maerz",
     "amount": 3_000.00, "debit": "6000", "credit": "1020"},
    {"month": 4, "day": 5, "reference": "RG-2025-0051", "description": "Beratung Kunde Bell AG",
     "amount": 24_300.00, "debit": "1100", "credit": "3000"},
    {"month": 4, "day": 18, "reference": "EK-2025-0024", "description": "Materialeinkauf Industriebedarf",
     "amount": 7_800.00, "debit": "5400", "credit": "2000"},
    {"month": 6, "day": 12, "reference": "RG-2025-0079", "description": "Beratung Kunde Bell AG",
     "amount": 32_100.00, "debit": "1100", "credit": "3000"},
    {"month": 6, "day": 30, "reference": "MIETE-2025-Q2", "description": "Mietzins Buero Q2-Abrechnung",
     "amount": 3_500.00, "debit": "6000", "credit": "1020"},
    {"month": 9, "day": 4, "reference": "RG-2025-0114", "description": "Beratung Kunde Migros",
     "amount": 41_500.00, "debit": "1100", "credit": "3000"},
    {"month": 9, "day": 25, "reference": "EK-2025-0061", "description": "Materialeinkauf Buehler AG",
     "amount": 12_400.00, "debit": "5400", "credit": "2000"},
    {"month": 11, "day": 14, "reference": "RG-2025-0188", "description": "Beratung Kunde ACME AG",
     "amount": 28_700.00, "debit": "1100", "credit": "3000"},
]
# ---------------------------------------------------------------------------
# Snapshot containers
# ---------------------------------------------------------------------------


@dataclass
class BenchmarkFixture:
    """In-memory rows that mimic feature DB tables.

    Each ``rowsByTable[tableName]`` is a list of column dicts compatible with
    the Pydantic feature data models (TrusteeDataAccountBalance, etc.).
    ``selectedTables`` holds the table descriptors handed to the sub-agent.
    """

    mandateId: str
    featureInstanceId: str
    rowsByTable: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict)
    selectedTables: List[Dict[str, Any]] = field(default_factory=list)


def _buildSelectedTables() -> List[Dict[str, Any]]:
    """Return the DATA_OBJECT-shaped descriptors the sub-agent expects.

    Mirrors what the catalog would return for the trustee feature; the real
    `getDataObjects("trustee")` call would yield the same shape but we
    hard-code the three tables we actually populate.
    """
    return [
        {
            "objectKey": "data.feature.trustee.TrusteeDataAccount",
            "label": {"de": "Kontenplan", "en": "Chart of accounts"},
            "meta": {
                "table": "TrusteeDataAccount",
                "fields": ["id", "accountNumber", "label", "accountType", "currency", "isActive"],
            },
        },
        {
            "objectKey": "data.feature.trustee.TrusteeDataAccountBalance",
            "label": {"de": "Kontosalden", "en": "Account balances"},
            "meta": {
                "table": "TrusteeDataAccountBalance",
                "fields": [
                    "id", "accountNumber", "periodYear", "periodMonth",
                    "openingBalance", "debitTotal", "creditTotal", "closingBalance",
                    "currency",
                ],
            },
        },
        {
            "objectKey": "data.feature.trustee.TrusteeDataJournalLine",
            "label": {"de": "Buchungszeilen", "en": "Journal lines"},
            "meta": {
                "table": "TrusteeDataJournalLine",
                "fields": [
                    "id", "journalEntryId", "accountNumber", "debitAmount", "creditAmount",
                    "currency", "description",
                ],
            },
        },
    ]


def _scopeColumns() -> Dict[str, Any]:
    """Return the mandate/feature-instance columns appended to every row."""
    return {"mandateId": _MANDATE_ID, "featureInstanceId": _FEATURE_INSTANCE_ID}


def _turnoverColumns(delta: float) -> Dict[str, float]:
    """Split a balance movement into synthetic debit/credit totals.

    The larger side is 1.2x and the smaller side 0.2x of the absolute
    movement, so ``debitTotal - creditTotal`` always equals ``delta``. An
    increase puts the larger share on the debit side; a decrease mirrors it
    onto the credit side (instead of reporting no turnover at all).
    """
    magnitude = abs(delta)
    major = round(magnitude * 1.2, 2)
    minor = round(magnitude * 0.2, 2)
    if delta >= 0:
        return {"debitTotal": major, "creditTotal": minor}
    return {"debitTotal": minor, "creditTotal": major}


def _balanceRow(
    rowIdx: int,
    accountNumber: str,
    periodYear: int,
    periodMonth: int,
    opening: float,
    closing: float,
) -> Dict[str, Any]:
    """Build one ``TrusteeDataAccountBalance`` row (annual or monthly)."""
    return {
        "id": f"bal-{rowIdx:04d}",
        "accountNumber": accountNumber,
        "periodYear": periodYear,
        "periodMonth": periodMonth,
        "openingBalance": opening,
        **_turnoverColumns(closing - opening),
        "closingBalance": closing,
        "currency": "CHF",
        **_scopeColumns(),
    }


def _buildAccountRows() -> List[Dict[str, Any]]:
    """Build the ``TrusteeDataAccount`` rows from ``_ACCOUNT_MASTER``."""
    return [
        {
            "id": f"acc-{i:03d}",
            "accountNumber": acc["accountNumber"],
            "label": acc["label"],
            "accountType": acc["accountType"],
            "currency": acc["currency"],
            "isActive": True,
            **_scopeColumns(),
        }
        for i, acc in enumerate(_ACCOUNT_MASTER)
    ]


def _buildBalanceRows() -> List[Dict[str, Any]]:
    """Build annual (periodMonth=0) and monthly (1..12) balance rows.

    For every (year, account) pair the annual row comes first, followed by
    its twelve monthly snapshots; ids are numbered in that emission order.
    """
    rows: List[Dict[str, Any]] = []
    for year, closings in _ANNUAL_CLOSING.items():
        for accountNumber, closing in closings.items():
            opening = _openingFromPriorYear(year, accountNumber)
            # len(rows) is the running row index because every row is appended.
            rows.append(_balanceRow(len(rows), accountNumber, year, 0, opening, float(closing)))
            for month in range(1, 13):
                monthly = _monthlyProgression(opening, closing, month)
                rows.append(_balanceRow(len(rows), accountNumber, year, month, opening, monthly))
    return rows


def _buildJournalLineRows() -> List[Dict[str, Any]]:
    """Build two ``TrusteeDataJournalLine`` rows (debit, credit) per posting."""
    rows: List[Dict[str, Any]] = []
    for j, entry in enumerate(_JOURNAL_ENTRIES_2025):
        entryId = f"je-2025-{j:03d}"
        amount = float(entry["amount"])
        # Debit line gets the even id, the matching credit line the odd one.
        rows.append({
            "id": f"jl-{j * 2:04d}",
            "journalEntryId": entryId,
            "accountNumber": entry["debit"],
            "debitAmount": amount,
            "creditAmount": 0.0,
            "currency": "CHF",
            "description": entry["description"],
            **_scopeColumns(),
        })
        rows.append({
            "id": f"jl-{j * 2 + 1:04d}",
            "journalEntryId": entryId,
            "accountNumber": entry["credit"],
            "debitAmount": 0.0,
            "creditAmount": amount,
            "currency": "CHF",
            "description": entry["description"],
            **_scopeColumns(),
        })
    return rows


def buildTrusteeBenchmarkFixture() -> BenchmarkFixture:
    """Materialize the full in-memory benchmark snapshot.

    All rows include ``mandateId`` and ``featureInstanceId`` columns so the
    fake provider can scope them the same way the real one does.

    Returns:
        A ``BenchmarkFixture`` whose ``rowsByTable`` holds the
        ``TrusteeDataAccount``, ``TrusteeDataAccountBalance`` and
        ``TrusteeDataJournalLine`` rows, and whose ``selectedTables`` holds
        the matching table descriptors.
    """
    return BenchmarkFixture(
        mandateId=_MANDATE_ID,
        featureInstanceId=_FEATURE_INSTANCE_ID,
        rowsByTable={
            "TrusteeDataAccount": _buildAccountRows(),
            "TrusteeDataAccountBalance": _buildBalanceRows(),
            "TrusteeDataJournalLine": _buildJournalLineRows(),
        },
        selectedTables=_buildSelectedTables(),
    )