# Copyright (c) 2026 PowerOn AG
# All rights reserved.
|
|
"""Synthetic Trustee benchmark fixture for the Feature Data Sub-Agent eval.
|
|
|
|
Builds an in-memory snapshot of one fictional Swiss KMU mandate
|
|
("Demo AG") with:
|
|
|
|
* 3 fiscal years (2023, 2024, 2025) of `TrusteeDataAccountBalance` rows
|
|
-- both annual totals (periodMonth=0) and monthly snapshots.
|
|
* 8 representative accounts spanning all major chart-of-accounts blocks
|
|
(cash, banks, receivables, payables, revenue, materials, personnel,
|
|
operating expenses).
|
|
* Per-month `TrusteeDataJournalEntry` + multiple `TrusteeDataJournalLine`
|
|
rows so debit/credit/COUNT aggregations have meaningful answers.
|
|
|
|
The data is deterministic (no RNG) so a question's gold-standard answer
|
|
is stable across runs.
|
|
|
|
This module deliberately stays decoupled from the production DB pipeline
|
|
-- the harness uses :class:`FakeFeatureDataProvider` (see
|
|
``gateway/tests/eval/fakeFeatureDataProvider.py``) to serve queries
|
|
against this in-memory snapshot, mirroring the public methods of
|
|
``FeatureDataProvider`` (browseTable / queryTable / aggregateTable).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Dict, List
|
|
|
|
|
|
_MANDATE_ID = "m-demo-ag"
|
|
_FEATURE_INSTANCE_ID = "fi-demo-ag-trustee"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Account master data
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_ACCOUNT_MASTER: List[Dict[str, Any]] = [
|
|
{"accountNumber": "1000", "label": "Hauptkasse", "accountType": "asset", "currency": "CHF"},
|
|
{"accountNumber": "1020", "label": "ZKB Geschaeftskonto", "accountType": "asset", "currency": "CHF"},
|
|
{"accountNumber": "1021", "label": "PostFinance", "accountType": "asset", "currency": "CHF"},
|
|
{"accountNumber": "1100", "label": "Forderungen aus Lieferungen und Leistungen", "accountType": "asset", "currency": "CHF"},
|
|
{"accountNumber": "2000", "label": "Verbindlichkeiten aus Lieferungen", "accountType": "liability", "currency": "CHF"},
|
|
{"accountNumber": "3000", "label": "Ertrag aus Beratung", "accountType": "revenue", "currency": "CHF"},
|
|
{"accountNumber": "5400", "label": "Materialaufwand", "accountType": "expense", "currency": "CHF"},
|
|
{"accountNumber": "6000", "label": "Mietaufwand", "accountType": "expense", "currency": "CHF"},
|
|
]
|
|
|
|
|
|
# Annual closing balances per (year, accountNumber) -- the canonical reference.
|
|
# Asset/expense balances are positive, liability/revenue balances are stored
|
|
# as positive numbers (sign by accountType, like most accounting systems).
|
|
_ANNUAL_CLOSING: Dict[int, Dict[str, float]] = {
|
|
2023: {
|
|
"1000": 4_800.00,
|
|
"1020": 132_500.00,
|
|
"1021": 22_400.00,
|
|
"1100": 58_200.00,
|
|
"2000": 41_300.00,
|
|
"3000": 410_000.00,
|
|
"5400": 92_000.00,
|
|
"6000": 36_000.00,
|
|
},
|
|
2024: {
|
|
"1000": 5_200.00,
|
|
"1020": 148_900.00,
|
|
"1021": 26_750.00,
|
|
"1100": 61_400.00,
|
|
"2000": 44_100.00,
|
|
"3000": 462_500.00,
|
|
"5400": 104_300.00,
|
|
"6000": 39_000.00,
|
|
},
|
|
2025: {
|
|
"1000": 5_900.00,
|
|
"1020": 152_400.00,
|
|
"1021": 28_100.00,
|
|
"1100": 66_800.00,
|
|
"2000": 47_900.00,
|
|
"3000": 488_700.00,
|
|
"5400": 112_100.00,
|
|
"6000": 42_000.00,
|
|
},
|
|
}
|
|
|
|
|
|
def _openingFromPriorYear(year: int, accountNumber: str) -> float:
    """Return the opening balance of ``accountNumber`` for ``year``.

    The opening balance of year N is the closing balance of year N-1 as
    recorded in ``_ANNUAL_CLOSING``. When the prior year or the account is
    not present there, 0.0 is returned.
    """
    priorYearClosings = _ANNUAL_CLOSING.get(year - 1, {})
    return float(priorYearClosings.get(accountNumber, 0.0))
|
|
|
|
|
|
def _monthlyProgression(opening: float, closing: float, month: int) -> float:
|
|
"""Linear interpolation between opening and closing for monthly snapshots.
|
|
|
|
Not realistic in detail but deterministic and monotonic per account, so
|
|
questions about "Stand per Ende März" produce stable answers.
|
|
"""
|
|
if month <= 0:
|
|
return float(closing)
|
|
frac = month / 12.0
|
|
return round(float(opening) + (float(closing) - float(opening)) * frac, 2)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Journal entries / lines -- minimal but realistic
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_JOURNAL_ENTRIES_2025: List[Dict[str, Any]] = [
|
|
{"month": 3, "day": 15, "reference": "RG-2025-0042", "description": "Beratung Kunde ACME AG", "amount": 18_500.00, "debit": "1100", "credit": "3000"},
|
|
{"month": 3, "day": 22, "reference": "EK-2025-0017", "description": "Materialeinkauf Buehler AG", "amount": 9_200.00, "debit": "5400", "credit": "2000"},
|
|
{"month": 3, "day": 28, "reference": "MIETE-2025-03", "description": "Mietzins Buero Maerz", "amount": 3_000.00, "debit": "6000", "credit": "1020"},
|
|
{"month": 4, "day": 5, "reference": "RG-2025-0051", "description": "Beratung Kunde Bell AG", "amount": 24_300.00, "debit": "1100", "credit": "3000"},
|
|
{"month": 4, "day": 18, "reference": "EK-2025-0024", "description": "Materialeinkauf Industriebedarf", "amount": 7_800.00, "debit": "5400", "credit": "2000"},
|
|
{"month": 6, "day": 12, "reference": "RG-2025-0079", "description": "Beratung Kunde Bell AG", "amount": 32_100.00, "debit": "1100", "credit": "3000"},
|
|
{"month": 6, "day": 30, "reference": "MIETE-2025-Q2", "description": "Mietzins Buero Q2-Abrechnung", "amount": 3_500.00, "debit": "6000", "credit": "1020"},
|
|
{"month": 9, "day": 4, "reference": "RG-2025-0114", "description": "Beratung Kunde Migros", "amount": 41_500.00, "debit": "1100", "credit": "3000"},
|
|
{"month": 9, "day": 25, "reference": "EK-2025-0061", "description": "Materialeinkauf Buehler AG", "amount": 12_400.00, "debit": "5400", "credit": "2000"},
|
|
{"month": 11, "day": 14, "reference": "RG-2025-0188", "description": "Beratung Kunde ACME AG", "amount": 28_700.00, "debit": "1100", "credit": "3000"},
|
|
]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Snapshot containers
# ---------------------------------------------------------------------------

@dataclass
class BenchmarkFixture:
    """In-memory rows that mimic feature DB tables.

    Each ``rowsByTable[tableName]`` is a list of column dicts meant to be
    compatible with the Pydantic feature data models
    (TrusteeDataAccountBalance, etc.).
    """

    # Mandate the snapshot belongs to.
    mandateId: str
    # Feature instance the snapshot belongs to.
    featureInstanceId: str
    # Table name -> list of row dicts; each instance gets its own empty dict.
    rowsByTable: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict)
    # Table descriptors handed to the sub-agent; each instance gets its own list.
    selectedTables: List[Dict[str, Any]] = field(default_factory=list)
|
|
|
|
|
|
def _buildSelectedTables() -> List[Dict[str, Any]]:
|
|
"""Return the DATA_OBJECT-shaped descriptors the sub-agent expects.
|
|
|
|
Mirrors what the catalog would return for the trustee feature; the
|
|
real `getDataObjects("trustee")` call would yield the same shape but
|
|
we hard-code the three tables we actually populate.
|
|
"""
|
|
return [
|
|
{
|
|
"objectKey": "data.feature.trustee.TrusteeDataAccount",
|
|
"label": {"de": "Kontenplan", "en": "Chart of accounts"},
|
|
"meta": {
|
|
"table": "TrusteeDataAccount",
|
|
"fields": ["id", "accountNumber", "label", "accountType", "currency", "isActive"],
|
|
},
|
|
},
|
|
{
|
|
"objectKey": "data.feature.trustee.TrusteeDataAccountBalance",
|
|
"label": {"de": "Kontosalden", "en": "Account balances"},
|
|
"meta": {
|
|
"table": "TrusteeDataAccountBalance",
|
|
"fields": [
|
|
"id", "accountNumber", "periodYear", "periodMonth",
|
|
"openingBalance", "debitTotal", "creditTotal",
|
|
"closingBalance", "currency",
|
|
],
|
|
},
|
|
},
|
|
{
|
|
"objectKey": "data.feature.trustee.TrusteeDataJournalLine",
|
|
"label": {"de": "Buchungszeilen", "en": "Journal lines"},
|
|
"meta": {
|
|
"table": "TrusteeDataJournalLine",
|
|
"fields": [
|
|
"id", "journalEntryId", "accountNumber",
|
|
"debitAmount", "creditAmount", "currency", "description",
|
|
],
|
|
},
|
|
},
|
|
]
|
|
|
|
|
|
def _buildAccountRows() -> List[Dict[str, Any]]:
    """Return one ``TrusteeDataAccount`` row per entry of ``_ACCOUNT_MASTER``."""
    return [
        {
            "id": f"acc-{index:03d}",
            "accountNumber": account["accountNumber"],
            "label": account["label"],
            "accountType": account["accountType"],
            "currency": account["currency"],
            "isActive": True,
            "mandateId": _MANDATE_ID,
            "featureInstanceId": _FEATURE_INSTANCE_ID,
        }
        for index, account in enumerate(_ACCOUNT_MASTER)
    ]


def _buildBalanceRows() -> List[Dict[str, Any]]:
    """Return the ``TrusteeDataAccountBalance`` rows for every year and account.

    For each (year, account) the annual row (``periodMonth`` 0) comes first,
    followed by the monthly rows 1..12; ids are numbered in that order.
    """
    rows: List[Dict[str, Any]] = []
    for year, closings in _ANNUAL_CLOSING.items():
        for accountNumber, closing in closings.items():
            opening = _openingFromPriorYear(year, accountNumber)
            # (periodMonth, closingBalance) pairs: annual total, then each month.
            periods = [(0, float(closing))]
            periods.extend(
                (month, _monthlyProgression(opening, closing, month))
                for month in range(1, 13)
            )
            for periodMonth, periodClosing in periods:
                # Only increases over the opening balance yield movements; the
                # 1.2 / 0.2 split makes debitTotal - creditTotal equal the
                # increase (up to rounding). A decrease yields zero totals.
                increase = max(periodClosing - opening, 0.0)
                rows.append({
                    "id": f"bal-{len(rows):04d}",
                    "accountNumber": accountNumber,
                    "periodYear": year,
                    "periodMonth": periodMonth,
                    # Every row of a year carries the year's opening balance.
                    "openingBalance": opening,
                    "debitTotal": round(increase * 1.2, 2),
                    "creditTotal": round(increase * 0.2, 2),
                    "closingBalance": periodClosing,
                    "currency": "CHF",
                    "mandateId": _MANDATE_ID,
                    "featureInstanceId": _FEATURE_INSTANCE_ID,
                })
    return rows


def _buildJournalLineRows() -> List[Dict[str, Any]]:
    """Return a debit line and a credit line per ``_JOURNAL_ENTRIES_2025`` entry."""
    rows: List[Dict[str, Any]] = []
    for index, entry in enumerate(_JOURNAL_ENTRIES_2025):
        entryId = f"je-2025-{index:03d}"
        amount = float(entry["amount"])
        # Debit line first (even id), then the matching credit line (odd id).
        postings = (
            (entry["debit"], amount, 0.0),
            (entry["credit"], 0.0, amount),
        )
        for offset, (accountNumber, debitAmount, creditAmount) in enumerate(postings):
            rows.append({
                "id": f"jl-{index * 2 + offset:04d}",
                "journalEntryId": entryId,
                "accountNumber": accountNumber,
                "debitAmount": debitAmount,
                "creditAmount": creditAmount,
                "currency": "CHF",
                "description": entry["description"],
                "mandateId": _MANDATE_ID,
                "featureInstanceId": _FEATURE_INSTANCE_ID,
            })
    return rows


def buildTrusteeBenchmarkFixture() -> BenchmarkFixture:
    """Materialize the full in-memory benchmark snapshot.

    All rows include ``mandateId`` and ``featureInstanceId`` columns so the
    fake provider can scope them the same way the real one does.

    Returns:
        A ``BenchmarkFixture`` whose ``rowsByTable`` holds the
        ``TrusteeDataAccount``, ``TrusteeDataAccountBalance`` and
        ``TrusteeDataJournalLine`` rows, and whose ``selectedTables`` holds
        the matching table descriptors.
    """
    return BenchmarkFixture(
        mandateId=_MANDATE_ID,
        featureInstanceId=_FEATURE_INSTANCE_ID,
        rowsByTable={
            "TrusteeDataAccount": _buildAccountRows(),
            "TrusteeDataAccountBalance": _buildBalanceRows(),
            "TrusteeDataJournalLine": _buildJournalLineRows(),
        },
        selectedTables=_buildSelectedTables(),
    )
|