295 lines
11 KiB
Python
295 lines
11 KiB
Python
# Copyright (c) 2026 Patrick Motsch
|
|
# All rights reserved.
|
|
"""Trustee feature ontology (Phase 2 pilot).
|
|
|
|
Replaces the hand-written ``_AGENT_DOMAIN_HINTS`` block with a structured
ontology so the Feature Data Sub-Agent's QueryValidator AND the prompt
compiler share the same source of truth: account-group conventions,
period-bucket semantics, the NEVER_AGGREGATE constraints on
already-aggregated columns, and canonical tool-call templates for the most
frequent user intents.

Both the validator (deterministic enforcement) and the prompt compiler
(LLM steering) read from this descriptor, so an LLM that follows the
prompt patterns will never trigger a validator failure -- and one that
ignores them gets a structured repair hint pointing back at the same
constraint.

The legacy ``_AGENT_DOMAIN_HINTS_LEGACY`` block stays parked in
``mainTrustee.py`` for one release as a fallback during rollout.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
|
|
CanonicalQueryPattern,
|
|
Cardinality,
|
|
Constraint,
|
|
ConstraintRule,
|
|
Entity,
|
|
Invariant,
|
|
OntologyDescriptor,
|
|
Relation,
|
|
SemanticType,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Entities
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Entity catalogue of the trustee ontology. List order is significant only in
# that it is kept exactly as declared: Account, BankAccount, CashAccount,
# AccountBalance, JournalEntry, JournalLine.
_ENTITIES = [
    Entity(
        name="Account",
        pythonClass="TrusteeDataAccount",
        semanticType=SemanticType.ACCOUNT,
        description=(
            "Chart-of-accounts row (Konto). One row per accountNumber per mandate. "
            "Identifies the account, never holds balances."
        ),
        invariants=[
            Invariant(description=invariantText)
            for invariantText in (
                "accountNumber is a stable string identifier (e.g. '1020', '5400').",
                "accountType is one of: asset / liability / revenue / expense.",
            )
        ],
    ),
    # Account subgroups: both map to the same python class and semantic type
    # as "Account", name it as their parent, and differ only in name and
    # description. No invariants are passed for them.
    *[
        Entity(
            name=subgroupName,
            pythonClass="TrusteeDataAccount",
            semanticType=SemanticType.ACCOUNT,
            parentEntity="Account",
            description=subgroupDescription,
        )
        for subgroupName, subgroupDescription in (
            (
                "BankAccount",
                "Account subgroup with accountNumber LIKE '102%' (ZKB, PostFinance, UBS, ...).",
            ),
            (
                "CashAccount",
                "Account subgroup with accountNumber LIKE '100%' (Hauptkasse, Nebenkassen).",
            ),
        )
    ],
    Entity(
        name="AccountBalance",
        pythonClass="TrusteeDataAccountBalance",
        semanticType=SemanticType.BALANCE_SNAPSHOT,
        description=(
            "Period-bucketed snapshot: one row per (account, year, month). "
            "closingBalance is THE balance at end of period -- already aggregated."
        ),
        invariants=[
            Invariant(description=invariantText)
            for invariantText in (
                "periodMonth=0 means annual total of periodYear (use for 'per 31.12.YYYY').",
                "periodMonth in 1..12 means month-end snapshot.",
                "closingBalance is the balance at period end; openingBalance at period start.",
                "debitTotal/creditTotal are turnovers for the period, NOT balances.",
            )
        ],
    ),
    Entity(
        name="JournalEntry",
        pythonClass="TrusteeDataJournalEntry",
        semanticType=SemanticType.TRANSACTION,
        description="One booking header (Beleg). Has a bookingDate (unix seconds float) and totalAmount.",
        invariants=[
            Invariant(
                description="bookingDate is a UTC unix-seconds float; never compare against ISO strings.",
            ),
        ],
    ),
    Entity(
        name="JournalLine",
        pythonClass="TrusteeDataJournalLine",
        semanticType=SemanticType.TRANSACTION,
        description="One booking line of a JournalEntry. Each line debits or credits exactly one account.",
        invariants=[
            Invariant(
                description="Per line either debitAmount > 0 (Soll) or creditAmount > 0 (Haben), not both.",
            ),
        ],
    ),
]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Relations
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Every relation declared here is MANY_TO_ONE, so only the two endpoints and
# the joining field vary per row: (fromEntity, toEntity, via).
_RELATIONS = [
    Relation(
        fromEntity=sourceEntity,
        toEntity=targetEntity,
        cardinality=Cardinality.MANY_TO_ONE,
        via=joinField,
    )
    for sourceEntity, targetEntity, joinField in (
        ("AccountBalance", "Account", "accountNumber"),
        ("JournalLine", "JournalEntry", "journalEntryId"),
        ("JournalLine", "Account", "accountNumber"),
    )
]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Constraints (validator-enforced)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_CONSTRAINTS = [
    # Column-level NEVER_AGGREGATE rules on TrusteeDataAccountBalance, given as
    # (appliesTo, message) pairs. closingBalance is the single biggest
    # hallucination magnet -- it is a balance per period, so summing it across
    # periods or accounts is meaningless.
    # NOTE(review): the debitTotal/creditTotal messages say "do not SUM ...
    # without an explicit period filter", which reads as conditional, while the
    # rule itself is NEVER_AGGREGATE. Confirm against the validator which of
    # the two is intended.
    *[
        Constraint(
            appliesTo=targetColumn,
            rule=ConstraintRule.NEVER_AGGREGATE,
            message=repairHint,
        )
        for targetColumn, repairHint in (
            (
                "TrusteeDataAccountBalance.closingBalance",
                "closingBalance is per-period already; query with periodYear+periodMonth, never SUM/AVG it.",
            ),
            (
                "TrusteeDataAccountBalance.openingBalance",
                "openingBalance is already a balance per period; do not SUM/AVG it across rows.",
            ),
            (
                "TrusteeDataAccountBalance.debitTotal",
                "debitTotal is the period's debit TURNOVER; do not SUM it without an explicit period filter.",
            ),
            (
                "TrusteeDataAccountBalance.creditTotal",
                "creditTotal is a per-period turnover; do not SUM it across periods without an explicit period filter.",
            ),
        )
    ],
    # AccountBalance queries without a period filter are almost always wrong --
    # they conflate annual and monthly snapshots. REQUIRES_FILTER_ON (Phase 2)
    # gets wired through to the validator in a later iteration; for now this
    # rule is rendered into the prompt compiler so the LLM sees it explicitly.
    Constraint(
        appliesTo="TrusteeDataAccountBalance",
        rule=ConstraintRule.REQUIRES_FILTER_ON,
        message="Always filter on periodYear AND periodMonth (use periodMonth=0 for end-of-year).",
        params={"requiredFields": ["periodYear", "periodMonth"]},
    ),
    # Table-preference hint keyed by intent name.
    # NOTE(review): "BALANCE_AT_YEAR_END" has no canonical pattern with that
    # intent in this module -- presumably it is defined by an intent catalogue
    # elsewhere; verify it is not a stale name.
    Constraint(
        appliesTo="TrusteeDataAccountBalance",
        rule=ConstraintRule.PREFERRED_TABLE_FOR_INTENT,
        message="For 'Saldo per <date>' and 'Stand <year>' questions, prefer AccountBalance over JournalLine.",
        params={"intents": ["BANK_BALANCE_AT_DATE", "BALANCE_AT_YEAR_END"]},
    ),
]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Canonical query patterns (worked examples for the LLM)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Worked tool-call templates, one per user intent. Values in angle brackets
# ("<year>", "<accountNumber>", ...) are placeholders to be substituted; the
# German descriptions are runtime strings and are kept verbatim.
_CANONICAL_PATTERNS = [
    # Single account, year-end balance (periodMonth=0 is the annual bucket).
    CanonicalQueryPattern(
        intent="BANK_BALANCE_AT_DATE",
        description="Saldo eines Bankkontos per Jahresende.",
        pattern={
            "tool": "queryTable",
            "tableName": "TrusteeDataAccountBalance",
            "filters": [
                {"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
                {"field": "periodYear", "op": "=", "value": "<year>"},
                {"field": "periodMonth", "op": "=", "value": 0},
            ],
            "fields": ["closingBalance", "currency"],
        },
    ),
    # Account group by number prefix; the rows are fetched individually and
    # summed in the answer, because closingBalance is NEVER_AGGREGATE.
    CanonicalQueryPattern(
        intent="BANK_GROUP_TOTAL_AT_DATE",
        description="Summe einer Kontogruppe (z. B. alle Bankkonten 102%) per Jahresende.",
        pattern={
            "tool": "queryTable",
            "tableName": "TrusteeDataAccountBalance",
            "filters": [
                {"field": "accountNumber", "op": "LIKE", "value": "<prefix>%"},
                {"field": "periodYear", "op": "=", "value": "<year>"},
                {"field": "periodMonth", "op": "=", "value": 0},
            ],
            "fields": ["accountNumber", "closingBalance", "currency"],
            "_postProcessing": "Sum closingBalance values in your final answer; do NOT SUM via aggregateTable.",
        },
    ),
    # Multi-year history of one account. The periodYear lower bound keeps this
    # template consistent with the REQUIRES_FILTER_ON constraint on
    # TrusteeDataAccountBalance (requiredFields: periodYear AND periodMonth);
    # without it the pattern contradicts the rule rendered into the same prompt.
    # NOTE(review): assumes queryTable accepts the ">=" operator (only "=",
    # "LIKE" and ">" appear elsewhere in this module) -- confirm.
    CanonicalQueryPattern(
        intent="BALANCE_HISTORY_PER_YEAR",
        description="Saldo-Verlauf eines Kontos ueber mehrere Jahre.",
        pattern={
            "tool": "queryTable",
            "tableName": "TrusteeDataAccountBalance",
            "filters": [
                {"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
                {"field": "periodYear", "op": ">=", "value": "<fromYear>"},
                {"field": "periodMonth", "op": "=", "value": 0},
            ],
            "fields": ["periodYear", "closingBalance", "currency"],
            "orderBy": "periodYear",
        },
    ),
    # Month-end snapshot of one account (periodMonth in 1..12).
    CanonicalQueryPattern(
        intent="MONTHLY_BALANCE_SNAPSHOT",
        description="Saldo per Ende eines bestimmten Monats.",
        pattern={
            "tool": "queryTable",
            "tableName": "TrusteeDataAccountBalance",
            "filters": [
                {"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
                {"field": "periodYear", "op": "=", "value": "<year>"},
                {"field": "periodMonth", "op": "=", "value": "<month 1..12>"},
            ],
            "fields": ["closingBalance", "currency"],
        },
    ),
    # Chart-of-accounts lookup. The template shows the prefix variant only;
    # accountType is returned as a field but not filtered on here.
    CanonicalQueryPattern(
        intent="ACCOUNT_LIST_BY_TYPE_OR_PREFIX",
        description="Welche Konten gehoeren zu einer Gruppe (Typ oder Nummern-Prefix)?",
        pattern={
            "tool": "queryTable",
            "tableName": "TrusteeDataAccount",
            "filters": [
                {"field": "accountNumber", "op": "LIKE", "value": "<prefix>%"},
            ],
            "fields": ["accountNumber", "label", "accountType"],
        },
    ),
    # Turnover sum over journal lines; the template uses the debit side.
    CanonicalQueryPattern(
        intent="JOURNAL_SUM_AT_ACCOUNT",
        description="Summe der Soll- oder Haben-Buchungen auf einem Konto.",
        pattern={
            "tool": "aggregateTable",
            "tableName": "TrusteeDataJournalLine",
            "aggregate": "SUM",
            "field": "debitAmount",
            "filters": [
                {"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
            ],
        },
    ),
    # Generic row count for any table.
    CanonicalQueryPattern(
        intent="COUNT_ROWS",
        description="Anzahl Buchungen / Buchungszeilen / Konten.",
        pattern={
            "tool": "aggregateTable",
            "tableName": "<table>",
            "aggregate": "COUNT",
            "field": "id",
        },
    ),
    # Journal lines above an amount threshold (debit side shown).
    CanonicalQueryPattern(
        intent="JOURNAL_LINES_BY_AMOUNT",
        description="Buchungszeilen mit einem Betrag groesser/kleiner als einer Schwelle.",
        pattern={
            "tool": "queryTable",
            "tableName": "TrusteeDataJournalLine",
            "filters": [
                {"field": "debitAmount", "op": ">", "value": "<amount>"},
            ],
            "fields": ["accountNumber", "debitAmount", "description"],
        },
    ),
]
|
|
|
|
|
|
# Assembled once at import time from the module-level lists above and handed
# out unchanged by getTrusteeOntology().
_TRUSTEE_ONTOLOGY = OntologyDescriptor(
    featureCode="trustee",
    entities=_ENTITIES,
    relations=_RELATIONS,
    constraints=_CONSTRAINTS,
    canonicalPatterns=_CANONICAL_PATTERNS,
)
|
|
|
|
|
|
def getTrusteeOntology() -> OntologyDescriptor:
    """Return the trustee ontology descriptor.

    Every call returns the same module-level ``_TRUSTEE_ONTOLOGY`` object,
    which is built once at import time; nothing is copied or recomputed per
    call.

    NOTE(review): callers presumably must treat the result as read-only --
    whether ``OntologyDescriptor`` itself enforces immutability is not
    visible from this module.
    """
    return _TRUSTEE_ONTOLOGY
|