platform-core/modules/features/trustee/trusteeOntology.py
ValueOn AG 4a60086c80
Some checks failed
Deploy Plattform-Core (Int) / test (push) Failing after 15s
Deploy Plattform-Core (Int) / deploy (push) Has been skipped
cp adapted to 2026 poweron
2026-06-09 09:53:31 +02:00

295 lines
11 KiB
Python

# Copyright (c) 2026 PowerOn AG
# All rights reserved.
"""Trustee feature ontology (Phase 2 pilot).

Replaces the hand-written ``_AGENT_DOMAIN_HINTS`` block with a structured
ontology so the Feature Data Sub-Agent's QueryValidator AND the prompt
compiler share the same source of truth: account-group conventions,
period-bucket semantics, the NEVER_AGGREGATE constraints on
already-aggregated columns, and canonical tool-call templates for the most
frequent user intents.

Both the validator (deterministic enforcement) and the prompt compiler
(LLM steering) read from this descriptor, so an LLM that follows the
prompt patterns will never trigger a validator failure -- and one that
ignores them gets a structured repair hint pointing back at the same
constraint.

The legacy ``_AGENT_DOMAIN_HINTS_LEGACY`` block stays parked in
``mainTrustee.py`` for one release as a fallback during rollout.
"""
from __future__ import annotations
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
CanonicalQueryPattern,
Cardinality,
Constraint,
ConstraintRule,
Entity,
Invariant,
OntologyDescriptor,
Relation,
SemanticType,
)
# ---------------------------------------------------------------------------
# Entities
# ---------------------------------------------------------------------------
# Entity catalogue of the trustee feature. Each entry maps a logical
# ontology name onto the Python model class named in ``pythonClass``.
_ENTITIES: list[Entity] = [
    # --- Chart of accounts -------------------------------------------------
    # "Account" is the base entity; "BankAccount" and "CashAccount" below
    # point at the same pythonClass and name "Account" as their parentEntity,
    # distinguished only by the accountNumber prefix given in their
    # descriptions.
    Entity(
        name="Account",
        pythonClass="TrusteeDataAccount",
        semanticType=SemanticType.ACCOUNT,
        description=(
            "Chart-of-accounts row (Konto). "
            "One row per accountNumber per mandate. "
            "Identifies the account, never holds balances."
        ),
        invariants=[
            Invariant(
                description="accountNumber is a stable string identifier (e.g. '1020', '5400').",
            ),
            Invariant(
                description="accountType is one of: asset / liability / revenue / expense.",
            ),
        ],
    ),
    Entity(
        name="BankAccount",
        pythonClass="TrusteeDataAccount",
        semanticType=SemanticType.ACCOUNT,
        parentEntity="Account",
        description="Account subgroup with accountNumber LIKE '102%' (ZKB, PostFinance, UBS, ...).",
    ),
    Entity(
        name="CashAccount",
        pythonClass="TrusteeDataAccount",
        semanticType=SemanticType.ACCOUNT,
        parentEntity="Account",
        description="Account subgroup with accountNumber LIKE '100%' (Hauptkasse, Nebenkassen).",
    ),
    # --- Balances ----------------------------------------------------------
    # The invariants spell out the period-bucket convention: periodMonth=0 is
    # the annual row, 1..12 are month-end rows.
    Entity(
        name="AccountBalance",
        pythonClass="TrusteeDataAccountBalance",
        semanticType=SemanticType.BALANCE_SNAPSHOT,
        description=(
            "Period-bucketed snapshot: one row per (account, year, month). "
            "closingBalance is THE balance at end of period -- already aggregated."
        ),
        invariants=[
            Invariant(
                description="periodMonth=0 means annual total of periodYear (use for 'per 31.12.YYYY').",
            ),
            Invariant(
                description="periodMonth in 1..12 means month-end snapshot.",
            ),
            Invariant(
                description="closingBalance is the balance at period end; openingBalance at period start.",
            ),
            Invariant(
                description="debitTotal/creditTotal are turnovers for the period, NOT balances.",
            ),
        ],
    ),
    # --- Journal (bookings) ------------------------------------------------
    # Header and line are separate entities; both carry the TRANSACTION
    # semantic type.
    Entity(
        name="JournalEntry",
        pythonClass="TrusteeDataJournalEntry",
        semanticType=SemanticType.TRANSACTION,
        description="One booking header (Beleg). Has a bookingDate (unix seconds float) and totalAmount.",
        invariants=[
            Invariant(
                description="bookingDate is a UTC unix-seconds float; never compare against ISO strings.",
            ),
        ],
    ),
    Entity(
        name="JournalLine",
        pythonClass="TrusteeDataJournalLine",
        semanticType=SemanticType.TRANSACTION,
        description="One booking line of a JournalEntry. Each line debits or credits exactly one account.",
        invariants=[
            Invariant(
                description="Per line either debitAmount > 0 (Soll) or creditAmount > 0 (Haben), not both.",
            ),
        ],
    ),
]
# ---------------------------------------------------------------------------
# Relations
# ---------------------------------------------------------------------------
# Many-to-one links between the entities declared in this module. ``via``
# names the field on the "from" side that carries the reference.
_RELATIONS: list[Relation] = [
    # Every balance row belongs to exactly one account.
    Relation(
        fromEntity="AccountBalance",
        toEntity="Account",
        cardinality=Cardinality.MANY_TO_ONE,
        via="accountNumber",
    ),
    # Every journal line belongs to one booking header ...
    Relation(
        fromEntity="JournalLine",
        toEntity="JournalEntry",
        cardinality=Cardinality.MANY_TO_ONE,
        via="journalEntryId",
    ),
    # ... and is posted against one account.
    Relation(
        fromEntity="JournalLine",
        toEntity="Account",
        cardinality=Cardinality.MANY_TO_ONE,
        via="accountNumber",
    ),
]
# ---------------------------------------------------------------------------
# Constraints (validator-enforced)
# ---------------------------------------------------------------------------
# Rules attached to the balance table and its columns. ``appliesTo`` is
# either "<pythonClass>.<column>" or just "<pythonClass>".
_CONSTRAINTS: list[Constraint] = [
    # --- NEVER_AGGREGATE on the four numeric balance columns ---------------
    # closingBalance attracts the most hallucinated queries: it already is
    # the balance of one period, so adding it up over several periods or
    # accounts produces a number without meaning.
    Constraint(
        appliesTo="TrusteeDataAccountBalance.closingBalance",
        rule=ConstraintRule.NEVER_AGGREGATE,
        message="closingBalance is per-period already; query with periodYear+periodMonth, never SUM/AVG it.",
    ),
    Constraint(
        appliesTo="TrusteeDataAccountBalance.openingBalance",
        rule=ConstraintRule.NEVER_AGGREGATE,
        message="openingBalance is already a balance per period; do not SUM/AVG it across rows.",
    ),
    # NOTE(review): for the two turnover columns the rule is the
    # unconditional NEVER_AGGREGATE, while the messages only forbid SUM
    # "without an explicit period filter" -- confirm which one is intended.
    Constraint(
        appliesTo="TrusteeDataAccountBalance.debitTotal",
        rule=ConstraintRule.NEVER_AGGREGATE,
        message="debitTotal is the period's debit TURNOVER; do not SUM it without an explicit period filter.",
    ),
    Constraint(
        appliesTo="TrusteeDataAccountBalance.creditTotal",
        rule=ConstraintRule.NEVER_AGGREGATE,
        message=(
            "creditTotal is a per-period turnover; "
            "do not SUM it across periods without an explicit period filter."
        ),
    ),
    # --- Table-level rules ---------------------------------------------------
    # A balance query that omits the period filter mixes annual and monthly
    # snapshots and is therefore almost always wrong. Validator support for
    # REQUIRES_FILTER_ON (Phase 2) follows in a later iteration; until then
    # the rule only reaches the LLM through the prompt compiler.
    Constraint(
        appliesTo="TrusteeDataAccountBalance",
        rule=ConstraintRule.REQUIRES_FILTER_ON,
        message="Always filter on periodYear AND periodMonth (use periodMonth=0 for end-of-year).",
        params={"requiredFields": ["periodYear", "periodMonth"]},
    ),
    # NOTE(review): "BALANCE_AT_YEAR_END" has no canonical pattern of that
    # name in this module -- confirm the intent identifier is correct.
    Constraint(
        appliesTo="TrusteeDataAccountBalance",
        rule=ConstraintRule.PREFERRED_TABLE_FOR_INTENT,
        message="For 'Saldo per <date>' and 'Stand <year>' questions, prefer AccountBalance over JournalLine.",
        params={"intents": ["BANK_BALANCE_AT_DATE", "BALANCE_AT_YEAR_END"]},
    ),
]
# ---------------------------------------------------------------------------
# Canonical query patterns (worked examples for the LLM)
# ---------------------------------------------------------------------------
# Worked tool-call templates, one per user intent. Values written as
# "<...>" are placeholders to be substituted; everything else is literal.
_CANONICAL_PATTERNS: list[CanonicalQueryPattern] = [
    # --- Balance lookups on TrusteeDataAccountBalance -----------------------
    # Year-end balance of a single account: periodMonth is pinned to 0.
    CanonicalQueryPattern(
        intent="BANK_BALANCE_AT_DATE",
        description="Saldo eines Bankkontos per Jahresende.",
        pattern={
            "tool": "queryTable",
            "tableName": "TrusteeDataAccountBalance",
            "filters": [
                {"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
                {"field": "periodYear", "op": "=", "value": "<year>"},
                {"field": "periodMonth", "op": "=", "value": 0},
            ],
            "fields": ["closingBalance", "currency"],
        },
    ),
    # Year-end total over an account group: the rows are fetched with
    # queryTable and the "_postProcessing" text tells the model to add them
    # up itself rather than calling aggregateTable.
    CanonicalQueryPattern(
        intent="BANK_GROUP_TOTAL_AT_DATE",
        description="Summe einer Kontogruppe (z. B. alle Bankkonten 102%) per Jahresende.",
        pattern={
            "tool": "queryTable",
            "tableName": "TrusteeDataAccountBalance",
            "filters": [
                {"field": "accountNumber", "op": "LIKE", "value": "<prefix>%"},
                {"field": "periodYear", "op": "=", "value": "<year>"},
                {"field": "periodMonth", "op": "=", "value": 0},
            ],
            "fields": ["accountNumber", "closingBalance", "currency"],
            "_postProcessing": "Sum closingBalance values in your final answer; do NOT SUM via aggregateTable.",
        },
    ),
    # Multi-year history of one account, annual rows only.
    # NOTE(review): this template carries no periodYear filter, whereas the
    # REQUIRES_FILTER_ON constraint on this table lists periodYear as a
    # required field -- confirm the two agree once the validator enforces it.
    CanonicalQueryPattern(
        intent="BALANCE_HISTORY_PER_YEAR",
        description="Saldo-Verlauf eines Kontos ueber mehrere Jahre.",
        pattern={
            "tool": "queryTable",
            "tableName": "TrusteeDataAccountBalance",
            "filters": [
                {"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
                {"field": "periodMonth", "op": "=", "value": 0},
            ],
            "fields": ["periodYear", "closingBalance", "currency"],
            "orderBy": "periodYear",
        },
    ),
    # Month-end balance: same shape as the year-end lookup, but periodMonth
    # is a placeholder instead of the fixed 0.
    CanonicalQueryPattern(
        intent="MONTHLY_BALANCE_SNAPSHOT",
        description="Saldo per Ende eines bestimmten Monats.",
        pattern={
            "tool": "queryTable",
            "tableName": "TrusteeDataAccountBalance",
            "filters": [
                {"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
                {"field": "periodYear", "op": "=", "value": "<year>"},
                {"field": "periodMonth", "op": "=", "value": "<month 1..12>"},
            ],
            "fields": ["closingBalance", "currency"],
        },
    ),
    # --- Chart-of-accounts listing -------------------------------------------
    # NOTE(review): the intent name mentions "type or prefix", but only the
    # prefix variant is shown here.
    CanonicalQueryPattern(
        intent="ACCOUNT_LIST_BY_TYPE_OR_PREFIX",
        description="Welche Konten gehoeren zu einer Gruppe (Typ oder Nummern-Prefix)?",
        pattern={
            "tool": "queryTable",
            "tableName": "TrusteeDataAccount",
            "filters": [
                {"field": "accountNumber", "op": "LIKE", "value": "<prefix>%"},
            ],
            "fields": ["accountNumber", "label", "accountType"],
        },
    ),
    # --- Journal aggregates and searches -------------------------------------
    # The template sums debitAmount; presumably the credit side is obtained
    # by swapping the field -- the description covers both, verify with the
    # prompt compiler.
    CanonicalQueryPattern(
        intent="JOURNAL_SUM_AT_ACCOUNT",
        description="Summe der Soll- oder Haben-Buchungen auf einem Konto.",
        pattern={
            "tool": "aggregateTable",
            "tableName": "TrusteeDataJournalLine",
            "aggregate": "SUM",
            "field": "debitAmount",
            "filters": [
                {"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
            ],
        },
    ),
    # Generic row count; the table name itself is a placeholder.
    CanonicalQueryPattern(
        intent="COUNT_ROWS",
        description="Anzahl Buchungen / Buchungszeilen / Konten.",
        pattern={
            "tool": "aggregateTable",
            "tableName": "<table>",
            "aggregate": "COUNT",
            "field": "id",
        },
    ),
    # Threshold search on journal lines, shown for debitAmount with ">".
    CanonicalQueryPattern(
        intent="JOURNAL_LINES_BY_AMOUNT",
        description="Buchungszeilen mit einem Betrag groesser/kleiner als einer Schwelle.",
        pattern={
            "tool": "queryTable",
            "tableName": "TrusteeDataJournalLine",
            "filters": [
                {"field": "debitAmount", "op": ">", "value": "<amount>"},
            ],
            "fields": ["accountNumber", "debitAmount", "description"],
        },
    ),
]
# The assembled descriptor, built once at import time from the four lists
# defined above and handed out by getTrusteeOntology().
_TRUSTEE_ONTOLOGY: OntologyDescriptor = OntologyDescriptor(
    featureCode="trustee",
    entities=_ENTITIES,
    relations=_RELATIONS,
    constraints=_CONSTRAINTS,
    canonicalPatterns=_CANONICAL_PATTERNS,
)
def getTrusteeOntology() -> OntologyDescriptor:
    """Return the trustee feature's ontology descriptor.

    The descriptor is built once when this module is imported; every call
    hands back that same module-level object rather than a copy. Callers
    should therefore treat it as read-only.

    Returns:
        The shared ``OntologyDescriptor`` for feature code ``"trustee"``.
    """
    return _TRUSTEE_ONTOLOGY