# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Ontology data model for feature data sub-agents.

This module defines the data structures that describe a feature's data
ontology -- entities, relations, constraints, canonical query patterns --
plus the validation error payload used by the QueryValidator.

Phase 1 (Repair-Loop) only needs `QueryValidationError`, `Constraint`,
`ConstraintRule` and `ValidationErrorCode`; the richer `Entity`/`Relation`/
`OntologyDescriptor` types are defined here so Phase 2 (Trustee ontology
pilot) can plug in without a second data-model change.

See `wiki/c-work/2-build/2026-05-feature-data-agent-ontology-and-repair.md`.
"""
|
|
|
|
from enum import Enum
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class ValidationErrorCode(str, Enum):
    """Stable codes for validator failures.

    The LLM sees these codes verbatim in `ToolResult.errorDetails["code"]`
    and is expected to react to them deterministically (e.g. inspect the
    schema via browseTable when FIELD_NOT_FOUND, drop the SUM when
    INVALID_AGGREGATE_TARGET, add a period filter when MISSING_REQUIRED_FILTER).

    Every member's value is identical to its name, and because of the
    ``str`` mixin a member compares equal to that plain string.
    """

    FIELD_NOT_FOUND = "FIELD_NOT_FOUND"
    INVALID_AGGREGATE_TARGET = "INVALID_AGGREGATE_TARGET"
    WRONG_TABLE_FOR_PURPOSE = "WRONG_TABLE_FOR_PURPOSE"
    TYPE_MISMATCH = "TYPE_MISMATCH"
    OPERATOR_INCOMPATIBLE = "OPERATOR_INCOMPATIBLE"
    MISSING_REQUIRED_FILTER = "MISSING_REQUIRED_FILTER"
    ORDER_BY_INVALID = "ORDER_BY_INVALID"
|
|
|
|
|
|
class QueryValidationError(BaseModel):
    """Structured pre-execute validation error.

    Serialized into `ToolResult.errorDetails` (machine-readable) and
    summarized into `ToolResult.error` (short human-readable string).

    `code` and `hint` are required; `field` and `suggestion` default to
    None.
    """

    code: ValidationErrorCode
    field: Optional[str] = Field(
        default=None,
        description="The offending field name (when applicable).",
    )
    suggestion: Optional[str] = Field(
        default=None,
        description=(
            "Best-effort suggestion (e.g. fuzzy-matched valid field name). "
            "None when no useful suggestion exists."
        ),
    )
    # NOTE(review): the description text mentions ~80 chars while the limit
    # declared via max_length is 160 -- confirm which one is intended.
    hint: str = Field(
        description="Short corrective hint, max ~80 chars. Surfaced to the LLM verbatim.",
        max_length=160,
    )

    def toShortError(self) -> str:
        """Build the short `error` string for logs/audit.

        Format: `<CODE>: <hint>`, or `<CODE>: <field>: <hint>` when `field`
        is set to a non-empty string.
        """
        # Truthiness check: both None and "" fall through to the short form.
        if self.field:
            return f"{self.code.value}: {self.field}: {self.hint}"
        return f"{self.code.value}: {self.hint}"

    def toErrorDetails(self) -> Dict[str, Any]:
        """Build the dict for `ToolResult.errorDetails`.

        Returns a dict with the keys ``code`` (the enum member's string
        value), ``field``, ``suggestion`` and ``hint``; the latter three
        are copied from the model unchanged.
        """
        return {
            "code": self.code.value,
            "field": self.field,
            "suggestion": self.suggestion,
            "hint": self.hint,
        }
|
|
|
|
|
|
class ConstraintRule(str, Enum):
    """High-level rule kinds that can be attached to a field or table.

    Every member's value is identical to its name.
    """

    NEVER_AGGREGATE = "NEVER_AGGREGATE"
    REQUIRES_FILTER_ON = "REQUIRES_FILTER_ON"
    TYPE_MISMATCH_GUARD = "TYPE_MISMATCH_GUARD"
    PREFERRED_TABLE_FOR_INTENT = "PREFERRED_TABLE_FOR_INTENT"
|
|
|
|
|
|
class Constraint(BaseModel):
    """A single rule the validator and the prompt compiler both consume.

    Phase 1 uses constraints declared inline by the validator (defaults
    derived from naming conventions like ``*Balance`` / ``*Total``).
    Phase 2 sources them from feature ontologies, replacing the
    convention-based defaults.

    `appliesTo`, `rule` and `message` are required; `params` defaults to a
    fresh empty dict per instance.
    """

    appliesTo: str = Field(
        description=(
            "Target identifier, format depends on rule: `<Table>.<field>` for "
            "field-level constraints, `<Table>` for table-level."
        ),
    )
    rule: ConstraintRule
    message: str = Field(
        description="Short hint forwarded to the LLM if the constraint fires.",
        max_length=160,
    )
    # default_factory (rather than a literal {}) so instances never share
    # one mutable dict.
    params: Dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Rule-specific extras, e.g. {'requiredFields': ['periodYear', 'periodMonth']} "
            "for REQUIRES_FILTER_ON."
        ),
    )
|
|
|
|
|
|
class SemanticType(str, Enum):
    """High-level semantic category an entity belongs to.

    Coarser than the underlying Pydantic type -- used so the prompt compiler
    can group entities ("here are your ACCOUNT-like tables") without the LLM
    having to read the full schema.

    Every member's value is identical to its name.
    """

    ACCOUNT = "ACCOUNT"
    BALANCE_SNAPSHOT = "BALANCE_SNAPSHOT"
    TRANSACTION = "TRANSACTION"
    DOCUMENT = "DOCUMENT"
    PARTY = "PARTY"
    PERIOD = "PERIOD"
    OTHER = "OTHER"
|
|
|
|
|
|
class Cardinality(str, Enum):
    """Multiplicity kinds for a relation between two entities.

    Every member's value is identical to its name.
    """

    ONE_TO_ONE = "ONE_TO_ONE"
    ONE_TO_MANY = "ONE_TO_MANY"
    MANY_TO_ONE = "MANY_TO_ONE"
    MANY_TO_MANY = "MANY_TO_MANY"
|
|
|
|
|
|
class Invariant(BaseModel):
    """Free-form invariant attached to an entity.

    Phase 1 leaves these as opaque text consumed by the prompt compiler.
    Future phases may add a structured rule kind.
    """

    # Required free text, limited to 200 characters.
    description: str = Field(max_length=200)
|
|
|
|
|
|
class Entity(BaseModel):
    """One semantic entity in the ontology (often backed by a Pydantic table).

    Only `name` is required; every other field has a default.
    """

    name: str
    pythonClass: Optional[str] = Field(
        default=None,
        description="MODEL_REGISTRY key when the entity is DB-backed (e.g. 'TrusteeDataAccountBalance').",
    )
    # Falls back to the catch-all OTHER category when not specified.
    semanticType: SemanticType = SemanticType.OTHER
    parentEntity: Optional[str] = Field(
        default=None,
        description="Name of a broader entity this one specializes (e.g. 'BankAccount' parentEntity 'Account').",
    )
    description: str = ""
    # default_factory so each entity gets its own list.
    invariants: List[Invariant] = Field(default_factory=list)
|
|
|
|
|
|
class Relation(BaseModel):
    """Directed link between two entities, identified by entity name.

    `fromEntity`, `toEntity` and `cardinality` are required; `via`
    defaults to None.
    """

    fromEntity: str
    toEntity: str
    cardinality: Cardinality
    # English reading of the description below: "FK field name on the
    # fromEntity side (e.g. 'journalEntryId')." The literal itself is kept
    # unchanged because it is part of the model's runtime metadata.
    via: Optional[str] = Field(
        default=None,
        description="FK-Feldname auf der fromEntity-Seite (z. B. 'journalEntryId').",
    )
|
|
|
|
|
|
class CanonicalQueryPattern(BaseModel):
    """Tool-call skeleton for a recurring user intent.

    The prompt compiler renders these as worked examples so the LLM has a
    template to mimic instead of inventing a query shape.

    `intent` and `pattern` are required; `description` defaults to "".
    """

    intent: str = Field(description="Short label, e.g. 'BANK_BALANCE_AT_DATE'.")
    description: str = Field(default="", description="Human-readable when to use this pattern.")
    pattern: Dict[str, Any] = Field(
        description="Tool-call shape with placeholders, e.g. {'tool': 'queryTable', 'tableName': '...', 'filters': [...]}",
    )
|
|
|
|
|
|
class OntologyDescriptor(BaseModel):
    """Top-level container exported by `getAgentOntology()` per feature.

    Only `featureCode` is required; each collection defaults to its own
    empty list.
    """

    featureCode: str
    entities: List[Entity] = Field(default_factory=list)
    relations: List[Relation] = Field(default_factory=list)
    constraints: List[Constraint] = Field(default_factory=list)
    canonicalPatterns: List[CanonicalQueryPattern] = Field(default_factory=list)

    def constraintsForTable(self, tableName: str) -> List[Constraint]:
        """Return constraints whose ``appliesTo`` targets the given table or one of its fields.

        A constraint matches when its ``appliesTo`` is exactly ``tableName``
        (table-level) or starts with ``"<tableName>."`` (field-level). The
        comparison is case-sensitive, and the result keeps the order of
        ``self.constraints``.
        """
        # Include the dot in the prefix so "Account" does not match
        # "AccountBalance.amount".
        prefix = f"{tableName}."
        return [
            c for c in self.constraints
            if c.appliesTo == tableName or c.appliesTo.startswith(prefix)
        ]
|