# Copyright (c) 2026 PowerOn AG
# All rights reserved.
"""Ontology data model for feature data sub-agents.

This module defines the data structures that describe a feature's data
ontology -- entities, relations, constraints, canonical query patterns --
plus the validation error payload used by the QueryValidator.

Phase 1 (Repair-Loop) only needs `QueryValidationError`, `Constraint`,
`ConstraintRule` and `ValidationErrorCode`; the richer `Entity`/`Relation`/
`OntologyDescriptor` types are defined here so Phase 2 (Trustee ontology
pilot) can plug in without a second data-model change.

See `wiki/c-work/2-build/2026-05-feature-data-agent-ontology-and-repair.md`.
"""

from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


class ValidationErrorCode(str, Enum):
    """Stable codes for validator failures.

    The LLM sees these codes verbatim in `ToolResult.errorDetails["code"]`
    and is expected to react to them deterministically (e.g. inspect the
    schema via browseTable when FIELD_NOT_FOUND, drop the SUM when
    INVALID_AGGREGATE_TARGET, add a period filter when
    MISSING_REQUIRED_FILTER).
    """

    FIELD_NOT_FOUND = "FIELD_NOT_FOUND"
    INVALID_AGGREGATE_TARGET = "INVALID_AGGREGATE_TARGET"
    WRONG_TABLE_FOR_PURPOSE = "WRONG_TABLE_FOR_PURPOSE"
    TYPE_MISMATCH = "TYPE_MISMATCH"
    OPERATOR_INCOMPATIBLE = "OPERATOR_INCOMPATIBLE"
    MISSING_REQUIRED_FILTER = "MISSING_REQUIRED_FILTER"
    ORDER_BY_INVALID = "ORDER_BY_INVALID"


class QueryValidationError(BaseModel):
    """Structured pre-execute validation error.

    Serialized into `ToolResult.errorDetails` (machine-readable) and
    summarized into `ToolResult.error` (short human-readable string).
    """

    code: ValidationErrorCode
    field: Optional[str] = Field(
        default=None,
        description="The offending field name (when applicable).",
    )
    suggestion: Optional[str] = Field(
        default=None,
        description=(
            "Best-effort suggestion (e.g. fuzzy-matched valid field name). "
            "None when no useful suggestion exists."
        ),
    )
    # NOTE(review): the description advertises ~80 chars while the enforced
    # cap is 160 -- confirm which limit is intended before tightening either.
    hint: str = Field(
        description="Short corrective hint, max ~80 chars. Surfaced to the LLM verbatim.",
        max_length=160,
    )

    def toShortError(self) -> str:
        """Build the short `error` string for logs/audit.

        Format: `<code>: <hint>`, or `<code>: <field>: <hint>` when a field
        is present. An empty-string `field` is treated like a missing one.

        Returns:
            The colon-separated summary line.
        """
        if self.field:
            return f"{self.code.value}: {self.field}: {self.hint}"
        return f"{self.code.value}: {self.hint}"

    def toErrorDetails(self) -> Dict[str, Any]:
        """Build the dict for `ToolResult.errorDetails`.

        Returns:
            A dict with the keys `code` (the enum's string value), `field`,
            `suggestion` and `hint`; `field` and `suggestion` may be None.
        """
        return {
            "code": self.code.value,
            "field": self.field,
            "suggestion": self.suggestion,
            "hint": self.hint,
        }


class ConstraintRule(str, Enum):
    """High-level rule kinds that can be attached to a field or table."""

    NEVER_AGGREGATE = "NEVER_AGGREGATE"
    REQUIRES_FILTER_ON = "REQUIRES_FILTER_ON"
    TYPE_MISMATCH_GUARD = "TYPE_MISMATCH_GUARD"
    PREFERRED_TABLE_FOR_INTENT = "PREFERRED_TABLE_FOR_INTENT"


class Constraint(BaseModel):
    """A single rule the validator and the prompt compiler both consume.

    Phase 1 uses constraints declared inline by the validator (defaults
    derived from naming conventions like ``*Balance`` / ``*Total``). Phase 2
    sources them from feature ontologies, replacing the convention-based
    defaults.
    """

    # The `<table>.<field>` / `<table>` convention is what
    # `OntologyDescriptor.constraintsForTable` matches against.
    appliesTo: str = Field(
        description=(
            "Target identifier, format depends on rule: `<table>.<field>` for "
            "field-level constraints, `<table>` for table-level."
        ),
    )
    rule: ConstraintRule
    message: str = Field(
        description="Short hint forwarded to the LLM if the constraint fires.",
        max_length=160,
    )
    params: Dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Rule-specific extras, e.g. {'requiredFields': ['periodYear', 'periodMonth']} "
            "for REQUIRES_FILTER_ON."
        ),
    )


class SemanticType(str, Enum):
    """High-level semantic category an entity belongs to.

    Coarser than the underlying Pydantic type -- used so the prompt compiler
    can group entities ("here are your ACCOUNT-like tables") without the LLM
    having to read the full schema.
    """

    ACCOUNT = "ACCOUNT"
    BALANCE_SNAPSHOT = "BALANCE_SNAPSHOT"
    TRANSACTION = "TRANSACTION"
    DOCUMENT = "DOCUMENT"
    PARTY = "PARTY"
    PERIOD = "PERIOD"
    OTHER = "OTHER"


class Cardinality(str, Enum):
    """Multiplicity of a `Relation` between two entities."""

    ONE_TO_ONE = "ONE_TO_ONE"
    ONE_TO_MANY = "ONE_TO_MANY"
    MANY_TO_ONE = "MANY_TO_ONE"
    MANY_TO_MANY = "MANY_TO_MANY"


class Invariant(BaseModel):
    """Free-form invariant attached to an entity.

    Phase 1 leaves these as opaque text consumed by the prompt compiler.
    Future phases may add a structured rule kind.
    """

    description: str = Field(max_length=200)


class Entity(BaseModel):
    """One semantic entity in the ontology (often backed by a Pydantic table)."""

    name: str
    pythonClass: Optional[str] = Field(
        default=None,
        description="MODEL_REGISTRY key when the entity is DB-backed (e.g. 'TrusteeDataAccountBalance').",
    )
    semanticType: SemanticType = SemanticType.OTHER
    parentEntity: Optional[str] = Field(
        default=None,
        description="Name of a broader entity this one specializes (e.g. 'BankAccount' parentEntity 'Account').",
    )
    description: str = ""
    invariants: List[Invariant] = Field(default_factory=list)


class Relation(BaseModel):
    """Directed link from one entity to another with a given cardinality.

    `fromEntity` and `toEntity` are plain strings -- presumably `Entity.name`
    values; nothing in this module enforces that they resolve.
    """

    fromEntity: str
    toEntity: str
    cardinality: Cardinality
    # English: foreign-key field name on the fromEntity side
    # (e.g. 'journalEntryId'). The description is a runtime string and is
    # therefore left in its original language.
    via: Optional[str] = Field(
        default=None,
        description="FK-Feldname auf der fromEntity-Seite (z. B. 'journalEntryId').",
    )


class CanonicalQueryPattern(BaseModel):
    """Tool-call skeleton for a recurring user intent.

    The prompt compiler renders these as worked examples so the LLM has a
    template to mimic instead of inventing a query shape.
    """

    intent: str = Field(description="Short label, e.g. 'BANK_BALANCE_AT_DATE'.")
    description: str = Field(default="", description="Human-readable when to use this pattern.")
    pattern: Dict[str, Any] = Field(
        description="Tool-call shape with placeholders, e.g. {'tool': 'queryTable', 'tableName': '...', 'filters': [...]}",
    )


class OntologyDescriptor(BaseModel):
    """Top-level container exported by `getAgentOntology()` per feature."""

    featureCode: str
    entities: List[Entity] = Field(default_factory=list)
    relations: List[Relation] = Field(default_factory=list)
    constraints: List[Constraint] = Field(default_factory=list)
    canonicalPatterns: List[CanonicalQueryPattern] = Field(default_factory=list)

    def constraintsForTable(self, tableName: str) -> List[Constraint]:
        """Return constraints whose ``appliesTo`` targets the given table or one of its fields.

        Args:
            tableName: Table identifier as used in ``Constraint.appliesTo``.

        Returns:
            The matching constraints, in their declared order.
        """
        # The trailing dot keeps a table name from matching a longer table
        # that merely shares its prefix (e.g. "Account" vs "AccountBalance.x").
        prefix = f"{tableName}."
        return [
            c
            for c in self.constraints
            if c.appliesTo == tableName or c.appliesTo.startswith(prefix)
        ]