platform-core/modules/serviceCenter/services/serviceAgent/datamodelOntology.py
2026-05-16 22:55:43 +02:00

203 lines
7 KiB
Python

# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Ontology data model for feature data sub-agents.
This module defines the data structures that describe a feature's data
ontology -- entities, relations, constraints, canonical query patterns --
plus the validation error payload used by the QueryValidator.
Phase 1 (Repair-Loop) only needs `QueryValidationError`, `Constraint`,
`ConstraintRule` and `ValidationErrorCode`; the richer `Entity`/`Relation`/
`OntologyDescriptor` types are defined here so Phase 2 (Trustee ontology
pilot) can plug in without a second data-model change.
See `wiki/c-work/2-build/2026-05-feature-data-agent-ontology-and-repair.md`.
"""
from enum import Enum
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
class ValidationErrorCode(str, Enum):
    """Stable, machine-readable codes for validator failures.

    Each code reaches the LLM verbatim through
    `ToolResult.errorDetails["code"]`, and the LLM is expected to respond
    to it deterministically, for example:

    * FIELD_NOT_FOUND -> inspect the schema via browseTable
    * INVALID_AGGREGATE_TARGET -> drop the SUM
    * MISSING_REQUIRED_FILTER -> add a period filter
    """

    # Every member's value equals its name, so the code serializes to the
    # same text the LLM is told to react to.
    FIELD_NOT_FOUND = "FIELD_NOT_FOUND"
    INVALID_AGGREGATE_TARGET = "INVALID_AGGREGATE_TARGET"
    WRONG_TABLE_FOR_PURPOSE = "WRONG_TABLE_FOR_PURPOSE"
    TYPE_MISMATCH = "TYPE_MISMATCH"
    OPERATOR_INCOMPATIBLE = "OPERATOR_INCOMPATIBLE"
    MISSING_REQUIRED_FILTER = "MISSING_REQUIRED_FILTER"
    ORDER_BY_INVALID = "ORDER_BY_INVALID"
class QueryValidationError(BaseModel):
    """Structured validation error raised before a query is executed.

    The payload has two renderings: the machine-readable dict destined for
    `ToolResult.errorDetails` (see `toErrorDetails`) and the short
    human-readable summary destined for `ToolResult.error`
    (see `toShortError`).
    """

    code: ValidationErrorCode
    field: Optional[str] = Field(
        description="The offending field name (when applicable).",
        default=None,
    )
    suggestion: Optional[str] = Field(
        description=(
            "Best-effort suggestion (e.g. fuzzy-matched valid field name). "
            "None when no useful suggestion exists."
        ),
        default=None,
    )
    # The description advertises ~80 chars as the target length; the hard
    # limit actually enforced by validation is 160.
    hint: str = Field(
        max_length=160,
        description="Short corrective hint, max ~80 chars. Surfaced to the LLM verbatim.",
    )

    def toShortError(self) -> str:
        """Return the short `error` string for logs/audit.

        The result is `<CODE>: <hint>`, or `<CODE>: <field>: <hint>` when
        `field` is set to a non-empty value.
        """
        # A falsy field (None or "") is left out of the summary entirely.
        prefix = f"{self.code.value}: {self.field}" if self.field else self.code.value
        return f"{prefix}: {self.hint}"

    def toErrorDetails(self) -> Dict[str, Any]:
        """Return the dict stored in `ToolResult.errorDetails`.

        Keys, in order: ``code`` (the enum's string value), ``field``,
        ``suggestion`` and ``hint``.
        """
        details: Dict[str, Any] = {"code": self.code.value}
        details["field"] = self.field
        details["suggestion"] = self.suggestion
        details["hint"] = self.hint
        return details
class ConstraintRule(str, Enum):
    """Kinds of high-level rule that may be attached to a field or a table."""

    # Each member's value is identical to its name.
    NEVER_AGGREGATE = "NEVER_AGGREGATE"
    REQUIRES_FILTER_ON = "REQUIRES_FILTER_ON"
    TYPE_MISMATCH_GUARD = "TYPE_MISMATCH_GUARD"
    PREFERRED_TABLE_FOR_INTENT = "PREFERRED_TABLE_FOR_INTENT"
class Constraint(BaseModel):
    """One rule shared by the validator and the prompt compiler.

    In Phase 1 the validator declares constraints inline, using defaults
    derived from naming conventions such as ``*Balance`` / ``*Total``.
    In Phase 2 they come from feature ontologies instead, replacing those
    convention-based defaults.
    """

    appliesTo: str = Field(
        description=(
            "Target identifier, format depends on rule: `<Table>.<field>` for "
            "field-level constraints, `<Table>` for table-level."
        ),
    )
    rule: ConstraintRule
    message: str = Field(
        max_length=160,
        description="Short hint forwarded to the LLM if the constraint fires.",
    )
    # default_factory gives every instance its own fresh dict.
    params: Dict[str, Any] = Field(
        description=(
            "Rule-specific extras, e.g. {'requiredFields': ['periodYear', 'periodMonth']} "
            "for REQUIRES_FILTER_ON."
        ),
        default_factory=dict,
    )
class SemanticType(str, Enum):
    """Coarse semantic category assigned to an entity.

    Deliberately coarser than the underlying Pydantic type: it lets the
    prompt compiler group entities ("here are your ACCOUNT-like tables")
    so the LLM does not have to read the full schema.
    """

    ACCOUNT = "ACCOUNT"
    BALANCE_SNAPSHOT = "BALANCE_SNAPSHOT"
    TRANSACTION = "TRANSACTION"
    DOCUMENT = "DOCUMENT"
    PARTY = "PARTY"
    PERIOD = "PERIOD"
    OTHER = "OTHER"
class Cardinality(str, Enum):
    """Cardinality options available to a `Relation`."""

    ONE_TO_ONE = "ONE_TO_ONE"
    ONE_TO_MANY = "ONE_TO_MANY"
    MANY_TO_ONE = "MANY_TO_ONE"
    MANY_TO_MANY = "MANY_TO_MANY"
class Invariant(BaseModel):
    """Free-form invariant that belongs to an entity.

    Phase 1 treats the text as opaque; only the prompt compiler consumes
    it. A structured rule kind may be added in a future phase.
    """

    # Text is capped at 200 characters by validation.
    description: str = Field(max_length=200)
class Entity(BaseModel):
    """A single semantic entity of the ontology, often backed by a Pydantic table."""

    name: str
    pythonClass: Optional[str] = Field(
        description="MODEL_REGISTRY key when the entity is DB-backed (e.g. 'TrusteeDataAccountBalance').",
        default=None,
    )
    # Entities that do not declare a category fall back to OTHER.
    semanticType: SemanticType = SemanticType.OTHER
    parentEntity: Optional[str] = Field(
        description="Name of a broader entity this one specializes (e.g. 'BankAccount' parentEntity 'Account').",
        default=None,
    )
    description: str = ""
    # default_factory gives every entity its own list instance.
    invariants: List[Invariant] = Field(default_factory=list)
class Relation(BaseModel):
    """Relation from one entity to another, with its cardinality.

    `fromEntity` and `toEntity` are plain strings -- presumably `Entity.name`
    values; confirm against the code that builds the ontology.
    """

    fromEntity: str
    toEntity: str
    cardinality: Cardinality
    # The description below is German and is kept verbatim because it is a
    # runtime string. In English: "FK field name on the fromEntity side
    # (e.g. 'journalEntryId')."
    via: Optional[str] = Field(
        description="FK-Feldname auf der fromEntity-Seite (z. B. 'journalEntryId').",
        default=None,
    )
class CanonicalQueryPattern(BaseModel):
    """Skeleton of a tool call that answers a recurring user intent.

    The prompt compiler renders these patterns as worked examples, giving
    the LLM a template to mimic instead of inventing a query shape.
    """

    intent: str = Field(
        description="Short label, e.g. 'BANK_BALANCE_AT_DATE'.",
    )
    description: str = Field(
        description="Human-readable when to use this pattern.",
        default="",
    )
    # Required field: no default is supplied for the tool-call shape.
    pattern: Dict[str, Any] = Field(
        description="Tool-call shape with placeholders, e.g. {'tool': 'queryTable', 'tableName': '...', 'filters': [...]}",
    )
class OntologyDescriptor(BaseModel):
    """Top-level container that `getAgentOntology()` exports per feature."""

    featureCode: str
    # Every collection defaults to its own empty list.
    entities: List[Entity] = Field(default_factory=list)
    relations: List[Relation] = Field(default_factory=list)
    constraints: List[Constraint] = Field(default_factory=list)
    canonicalPatterns: List[CanonicalQueryPattern] = Field(default_factory=list)

    def constraintsForTable(self, tableName: str) -> List[Constraint]:
        """Collect the constraints aimed at a table or at one of its fields.

        A constraint is selected when its ``appliesTo`` equals ``tableName``
        (table-level) or begins with ``"<tableName>."`` (field-level).
        The order of ``self.constraints`` is preserved.
        """
        # The trailing dot keeps tables that merely share a name prefix
        # (e.g. "Account" vs "AccountBalance") from matching.
        fieldPrefix = f"{tableName}."
        selected: List[Constraint] = []
        for constraint in self.constraints:
            target = constraint.appliesTo
            if target != tableName and not target.startswith(fieldPrefix):
                continue
            selected.append(constraint)
        return selected