# platform-core/modules/serviceCenter/services/serviceAgent/datamodelOntology.py

# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Ontology data model for feature data sub-agents.

This module defines the data structures that describe a feature's data
ontology -- entities, relations, constraints, canonical query patterns --
plus the validation error payload used by the QueryValidator.

Phase 1 (Repair-Loop) only needs `QueryValidationError`, `Constraint`,
`ConstraintRule` and `ValidationErrorCode`; the richer `Entity`/`Relation`/
`OntologyDescriptor` types are defined here so Phase 2 (Trustee ontology
pilot) can plug in without a second data-model change.

See `wiki/c-work/2-build/2026-05-feature-data-agent-ontology-and-repair.md`.
"""

from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


class ValidationErrorCode(str, Enum):
    """Stable codes for validator failures.

    The LLM sees these codes verbatim in `ToolResult.errorDetails["code"]`
    and is expected to react to them deterministically (e.g. inspect the
    schema via browseTable when FIELD_NOT_FOUND, drop the SUM when
    INVALID_AGGREGATE_TARGET, add a period filter when MISSING_REQUIRED_FILTER).

    The class mixes in ``str``, so every member is also a plain string equal
    to its value, and each value is spelled exactly like its member name.
    """
    # These string values are what `QueryValidationError` emits through
    # `code.value`; changing a value changes the emitted text.

    # Documented reaction: inspect the schema via browseTable.
    FIELD_NOT_FOUND = "FIELD_NOT_FOUND"
    # Documented reaction: drop the SUM.
    INVALID_AGGREGATE_TARGET = "INVALID_AGGREGATE_TARGET"
    # NOTE(review): no expected reaction is documented in this module for the
    # next three codes -- confirm against the validator.
    WRONG_TABLE_FOR_PURPOSE = "WRONG_TABLE_FOR_PURPOSE"
    TYPE_MISMATCH = "TYPE_MISMATCH"
    OPERATOR_INCOMPATIBLE = "OPERATOR_INCOMPATIBLE"
    # Documented reaction: add a period filter.
    MISSING_REQUIRED_FILTER = "MISSING_REQUIRED_FILTER"
    # NOTE(review): expected reaction not documented in this module.
    ORDER_BY_INVALID = "ORDER_BY_INVALID"


class QueryValidationError(BaseModel):
    """Validation failure raised before a query is executed.

    The payload is exposed in two shapes: `toErrorDetails()` yields the
    machine-readable dict for `ToolResult.errorDetails`, and
    `toShortError()` yields the compact human-readable string for
    `ToolResult.error`.
    """
    # Which validator rule failed.
    code: ValidationErrorCode
    # Name of the field that triggered the failure, if any.
    field: Optional[str] = Field(
        default=None,
        description="The offending field name (when applicable).",
    )
    # Optional replacement proposal; stays None when nothing useful is known.
    suggestion: Optional[str] = Field(
        default=None,
        description=(
            "Best-effort suggestion (e.g. fuzzy-matched valid field name). None "
            "when no useful suggestion exists."
        ),
    )
    # Required text. The description quotes ~80 chars, while the cap declared
    # via max_length is 160.
    hint: str = Field(
        max_length=160,
        description=(
            "Short corrective hint, max ~80 chars. "
            "Surfaced to the LLM verbatim."
        ),
    )

    def toShortError(self) -> str:
        """Render the one-line `error` string for logs/audit.

        Returns:
            `<CODE>: <field>: <hint>` when `field` is truthy, otherwise
            `<CODE>: <hint>`.
        """
        # Start with the code and append the field segment only when one is set
        # (an empty field name is treated like a missing one).
        prefix = self.code.value
        if self.field:
            prefix = f"{prefix}: {self.field}"
        return f"{prefix}: {self.hint}"

    def toErrorDetails(self) -> Dict[str, Any]:
        """Assemble the dict stored in `ToolResult.errorDetails`.

        Returns:
            A dict with the keys `code` (the enum's string value), `field`,
            `suggestion` and `hint`, in that order. `field` and `suggestion`
            are passed through as-is and may be None.
        """
        details: Dict[str, Any] = {"code": self.code.value}
        details.update(
            field=self.field,
            suggestion=self.suggestion,
            hint=self.hint,
        )
        return details


class ConstraintRule(str, Enum):
    """High-level rule kinds that can be attached to a field or table.

    Used as the ``rule`` of a `Constraint`. The class mixes in ``str`` and
    every value is spelled exactly like its member name.
    """
    # NOTE(review): the exact validator behavior behind each rule is not
    # visible in this module -- confirm against the validator implementation.
    NEVER_AGGREGATE = "NEVER_AGGREGATE"
    # `Constraint.params` documents a 'requiredFields' list as the extra
    # payload for this rule.
    REQUIRES_FILTER_ON = "REQUIRES_FILTER_ON"
    TYPE_MISMATCH_GUARD = "TYPE_MISMATCH_GUARD"
    PREFERRED_TABLE_FOR_INTENT = "PREFERRED_TABLE_FOR_INTENT"


class Constraint(BaseModel):
    """A single rule the validator and the prompt compiler both consume.

    Phase 1 uses constraints declared inline by the validator (defaults
    derived from naming conventions like ``*Balance`` / ``*Total``).
    Phase 2 sources them from feature ontologies, replacing the
    convention-based defaults.

    `OntologyDescriptor.constraintsForTable` selects constraints by their
    ``appliesTo`` value (exact table name or ``<Table>.`` prefix).
    """
    # Required. Either `<Table>` or `<Table>.<field>`; the format is described
    # but not validated by this model.
    appliesTo: str = Field(
        description=(
            "Target identifier, format depends on rule: `<Table>.<field>` for "
            "field-level constraints, `<Table>` for table-level."
        ),
    )
    # Required. The kind of rule this constraint expresses.
    rule: ConstraintRule
    # Required. Limited to 160 characters via max_length.
    message: str = Field(
        description="Short hint forwarded to the LLM if the constraint fires.",
        max_length=160,
    )
    # Optional free-form extras; default_factory gives each instance its own
    # empty dict.
    params: Dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Rule-specific extras, e.g. {'requiredFields': ['periodYear', 'periodMonth']} "
            "for REQUIRES_FILTER_ON."
        ),
    )


class SemanticType(str, Enum):
    """High-level semantic category an entity belongs to.

    Coarser than the underlying Pydantic type -- used so the prompt compiler
    can group entities ("here are your ACCOUNT-like tables") without the LLM
    having to read the full schema.

    The class mixes in ``str`` and every value is spelled exactly like its
    member name. `Entity.semanticType` defaults to ``OTHER``.
    """
    # NOTE(review): the precise meaning of each category is not defined in
    # this module; the names are the only description available here.
    ACCOUNT = "ACCOUNT"
    BALANCE_SNAPSHOT = "BALANCE_SNAPSHOT"
    TRANSACTION = "TRANSACTION"
    DOCUMENT = "DOCUMENT"
    PARTY = "PARTY"
    PERIOD = "PERIOD"
    # Default category used by `Entity` when none is given.
    OTHER = "OTHER"


class Cardinality(str, Enum):
    """Multiplicity of a `Relation` between two entities.

    The class mixes in ``str`` and every value is spelled exactly like its
    member name.
    """
    # NOTE(review): presumably read in the direction fromEntity -> toEntity
    # (e.g. ONE_TO_MANY = one "from" row, many "to" rows) -- confirm with the
    # consumers of `Relation`.
    ONE_TO_ONE = "ONE_TO_ONE"
    ONE_TO_MANY = "ONE_TO_MANY"
    MANY_TO_ONE = "MANY_TO_ONE"
    MANY_TO_MANY = "MANY_TO_MANY"


class Invariant(BaseModel):
    """Free-form invariant attached to an entity.

    Phase 1 leaves these as opaque text consumed by the prompt compiler.
    Future phases may add a structured rule kind.

    Instances are collected in `Entity.invariants`.
    """
    # Required invariant text, limited to 200 characters via max_length.
    description: str = Field(max_length=200)


class Entity(BaseModel):
    """One semantic entity in the ontology (often backed by a Pydantic table).

    Only ``name`` is required; every other attribute has a default.
    """
    # Required entity name. `parentEntity` refers to other entities by name.
    name: str
    # Optional link to the backing model; None when not set.
    pythonClass: Optional[str] = Field(
        default=None,
        description="MODEL_REGISTRY key when the entity is DB-backed (e.g. 'TrusteeDataAccountBalance').",
    )
    # Coarse category; falls back to OTHER when not specified.
    semanticType: SemanticType = SemanticType.OTHER
    # Optional name of the more general entity. Stored as a plain string; this
    # model does not check that the referenced entity exists.
    parentEntity: Optional[str] = Field(
        default=None,
        description="Name of a broader entity this one specializes (e.g. 'BankAccount' parentEntity 'Account').",
    )
    # Optional free text; empty string by default.
    description: str = ""
    # Invariants attached to this entity; default_factory gives each instance
    # its own empty list.
    invariants: List[Invariant] = Field(default_factory=list)


class Relation(BaseModel):
    """Link between two entities of the ontology.

    Both ends are stored as plain strings; this model does not check that
    they match the ``name`` of a declared `Entity`.
    """
    # Required. Source side of the relation (presumably an `Entity.name` --
    # confirm with the producers of the ontology).
    fromEntity: str
    # Required. Target side of the relation.
    toEntity: str
    # Required. Multiplicity of the relation.
    cardinality: Cardinality
    # Optional. English reading of the description below: "FK field name on
    # the fromEntity side (e.g. 'journalEntryId')".
    via: Optional[str] = Field(
        default=None,
        description="FK-Feldname auf der fromEntity-Seite (z. B. 'journalEntryId').",
    )


class CanonicalQueryPattern(BaseModel):
    """Tool-call skeleton for a recurring user intent.

    The prompt compiler renders these as worked examples so the LLM has a
    template to mimic instead of inventing a query shape.

    ``intent`` and ``pattern`` are required; ``description`` defaults to an
    empty string.
    """
    # Required short label identifying the intent.
    intent: str = Field(description="Short label, e.g. 'BANK_BALANCE_AT_DATE'.")
    # Optional guidance on when the pattern applies.
    description: str = Field(default="", description="Human-readable when to use this pattern.")
    # Required free-form dict; its structure is not validated by this model.
    pattern: Dict[str, Any] = Field(
        description="Tool-call shape with placeholders, e.g. {'tool': 'queryTable', 'tableName': '...', 'filters': [...]}",
    )


class OntologyDescriptor(BaseModel):
    """Root ontology object a feature exports through `getAgentOntology()`.

    Bundles the feature's entities, relations, constraints and canonical
    query patterns under its ``featureCode``. Only ``featureCode`` is
    required; each collection starts out as its own empty list.
    """
    featureCode: str
    entities: List[Entity] = Field(default_factory=list)
    relations: List[Relation] = Field(default_factory=list)
    constraints: List[Constraint] = Field(default_factory=list)
    canonicalPatterns: List[CanonicalQueryPattern] = Field(default_factory=list)

    def constraintsForTable(self, tableName: str) -> List[Constraint]:
        """Collect the constraints that concern ``tableName``.

        A constraint is selected when its ``appliesTo`` equals the table name
        (table-level) or begins with ``<tableName>.`` (field-level).

        Args:
            tableName: Table identifier to match against ``appliesTo``.

        Returns:
            A new list of the matching constraints, in the order they appear
            in ``self.constraints``.
        """
        # The trailing dot keeps tables that merely share a name prefix
        # (e.g. "Account" vs "AccountBalance") from matching.
        fieldPrefix = f"{tableName}."
        selected: List[Constraint] = []
        for constraint in self.constraints:
            target = constraint.appliesTo
            if target == tableName or target.startswith(fieldPrefix):
                selected.append(constraint)
        return selected