gateway/modules/workflows/processing/shared/parameterValidation.py
2026-04-28 11:58:53 +02:00

198 lines
7.6 KiB
Python

# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Universal parameter validation + coercion for workflow actions.
Workflow actions historically received their ``parameters`` as a raw
``Dict[str, Any]`` with no enforcement of the declared parameter schema.
That implicit contract masked two whole classes of bugs:
1. **Type confusion at the agent boundary.** The agent's tool schema
(Phase-3 Typed Action Architecture) exposes ``FeatureInstanceRef`` /
``ConnectionRef`` etc. as typed *objects* with ``id`` plus a
discriminator (``featureCode`` / ``authority``) so the LLM can pick
the right instance among several. The action implementations, however,
use the value as a bare UUID string in ``recordFilter={"col": <value>}``.
Without normalization Postgres fails with "can't adapt type 'dict'",
the connector's previous swallow-and-return-[] hid the failure, and the
action returned the misleading "no record found" error.
2. **Unchecked optional flags.** ``forceRefresh`` arriving as the string
``"true"`` instead of a real bool, ``periodMonth`` arriving as ``"12"``
instead of ``12``, etc. Every action grew its own ad-hoc coercion code.
This module centralises validation and coercion at exactly one boundary:
``ActionExecutor.executeAction``. By the time the action body runs, the
``parameters`` dict is guaranteed to satisfy the declared schema.
Unknown extra keys (e.g. ``parentOperationId`` injected by the executor,
``expectedDocumentFormats`` from action items) are passed through
untouched — the schema only constrains *declared* parameters.
"""
from __future__ import annotations
import logging
import math
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
class InvalidActionParameterError(ValueError):
    """Raised when a declared action parameter is missing, malformed, or
    cannot be coerced into the declared type.

    The message identifies the action and parameter so the agent and
    workflow log can pinpoint the offending call instead of getting an
    opaque downstream "no record found" or "can't adapt type 'X'".

    Attributes:
        actionId: Identifier of the action whose parameter failed validation.
        paramName: Name of the offending parameter.
        reason: Human-readable explanation of the failure.
    """

    def __init__(self, actionId: str, paramName: str, reason: str):
        super().__init__(f"{actionId}.{paramName}: {reason}")
        self.actionId = actionId
        self.paramName = paramName
        self.reason = reason

    def __reduce__(self):
        """Rebuild the exception from its three constructor arguments.

        The default ``BaseException.__reduce__`` re-calls the class with
        ``self.args`` (only the single formatted message), which does not
        match this three-argument constructor and makes ``pickle.loads`` /
        ``copy.copy`` fail with ``TypeError``. The instance ``__dict__`` is
        passed along as state so any extra attributes survive the round trip.
        """
        return (
            type(self),
            (self.actionId, self.paramName, self.reason),
            self.__dict__,
        )
# String spellings that _coercePrimitive maps to True / False for "bool"
# parameters. Lookups happen after the value was stripped and lower-cased,
# so an empty or whitespace-only string counts as False.
_TRUE_STRINGS = {"true", "1", "yes", "on"}
_FALSE_STRINGS = {"false", "0", "no", "off", ""}
def _isRefSchema(typeStr: str) -> bool:
"""A declared type is a Ref-Schema iff its name ends with ``Ref`` AND it
resolves to a PORT_TYPE_CATALOG schema with an ``id`` field.
The catalog is imported lazily to keep this module light at startup.
"""
if not typeStr or not typeStr.endswith("Ref"):
return False
from modules.features.graphicalEditor.portTypes import PORT_TYPE_CATALOG
schema = PORT_TYPE_CATALOG.get(typeStr)
if schema is None:
return False
return any(f.name == "id" for f in schema.fields)
def _coerceRef(actionId: str, paramName: str, value: Any) -> Optional[str]:
"""Collapse a Ref payload to its ``id`` string.
Accepts:
* already a string → returned as-is (workflow execution path),
* dict with non-empty ``id`` field → returns the id (agent path),
* ``None`` → returned as-is so optional Ref params stay optional.
"""
if value is None or isinstance(value, str):
return value
if isinstance(value, dict):
refId = value.get("id")
if isinstance(refId, str) and refId:
return refId
raise InvalidActionParameterError(
actionId, paramName,
f"Ref payload missing or empty 'id' field: {value!r}",
)
raise InvalidActionParameterError(
actionId, paramName,
f"Ref must be a string id or {{'id': ...}} dict, got {type(value).__name__}",
)
def _coercePrimitive(actionId: str, paramName: str, value: Any, typeStr: str) -> Any:
"""Best-effort coercion of primitive types from string-form payloads.
The agent's JSON tool calls deliver everything as strings/numbers; the
workflow executor passes through raw template values which are also
often strings. Coercing here removes ad-hoc ``isinstance(x, str)``
branches inside every action.
"""
if value is None:
return None
if typeStr == "bool":
if isinstance(value, bool):
return value
if isinstance(value, str):
lower = value.strip().lower()
if lower in _TRUE_STRINGS:
return True
if lower in _FALSE_STRINGS:
return False
if isinstance(value, (int, float)):
return bool(value)
raise InvalidActionParameterError(
actionId, paramName, f"cannot coerce {value!r} to bool",
)
if typeStr == "int":
if isinstance(value, bool):
return int(value)
if isinstance(value, int):
return value
if isinstance(value, str) and value.strip():
try:
return int(value.strip(), 10)
except ValueError:
pass
if isinstance(value, float) and value.is_integer():
return int(value)
raise InvalidActionParameterError(
actionId, paramName, f"cannot coerce {value!r} to int",
)
if typeStr == "float":
if isinstance(value, (int, float)):
return float(value)
if isinstance(value, str) and value.strip():
try:
return float(value.strip())
except ValueError:
pass
raise InvalidActionParameterError(
actionId, paramName, f"cannot coerce {value!r} to float",
)
return value
def validateAndCoerceParameters(actionDef, parameters: Dict[str, Any]) -> Dict[str, Any]:
    """Check ``parameters`` against ``actionDef.parameters`` and coerce them.

    Handling per declared parameter:

    * **Absent or ``None`` + required** → ``InvalidActionParameterError``.
    * **Absent or ``None`` + optional** → left exactly as received (the
      action applies its own default).
    * **Present + Ref-Schema (e.g. FeatureInstanceRef)** → ``{id: ..., ...}``
      is collapsed to the bare id string; strings pass through.
    * **Present + primitive (bool/int/float)** → coerced from common
      string forms (e.g. ``"true"`` → ``True``).
    * **Present + any other type** (catalog objects, ``str``, ``Any``,
      containers) → passed through untouched.

    Keys that are not declared (e.g. ``parentOperationId``,
    ``expectedDocumentFormats``, ad-hoc fields injected by the executor)
    are copied over unchanged.

    Returns:
        A new dict; the caller's ``parameters`` mapping is never mutated.

    Raises:
        InvalidActionParameterError: on a missing required parameter or a
            value that cannot be coerced to its declared type.
    """
    # Shallow copy up front so every later write goes to our own dict.
    result: Dict[str, Any] = dict(parameters) if parameters else {}
    actionName = getattr(actionDef, "actionId", None) or "<unknown.action>"
    schemaByName = getattr(actionDef, "parameters", {}) or {}
    primitiveTypes = ("bool", "int", "float")

    for name, schema in schemaByName.items():
        declaredType = getattr(schema, "type", None) or "Any"
        isRequired = bool(getattr(schema, "required", False))

        # A missing key and an explicit None are treated the same way.
        current = result.get(name)
        if current is None:
            if isRequired:
                raise InvalidActionParameterError(
                    actionName, name, "required parameter missing",
                )
            continue

        if _isRefSchema(declaredType):
            result[name] = _coerceRef(actionName, name, current)
        elif declaredType in primitiveTypes:
            result[name] = _coercePrimitive(actionName, name, current, declaredType)

    return result