# gateway/tests/integration/trustee/test_spesenbelege_workflow_e2e.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Plan #2 Track A2 (T4): Trustee Spesenbelege Live-E2E Integration-Test.

Runs the canonical Trustee Spesenbelege chain end-to-end through
``executeGraph``::

    trigger.manual
        -> trustee.processDocuments       (real action)
            -> trustee.syncToAccounting   (real action)

with:

* an in-memory **TrusteeInterface** fake (records createDocument /
  createPosition / updatePosition calls and assigns deterministic IDs),
* an in-memory **AccountingBridge** fake (records pushBatchToAccounting
  calls and returns one success result per positionId),
* a literal upstream ``documentList`` (no AI / SharePoint involved — the
  extraction step is replaced by a canned ActionDocument list so this
  test focuses on the bindings + action layer, exactly as the Track A2
  plan requires: "Mock SharePoint + AI + Trustee-DB, echtes
  processDocuments + syncToAccounting").

The test exercises the **Schicht-4 typed bindings pipeline** end-to-end:

* ``featureInstanceId`` is provided as a typed ``FeatureInstanceRef``
  envelope on the producer node and as a raw legacy UUID on the consumer
  node — both must reach the action layer as the bare UUID string after
  ``materializeFeatureInstanceRefs`` + ``resolveParameterReferences``.
* ``documentList`` on ``trustee.syncToAccounting`` is a ``DataRef`` on
  ``processDocuments[documents]`` (Pick-not-Push) — must resolve to the
  ActionDocument list produced by ``processDocuments``.

Plan: ``wiki/c-work/1-plan/2026-04-typed-action-followups.md`` (A2 / T4).
"""
from __future__ import annotations

import copy
import json
import uuid
from typing import Any, Dict, List, Optional

import pytest

from modules.workflows.automation2.executionEngine import executeGraph
from modules.workflows.automation2.runEnvelope import default_run_envelope


# Feature-instance UUID shared by the fake trustee interface, the services
# stub and every graph built in this module.
_TRUSTEE_INSTANCE_UUID = "11111111-2222-3333-4444-555555555555"
# Mandate id handed to the fake trustee interface, the services stub and
# ``executeGraph``.
_MANDATE_ID = "mandate-zh-001"


# ---------------------------------------------------------------------------
# In-memory fakes for the Trustee feature
# ---------------------------------------------------------------------------


class _FakeTrusteeDocument:
    """Attribute-style test double standing in for ``TrusteeDocument``.

    Every instance receives a freshly generated UUID string as ``id``.
    Each key of the constructor payload is then copied onto the instance
    as an attribute, so a payload that carries its own ``id`` replaces the
    generated one.
    """

    def __init__(self, payload: Dict[str, Any]):
        self.id = str(uuid.uuid4())
        for name in payload:
            setattr(self, name, payload[name])

    def model_dump(self) -> Dict[str, Any]:
        """Return a shallow copy of the instance attributes as a dict."""
        return dict(vars(self))


class _FakeTrusteePosition:
    """Attribute-style test double standing in for ``TrusteePosition``.

    A random UUID string is assigned to ``id`` first; the payload entries
    are applied afterwards, one attribute per key, so a payload ``id``
    takes precedence over the generated value.
    """

    def __init__(self, payload: Dict[str, Any]):
        self.id = str(uuid.uuid4())
        for pair in payload.items():
            setattr(self, *pair)

    def model_dump(self) -> Dict[str, Any]:
        """Return the current attributes as a new dict (shallow copy)."""
        return {**vars(self)}


class _FakeTrusteeDb:
    """Recording stand-in for the trustee DB handle.

    Lets processDocuments' bank-match auto-linking path run without a
    real database: every ``getRecordset`` call is logged in ``calls`` and
    answered with the positions currently held by the shared list.
    """

    def __init__(self, positions: List[_FakeTrusteePosition]):
        self.calls: List[Dict[str, Any]] = []
        # Keep the caller's list object (no copy) so positions appended
        # later are visible to subsequent lookups.
        self._positions = positions

    def getRecordset(self, model, recordFilter=None):
        """Log the request and return a copy of all known positions.

        ``recordFilter`` is recorded but not applied.
        """
        entry = {
            "model": getattr(model, "__name__", str(model)),
            "filter": recordFilter,
        }
        self.calls.append(entry)
        return [*self._positions]


class _FakeTrusteeInterface:
    """In-memory substitute for the live trustee interface.

    Created documents and positions are kept in ``documents`` and
    ``positions``; every ``updatePosition`` request is logged in
    ``updates``. ``db`` is a ``_FakeTrusteeDb`` that shares the
    ``positions`` list.
    """

    def __init__(self, mandateId: str, featureInstanceId: str):
        self.mandateId = mandateId
        self.featureInstanceId = featureInstanceId
        self.documents: List[_FakeTrusteeDocument] = []
        self.positions: List[_FakeTrusteePosition] = []
        self.updates: List[Dict[str, Any]] = []
        # The DB fake receives the very same list object, so positions
        # created later show up in its record sets.
        self.db = _FakeTrusteeDb(self.positions)

    def createDocument(self, payload: Dict[str, Any]) -> _FakeTrusteeDocument:
        """Store and return a fake document built from ``payload``.

        Only a fixed set of payload keys is copied (missing ones become
        ``None``); ``mandateId`` and ``featureInstanceId`` always come
        from this interface instance, never from the payload.
        """
        copiedKeys = (
            "fileId",
            "documentName",
            "documentMimeType",
            "sourceType",
            "documentType",
        )
        fields: Dict[str, Any] = {key: payload.get(key) for key in copiedKeys}
        fields["mandateId"] = self.mandateId
        fields["featureInstanceId"] = self.featureInstanceId
        created = _FakeTrusteeDocument(fields)
        self.documents.append(created)
        return created

    def createPosition(self, payload: Dict[str, Any]) -> _FakeTrusteePosition:
        """Store and return a fake position holding a copy of ``payload``."""
        created = _FakeTrusteePosition(dict(payload))
        self.positions.append(created)
        return created

    def updatePosition(self, positionId: str, patch: Dict[str, Any]) -> Optional[_FakeTrusteePosition]:
        """Log the update and apply ``patch`` to the first matching position.

        The request is recorded even when no position has the given id;
        in that case ``None`` is returned.
        """
        self.updates.append({"id": positionId, "patch": dict(patch)})
        for candidate in self.positions:
            matches = getattr(candidate, "id", None) == positionId
            if not matches:
                continue
            for name, value in patch.items():
                setattr(candidate, name, value)
            return candidate
        return None


class _FakeAccountingResult:
    """Plain result record returned by the fake accounting bridge.

    Carries a ``success`` flag and an optional ``errorMessage``.
    """

    def __init__(self, success: bool = True, errorMessage: Optional[str] = None):
        self.success, self.errorMessage = success, errorMessage


class _FakeAccountingBridge:
    """Fake accounting bridge that logs batch pushes.

    Calls are appended to the class-level ``pushBatchCalls`` list, so the
    log is shared by all instances. Each push answers with one successful
    result per position id.
    """

    pushBatchCalls: List[Dict[str, Any]] = []

    def __init__(self, trusteeInterface):
        self.trusteeInterface = trusteeInterface

    async def pushBatchToAccounting(self, featureInstanceId: str,
                                    positionIds: List[str]):
        """Record the push and return a success result for every id."""
        record = {
            "featureInstanceId": featureInstanceId,
            "positionIds": list(positionIds),
        }
        type(self).pushBatchCalls.append(record)
        outcomes = []
        for _unused in positionIds:
            outcomes.append(_FakeAccountingResult(success=True))
        return outcomes


# ---------------------------------------------------------------------------
# Test fixtures: mock services + module-level patches
# ---------------------------------------------------------------------------


@pytest.fixture
def trusteeInterface():
    """Provide a new in-memory trustee interface bound to the module's
    mandate id and feature-instance UUID."""
    fake = _FakeTrusteeInterface(
        mandateId=_MANDATE_ID,
        featureInstanceId=_TRUSTEE_INSTANCE_UUID,
    )
    return fake


@pytest.fixture(autouse=True)
def resetAccountingBridgeCalls():
    """Give every test an empty accounting-bridge call log.

    The log lives on the ``_FakeAccountingBridge`` class, so it is rebound
    to a new empty list both before and after each test.
    """
    def _rebindCallLog():
        _FakeAccountingBridge.pushBatchCalls = []

    _rebindCallLog()
    yield
    _rebindCallLog()


@pytest.fixture
def patchTrustee(monkeypatch, trusteeInterface):
    """Route the trustee feature to the in-memory fakes for one test.

    Replaces ``getInterface`` on the ``interfaceFeatureTrustee`` module
    with a stub that ignores its arguments and always returns the
    ``trusteeInterface`` fixture, and replaces ``AccountingBridge`` on the
    ``accountingBridge`` module with ``_FakeAccountingBridge``. Both
    replacements go through ``monkeypatch`` and are therefore undone when
    the test finishes.

    Returns the fake trustee interface so tests can inspect what was
    recorded on it.

    NOTE(review): the patches target the modules that define
    ``getInterface`` / ``AccountingBridge``, not the two action modules
    imported below (``_procMod`` / ``_syncMod`` are never referenced).
    This presumably relies on the actions resolving both names from the
    defining modules at call time — confirm against the action code.
    """
    # Imported but not referenced afterwards; see the NOTE(review) above.
    from modules.workflows.methods.methodTrustee.actions import (
        processDocuments as _procMod,
        syncToAccounting as _syncMod,
    )
    # Modules whose attributes are actually replaced.
    from modules.features.trustee import (
        interfaceFeatureTrustee as _ifaceMod,
    )
    from modules.features.trustee.accounting import accountingBridge as _bridgeMod

    # Accepts and discards whatever the caller passes, so the arguments
    # given to ``getInterface`` are not observable from the tests.
    def _fakeGetInterface(*_args, **_kwargs):
        return trusteeInterface

    # ``raising=True``: fail loudly if either attribute does not exist.
    monkeypatch.setattr(_ifaceMod, "getInterface", _fakeGetInterface, raising=True)
    monkeypatch.setattr(_bridgeMod, "AccountingBridge", _FakeAccountingBridge, raising=True)
    return trusteeInterface


def _services():
    """Build the bare-bones services object handed to ``executeGraph``.

    The returned stub exposes ``mandateId`` and ``featureInstanceId`` set
    to the module constants, with ``user`` and ``chat`` left as ``None``.
    The trustee actions are expected to read only the two ids directly
    and to get everything else through ``parameters``; ``chat`` is
    presumably looked up only as a fallback that this module does not
    exercise (verify against the action code).
    """
    class _S:
        chat = None
        user = None
        featureInstanceId = _TRUSTEE_INSTANCE_UUID
        mandateId = _MANDATE_ID

    stub = _S()
    return stub


# ---------------------------------------------------------------------------
# Canned upstream extraction result
# ---------------------------------------------------------------------------


def _expenseReceiptExtraction() -> Dict[str, Any]:
    """Return a canned extraction result for a single fuel expense receipt.

    A new dict is built on every call. Its one booking record shares
    ``bookingReference`` with the canned bank document of this module.
    """
    bookingRecord: Dict[str, Any] = dict(
        documentType="expense_receipt",
        valuta="2026-04-12",
        transactionDateTime=1744675200,
        company="Migrolino Tankstelle Zürich AG",
        desc="Tankfüllung Bleifrei 95, 42.30 L à 1.799 CHF/L",
        bookingCurrency="CHF",
        bookingAmount="76.10",
        originalCurrency="CHF",
        originalAmount="76.10",
        vatPercentage="8.1",
        vatAmount="5.71",
        debitAccountNumber="6200 Fahrzeugaufwand",
        creditAccountNumber="1020 Bank",
        tags=["fuel", "vehicle"],
        bookingReference="RB-2026-04-12-001",
    )
    extraction: Dict[str, Any] = dict(
        documentType="EXPENSE_RECEIPT",
        fileId="file-001",
        fileName="tankbeleg.pdf",
        extractedData=[bookingRecord],
    )
    return extraction


def _bankDocumentExtraction() -> Dict[str, Any]:
    """Return a canned extraction result for a single bank-statement line.

    A new dict is built on every call. The record is a debit of the same
    amount as the canned expense receipt and carries the same
    ``bookingReference``.
    """
    statementLine: Dict[str, Any] = dict(
        documentType="bank_document",
        valuta="2026-04-13",
        company="Migrolino Tankstelle Zürich AG",
        desc="Lastschrift Tankfüllung 12.04.2026, Ref RB-2026-04-12-001",
        bookingCurrency="CHF",
        bookingAmount="-76.10",
        creditAccountNumber="1020 Bank",
        bookingReference="RB-2026-04-12-001",
    )
    extraction: Dict[str, Any] = dict(
        documentType="BANK_DOCUMENT",
        fileId="file-002",
        fileName="kontoauszug_april.pdf",
        extractedData=[statementLine],
    )
    return extraction


def _cannedExtractionDocuments() -> List[Dict[str, Any]]:
    """Return the two canned upstream documents, receipt first.

    Each entry is an ActionDocument-shaped dict whose ``documentData`` is
    the JSON-serialised extraction result. processDocuments'
    ``_resolveDocumentList`` is expected to accept this shape directly
    when ``documentName`` / ``documentData`` are present.
    """
    sources = (
        ("tankbeleg.json", _expenseReceiptExtraction),
        ("kontoauszug_april.json", _bankDocumentExtraction),
    )
    return [
        {
            "documentName": name,
            "documentData": json.dumps(buildExtraction()),
            "mimeType": "application/json",
        }
        for name, buildExtraction in sources
    ]


# ---------------------------------------------------------------------------
# Graph builder
# ---------------------------------------------------------------------------


def _buildGraph(featureInstanceIdOnProcess, featureInstanceIdOnSync) -> Dict[str, Any]:
    """Assemble the Trustee Spesenbelege graph used by the tests.

    The graph has three nodes — ``trigger``, ``process`` and ``sync`` —
    and a single connection ``process -> sync``. ``process`` gets the
    canned extraction documents as a literal ``documentList``; ``sync``
    picks its ``documentList`` from ``process`` via a ``ref`` on
    ``documents``. The two arguments are placed verbatim into the
    respective node's ``featureInstanceId`` parameter.

    ``trigger.manual`` is deliberately left unconnected: it emits an
    ``ActionResult`` port, which is not assignable into
    ``trustee.processDocuments[in:0]`` (accepts only ``DocumentList`` /
    ``Transit``). Production graphs go through
    ``trustee.extractFromFiles`` first; this module skips that step and
    starts the data plane at ``process``.
    """
    triggerNode = {"id": "trigger", "type": "trigger.manual", "parameters": {}}

    processNode = {
        "id": "process",
        "type": "trustee.processDocuments",
        "parameters": {
            "featureInstanceId": featureInstanceIdOnProcess,
            "documentList": _cannedExtractionDocuments(),
        },
    }

    # Pick-not-Push: sync reads the documents produced by ``process``.
    processDocumentsRef = {
        "type": "ref",
        "nodeId": "process",
        "path": ["documents"],
    }
    syncNode = {
        "id": "sync",
        "type": "trustee.syncToAccounting",
        "parameters": {
            "featureInstanceId": featureInstanceIdOnSync,
            "documentList": processDocumentsRef,
        },
    }

    return {
        "nodes": [triggerNode, processNode, syncNode],
        "connections": [{"source": "process", "target": "sync"}],
    }


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------


class TestSpesenbelegeEndToEnd:
    """End-to-end Trustee Spesenbelege graph through ``executeGraph``.

    Every test builds the graph with ``_buildGraph``, runs it against the
    fakes installed by the ``patchTrustee`` fixture, and then inspects the
    returned ``nodeOutputs`` together with what the fakes recorded.
    """

    @pytest.mark.asyncio
    async def test_processAndSyncWritesDocumentsPositionsAndAccountingPush(
        self, patchTrustee
    ):
        """Happy-path: 1 expense receipt + 1 bank document.

        The process node receives ``featureInstanceId`` as a typed
        ``FeatureInstanceRef`` envelope, the sync node as a raw UUID.
        Asserts at all three layers: bindings, action results, and side
        effects on the (faked) trustee + accounting infrastructure."""
        trustee = patchTrustee
        envelope = {
            "$type": "FeatureInstanceRef",
            "id": _TRUSTEE_INSTANCE_UUID,
            "featureCode": "trustee",
        }
        graph = _buildGraph(
            featureInstanceIdOnProcess=copy.deepcopy(envelope),
            featureInstanceIdOnSync=_TRUSTEE_INSTANCE_UUID,
        )
        runEnvelope = default_run_envelope("manual", payload={})

        result = await executeGraph(
            graph,
            services=_services(),
            run_envelope=runEnvelope,
            userId="test-user",
            mandateId=_MANDATE_ID,
            instanceId=_TRUSTEE_INSTANCE_UUID,
        )

        assert result.get("success") is True, result

        # --- Layer 1: bindings — both nodes must see the unwrapped UUID ---
        # NOTE(review): the fake interface stamps ``featureInstanceId``
        # from its own constructor argument, so this loop confirms that
        # the documents were written through the fake, not what the
        # process node received. The bridge-call assertion at the end of
        # this test is the check that observes a value passed by an action.
        assert len(trustee.documents) == 2
        for doc in trustee.documents:
            assert doc.featureInstanceId == _TRUSTEE_INSTANCE_UUID

        # --- Layer 2: action results -----------------------------------
        nodeOutputs = result["nodeOutputs"]
        processOut = nodeOutputs["process"]
        assert processOut.get("success") is True
        assert processOut.get("error") in (None, "", False)
        assert isinstance(processOut.get("documents"), list)
        assert len(processOut["documents"]) == 1
        processedDoc = processOut["documents"][0]
        assert processedDoc.get("documentName") == "process_documents_result.json"
        payload = json.loads(processedDoc["documentData"])
        assert len(payload["documentIds"]) == 2
        assert len(payload["positionIds"]) == 2
        # Bank document auto-link found the matching expense (same
        # bookingReference RB-2026-04-12-001), so exactly one position
        # was matched.
        assert len(payload["autoMatchedPositionIds"]) == 1

        syncOut = nodeOutputs["sync"]
        assert syncOut.get("success") is True
        assert syncOut.get("error") in (None, "", False)
        # Fail with the full node output instead of a bare KeyError /
        # IndexError when the summary document is missing.
        assert syncOut.get("documents"), syncOut
        syncDoc = syncOut["documents"][0]
        syncSummary = json.loads(syncDoc["documentData"])
        assert syncSummary["pushed"] == 2
        assert syncSummary["total"] == 2
        assert all(r["success"] is True for r in syncSummary["results"])

        # --- Layer 3: side effects -------------------------------------
        assert len(trustee.positions) == 2
        # Bank document update propagated through updatePosition
        assert len(trustee.updates) == 1
        assert "bankDocumentId" in trustee.updates[0]["patch"]

        # Accounting bridge was called once with the resolved positionIds
        # and the unwrapped UUID, NOT the typed envelope.
        assert len(_FakeAccountingBridge.pushBatchCalls) == 1
        call = _FakeAccountingBridge.pushBatchCalls[0]
        assert call["featureInstanceId"] == _TRUSTEE_INSTANCE_UUID
        assert sorted(call["positionIds"]) == sorted(payload["positionIds"])

    @pytest.mark.asyncio
    async def test_legacyRawUuidFeatureInstanceIdAlsoWorks(self, patchTrustee):
        """A pre-Schicht-4 graph storing ``featureInstanceId`` as a raw
        UUID must produce the same end-to-end behaviour after the
        runtime ``materializeFeatureInstanceRefs`` migration."""
        trustee = patchTrustee
        graph = _buildGraph(
            featureInstanceIdOnProcess=_TRUSTEE_INSTANCE_UUID,
            featureInstanceIdOnSync=_TRUSTEE_INSTANCE_UUID,
        )
        result = await executeGraph(
            graph,
            services=_services(),
            run_envelope=default_run_envelope("manual", payload={}),
            userId="test-user",
            mandateId=_MANDATE_ID,
            instanceId=_TRUSTEE_INSTANCE_UUID,
        )
        assert result.get("success") is True, result
        assert len(trustee.documents) == 2
        assert len(trustee.positions) == 2
        # Check the call count first so a missing push is reported as an
        # assertion failure rather than an IndexError on the next line.
        assert len(_FakeAccountingBridge.pushBatchCalls) == 1, (
            _FakeAccountingBridge.pushBatchCalls
        )
        assert _FakeAccountingBridge.pushBatchCalls[0]["featureInstanceId"] == _TRUSTEE_INSTANCE_UUID

    @pytest.mark.asyncio
    async def test_emptyExtractionListIsHandledGracefully(self, patchTrustee):
        """When the only upstream document carries an empty
        ``extractedData`` list, the run must still succeed: nothing is
        written to the trustee fake, the sync summary reports zero pushes
        and the accounting bridge is never called.

        The wording of any message surfaced by syncToAccounting is not
        asserted here."""
        trustee = patchTrustee
        graph = _buildGraph(
            featureInstanceIdOnProcess=_TRUSTEE_INSTANCE_UUID,
            featureInstanceIdOnSync=_TRUSTEE_INSTANCE_UUID,
        )
        # Replace the canned documents with a no-records extraction.
        emptyExtraction = {
            "documentType": "EXPENSE_RECEIPT",
            "fileId": "file-empty",
            "fileName": "empty.json",
            "extractedData": [],
        }
        # Look the node up by id so this test does not depend on the
        # position of ``process`` inside the node list.
        processNode = next(
            node for node in graph["nodes"] if node["id"] == "process"
        )
        processNode["parameters"]["documentList"] = [{
            "documentName": "empty.json",
            "documentData": json.dumps(emptyExtraction),
            "mimeType": "application/json",
        }]
        result = await executeGraph(
            graph,
            services=_services(),
            run_envelope=default_run_envelope("manual", payload={}),
            userId="test-user",
            mandateId=_MANDATE_ID,
            instanceId=_TRUSTEE_INSTANCE_UUID,
        )
        assert result.get("success") is True, result
        assert len(trustee.documents) == 0
        assert len(trustee.positions) == 0
        syncOut = result["nodeOutputs"]["sync"]
        # Surface the whole node output if no summary document came back.
        assert syncOut.get("documents"), syncOut
        syncSummary = json.loads(syncOut["documents"][0]["documentData"])
        assert syncSummary["pushed"] == 0
        assert _FakeAccountingBridge.pushBatchCalls == []