# gateway/modules/workflows/methods/methodTrustee/actions/queryData.py

# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""
Query data from the Trustee feature DB.

Three modes:
- ``lookup`` (default): tenant-aware lookup. For ``entity=tenantWithRent`` the
  action joins ``TrusteeDataContact`` (identity match by tenantName +
  tenantAddress with light tolerance) with derived rent amounts from
  ``TrusteeDataJournalLine`` filtered by an account-pattern. Output is a
  compact dict ready to feed into ``ai.prompt`` ``context``.
- ``raw``: return the recordset for the given entity (filtered by
  ``filterJson``). Use for debugging or advanced workflows.
- ``aggregate``: count records per group (basic group-by helper).

This action does NOT trigger an external sync — use
``trustee.refreshAccountingData`` first if data may be stale.
"""

import json
import logging
import re
from typing import Any, Dict, List, Optional

from modules.datamodels.datamodelChat import ActionResult

logger = logging.getLogger(__name__)


# Matches every run of characters outside ``a-z0-9``. ``_normalizeText`` removes
# these runs from lower-cased text to build comparison keys.
_NAME_NORMALIZE_RE = re.compile(r"[^a-z0-9]+")
# ``entity`` parameter values accepted in ``raw`` / ``aggregate`` mode, mapped to
# the model name that ``_readRecordset`` resolves to a datamodel class.
# Note: ``contact`` is singular while the other keys are plural.
_ENTITY_TO_MODEL = {
    "contact": "TrusteeDataContact",
    "accounts": "TrusteeDataAccount",
    "balances": "TrusteeDataAccountBalance",
    "journalLines": "TrusteeDataJournalLine",
}


async def queryData(self, parameters: Dict[str, Any]) -> ActionResult:
    """Run a Trustee feature-DB query selected by ``mode`` and ``entity``.

    Parameters read from ``parameters``:
        featureInstanceId: instance to query; falls back to
            ``self.services.featureInstanceId`` when that attribute exists.
        mode: ``lookup`` (default), ``raw`` or ``aggregate`` (case-insensitive).
        entity: ``tenantWithRent`` (default) or ``contact`` for ``lookup``;
            one of the ``_ENTITY_TO_MODEL`` keys for ``raw`` / ``aggregate``.
        tenantNameRef, tenantAddressRef, period, rentAccountPattern: used by
            the lookup helpers.
        filterJson: extra record filter for ``raw`` / ``aggregate``.

    Returns:
        A success ``ActionResult`` carrying the payload in ``data``, or a
        failure result when the instance id is missing, the trustee interface
        cannot be opened, or the mode/entity combination is not supported.
    """
    featureInstanceId = parameters.get("featureInstanceId") or getattr(
        self.services, "featureInstanceId", None
    )
    if not featureInstanceId:
        return ActionResult.isFailure(error="featureInstanceId is required")

    mode = (parameters.get("mode") or "lookup").lower()
    entity = parameters.get("entity") or "tenantWithRent"

    try:
        # Imported lazily, inside the guarded section, so that an import
        # problem is reported as a failed action like any other interface error.
        from modules.features.trustee.interfaceFeatureTrustee import getInterface as getTrusteeInterface
        trusteeInterface = getTrusteeInterface(
            self.services.user,
            mandateId=self.services.mandateId,
            featureInstanceId=featureInstanceId,
        )
    except Exception as exc:
        logger.exception("trustee.queryData: cannot open trustee interface")
        return ActionResult.isFailure(error=f"Trustee interface unavailable: {exc}")

    if mode == "lookup":
        if entity == "tenantWithRent":
            return ActionResult.isSuccess(
                data=_lookupTenantWithRent(trusteeInterface, featureInstanceId, parameters)
            )
        if entity == "contact":
            nameRef = parameters.get("tenantNameRef") or ""
            addressRef = parameters.get("tenantAddressRef") or ""
            found = _lookupContact(trusteeInterface, featureInstanceId, nameRef, addressRef)
            return ActionResult.isSuccess(data={"contact": found})
    elif mode in ("raw", "aggregate"):
        modelName = _ENTITY_TO_MODEL.get(entity)
        if not modelName:
            return ActionResult.isFailure(
                error=f"entity '{entity}' is not supported in mode '{mode}'"
            )
        extraFilter = _parseFilterJson(parameters.get("filterJson"))
        records = _readRecordset(trusteeInterface, featureInstanceId, modelName, extraFilter)
        result: Dict[str, Any] = {"entity": entity, "count": len(records)}
        if mode == "raw":
            result["records"] = records
        else:
            result["summary"] = _summarizeAggregate(records)
        return ActionResult.isSuccess(data=result)

    # Unknown mode, or a lookup entity without a dedicated handler.
    return ActionResult.isFailure(
        error=f"Unsupported combination mode='{mode}' entity='{entity}'"
    )


def _lookupTenantWithRent(
    trusteeInterface,
    featureInstanceId: str,
    parameters: Dict[str, Any],
) -> Dict[str, Any]:
    """Build the tenant lookup payload: matched contact plus derived rent.

    Reads ``tenantNameRef``, ``tenantAddressRef``, ``period`` and
    ``rentAccountPattern`` from ``parameters`` (each defaults to ``""``).
    The contact is resolved with ``_lookupContact``; when one is found the
    rent figures come from ``_deriveRentForContact``.

    Returns:
        On a miss: ``matched=False`` with ``contact`` / ``expectedRent`` set
        to ``None`` and an empty ``rentLines`` list. On a hit: ``matched=True``
        with the contact, the echoed ``period`` and ``rentAccountPattern``,
        the ``rentLines`` and the ``expectedRent``. Both shapes echo the
        name and address that were searched for.
    """
    tenantName = parameters.get("tenantNameRef") or ""
    tenantAddress = parameters.get("tenantAddressRef") or ""
    period = parameters.get("period") or ""
    accountPattern = parameters.get("rentAccountPattern") or ""

    # Echoed in both result shapes so the caller can see what was searched.
    searched = {"tenantNameRef": tenantName, "tenantAddressRef": tenantAddress}

    contact = _lookupContact(trusteeInterface, featureInstanceId, tenantName, tenantAddress)
    if not contact:
        return {
            "matched": False,
            **searched,
            "contact": None,
            "expectedRent": None,
            "rentLines": [],
        }

    rentLines, expectedRent = _deriveRentForContact(
        trusteeInterface, featureInstanceId, contact, period, accountPattern
    )
    return {
        "matched": True,
        **searched,
        "contact": contact,
        "period": period,
        "rentAccountPattern": accountPattern,
        "rentLines": rentLines,
        "expectedRent": expectedRent,
    }


def _lookupContact(
    trusteeInterface,
    featureInstanceId: str,
    tenantName: str,
    tenantAddress: str,
) -> Optional[Dict[str, Any]]:
    """Find the stored contact that best matches a tenant name and/or address.

    Every ``TrusteeDataContact`` row of the feature instance is scored on
    normalized text (see ``_normalizeText``):

    - name: 10 points when equal, 6 when one key contains the other
    - address (``address`` + ``zip`` + ``city``): 5 when equal, 3 on containment

    The first record reaching the highest score wins.

    Returns:
        The compact contact dict from ``_shrinkContact``, or ``None`` when no
        records exist, both search keys normalize to an empty string, or the
        best score is below 5.
    """
    from modules.features.trustee.datamodelFeatureTrustee import TrusteeDataContact

    contacts = trusteeInterface.db.getRecordset(
        TrusteeDataContact,
        recordFilter={"featureInstanceId": featureInstanceId},
    ) or []
    if not contacts:
        return None

    wantedName = _normalizeText(tenantName)
    wantedAddress = _normalizeText(tenantAddress)
    if not (wantedName or wantedAddress):
        return None

    def _points(wanted: str, found: str, exactPoints: int, partialPoints: int) -> int:
        # A side without usable text contributes nothing to the score.
        if not wanted or not found:
            return 0
        if found == wanted:
            return exactPoints
        if wanted in found or found in wanted:
            return partialPoints
        return 0

    topScore = -1
    topRecord: Optional[Dict[str, Any]] = None
    for row in contacts:
        candidate = dict(row)
        candidateName = _normalizeText(candidate.get("name") or "")
        addressParts = [
            candidate.get("address") or "",
            candidate.get("zip") or "",
            candidate.get("city") or "",
        ]
        candidateAddress = _normalizeText(" ".join(addressParts).strip())
        score = _points(wantedName, candidateName, 10, 6) + _points(
            wantedAddress, candidateAddress, 5, 3
        )
        # Strict comparison keeps the earliest record on ties.
        if score > topScore:
            topScore, topRecord = score, candidate

    return _shrinkContact(topRecord) if topScore >= 5 else None


def _deriveRentForContact(
    trusteeInterface,
    featureInstanceId: str,
    contact: Dict[str, Any],
    period: str,
    accountPattern: str,
) -> tuple:
    """Estimate a contact's rent from journal postings (heuristic).

    The trustee DB does not store a ``Mietvertrag`` entity, so the figure is
    derived in two steps:

    1. Select journal entries whose booking date lies inside ``period`` (entries
       without a booking date are not filtered out) and whose normalized
       description/reference contains the normalized contact name, or whose
       reference contains the contact number.
    2. From the lines of those entries keep the ones whose ``accountNumber``
       satisfies ``accountPattern`` and sum ``creditAmount - debitAmount``.

    Returns:
        ``(rentLines, expectedRent)``. ``rentLines`` is a list of dicts with
        ``date``, ``ref``, ``account``, ``amount`` and ``description``;
        ``expectedRent`` is the total rounded to two decimals. When there are
        no entries, no lines, or nothing matches, ``expectedRent`` is ``None``
        and ``rentLines`` is empty.
    """
    from modules.features.trustee.datamodelFeatureTrustee import (
        TrusteeDataJournalEntry,
        TrusteeDataJournalLine,
    )

    journalEntries = trusteeInterface.db.getRecordset(
        TrusteeDataJournalEntry,
        recordFilter={"featureInstanceId": featureInstanceId},
    ) or []
    journalLines = trusteeInterface.db.getRecordset(
        TrusteeDataJournalLine,
        recordFilter={"featureInstanceId": featureInstanceId},
    ) or []
    if not (journalEntries and journalLines):
        return [], None

    periodStart, periodEnd = _parsePeriod(period)
    isRentAccount = _accountMatcher(accountPattern)
    contactNameKey = _normalizeText(contact.get("name") or "")
    contactNo = (contact.get("contactNumber") or "").strip()

    # Entry id -> entry header, for every entry attributed to the contact.
    matchedEntries: Dict[Any, Dict[str, Any]] = {}
    for row in journalEntries:
        entry = dict(row)
        entryId = entry.get("id")
        if not entryId:
            continue
        booked = entry.get("bookingDate") or ""
        if booked:
            # Dates are compared as text against the parsed period bounds.
            if periodStart and booked < periodStart:
                continue
            if periodEnd and booked > periodEnd:
                continue
        reference = entry.get("reference") or ""
        searchKey = _normalizeText(" ".join([entry.get("description") or "", reference]))
        namedInText = bool(contactNameKey) and contactNameKey in searchKey
        if namedInText or (contactNo and contactNo in reference):
            matchedEntries[entryId] = entry

    rentLines = []
    runningTotal = 0.0
    for row in journalLines:
        line = dict(row)
        entry = matchedEntries.get(line.get("journalEntryId"))
        if entry is None:
            continue
        accountNo = line.get("accountNumber") or ""
        if not isRentAccount(accountNo):
            continue
        credited = float(line.get("creditAmount") or 0.0)
        debited = float(line.get("debitAmount") or 0.0)
        net = credited - debited
        rentLines.append({
            "date": entry.get("bookingDate"),
            "ref": entry.get("reference"),
            "account": accountNo,
            "amount": round(net, 2),
            "description": line.get("description") or entry.get("description"),
        })
        runningTotal += net

    return rentLines, (round(runningTotal, 2) if rentLines else None)


def _readRecordset(
    trusteeInterface,
    featureInstanceId: str,
    modelName: str,
    extraFilter: Optional[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Read all records of one trustee model, scoped to the feature instance.

    Args:
        trusteeInterface: opened trustee interface; its ``db.getRecordset`` is
            used for the read.
        featureInstanceId: instance the result is restricted to.
        modelName: one of the model names listed in ``modelMap`` below.
        extraFilter: optional additional field filters (typically the parsed
            ``filterJson``). Anything that is not a dict is ignored.

    Returns:
        The matching records as plain dicts; an empty list for an unknown
        ``modelName`` or when the read yields nothing.
    """
    from modules.features.trustee.datamodelFeatureTrustee import (
        TrusteeDataAccount,
        TrusteeDataAccountBalance,
        TrusteeDataContact,
        TrusteeDataJournalLine,
    )

    modelMap = {
        "TrusteeDataAccount": TrusteeDataAccount,
        "TrusteeDataAccountBalance": TrusteeDataAccountBalance,
        "TrusteeDataContact": TrusteeDataContact,
        "TrusteeDataJournalLine": TrusteeDataJournalLine,
    }
    model = modelMap.get(modelName)
    if not model:
        return []

    rf: Dict[str, Any] = {"featureInstanceId": featureInstanceId}
    if isinstance(extraFilter, dict):
        # The instance scope is mandatory: a caller-supplied filter must never
        # be able to replace it and read records of a different instance.
        if "featureInstanceId" in extraFilter and extraFilter["featureInstanceId"] != featureInstanceId:
            logger.warning("trustee.queryData: ignoring featureInstanceId override in filterJson")
        rf.update({key: value for key, value in extraFilter.items() if key != "featureInstanceId"})

    raw = trusteeInterface.db.getRecordset(model, recordFilter=rf) or []
    return [dict(r) for r in raw]


def _summarizeAggregate(records: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Quick counts by common fields. Avoids heavy SQL for the prototype."""
    summary: Dict[str, Any] = {"total": len(records)}
    for field in ("contactType", "accountType", "currency"):
        bucket: Dict[str, int] = {}
        for r in records:
            key = str(r.get(field) or "")
            bucket[key] = bucket.get(key, 0) + 1
        if bucket:
            summary[f"by_{field}"] = bucket
    return summary


def _normalizeText(value: str) -> str:
    """Reduce text to a lower-case ASCII alphanumeric comparison key.

    Accented letters are folded to their base letter first (``"Müller"`` ->
    ``"muller"``) so that OCR output with or without diacritics yields the same
    key; everything outside ``a-z0-9`` is then removed. ``None`` or an empty
    value gives ``""``.
    """
    import unicodedata

    # NFKD splits an accented letter into base letter + combining mark; the
    # mark is then dropped by the regex together with punctuation and spaces.
    decomposed = unicodedata.normalize("NFKD", value or "")
    return _NAME_NORMALIZE_RE.sub("", decomposed.lower())


def _shrinkContact(rec: Dict[str, Any]) -> Dict[str, Any]:
    return {
        "id": rec.get("id"),
        "externalId": rec.get("externalId"),
        "contactType": rec.get("contactType"),
        "contactNumber": rec.get("contactNumber"),
        "name": rec.get("name"),
        "address": rec.get("address"),
        "zip": rec.get("zip"),
        "city": rec.get("city"),
        "email": rec.get("email"),
    }


def _parseFilterJson(raw: Any) -> Dict[str, Any]:
    if not raw:
        return {}
    if isinstance(raw, dict):
        return raw
    try:
        parsed = json.loads(str(raw))
        return parsed if isinstance(parsed, dict) else {}
    except Exception:
        logger.warning("trustee.queryData: invalid filterJson, ignoring")
        return {}


def _parsePeriod(period: str) -> tuple:
    """Parse ``"YYYY"`` or ``"YYYY-MM-DD/YYYY-MM-DD"`` into ``(from, to)``.

    Empty string → ``(None, None)``. Invalid input is treated as no filter
    rather than raising — workflows must not abort on malformed period text.
    """
    if not period:
        return None, None
    period = period.strip()
    if "/" in period:
        parts = period.split("/", 1)
        return parts[0].strip() or None, parts[1].strip() or None
    if len(period) == 4 and period.isdigit():
        return f"{period}-01-01", f"{period}-12-31"
    return period, period


def _accountMatcher(pattern: str):
    """Return a predicate ``str -> bool`` that matches account numbers.

    Supports ``"6*"`` (prefix), ``"6000-6099"`` (numeric range), and exact
    matches. Empty pattern matches everything (caller decides if that's wise).
    """
    pattern = (pattern or "").strip()
    if not pattern:
        return lambda _x: True
    if "-" in pattern and pattern.replace("-", "").isdigit():
        lo, hi = pattern.split("-", 1)
        try:
            lo_i = int(lo)
            hi_i = int(hi)
            def _rangeMatch(acc: str) -> bool:
                try:
                    return lo_i <= int(acc) <= hi_i
                except (TypeError, ValueError):
                    return False
            return _rangeMatch
        except ValueError:
            pass
    if pattern.endswith("*"):
        prefix = pattern[:-1]
        return lambda acc: (acc or "").startswith(prefix)
    return lambda acc: acc == pattern