Add document type, payment data and detailed descriptions to trustee positions

Extend TrusteePosition with documentType, payeeIban, payeeName, payeeBic, paymentReference and dueDate fields. Overhaul AI extraction prompts to capture full document details in desc (line items, addresses, conditions), extract QR/ESR payment references and IBAN from invoices, and tag each record with its documentType. Sanitise and normalise all new fields on write.

Made-with: Cursor
This commit is contained in:
patrick-motsch 2026-03-01 10:14:46 +01:00
parent 603e319f15
commit f8137e857a
4 changed files with 182 additions and 22 deletions

View file

@ -604,6 +604,67 @@ class TrusteePosition(BaseModel):
"frontend_required": False
}
)
documentType: Optional[str] = Field(
default=None,
description="Document type that generated this position (invoice, expense_receipt, bank_document, contract, unknown)",
json_schema_extra={
"frontend_type": "select",
"frontend_readonly": True,
"frontend_required": False,
"frontend_options": [
{"value": "invoice", "label": {"en": "Invoice", "fr": "Facture", "de": "Rechnung"}},
{"value": "expense_receipt", "label": {"en": "Expense Receipt", "fr": "Reçu", "de": "Beleg"}},
{"value": "bank_document", "label": {"en": "Bank Statement", "fr": "Relevé bancaire", "de": "Bankauszug"}},
{"value": "contract", "label": {"en": "Contract", "fr": "Contrat", "de": "Vertrag"}},
{"value": "unknown", "label": {"en": "Other", "fr": "Autre", "de": "Sonstige"}},
]
}
)
payeeIban: Optional[str] = Field(
default=None,
description="IBAN of the payment recipient (from invoice / QR code)",
json_schema_extra={
"frontend_type": "text",
"frontend_readonly": False,
"frontend_required": False
}
)
payeeName: Optional[str] = Field(
default=None,
description="Bank or account holder name of the payment recipient",
json_schema_extra={
"frontend_type": "text",
"frontend_readonly": False,
"frontend_required": False
}
)
payeeBic: Optional[str] = Field(
default=None,
description="BIC / SWIFT code of the recipient bank",
json_schema_extra={
"frontend_type": "text",
"frontend_readonly": False,
"frontend_required": False
}
)
paymentReference: Optional[str] = Field(
default=None,
description="Structured payment reference (QR-Referenz, ESR, SCOR, Mitteilung)",
json_schema_extra={
"frontend_type": "text",
"frontend_readonly": False,
"frontend_required": False
}
)
dueDate: Optional[str] = Field(
default=None,
description="Payment due date (ISO format: YYYY-MM-DD)",
json_schema_extra={
"frontend_type": "date",
"frontend_readonly": False,
"frontend_required": False
}
)
mandateId: Optional[str] = Field(
default=None,
description="Mandate ID (auto-set from context)",
@ -662,6 +723,12 @@ registerModelLabels(
"taxCode": {"en": "Tax Code", "fr": "Code TVA", "de": "Steuercode"},
"costCenter": {"en": "Cost Center", "fr": "Centre de coûts", "de": "Kostenstelle"},
"bookingReference": {"en": "Booking Reference", "fr": "Référence de réservation", "de": "Buchungsreferenz"},
"documentType": {"en": "Document Type", "fr": "Type de document", "de": "Dokumenttyp"},
"payeeIban": {"en": "Payee IBAN", "fr": "IBAN bénéficiaire", "de": "Empfänger-IBAN"},
"payeeName": {"en": "Payee Name", "fr": "Nom du bénéficiaire", "de": "Empfänger-Name"},
"payeeBic": {"en": "Payee BIC/SWIFT", "fr": "BIC/SWIFT bénéficiaire", "de": "Empfänger-BIC"},
"paymentReference": {"en": "Payment Reference", "fr": "Référence de paiement", "de": "Zahlungsreferenz"},
"dueDate": {"en": "Due Date", "fr": "Date d'échéance", "de": "Fälligkeitsdatum"},
"mandateId": {"en": "Mandate", "fr": "Mandat", "de": "Mandat"},
"featureInstanceId": {"en": "Feature Instance", "fr": "Instance de fonctionnalité", "de": "Feature-Instanz"},
"accountingSyncId": {"en": "Accounting Sync ID", "fr": "ID sync comptabilité", "de": "Buha-Sync-ID"},

View file

@ -143,6 +143,24 @@ def _sanitisePositionPayload(data: Dict[str, Any]) -> Dict[str, Any]:
safeData["bookingCurrency"] = str(bookingCurrency).upper()
safeData["originalCurrency"] = str(originalCurrency).upper()
if "dueDate" in safeData and safeData["dueDate"]:
safeData["dueDate"] = _normaliseIsoDate(safeData["dueDate"])
_VALID_DOC_TYPES = {"invoice", "expense_receipt", "bank_document", "contract", "unknown"}
docType = safeData.get("documentType")
if docType:
docType = str(docType).strip().lower()
safeData["documentType"] = docType if docType in _VALID_DOC_TYPES else None
else:
safeData["documentType"] = None
for strField in ("payeeIban", "payeeName", "payeeBic", "paymentReference"):
val = safeData.get(strField)
if val:
safeData[strField] = str(val).strip() or None
else:
safeData[strField] = None
return safeData

View file

@ -40,35 +40,91 @@ _CLASSIFY_PROMPT = (
# Phase 2: Type-specific structuring prompts (placeholders: {expenseList}, {bankList})
_PROMPT_EXPENSE_RECEIPT = (
"Extrahiere aus dem folgenden Dokument eine Buchung pro Ausgabeposition. "
"Return JSON: {{\"records\": [{{...}}]}}. Jeder Record: valuta (YYYY-MM-DD), transactionDateTime (unix seconds, numeric), company, desc, "
"bookingCurrency, bookingAmount, originalCurrency, originalAmount, vatPercentage, vatAmount, "
"debitAccountNumber (NUR die Kontonummer, z.B. \"6200\", aus: {expenseList}), "
"creditAccountNumber (NUR die Kontonummer, z.B. \"1020\", aus: {bankList}), tags, taxCode, costCenter, bookingReference. "
"WICHTIG: transactionDateTime muss eine ZAHL sein (z.B. 1737417600), niemals '21.01.2026'."
"Extrahiere aus dem folgenden Beleg eine Buchung pro Ausgabeposition. "
"Return JSON: {{\"records\": [{{...}}]}}. "
"Jeder Record hat diese Felder:\n"
"- documentType: immer \"expense_receipt\"\n"
"- valuta (YYYY-MM-DD), transactionDateTime (unix seconds, numeric)\n"
"- company: vollstaendiger Firmenname inkl. Rechtsform\n"
"- desc: AUSFUEHRLICHE Beschreibung — alle Positionen/Artikel, Mengen, Einzelpreise, "
"Adresse des Geschaefts, Belegnummer, evtl. Kassennummer. "
"Fuer die Nachbearbeitung muessen alle relevanten Details im desc stehen.\n"
"- bookingCurrency, bookingAmount, originalCurrency, originalAmount\n"
"- vatPercentage, vatAmount\n"
"- debitAccountNumber (NUR Kontonummer, z.B. \"6200\", aus: {expenseList})\n"
"- creditAccountNumber (NUR Kontonummer, z.B. \"1020\", aus: {bankList})\n"
"- tags, taxCode, costCenter, bookingReference\n"
"- payeeIban, payeeName, payeeBic: falls Zahlungsdaten auf dem Beleg stehen\n"
"- paymentReference: QR-Referenz / ESR-Nummer / Mitteilung, falls vorhanden\n"
"- dueDate (YYYY-MM-DD): Zahlungsfrist, falls angegeben\n"
"WICHTIG: transactionDateTime muss eine ZAHL sein (z.B. 1737417600), niemals '21.01.2026'. "
"Felder ohne Wert als null."
)
_PROMPT_BANK_DOCUMENT = (
"Extrahiere aus dem folgenden Bankauszug eine Buchung pro Transaktionszeile. "
"Return JSON: {{\"records\": [{{...}}]}}. Jeder Record: valuta, company (Gegenpartei), desc (Zahlungsreferenz), "
"bookingAmount, bookingCurrency, "
"debitAccountNumber (NUR die Kontonummer aus: {expenseList}), creditAccountNumber (NUR die Kontonummer aus: {bankList}), bookingReference. "
"Kein MwSt bei Bankauszuegen. transactionDateTime optional."
"Return JSON: {{\"records\": [{{...}}]}}. "
"Jeder Record hat diese Felder:\n"
"- documentType: immer \"bank_document\"\n"
"- valuta (YYYY-MM-DD), transactionDateTime (unix seconds, optional)\n"
"- company: Gegenpartei (vollstaendiger Name)\n"
"- desc: AUSFUEHRLICH — Zahlungsreferenz, Mitteilung, Verwendungszweck, alle Details der Transaktionszeile. "
"Wenn mehrere Referenzen/Mitteilungen vorhanden sind, alle angeben.\n"
"- bookingAmount, bookingCurrency\n"
"- debitAccountNumber (NUR Kontonummer aus: {expenseList})\n"
"- creditAccountNumber (NUR Kontonummer aus: {bankList})\n"
"- bookingReference\n"
"- payeeIban: IBAN der Gegenpartei, falls sichtbar\n"
"- payeeName: Name des Kontoinhabers der Gegenpartei\n"
"- paymentReference: Referenznummer der Transaktion\n"
"Kein MwSt bei Bankauszuegen. Felder ohne Wert als null."
)
_PROMPT_INVOICE = (
"Extrahiere aus der folgenden Rechnung genau eine Buchung. "
"Return JSON: {{\"records\": [{{...}}]}}. Record: valuta (Rechnungsdatum), company (Kreditor), desc (Rechnungsdetails), "
"bookingAmount, bookingCurrency, vatPercentage, vatAmount, "
"debitAccountNumber (NUR die Kontonummer aus: {expenseList}), creditAccountNumber (NUR die Kontonummer aus: {bankList}), "
"bookingReference (Rechnungsnummer), transactionDateTime, taxCode, costCenter. "
"Formatregeln: valuta nur YYYY-MM-DD; transactionDateTime nur unix seconds als Zahl."
"Return JSON: {{\"records\": [{{...}}]}}. "
"Der Record hat diese Felder:\n"
"- documentType: immer \"invoice\"\n"
"- valuta (Rechnungsdatum, YYYY-MM-DD)\n"
"- transactionDateTime (unix seconds als Zahl)\n"
"- company: Kreditor — vollstaendiger Firmenname inkl. Rechtsform und Adresse\n"
"- desc: AUSFUEHRLICHE Rechnungsdetails — alle Positionen mit Einzelpreisen und Mengen, "
"Rechnungsnummer, Kundennummer, Lieferadresse, besondere Bedingungen. "
"Alle Informationen die fuer die Folgebearbeitung (Zahlung, Kontrolle, Verbuchung) relevant sind.\n"
"- bookingAmount (Totalbetrag), bookingCurrency\n"
"- vatPercentage, vatAmount\n"
"- debitAccountNumber (NUR Kontonummer aus: {expenseList})\n"
"- creditAccountNumber (NUR Kontonummer aus: {bankList})\n"
"- bookingReference: Rechnungsnummer\n"
"- taxCode, costCenter\n"
"ZAHLUNGSDATEN (sehr wichtig, haeufig im QR-Code oder Einzahlungsschein):\n"
"- payeeIban: IBAN des Zahlungsempfaengers\n"
"- payeeName: Kontoinhaber / Bankname des Empfaengers\n"
"- payeeBic: BIC/SWIFT-Code, falls vorhanden\n"
"- paymentReference: Strukturierte Referenz — QR-Referenz (26-27 Stellen), "
"ESR-Referenznummer, SCOR-Referenz oder unstrukturierte Mitteilung. "
"Alle Referenzen vollstaendig uebernehmen.\n"
"- dueDate (YYYY-MM-DD): Zahlungsfrist / Faelligkeitsdatum\n"
"Formatregeln: valuta und dueDate nur YYYY-MM-DD; transactionDateTime nur unix seconds als Zahl. "
"Felder ohne Wert als null."
)
_PROMPT_FALLBACK = (
"Extrahiere aus dem folgenden Dokument Buchungsdaten. "
"Return JSON: {{\"records\": [{{...}}]}}. Jeder Record: valuta (YYYY-MM-DD), transactionDateTime (unix seconds, numeric), company, desc, "
"bookingCurrency, bookingAmount, originalCurrency, originalAmount, vatPercentage, vatAmount, "
"debitAccountNumber (NUR die Kontonummer, z.B. \"6200\", aus: {expenseList}), "
"creditAccountNumber (NUR die Kontonummer, z.B. \"1020\", aus: {bankList}), tags, taxCode, costCenter, bookingReference. "
"WICHTIG: keine lokalen Datumsformate in transactionDateTime (kein DD.MM.YYYY)."
"Return JSON: {{\"records\": [{{...}}]}}. "
"Jeder Record hat diese Felder:\n"
"- documentType: Art des Dokuments (\"invoice\", \"expense_receipt\", \"bank_document\" oder \"unknown\")\n"
"- valuta (YYYY-MM-DD), transactionDateTime (unix seconds, numeric)\n"
"- company: vollstaendiger Firmenname\n"
"- desc: AUSFUEHRLICHE Beschreibung — alle Details des Dokuments, Positionen, Referenzen, "
"Adressen, Bedingungen. Nicht nur ein Stichwort sondern alle relevanten Informationen.\n"
"- bookingCurrency, bookingAmount, originalCurrency, originalAmount\n"
"- vatPercentage, vatAmount\n"
"- debitAccountNumber (NUR Kontonummer, z.B. \"6200\", aus: {expenseList})\n"
"- creditAccountNumber (NUR Kontonummer, z.B. \"1020\", aus: {bankList})\n"
"- tags, taxCode, costCenter, bookingReference\n"
"- payeeIban, payeeName, payeeBic: Zahlungsdaten, falls vorhanden\n"
"- paymentReference: QR-Referenz / ESR / SCOR / Mitteilung\n"
"- dueDate (YYYY-MM-DD): Zahlungsfrist, falls vorhanden\n"
"WICHTIG: transactionDateTime muss eine ZAHL sein, niemals DD.MM.YYYY. "
"Felder ohne Wert als null."
)

View file

@ -45,10 +45,23 @@ def _normaliseTags(value) -> str:
return str(value)
def _recordToPosition(record: Dict[str, Any], documentId: Optional[str], featureInstanceId: str, mandateId: str) -> Dict[str, Any]:
def _cleanStr(value, default=None) -> Optional[str]:
"""Strip and return a non-empty string, else *default*."""
if not value:
return default
s = str(value).strip()
return s if s else default
def _recordToPosition(record: Dict[str, Any], documentId: Optional[str], featureInstanceId: str, mandateId: str, documentType: Optional[str] = None) -> Dict[str, Any]:
"""Map extraction record to TrusteePosition payload."""
recDocType = _cleanStr(record.get("documentType")) or documentType
if recDocType:
recDocType = recDocType.lower().strip()
return {
"documentId": documentId,
"documentType": recDocType,
"valuta": record.get("valuta"),
"transactionDateTime": record.get("transactionDateTime"),
"company": record.get("company", ""),
@ -65,6 +78,11 @@ def _recordToPosition(record: Dict[str, Any], documentId: Optional[str], feature
"taxCode": record.get("taxCode") or None,
"costCenter": record.get("costCenter") or None,
"bookingReference": record.get("bookingReference") or None,
"payeeIban": _cleanStr(record.get("payeeIban")),
"payeeName": _cleanStr(record.get("payeeName")),
"payeeBic": _cleanStr(record.get("payeeBic")),
"paymentReference": _cleanStr(record.get("paymentReference")),
"dueDate": _cleanStr(record.get("dueDate")),
"featureInstanceId": featureInstanceId,
"mandateId": mandateId,
}
@ -131,8 +149,9 @@ async def processDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
continue
allDocumentIds.append(trusteeDoc.id)
docTypeLower = (documentType or "unknown").lower()
for record in records:
posPayload = _recordToPosition(record, trusteeDoc.id, featureInstanceId, self.services.mandateId)
posPayload = _recordToPosition(record, trusteeDoc.id, featureInstanceId, self.services.mandateId, documentType=docTypeLower)
pos = trusteeInterface.createPosition(posPayload)
if pos:
allPositionIds.append(pos.id)