From 6b11d66766cab0e1c88fed7237ac977d797ac5d7 Mon Sep 17 00:00:00 2001
From: patrick-motsch
Date: Sun, 22 Feb 2026 01:03:19 +0100
Subject: [PATCH] trustee: full pipeline template, chart-of-accounts type filter, account-aware extraction prompt
---
.../automation/subAutomationTemplates.py | 36 ++++++---
.../trustee/accounting/accountingBridge.py | 18 ++++-
.../accounting/accountingConnectorBase.py | 4 +-
.../connectors/accountingConnectorAbacus.py | 2 +-
.../connectors/accountingConnectorBexio.py | 2 +-
.../connectors/accountingConnectorRma.py | 23 ++++--
.../features/trustee/routeFeatureTrustee.py | 5 +-
.../methodTrustee/actions/extractFromFiles.py | 81 ++++++++++++++++++-
8 files changed, 141 insertions(+), 30 deletions(-)
diff --git a/modules/features/automation/subAutomationTemplates.py b/modules/features/automation/subAutomationTemplates.py
index 420203ec..0795f757 100644
--- a/modules/features/automation/subAutomationTemplates.py
+++ b/modules/features/automation/subAutomationTemplates.py
@@ -376,20 +376,37 @@ AUTOMATION_TEMPLATES: Dict[str, Any] = {
"tasks": [
{
"id": "Task01",
- "title": "Extract Expenses from SharePoint PDFs",
- "description": "Reads PDF expense documents from SharePoint folder, extracts data via AI, and saves to TrusteePosition",
- "objective": "Extract expense data from PDF documents and store in Trustee database with automatic file organization",
+ "title": "Run trustee pipeline on SharePoint files",
+ "description": "Extract expenses from SharePoint PDFs, create positions + documents, sync to accounting",
+ "objective": "End-to-end: SharePoint folder → AI extraction → Trustee DB → Accounting sync",
"actionList": [
{
- "execMethod": "sharepoint",
- "execAction": "getExpensesFromPdf",
+ "execMethod": "trustee",
+ "execAction": "extractFromFiles",
"execParameters": {
"connectionReference": "{{KEY:connectionName}}",
"sharepointFolder": "{{KEY:sharepointFolder}}",
- "featureInstanceId": "{{KEY:featureInstanceId}}",
- "prompt": "{{KEY:extractionPrompt}}"
+ "featureInstanceId": "{{KEY:featureInstanceId}}"
},
- "execResultLabel": "expense_extraction_result"
+ "execResultLabel": "extract_result"
+ },
+ {
+ "execMethod": "trustee",
+ "execAction": "processDocuments",
+ "execParameters": {
+ "documentList": "docList:{{PREV_MESSAGE_ID}}:extract_result",
+ "featureInstanceId": "{{KEY:featureInstanceId}}"
+ },
+ "execResultLabel": "process_result"
+ },
+ {
+ "execMethod": "trustee",
+ "execAction": "syncToAccounting",
+ "execParameters": {
+ "documentList": "docList:{{PREV_MESSAGE_ID}}:process_result",
+ "featureInstanceId": "{{KEY:featureInstanceId}}"
+ },
+ "execResultLabel": "sync_result"
}
]
}
@@ -398,8 +415,7 @@ AUTOMATION_TEMPLATES: Dict[str, Any] = {
"parameters": {
"connectionName": "",
"sharepointFolder": "",
- "featureInstanceId": "",
- "extractionPrompt": "Du bist ein Spezialist für die Extraktion von Belegdaten aus PDF-Dokumenten.\n\nAUFGABE:\nExtrahiere die Daten aus dem bereitgestellten Zahlungsbeleg und erstelle EINE EINZIGE CSV-Tabelle mit allen Datensätzen.\n\nOUTPUT-STRUKTUR:\nErstelle genau EINE Tabelle mit den folgenden Spalten. Alle extrahierten Datensätze kommen in diese eine Tabelle als Zeilen.\n\nWICHTIGE REGELN:\n1. Pro MwSt-Prozentsatz einen separaten Datensatz (= Zeile) erstellen\n2. Alle Datensätze zusammen müssen den Gesamtbetrag des Dokuments ergeben\n3. Der gesamte extrahierte Text des Dokuments muss im Feld \"desc\" erfasst werden\n4. Feld \"company\" enthält den Lieferanten/Verkäufer der Buchung\n5. Tags müssen aus dieser Liste gewählt werden: customer, meeting, license, subscription, fuel, food, material\n - Mehrere zutreffende Tags mit Komma trennen\n\nCSV-SPALTEN (in dieser Reihenfolge):\nvaluta,transactionDateTime,company,desc,tags,bookingCurrency,bookingAmount,originalCurrency,originalAmount,vatPercentage,vatAmount\n\nDATENFORMAT:\n- valuta: YYYY-MM-DD (Valutadatum)\n- transactionDateTime: Unix-Timestamp in Sekunden (Transaktionszeitpunkt)\n- company: Lieferant/Verkäufer Name\n- desc: Vollständiger extrahierter Text des Dokuments\n- tags: Komma-getrennte Tags aus der erlaubten Liste\n- bookingCurrency: Währungscode (CHF, EUR, USD, GBP)\n- bookingAmount: Buchungsbetrag als Dezimalzahl\n- originalCurrency: Original-Währungscode\n- originalAmount: Original-Betrag als Dezimalzahl\n- vatPercentage: MwSt-Prozentsatz (z.B. 8.1 für 8.1%)\n- vatAmount: MwSt-Betrag als Dezimalzahl\n\nHINWEISE:\n- Wenn nur ein MwSt-Satz vorhanden ist, einen Datensatz erstellen\n- Wenn mehrere MwSt-Sätze vorhanden sind (z.B. Lebensmittel 2.6% und Non-Food 8.1%), separate Datensätze erstellen\n- Bei fehlenden Informationen: leeres Feld oder Standardwert"
+ "featureInstanceId": ""
}
}
]
diff --git a/modules/features/trustee/accounting/accountingBridge.py b/modules/features/trustee/accounting/accountingBridge.py
index 1e6a9f78..fc79159e 100644
--- a/modules/features/trustee/accounting/accountingBridge.py
+++ b/modules/features/trustee/accounting/accountingBridge.py
@@ -133,8 +133,11 @@ class AccountingBridge:
return SyncResult(success=False, errorMessage=f"Position {positionId} not found")
position = posRecords[0]
- # Build booking once (for push; externalDocumentIds filled after document upload)
+ # Build booking; skip if position has no accounts (not ready for sync)
booking = self._buildBookingFromPosition(position)
+ if not booking.lines:
+ logger.info("Accounting sync skipped (no accounts): positionId=%s", positionId)
+ return SyncResult(success=True, errorMessage="Position hat keine Kontierung (Soll-/Haben-Konto) – Sync übersprungen")
# 1) First: ensure all documents are in RMA (upload or duplicate); collect Beleg-IDs for linking
documentIds = []
@@ -273,12 +276,19 @@ class AccountingBridge:
results.append(result)
return results
- async def getChartOfAccounts(self, featureInstanceId: str) -> List[AccountingChart]:
- """Load the chart of accounts from the configured external system."""
+ async def getChartOfAccounts(self, featureInstanceId: str, accountType: Optional[str] = None) -> List[AccountingChart]:
+ """Load the chart of accounts from the configured external system. Optional filter by accountType."""
connector, plainConfig, _ = await self._resolveConnectorAndConfig(featureInstanceId)
if not connector or not plainConfig:
return []
- return await connector.getChartOfAccounts(plainConfig)
+ charts = await connector.getChartOfAccounts(plainConfig, accountType=accountType)
+ if accountType:
+ charts = [c for c in charts if c.accountType == accountType]
+ return charts
+
+ async def _getExpenseAccounts(self, featureInstanceId: str) -> List[AccountingChart]:
+ """Load only expense accounts (Aufwandkonten) for use in AI prompts."""
+ return await self.getChartOfAccounts(featureInstanceId, accountType="expense")
async def testConnection(self, featureInstanceId: str) -> SyncResult:
"""Test the connection with the configured accounting system."""
diff --git a/modules/features/trustee/accounting/accountingConnectorBase.py b/modules/features/trustee/accounting/accountingConnectorBase.py
index 775a07b6..2cfa4a54 100644
--- a/modules/features/trustee/accounting/accountingConnectorBase.py
+++ b/modules/features/trustee/accounting/accountingConnectorBase.py
@@ -82,8 +82,8 @@ class BaseAccountingConnector(ABC):
"""Verify the connection with the given credentials."""
@abstractmethod
- async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]:
- """Load the chart of accounts from the external system."""
+ async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]:
+ """Load the chart of accounts from the external system. accountType filters by category (e.g. 'expense', 'asset')."""
@abstractmethod
async def pushBooking(self, config: Dict[str, Any], booking: AccountingBooking) -> SyncResult:
diff --git a/modules/features/trustee/accounting/connectors/accountingConnectorAbacus.py b/modules/features/trustee/accounting/connectors/accountingConnectorAbacus.py
index 66bb14f0..193c5bf6 100644
--- a/modules/features/trustee/accounting/connectors/accountingConnectorAbacus.py
+++ b/modules/features/trustee/accounting/connectors/accountingConnectorAbacus.py
@@ -144,7 +144,7 @@ class AccountingConnectorAbacus(BaseAccountingConnector):
except Exception as e:
return SyncResult(success=False, errorMessage=str(e))
- async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]:
+ async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]:
headers = await self._buildAuthHeaders(config)
if not headers:
return []
diff --git a/modules/features/trustee/accounting/connectors/accountingConnectorBexio.py b/modules/features/trustee/accounting/connectors/accountingConnectorBexio.py
index 183d1bcc..ec60d761 100644
--- a/modules/features/trustee/accounting/connectors/accountingConnectorBexio.py
+++ b/modules/features/trustee/accounting/connectors/accountingConnectorBexio.py
@@ -92,7 +92,7 @@ class AccountingConnectorBexio(BaseAccountingConnector):
return acc.get("id")
return None
- async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]:
+ async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]:
accounts = await self._loadRawAccounts(config)
return [
AccountingChart(
diff --git a/modules/features/trustee/accounting/connectors/accountingConnectorRma.py b/modules/features/trustee/accounting/connectors/accountingConnectorRma.py
index e55cfe40..9d9fbf2f 100644
--- a/modules/features/trustee/accounting/connectors/accountingConnectorRma.py
+++ b/modules/features/trustee/accounting/connectors/accountingConnectorRma.py
@@ -87,11 +87,14 @@ class AccountingConnectorRma(BaseAccountingConnector):
except Exception as e:
return SyncResult(success=False, errorMessage=str(e))
- async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]:
+ async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]:
try:
+ params = {}
+ if accountType:
+ params["type"] = accountType
async with aiohttp.ClientSession() as session:
url = self._buildUrl(config, "charts")
- async with session.get(url, headers=self._buildHeaders(config), timeout=aiohttp.ClientTimeout(total=30)) as resp:
+ async with session.get(url, headers=self._buildHeaders(config), params=params, timeout=aiohttp.ClientTimeout(total=30)) as resp:
if resp.status != 200:
logger.error(f"RMA charts failed: HTTP {resp.status}")
return []
@@ -101,9 +104,17 @@ class AccountingConnectorRma(BaseAccountingConnector):
items = data if isinstance(data, list) else data.get("chart", data.get("row", []))
for item in items:
if isinstance(item, dict):
- accNo = item.get("accno", item.get("account_number", ""))
- label = item.get("description", item.get("label", ""))
- charts.append(AccountingChart(accountNumber=str(accNo), label=str(label)))
+ accNo = str(item.get("accno", item.get("account_number", "")))
+ label = str(item.get("description", item.get("label", "")))
+ chartType = item.get("charttype") or item.get("category") or item.get("link") or ""
+ if not chartType and accNo:
+ firstDigit = accNo[0] if accNo else ""
+ chartType = {
+ "1": "asset", "2": "liability", "3": "revenue",
+ "4": "expense", "5": "expense", "6": "expense",
+ "7": "expense", "8": "expense", "9": "closing",
+ }.get(firstDigit, "")
+ charts.append(AccountingChart(accountNumber=accNo, label=label, accountType=chartType))
return charts
except Exception as e:
logger.error(f"RMA getChartOfAccounts error: {e}")
@@ -149,7 +160,7 @@ class AccountingConnectorRma(BaseAccountingConnector):
docParts.append(label)
erfDate = datetime.utcnow().strftime("%d.%m.%Y")
linkSuffix = " (" + ", ".join(docParts) + ", erf. " + erfDate + ")"
- shortDesc = (rawDesc[:80] + "…") if len(rawDesc) > 80 else rawDesc
+ shortDesc = (rawDesc[:80] + "...") if len(rawDesc) > 80 else rawDesc
description = (shortDesc + linkSuffix).strip()[:500]
else:
description = rawDesc[:500]
diff --git a/modules/features/trustee/routeFeatureTrustee.py b/modules/features/trustee/routeFeatureTrustee.py
index 2161f719..673063eb 100644
--- a/modules/features/trustee/routeFeatureTrustee.py
+++ b/modules/features/trustee/routeFeatureTrustee.py
@@ -1348,14 +1348,15 @@ def delete_accounting_config(
async def get_chart_of_accounts(
request: Request,
instanceId: str = Path(..., description="Feature Instance ID"),
+ accountType: Optional[str] = Query(None, description="Filter by type: expense, asset, liability, revenue"),
context: RequestContext = Depends(getRequestContext)
) -> List[Dict[str, Any]]:
- """Load the chart of accounts from the connected accounting system."""
+ """Load the chart of accounts from the connected accounting system. Optional filter by accountType."""
mandateId = _validateInstanceAccess(instanceId, context)
interface = getInterface(context.user, mandateId=mandateId, featureInstanceId=instanceId)
from .accounting.accountingBridge import AccountingBridge
bridge = AccountingBridge(interface)
- charts = await bridge.getChartOfAccounts(instanceId)
+ charts = await bridge.getChartOfAccounts(instanceId, accountType=accountType)
return [c.model_dump() for c in charts]
diff --git a/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py b/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py
index fe2379bd..ac4e8fbb 100644
--- a/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py
+++ b/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py
@@ -24,6 +24,52 @@ logger = logging.getLogger(__name__)
ALLOWED_EXTENSIONS = (".pdf", ".jpg", ".jpeg")
MAX_FILES = 50
+_DEFAULT_PROMPT_FALLBACK = (
+ 'Extract document type (one of: INVOICE, EXPENSE_RECEIPT, BANK_DOCUMENT, CONTRACT, UNKNOWN) '
+ 'and expense/position records. Return JSON: {"documentType": "...", "records": [{...}]}. '
+ 'Each record must have: valuta (YYYY-MM-DD), transactionDateTime (unix seconds), company, desc (full extracted text), '
+ 'tags (from: customer, meeting, license, subscription, fuel, food, material), '
+ 'bookingCurrency, bookingAmount, originalCurrency, originalAmount, vatPercentage, vatAmount, '
+ 'debitAccountNumber (Soll-Konto nach Schweizer KMU-Kontenrahmen, z.B. 6200 Fahrzeugaufwand, 6000 Materialaufwand), '
+ 'creditAccountNumber (Haben-Konto, z.B. 1020 Bank), taxCode, costCenter, bookingReference.'
+)
+
+
+async def _buildDefaultPromptWithAccounts(self, featureInstanceId: str) -> str:
+ """Build extraction prompt with real expense accounts from the connected accounting system."""
+ try:
+ from modules.features.trustee.interfaceFeatureTrustee import getInterface as getTrusteeInterface
+ from modules.features.trustee.accounting.accountingBridge import AccountingBridge
+ trusteeInterface = getTrusteeInterface(
+ self.services.user,
+ mandateId=self.services.mandateId,
+ featureInstanceId=featureInstanceId,
+ )
+ bridge = AccountingBridge(trusteeInterface)
+ expenseAccounts = await bridge._getExpenseAccounts(featureInstanceId)
+ assetAccounts = await bridge.getChartOfAccounts(featureInstanceId, accountType="asset")
+ except Exception as e:
+ logger.debug("Could not load chart of accounts for prompt: %s", e)
+ return ""
+
+ if not expenseAccounts:
+ return ""
+
+ expenseList = ", ".join(f"{a.accountNumber} {a.label}" for a in expenseAccounts[:50])
+ bankAccounts = [a for a in assetAccounts if a.accountNumber.startswith("10")]
+ bankList = ", ".join(f"{a.accountNumber} {a.label}" for a in bankAccounts[:10]) if bankAccounts else "1020 Bank"
+
+ return (
+ 'Extract document type (one of: INVOICE, EXPENSE_RECEIPT, BANK_DOCUMENT, CONTRACT, UNKNOWN) '
+ 'and expense/position records. Return JSON: {"documentType": "...", "records": [{...}]}. '
+ 'Each record must have: valuta (YYYY-MM-DD), transactionDateTime (unix seconds), company, desc (full extracted text), '
+ 'tags (from: customer, meeting, license, subscription, fuel, food, material), '
+ 'bookingCurrency, bookingAmount, originalCurrency, originalAmount, vatPercentage, vatAmount, '
+ f'debitAccountNumber (Soll-Konto, verwende eines der folgenden Aufwandkonten: {expenseList}), '
+ f'creditAccountNumber (Haben-Konto, verwende eines der folgenden Konten: {bankList}), '
+ 'taxCode, costCenter, bookingReference.'
+ )
+
def _parseCsvToRecords(csvContent: str) -> List[Dict[str, Any]]:
"""Parse CSV content to list of expense records."""
@@ -58,7 +104,7 @@ async def _extractWithAi(self, chatDocumentId: str, fileId: str, fileName: str,
options = AiCallOptions(resultFormat="json", operationType=OperationTypeEnum.DATA_GENERATE)
try:
aiResponse = await self.services.ai.callAiContent(
- prompt=prompt or "Extract document type (one of: INVOICE, EXPENSE_RECEIPT, BANK_DOCUMENT, CONTRACT, UNKNOWN) and expense/position records. Return JSON: {\"documentType\": \"...\", \"records\": [{...}]}.",
+ prompt=prompt or _DEFAULT_PROMPT_FALLBACK,
options=options,
documentList=docList,
contentParts=None,
@@ -68,7 +114,7 @@ async def _extractWithAi(self, chatDocumentId: str, fileId: str, fileName: str,
except Exception:
options = AiCallOptions(resultFormat="csv", operationType=OperationTypeEnum.DATA_GENERATE)
aiResponse = await self.services.ai.callAiContent(
- prompt=prompt or "Extract expense data from this document. Return CSV with columns: company, desc, valuta, bookingAmount, bookingCurrency, vatPercentage, vatAmount, tags.",
+ prompt=prompt or _DEFAULT_PROMPT_FALLBACK,
options=options,
documentList=docList,
contentParts=None,
@@ -91,8 +137,31 @@ async def _extractWithAi(self, chatDocumentId: str, fileId: str, fileName: str,
try:
if raw.strip().startswith("{"):
data = json.loads(raw)
- documentType = (data.get("documentType") or "UNKNOWN").upper().replace(" ", "_")
- records = data.get("records") or data.get("extractedData") or []
+ # Direct format: {"documentType": "...", "records": [...]}
+ if "records" in data or "extractedData" in data:
+ documentType = (data.get("documentType") or "UNKNOWN").upper().replace(" ", "_")
+ records = data.get("records") or data.get("extractedData") or []
+ # Wrapped in document structure: {"documents": [{"sections": [{"elements": [{"content": {"code": "..."}}]}]}]}
+ elif "documents" in data:
+ for doc in data.get("documents", []):
+ for section in doc.get("sections", []):
+ for elem in section.get("elements", []):
+ code = (elem.get("content") or {}).get("code")
+ if code and isinstance(code, str):
+ try:
+ inner = json.loads(code)
+ if isinstance(inner, dict) and ("records" in inner or "documentType" in inner):
+ documentType = (inner.get("documentType") or "UNKNOWN").upper().replace(" ", "_")
+ records = inner.get("records") or inner.get("extractedData") or []
+ break
+ except Exception:
+ pass
+ if records:
+ break
+ if records:
+ break
+ elif "documentType" in data:
+ documentType = (data.get("documentType") or "UNKNOWN").upper().replace(" ", "_")
except Exception:
pass
@@ -260,6 +329,10 @@ async def extractFromFiles(self, parameters: Dict[str, Any]) -> ActionResult:
if i < len(createdMessage.documents):
fileIdToChatDocId[f["fileId"]] = createdMessage.documents[i].id
+ # Load expense accounts from accounting system for AI prompt (if configured)
+ if not prompt:
+ prompt = await _buildDefaultPromptWithAccounts(self, featureInstanceId)
+
# Parallel extraction (all files at once)
tasks = [
_extractOne(self, f, fileIdToChatDocId, prompt, featureInstanceId)