From 6b11d66766cab0e1c88fed7237ac977d797ac5d7 Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Sun, 22 Feb 2026 01:03:19 +0100 Subject: [PATCH] fixes --- .../automation/subAutomationTemplates.py | 36 ++++++--- .../trustee/accounting/accountingBridge.py | 18 ++++- .../accounting/accountingConnectorBase.py | 4 +- .../connectors/accountingConnectorAbacus.py | 2 +- .../connectors/accountingConnectorBexio.py | 2 +- .../connectors/accountingConnectorRma.py | 23 ++++-- .../features/trustee/routeFeatureTrustee.py | 5 +- .../methodTrustee/actions/extractFromFiles.py | 81 ++++++++++++++++++- 8 files changed, 141 insertions(+), 30 deletions(-) diff --git a/modules/features/automation/subAutomationTemplates.py b/modules/features/automation/subAutomationTemplates.py index 420203ec..0795f757 100644 --- a/modules/features/automation/subAutomationTemplates.py +++ b/modules/features/automation/subAutomationTemplates.py @@ -376,20 +376,37 @@ AUTOMATION_TEMPLATES: Dict[str, Any] = { "tasks": [ { "id": "Task01", - "title": "Extract Expenses from SharePoint PDFs", - "description": "Reads PDF expense documents from SharePoint folder, extracts data via AI, and saves to TrusteePosition", - "objective": "Extract expense data from PDF documents and store in Trustee database with automatic file organization", + "title": "Run trustee pipeline on SharePoint files", + "description": "Extract expenses from SharePoint PDFs, create positions + documents, sync to accounting", + "objective": "End-to-end: SharePoint folder → AI extraction → Trustee DB → Accounting sync", "actionList": [ { - "execMethod": "sharepoint", - "execAction": "getExpensesFromPdf", + "execMethod": "trustee", + "execAction": "extractFromFiles", "execParameters": { "connectionReference": "{{KEY:connectionName}}", "sharepointFolder": "{{KEY:sharepointFolder}}", - "featureInstanceId": "{{KEY:featureInstanceId}}", - "prompt": "{{KEY:extractionPrompt}}" + "featureInstanceId": "{{KEY:featureInstanceId}}" }, - "execResultLabel": "expense_extraction_result" + "execResultLabel": "extract_result" + }, + { + "execMethod": "trustee", + "execAction": "processDocuments", + "execParameters": { + "documentList": "docList:{{PREV_MESSAGE_ID}}:extract_result", + "featureInstanceId": "{{KEY:featureInstanceId}}" + }, + "execResultLabel": "process_result" + }, + { + "execMethod": "trustee", + "execAction": "syncToAccounting", + "execParameters": { + "documentList": "docList:{{PREV_MESSAGE_ID}}:process_result", + "featureInstanceId": "{{KEY:featureInstanceId}}" + }, + "execResultLabel": "sync_result" } ] } @@ -398,8 +415,7 @@ AUTOMATION_TEMPLATES: Dict[str, Any] = { "parameters": { "connectionName": "", "sharepointFolder": "", - "featureInstanceId": "", - "extractionPrompt": "Du bist ein Spezialist für die Extraktion von Belegdaten aus PDF-Dokumenten.\n\nAUFGABE:\nExtrahiere die Daten aus dem bereitgestellten Zahlungsbeleg und erstelle EINE EINZIGE CSV-Tabelle mit allen Datensätzen.\n\nOUTPUT-STRUKTUR:\nErstelle genau EINE Tabelle mit den folgenden Spalten. Alle extrahierten Datensätze kommen in diese eine Tabelle als Zeilen.\n\nWICHTIGE REGELN:\n1. Pro MwSt-Prozentsatz einen separaten Datensatz (= Zeile) erstellen\n2. Alle Datensätze zusammen müssen den Gesamtbetrag des Dokuments ergeben\n3. Der gesamte extrahierte Text des Dokuments muss im Feld \"desc\" erfasst werden\n4. Feld \"company\" enthält den Lieferanten/Verkäufer der Buchung\n5. Tags müssen aus dieser Liste gewählt werden: customer, meeting, license, subscription, fuel, food, material\n - Mehrere zutreffende Tags mit Komma trennen\n\nCSV-SPALTEN (in dieser Reihenfolge):\nvaluta,transactionDateTime,company,desc,tags,bookingCurrency,bookingAmount,originalCurrency,originalAmount,vatPercentage,vatAmount\n\nDATENFORMAT:\n- valuta: YYYY-MM-DD (Valutadatum)\n- transactionDateTime: Unix-Timestamp in Sekunden (Transaktionszeitpunkt)\n- company: Lieferant/Verkäufer Name\n- desc: Vollständiger extrahierter Text des Dokuments\n- tags: Komma-getrennte Tags aus der erlaubten Liste\n- bookingCurrency: Währungscode (CHF, EUR, USD, GBP)\n- bookingAmount: Buchungsbetrag als Dezimalzahl\n- originalCurrency: Original-Währungscode\n- originalAmount: Original-Betrag als Dezimalzahl\n- vatPercentage: MwSt-Prozentsatz (z.B. 8.1 für 8.1%)\n- vatAmount: MwSt-Betrag als Dezimalzahl\n\nHINWEISE:\n- Wenn nur ein MwSt-Satz vorhanden ist, einen Datensatz erstellen\n- Wenn mehrere MwSt-Sätze vorhanden sind (z.B. Lebensmittel 2.6% und Non-Food 8.1%), separate Datensätze erstellen\n- Bei fehlenden Informationen: leeres Feld oder Standardwert" + "featureInstanceId": "" } } ] diff --git a/modules/features/trustee/accounting/accountingBridge.py b/modules/features/trustee/accounting/accountingBridge.py index 1e6a9f78..fc79159e 100644 --- a/modules/features/trustee/accounting/accountingBridge.py +++ b/modules/features/trustee/accounting/accountingBridge.py @@ -133,8 +133,11 @@ class AccountingBridge: return SyncResult(success=False, errorMessage=f"Position {positionId} not found") position = posRecords[0] - # Build booking once (for push; externalDocumentIds filled after document upload) + # Build booking; skip if position has no accounts (not ready for sync) booking = self._buildBookingFromPosition(position) + if not booking.lines: + logger.info("Accounting sync skipped (no accounts): positionId=%s", positionId) + return SyncResult(success=True, errorMessage="Position hat keine Kontierung (Soll-/Haben-Konto) – Sync übersprungen") # 1) First: ensure all documents are in RMA (upload or duplicate); collect Beleg-IDs for linking documentIds = [] @@ -273,12 +276,19 @@ class AccountingBridge: results.append(result) return results - async def getChartOfAccounts(self, featureInstanceId: str) -> List[AccountingChart]: - """Load the chart of accounts from the configured external system.""" + async def getChartOfAccounts(self, featureInstanceId: str, accountType: Optional[str] = None) -> List[AccountingChart]: + """Load the chart of accounts from the configured external system. Optional filter by accountType.""" connector, plainConfig, _ = await self._resolveConnectorAndConfig(featureInstanceId) if not connector or not plainConfig: return [] - return await connector.getChartOfAccounts(plainConfig) + charts = await connector.getChartOfAccounts(plainConfig, accountType=accountType) + if accountType: + charts = [c for c in charts if c.accountType == accountType] + return charts + + async def _getExpenseAccounts(self, featureInstanceId: str) -> List[AccountingChart]: + """Load only expense accounts (Aufwandkonten) for use in AI prompts.""" + return await self.getChartOfAccounts(featureInstanceId, accountType="expense") async def testConnection(self, featureInstanceId: str) -> SyncResult: """Test the connection with the configured accounting system.""" diff --git a/modules/features/trustee/accounting/accountingConnectorBase.py b/modules/features/trustee/accounting/accountingConnectorBase.py index 775a07b6..2cfa4a54 100644 --- a/modules/features/trustee/accounting/accountingConnectorBase.py +++ b/modules/features/trustee/accounting/accountingConnectorBase.py @@ -82,8 +82,8 @@ class BaseAccountingConnector(ABC): """Verify the connection with the given credentials.""" @abstractmethod - async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]: - """Load the chart of accounts from the external system.""" + async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]: + """Load the chart of accounts from the external system. accountType filters by category (e.g. 'expense', 'asset').""" @abstractmethod async def pushBooking(self, config: Dict[str, Any], booking: AccountingBooking) -> SyncResult: diff --git a/modules/features/trustee/accounting/connectors/accountingConnectorAbacus.py b/modules/features/trustee/accounting/connectors/accountingConnectorAbacus.py index 66bb14f0..193c5bf6 100644 --- a/modules/features/trustee/accounting/connectors/accountingConnectorAbacus.py +++ b/modules/features/trustee/accounting/connectors/accountingConnectorAbacus.py @@ -144,7 +144,7 @@ class AccountingConnectorAbacus(BaseAccountingConnector): except Exception as e: return SyncResult(success=False, errorMessage=str(e)) - async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]: + async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]: headers = await self._buildAuthHeaders(config) if not headers: return [] diff --git a/modules/features/trustee/accounting/connectors/accountingConnectorBexio.py b/modules/features/trustee/accounting/connectors/accountingConnectorBexio.py index 183d1bcc..ec60d761 100644 --- a/modules/features/trustee/accounting/connectors/accountingConnectorBexio.py +++ b/modules/features/trustee/accounting/connectors/accountingConnectorBexio.py @@ -92,7 +92,7 @@ class AccountingConnectorBexio(BaseAccountingConnector): return acc.get("id") return None - async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]: + async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]: accounts = await self._loadRawAccounts(config) return [ AccountingChart( diff --git a/modules/features/trustee/accounting/connectors/accountingConnectorRma.py b/modules/features/trustee/accounting/connectors/accountingConnectorRma.py index e55cfe40..9d9fbf2f 100644 --- a/modules/features/trustee/accounting/connectors/accountingConnectorRma.py +++ b/modules/features/trustee/accounting/connectors/accountingConnectorRma.py @@ -87,11 +87,14 @@ class AccountingConnectorRma(BaseAccountingConnector): except Exception as e: return SyncResult(success=False, errorMessage=str(e)) - async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]: + async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]: try: + params = {} + if accountType: + params["type"] = accountType async with aiohttp.ClientSession() as session: url = self._buildUrl(config, "charts") - async with session.get(url, headers=self._buildHeaders(config), timeout=aiohttp.ClientTimeout(total=30)) as resp: + async with session.get(url, headers=self._buildHeaders(config), params=params, timeout=aiohttp.ClientTimeout(total=30)) as resp: if resp.status != 200: logger.error(f"RMA charts failed: HTTP {resp.status}") return [] @@ -101,9 +104,17 @@ class AccountingConnectorRma(BaseAccountingConnector): items = data if isinstance(data, list) else data.get("chart", data.get("row", [])) for item in items: if isinstance(item, dict): - accNo = item.get("accno", item.get("account_number", "")) - label = item.get("description", item.get("label", "")) - charts.append(AccountingChart(accountNumber=str(accNo), label=str(label))) + accNo = str(item.get("accno", item.get("account_number", ""))) + label = str(item.get("description", item.get("label", ""))) + chartType = item.get("charttype") or item.get("category") or item.get("link") or "" + if not chartType and accNo: + firstDigit = accNo[0] if accNo else "" + chartType = { + "1": "asset", "2": "liability", "3": "revenue", + "4": "expense", "5": "expense", "6": "expense", + "7": "expense", "8": "expense", "9": "closing", + }.get(firstDigit, "") + charts.append(AccountingChart(accountNumber=accNo, label=label, accountType=chartType)) return charts except Exception as e: logger.error(f"RMA getChartOfAccounts error: {e}") @@ -149,7 +160,7 @@ class AccountingConnectorRma(BaseAccountingConnector): docParts.append(label) erfDate = datetime.utcnow().strftime("%d.%m.%Y") linkSuffix = " (" + ", ".join(docParts) + ", erf. " + erfDate + ")" - shortDesc = (rawDesc[:80] + "…") if len(rawDesc) > 80 else rawDesc + shortDesc = (rawDesc[:80] + "...") if len(rawDesc) > 80 else rawDesc description = (shortDesc + linkSuffix).strip()[:500] else: description = rawDesc[:500] diff --git a/modules/features/trustee/routeFeatureTrustee.py b/modules/features/trustee/routeFeatureTrustee.py index 2161f719..673063eb 100644 --- a/modules/features/trustee/routeFeatureTrustee.py +++ b/modules/features/trustee/routeFeatureTrustee.py @@ -1348,14 +1348,15 @@ def delete_accounting_config( async def get_chart_of_accounts( request: Request, instanceId: str = Path(..., description="Feature Instance ID"), + accountType: Optional[str] = Query(None, description="Filter by type: expense, asset, liability, revenue"), context: RequestContext = Depends(getRequestContext) ) -> List[Dict[str, Any]]: - """Load the chart of accounts from the connected accounting system.""" + """Load the chart of accounts from the connected accounting system. Optional filter by accountType.""" mandateId = _validateInstanceAccess(instanceId, context) interface = getInterface(context.user, mandateId=mandateId, featureInstanceId=instanceId) from .accounting.accountingBridge import AccountingBridge bridge = AccountingBridge(interface) - charts = await bridge.getChartOfAccounts(instanceId) + charts = await bridge.getChartOfAccounts(instanceId, accountType=accountType) return [c.model_dump() for c in charts] diff --git a/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py b/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py index fe2379bd..ac4e8fbb 100644 --- a/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py +++ b/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py @@ -24,6 +24,52 @@ logger = logging.getLogger(__name__) ALLOWED_EXTENSIONS = (".pdf", ".jpg", ".jpeg") MAX_FILES = 50 +_DEFAULT_PROMPT_FALLBACK = ( + 'Extract document type (one of: INVOICE, EXPENSE_RECEIPT, BANK_DOCUMENT, CONTRACT, UNKNOWN) ' + 'and expense/position records. Return JSON: {"documentType": "...", "records": [{...}]}. ' + 'Each record must have: valuta (YYYY-MM-DD), transactionDateTime (unix seconds), company, desc (full extracted text), ' + 'tags (from: customer, meeting, license, subscription, fuel, food, material), ' + 'bookingCurrency, bookingAmount, originalCurrency, originalAmount, vatPercentage, vatAmount, ' + 'debitAccountNumber (Soll-Konto nach Schweizer KMU-Kontenrahmen, z.B. 6200 Fahrzeugaufwand, 6000 Materialaufwand), ' + 'creditAccountNumber (Haben-Konto, z.B. 1020 Bank), taxCode, costCenter, bookingReference.' +) + + +async def _buildDefaultPromptWithAccounts(self, featureInstanceId: str) -> str: + """Build extraction prompt with real expense accounts from the connected accounting system.""" + try: + from modules.features.trustee.interfaceFeatureTrustee import getInterface as getTrusteeInterface + from modules.features.trustee.accounting.accountingBridge import AccountingBridge + trusteeInterface = getTrusteeInterface( + self.services.user, + mandateId=self.services.mandateId, + featureInstanceId=featureInstanceId, + ) + bridge = AccountingBridge(trusteeInterface) + expenseAccounts = await bridge._getExpenseAccounts(featureInstanceId) + assetAccounts = await bridge.getChartOfAccounts(featureInstanceId, accountType="asset") + except Exception as e: + logger.debug("Could not load chart of accounts for prompt: %s", e) + return "" + + if not expenseAccounts: + return "" + + expenseList = ", ".join(f"{a.accountNumber} {a.label}" for a in expenseAccounts[:50]) + bankAccounts = [a for a in assetAccounts if a.accountNumber.startswith("10")] + bankList = ", ".join(f"{a.accountNumber} {a.label}" for a in bankAccounts[:10]) if bankAccounts else "1020 Bank" + + return ( + 'Extract document type (one of: INVOICE, EXPENSE_RECEIPT, BANK_DOCUMENT, CONTRACT, UNKNOWN) ' + 'and expense/position records. Return JSON: {"documentType": "...", "records": [{...}]}. ' + 'Each record must have: valuta (YYYY-MM-DD), transactionDateTime (unix seconds), company, desc (full extracted text), ' + 'tags (from: customer, meeting, license, subscription, fuel, food, material), ' + 'bookingCurrency, bookingAmount, originalCurrency, originalAmount, vatPercentage, vatAmount, ' + f'debitAccountNumber (Soll-Konto, verwende eines der folgenden Aufwandkonten: {expenseList}), ' + f'creditAccountNumber (Haben-Konto, verwende eines der folgenden Konten: {bankList}), ' + 'taxCode, costCenter, bookingReference.' + ) + def _parseCsvToRecords(csvContent: str) -> List[Dict[str, Any]]: """Parse CSV content to list of expense records.""" @@ -58,7 +104,7 @@ async def _extractWithAi(self, chatDocumentId: str, fileId: str, fileName: str, options = AiCallOptions(resultFormat="json", operationType=OperationTypeEnum.DATA_GENERATE) try: aiResponse = await self.services.ai.callAiContent( - prompt=prompt or "Extract document type (one of: INVOICE, EXPENSE_RECEIPT, BANK_DOCUMENT, CONTRACT, UNKNOWN) and expense/position records. Return JSON: {\"documentType\": \"...\", \"records\": [{...}]}.", + prompt=prompt or _DEFAULT_PROMPT_FALLBACK, options=options, documentList=docList, contentParts=None, @@ -68,7 +114,7 @@ async def _extractWithAi(self, chatDocumentId: str, fileId: str, fileName: str, except Exception: options = AiCallOptions(resultFormat="csv", operationType=OperationTypeEnum.DATA_GENERATE) aiResponse = await self.services.ai.callAiContent( - prompt=prompt or "Extract expense data from this document. Return CSV with columns: company, desc, valuta, bookingAmount, bookingCurrency, vatPercentage, vatAmount, tags.", + prompt=prompt or _DEFAULT_PROMPT_FALLBACK, options=options, documentList=docList, contentParts=None, @@ -91,8 +137,31 @@ async def _extractWithAi(self, chatDocumentId: str, fileId: str, fileName: str, try: if raw.strip().startswith("{"): data = json.loads(raw) - documentType = (data.get("documentType") or "UNKNOWN").upper().replace(" ", "_") - records = data.get("records") or data.get("extractedData") or [] + # Direct format: {"documentType": "...", "records": [...]} + if "records" in data or "extractedData" in data: + documentType = (data.get("documentType") or "UNKNOWN").upper().replace(" ", "_") + records = data.get("records") or data.get("extractedData") or [] + # Wrapped in document structure: {"documents": [{"sections": [{"elements": [{"content": {"code": "..."}}]}]}]} + elif "documents" in data: + for doc in data.get("documents", []): + for section in doc.get("sections", []): + for elem in section.get("elements", []): + code = (elem.get("content") or {}).get("code") + if code and isinstance(code, str): + try: + inner = json.loads(code) + if isinstance(inner, dict) and ("records" in inner or "documentType" in inner): + documentType = (inner.get("documentType") or "UNKNOWN").upper().replace(" ", "_") + records = inner.get("records") or inner.get("extractedData") or [] + break + except Exception: + pass + if records: + break + if records: + break + elif "documentType" in data: + documentType = (data.get("documentType") or "UNKNOWN").upper().replace(" ", "_") except Exception: pass @@ -260,6 +329,10 @@ async def extractFromFiles(self, parameters: Dict[str, Any]) -> ActionResult: if i < len(createdMessage.documents): fileIdToChatDocId[f["fileId"]] = createdMessage.documents[i].id + # Load expense accounts from accounting system for AI prompt (if configured) + if not prompt: + prompt = await _buildDefaultPromptWithAccounts(self, featureInstanceId) + # Parallel extraction (all files at once) tasks = [ _extractOne(self, f, fileIdToChatDocId, prompt, featureInstanceId)