This commit is contained in:
patrick-motsch 2026-02-22 01:03:19 +01:00
parent 2eeaf35990
commit 6b11d66766
8 changed files with 141 additions and 30 deletions

View file

@ -376,20 +376,37 @@ AUTOMATION_TEMPLATES: Dict[str, Any] = {
"tasks": [
{
"id": "Task01",
"title": "Extract Expenses from SharePoint PDFs",
"description": "Reads PDF expense documents from SharePoint folder, extracts data via AI, and saves to TrusteePosition",
"objective": "Extract expense data from PDF documents and store in Trustee database with automatic file organization",
"title": "Run trustee pipeline on SharePoint files",
"description": "Extract expenses from SharePoint PDFs, create positions + documents, sync to accounting",
"objective": "End-to-end: SharePoint folder → AI extraction → Trustee DB → Accounting sync",
"actionList": [
{
"execMethod": "sharepoint",
"execAction": "getExpensesFromPdf",
"execMethod": "trustee",
"execAction": "extractFromFiles",
"execParameters": {
"connectionReference": "{{KEY:connectionName}}",
"sharepointFolder": "{{KEY:sharepointFolder}}",
"featureInstanceId": "{{KEY:featureInstanceId}}",
"prompt": "{{KEY:extractionPrompt}}"
"featureInstanceId": "{{KEY:featureInstanceId}}"
},
"execResultLabel": "expense_extraction_result"
"execResultLabel": "extract_result"
},
{
"execMethod": "trustee",
"execAction": "processDocuments",
"execParameters": {
"documentList": "docList:{{PREV_MESSAGE_ID}}:extract_result",
"featureInstanceId": "{{KEY:featureInstanceId}}"
},
"execResultLabel": "process_result"
},
{
"execMethod": "trustee",
"execAction": "syncToAccounting",
"execParameters": {
"documentList": "docList:{{PREV_MESSAGE_ID}}:process_result",
"featureInstanceId": "{{KEY:featureInstanceId}}"
},
"execResultLabel": "sync_result"
}
]
}
@ -398,8 +415,7 @@ AUTOMATION_TEMPLATES: Dict[str, Any] = {
"parameters": {
"connectionName": "",
"sharepointFolder": "",
"featureInstanceId": "",
"extractionPrompt": "Du bist ein Spezialist für die Extraktion von Belegdaten aus PDF-Dokumenten.\n\nAUFGABE:\nExtrahiere die Daten aus dem bereitgestellten Zahlungsbeleg und erstelle EINE EINZIGE CSV-Tabelle mit allen Datensätzen.\n\nOUTPUT-STRUKTUR:\nErstelle genau EINE Tabelle mit den folgenden Spalten. Alle extrahierten Datensätze kommen in diese eine Tabelle als Zeilen.\n\nWICHTIGE REGELN:\n1. Pro MwSt-Prozentsatz einen separaten Datensatz (= Zeile) erstellen\n2. Alle Datensätze zusammen müssen den Gesamtbetrag des Dokuments ergeben\n3. Der gesamte extrahierte Text des Dokuments muss im Feld \"desc\" erfasst werden\n4. Feld \"company\" enthält den Lieferanten/Verkäufer der Buchung\n5. Tags müssen aus dieser Liste gewählt werden: customer, meeting, license, subscription, fuel, food, material\n - Mehrere zutreffende Tags mit Komma trennen\n\nCSV-SPALTEN (in dieser Reihenfolge):\nvaluta,transactionDateTime,company,desc,tags,bookingCurrency,bookingAmount,originalCurrency,originalAmount,vatPercentage,vatAmount\n\nDATENFORMAT:\n- valuta: YYYY-MM-DD (Valutadatum)\n- transactionDateTime: Unix-Timestamp in Sekunden (Transaktionszeitpunkt)\n- company: Lieferant/Verkäufer Name\n- desc: Vollständiger extrahierter Text des Dokuments\n- tags: Komma-getrennte Tags aus der erlaubten Liste\n- bookingCurrency: Währungscode (CHF, EUR, USD, GBP)\n- bookingAmount: Buchungsbetrag als Dezimalzahl\n- originalCurrency: Original-Währungscode\n- originalAmount: Original-Betrag als Dezimalzahl\n- vatPercentage: MwSt-Prozentsatz (z.B. 8.1 für 8.1%)\n- vatAmount: MwSt-Betrag als Dezimalzahl\n\nHINWEISE:\n- Wenn nur ein MwSt-Satz vorhanden ist, einen Datensatz erstellen\n- Wenn mehrere MwSt-Sätze vorhanden sind (z.B. Lebensmittel 2.6% und Non-Food 8.1%), separate Datensätze erstellen\n- Bei fehlenden Informationen: leeres Feld oder Standardwert"
"featureInstanceId": ""
}
}
]

View file

@ -133,8 +133,11 @@ class AccountingBridge:
return SyncResult(success=False, errorMessage=f"Position {positionId} not found")
position = posRecords[0]
# Build booking once (for push; externalDocumentIds filled after document upload)
# Build booking; skip if position has no accounts (not ready for sync)
booking = self._buildBookingFromPosition(position)
if not booking.lines:
logger.info("Accounting sync skipped (no accounts): positionId=%s", positionId)
return SyncResult(success=True, errorMessage="Position hat keine Kontierung (Soll-/Haben-Konto) Sync übersprungen")
# 1) First: ensure all documents are in RMA (upload or duplicate); collect Beleg-IDs for linking
documentIds = []
@ -273,12 +276,19 @@ class AccountingBridge:
results.append(result)
return results
async def getChartOfAccounts(self, featureInstanceId: str, accountType: Optional[str] = None) -> List[AccountingChart]:
    """Load the chart of accounts from the configured external system.

    Args:
        featureInstanceId: Feature instance whose accounting connector and
            configuration are resolved via ``_resolveConnectorAndConfig``.
        accountType: Optional account category (e.g. ``"expense"``). It is
            forwarded to the connector and additionally applied locally.

    Returns:
        The accounts reported by the connector, restricted to ``accountType``
        when one is given. An empty list when no connector or configuration
        could be resolved.
    """
    connector, plainConfig, _ = await self._resolveConnectorAndConfig(featureInstanceId)
    if not connector or not plainConfig:
        return []

    charts = await connector.getChartOfAccounts(plainConfig, accountType=accountType)
    if not accountType:
        return charts

    # The filter is re-applied here regardless of what the connector did with
    # the accountType argument. Comparison ignores case and surrounding
    # whitespace because the type values originate from the external system.
    wanted = accountType.strip().lower()
    return [c for c in charts if str(c.accountType or "").strip().lower() == wanted]
async def _getExpenseAccounts(self, featureInstanceId: str) -> List[AccountingChart]:
    """Return only the expense accounts (Aufwandkonten), e.g. for use in AI prompts.

    Convenience wrapper that delegates to ``getChartOfAccounts`` with
    ``accountType="expense"``.
    """
    expenseCharts = await self.getChartOfAccounts(featureInstanceId, accountType="expense")
    return expenseCharts
async def testConnection(self, featureInstanceId: str) -> SyncResult:
"""Test the connection with the configured accounting system."""

View file

@ -82,8 +82,8 @@ class BaseAccountingConnector(ABC):
"""Verify the connection with the given credentials."""
@abstractmethod
async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]:
    """Load the chart of accounts from the external system.

    Args:
        config: Connector configuration used to reach the external system.
        accountType: Optional category filter (e.g. ``'expense'``, ``'asset'``).
            ``None`` requests all accounts.

    Returns:
        The accounts of the external system as ``AccountingChart`` entries.
    """
@abstractmethod
async def pushBooking(self, config: Dict[str, Any], booking: AccountingBooking) -> SyncResult:

View file

@ -144,7 +144,7 @@ class AccountingConnectorAbacus(BaseAccountingConnector):
except Exception as e:
return SyncResult(success=False, errorMessage=str(e))
async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]:
async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]:
headers = await self._buildAuthHeaders(config)
if not headers:
return []

View file

@ -92,7 +92,7 @@ class AccountingConnectorBexio(BaseAccountingConnector):
return acc.get("id")
return None
async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]:
async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]:
accounts = await self._loadRawAccounts(config)
return [
AccountingChart(

View file

@ -87,11 +87,14 @@ class AccountingConnectorRma(BaseAccountingConnector):
except Exception as e:
return SyncResult(success=False, errorMessage=str(e))
async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]:
async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]:
try:
params = {}
if accountType:
params["type"] = accountType
async with aiohttp.ClientSession() as session:
url = self._buildUrl(config, "charts")
async with session.get(url, headers=self._buildHeaders(config), timeout=aiohttp.ClientTimeout(total=30)) as resp:
async with session.get(url, headers=self._buildHeaders(config), params=params, timeout=aiohttp.ClientTimeout(total=30)) as resp:
if resp.status != 200:
logger.error(f"RMA charts failed: HTTP {resp.status}")
return []
@ -101,9 +104,17 @@ class AccountingConnectorRma(BaseAccountingConnector):
items = data if isinstance(data, list) else data.get("chart", data.get("row", []))
for item in items:
if isinstance(item, dict):
accNo = item.get("accno", item.get("account_number", ""))
label = item.get("description", item.get("label", ""))
charts.append(AccountingChart(accountNumber=str(accNo), label=str(label)))
accNo = str(item.get("accno", item.get("account_number", "")))
label = str(item.get("description", item.get("label", "")))
chartType = item.get("charttype") or item.get("category") or item.get("link") or ""
if not chartType and accNo:
firstDigit = accNo[0] if accNo else ""
chartType = {
"1": "asset", "2": "liability", "3": "revenue",
"4": "expense", "5": "expense", "6": "expense",
"7": "expense", "8": "expense", "9": "closing",
}.get(firstDigit, "")
charts.append(AccountingChart(accountNumber=accNo, label=label, accountType=chartType))
return charts
except Exception as e:
logger.error(f"RMA getChartOfAccounts error: {e}")
@ -149,7 +160,7 @@ class AccountingConnectorRma(BaseAccountingConnector):
docParts.append(label)
erfDate = datetime.utcnow().strftime("%d.%m.%Y")
linkSuffix = " (" + ", ".join(docParts) + ", erf. " + erfDate + ")"
shortDesc = (rawDesc[:80] + "") if len(rawDesc) > 80 else rawDesc
shortDesc = (rawDesc[:80] + "...") if len(rawDesc) > 80 else rawDesc
description = (shortDesc + linkSuffix).strip()[:500]
else:
description = rawDesc[:500]

View file

@ -1348,14 +1348,15 @@ def delete_accounting_config(
async def get_chart_of_accounts(
    request: Request,
    instanceId: str = Path(..., description="Feature Instance ID"),
    accountType: Optional[str] = Query(None, description="Filter by type: expense, asset, liability, revenue"),
    context: RequestContext = Depends(getRequestContext)
) -> List[Dict[str, Any]]:
    """Load the chart of accounts from the connected accounting system.

    Args:
        request: Incoming HTTP request (not read by this handler).
        instanceId: Feature instance ID taken from the URL path.
        accountType: Optional query parameter restricting the result to one
            account type.
        context: Request context; access to the instance is validated
            against it before any data is loaded.

    Returns:
        One ``model_dump()`` dictionary per account.
    """
    mandateId = _validateInstanceAccess(instanceId, context)
    interface = getInterface(context.user, mandateId=mandateId, featureInstanceId=instanceId)
    from .accounting.accountingBridge import AccountingBridge
    bridge = AccountingBridge(interface)
    # Single bridge call carrying the filter; an additional unfiltered call
    # would only repeat the remote request and have its result discarded.
    charts = await bridge.getChartOfAccounts(instanceId, accountType=accountType)
    return [c.model_dump() for c in charts]

View file

@ -24,6 +24,52 @@ logger = logging.getLogger(__name__)
ALLOWED_EXTENSIONS = (".pdf", ".jpg", ".jpeg")
MAX_FILES = 50
# Static extraction prompt. _extractWithAi passes it to the AI call whenever
# no prompt is available (``prompt or _DEFAULT_PROMPT_FALLBACK``). It names
# example accounts instead of accounts from a connected accounting system.
_DEFAULT_PROMPT_FALLBACK = "".join(
    (
        # Task and response envelope.
        "Extract document type (one of: INVOICE, EXPENSE_RECEIPT, BANK_DOCUMENT, CONTRACT, UNKNOWN) ",
        "and expense/position records. ",
        'Return JSON: {"documentType": "...", "records": [{...}]}. ',
        # Fields required on every record.
        "Each record must have: ",
        "valuta (YYYY-MM-DD), transactionDateTime (unix seconds), company, desc (full extracted text), ",
        "tags (from: customer, meeting, license, subscription, fuel, food, material), ",
        "bookingCurrency, bookingAmount, originalCurrency, originalAmount, vatPercentage, vatAmount, ",
        # Account assignment hints with example account numbers.
        "debitAccountNumber (Soll-Konto nach Schweizer KMU-Kontenrahmen, z.B. 6200 Fahrzeugaufwand, 6000 Materialaufwand), ",
        "creditAccountNumber (Haben-Konto, z.B. 1020 Bank), ",
        "taxCode, costCenter, bookingReference.",
    )
)
async def _buildDefaultPromptWithAccounts(self, featureInstanceId: str) -> str:
    """Build the extraction prompt with real accounts from the connected accounting system.

    Args:
        featureInstanceId: Feature instance whose accounting connection is used
            to load the chart of accounts.

    Returns:
        A prompt listing up to 50 expense accounts as debit candidates and up to
        10 asset accounts whose number starts with "10" as credit candidates
        ("1020 Bank" when none are available). An empty string when the expense
        accounts cannot be loaded or there are none.
    """
    try:
        # Imported lazily inside the function, as in the original code.
        from modules.features.trustee.interfaceFeatureTrustee import getInterface as getTrusteeInterface
        from modules.features.trustee.accounting.accountingBridge import AccountingBridge

        trusteeInterface = getTrusteeInterface(
            self.services.user,
            mandateId=self.services.mandateId,
            featureInstanceId=featureInstanceId,
        )
        bridge = AccountingBridge(trusteeInterface)
        # Use the bridge's public API rather than its private helper.
        expenseAccounts = await bridge.getChartOfAccounts(featureInstanceId, accountType="expense")
    except Exception as e:
        logger.debug("Could not load chart of accounts for prompt: %s", e)
        return ""
    if not expenseAccounts:
        return ""

    # Asset accounts only refine the credit-account hint; a failure here must
    # not throw away the expense accounts that were already loaded.
    try:
        assetAccounts = await bridge.getChartOfAccounts(featureInstanceId, accountType="asset")
    except Exception as e:
        logger.debug("Could not load asset accounts for prompt, using default bank account: %s", e)
        assetAccounts = []

    expenseList = ", ".join(f"{a.accountNumber} {a.label}" for a in expenseAccounts[:50])
    bankAccounts = [a for a in (assetAccounts or []) if str(a.accountNumber or "").startswith("10")]
    bankList = ", ".join(f"{a.accountNumber} {a.label}" for a in bankAccounts[:10]) if bankAccounts else "1020 Bank"
    return (
        'Extract document type (one of: INVOICE, EXPENSE_RECEIPT, BANK_DOCUMENT, CONTRACT, UNKNOWN) '
        'and expense/position records. Return JSON: {"documentType": "...", "records": [{...}]}. '
        'Each record must have: valuta (YYYY-MM-DD), transactionDateTime (unix seconds), company, desc (full extracted text), '
        'tags (from: customer, meeting, license, subscription, fuel, food, material), '
        'bookingCurrency, bookingAmount, originalCurrency, originalAmount, vatPercentage, vatAmount, '
        f'debitAccountNumber (Soll-Konto, verwende eines der folgenden Aufwandkonten: {expenseList}), '
        f'creditAccountNumber (Haben-Konto, verwende eines der folgenden Konten: {bankList}), '
        'taxCode, costCenter, bookingReference.'
    )
def _parseCsvToRecords(csvContent: str) -> List[Dict[str, Any]]:
"""Parse CSV content to list of expense records."""
@ -58,7 +104,7 @@ async def _extractWithAi(self, chatDocumentId: str, fileId: str, fileName: str,
options = AiCallOptions(resultFormat="json", operationType=OperationTypeEnum.DATA_GENERATE)
try:
aiResponse = await self.services.ai.callAiContent(
prompt=prompt or "Extract document type (one of: INVOICE, EXPENSE_RECEIPT, BANK_DOCUMENT, CONTRACT, UNKNOWN) and expense/position records. Return JSON: {\"documentType\": \"...\", \"records\": [{...}]}.",
prompt=prompt or _DEFAULT_PROMPT_FALLBACK,
options=options,
documentList=docList,
contentParts=None,
@ -68,7 +114,7 @@ async def _extractWithAi(self, chatDocumentId: str, fileId: str, fileName: str,
except Exception:
options = AiCallOptions(resultFormat="csv", operationType=OperationTypeEnum.DATA_GENERATE)
aiResponse = await self.services.ai.callAiContent(
prompt=prompt or "Extract expense data from this document. Return CSV with columns: company, desc, valuta, bookingAmount, bookingCurrency, vatPercentage, vatAmount, tags.",
prompt=prompt or _DEFAULT_PROMPT_FALLBACK,
options=options,
documentList=docList,
contentParts=None,
@ -91,8 +137,31 @@ async def _extractWithAi(self, chatDocumentId: str, fileId: str, fileName: str,
try:
if raw.strip().startswith("{"):
data = json.loads(raw)
documentType = (data.get("documentType") or "UNKNOWN").upper().replace(" ", "_")
records = data.get("records") or data.get("extractedData") or []
# Direct format: {"documentType": "...", "records": [...]}
if "records" in data or "extractedData" in data:
documentType = (data.get("documentType") or "UNKNOWN").upper().replace(" ", "_")
records = data.get("records") or data.get("extractedData") or []
# Wrapped in document structure: {"documents": [{"sections": [{"elements": [{"content": {"code": "..."}}]}]}]}
elif "documents" in data:
for doc in data.get("documents", []):
for section in doc.get("sections", []):
for elem in section.get("elements", []):
code = (elem.get("content") or {}).get("code")
if code and isinstance(code, str):
try:
inner = json.loads(code)
if isinstance(inner, dict) and ("records" in inner or "documentType" in inner):
documentType = (inner.get("documentType") or "UNKNOWN").upper().replace(" ", "_")
records = inner.get("records") or inner.get("extractedData") or []
break
except Exception:
pass
if records:
break
if records:
break
elif "documentType" in data:
documentType = (data.get("documentType") or "UNKNOWN").upper().replace(" ", "_")
except Exception:
pass
@ -260,6 +329,10 @@ async def extractFromFiles(self, parameters: Dict[str, Any]) -> ActionResult:
if i < len(createdMessage.documents):
fileIdToChatDocId[f["fileId"]] = createdMessage.documents[i].id
# Load expense accounts from accounting system for AI prompt (if configured)
if not prompt:
prompt = await _buildDefaultPromptWithAccounts(self, featureInstanceId)
# Parallel extraction (all files at once)
tasks = [
_extractOne(self, f, fileIdToChatDocId, prompt, featureInstanceId)