fixes
This commit is contained in:
parent
2eeaf35990
commit
6b11d66766
8 changed files with 141 additions and 30 deletions
|
|
@ -376,20 +376,37 @@ AUTOMATION_TEMPLATES: Dict[str, Any] = {
|
|||
"tasks": [
|
||||
{
|
||||
"id": "Task01",
|
||||
"title": "Extract Expenses from SharePoint PDFs",
|
||||
"description": "Reads PDF expense documents from SharePoint folder, extracts data via AI, and saves to TrusteePosition",
|
||||
"objective": "Extract expense data from PDF documents and store in Trustee database with automatic file organization",
|
||||
"title": "Run trustee pipeline on SharePoint files",
|
||||
"description": "Extract expenses from SharePoint PDFs, create positions + documents, sync to accounting",
|
||||
"objective": "End-to-end: SharePoint folder → AI extraction → Trustee DB → Accounting sync",
|
||||
"actionList": [
|
||||
{
|
||||
"execMethod": "sharepoint",
|
||||
"execAction": "getExpensesFromPdf",
|
||||
"execMethod": "trustee",
|
||||
"execAction": "extractFromFiles",
|
||||
"execParameters": {
|
||||
"connectionReference": "{{KEY:connectionName}}",
|
||||
"sharepointFolder": "{{KEY:sharepointFolder}}",
|
||||
"featureInstanceId": "{{KEY:featureInstanceId}}",
|
||||
"prompt": "{{KEY:extractionPrompt}}"
|
||||
"featureInstanceId": "{{KEY:featureInstanceId}}"
|
||||
},
|
||||
"execResultLabel": "expense_extraction_result"
|
||||
"execResultLabel": "extract_result"
|
||||
},
|
||||
{
|
||||
"execMethod": "trustee",
|
||||
"execAction": "processDocuments",
|
||||
"execParameters": {
|
||||
"documentList": "docList:{{PREV_MESSAGE_ID}}:extract_result",
|
||||
"featureInstanceId": "{{KEY:featureInstanceId}}"
|
||||
},
|
||||
"execResultLabel": "process_result"
|
||||
},
|
||||
{
|
||||
"execMethod": "trustee",
|
||||
"execAction": "syncToAccounting",
|
||||
"execParameters": {
|
||||
"documentList": "docList:{{PREV_MESSAGE_ID}}:process_result",
|
||||
"featureInstanceId": "{{KEY:featureInstanceId}}"
|
||||
},
|
||||
"execResultLabel": "sync_result"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -398,8 +415,7 @@ AUTOMATION_TEMPLATES: Dict[str, Any] = {
|
|||
"parameters": {
|
||||
"connectionName": "",
|
||||
"sharepointFolder": "",
|
||||
"featureInstanceId": "",
|
||||
"extractionPrompt": "Du bist ein Spezialist für die Extraktion von Belegdaten aus PDF-Dokumenten.\n\nAUFGABE:\nExtrahiere die Daten aus dem bereitgestellten Zahlungsbeleg und erstelle EINE EINZIGE CSV-Tabelle mit allen Datensätzen.\n\nOUTPUT-STRUKTUR:\nErstelle genau EINE Tabelle mit den folgenden Spalten. Alle extrahierten Datensätze kommen in diese eine Tabelle als Zeilen.\n\nWICHTIGE REGELN:\n1. Pro MwSt-Prozentsatz einen separaten Datensatz (= Zeile) erstellen\n2. Alle Datensätze zusammen müssen den Gesamtbetrag des Dokuments ergeben\n3. Der gesamte extrahierte Text des Dokuments muss im Feld \"desc\" erfasst werden\n4. Feld \"company\" enthält den Lieferanten/Verkäufer der Buchung\n5. Tags müssen aus dieser Liste gewählt werden: customer, meeting, license, subscription, fuel, food, material\n - Mehrere zutreffende Tags mit Komma trennen\n\nCSV-SPALTEN (in dieser Reihenfolge):\nvaluta,transactionDateTime,company,desc,tags,bookingCurrency,bookingAmount,originalCurrency,originalAmount,vatPercentage,vatAmount\n\nDATENFORMAT:\n- valuta: YYYY-MM-DD (Valutadatum)\n- transactionDateTime: Unix-Timestamp in Sekunden (Transaktionszeitpunkt)\n- company: Lieferant/Verkäufer Name\n- desc: Vollständiger extrahierter Text des Dokuments\n- tags: Komma-getrennte Tags aus der erlaubten Liste\n- bookingCurrency: Währungscode (CHF, EUR, USD, GBP)\n- bookingAmount: Buchungsbetrag als Dezimalzahl\n- originalCurrency: Original-Währungscode\n- originalAmount: Original-Betrag als Dezimalzahl\n- vatPercentage: MwSt-Prozentsatz (z.B. 8.1 für 8.1%)\n- vatAmount: MwSt-Betrag als Dezimalzahl\n\nHINWEISE:\n- Wenn nur ein MwSt-Satz vorhanden ist, einen Datensatz erstellen\n- Wenn mehrere MwSt-Sätze vorhanden sind (z.B. Lebensmittel 2.6% und Non-Food 8.1%), separate Datensätze erstellen\n- Bei fehlenden Informationen: leeres Feld oder Standardwert"
|
||||
"featureInstanceId": ""
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -133,8 +133,11 @@ class AccountingBridge:
|
|||
return SyncResult(success=False, errorMessage=f"Position {positionId} not found")
|
||||
position = posRecords[0]
|
||||
|
||||
# Build booking once (for push; externalDocumentIds filled after document upload)
|
||||
# Build booking; skip if position has no accounts (not ready for sync)
|
||||
booking = self._buildBookingFromPosition(position)
|
||||
if not booking.lines:
|
||||
logger.info("Accounting sync skipped (no accounts): positionId=%s", positionId)
|
||||
return SyncResult(success=True, errorMessage="Position hat keine Kontierung (Soll-/Haben-Konto) – Sync übersprungen")
|
||||
|
||||
# 1) First: ensure all documents are in RMA (upload or duplicate); collect Beleg-IDs for linking
|
||||
documentIds = []
|
||||
|
|
@ -273,12 +276,19 @@ class AccountingBridge:
|
|||
results.append(result)
|
||||
return results
|
||||
|
||||
async def getChartOfAccounts(self, featureInstanceId: str) -> List[AccountingChart]:
|
||||
"""Load the chart of accounts from the configured external system."""
|
||||
async def getChartOfAccounts(self, featureInstanceId: str, accountType: Optional[str] = None) -> List[AccountingChart]:
|
||||
"""Load the chart of accounts from the configured external system. Optional filter by accountType."""
|
||||
connector, plainConfig, _ = await self._resolveConnectorAndConfig(featureInstanceId)
|
||||
if not connector or not plainConfig:
|
||||
return []
|
||||
return await connector.getChartOfAccounts(plainConfig)
|
||||
charts = await connector.getChartOfAccounts(plainConfig, accountType=accountType)
|
||||
if accountType:
|
||||
charts = [c for c in charts if c.accountType == accountType]
|
||||
return charts
|
||||
|
||||
async def _getExpenseAccounts(self, featureInstanceId: str) -> List[AccountingChart]:
    """Return the accounts of type "expense" for the given feature instance.

    Thin convenience wrapper: delegates to ``getChartOfAccounts`` with
    ``accountType="expense"``. Intended for building AI prompts.
    """
    expenseCharts = await self.getChartOfAccounts(featureInstanceId, accountType="expense")
    return expenseCharts
|
||||
|
||||
async def testConnection(self, featureInstanceId: str) -> SyncResult:
|
||||
"""Test the connection with the configured accounting system."""
|
||||
|
|
|
|||
|
|
@ -82,8 +82,8 @@ class BaseAccountingConnector(ABC):
|
|||
"""Verify the connection with the given credentials."""
|
||||
|
||||
@abstractmethod
|
||||
async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]:
|
||||
"""Load the chart of accounts from the external system."""
|
||||
async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]:
|
||||
"""Load the chart of accounts from the external system. accountType filters by category (e.g. 'expense', 'asset')."""
|
||||
|
||||
@abstractmethod
|
||||
async def pushBooking(self, config: Dict[str, Any], booking: AccountingBooking) -> SyncResult:
|
||||
|
|
|
|||
|
|
@ -144,7 +144,7 @@ class AccountingConnectorAbacus(BaseAccountingConnector):
|
|||
except Exception as e:
|
||||
return SyncResult(success=False, errorMessage=str(e))
|
||||
|
||||
async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]:
|
||||
async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]:
|
||||
headers = await self._buildAuthHeaders(config)
|
||||
if not headers:
|
||||
return []
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ class AccountingConnectorBexio(BaseAccountingConnector):
|
|||
return acc.get("id")
|
||||
return None
|
||||
|
||||
async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]:
|
||||
async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]:
|
||||
accounts = await self._loadRawAccounts(config)
|
||||
return [
|
||||
AccountingChart(
|
||||
|
|
|
|||
|
|
@ -87,11 +87,14 @@ class AccountingConnectorRma(BaseAccountingConnector):
|
|||
except Exception as e:
|
||||
return SyncResult(success=False, errorMessage=str(e))
|
||||
|
||||
async def getChartOfAccounts(self, config: Dict[str, Any]) -> List[AccountingChart]:
|
||||
async def getChartOfAccounts(self, config: Dict[str, Any], accountType: Optional[str] = None) -> List[AccountingChart]:
|
||||
try:
|
||||
params = {}
|
||||
if accountType:
|
||||
params["type"] = accountType
|
||||
async with aiohttp.ClientSession() as session:
|
||||
url = self._buildUrl(config, "charts")
|
||||
async with session.get(url, headers=self._buildHeaders(config), timeout=aiohttp.ClientTimeout(total=30)) as resp:
|
||||
async with session.get(url, headers=self._buildHeaders(config), params=params, timeout=aiohttp.ClientTimeout(total=30)) as resp:
|
||||
if resp.status != 200:
|
||||
logger.error(f"RMA charts failed: HTTP {resp.status}")
|
||||
return []
|
||||
|
|
@ -101,9 +104,17 @@ class AccountingConnectorRma(BaseAccountingConnector):
|
|||
items = data if isinstance(data, list) else data.get("chart", data.get("row", []))
|
||||
for item in items:
|
||||
if isinstance(item, dict):
|
||||
accNo = item.get("accno", item.get("account_number", ""))
|
||||
label = item.get("description", item.get("label", ""))
|
||||
charts.append(AccountingChart(accountNumber=str(accNo), label=str(label)))
|
||||
accNo = str(item.get("accno", item.get("account_number", "")))
|
||||
label = str(item.get("description", item.get("label", "")))
|
||||
chartType = item.get("charttype") or item.get("category") or item.get("link") or ""
|
||||
if not chartType and accNo:
|
||||
firstDigit = accNo[0] if accNo else ""
|
||||
chartType = {
|
||||
"1": "asset", "2": "liability", "3": "revenue",
|
||||
"4": "expense", "5": "expense", "6": "expense",
|
||||
"7": "expense", "8": "expense", "9": "closing",
|
||||
}.get(firstDigit, "")
|
||||
charts.append(AccountingChart(accountNumber=accNo, label=label, accountType=chartType))
|
||||
return charts
|
||||
except Exception as e:
|
||||
logger.error(f"RMA getChartOfAccounts error: {e}")
|
||||
|
|
@ -149,7 +160,7 @@ class AccountingConnectorRma(BaseAccountingConnector):
|
|||
docParts.append(label)
|
||||
erfDate = datetime.utcnow().strftime("%d.%m.%Y")
|
||||
linkSuffix = " (" + ", ".join(docParts) + ", erf. " + erfDate + ")"
|
||||
shortDesc = (rawDesc[:80] + "…") if len(rawDesc) > 80 else rawDesc
|
||||
shortDesc = (rawDesc[:80] + "...") if len(rawDesc) > 80 else rawDesc
|
||||
description = (shortDesc + linkSuffix).strip()[:500]
|
||||
else:
|
||||
description = rawDesc[:500]
|
||||
|
|
|
|||
|
|
@ -1348,14 +1348,15 @@ def delete_accounting_config(
|
|||
async def get_chart_of_accounts(
|
||||
request: Request,
|
||||
instanceId: str = Path(..., description="Feature Instance ID"),
|
||||
accountType: Optional[str] = Query(None, description="Filter by type: expense, asset, liability, revenue"),
|
||||
context: RequestContext = Depends(getRequestContext)
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Load the chart of accounts from the connected accounting system."""
|
||||
"""Load the chart of accounts from the connected accounting system. Optional filter by accountType."""
|
||||
mandateId = _validateInstanceAccess(instanceId, context)
|
||||
interface = getInterface(context.user, mandateId=mandateId, featureInstanceId=instanceId)
|
||||
from .accounting.accountingBridge import AccountingBridge
|
||||
bridge = AccountingBridge(interface)
|
||||
charts = await bridge.getChartOfAccounts(instanceId)
|
||||
charts = await bridge.getChartOfAccounts(instanceId, accountType=accountType)
|
||||
return [c.model_dump() for c in charts]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,52 @@ logger = logging.getLogger(__name__)
|
|||
ALLOWED_EXTENSIONS = (".pdf", ".jpg", ".jpeg")
|
||||
MAX_FILES = 50
|
||||
|
||||
# Generic extraction prompt: asks the model for a document type plus expense/position
# records as JSON, including debit/credit account numbers with example accounts from
# the Swiss SME chart of accounts.
# NOTE(review): this constant appears to be used as `prompt or _DEFAULT_PROMPT_FALLBACK`
# for both the JSON call and the CSV retry - confirm that a JSON-oriented prompt is
# intended for the CSV retry.
_DEFAULT_PROMPT_FALLBACK = (
|
||||
'Extract document type (one of: INVOICE, EXPENSE_RECEIPT, BANK_DOCUMENT, CONTRACT, UNKNOWN) '
|
||||
'and expense/position records. Return JSON: {"documentType": "...", "records": [{...}]}. '
|
||||
'Each record must have: valuta (YYYY-MM-DD), transactionDateTime (unix seconds), company, desc (full extracted text), '
|
||||
'tags (from: customer, meeting, license, subscription, fuel, food, material), '
|
||||
'bookingCurrency, bookingAmount, originalCurrency, originalAmount, vatPercentage, vatAmount, '
|
||||
'debitAccountNumber (Soll-Konto nach Schweizer KMU-Kontenrahmen, z.B. 6200 Fahrzeugaufwand, 6000 Materialaufwand), '
|
||||
'creditAccountNumber (Haben-Konto, z.B. 1020 Bank), taxCode, costCenter, bookingReference.'
|
||||
)
|
||||
|
||||
|
||||
async def _buildDefaultPromptWithAccounts(self, featureInstanceId: str) -> str:
    """Build the extraction prompt using real accounts from the accounting system.

    Args:
        self: Action/method object exposing ``services.user`` and
            ``services.mandateId`` (this is a module-level function that is
            called with the owning object passed explicitly).
        featureInstanceId: Trustee feature instance whose accounting
            connection is queried.

    Returns:
        The prompt text listing the available expense accounts (debit side)
        and bank accounts (credit side), or ``""`` when the expense accounts
        cannot be loaded or none exist, so the caller can fall back to its
        generic prompt.
    """
    try:
        # Imported lazily inside the function, as in the original code.
        from modules.features.trustee.interfaceFeatureTrustee import getInterface as getTrusteeInterface
        from modules.features.trustee.accounting.accountingBridge import AccountingBridge

        trusteeInterface = getTrusteeInterface(
            self.services.user,
            mandateId=self.services.mandateId,
            featureInstanceId=featureInstanceId,
        )
        bridge = AccountingBridge(trusteeInterface)
        expenseAccounts = await bridge._getExpenseAccounts(featureInstanceId)
    except Exception as e:
        # Best effort: without expense accounts there is nothing to add to the prompt.
        logger.debug("Could not load chart of accounts for prompt: %s", e)
        return ""

    if not expenseAccounts:
        return ""

    # Asset accounts only refine the credit-account hint. A failure here must
    # not discard the expense accounts that were already loaded.
    try:
        assetAccounts = await bridge.getChartOfAccounts(featureInstanceId, accountType="asset")
    except Exception as e:
        logger.debug("Could not load asset accounts for prompt, using default credit account: %s", e)
        assetAccounts = []

    return _composeAccountPrompt(expenseAccounts, assetAccounts)


def _composeAccountPrompt(
    expenseAccounts: List[Any],
    assetAccounts: List[Any],
    *,
    maxExpenseAccounts: int = 50,
    maxBankAccounts: int = 10,
) -> str:
    """Render the extraction prompt text from already-loaded account lists.

    Each account object must expose ``accountNumber`` and ``label``. Asset
    accounts whose number starts with "10" are offered as credit accounts;
    if none qualify, "1020 Bank" is used. The list sizes are capped to keep
    the prompt short.
    """
    expenseList = ", ".join(
        f"{a.accountNumber} {a.label}" for a in expenseAccounts[:maxExpenseAccounts]
    )
    # str(... or "") keeps a missing or non-string account number from raising.
    bankAccounts = [
        a for a in (assetAccounts or [])
        if str(a.accountNumber or "").startswith("10")
    ]
    if bankAccounts:
        bankList = ", ".join(f"{a.accountNumber} {a.label}" for a in bankAccounts[:maxBankAccounts])
    else:
        bankList = "1020 Bank"

    return (
        'Extract document type (one of: INVOICE, EXPENSE_RECEIPT, BANK_DOCUMENT, CONTRACT, UNKNOWN) '
        'and expense/position records. Return JSON: {"documentType": "...", "records": [{...}]}. '
        'Each record must have: valuta (YYYY-MM-DD), transactionDateTime (unix seconds), company, desc (full extracted text), '
        'tags (from: customer, meeting, license, subscription, fuel, food, material), '
        'bookingCurrency, bookingAmount, originalCurrency, originalAmount, vatPercentage, vatAmount, '
        f'debitAccountNumber (Soll-Konto, verwende eines der folgenden Aufwandkonten: {expenseList}), '
        f'creditAccountNumber (Haben-Konto, verwende eines der folgenden Konten: {bankList}), '
        'taxCode, costCenter, bookingReference.'
    )
|
||||
|
||||
|
||||
def _parseCsvToRecords(csvContent: str) -> List[Dict[str, Any]]:
|
||||
"""Parse CSV content to list of expense records."""
|
||||
|
|
@ -58,7 +104,7 @@ async def _extractWithAi(self, chatDocumentId: str, fileId: str, fileName: str,
|
|||
options = AiCallOptions(resultFormat="json", operationType=OperationTypeEnum.DATA_GENERATE)
|
||||
try:
|
||||
aiResponse = await self.services.ai.callAiContent(
|
||||
prompt=prompt or "Extract document type (one of: INVOICE, EXPENSE_RECEIPT, BANK_DOCUMENT, CONTRACT, UNKNOWN) and expense/position records. Return JSON: {\"documentType\": \"...\", \"records\": [{...}]}.",
|
||||
prompt=prompt or _DEFAULT_PROMPT_FALLBACK,
|
||||
options=options,
|
||||
documentList=docList,
|
||||
contentParts=None,
|
||||
|
|
@ -68,7 +114,7 @@ async def _extractWithAi(self, chatDocumentId: str, fileId: str, fileName: str,
|
|||
except Exception:
|
||||
options = AiCallOptions(resultFormat="csv", operationType=OperationTypeEnum.DATA_GENERATE)
|
||||
aiResponse = await self.services.ai.callAiContent(
|
||||
prompt=prompt or "Extract expense data from this document. Return CSV with columns: company, desc, valuta, bookingAmount, bookingCurrency, vatPercentage, vatAmount, tags.",
|
||||
prompt=prompt or _DEFAULT_PROMPT_FALLBACK,
|
||||
options=options,
|
||||
documentList=docList,
|
||||
contentParts=None,
|
||||
|
|
@ -91,8 +137,31 @@ async def _extractWithAi(self, chatDocumentId: str, fileId: str, fileName: str,
|
|||
try:
|
||||
if raw.strip().startswith("{"):
|
||||
data = json.loads(raw)
|
||||
documentType = (data.get("documentType") or "UNKNOWN").upper().replace(" ", "_")
|
||||
records = data.get("records") or data.get("extractedData") or []
|
||||
# Direct format: {"documentType": "...", "records": [...]}
|
||||
if "records" in data or "extractedData" in data:
|
||||
documentType = (data.get("documentType") or "UNKNOWN").upper().replace(" ", "_")
|
||||
records = data.get("records") or data.get("extractedData") or []
|
||||
# Wrapped in document structure: {"documents": [{"sections": [{"elements": [{"content": {"code": "..."}}]}]}]}
|
||||
elif "documents" in data:
|
||||
for doc in data.get("documents", []):
|
||||
for section in doc.get("sections", []):
|
||||
for elem in section.get("elements", []):
|
||||
code = (elem.get("content") or {}).get("code")
|
||||
if code and isinstance(code, str):
|
||||
try:
|
||||
inner = json.loads(code)
|
||||
if isinstance(inner, dict) and ("records" in inner or "documentType" in inner):
|
||||
documentType = (inner.get("documentType") or "UNKNOWN").upper().replace(" ", "_")
|
||||
records = inner.get("records") or inner.get("extractedData") or []
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
if records:
|
||||
break
|
||||
if records:
|
||||
break
|
||||
elif "documentType" in data:
|
||||
documentType = (data.get("documentType") or "UNKNOWN").upper().replace(" ", "_")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -260,6 +329,10 @@ async def extractFromFiles(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
if i < len(createdMessage.documents):
|
||||
fileIdToChatDocId[f["fileId"]] = createdMessage.documents[i].id
|
||||
|
||||
# Load expense accounts from accounting system for AI prompt (if configured)
|
||||
if not prompt:
|
||||
prompt = await _buildDefaultPromptWithAccounts(self, featureInstanceId)
|
||||
|
||||
# Parallel extraction (all files at once)
|
||||
tasks = [
|
||||
_extractOne(self, f, fileIdToChatDocId, prompt, featureInstanceId)
|
||||
|
|
|
|||
Loading…
Reference in a new issue