From 87f237a7a2be3c14e033f3711325a142d95e7ac9 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Sun, 25 Jan 2026 23:57:59 +0100 Subject: [PATCH] expenses workflow concept --- .../expenses_workflow_definition.md | 1015 +++++++++++++++++ 1 file changed, 1015 insertions(+) create mode 100644 implementation/Use Case Expenses Workflow/expenses_workflow_definition.md diff --git a/implementation/Use Case Expenses Workflow/expenses_workflow_definition.md b/implementation/Use Case Expenses Workflow/expenses_workflow_definition.md new file mode 100644 index 0000000..64c3936 --- /dev/null +++ b/implementation/Use Case Expenses Workflow/expenses_workflow_definition.md @@ -0,0 +1,1015 @@ +# Expenses Workflow Definition + +## Übersicht + +Dieses Dokument beschreibt die Implementierung eines automatisierten Workflows zum Auslesen von Spesen aus PDF-Dokumenten in SharePoint und deren Speicherung als `TrusteePosition`-Einträge in der Datenbank. + +--- + +## 1. Neue Action: `getExpensesFromPdf` + +### 1.1 Datei-Struktur + +``` +gateway/modules/workflows/methods/methodSharepoint/ +├── actions/ +│ └── getExpensesFromPdf.py # NEUE DATEI +├── methodSharepoint.py # Action-Registration hinzufügen +``` + +### 1.2 Action-Definition in `methodSharepoint.py` + +```python +from .actions.getExpensesFromPdf import getExpensesFromPdf + +# In __init__ der MethodSharepoint-Klasse, innerhalb _actions Dict: +"getExpensesFromPdf": WorkflowActionDefinition( + actionId="sharepoint.getExpensesFromPdf", + description="Extract expenses from PDF documents in SharePoint folder and save to TrusteePosition", + dynamicMode=False, # WICHTIG: Nicht für dynamic workflow nutzbar + parameters={ + "connectionReference": WorkflowActionParameter( + name="connectionReference", + type="str", + frontendType=FrontendType.USER_CONNECTION, + required=True, + description="Microsoft connection label for SharePoint access" + ), + "sharepointFolder": WorkflowActionParameter( + name="sharepointFolder", + type="str", + 
async def getExpensesFromPdf(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Extract expenses from PDF documents in SharePoint and save them to TrusteePosition.

    Process:
    1. Locate PDF files in the SharePoint folder (at most 50 files per execution).
    2. For each PDF document:
       a. Download the file.
       b. Call the AI service to extract the expense data in CSV format.
       c. If no records were extracted, move the document to the "error" subfolder.
       d. Validate/calculate VAT and complete valuta/transactionDateTime.
       e. Save all records to TrusteePosition.
       f. Move the document to the "processed" subfolder with a timestamp prefix.
    3. Return a JSON summary of processed, skipped and failed documents.

    Parameters (keys of ``parameters``):
    - connectionReference (str): Microsoft connection label
    - sharepointFolder (str): SharePoint folder path
    - featureInstanceId (str): Feature instance ID for TrusteePosition
    - prompt (str): AI prompt for content extraction
    - parentOperationId (optional): forwarded to the progress log

    Returns:
        ActionResult with success status and processing summary. Every return
        path that follows progressLogStart also finishes the progress log.
    """
    MAX_FILES_PER_EXECUTION = 50

    operationId = None
    processedDocuments = []
    skippedDocuments = []
    errorDocuments = []
    totalPositions = 0

    def _fail(message: str) -> ActionResult:
        # Close the progress entry before reporting the failure; otherwise the
        # operation would stay open in the progress log indefinitely.
        if operationId:
            self.services.chat.progressLogFinish(operationId, False)
        return ActionResult.isFailure(error=message)

    try:
        # Initialize progress tracking
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_expenses_{workflowId}_{int(time.time())}"

        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Extract Expenses from PDF",
            "SharePoint PDF Processing",
            "Initializing expense extraction",
            parentOperationId=parentOperationId
        )

        # Extract and validate required parameters
        connectionReference = parameters.get("connectionReference")
        sharepointFolder = parameters.get("sharepointFolder")
        featureInstanceId = parameters.get("featureInstanceId")
        prompt = parameters.get("prompt")

        for requiredName in ("connectionReference", "sharepointFolder", "featureInstanceId", "prompt"):
            if not parameters.get(requiredName):
                return _fail(f"{requiredName} is required")

        # Step 1: Get Microsoft connection
        self.services.chat.progressLogUpdate(operationId, 0.1, "Getting Microsoft connection")
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return _fail("No valid Microsoft connection found")

        # Step 2: Find PDF files in folder
        self.services.chat.progressLogUpdate(operationId, 0.15, "Finding PDF files in SharePoint folder")
        findParams = {
            "connectionReference": connectionReference,
            "searchQuery": f"{sharepointFolder}:files:.pdf",
            "maxResults": 1000
        }
        findResult = await self.findDocumentPath(findParams)
        if not findResult.success:
            return _fail(f"Failed to find PDF files: {findResult.error}")

        pdfFiles = _extractPdfFilesFromResult(findResult)
        if not pdfFiles:
            self.services.chat.progressLogFinish(operationId, True)
            return ActionResult.isSuccess(
                documents=[ActionDocument(
                    documentName="expense_extraction_result.json",
                    documentData=json.dumps({
                        "status": "no_documents",
                        "message": "No PDF files found in the specified folder",
                        "folder": sharepointFolder
                    }, indent=2),
                    mimeType="application/json",
                    validationMetadata={"actionType": "sharepoint.getExpensesFromPdf"}
                )]
            )

        # Remember the untruncated count now instead of re-parsing findResult later.
        originalFileCount = len(pdfFiles)
        if originalFileCount > MAX_FILES_PER_EXECUTION:
            logger.warning(f"Found {originalFileCount} PDFs, limiting to {MAX_FILES_PER_EXECUTION}")
            pdfFiles = pdfFiles[:MAX_FILES_PER_EXECUTION]

        # Step 3: Process each PDF
        totalFiles = len(pdfFiles)
        progressPerFile = 0.7 / totalFiles  # 70% of the progress bar is file processing

        for idx, pdfFile in enumerate(pdfFiles):
            currentProgress = 0.2 + (idx * progressPerFile)
            fileName = pdfFile.get("name", f"file_{idx}")
            fileId = pdfFile.get("id")
            siteId = pdfFile.get("siteId")

            self.services.chat.progressLogUpdate(
                operationId,
                currentProgress,
                f"Processing {idx + 1}/{totalFiles}: {fileName}"
            )

            try:
                # 3a: Download PDF content
                fileContent = await self.services.sharepoint.downloadFile(siteId, fileId)
                if not fileContent:
                    errorDocuments.append({
                        "file": fileName,
                        "error": "Failed to download",
                        "movedTo": await _quarantineFile(self, connectionReference, pdfFile, fileName)
                    })
                    continue

                # 3b: AI call to extract expense data
                aiResult = await _extractExpensesWithAi(
                    self.services,
                    fileContent,
                    fileName,
                    prompt
                )
                if not aiResult.get("success"):
                    errorDocuments.append({
                        "file": fileName,
                        "error": aiResult.get("error", "AI extraction failed"),
                        "movedTo": await _quarantineFile(self, connectionReference, pdfFile, fileName)
                    })
                    continue

                records = aiResult.get("records", [])

                # 3c: Empty extraction - park the document in the error folder
                if not records:
                    logger.warning(f"Document {fileName}: No records extracted, moving to error folder")
                    skippedDocuments.append({
                        "file": fileName,
                        "reason": "No expense records extracted",
                        "movedTo": await _quarantineFile(self, connectionReference, pdfFile, fileName)
                    })
                    continue

                # 3d: Validate and enrich records
                validatedRecords = _validateAndEnrichRecords(records, fileName)

                # 3e: Save to TrusteePosition
                savedCount = await _saveToTrusteePosition(
                    self.services,
                    validatedRecords,
                    featureInstanceId
                )
                totalPositions += savedCount

                if savedCount == 0:
                    # Nothing was persisted: filing the PDF under "processed"
                    # would silently drop the expense.
                    errorDocuments.append({
                        "file": fileName,
                        "error": "No positions could be saved",
                        "movedTo": await _quarantineFile(self, connectionReference, pdfFile, fileName)
                    })
                    continue

                # 3f: Move document to "processed" subfolder with timestamp prefix
                timestamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
                newFileName = f"{timestamp}_{fileName}"

                moveSuccess = await _moveToProcessedFolder(
                    self,
                    connectionReference,
                    siteId,
                    pdfFile.get("folderPath", ""),
                    fileName,
                    newFileName
                )
                if not moveSuccess:
                    # The source file stays in place and will be picked up again
                    # by the next run, which creates duplicate positions.
                    logger.warning(f"Document {fileName}: saved {savedCount} positions but move to processed failed")

                processedDocuments.append({
                    "file": fileName,
                    "newLocation": f"processed/{newFileName}" if moveSuccess else "move_failed",
                    "recordsExtracted": len(validatedRecords),
                    "recordsSaved": savedCount
                })

            except Exception as e:
                logger.error(f"Error processing {fileName}: {str(e)}")
                errorDocuments.append({
                    "file": fileName,
                    "error": str(e),
                    "movedTo": await _quarantineFile(self, connectionReference, pdfFile, fileName)
                })

        # Step 4: Create result summary
        self.services.chat.progressLogUpdate(operationId, 0.95, "Creating result summary")

        remainingFiles = max(0, originalFileCount - totalFiles)

        resultSummary = {
            "status": "completed",
            "folder": sharepointFolder,
            "featureInstanceId": featureInstanceId,
            "summary": {
                "totalFilesFound": originalFileCount,
                "filesProcessedThisRun": totalFiles,
                "remainingFiles": remainingFiles,
                "successfulDocuments": len(processedDocuments),
                "skippedDocuments": len(skippedDocuments),
                "errorDocuments": len(errorDocuments),
                "totalPositionsSaved": totalPositions
            },
            "processedDocuments": processedDocuments,
            "skippedDocuments": skippedDocuments,
            "errorDocuments": errorDocuments,
            "note": f"{remainingFiles} files remaining for next execution" if remainingFiles > 0 else None
        }

        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(
            documents=[ActionDocument(
                documentName="expense_extraction_result.json",
                documentData=json.dumps(resultSummary, indent=2),
                mimeType="application/json",
                validationMetadata={
                    "actionType": "sharepoint.getExpensesFromPdf",
                    "sharepointFolder": sharepointFolder,
                    "featureInstanceId": featureInstanceId,
                    "totalPositions": totalPositions
                }
            )]
        )

    except Exception as e:
        logger.error(f"Error in getExpensesFromPdf: {str(e)}")
        if operationId:
            self.services.chat.progressLogFinish(operationId, False)
        return ActionResult.isFailure(error=str(e))


async def _quarantineFile(
    self,
    connectionReference: str,
    pdfFile: Dict[str, Any],
    fileName: str
) -> str:
    """Move a failed PDF to the 'error' subfolder and return the location label for the summary."""
    moved = await _moveToErrorFolder(
        self,
        connectionReference,
        pdfFile.get("siteId"),
        pdfFile.get("folderPath", ""),
        fileName
    )
    # Report the real outcome instead of always claiming "error/".
    return "error/" if moved else "move_failed"


def _extractPdfFilesFromResult(findResult: ActionResult) -> List[Dict[str, Any]]:
    """
    Extract PDF file information from a findDocumentPath result.

    Placeholder: the parsing is not implemented yet, so an empty list is
    returned. The caller reads the keys "name", "id", "siteId" and
    "folderPath" from each returned entry.
    """
    pdfFiles = []
    # Implementation: Parse ActionDocument data to extract file IDs, names, paths
    # ...
    return pdfFiles
Parse ActionDocument data to extract file IDs, names, paths + # ... + return pdfFiles + + +async def _extractExpensesWithAi( + services, + fileContent: bytes, + fileName: str, + prompt: str +) -> Dict[str, Any]: + """ + Call AI service to extract expense data from PDF content. + AI service handles retries internally - no retry logic needed here. + + Returns dict with: + - success: bool + - records: List[Dict] - extracted records in TrusteePosition format + - error: str (if success=False) + """ + try: + # Convert PDF to text/base64 for AI + base64Content = base64.b64encode(fileContent).decode('utf-8') + + # Call AI service with prompt (AI service handles PDF extraction internally) + aiResponse = await services.ai.processDocument( + documentContent=base64Content, + documentName=fileName, + mimeType="application/pdf", + prompt=prompt, + outputFormat="csv" + ) + + if not aiResponse or not aiResponse.get("success"): + return {"success": False, "error": aiResponse.get("error", "AI call failed")} + + # Parse CSV response to records + csvContent = aiResponse.get("content", "") + records = _parseCsvToRecords(csvContent) + + return {"success": True, "records": records} + + except Exception as e: + return {"success": False, "error": str(e)} + + +async def _handleRateLimitError(waitSeconds: int = 60): + """Handle SharePoint rate limit by waiting.""" + import asyncio + logger.warning(f"Rate limit hit, waiting {waitSeconds} seconds before continuing") + await asyncio.sleep(waitSeconds) + + +def _parseCsvToRecords(csvContent: str) -> List[Dict[str, Any]]: + """Parse CSV content to list of expense records.""" + records = [] + try: + reader = csv.DictReader(io.StringIO(csvContent)) + for row in reader: + records.append(row) + except Exception as e: + logger.error(f"Error parsing CSV: {str(e)}") + return records + + +def _validateAndEnrichRecords( + records: List[Dict[str, Any]], + sourceFileName: str +) -> List[Dict[str, Any]]: + """ + Validate and enrich expense records: + 1. 
Calculate/correct VAT amount + 2. Complete valuta/transactionDateTime if one is missing + 3. Validate tags + """ + enrichedRecords = [] + + for record in records: + enriched = record.copy() + + # VAT calculation/validation + vatPercentage = _parseFloat(record.get("vatPercentage", 0)) + vatAmount = _parseFloat(record.get("vatAmount", 0)) + bookingAmount = _parseFloat(record.get("bookingAmount", 0)) + + if vatPercentage > 0 and bookingAmount > 0: + # Calculate expected VAT amount + expectedVat = bookingAmount * vatPercentage / (100 + vatPercentage) + + # If vatAmount is missing or significantly different, recalculate + if vatAmount == 0 or abs(vatAmount - expectedVat) > 0.01: + enriched["vatAmount"] = round(expectedVat, 2) + logger.info(f"VAT amount corrected: {vatAmount} -> {enriched['vatAmount']}") + + # Valuta / transactionDateTime completion + valuta = record.get("valuta") + transactionDateTime = record.get("transactionDateTime") + + if valuta and not transactionDateTime: + # Convert valuta date to timestamp + try: + dt = datetime.strptime(valuta, "%Y-%m-%d") + enriched["transactionDateTime"] = dt.replace(hour=12).timestamp() + except: + pass + elif transactionDateTime and not valuta: + # Convert timestamp to valuta date + try: + ts = float(transactionDateTime) + dt = datetime.fromtimestamp(ts, UTC) + enriched["valuta"] = dt.strftime("%Y-%m-%d") + except: + pass + + # Validate tags + tags = record.get("tags", "") + if tags: + tagList = [t.strip().lower() for t in tags.split(",")] + validTags = [t for t in tagList if t in ALLOWED_TAGS] + enriched["tags"] = ",".join(validTags) + + # Store source file info in description + existingDesc = record.get("desc", "") + if sourceFileName and sourceFileName not in existingDesc: + enriched["desc"] = f"[Source: {sourceFileName}]\n{existingDesc}" + + enrichedRecords.append(enriched) + + return enrichedRecords + + +def _parseFloat(value) -> float: + """Safely parse float value.""" + try: + return float(value) if value else 0.0 + 
except (ValueError, TypeError): + return 0.0 + + +async def _saveToTrusteePosition( + services, + records: List[Dict[str, Any]], + featureInstanceId: str +) -> int: + """Save validated records to TrusteePosition table.""" + savedCount = 0 + + # Get Trustee interface + from modules.features.trustee.interfaceFeatureTrustee import getInterface + trusteeInterface = getInterface( + services.user, + mandateId=services.mandateId, + featureInstanceId=featureInstanceId + ) + + for record in records: + try: + position = { + "valuta": record.get("valuta"), + "transactionDateTime": record.get("transactionDateTime"), + "company": record.get("company", ""), + "desc": record.get("desc", ""), + "tags": record.get("tags", ""), + "bookingCurrency": record.get("bookingCurrency", "CHF"), + "bookingAmount": _parseFloat(record.get("bookingAmount", 0)), + "originalCurrency": record.get("originalCurrency", "CHF"), + "originalAmount": _parseFloat(record.get("originalAmount", 0)), + "vatPercentage": _parseFloat(record.get("vatPercentage", 0)), + "vatAmount": _parseFloat(record.get("vatAmount", 0)), + "featureInstanceId": featureInstanceId, + "mandateId": services.mandateId + } + + result = trusteeInterface.createPosition(position) + if result: + savedCount += 1 + + except Exception as e: + logger.error(f"Failed to save position: {str(e)}") + + return savedCount + + +async def _moveToProcessedFolder( + self, + connectionReference: str, + siteId: str, + sourceFolderPath: str, + sourceFileName: str, + destFileName: str +) -> bool: + """Move processed PDF to 'processed' subfolder.""" + try: + processedFolder = f"{sourceFolderPath}/processed" + + # Ensure 'processed' folder exists (create if not) + await _ensureFolderExists(self, connectionReference, siteId, processedFolder) + + # Copy file to new location + copyResult = await self.copyFile({ + "connectionReference": connectionReference, + "siteId": siteId, + "sourceFolder": sourceFolderPath, + "sourceFile": sourceFileName, + "destFolder": 
processedFolder, + "destFile": destFileName + }) + + if copyResult.success: + # Delete original file after successful copy + await _deleteFile(self, connectionReference, siteId, sourceFolderPath, sourceFileName) + return True + + return False + + except Exception as e: + logger.error(f"Failed to move file to processed: {str(e)}") + return False + + +async def _moveToErrorFolder( + self, + connectionReference: str, + siteId: str, + sourceFolderPath: str, + sourceFileName: str # Keep original filename +) -> bool: + """Move failed PDF to 'error' subfolder (filename unchanged).""" + try: + errorFolder = f"{sourceFolderPath}/error" + + # Ensure 'error' folder exists (create if not) + await _ensureFolderExists(self, connectionReference, siteId, errorFolder) + + # Copy file to error folder (keep original name) + copyResult = await self.copyFile({ + "connectionReference": connectionReference, + "siteId": siteId, + "sourceFolder": sourceFolderPath, + "sourceFile": sourceFileName, + "destFolder": errorFolder, + "destFile": sourceFileName # Same filename + }) + + if copyResult.success: + # Delete original file after successful copy + await _deleteFile(self, connectionReference, siteId, sourceFolderPath, sourceFileName) + return True + + return False + + except Exception as e: + logger.error(f"Failed to move file to error folder: {str(e)}") + return False + + +async def _ensureFolderExists( + self, + connectionReference: str, + siteId: str, + folderPath: str +) -> bool: + """Create folder if it doesn't exist.""" + try: + # Use SharePoint API to create folder + # Graph API: POST /sites/{siteId}/drive/root:/{folderPath} + # with body: {"name": folderName, "folder": {}, "@microsoft.graph.conflictBehavior": "fail"} + # ... implementation ... 
+ return True + except Exception as e: + logger.error(f"Failed to ensure folder exists: {str(e)}") + return False + + +async def _deleteFile( + self, + connectionReference: str, + siteId: str, + folderPath: str, + fileName: str +) -> bool: + """Delete file from SharePoint.""" + try: + # Use SharePoint API to delete file + # Graph API: DELETE /sites/{siteId}/drive/root:/{folderPath}/{fileName} + # ... implementation ... + return True + except Exception as e: + logger.error(f"Failed to delete file: {str(e)}") + return False +``` + +--- + +## 2. Automation Template: `getExpenses` + +### 2.1 Template-Definition (hinzufügen in `subAutomationTemplates.py`) + +```python +{ + "template": { + "overview": "Expenses PDF Extraction", + "tasks": [ + { + "id": "Task01", + "title": "Extract Expenses from SharePoint PDFs", + "description": "Reads PDF expense documents from SharePoint folder and saves extracted data to TrusteePosition", + "objective": "Extract expense data from PDF documents and store in Trustee database", + "actionList": [ + { + "execMethod": "sharepoint", + "execAction": "getExpensesFromPdf", + "execParameters": { + "connectionReference": "{{KEY:connectionName}}", + "sharepointFolder": "{{KEY:sharepointFolder}}", + "featureInstanceId": "{{KEY:featureInstanceId}}", + "prompt": "{{KEY:extractionPrompt}}" + }, + "execResultLabel": "expense_extraction_result" + } + ] + } + ] + }, + "parameters": { + "connectionName": "", + "sharepointFolder": "", + "featureInstanceId": "", + "extractionPrompt": """Du bist ein Spezialist für die Extraktion von Spesendaten aus PDF-Dokumenten. + +AUFGABE: +Extrahiere alle Speseneinträge aus dem bereitgestellten PDF-Dokument und gib sie im CSV-Format zurück. + +WICHTIGE REGELN: +1. Pro MwSt-Prozentsatz einen separaten Datensatz erstellen +2. Alle Datensätze zusammen müssen den Gesamtbetrag des Dokuments ergeben +3. Der gesamte extrahierte Text des Dokuments muss im Feld "desc" erfasst werden +4. 
Feld "company" enthält den Lieferanten/Verkäufer der Buchung +5. Tags müssen aus dieser Liste gewählt werden: customer, meeting, license, subscription, fuel, food, material + - Mehrere zutreffende Tags mit Komma trennen + +CSV-SPALTEN (in dieser Reihenfolge): +valuta,transactionDateTime,company,desc,tags,bookingCurrency,bookingAmount,originalCurrency,originalAmount,vatPercentage,vatAmount + +DATENFORMAT: +- valuta: YYYY-MM-DD (Valutadatum) +- transactionDateTime: Unix-Timestamp in Sekunden (Transaktionszeitpunkt) +- company: Lieferant/Verkäufer Name +- desc: Vollständiger extrahierter Text des Dokuments +- tags: Komma-getrennte Tags aus der erlaubten Liste +- bookingCurrency: Währungscode (CHF, EUR, USD, GBP) +- bookingAmount: Buchungsbetrag als Dezimalzahl +- originalCurrency: Original-Währungscode +- originalAmount: Original-Betrag als Dezimalzahl +- vatPercentage: MwSt-Prozentsatz (z.B. 8.1 für 8.1%) +- vatAmount: MwSt-Betrag als Dezimalzahl + +BEISPIEL OUTPUT: +```csv +valuta,transactionDateTime,company,desc,tags,bookingCurrency,bookingAmount,originalCurrency,originalAmount,vatPercentage,vatAmount +2026-01-15,1768489200,Migros AG,"Einkauf Migros Zürich...",food,CHF,45.50,CHF,45.50,2.6,1.15 +2026-01-15,1768489200,Migros AG,"Einkauf Migros Zürich...",material,CHF,12.30,CHF,12.30,8.1,0.92 +``` + +HINWEISE: +- Wenn nur ein MwSt-Satz vorhanden ist, einen Datensatz erstellen +- Wenn mehrere MwSt-Sätze vorhanden sind (z.B. 
Lebensmittel 2.6% und Non-Food 8.1%), separate Datensätze erstellen +- Bei fehlenden Informationen: leeres Feld oder Standardwert +- Keine Anführungszeichen um numerische Werte""" + } +} +``` + +### 2.2 Placeholder-Beschreibung + +| Placeholder | Beschreibung | Beispielwert | +|------------|--------------|--------------| +| `connectionName` | User Connection Reference für SharePoint | `connection:msft:user@company.ch` | +| `sharepointFolder` | SharePoint-Ordnerpfad mit PDFs | `/sites/MySite/Documents/Expenses` | +| `featureInstanceId` | Feature Instance ID des Trustee | `fi_abc123` | +| `extractionPrompt` | AI-Prompt für Extraktion | (siehe oben) | + +--- + +## 3. Frontend: Neue Seite im Trustee Feature + +### 3.1 Komponenten-Struktur + +``` +frontend_nyla/src/features/trustee/ +├── pages/ +│ └── TrusteeExpenseImport.tsx # NEUE SEITE +├── components/ +│ └── SharepointFolderSelect.tsx # Wiederverwendbare Komponente +``` + +### 3.2 Seiten-Anforderungen + +1. **Microsoft Connection Button** + - Icon: Microsoft-Logo (wie bei User Connections Seite) + - Klick öffnet OAuth-Popup für Microsoft-Anmeldung + - Nutzt `useConnections.createMicrosoftConnectionAndAuth()` + - Status-Anzeige: verbunden/nicht verbunden + +2. **SharePoint Folder Dropdown** + - Dropdown zur Auswahl eines SharePoint-Ordners + - Lädt Ordner-Liste über `/api/sharepoint/folders` Endpoint + - Zeigt Site-Name und Ordner-Pfad + - Referenz: Neutralization Feature hat ähnliches Dropdown + +3. **Aktivieren-Button** + - Erstellt `AutomationDefinition` mit: + - Template: "getExpenses" + - Placeholders: ausgefüllte Werte + - Schedule: täglich (z.B. 
`0 22 * * *`) + - Active: true + - Speichert über `/api/automation/definitions` Endpoint + +### 3.3 Beispiel-Implementation + +```tsx +// TrusteeExpenseImport.tsx +import React, { useState, useEffect } from 'react'; +import { useConnections } from '@/hooks/useConnections'; +import { useFeatureInstance } from '@/hooks/useFeatureInstance'; +import { Button } from '@/components/ui/button'; +import { Select } from '@/components/ui/select'; +import { MicrosoftIcon } from '@/components/icons'; +import api from '@/api'; + +export function TrusteeExpenseImport() { + const { connections, createMicrosoftConnectionAndAuth } = useConnections(); + const { featureInstanceId } = useFeatureInstance(); + + const [msftConnection, setMsftConnection] = useState(null); + const [folders, setFolders] = useState([]); + const [selectedFolder, setSelectedFolder] = useState(''); + const [isActivating, setIsActivating] = useState(false); + + // Find active Microsoft connection + useEffect(() => { + const conn = connections.find(c => + c.type === 'msft' && c.status === 'active' + ); + setMsftConnection(conn || null); + }, [connections]); + + // Load SharePoint folders when connected + useEffect(() => { + if (msftConnection) { + loadSharepointFolders(); + } + }, [msftConnection]); + + const loadSharepointFolders = async () => { + try { + const response = await api.get('/api/sharepoint/folders', { + params: { connectionId: msftConnection?.id } + }); + setFolders(response.data.folders || []); + } catch (error) { + console.error('Failed to load folders:', error); + } + }; + + const handleConnect = async () => { + try { + await createMicrosoftConnectionAndAuth(); + } catch (error) { + console.error('Connection failed:', error); + } + }; + + const handleActivate = async () => { + if (!selectedFolder || !msftConnection || !featureInstanceId) return; + + setIsActivating(true); + try { + await api.post('/api/automation/definitions', { + label: 'Expense Import', + schedule: '0 22 * * *', // Daily at 
22:00 + templateName: 'getExpenses', + placeholders: { + connectionName: `connection:msft:${msftConnection.accountName}`, + sharepointFolder: selectedFolder, + featureInstanceId: featureInstanceId, + extractionPrompt: DEFAULT_EXTRACTION_PROMPT + }, + active: true, + featureInstanceId: featureInstanceId + }); + + // Show success message + } catch (error) { + console.error('Activation failed:', error); + } finally { + setIsActivating(false); + } + }; + + return ( +
+

Expense Import Setup

+ + {/* Microsoft Connection */} +
+ + {msftConnection ? ( +
+ + + Connected as {msftConnection.accountName} + +
+ ) : ( + + )} +
+ + {/* SharePoint Folder Selection */} + {msftConnection && ( +
+ + +
+ )} + + {/* Activate Button */} + {selectedFolder && ( + + )} +
+ ); +} +``` + +--- + +## 4. Backend: API Endpoints + +### 4.1 SharePoint Folder List Endpoint + +Neuer Endpoint in `routeSharepoint.py`: + +```python +@router.get("/api/sharepoint/folders") +async def listSharepointFolders( + connectionId: str = Query(..., description="Connection ID"), + request: Request = None +): + """List available SharePoint folders for the user's connection.""" + # Implementation: Use Graph API to list sites and root folders + ... +``` + +### 4.2 Automation Definition Endpoint + +Erweiterung von `routeFeatureAutomation.py` für Template-basierte Erstellung. + +--- + +## 5. Datenbank-Änderungen + +Keine Schema-Änderungen erforderlich. `TrusteePosition` Tabelle wird verwendet wie definiert. + +--- + +## 6. Design-Entscheidungen + +### 6.1 Geklärte Punkte + +| Thema | Entscheidung | +|-------|--------------| +| **PDF-Parsing** | AI-Service verarbeitet PDFs direkt (inkl. Bilder, Scans etc.) - keine Vorverarbeitung nötig | +| **Folder-Erstellung** | "processed" und "error" Subfolders werden automatisch erstellt wenn nicht vorhanden | +| **Fehlerbehandlung** | Fehlerhafte PDFs werden in "error" Subfolder verschoben, Dateiname bleibt unverändert | +| **Duplikat-Erkennung** | Keine - ein wiederholtes Dokument ist bewusst (Kunde lädt erneut hoch) | + +### 6.2 Risiko-Management + +| Risiko | Handling | +|--------|----------| +| **AI-Kosten** | Kunde bezahlt pro Aufruf - keine weitere Einschränkung nötig | +| **SharePoint Rate-Limiting** | Bei Rate-Limit-Error: warten, dann weiterfahren | +| **Timeout** | Bereits im System implementiert - funktioniert | + +### 6.3 Implementierungs-Vorgaben + +| Vorgabe | Wert | +|---------|------| +| **Max PDFs pro Ausführung** | 50 Dateien (Limit) | +| **Retry-Logik** | NEIN - AI-Service handhabt Retries intern | +| **Preview-Modus** | NEIN - nicht benötigt | + +--- + +## 7. Implementierungs-Reihenfolge + +1. 
**Phase 1: Backend Action** + - `getExpensesFromPdf.py` erstellen + - In `methodSharepoint.py` registrieren + - Unit-Tests schreiben + +2. **Phase 2: Automation Template** + - Template in `subAutomationTemplates.py` hinzufügen + - Prompt optimieren und testen + +3. **Phase 3: API Endpoints** + - SharePoint Folder-List Endpoint + - Automation Definition Erweiterung + +4. **Phase 4: Frontend** + - `TrusteeExpenseImport.tsx` Seite + - Navigation/Routing hinzufügen + - Integration testen + +--- + +## 8. Test-Plan + +1. **Unit-Tests** + - VAT-Berechnung + - Valuta/DateTime-Ergänzung + - CSV-Parsing + - Tag-Validierung + +2. **Integration-Tests** + - SharePoint-Verbindung + - PDF-Download + - AI-Extraktion + - TrusteePosition-Speicherung + - Datei-Verschiebung + +3. **E2E-Tests** + - Kompletter Workflow von PDF bis gespeicherter Position + - Automation-Schedule-Ausführung