From a527806436c52cb7cac7c3f1fc79d626858b4241 Mon Sep 17 00:00:00 2001 From: patrick-motsch Date: Fri, 27 Feb 2026 23:30:37 +0100 Subject: [PATCH] Harden trustee position ingestion and auto-clean corrupt records. Normalize AI-derived date/time and numeric fields before writes, tighten extraction prompts for unix timestamps, and prevent /positions crashes by normalizing or deleting irreparably invalid legacy TrusteePosition records during reads. Made-with: Cursor --- .../trustee/interfaceFeatureTrustee.py | 183 +++++++++++++++++- .../methodTrustee/actions/extractFromFiles.py | 13 +- 2 files changed, 183 insertions(+), 13 deletions(-) diff --git a/modules/features/trustee/interfaceFeatureTrustee.py b/modules/features/trustee/interfaceFeatureTrustee.py index 4c97cc4d..a326ade6 100644 --- a/modules/features/trustee/interfaceFeatureTrustee.py +++ b/modules/features/trustee/interfaceFeatureTrustee.py @@ -8,7 +8,9 @@ Manages trustee organisations, roles, access, contracts, documents, and position import logging import math import uuid +from datetime import datetime, timezone from typing import Dict, Any, List, Optional, Union +from pydantic import ValidationError from modules.connectors.connectorDbPostgre import DatabaseConnector from modules.shared.configuration import APP_CONFIG @@ -32,6 +34,118 @@ logger = logging.getLogger(__name__) _trusteeInterfaces = {} +def _toSafeFloat(value: Any, defaultValue: float = 0.0) -> float: + """Convert mixed numeric inputs (str/number) to float safely.""" + if value is None or value == "": + return defaultValue + if isinstance(value, (int, float)): + return float(value) + try: + textValue = str(value).strip().replace("'", "").replace(" ", "") + if "," in textValue and "." 
not in textValue: + textValue = textValue.replace(",", ".") + return float(textValue) + except Exception: + return defaultValue + + +def _normaliseIsoDate(value: Any) -> Optional[str]: + """Normalise date-like input to ISO date format YYYY-MM-DD.""" + if value is None or value == "": + return None + if isinstance(value, (int, float)): + try: + return datetime.fromtimestamp(float(value), tz=timezone.utc).date().isoformat() + except Exception: + return None + + textValue = str(value).strip() + if not textValue: + return None + + # Try common explicit formats first (incl. Swiss/European notation). + for formatValue in ( + "%Y-%m-%d", + "%d.%m.%Y", + "%d/%m/%Y", + "%d-%m-%Y", + "%Y/%m/%d", + "%Y.%m.%d", + ): + try: + return datetime.strptime(textValue, formatValue).date().isoformat() + except Exception: + continue + + # Try ISO datetime variants. + try: + return datetime.fromisoformat(textValue.replace("Z", "+00:00")).date().isoformat() + except Exception: + return None + + +def _normaliseTimestamp(value: Any, fallbackIsoDate: Optional[str] = None) -> Optional[float]: + """Normalise timestamp input to unix seconds (float).""" + if value is None or value == "": + if fallbackIsoDate: + try: + fallbackDate = datetime.strptime(fallbackIsoDate, "%Y-%m-%d").replace(tzinfo=timezone.utc) + return float(fallbackDate.timestamp()) + except Exception: + return None + return None + + if isinstance(value, (int, float)): + return float(value) + + textValue = str(value).strip() + if not textValue: + return None + + numericTimestamp = _toSafeFloat(textValue, defaultValue=float("nan")) + if not math.isnan(numericTimestamp): + return float(numericTimestamp) + + # Accept date-only input and normalise to midnight UTC timestamp. 
+ isoDate = _normaliseIsoDate(textValue) + if isoDate: + try: + parsedDate = datetime.strptime(isoDate, "%Y-%m-%d").replace(tzinfo=timezone.utc) + return float(parsedDate.timestamp()) + except Exception: + return None + + return None + + +def _sanitisePositionPayload(data: Dict[str, Any]) -> Dict[str, Any]: + """Failsafe normalisation for TrusteePosition payloads before DB writes.""" + safeData = dict(data or {}) + + isoValuta = _normaliseIsoDate(safeData.get("valuta")) + safeData["valuta"] = isoValuta + + safeData["transactionDateTime"] = _normaliseTimestamp( + safeData.get("transactionDateTime"), + fallbackIsoDate=isoValuta, + ) + + safeData["bookingAmount"] = _toSafeFloat(safeData.get("bookingAmount"), defaultValue=0.0) + safeData["originalAmount"] = _toSafeFloat( + safeData.get("originalAmount"), + defaultValue=safeData["bookingAmount"], + ) + safeData["vatPercentage"] = _toSafeFloat(safeData.get("vatPercentage"), defaultValue=0.0) + safeData["vatAmount"] = _toSafeFloat(safeData.get("vatAmount"), defaultValue=0.0) + + bookingCurrency = (safeData.get("bookingCurrency") or "CHF") + originalCurrency = (safeData.get("originalCurrency") or bookingCurrency) + safeData["bookingCurrency"] = str(bookingCurrency).upper() + safeData["originalCurrency"] = str(originalCurrency).upper() + + return safeData + + def getInterface(currentUser: User, mandateId: Optional[Union[str, uuid.UUID]] = None, featureInstanceId: Optional[str] = None) -> "TrusteeObjects": """Get or create a TrusteeObjects instance for the given user context. 
def _toTrusteePositionOrDelete(self, rawRecord: Dict[str, Any], deleteCorrupt: bool = True) -> Optional[TrusteePosition]:
    """Build a TrusteePosition from a raw DB record, repairing or removing bad rows.

    The record is stripped of underscore-prefixed keys (``_createdAt`` is
    kept), run through ``_sanitisePositionPayload`` and validated by
    constructing ``TrusteePosition``.

    Args:
        rawRecord: Record dict as returned by the database layer.
        deleteCorrupt: When True, a record that fails model validation is
            deleted via ``self.db.recordDelete`` (only if it has an id).

    Returns:
        The validated model, or None when the record is empty or invalid.
    """
    # NOTE(review): deleting rows as a side effect of a read is irreversible;
    # a model change that invalidates old rows would remove them — confirm
    # this is intended for every caller that passes deleteCorrupt=True.
    storedFields = {}
    for key, fieldValue in (rawRecord or {}).items():
        if key == "_createdAt" or not key.startswith("_"):
            storedFields[key] = fieldValue
    if not storedFields:
        return None

    sanitisedFields = _sanitisePositionPayload(storedFields)
    recordId = sanitisedFields.get("id") or storedFields.get("id")

    try:
        position = TrusteePosition(**sanitisedFields)
    except ValidationError as err:
        logger.error(f"Corrupt TrusteePosition record detected (id={recordId}): {err}")
        if not (deleteCorrupt and recordId):
            return None
        try:
            wasDeleted = self.db.recordDelete(TrusteePosition, recordId)
            if wasDeleted:
                logger.warning(f"Deleted corrupt TrusteePosition record: {recordId}")
            else:
                logger.warning(f"Failed to delete corrupt TrusteePosition record: {recordId}")
        except Exception as deleteErr:
            logger.error(f"Error deleting corrupt TrusteePosition record {recordId}: {deleteErr}")
        return None

    # Write the sanitised values back whenever they differ from what was
    # stored; a failed write is logged and the model is still returned.
    needsWriteBack = bool(recordId) and sanitisedFields != storedFields
    if needsWriteBack:
        try:
            self.db.recordModify(TrusteePosition, recordId, sanitisedFields)
            logger.info(f"Normalised legacy TrusteePosition record: {recordId}")
        except Exception as writeErr:
            logger.warning(f"Could not persist normalised TrusteePosition {recordId}: {writeErr}")
    return position
+ data = _sanitisePositionPayload(data) + # Auto-set context fields data["mandateId"] = self.mandateId data["featureInstanceId"] = self.featureInstanceId @@ -1160,7 +1308,7 @@ class TrusteeObjects: createdRecord = self.db.recordCreate(TrusteePosition, data) if createdRecord and createdRecord.get("id"): - return TrusteePosition(**{k: v for k, v in createdRecord.items() if not k.startswith("_")}) + return self._toTrusteePositionOrDelete(createdRecord, deleteCorrupt=False) return None def getPosition(self, positionId: str) -> Optional[TrusteePosition]: @@ -1168,7 +1316,7 @@ class TrusteeObjects: records = self.db.getRecordset(TrusteePosition, recordFilter={"id": positionId}) if not records: return None - return TrusteePosition(**{k: v for k, v in records[0].items() if not k.startswith("_")}) + return self._toTrusteePositionOrDelete(records[0], deleteCorrupt=True) def getAllPositions(self, params: Optional[PaginationParams] = None) -> PaginatedResult: """Get all positions with RBAC filtering + feature-level access filtering.""" @@ -1198,8 +1346,12 @@ class TrusteeObjects: # Step 3: Apply sorting sortedRecords = self._applySorting(filteredRecords, params) - # Step 4: Convert to Pydantic objects - pydanticItems = [TrusteePosition(**r) for r in sortedRecords] + # Step 4: Convert to Pydantic objects and cleanup corrupt legacy records. 
+ pydanticItems = [] + for record in sortedRecords: + position = self._toTrusteePositionOrDelete(record, deleteCorrupt=True) + if position is not None: + pydanticItems.append(position) # Step 5: Apply pagination totalItems = len(pydanticItems) @@ -1235,7 +1387,12 @@ class TrusteeObjects: featureInstanceId=self.featureInstanceId, featureCode=self.FEATURE_CODE ) - return [TrusteePosition(**{k: v for k, v in r.items() if not k.startswith("_")}) for r in records] + safeItems = [] + for record in records: + position = self._toTrusteePositionOrDelete(record, deleteCorrupt=True) + if position is not None: + safeItems.append(position) + return safeItems def getPositionsByOrganisation(self, organisationId: str) -> List[TrusteePosition]: """Get all positions for a specific organisation.""" @@ -1250,7 +1407,12 @@ class TrusteeObjects: featureInstanceId=self.featureInstanceId, featureCode=self.FEATURE_CODE ) - return [TrusteePosition(**{k: v for k, v in r.items() if not k.startswith("_")}) for r in records] + safeItems = [] + for record in records: + position = self._toTrusteePositionOrDelete(record, deleteCorrupt=True) + if position is not None: + safeItems.append(position) + return safeItems def updatePosition(self, positionId: str, data: Dict[str, Any]) -> Optional[TrusteePosition]: """Update a position. 
@@ -1276,7 +1438,7 @@ class TrusteeObjects: updatedRecord = self.db.recordModify(TrusteePosition, positionId, data) if not updatedRecord: return None - return TrusteePosition(**{k: v for k, v in updatedRecord.items() if not k.startswith("_")}) + return self._toTrusteePositionOrDelete(updatedRecord, deleteCorrupt=False) def deletePosition(self, positionId: str) -> bool: """Delete a position.""" @@ -1309,7 +1471,12 @@ class TrusteeObjects: featureInstanceId=self.featureInstanceId, featureCode=self.FEATURE_CODE ) - return [TrusteePosition(**{k: v for k, v in r.items() if not k.startswith("_")}) for r in records] + safeItems = [] + for record in records: + position = self._toTrusteePositionOrDelete(record, deleteCorrupt=True) + if position is not None: + safeItems.append(position) + return safeItems # ===== Trustee-specific Access Check ===== diff --git a/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py b/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py index 88f2c544..a7c3d820 100644 --- a/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py +++ b/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py @@ -41,10 +41,11 @@ _CLASSIFY_PROMPT = ( # Phase 2: Type-specific structuring prompts (placeholders: {expenseList}, {bankList}) _PROMPT_EXPENSE_RECEIPT = ( "Extrahiere aus dem folgenden Dokument eine Buchung pro Ausgabeposition. " - "Return JSON: {{\"records\": [{{...}}]}}. Jeder Record: valuta (YYYY-MM-DD), transactionDateTime (unix seconds), company, desc, " + "Return JSON: {{\"records\": [{{...}}]}}. Jeder Record: valuta (YYYY-MM-DD), transactionDateTime (unix seconds, numeric), company, desc, " "bookingCurrency, bookingAmount, originalCurrency, originalAmount, vatPercentage, vatAmount, " "debitAccountNumber (NUR die Kontonummer, z.B. \"6200\", aus: {expenseList}), " - "creditAccountNumber (NUR die Kontonummer, z.B. \"1020\", aus: {bankList}), tags, taxCode, costCenter, bookingReference." 
+ "creditAccountNumber (NUR die Kontonummer, z.B. \"1020\", aus: {bankList}), tags, taxCode, costCenter, bookingReference. " + "WICHTIG: transactionDateTime muss eine ZAHL sein (z.B. 1737417600), niemals '21.01.2026'." ) _PROMPT_BANK_DOCUMENT = ( "Extrahiere aus dem folgenden Bankauszug eine Buchung pro Transaktionszeile. " @@ -58,14 +59,16 @@ _PROMPT_INVOICE = ( "Return JSON: {{\"records\": [{{...}}]}}. Record: valuta (Rechnungsdatum), company (Kreditor), desc (Rechnungsdetails), " "bookingAmount, bookingCurrency, vatPercentage, vatAmount, " "debitAccountNumber (NUR die Kontonummer aus: {expenseList}), creditAccountNumber (NUR die Kontonummer aus: {bankList}), " - "bookingReference (Rechnungsnummer), transactionDateTime, taxCode, costCenter." + "bookingReference (Rechnungsnummer), transactionDateTime, taxCode, costCenter. " + "Formatregeln: valuta nur YYYY-MM-DD; transactionDateTime nur unix seconds als Zahl." ) _PROMPT_FALLBACK = ( "Extrahiere aus dem folgenden Dokument Buchungsdaten. " - "Return JSON: {{\"records\": [{{...}}]}}. Jeder Record: valuta (YYYY-MM-DD), transactionDateTime (unix seconds), company, desc, " + "Return JSON: {{\"records\": [{{...}}]}}. Jeder Record: valuta (YYYY-MM-DD), transactionDateTime (unix seconds, numeric), company, desc, " "bookingCurrency, bookingAmount, originalCurrency, originalAmount, vatPercentage, vatAmount, " "debitAccountNumber (NUR die Kontonummer, z.B. \"6200\", aus: {expenseList}), " - "creditAccountNumber (NUR die Kontonummer, z.B. \"1020\", aus: {bankList}), tags, taxCode, costCenter, bookingReference." + "creditAccountNumber (NUR die Kontonummer, z.B. \"1020\", aus: {bankList}), tags, taxCode, costCenter, bookingReference. " + "WICHTIG: keine lokalen Datumsformate in transactionDateTime (kein DD.MM.YYYY)." )