gateway/modules/shared/aiAuditLogger.py
2026-04-21 00:50:36 +02:00

262 lines
9.5 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""AI Audit Logger — records every AI provider call for compliance reporting.
Usage:
from modules.shared.aiAuditLogger import aiAuditLogger
aiAuditLogger.logAiCall(userId=..., mandateId=..., ...)
"""
import hashlib
import logging
from collections import defaultdict
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
from modules.shared.timeUtils import getUtcTimestamp
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
# Maximum number of characters kept in the input/output preview fields of an
# audit entry (the previews are produced by slicing the content to this length).
_PREVIEW_LENGTH = 200
class AiAuditLogger:
    """Persists AI audit entries to the poweron_app database.

    The database connector is created lazily on the first call to any public
    method (see `_ensureInitialized`), so constructing an instance performs no
    I/O. While no connector is available, `logAiCall` returns None and the read
    helpers return empty results.
    """

    # Record keys holding the complete prompt/response text. List views drop
    # them; only `getAiAuditEntryContent` returns them.
    _FULL_CONTENT_KEYS = ("contentInputFull", "contentOutputFull")

    def __init__(self):
        self._db = None
        self._initialized = False

    def _ensureInitialized(self):
        """Create the connector and ensure the audit table exists (once).

        Failures are logged, not raised. On failure the instance stays
        uninitialized with `_db` unset, so the next public call retries and no
        caller ever sees a connector whose table check did not succeed.
        """
        if self._initialized:
            return
        try:
            # Project imports are deferred to call time (presumably to avoid
            # import cycles at module load — TODO confirm).
            from modules.connectors.connectorDbPostgre import _get_cached_connector
            from modules.shared.configuration import APP_CONFIG
            from modules.datamodels.datamodelAiAudit import AiAuditLogEntry

            connector = _get_cached_connector(
                dbHost=APP_CONFIG.get("DB_HOST", "_no_config_default_data"),
                dbDatabase="poweron_app",
                dbUser=APP_CONFIG.get("DB_USER"),
                dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET"),
                dbPort=int(APP_CONFIG.get("DB_PORT", 5432)),
                userId="system",
            )
            connector._ensureTableExists(AiAuditLogEntry)
        except Exception as e:
            logger.error(f"AI audit logger init failed: {e}")
            return
        # Publish the connector only after every init step succeeded.
        self._db = connector
        self._initialized = True

    # ── Pure helpers (no database access) ──

    @staticmethod
    def _preview(text: Optional[str]) -> Optional[str]:
        """Return the first `_PREVIEW_LENGTH` characters of `text`, or None if empty."""
        return (text or "")[:_PREVIEW_LENGTH] or None

    @staticmethod
    def _recordTimestamp(record: Dict[str, Any]) -> float:
        """Return the record's `timestamp`, treating a missing or None value as 0."""
        return record.get("timestamp") or 0

    @staticmethod
    def _utcDayLabel(ts: float) -> str:
        """Format an epoch-second value as a UTC `YYYY-MM-DD` day, or "unknown".

        `datetime.fromtimestamp` can raise ValueError, OSError or OverflowError
        for out-of-range values; none of them may abort a whole aggregation.
        """
        try:
            return datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%d")
        except (ValueError, OSError, OverflowError):
            return "unknown"

    @classmethod
    def _selectPage(
        cls,
        records: Optional[List[Dict[str, Any]]],
        fromTimestamp: Optional[float],
        toTimestamp: Optional[float],
        limit: int,
        offset: int,
    ) -> Dict[str, Any]:
        """Filter by timestamp, sort newest first and cut one page.

        The input list and its record dicts are never mutated: the page holds
        shallow copies with the full-content keys left out. Negative `limit`
        or `offset` values are clamped to 0.
        """
        tsOf = cls._recordTimestamp
        matching = [
            rec for rec in (records or [])
            if (fromTimestamp is None or tsOf(rec) >= fromTimestamp)
            and (toTimestamp is None or tsOf(rec) <= toTimestamp)
        ]
        # `matching` is a fresh list, so sorting in place leaves the caller's list alone.
        matching.sort(key=tsOf, reverse=True)

        start = max(0, offset)
        size = max(0, limit)
        items = [
            {key: value for key, value in rec.items() if key not in cls._FULL_CONTENT_KEYS}
            for rec in matching[start:start + size]
        ]
        return {"items": items, "totalItems": len(matching)}

    @classmethod
    def _aggregateStats(
        cls,
        records: Optional[List[Dict[str, Any]]],
        fromTs: float,
        toTs: float,
    ) -> Dict[str, Any]:
        """Aggregate the records whose timestamp lies in `[fromTs, toTs]`.

        Days are bucketed in UTC.
        NOTE(review): if the boundaries are local-time epochs, a local day may
        span two UTC buckets — confirm against the caller.
        """
        inRange = [
            rec for rec in (records or [])
            if fromTs <= cls._recordTimestamp(rec) <= toTs
        ]

        callsByDay: Dict[str, int] = defaultdict(int)
        callsByModel: Dict[str, int] = defaultdict(int)
        callsByFeature: Dict[str, int] = defaultdict(int)
        costByDay: Dict[str, float] = defaultdict(float)
        callsByUser: Dict[str, int] = defaultdict(int)
        neutralizationCount = 0

        for rec in inRange:
            day = cls._utcDayLabel(cls._recordTimestamp(rec))
            callsByDay[day] += 1
            costByDay[day] += rec.get("priceCHF") or 0.0
            callsByModel[rec.get("aiModel") or "unknown"] += 1
            callsByFeature[rec.get("featureCode") or "system"] += 1
            callsByUser[rec.get("username") or rec.get("userId") or "unknown"] += 1
            if rec.get("neutralizationActive"):
                neutralizationCount += 1

        def rankedByCount(counts: Dict[str, int]) -> List[Any]:
            # Stable sort: ties keep first-seen order.
            return sorted(counts.items(), key=lambda pair: -pair[1])

        totalCalls = len(inRange)
        sortedDays = sorted(callsByDay)
        neutralizationPercent = (
            round(100.0 * neutralizationCount / totalCalls, 1) if totalCalls else 0.0
        )
        return {
            "totalCalls": totalCalls,
            "fromTs": fromTs,
            "toTs": toTs,
            # Inclusive day span of the requested range, never below 1.
            "days": max(1, int((toTs - fromTs) / 86400) + 1),
            "callsPerDay": [{"date": d, "calls": callsByDay[d]} for d in sortedDays],
            "costPerDay": [{"date": d, "cost": round(costByDay[d], 4)} for d in sortedDays],
            "callsByModel": dict(rankedByCount(callsByModel)),
            "callsByFeature": dict(rankedByCount(callsByFeature)),
            "topUsers": dict(rankedByCount(callsByUser)[:10]),
            "neutralizationPercent": neutralizationPercent,
        }

    # ── Write path ──

    def logAiCall(
        self,
        userId: str,
        mandateId: str,
        aiProvider: str,
        aiModel: str,
        *,
        username: Optional[str] = None,
        featureInstanceId: Optional[str] = None,
        featureCode: Optional[str] = None,
        instanceLabel: Optional[str] = None,
        operationType: Optional[str] = None,
        tokensInput: Optional[int] = None,
        tokensOutput: Optional[int] = None,
        processingTimeMs: Optional[int] = None,
        priceCHF: Optional[float] = None,
        neutralizationActive: bool = False,
        neutralizationMappingsCount: Optional[int] = None,
        contentInput: Optional[str] = None,
        contentOutput: Optional[str] = None,
        storeFullContent: bool = False,
        success: bool = True,
        errorMessage: Optional[str] = None,
    ) -> Optional[str]:
        """Write one AI audit entry. Returns the entry id or None on failure.

        Every entry gets a SHA-256 hash of the input (when non-empty) and
        previews of input and output truncated to `_PREVIEW_LENGTH` characters.
        The full texts are persisted only when `storeFullContent` is true.
        Never raises: any error is logged and reported as None.
        """
        self._ensureInitialized()
        if not self._db:
            return None
        try:
            from modules.datamodels.datamodelAiAudit import AiAuditLogEntry

            inputHash = (
                hashlib.sha256(contentInput.encode("utf-8")).hexdigest()
                if contentInput
                else None
            )
            entry = AiAuditLogEntry(
                userId=userId,
                username=username,
                mandateId=mandateId,
                featureInstanceId=featureInstanceId or "",
                featureCode=featureCode,
                instanceLabel=instanceLabel,
                aiProvider=aiProvider,
                aiModel=aiModel,
                operationType=operationType,
                tokensInput=tokensInput,
                tokensOutput=tokensOutput,
                processingTimeMs=processingTimeMs,
                priceCHF=priceCHF,
                neutralizationActive=neutralizationActive,
                neutralizationMappingsCount=neutralizationMappingsCount,
                # True when any full text is persisted, including the case
                # where only the output is non-empty.
                contentStored=bool(storeFullContent and (contentInput or contentOutput)),
                contentInputHash=inputHash,
                contentInputPreview=self._preview(contentInput),
                contentOutputPreview=self._preview(contentOutput),
                contentInputFull=contentInput if storeFullContent else None,
                contentOutputFull=contentOutput if storeFullContent else None,
                success=success,
                errorMessage=errorMessage,
            )
            self._db.recordCreate(AiAuditLogEntry, entry.model_dump())
            return entry.id
        except Exception as e:
            # Best effort: a failure to write the audit entry is logged and
            # reported as None rather than raised to the caller.
            logger.error(f"Failed to write AI audit entry: {e}", exc_info=True)
            return None

    # ── Read helpers (used by route) ──

    def getAiAuditLogs(
        self,
        mandateId: str,
        *,
        userId: Optional[str] = None,
        featureInstanceId: Optional[str] = None,
        aiModel: Optional[str] = None,
        fromTimestamp: Optional[float] = None,
        toTimestamp: Optional[float] = None,
        limit: int = 100,
        offset: int = 0,
    ) -> Dict[str, Any]:
        """Return paginated AI audit entries for a mandate.

        Equality filters (`userId`, `featureInstanceId`, `aiModel`) go to the
        connector. The timestamp range, newest-first ordering and pagination
        are applied in memory. Returned items omit the full-content fields.

        Returns:
            `{"items": [...], "totalItems": n}`, where `n` counts all matching
            entries before pagination. Empty when no connector is available.
        """
        self._ensureInitialized()
        if not self._db:
            return {"items": [], "totalItems": 0}
        from modules.datamodels.datamodelAiAudit import AiAuditLogEntry

        recordFilter: Dict[str, Any] = {"mandateId": mandateId}
        optionalFilters = {
            "userId": userId,
            "featureInstanceId": featureInstanceId,
            "aiModel": aiModel,
        }
        # Only truthy filters are applied; None/"" mean "do not filter".
        recordFilter.update({key: value for key, value in optionalFilters.items() if value})

        allRecords = self._db.getRecordset(AiAuditLogEntry, recordFilter=recordFilter)
        return self._selectPage(allRecords, fromTimestamp, toTimestamp, limit, offset)

    def getAiAuditEntryContent(self, entryId: str, mandateId: str) -> Optional[Dict[str, Any]]:
        """Return full content for a single entry (RBAC checked by route).

        The lookup is scoped by both `entryId` and `mandateId`. Returns None
        when no connector is available or no record matches.
        """
        self._ensureInitialized()
        if not self._db:
            return None
        from modules.datamodels.datamodelAiAudit import AiAuditLogEntry

        records = self._db.getRecordset(
            AiAuditLogEntry, recordFilter={"id": entryId, "mandateId": mandateId}
        )
        if not records:
            return None
        rec = records[0]
        return {
            "id": rec.get("id"),
            "contentStored": rec.get("contentStored", False),
            "contentInputFull": rec.get("contentInputFull"),
            "contentOutputFull": rec.get("contentOutputFull"),
            "contentInputPreview": rec.get("contentInputPreview"),
            "contentOutputPreview": rec.get("contentOutputPreview"),
        }

    def getAiAuditStats(
        self,
        mandateId: str,
        *,
        fromTs: float,
        toTs: float,
        groupBy: str = "model",
    ) -> Dict[str, Any]:
        """Aggregate statistics for Tab C over an explicit timestamp range.

        `fromTs`/`toTs` are inclusive epoch-second boundaries (see
        `dateRange.isoDateRangeToLocalEpoch`). Both are required.
        `groupBy` is accepted for interface compatibility but is not read by
        this implementation; every grouping is always returned.

        Returns:
            A dict with `totalCalls`, `fromTs`, `toTs`, `days`, `callsPerDay`,
            `costPerDay`, `callsByModel`, `callsByFeature`, `topUsers` (at
            most 10) and `neutralizationPercent`. An empty dict when no
            connector is available.
        """
        self._ensureInitialized()
        if not self._db:
            return {}
        from modules.datamodels.datamodelAiAudit import AiAuditLogEntry

        allRecords = self._db.getRecordset(
            AiAuditLogEntry, recordFilter={"mandateId": mandateId}
        )
        return self._aggregateStats(allRecords, fromTs, toTs)
# Shared module-level instance that callers import. Creating it performs no
# I/O: the constructor only sets fields, and the database connector is
# created on the first method call.
aiAuditLogger = AiAuditLogger()