262 lines
9.5 KiB
Python
262 lines
9.5 KiB
Python
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""AI Audit Logger — records every AI provider call for compliance reporting.

Usage:
    from modules.shared.aiAuditLogger import aiAuditLogger
    aiAuditLogger.logAiCall(userId=..., mandateId=..., ...)
"""
|
|
|
|
import hashlib
|
|
import logging
|
|
from collections import defaultdict
|
|
from datetime import datetime, timezone
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from modules.shared.timeUtils import getUtcTimestamp
|
|
|
|
# Module-level logger named after this module; init and write failures of the audit logger are reported through it.
logger = logging.getLogger(__name__)
|
|
|
|
# Maximum number of characters of the input/output content kept as a preview on each audit entry.
_PREVIEW_LENGTH = 200
|
|
|
|
|
|
class AiAuditLogger:
    """Persists AI audit entries to the poweron_app database.

    The database connector is resolved lazily on first use (see
    `_ensureInitialized`), so constructing an instance performs no I/O.
    Write failures are logged and reported as `None`; they never raise.
    """

    # Fields carrying the complete prompt/response text; stripped from list results.
    _FULL_CONTENT_FIELDS = ("contentInputFull", "contentOutputFull")

    def __init__(self):
        self._db = None  # connector; assigned by _ensureInitialized
        self._initialized = False  # True once connector and table check both succeeded

    def _ensureInitialized(self):
        """Resolve the database connector and make sure the audit table exists.

        Does nothing once initialization has succeeded. On failure the error is
        logged (with traceback) and initialization is retried on the next call.
        """
        if self._initialized:
            return
        try:
            # Imported lazily, inside the try, so an import failure is logged
            # like any other init error instead of propagating.
            from modules.connectors.connectorDbPostgre import _get_cached_connector
            from modules.shared.configuration import APP_CONFIG
            from modules.datamodels.datamodelAiAudit import AiAuditLogEntry

            dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data")
            dbUser = APP_CONFIG.get("DB_USER")
            dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
            dbPort = int(APP_CONFIG.get("DB_PORT", 5432))

            # NOTE(review): _db is assigned before the table check, so if
            # _ensureTableExists raises, _db stays set while _initialized stays
            # False; later calls retry init but still use this connector.
            self._db = _get_cached_connector(
                dbHost=dbHost,
                dbDatabase="poweron_app",
                dbUser=dbUser,
                dbPassword=dbPassword,
                dbPort=dbPort,
                userId="system",
            )
            self._db._ensureTableExists(AiAuditLogEntry)
            self._initialized = True
        except Exception as e:
            # Best-effort: audit logging must not break the caller.
            logger.error(f"AI audit logger init failed: {e}", exc_info=True)

    def logAiCall(
        self,
        userId: str,
        mandateId: str,
        aiProvider: str,
        aiModel: str,
        *,
        username: Optional[str] = None,
        featureInstanceId: Optional[str] = None,
        featureCode: Optional[str] = None,
        instanceLabel: Optional[str] = None,
        operationType: Optional[str] = None,
        tokensInput: Optional[int] = None,
        tokensOutput: Optional[int] = None,
        processingTimeMs: Optional[int] = None,
        priceCHF: Optional[float] = None,
        neutralizationActive: bool = False,
        neutralizationMappingsCount: Optional[int] = None,
        contentInput: Optional[str] = None,
        contentOutput: Optional[str] = None,
        storeFullContent: bool = False,
        success: bool = True,
        errorMessage: Optional[str] = None,
    ) -> Optional[str]:
        """Write one AI audit entry.

        A preview (first `_PREVIEW_LENGTH` characters) of input and output and
        a SHA-256 hash of the UTF-8 encoded input are always recorded when the
        respective content is non-empty. The complete input/output text is
        only stored when `storeFullContent` is True.

        Returns:
            The id of the created entry, or None when the database is not
            available or the write failed (the error is logged).
        """
        self._ensureInitialized()
        if not self._db:
            return None

        try:
            from modules.datamodels.datamodelAiAudit import AiAuditLogEntry

            # Empty/None content yields None rather than an empty string.
            inputPreview = (contentInput or "")[:_PREVIEW_LENGTH] or None
            outputPreview = (contentOutput or "")[:_PREVIEW_LENGTH] or None
            inputHash = (
                hashlib.sha256(contentInput.encode("utf-8")).hexdigest()
                if contentInput
                else None
            )

            entry = AiAuditLogEntry(
                userId=userId,
                username=username,
                mandateId=mandateId,
                featureInstanceId=featureInstanceId or "",
                featureCode=featureCode,
                instanceLabel=instanceLabel,
                aiProvider=aiProvider,
                aiModel=aiModel,
                operationType=operationType,
                tokensInput=tokensInput,
                tokensOutput=tokensOutput,
                processingTimeMs=processingTimeMs,
                priceCHF=priceCHF,
                neutralizationActive=neutralizationActive,
                neutralizationMappingsCount=neutralizationMappingsCount,
                # NOTE(review): the flag only considers contentInput; an entry
                # with stored output but empty input reports contentStored=False.
                contentStored=storeFullContent and bool(contentInput),
                contentInputHash=inputHash,
                contentInputPreview=inputPreview,
                contentOutputPreview=outputPreview,
                contentInputFull=contentInput if storeFullContent else None,
                contentOutputFull=contentOutput if storeFullContent else None,
                success=success,
                errorMessage=errorMessage,
            )
            self._db.recordCreate(AiAuditLogEntry, entry.model_dump())
            return entry.id
        except Exception as e:
            # Best-effort: a failed audit write is logged, never raised.
            logger.error(f"Failed to write AI audit entry: {e}", exc_info=True)
            return None

    # ── Read helpers (used by route) ──

    @staticmethod
    def _buildPage(
        records,
        *,
        fromTimestamp: Optional[float] = None,
        toTimestamp: Optional[float] = None,
        limit: int = 100,
        offset: int = 0,
    ) -> Dict[str, Any]:
        """Filter records by timestamp, sort newest first and cut one page.

        Records without a timestamp are treated as timestamp 0. Negative
        `offset`/`limit` are clamped to 0 so they cannot trigger Python's
        negative-index slicing. Returned items are copies with the full-content
        fields removed; the input records are left untouched.
        """
        selected = [
            r for r in (records or [])
            if (fromTimestamp is None or (r.get("timestamp") or 0) >= fromTimestamp)
            and (toTimestamp is None or (r.get("timestamp") or 0) <= toTimestamp)
        ]
        selected.sort(key=lambda r: r.get("timestamp") or 0, reverse=True)

        start = max(0, offset)
        size = max(0, limit)
        items: List[Dict[str, Any]] = []
        for record in selected[start:start + size]:
            item = dict(record)  # copy: do not mutate the connector's records
            for fieldName in AiAuditLogger._FULL_CONTENT_FIELDS:
                item.pop(fieldName, None)
            items.append(item)

        return {"items": items, "totalItems": len(selected)}

    def getAiAuditLogs(
        self,
        mandateId: str,
        *,
        userId: Optional[str] = None,
        featureInstanceId: Optional[str] = None,
        aiModel: Optional[str] = None,
        fromTimestamp: Optional[float] = None,
        toTimestamp: Optional[float] = None,
        limit: int = 100,
        offset: int = 0,
    ) -> Dict[str, Any]:
        """Return paginated AI audit entries for a mandate.

        `userId`, `featureInstanceId` and `aiModel` narrow the query when
        given; `fromTimestamp`/`toTimestamp` are inclusive bounds applied
        after the fetch.

        Returns:
            {"items": [...], "totalItems": n}, newest first, where
            `totalItems` counts all matches before paging. Items never contain
            the full input/output content. Empty when the database is not
            available.
        """
        self._ensureInitialized()
        if not self._db:
            return {"items": [], "totalItems": 0}

        from modules.datamodels.datamodelAiAudit import AiAuditLogEntry

        recordFilter: Dict[str, Any] = {"mandateId": mandateId}
        optionalFilters = {
            "userId": userId,
            "featureInstanceId": featureInstanceId,
            "aiModel": aiModel,
        }
        # Only truthy filter values are sent to the connector.
        recordFilter.update({k: v for k, v in optionalFilters.items() if v})

        allRecords = self._db.getRecordset(
            AiAuditLogEntry,
            recordFilter=recordFilter,
        )
        return self._buildPage(
            allRecords,
            fromTimestamp=fromTimestamp,
            toTimestamp=toTimestamp,
            limit=limit,
            offset=offset,
        )

    def getAiAuditEntryContent(self, entryId: str, mandateId: str) -> Optional[Dict[str, Any]]:
        """Return full content for a single entry (RBAC checked by route).

        The lookup is restricted to `mandateId`, so an entry id from another
        mandate yields None.

        Returns:
            A dict with the id, the contentStored flag, and the full and
            preview input/output content, or None when the entry is not found
            or the database is not available.
        """
        self._ensureInitialized()
        if not self._db:
            return None

        from modules.datamodels.datamodelAiAudit import AiAuditLogEntry

        records = self._db.getRecordset(
            AiAuditLogEntry, recordFilter={"id": entryId, "mandateId": mandateId}
        )
        if not records:
            return None

        rec = records[0]
        return {
            "id": rec.get("id"),
            "contentStored": rec.get("contentStored", False),
            "contentInputFull": rec.get("contentInputFull"),
            "contentOutputFull": rec.get("contentOutputFull"),
            "contentInputPreview": rec.get("contentInputPreview"),
            "contentOutputPreview": rec.get("contentOutputPreview"),
        }

    @staticmethod
    def _dayKey(ts) -> str:
        """Return the UTC calendar day (YYYY-MM-DD) of an epoch timestamp.

        Returns "unknown" when the timestamp cannot be converted.
        """
        try:
            return datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%d")
        except (ValueError, OSError, OverflowError):
            # OverflowError: timestamp outside the platform's time_t range.
            return "unknown"

    @staticmethod
    def _aggregateStats(allRecords, fromTs: float, toTs: float) -> Dict[str, Any]:
        """Aggregate records whose timestamp lies in [fromTs, toTs].

        Records without a timestamp are treated as timestamp 0. Days are
        bucketed in UTC.
        """
        records = [
            r for r in (allRecords or [])
            if fromTs <= (r.get("timestamp") or 0) <= toTs
        ]

        callsByDay: Dict[str, int] = defaultdict(int)
        callsByModel: Dict[str, int] = defaultdict(int)
        callsByFeature: Dict[str, int] = defaultdict(int)
        costByDay: Dict[str, float] = defaultdict(float)
        callsByUser: Dict[str, int] = defaultdict(int)
        neutralizationCount = 0
        totalCalls = len(records)

        for r in records:
            day = AiAuditLogger._dayKey(r.get("timestamp") or 0)
            callsByDay[day] += 1
            callsByModel[r.get("aiModel") or "unknown"] += 1
            callsByFeature[r.get("featureCode") or "system"] += 1
            costByDay[day] += r.get("priceCHF") or 0.0
            callsByUser[r.get("username") or r.get("userId") or "unknown"] += 1
            if r.get("neutralizationActive"):
                neutralizationCount += 1

        sortedDays = sorted(callsByDay)
        neutralizationPercent = (
            round(100.0 * neutralizationCount / totalCalls, 1) if totalCalls else 0.0
        )
        # Number of day-sized (86400 s) slots covered by the range, at least 1.
        days = max(1, int((toTs - fromTs) / 86400) + 1)

        return {
            "totalCalls": totalCalls,
            "fromTs": fromTs,
            "toTs": toTs,
            "days": days,
            "callsPerDay": [{"date": d, "calls": callsByDay[d]} for d in sortedDays],
            "costPerDay": [{"date": d, "cost": round(costByDay[d], 4)} for d in sortedDays],
            "callsByModel": dict(sorted(callsByModel.items(), key=lambda x: -x[1])),
            "callsByFeature": dict(sorted(callsByFeature.items(), key=lambda x: -x[1])),
            # Only the ten users with the most calls are reported.
            "topUsers": dict(sorted(callsByUser.items(), key=lambda x: -x[1])[:10]),
            "neutralizationPercent": neutralizationPercent,
        }

    def getAiAuditStats(
        self,
        mandateId: str,
        *,
        fromTs: float,
        toTs: float,
        groupBy: str = "model",
    ) -> Dict[str, Any]:
        """Aggregate statistics for Tab C over an explicit timestamp range.

        `fromTs`/`toTs` are inclusive epoch-second boundaries (see
        `dateRange.isoDateRangeToLocalEpoch`). Both are required. Per-day
        figures are bucketed by UTC calendar day.

        `groupBy` is accepted for interface compatibility but is not used by
        the aggregation; all groupings are always returned.

        Returns:
            A dict with totalCalls, fromTs, toTs, days, callsPerDay,
            costPerDay, callsByModel, callsByFeature, topUsers and
            neutralizationPercent; an empty dict when the database is not
            available.
        """
        self._ensureInitialized()
        if not self._db:
            return {}

        from modules.datamodels.datamodelAiAudit import AiAuditLogEntry

        allRecords = self._db.getRecordset(
            AiAuditLogEntry, recordFilter={"mandateId": mandateId}
        )
        return self._aggregateStats(allRecords, fromTs, toTs)
|
|
|
|
|
|
# Shared module-level instance; per the module docstring, callers import this object and call its methods.
aiAuditLogger = AiAuditLogger()
|