"""Per-connection knowledge ingestion preference helpers. Walkers call `loadConnectionPrefs(connectionId)` once at bootstrap start and receive a `ConnectionIngestionPrefs` dataclass they can pass down into their inner loops. All fields have safe defaults so walkers stay backward-compatible with connections that predate the ยง2.6 preference schema (knowledgePreferences is None). """ from __future__ import annotations import logging from dataclasses import dataclass, field from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) _DEFAULT_MAX_AGE_DAYS = 90 _DEFAULT_MAIL_DEPTH = "full" _DEFAULT_CLICKUP_SCOPE = "title_description" @dataclass class ConnectionIngestionPrefs: """Parsed per-connection preferences for knowledge ingestion walkers.""" # PII neutralizeBeforeEmbed: bool = False # Mail (Outlook + Gmail) mailContentDepth: str = _DEFAULT_MAIL_DEPTH # "metadata" | "snippet" | "full" mailIndexAttachments: bool = False # Files (Drive / SharePoint / OneDrive) filesIndexBinaries: bool = True mimeAllowlist: List[str] = field(default_factory=list) # empty = all allowed # ClickUp clickupScope: str = _DEFAULT_CLICKUP_SCOPE # "titles" | "title_description" | "with_comments" clickupIndexAttachments: bool = False # Per-authority surface toggles (default everything on) gmailEnabled: bool = True driveEnabled: bool = True sharepointEnabled: bool = True outlookEnabled: bool = True # Time window maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS # 0 = no limit def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs: """Load and parse per-connection preferences from the database. Returns safe defaults for any missing or unparseable values so walkers never fail due to missing preference data. """ try: from modules.interfaces.interfaceDbApp import getRootInterface root = getRootInterface() conn = root.getUserConnectionById(connectionId) if not conn: logger.debug("loadConnectionPrefs: connection %s not found, using defaults", connectionId) return ConnectionIngestionPrefs() raw: Optional[Dict[str, Any]] = getattr(conn, "knowledgePreferences", None) if not raw or not isinstance(raw, dict): return ConnectionIngestionPrefs() def _bool(key: str, default: bool) -> bool: v = raw.get(key) return bool(v) if isinstance(v, bool) else default def _str(key: str, allowed: List[str], default: str) -> str: v = raw.get(key) return v if v in allowed else default def _int(key: str, default: int) -> int: v = raw.get(key) return int(v) if isinstance(v, int) else default surface = raw.get("surfaceToggles") or {} google_surf = surface.get("google") or {} msft_surf = surface.get("msft") or {} return ConnectionIngestionPrefs( neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False), mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH), mailIndexAttachments=_bool("mailIndexAttachments", False), filesIndexBinaries=_bool("filesIndexBinaries", True), mimeAllowlist=list(raw.get("mimeAllowlist") or []), clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE), clickupIndexAttachments=_bool("clickupIndexAttachments", False), gmailEnabled=bool(google_surf.get("gmail", True)), driveEnabled=bool(google_surf.get("drive", True)), sharepointEnabled=bool(msft_surf.get("sharepoint", True)), outlookEnabled=bool(msft_surf.get("outlook", True)), maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS), ) except Exception as exc: logger.warning("loadConnectionPrefs failed for %s, using defaults: %s", connectionId, exc) return ConnectionIngestionPrefs()