gateway/modules/serviceCenter/services/serviceKnowledge/subConnectorPrefs.py

101 lines
4 KiB
Python

"""Per-connection knowledge ingestion preference helpers.
Walkers call `loadConnectionPrefs(connectionId)` once at bootstrap start and
receive a `ConnectionIngestionPrefs` dataclass they can pass down into their
inner loops. All fields have safe defaults so walkers stay backward-compatible
with connections that predate the §2.6 preference schema (knowledgePreferences
is None).
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Fallback values applied when a connection stores no (valid) value for the
# corresponding preference.
_DEFAULT_MAX_AGE_DAYS = 90  # default for ConnectionIngestionPrefs.maxAgeDays
_DEFAULT_MAIL_DEPTH = "full"  # default for ConnectionIngestionPrefs.mailContentDepth
_DEFAULT_CLICKUP_SCOPE = "title_description"  # default for ConnectionIngestionPrefs.clickupScope
@dataclass
class ConnectionIngestionPrefs:
    """Typed view of one connection's knowledge-ingestion preferences.

    Every field has a default, so a bare ``ConnectionIngestionPrefs()`` is a
    valid instance for connections without stored preferences.

    Attributes:
        neutralizeBeforeEmbed: PII-related flag; off by default.
        mailContentDepth: Mail (Outlook + Gmail) content depth, one of
            ``"metadata"``, ``"snippet"`` or ``"full"``.
        mailIndexAttachments: Mail attachment indexing flag; off by default.
        filesIndexBinaries: File (Drive / SharePoint / OneDrive) binary
            indexing flag; on by default.
        mimeAllowlist: Allowed MIME types; an empty list means all are allowed.
        clickupScope: ClickUp scope, one of ``"titles"``,
            ``"title_description"`` or ``"with_comments"``.
        clickupIndexAttachments: ClickUp attachment indexing flag; off by
            default.
        gmailEnabled: Per-authority surface toggle for Gmail; on by default.
        driveEnabled: Per-authority surface toggle for Drive; on by default.
        sharepointEnabled: Per-authority surface toggle for SharePoint; on by
            default.
        outlookEnabled: Per-authority surface toggle for Outlook; on by default.
        maxAgeDays: Time window in days; ``0`` means no limit.
    """

    # --- PII ---------------------------------------------------------------
    neutralizeBeforeEmbed: bool = False

    # --- Mail (Outlook + Gmail) --------------------------------------------
    mailContentDepth: str = _DEFAULT_MAIL_DEPTH
    mailIndexAttachments: bool = False

    # --- Files (Drive / SharePoint / OneDrive) -----------------------------
    filesIndexBinaries: bool = True
    mimeAllowlist: List[str] = field(default_factory=list)

    # --- ClickUp -----------------------------------------------------------
    clickupScope: str = _DEFAULT_CLICKUP_SCOPE
    clickupIndexAttachments: bool = False

    # --- Surface toggles, grouped by authority (all on unless told otherwise)
    gmailEnabled: bool = True
    driveEnabled: bool = True
    sharepointEnabled: bool = True
    outlookEnabled: bool = True

    # --- Time window -------------------------------------------------------
    maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS
def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs:
    """Load and parse per-connection preferences from the database.

    Each preference is validated on its own: a missing or malformed value
    falls back to that field's default without discarding the other, valid
    fields. Any unexpected failure while loading is logged as a warning and
    answered with an all-defaults instance, so walkers never fail because of
    preference data.

    Args:
        connectionId: Identifier handed to ``getUserConnectionById``.

    Returns:
        The parsed preferences, or ``ConnectionIngestionPrefs()`` when the
        connection is not found, carries no non-empty ``knowledgePreferences``
        dict, or loading raises.
    """
    try:
        # The import stays inside the try so that an import failure also
        # results in defaults rather than an exception.
        from modules.interfaces.interfaceDbApp import getRootInterface

        root = getRootInterface()
        conn = root.getUserConnectionById(connectionId)
        if not conn:
            logger.debug("loadConnectionPrefs: connection %s not found, using defaults", connectionId)
            return ConnectionIngestionPrefs()

        raw: Optional[Dict[str, Any]] = getattr(conn, "knowledgePreferences", None)
        if not raw or not isinstance(raw, dict):
            return ConnectionIngestionPrefs()

        def _bool(key: str, default: bool) -> bool:
            """Return raw[key] only when it is a real bool."""
            v = raw.get(key)
            return v if isinstance(v, bool) else default

        def _str(key: str, allowed: List[str], default: str) -> str:
            """Return raw[key] only when it is one of the allowed values."""
            v = raw.get(key)
            return v if v in allowed else default

        def _int(key: str, default: int) -> int:
            """Return raw[key] only when it is a non-negative, non-bool int."""
            v = raw.get(key)
            # bool is a subclass of int: without the explicit exclusion a
            # stored `true` would be read as 1. Negative values have no
            # defined meaning here (only 0 = no limit), so they are rejected.
            if isinstance(v, bool) or not isinstance(v, int) or v < 0:
                return default
            return int(v)

        def _strList(key: str) -> List[str]:
            """Return the string entries of raw[key] when it is a list/tuple."""
            v = raw.get(key)
            # A bare string must not go through list(): that would split it
            # into single characters. Anything that is not a list/tuple is
            # treated as "not set" (empty = all allowed).
            if not isinstance(v, (list, tuple)):
                return []
            return [item for item in v if isinstance(item, str)]

        def _section(container: Dict[str, Any], key: str) -> Dict[str, Any]:
            """Return container[key] when it is a dict, else an empty dict."""
            v = container.get(key)
            return v if isinstance(v, dict) else {}

        def _toggle(section: Dict[str, Any], key: str) -> bool:
            """Return the truthiness of section[key]; missing/None means on."""
            v = section.get(key)
            return True if v is None else bool(v)

        surface = _section(raw, "surfaceToggles")
        google_surf = _section(surface, "google")
        msft_surf = _section(surface, "msft")

        return ConnectionIngestionPrefs(
            neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False),
            mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH),
            mailIndexAttachments=_bool("mailIndexAttachments", False),
            filesIndexBinaries=_bool("filesIndexBinaries", True),
            mimeAllowlist=_strList("mimeAllowlist"),
            clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE),
            clickupIndexAttachments=_bool("clickupIndexAttachments", False),
            gmailEnabled=_toggle(google_surf, "gmail"),
            driveEnabled=_toggle(google_surf, "drive"),
            sharepointEnabled=_toggle(msft_surf, "sharepoint"),
            outlookEnabled=_toggle(msft_surf, "outlook"),
            maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS),
        )
    except Exception as exc:
        # Deliberate best-effort boundary: preference loading must never
        # abort a walker, so every failure degrades to defaults.
        logger.warning("loadConnectionPrefs failed for %s, using defaults: %s", connectionId, exc)
        return ConnectionIngestionPrefs()