101 lines
4 KiB
Python
101 lines
4 KiB
Python
"""Per-connection knowledge ingestion preference helpers.

Walkers call `loadConnectionPrefs(connectionId)` once at bootstrap start and
receive a `ConnectionIngestionPrefs` dataclass they can pass down into their
inner loops. All fields have safe defaults so walkers stay backward-compatible
with connections that predate the §2.6 preference schema (knowledgePreferences
is None).
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Fallback values shared by the ConnectionIngestionPrefs field defaults and by
# loadConnectionPrefs() when a stored value is missing or fails validation.

# Default for `maxAgeDays` (the field comment documents 0 as "no limit").
_DEFAULT_MAX_AGE_DAYS = 90
# Default for `mailContentDepth`; accepted values are "metadata", "snippet", "full".
_DEFAULT_MAIL_DEPTH = "full"
# Default for `clickupScope`; accepted values are "titles", "title_description",
# "with_comments".
_DEFAULT_CLICKUP_SCOPE = "title_description"
|
|
|
|
|
|
@dataclass
class ConnectionIngestionPrefs:
    """Typed bundle of knowledge-ingestion settings for a single connection.

    Every field has a default, so ``ConnectionIngestionPrefs()`` with no
    arguments is a complete, usable instance.
    """

    # ---- PII ---------------------------------------------------------------
    neutralizeBeforeEmbed: bool = False

    # ---- Mail (Outlook + Gmail) --------------------------------------------
    # One of "metadata", "snippet" or "full".
    mailContentDepth: str = _DEFAULT_MAIL_DEPTH
    mailIndexAttachments: bool = False

    # ---- Files (Drive / SharePoint / OneDrive) -----------------------------
    filesIndexBinaries: bool = True
    # An empty list means every MIME type is allowed.
    mimeAllowlist: List[str] = field(default_factory=list)

    # ---- ClickUp -----------------------------------------------------------
    # One of "titles", "title_description" or "with_comments".
    clickupScope: str = _DEFAULT_CLICKUP_SCOPE
    clickupIndexAttachments: bool = False

    # ---- Per-authority surface toggles (all on by default) -----------------
    gmailEnabled: bool = True
    driveEnabled: bool = True
    sharepointEnabled: bool = True
    outlookEnabled: bool = True

    # ---- Time window -------------------------------------------------------
    # 0 means no age limit.
    maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS
|
|
|
|
|
|
def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs:
    """Load and parse per-connection ingestion preferences from the database.

    Looks up the connection through the root DB interface, reads its
    ``knowledgePreferences`` attribute and validates each known key
    individually. A key that is missing or has an unexpected type/value falls
    back to that field's default without affecting the other keys.

    Args:
        connectionId: Identifier passed to ``getUserConnectionById``.

    Returns:
        A ``ConnectionIngestionPrefs`` instance. All-default preferences are
        returned when the connection is not found, when it carries no
        ``knowledgePreferences`` dict, or when any exception occurs while
        loading (the failure is logged at WARNING level and never propagated).
    """
    try:
        # Imported at call time rather than at module import.
        # NOTE(review): presumably to avoid an import cycle - confirm.
        from modules.interfaces.interfaceDbApp import getRootInterface

        conn = getRootInterface().getUserConnectionById(connectionId)
        if not conn:
            logger.debug("loadConnectionPrefs: connection %s not found, using defaults", connectionId)
            return ConnectionIngestionPrefs()

        raw: Optional[Dict[str, Any]] = getattr(conn, "knowledgePreferences", None)
        if not raw or not isinstance(raw, dict):
            return ConnectionIngestionPrefs()

        def _bool(key: str, default: bool) -> bool:
            # Only genuine booleans are accepted; anything else uses the default.
            v = raw.get(key)
            return v if isinstance(v, bool) else default

        def _str(key: str, allowed: List[str], default: str) -> str:
            v = raw.get(key)
            return v if v in allowed else default

        def _int(key: str, default: int) -> int:
            v = raw.get(key)
            # bool is a subclass of int, so True/False must be rejected
            # explicitly; negative values are not meaningful (0 = no limit).
            if isinstance(v, bool) or not isinstance(v, int) or v < 0:
                return default
            return int(v)

        def _strList(key: str) -> List[str]:
            v = raw.get(key)
            # A bare string would be split into characters by list(), and a
            # non-iterable would raise; accept only real sequences of strings.
            if not isinstance(v, (list, tuple)):
                return []
            return [item for item in v if isinstance(item, str)]

        def _dict(container: Dict[str, Any], key: str) -> Dict[str, Any]:
            # A malformed (non-dict) section is treated as absent instead of
            # raising and discarding every other preference.
            v = container.get(key)
            return v if isinstance(v, dict) else {}

        surface = _dict(raw, "surfaceToggles")
        google_surf = _dict(surface, "google")
        msft_surf = _dict(surface, "msft")

        return ConnectionIngestionPrefs(
            neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False),
            mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH),
            mailIndexAttachments=_bool("mailIndexAttachments", False),
            filesIndexBinaries=_bool("filesIndexBinaries", True),
            mimeAllowlist=_strList("mimeAllowlist"),
            clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE),
            clickupIndexAttachments=_bool("clickupIndexAttachments", False),
            # Surface toggles keep truthiness semantics: a missing key means on.
            gmailEnabled=bool(google_surf.get("gmail", True)),
            driveEnabled=bool(google_surf.get("drive", True)),
            sharepointEnabled=bool(msft_surf.get("sharepoint", True)),
            outlookEnabled=bool(msft_surf.get("outlook", True)),
            maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS),
        )
    except Exception as exc:
        # Deliberate best-effort boundary: walkers must never fail because
        # preference data could not be loaded.
        logger.warning("loadConnectionPrefs failed for %s, using defaults: %s", connectionId, exc)
        return ConnectionIngestionPrefs()
|