512 lines
18 KiB
Python
512 lines
18 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""ClickUp bootstrap for the unified knowledge ingestion lane.
|
|
|
|
ClickUp tasks are ingested as *virtual documents* — we never download file
|
|
bytes. Each task becomes a `sourceKind="clickup_task"` IngestionJob whose
|
|
`contentObjects` carry a summary header (name + status + metadata) and the
|
|
task description / text content so retrieval finds them without a live API
|
|
call.
|
|
|
|
Hierarchy traversal: workspace (team) → spaces → folders / folderless lists →
|
|
tasks. We cap the fan-out with `maxWorkspaces` / `maxListsPerWorkspace` /
|
|
`maxTasks` and skip tasks older than `maxAgeDays` (default 180 d).
|
|
|
|
Idempotency: `date_updated` from the ClickUp task payload is a millisecond
|
|
timestamp and strictly monotonic per revision — used as `contentVersion`.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import logging
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timedelta, timezone
|
|
from typing import Any, Callable, Dict, List, Optional
|
|
|
|
logger = logging.getLogger(__name__)

# Default fan-out caps for one bootstrap run; each can be overridden via
# ClickupBootstrapLimits. See the module docstring for how they are applied.
MAX_TASKS_DEFAULT = 500
MAX_WORKSPACES_DEFAULT = 3
MAX_LISTS_PER_WORKSPACE_DEFAULT = 20
# Per-part character cap applied to description / text_content (see _truncate).
MAX_DESCRIPTION_CHARS_DEFAULT = 8000
# Recency window: tasks with `date_updated` older than this are skipped.
MAX_AGE_DAYS_DEFAULT = 180
|
|
|
|
|
|
@dataclass
class ClickupBootstrapLimits:
    """Tunable caps and content-scope settings for one ClickUp bootstrap run."""

    # Global task budget: indexed + duplicate tasks both count against it.
    maxTasks: int = MAX_TASKS_DEFAULT
    # At most this many ClickUp teams (workspaces) are traversed.
    maxWorkspaces: int = MAX_WORKSPACES_DEFAULT
    # Cap on lists collected per workspace (folderless + foldered combined).
    maxListsPerWorkspace: int = MAX_LISTS_PER_WORKSPACE_DEFAULT
    # Character cap applied to each description / text_content part.
    maxDescriptionChars: int = MAX_DESCRIPTION_CHARS_DEFAULT
    # Only ingest tasks updated within the last N days. None disables filter.
    maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
    # Include closed/archived tasks if they still meet the recency filter.
    # ClickUp `closed` tasks often carry the most useful RAG context
    # ("why was this shipped the way it was?").
    includeClosed: bool = True
    # Pass-through to IngestionJob.neutralize
    neutralize: bool = False
    # Content scope: "titles" | "title_description" | "with_comments"
    clickupScope: str = "title_description"
|
|
|
|
|
|
@dataclass
class ClickupBootstrapResult:
    """Mutable counters accumulated in place while walking one connection."""

    connectionId: str
    # Tasks newly indexed by the knowledge service.
    indexed: int = 0
    # Tasks the knowledge service reported as duplicates (already known).
    skippedDuplicate: int = 0
    # Tasks skipped by local policy (recency filter, missing task id).
    skippedPolicy: int = 0
    # Ingestion attempts that raised or returned an unexpected status.
    failed: int = 0
    # Workspaces (teams) actually visited.
    workspaces: int = 0
    # Lists actually walked.
    lists: int = 0
    # Human-readable error strings; the summary payload keeps only the first 20.
    errors: List[str] = field(default_factory=list)
|
|
|
|
|
|
def _syntheticTaskId(connectionId: str, taskId: str) -> str:
|
|
token = hashlib.sha256(f"{connectionId}:{taskId}".encode("utf-8")).hexdigest()[:16]
|
|
return f"cu:{connectionId[:8]}:{token}"
|
|
|
|
|
|
def _truncate(value: Any, limit: int) -> str:
|
|
text = str(value or "").strip()
|
|
if not text:
|
|
return ""
|
|
if len(text) <= limit:
|
|
return text
|
|
return text[:limit].rstrip() + "\n[truncated]"
|
|
|
|
|
|
def _isRecent(dateUpdatedMs: Any, maxAgeDays: Optional[int]) -> bool:
|
|
if not maxAgeDays:
|
|
return True
|
|
if not dateUpdatedMs:
|
|
return True
|
|
try:
|
|
ts = datetime.fromtimestamp(int(dateUpdatedMs) / 1000.0, tz=timezone.utc)
|
|
except Exception:
|
|
return True
|
|
cutoff = datetime.now(timezone.utc) - timedelta(days=maxAgeDays)
|
|
return ts >= cutoff
|
|
|
|
|
|
def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -> List[Dict[str, Any]]:
    """Header (name/status/metadata) + optional description + text_content.

    `limits.clickupScope` controls how much is embedded:
      - "titles": task name + status metadata only
      - "title_description": header + description / text_content (default)
      - "with_comments": header + description + text_content
        (comments themselves are not yet fetched in v1)
    """
    title = task.get("name") or f"Task {task.get('id', '')}"
    statusLabel = ((task.get("status") or {}).get("status")) or ""
    assigneeLine = ", ".join(
        label
        for label in (
            (person.get("username") or person.get("email") or "")
            for person in (task.get("assignees") or [])
        )
        if label
    )
    tagLine = ", ".join(
        tagName
        for tagName in (tag.get("name", "") for tag in (task.get("tags") or []))
        if tagName
    )
    listMeta = task.get("list") or {}
    folderMeta = task.get("folder") or {}
    spaceMeta = task.get("space") or {}

    dueLabel = ""
    rawDue = task.get("due_date")
    if rawDue:
        try:
            dueDt = datetime.fromtimestamp(int(rawDue) / 1000.0, tz=timezone.utc)
            dueLabel = dueDt.strftime("%Y-%m-%d")
        except Exception:
            dueLabel = ""

    # Only non-empty metadata lines make it into the header.
    lines: List[str] = [f"Task: {title}"]
    if statusLabel:
        lines.append(f"Status: {statusLabel}")
    if listMeta:
        lines.append(f"List: {listMeta.get('name', '')}")
    if folderMeta:
        lines.append(f"Folder: {folderMeta.get('name', '')}")
    if spaceMeta:
        lines.append(f"Space: {spaceMeta.get('name', '')}")
    if assigneeLine:
        lines.append(f"Assignees: {assigneeLine}")
    if tagLine:
        lines.append(f"Tags: {tagLine}")
    if dueLabel:
        lines.append(f"Due: {dueLabel}")
    if task.get("url"):
        lines.append(f"Url: {task.get('url', '')}")

    parts: List[Dict[str, Any]] = [{
        "contentObjectId": "header",
        "contentType": "text",
        "data": "\n".join(lines),
        "contextRef": {"part": "header"},
    }]

    scope = getattr(limits, "clickupScope", "title_description")
    if scope not in ("title_description", "with_comments"):
        # "titles" scope: the header is all we embed.
        return parts

    description = _truncate(task.get("description"), limits.maxDescriptionChars)
    if description:
        parts.append({
            "contentObjectId": "description",
            "contentType": "text",
            "data": description,
            "contextRef": {"part": "description"},
        })
    # text_content is ClickUp's rendered-markdown version; include if it adds
    # something beyond the plain description (common for bullet lists, checklists).
    rendered = _truncate(task.get("text_content"), limits.maxDescriptionChars)
    if rendered and rendered != description:
        parts.append({
            "contentObjectId": "text_content",
            "contentType": "text",
            "data": rendered,
            "contextRef": {"part": "text_content"},
        })
    return parts
|
|
|
|
|
|
async def bootstrapClickup(
    connectionId: str,
    *,
    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
    adapter: Any = None,
    connection: Any = None,
    knowledgeService: Any = None,
    limits: Optional[ClickupBootstrapLimits] = None,
) -> Dict[str, Any]:
    """Walk workspaces → lists → tasks and ingest each task as a virtual doc.

    Args:
        connectionId: UserConnection id to bootstrap.
        progressCb: optional (percent, message) callback; invoked best-effort.
        adapter / connection / knowledgeService: dependency injection hooks;
            any that are None are resolved via `_resolveDependencies`.
        limits: explicit caps; when omitted, built from the per-connection prefs.

    Returns:
        The summary dict produced by `_finalizeResult` (counters + errors).
    """
    # Function-scope import: keeps the knowledge-service dependency lazy.
    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
    prefs = loadConnectionPrefs(connectionId)

    # Derive limits from stored prefs unless the caller supplied them.
    if not limits:
        limits = ClickupBootstrapLimits(
            # A non-positive pref disables the recency filter entirely.
            maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
            neutralize=prefs.neutralizeBeforeEmbed,
            clickupScope=prefs.clickupScope,
        )

    # NOTE: value is in *seconds* despite the name; _finalizeResult converts to ms.
    startMs = time.time()
    result = ClickupBootstrapResult(connectionId=connectionId)

    logger.info(
        "ingestion.connection.bootstrap.started part=clickup connectionId=%s",
        connectionId,
        extra={
            "event": "ingestion.connection.bootstrap.started",
            "part": "clickup",
            "connectionId": connectionId,
        },
    )

    # Resolve any dependency the caller did not inject.
    if adapter is None or knowledgeService is None or connection is None:
        adapter, connection, knowledgeService = await _resolveDependencies(connectionId)

    mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
    userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""

    # The raw ClickUp API client is expected on the adapter's private `_svc`.
    svc = getattr(adapter, "_svc", None)
    if svc is None:
        result.errors.append("adapter missing _svc instance")
        return _finalizeResult(connectionId, result, startMs)

    try:
        teamsResp = await svc.getAuthorizedTeams()
    except Exception as exc:
        # Without team discovery nothing can be walked — bail with the error recorded.
        logger.error("clickup team discovery failed for %s: %s", connectionId, exc, exc_info=True)
        result.errors.append(f"teams: {exc}")
        return _finalizeResult(connectionId, result, startMs)

    teams = (teamsResp or {}).get("teams") or []
    for team in teams[: limits.maxWorkspaces]:
        # Global task budget: indexed + duplicates both count as processed.
        if result.indexed + result.skippedDuplicate >= limits.maxTasks:
            break
        teamId = str(team.get("id", "") or "")
        if not teamId:
            continue
        result.workspaces += 1
        try:
            await _walkTeam(
                svc=svc,
                knowledgeService=knowledgeService,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                team=team,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )
        except Exception as exc:
            # One failing workspace must not abort the others; record and continue.
            logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
            result.errors.append(f"team({teamId}): {exc}")

    return _finalizeResult(connectionId, result, startMs)
|
|
|
|
|
|
async def _resolveDependencies(connectionId: str):
    """Resolve (adapter, connection, knowledgeService) for a connection id.

    Raises:
        ValueError: if the UserConnection does not exist or no valid access
            token can be obtained for it.
    """
    # Function-scope imports: defer heavy project modules until actually needed.
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.auth import TokenManager
    from modules.connectors.providerClickup.connectorClickup import ClickupConnector
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.security.rootAccess import getRootUser

    rootInterface = getRootInterface()
    connection = rootInterface.getUserConnectionById(connectionId)
    if connection is None:
        raise ValueError(f"UserConnection not found: {connectionId}")

    token = TokenManager().getFreshToken(connectionId)
    if not token or not token.tokenAccess:
        raise ValueError(f"No valid token for connection {connectionId}")

    provider = ClickupConnector(connection, token.tokenAccess)
    adapter = provider.getServiceAdapter("clickup")

    # The knowledge service is created under the root user, scoped to the
    # connection's mandate.
    rootUser = getRootUser()
    ctx = ServiceCenterContext(
        user=rootUser,
        mandate_id=str(getattr(connection, "mandateId", "") or ""),
    )
    knowledgeService = getService("knowledge", ctx)
    return adapter, connection, knowledgeService
|
|
|
|
|
|
async def _walkTeam(
    *,
    svc,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    team: Dict[str, Any],
    limits: ClickupBootstrapLimits,
    result: ClickupBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    """Collect up to `limits.maxListsPerWorkspace` lists for one team, then walk each.

    Lists are gathered per space in two passes — folderless lists first, then
    lists inside folders — with the originating space/folder stashed under the
    private `_space` / `_folder` keys so `_walkList` can attach that metadata
    to tasks. Mutates `result` in place.
    """
    teamId = str(team.get("id", "") or "")
    spacesResp = await svc.getSpaces(teamId)
    spaces = (spacesResp or {}).get("spaces") or []

    listsCollected: List[Dict[str, Any]] = []
    for space in spaces:
        if len(listsCollected) >= limits.maxListsPerWorkspace:
            break
        spaceId = str(space.get("id", "") or "")
        if not spaceId:
            continue

        # Folderless lists directly under the space
        folderless = await svc.getFolderlessLists(spaceId)
        for lst in (folderless or {}).get("lists") or []:
            if len(listsCollected) >= limits.maxListsPerWorkspace:
                break
            listsCollected.append({**lst, "_space": space})

        # Lists inside folders
        foldersResp = await svc.getFolders(spaceId)
        for folder in (foldersResp or {}).get("folders") or []:
            if len(listsCollected) >= limits.maxListsPerWorkspace:
                break
            folderId = str(folder.get("id", "") or "")
            if not folderId:
                continue
            folderLists = await svc.getListsInFolder(folderId)
            for lst in (folderLists or {}).get("lists") or []:
                if len(listsCollected) >= limits.maxListsPerWorkspace:
                    break
                listsCollected.append({**lst, "_space": space, "_folder": folder})

    # Walk the collected lists until done or the global task budget is spent.
    for lst in listsCollected:
        if result.indexed + result.skippedDuplicate >= limits.maxTasks:
            return
        result.lists += 1
        await _walkList(
            svc=svc,
            knowledgeService=knowledgeService,
            connectionId=connectionId,
            mandateId=mandateId,
            userId=userId,
            teamId=teamId,
            lst=lst,
            limits=limits,
            result=result,
            progressCb=progressCb,
        )
|
|
|
|
|
|
async def _walkList(
    *,
    svc,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    teamId: str,
    lst: Dict[str, Any],
    limits: ClickupBootstrapLimits,
    result: ClickupBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    """Page through a list's tasks, ingesting each one that passes the filters.

    Stops when the global task budget is reached, the API reports an error, a
    page comes back empty, or a short page signals the end of pagination.
    Mutates `result` in place.
    """
    listId = str(lst.get("id", "") or "")
    if not listId:
        return
    page = 0
    while result.indexed + result.skippedDuplicate < limits.maxTasks:
        resp = await svc.getTasksInList(
            listId,
            page=page,
            include_closed=limits.includeClosed,
            subtasks=True,
        )
        # API failures may come back as an {"error": ...} payload rather than
        # an exception — treat that as terminal for this list.
        if isinstance(resp, dict) and resp.get("error"):
            logger.warning("clickup tasks list=%s page=%d error: %s", listId, page, resp.get("error"))
            result.errors.append(f"list({listId}): {resp.get('error')}")
            return
        tasks = (resp or {}).get("tasks") or []
        if not tasks:
            return

        for task in tasks:
            if result.indexed + result.skippedDuplicate >= limits.maxTasks:
                return
            if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
                result.skippedPolicy += 1
                continue
            # Inject the list/folder/space metadata we already loaded.
            task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")}
            task["folder"] = task.get("folder") or lst.get("_folder") or {}
            task["space"] = task.get("space") or lst.get("_space") or {}
            await _ingestTask(
                knowledgeService=knowledgeService,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                teamId=teamId,
                task=task,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )

        # NOTE(review): assumes the client requests ClickUp's default page size
        # of 100 — confirm against svc.getTasksInList's request parameters.
        if len(tasks) < 100:  # ClickUp page-size hint: fewer than 100 => last page
            return
        page += 1
|
|
|
|
|
|
async def _ingestTask(
    *,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    teamId: str,
    task: Dict[str, Any],
    limits: ClickupBootstrapLimits,
    result: ClickupBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    """Submit one ClickUp task to the knowledge service as a virtual document.

    Updates the `result` counters from the returned handle status
    ("duplicate" / "indexed" / anything else counts as failed) and emits a
    progress callback + log line every 50 processed tasks.
    """
    # Function-scope import: keeps the knowledge-service dependency lazy.
    from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

    taskId = str(task.get("id", "") or "")
    if not taskId:
        result.skippedPolicy += 1
        return
    # `date_updated` (ms, monotonic per revision) is the idempotency key;
    # falls back to creation time, then to no version at all.
    revision = str(task.get("date_updated") or task.get("date_created") or "")
    name = task.get("name") or f"Task {taskId}"
    syntheticId = _syntheticTaskId(connectionId, taskId)
    fileName = f"{name[:80].strip() or taskId}.task.json"

    contentObjects = _buildContentObjects(task, limits)

    try:
        handle = await knowledgeService.requestIngestion(
            IngestionJob(
                sourceKind="clickup_task",
                sourceId=syntheticId,
                fileName=fileName,
                mimeType="application/vnd.clickup.task+json",
                userId=userId,
                mandateId=mandateId,
                contentObjects=contentObjects,
                contentVersion=revision or None,
                neutralize=limits.neutralize,
                provenance={
                    "connectionId": connectionId,
                    "authority": "clickup",
                    "service": "clickup",
                    "externalItemId": taskId,
                    "teamId": teamId,
                    "listId": ((task.get("list") or {}).get("id")),
                    "spaceId": ((task.get("space") or {}).get("id")),
                    "url": task.get("url"),
                    "status": ((task.get("status") or {}).get("status")),
                    "tier": limits.clickupScope,
                },
            )
        )
    except Exception as exc:
        # One failing task must not abort the walk; record and move on.
        logger.error("clickup ingestion %s failed: %s", taskId, exc, exc_info=True)
        result.failed += 1
        result.errors.append(f"ingest({taskId}): {exc}")
        return

    if handle.status == "duplicate":
        result.skippedDuplicate += 1
    elif handle.status == "indexed":
        result.indexed += 1
    else:
        # Any other status is treated as a failure.
        result.failed += 1

    # Report progress every 50 processed (indexed + duplicate) tasks.
    if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
        processed = result.indexed + result.skippedDuplicate
        try:
            progressCb(
                # Scale processed/maxTasks into the 10–90 band; never reports
                # 100% from inside the walk.
                min(90, 10 + int(80 * processed / max(1, limits.maxTasks))),
                f"clickup processed={processed}",
            )
        except Exception:
            # Progress reporting is best-effort; never fail ingestion over it.
            pass
        logger.info(
            "ingestion.connection.bootstrap.progress part=clickup processed=%d skippedDup=%d failed=%d",
            processed, result.skippedDuplicate, result.failed,
            extra={
                "event": "ingestion.connection.bootstrap.progress",
                "part": "clickup",
                "connectionId": connectionId,
                "processed": processed,
                "skippedDup": result.skippedDuplicate,
                "failed": result.failed,
            },
        )
|
|
|
|
|
|
def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs: float) -> Dict[str, Any]:
    """Emit the bootstrap completion log line and shape the summary payload.

    `startMs` is a `time.time()` value in seconds (despite the name); the
    elapsed time is converted to whole milliseconds here. The returned dict
    caps `errors` at the first 20 entries.
    """
    elapsedMs = int((time.time() - startMs) * 1000)
    logger.info(
        "ingestion.connection.bootstrap.done part=clickup connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d workspaces=%d lists=%d durationMs=%d",
        connectionId,
        result.indexed,
        result.skippedDuplicate,
        result.skippedPolicy,
        result.failed,
        result.workspaces,
        result.lists,
        elapsedMs,
        extra={
            "event": "ingestion.connection.bootstrap.done",
            "part": "clickup",
            "connectionId": connectionId,
            "indexed": result.indexed,
            "skippedDup": result.skippedDuplicate,
            "skippedPolicy": result.skippedPolicy,
            "failed": result.failed,
            "workspaces": result.workspaces,
            "lists": result.lists,
            "durationMs": elapsedMs,
        },
    )
    summary: Dict[str, Any] = {
        "connectionId": result.connectionId,
        "indexed": result.indexed,
        "skippedDuplicate": result.skippedDuplicate,
        "skippedPolicy": result.skippedPolicy,
        "failed": result.failed,
        "workspaces": result.workspaces,
        "lists": result.lists,
        "durationMs": elapsedMs,
        "errors": result.errors[:20],
    }
    return summary
|