Merge branch 'int' of git.poweron.swiss:PowerOn/platform-core into int
All checks were successful
Deploy Plattform-Core (Int) / test (push) Successful in 1m2s
Deploy Plattform-Core (Int) / deploy (push) Successful in 10s

This commit is contained in:
Ida 2026-05-29 06:41:57 +02:00
commit 3345f65c40
6 changed files with 108 additions and 32 deletions

View file

@ -7,6 +7,7 @@ and database migration (backup / restore).
import json import json
import logging import logging
import os
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile, status from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile, status
@ -570,13 +571,34 @@ async def postMigrationUploadImport(
fileSizeMb = round(totalBytes / (1024 * 1024), 1) fileSizeMb = round(totalBytes / (1024 * 1024), 1)
logger.info("SysAdmin migration upload-import: %s bytes on disk (%.1f MB)", totalBytes, fileSizeMb) logger.info("SysAdmin migration upload-import: %s bytes on disk (%.1f MB)", totalBytes, fileSizeMb)
_pendingProcessing[token] = {"filePath": filePath, "tmpDir": tmpDir} _writeTokenMeta(token, "processing", {"filePath": filePath, "tmpDir": tmpDir})
return {"token": token, "fileSizeMb": fileSizeMb} return {"token": token, "fileSizeMb": fileSizeMb}
_pendingProcessing: Dict[str, dict] = {} def _tokenMetaPath(token: str, kind: str) -> str:
_pendingImports: Dict[str, dict] = {} import tempfile
return os.path.join(tempfile.gettempdir(), f"poweron_{kind}_{token}.meta.json")
def _writeTokenMeta(token: str, kind: str, data: dict) -> None:
    """Persist *data* as JSON in the metadata file belonging to *token*.

    The document is first written to a sibling temporary file and then moved
    into place with ``os.replace`` so that a reader never observes a
    half-written file, and a failed serialisation leaves no partial file
    behind under the final name.

    Args:
        token: Identifier of the pending operation.
        kind: Namespace label passed through to ``_tokenMetaPath``.
        data: JSON-serialisable payload to store.

    Raises:
        OSError: If the file cannot be written or moved into place.
        TypeError: If *data* is not JSON-serialisable.
    """
    path = _tokenMetaPath(token, kind)
    # The pid suffix keeps concurrent writers in different processes from
    # sharing one scratch file.
    scratchPath = f"{path}.{os.getpid()}.tmp"
    try:
        with open(scratchPath, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False)
        os.replace(scratchPath, path)
    except Exception:
        # Best-effort cleanup of the scratch file; the original error wins.
        try:
            os.remove(scratchPath)
        except OSError:
            pass
        raise
def _readTokenMeta(token: str, kind: str, pop: bool = False) -> dict | None:
    """Load the JSON metadata stored for *token*, optionally consuming it.

    Args:
        token: Identifier of the pending operation.
        kind: Namespace label passed through to ``_tokenMetaPath``.
        pop: When true, the metadata file is deleted after a successful read.

    Returns:
        The decoded metadata, or ``None`` when no file exists for the token
        or the file content cannot be decoded as JSON.
    """
    path = _tokenMetaPath(token, kind)
    try:
        # Open directly instead of checking for existence first: the file may
        # be removed by another request between a check and the open.
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return None
    except ValueError as exc:
        # Covers json.JSONDecodeError and UnicodeDecodeError; an unreadable
        # document is reported to callers the same way as a missing token.
        logger.warning("Token metadata file %s is not valid JSON: %s", path, exc)
        return None
    if pop:
        try:
            os.remove(path)
        except OSError:
            # Removal is best-effort; the data has already been read.
            pass
    return data
@router.get("/migration/process-import-stream") @router.get("/migration/process-import-stream")
@ -598,7 +620,7 @@ def getProcessImportStream(
import queue import queue
import threading import threading
pending = _pendingProcessing.pop(token, None) pending = _readTokenMeta(token, "processing", pop=True)
if not pending: if not pending:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST,
detail="Invalid or expired processing token.") detail="Invalid or expired processing token.")
@ -643,10 +665,10 @@ def getProcessImportStream(
except OSError: except OSError:
pass pass
_pendingImports[token] = { _writeTokenMeta(token, "import", {
"dbFiles": dbFiles, "dbFiles": dbFiles,
"protectedIds": protectedIds, "protectedIds": protectedIds,
} })
q.put({"phase": "done", "result": { q.put({"phase": "done", "result": {
"token": token, "token": token,
@ -704,7 +726,7 @@ def postMigrationImportSingle(
if mode not in ("replace", "merge"): if mode not in ("replace", "merge"):
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid mode: '{mode}'.") raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid mode: '{mode}'.")
pending = _pendingImports.get(token) pending = _readTokenMeta(token, "import")
if not pending: if not pending:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid or expired import token.") raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid or expired import token.")
@ -749,7 +771,7 @@ def postMigrationImportDone(
import os import os
token = body.get("token", "") token = body.get("token", "")
pending = _pendingImports.pop(token, None) pending = _readTokenMeta(token, "import", pop=True)
if pending: if pending:
for dbEntry in pending.get("dbFiles", {}).values(): for dbEntry in pending.get("dbFiles", {}).values():
if isinstance(dbEntry, str): if isinstance(dbEntry, str):

View file

@ -140,6 +140,21 @@ _SOURCE_TYPE_MAP = {
} }
def _findConnectionRootRagLimits(allDs) -> dict:
"""Return ragLimits from the connection root (path='/') if set, else {}."""
for ds in allDs:
dsDict = ds if isinstance(ds, dict) else {**ds.__dict__}
path = dsDict.get("path", "")
if path not in ("/", ""):
continue
settings = dsDict.get("settings")
if isinstance(settings, dict):
limits = settings.get("ragLimits")
if isinstance(limits, dict) and limits:
return dict(limits)
return {}
def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list] = None): def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list] = None):
"""Load DataSource rows whose *effective* ragIndexEnabled is True. """Load DataSource rows whose *effective* ragIndexEnabled is True.
@ -161,6 +176,8 @@ def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list]
rootIf = getRootInterface() rootIf = getRootInterface()
allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId}) allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
connectionRootLimits = _findConnectionRootRagLimits(allDs)
resolved = [] resolved = []
for ds in allDs: for ds in allDs:
effRagIndex = getEffectiveFlag(ds, "ragIndexEnabled", allDs) effRagIndex = getEffectiveFlag(ds, "ragIndexEnabled", allDs)
@ -170,6 +187,15 @@ def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list]
dsCopy["neutralize"] = getEffectiveFlag(ds, "neutralize", allDs) dsCopy["neutralize"] = getEffectiveFlag(ds, "neutralize", allDs)
dsCopy["scope"] = getEffectiveFlag(ds, "scope", allDs) dsCopy["scope"] = getEffectiveFlag(ds, "scope", allDs)
dsCopy["ragIndexEnabled"] = True dsCopy["ragIndexEnabled"] = True
if connectionRootLimits:
dsSettings = dsCopy.get("settings") or {}
if not isinstance(dsSettings, dict):
dsSettings = {}
ownLimits = dsSettings.get("ragLimits")
if not isinstance(ownLimits, dict) or not ownLimits:
dsCopy["settings"] = {**dsSettings, "ragLimits": connectionRootLimits}
resolved.append(dsCopy) resolved.append(dsCopy)
if dataSourceIds: if dataSourceIds:
resolved = [ds for ds in resolved if ds.get("id") in dataSourceIds] resolved = [ds for ds in resolved if ds.get("id") in dataSourceIds]

View file

@ -234,6 +234,7 @@ async def bootstrapClickup(
teams = (teamsResp or {}).get("teams") or [] teams = (teamsResp or {}).get("teams") or []
cancelled = False cancelled = False
effectiveLimits = limits
for ds in dataSources: for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxTasks: if result.indexed + result.skippedDuplicate >= limits.maxTasks:
_recordLimitStop(result, "maxTasks", "dataSource", limits) _recordLimitStop(result, "maxTasks", "dataSource", limits)
@ -255,6 +256,7 @@ async def bootstrapClickup(
neutralize=dsNeutralize, neutralize=dsNeutralize,
clickupScope=limits.clickupScope, clickupScope=limits.clickupScope,
) )
effectiveLimits = dsLimits
if len(teams) > dsLimits.maxWorkspaces: if len(teams) > dsLimits.maxWorkspaces:
_recordLimitStop(result, "maxWorkspaces", "teams", dsLimits, hard=False) _recordLimitStop(result, "maxWorkspaces", "teams", dsLimits, hard=False)
@ -283,7 +285,7 @@ async def bootstrapClickup(
logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True) logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
result.errors.append(f"team({teamId}): {exc}") result.errors.append(f"team({teamId}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs) finalResult = _finalizeResult(connectionId, result, startMs, effectiveLimits)
if cancelled: if cancelled:
finalResult["cancelled"] = True finalResult["cancelled"] = True
return finalResult return finalResult
@ -574,7 +576,12 @@ def _recordLimitStop(
) )
def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs: float) -> Dict[str, Any]: def _finalizeResult(
connectionId: str,
result: ClickupBootstrapResult,
startMs: float,
effectiveLimits: Optional[ClickupBootstrapLimits] = None,
) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000) durationMs = int((time.time() - startMs) * 1000)
logger.info( logger.info(
"ingestion.connection.bootstrap.done part=clickup connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d workspaces=%d lists=%d durationMs=%d stoppedAtLimit=%s", "ingestion.connection.bootstrap.done part=clickup connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d workspaces=%d lists=%d durationMs=%d stoppedAtLimit=%s",
@ -608,9 +615,9 @@ def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs:
"errors": result.errors[:20], "errors": result.errors[:20],
"stoppedAtLimit": result.stoppedAtLimit, "stoppedAtLimit": result.stoppedAtLimit,
"limits": { "limits": {
"maxTasks": MAX_TASKS_DEFAULT, "maxTasks": effectiveLimits.maxTasks if effectiveLimits else MAX_TASKS_DEFAULT,
"maxWorkspaces": MAX_WORKSPACES_DEFAULT, "maxWorkspaces": effectiveLimits.maxWorkspaces if effectiveLimits else MAX_WORKSPACES_DEFAULT,
"maxListsPerWorkspace": MAX_LISTS_PER_WORKSPACE_DEFAULT, "maxListsPerWorkspace": effectiveLimits.maxListsPerWorkspace if effectiveLimits else MAX_LISTS_PER_WORKSPACE_DEFAULT,
"maxAgeDays": MAX_AGE_DAYS_DEFAULT, "maxAgeDays": effectiveLimits.maxAgeDays if effectiveLimits else MAX_AGE_DAYS_DEFAULT,
}, },
} }

View file

@ -172,6 +172,7 @@ async def bootstrapGdrive(
userId = str(getattr(connection, "userId", "") or "") if connection is not None else "" userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
cancelled = False cancelled = False
effectiveLimits = limits
for ds in dataSources: for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxItems: if result.indexed + result.skippedDuplicate >= limits.maxItems:
break break
@ -193,6 +194,7 @@ async def bootstrapGdrive(
maxAgeDays=dsMaxAgeDays, maxAgeDays=dsMaxAgeDays,
neutralize=dsNeutralize, neutralize=dsNeutralize,
) )
effectiveLimits = dsLimits
try: try:
await _walkFolder( await _walkFolder(
@ -213,7 +215,7 @@ async def bootstrapGdrive(
logger.error("gdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True) logger.error("gdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
result.errors.append(f"walk({dsPath}): {exc}") result.errors.append(f"walk({dsPath}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs) finalResult = _finalizeResult(connectionId, result, startMs, effectiveLimits)
if cancelled: if cancelled:
finalResult["cancelled"] = True finalResult["cancelled"] = True
return finalResult return finalResult
@ -515,7 +517,12 @@ def _recordLimitStop(
) )
def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]: def _finalizeResult(
connectionId: str,
result: GdriveBootstrapResult,
startMs: float,
effectiveLimits: Optional[GdriveBootstrapLimits] = None,
) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000) durationMs = int((time.time() - startMs) * 1000)
logger.info( logger.info(
"ingestion.connection.bootstrap.done part=gdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d bytes=%d durationMs=%d stoppedAtLimit=%s", "ingestion.connection.bootstrap.done part=gdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d bytes=%d durationMs=%d stoppedAtLimit=%s",
@ -547,9 +554,9 @@ def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: f
"errors": result.errors[:20], "errors": result.errors[:20],
"stoppedAtLimit": result.stoppedAtLimit, "stoppedAtLimit": result.stoppedAtLimit,
"limits": { "limits": {
"maxItems": MAX_ITEMS_DEFAULT, "maxItems": effectiveLimits.maxItems if effectiveLimits else MAX_ITEMS_DEFAULT,
"maxBytes": MAX_BYTES_DEFAULT, "maxBytes": effectiveLimits.maxBytes if effectiveLimits else MAX_BYTES_DEFAULT,
"maxFileSize": MAX_FILE_SIZE_DEFAULT, "maxFileSize": effectiveLimits.maxFileSize if effectiveLimits else MAX_FILE_SIZE_DEFAULT,
"maxDepth": MAX_DEPTH_DEFAULT, "maxDepth": effectiveLimits.maxDepth if effectiveLimits else MAX_DEPTH_DEFAULT,
}, },
} }

View file

@ -141,6 +141,7 @@ async def bootstrapKdrive(
userId = str(getattr(connection, "userId", "") or "") if connection is not None else "" userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
cancelled = False cancelled = False
effectiveLimits = limits
for ds in dataSources: for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxItems: if result.indexed + result.skippedDuplicate >= limits.maxItems:
break break
@ -160,6 +161,7 @@ async def bootstrapKdrive(
maxDepth=eff.get("maxDepth", limits.maxDepth), maxDepth=eff.get("maxDepth", limits.maxDepth),
neutralize=dsNeutralize, neutralize=dsNeutralize,
) )
effectiveLimits = dsLimits
try: try:
await _walkFolder( await _walkFolder(
@ -180,7 +182,7 @@ async def bootstrapKdrive(
logger.error("kdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True) logger.error("kdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
result.errors.append(f"walk({dsPath}): {exc}") result.errors.append(f"walk({dsPath}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs) finalResult = _finalizeResult(connectionId, result, startMs, effectiveLimits)
if cancelled: if cancelled:
finalResult["cancelled"] = True finalResult["cancelled"] = True
return finalResult return finalResult
@ -460,7 +462,12 @@ def _recordLimitStop(
) )
def _finalizeResult(connectionId: str, result: KdriveBootstrapResult, startMs: float) -> Dict[str, Any]: def _finalizeResult(
connectionId: str,
result: KdriveBootstrapResult,
startMs: float,
effectiveLimits: Optional[KdriveBootstrapLimits] = None,
) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000) durationMs = int((time.time() - startMs) * 1000)
logger.info( logger.info(
"ingestion.connection.bootstrap.done part=kdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d stoppedAtLimit=%s", "ingestion.connection.bootstrap.done part=kdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d stoppedAtLimit=%s",
@ -484,9 +491,9 @@ def _finalizeResult(connectionId: str, result: KdriveBootstrapResult, startMs: f
"errors": result.errors[:20], "errors": result.errors[:20],
"stoppedAtLimit": result.stoppedAtLimit, "stoppedAtLimit": result.stoppedAtLimit,
"limits": { "limits": {
"maxItems": MAX_ITEMS_DEFAULT, "maxItems": effectiveLimits.maxItems if effectiveLimits else MAX_ITEMS_DEFAULT,
"maxBytes": MAX_BYTES_DEFAULT, "maxBytes": effectiveLimits.maxBytes if effectiveLimits else MAX_BYTES_DEFAULT,
"maxFileSize": MAX_FILE_SIZE_DEFAULT, "maxFileSize": effectiveLimits.maxFileSize if effectiveLimits else MAX_FILE_SIZE_DEFAULT,
"maxDepth": MAX_DEPTH_DEFAULT, "maxDepth": effectiveLimits.maxDepth if effectiveLimits else MAX_DEPTH_DEFAULT,
}, },
} }

View file

@ -168,6 +168,7 @@ async def bootstrapSharepoint(
userId = str(getattr(connection, "userId", "") or "") if connection is not None else "" userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
cancelled = False cancelled = False
effectiveLimits = limits
for ds in dataSources: for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxItems: if result.indexed + result.skippedDuplicate >= limits.maxItems:
break break
@ -188,6 +189,7 @@ async def bootstrapSharepoint(
maxSites=limits.maxSites, maxSites=limits.maxSites,
neutralize=dsNeutralize, neutralize=dsNeutralize,
) )
effectiveLimits = dsLimits
try: try:
await _walkFolder( await _walkFolder(
@ -208,7 +210,7 @@ async def bootstrapSharepoint(
logger.error("sharepoint walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True) logger.error("sharepoint walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
result.errors.append(f"walk({dsPath}): {exc}") result.errors.append(f"walk({dsPath}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs) finalResult = _finalizeResult(connectionId, result, startMs, effectiveLimits)
if cancelled: if cancelled:
finalResult["cancelled"] = True finalResult["cancelled"] = True
return finalResult return finalResult
@ -505,7 +507,12 @@ def _recordLimitStop(
) )
def _finalizeResult(connectionId: str, result: SharepointBootstrapResult, startMs: float) -> Dict[str, Any]: def _finalizeResult(
connectionId: str,
result: SharepointBootstrapResult,
startMs: float,
effectiveLimits: Optional[SharepointBootstrapLimits] = None,
) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000) durationMs = int((time.time() - startMs) * 1000)
logger.info( logger.info(
"ingestion.connection.bootstrap.done part=sharepoint connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d stoppedAtLimit=%s", "ingestion.connection.bootstrap.done part=sharepoint connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d stoppedAtLimit=%s",
@ -535,9 +542,9 @@ def _finalizeResult(connectionId: str, result: SharepointBootstrapResult, startM
"errors": result.errors[:20], "errors": result.errors[:20],
"stoppedAtLimit": result.stoppedAtLimit, "stoppedAtLimit": result.stoppedAtLimit,
"limits": { "limits": {
"maxItems": MAX_ITEMS_DEFAULT, "maxItems": effectiveLimits.maxItems if effectiveLimits else MAX_ITEMS_DEFAULT,
"maxBytes": MAX_BYTES_DEFAULT, "maxBytes": effectiveLimits.maxBytes if effectiveLimits else MAX_BYTES_DEFAULT,
"maxFileSize": MAX_FILE_SIZE_DEFAULT, "maxFileSize": effectiveLimits.maxFileSize if effectiveLimits else MAX_FILE_SIZE_DEFAULT,
"maxDepth": MAX_DEPTH_DEFAULT, "maxDepth": effectiveLimits.maxDepth if effectiveLimits else MAX_DEPTH_DEFAULT,
}, },
} }