commit
45091dc596
70 changed files with 7203 additions and 1590 deletions
14
app.py
14
app.py
|
|
@ -418,6 +418,9 @@ async def lifespan(app: FastAPI):
|
|||
registerKnowledgeIngestionConsumer,
|
||||
)
|
||||
registerKnowledgeIngestionConsumer()
|
||||
# Side-effect import: registers all walker progress message keys
|
||||
# in the i18n registry so `syncRegistryToDb` picks them up.
|
||||
from modules.serviceCenter.services.serviceKnowledge import _progressMessages # noqa: F401
|
||||
except Exception as e:
|
||||
logger.warning(f"KnowledgeIngestionConsumer registration failed (non-critical): {e}")
|
||||
|
||||
|
|
@ -438,7 +441,16 @@ async def lifespan(app: FastAPI):
|
|||
logger.error(f"Feature '{featureName}' failed to stop: {e}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not shutdown feature containers: {e}")
|
||||
|
||||
|
||||
# --- Close all PostgreSQL connection pools ---
|
||||
# Must run LAST: feature `onStop` hooks may still issue DB calls during
|
||||
# shutdown. Once we tear down the pools, no more borrows are possible.
|
||||
try:
|
||||
from modules.connectors.connectorDbPostgre import closeAllPools
|
||||
closeAllPools()
|
||||
except Exception as e:
|
||||
logger.warning(f"Closing DB connection pools failed: {e}")
|
||||
|
||||
logger.info("Application has been shut down")
|
||||
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -96,6 +96,17 @@ class BackgroundJob(PowerOnModel):
|
|||
description="Human-readable current step (e.g. 'Importing journal entries...')",
|
||||
json_schema_extra={"label": "Fortschritts-Nachricht"},
|
||||
)
|
||||
progressMessageData: Optional[Dict[str, Any]] = Field(
|
||||
None,
|
||||
description=(
|
||||
"Structured i18n payload for `progressMessage`. Shape: "
|
||||
"{'key': '<de-text-with-{placeholders}>', 'params': {...}}. "
|
||||
"Frontend renders via `t(key, params)`; older clients fall back "
|
||||
"to `progressMessage`. Single source of truth — keep `progressMessage` "
|
||||
"as the rendered fallback in the producing language."
|
||||
),
|
||||
json_schema_extra={"label": "Fortschritts-Nachricht (i18n)"},
|
||||
)
|
||||
|
||||
payload: Dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
|
|
|
|||
|
|
@ -62,9 +62,14 @@ class DataSource(PowerOnModel):
|
|||
description="Owner user ID",
|
||||
json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "UserInDB", "labelField": "username"}},
|
||||
)
|
||||
ragIndexEnabled: bool = Field(
|
||||
default=False,
|
||||
description="When true this tree element is indexed into the RAG knowledge store",
|
||||
ragIndexEnabled: Optional[bool] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"Three-state RAG indexing flag with cascade-inherit semantics. "
|
||||
"None = inherit from nearest ancestor DataSource (path-traversal); "
|
||||
"True/False = explicit override that propagates to descendants. "
|
||||
"Walker computes effective value via getEffectiveFlag()."
|
||||
),
|
||||
json_schema_extra={"label": "Im RAG indexieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
||||
)
|
||||
lastIndexed: Optional[float] = Field(
|
||||
|
|
@ -72,9 +77,13 @@ class DataSource(PowerOnModel):
|
|||
description="Timestamp of last successful RAG indexing run",
|
||||
json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"},
|
||||
)
|
||||
scope: str = Field(
|
||||
default="personal",
|
||||
description="Data visibility scope: personal, featureInstance, mandate, global",
|
||||
scope: Optional[str] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"Data visibility scope with inherit semantics. "
|
||||
"None = inherit; values: personal, featureInstance, mandate, global. "
|
||||
"Cascade-reset on parent toggle."
|
||||
),
|
||||
json_schema_extra={"label": "Sichtbarkeit", "frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [
|
||||
{"value": "personal", "label": "Persönlich"},
|
||||
{"value": "featureInstance", "label": "Feature-Instanz"},
|
||||
|
|
@ -82,11 +91,25 @@ class DataSource(PowerOnModel):
|
|||
{"value": "global", "label": "Global"},
|
||||
]},
|
||||
)
|
||||
neutralize: bool = Field(
|
||||
default=False,
|
||||
description="Whether this data source should be neutralized before AI processing",
|
||||
neutralize: Optional[bool] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"Three-state neutralization flag with cascade-inherit semantics. "
|
||||
"None = inherit from nearest ancestor DataSource (path-traversal); "
|
||||
"True/False = explicit override that propagates to descendants."
|
||||
),
|
||||
json_schema_extra={"label": "Neutralisieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
||||
)
|
||||
settings: Optional[Dict[str, Any]] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"DataSource-scoped settings (JSON). Currently used keys: "
|
||||
"ragLimits.{maxBytes,maxFileSize,maxItems,maxDepth}. "
|
||||
"Walker reads these directly; missing keys fall back to RAG_LIMITS_DEFAULT "
|
||||
"and are lazily persisted on next bootstrap."
|
||||
),
|
||||
json_schema_extra={"label": "Einstellungen", "frontend_type": "json", "frontend_readonly": True, "frontend_required": False},
|
||||
)
|
||||
|
||||
|
||||
class ExternalEntry(BaseModel):
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ A FeatureDataSource links a FeatureInstance table (DATA_OBJECT) to a workspace
|
|||
so the agent can query structured feature data (e.g. TrusteePosition rows).
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from modules.datamodels.datamodelBase import PowerOnModel
|
||||
from modules.shared.i18nRegistry import i18nModel
|
||||
|
|
@ -55,9 +55,12 @@ class FeatureDataSource(PowerOnModel):
|
|||
description="Workspace feature instance where this source is used",
|
||||
json_schema_extra={"label": "Workspace", "fk_target": {"db": "poweron_app", "table": "FeatureInstance", "labelField": "label"}},
|
||||
)
|
||||
scope: str = Field(
|
||||
default="personal",
|
||||
description="Data visibility scope: personal, featureInstance, mandate, global",
|
||||
scope: Optional[str] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"Data visibility scope with inherit semantics. "
|
||||
"None = inherit; values: personal, featureInstance, mandate, global."
|
||||
),
|
||||
json_schema_extra={"label": "Sichtbarkeit", "frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [
|
||||
{"value": "personal", "label": "Persönlich"},
|
||||
{"value": "featureInstance", "label": "Feature-Instanz"},
|
||||
|
|
@ -65,11 +68,22 @@ class FeatureDataSource(PowerOnModel):
|
|||
{"value": "global", "label": "Global"},
|
||||
]},
|
||||
)
|
||||
neutralize: bool = Field(
|
||||
default=False,
|
||||
description="Whether this data source should be neutralized before AI processing",
|
||||
neutralize: Optional[bool] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"Three-state neutralization flag with cascade-inherit semantics. "
|
||||
"None = inherit; True/False = explicit. Cascade-reset on parent toggle."
|
||||
),
|
||||
json_schema_extra={"label": "Neutralisieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
||||
)
|
||||
ragIndexEnabled: Optional[bool] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"Three-state RAG-indexing flag with cascade-inherit semantics. "
|
||||
"None = inherit; True/False = explicit. Cascade-reset on parent toggle."
|
||||
),
|
||||
json_schema_extra={"label": "RAG-Indexierung", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
||||
)
|
||||
neutralizeFields: Optional[List[str]] = Field(
|
||||
default=None,
|
||||
description="Column names whose values are replaced with placeholders before AI processing",
|
||||
|
|
@ -80,3 +94,12 @@ class FeatureDataSource(PowerOnModel):
|
|||
description="Record-level filter applied when querying this table, e.g. {'sessionId': 'abc-123'}",
|
||||
json_schema_extra={"label": "Datensatzfilter"},
|
||||
)
|
||||
settings: Optional[Dict[str, Any]] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"FeatureDataSource-scoped settings (JSON). Currently used keys: "
|
||||
"ragLimits.{maxBytes,maxFileSize,maxItems,maxDepth}. "
|
||||
"Mirror of DataSource.settings so the UDB settings modal can target both."
|
||||
),
|
||||
json_schema_extra={"label": "Einstellungen", "frontend_type": "json", "frontend_readonly": True, "frontend_required": False},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -124,6 +124,7 @@ class InvestorDemo2026(_BaseDemoConfig):
|
|||
from modules.datamodels.datamodelUam import Mandate, UserInDB
|
||||
from modules.datamodels.datamodelMembership import UserMandate
|
||||
|
||||
summary["_removedMandateIds"] = []
|
||||
for mandateDef in [_MANDATE_HAPPYLIFE, _MANDATE_ALPINA]:
|
||||
try:
|
||||
existing = db.getRecordset(Mandate, recordFilter={"name": mandateDef["name"]})
|
||||
|
|
@ -132,28 +133,36 @@ class InvestorDemo2026(_BaseDemoConfig):
|
|||
self._removeMandateData(db, mid, mandateDef["label"], summary)
|
||||
db.recordDelete(Mandate, mid)
|
||||
summary["removed"].append(f"Mandate {mandateDef['label']} ({mid})")
|
||||
summary["_removedMandateIds"].append({"id": mid, "mandateId": mid})
|
||||
logger.info(f"Removed mandate {mandateDef['label']} ({mid})")
|
||||
except Exception as e:
|
||||
summary["errors"].append(f"Remove mandate {mandateDef['label']}: {e}")
|
||||
|
||||
# SAFETY: NEVER delete the user record. The user may have connections,
|
||||
# chats, workflows, files, and other data across multiple databases.
|
||||
# Only remove the mandate memberships that THIS demo created.
|
||||
try:
|
||||
existing = db.getRecordset(UserInDB, recordFilter={"username": _USER["username"]})
|
||||
for u in existing:
|
||||
uid = u.get("id")
|
||||
removedMandateIds = {m.get("mandateId") for m in summary.get("_removedMandateIds", [])}
|
||||
memberships = db.getRecordset(UserMandate, recordFilter={"userId": uid})
|
||||
for mem in memberships:
|
||||
try:
|
||||
db.recordDelete(UserMandate, mem.get("id"))
|
||||
except Exception:
|
||||
pass
|
||||
db.recordDelete(UserInDB, uid)
|
||||
summary["removed"].append(f"User {_USER['username']} ({uid})")
|
||||
logger.info(f"Removed user {_USER['username']} ({uid})")
|
||||
if mem.get("mandateId") in removedMandateIds:
|
||||
try:
|
||||
db.recordDelete(UserMandate, mem.get("id"))
|
||||
except Exception:
|
||||
pass
|
||||
summary["skipped"].append(
|
||||
f"User {_USER['username']} ({uid}) preserved (only demo mandate memberships removed)"
|
||||
)
|
||||
logger.info(f"Preserved user {_USER['username']} ({uid}) - removed demo mandate memberships only")
|
||||
except Exception as e:
|
||||
summary["errors"].append(f"Remove user: {e}")
|
||||
summary["errors"].append(f"Remove user memberships: {e}")
|
||||
|
||||
self._removeLanguageSet(db, "es", summary)
|
||||
|
||||
summary.pop("_removedMandateIds", None)
|
||||
return summary
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -121,32 +121,39 @@ class PwgDemo2026(_BaseDemoConfig):
|
|||
from modules.datamodels.datamodelMembership import UserMandate
|
||||
from modules.datamodels.datamodelUam import Mandate, UserInDB
|
||||
|
||||
removedMandateIds = set()
|
||||
try:
|
||||
existing = db.getRecordset(Mandate, recordFilter={"name": _MANDATE_PWG["name"]})
|
||||
for m in existing:
|
||||
mid = m.get("id")
|
||||
self._removeMandateData(db, mid, _MANDATE_PWG["label"], summary)
|
||||
db.recordDelete(Mandate, mid)
|
||||
removedMandateIds.add(mid)
|
||||
summary["removed"].append(f"Mandate {_MANDATE_PWG['label']} ({mid})")
|
||||
logger.info(f"Removed mandate {_MANDATE_PWG['label']} ({mid})")
|
||||
except Exception as e:
|
||||
summary["errors"].append(f"Remove mandate {_MANDATE_PWG['label']}: {e}")
|
||||
|
||||
# SAFETY: NEVER delete the user record. The user may have connections,
|
||||
# chats, workflows, files, and other data across multiple databases.
|
||||
# Only remove the mandate memberships that THIS demo created.
|
||||
try:
|
||||
existing = db.getRecordset(UserInDB, recordFilter={"username": _USER["username"]})
|
||||
for u in existing:
|
||||
uid = u.get("id")
|
||||
memberships = db.getRecordset(UserMandate, recordFilter={"userId": uid}) or []
|
||||
for mem in memberships:
|
||||
try:
|
||||
db.recordDelete(UserMandate, mem.get("id"))
|
||||
except Exception:
|
||||
pass
|
||||
db.recordDelete(UserInDB, uid)
|
||||
summary["removed"].append(f"User {_USER['username']} ({uid})")
|
||||
logger.info(f"Removed user {_USER['username']} ({uid})")
|
||||
if mem.get("mandateId") in removedMandateIds:
|
||||
try:
|
||||
db.recordDelete(UserMandate, mem.get("id"))
|
||||
except Exception:
|
||||
pass
|
||||
summary["skipped"].append(
|
||||
f"User {_USER['username']} ({uid}) preserved (only demo mandate memberships removed)"
|
||||
)
|
||||
logger.info(f"Preserved user {_USER['username']} ({uid}) - removed demo mandate memberships only")
|
||||
except Exception as e:
|
||||
summary["errors"].append(f"Remove user: {e}")
|
||||
summary["errors"].append(f"Remove user memberships: {e}")
|
||||
|
||||
return summary
|
||||
|
||||
|
|
|
|||
|
|
@ -342,7 +342,7 @@ class RealEstateObjects:
|
|||
# If no exact match, try case-insensitive search via SQL query
|
||||
# This handles cases where the name might have different casing
|
||||
self.db._ensure_connection()
|
||||
with self.db.connection.cursor() as cursor:
|
||||
with self.db.borrowCursor() as cursor:
|
||||
cursor.execute(
|
||||
'SELECT "id" FROM "Gemeinde" WHERE LOWER("label") = LOWER(%s) LIMIT 1',
|
||||
(name,)
|
||||
|
|
@ -375,7 +375,7 @@ class RealEstateObjects:
|
|||
|
||||
# Try case-insensitive search
|
||||
self.db._ensure_connection()
|
||||
with self.db.connection.cursor() as cursor:
|
||||
with self.db.borrowCursor() as cursor:
|
||||
cursor.execute(
|
||||
'SELECT "id" FROM "Kanton" WHERE LOWER("label") = LOWER(%s) LIMIT 1',
|
||||
(name,)
|
||||
|
|
@ -408,7 +408,7 @@ class RealEstateObjects:
|
|||
|
||||
# Try case-insensitive search
|
||||
self.db._ensure_connection()
|
||||
with self.db.connection.cursor() as cursor:
|
||||
with self.db.borrowCursor() as cursor:
|
||||
cursor.execute(
|
||||
'SELECT "id" FROM "Land" WHERE LOWER("label") = LOWER(%s) LIMIT 1',
|
||||
(name,)
|
||||
|
|
@ -840,7 +840,7 @@ class RealEstateObjects:
|
|||
# Ensure connection is alive
|
||||
self.db._ensure_connection()
|
||||
|
||||
with self.db.connection.cursor() as cursor:
|
||||
with self.db.borrowCursor() as cursor:
|
||||
# Execute query
|
||||
if parameters:
|
||||
# Use parameterized query for safety
|
||||
|
|
|
|||
|
|
@ -205,11 +205,16 @@ class AccountingDataSync:
|
|||
boundary so the UI poll on ``GET /api/jobs/{jobId}`` shows real
|
||||
movement instead of jumping from 10 % to 100 %. Safe to omit.
|
||||
"""
|
||||
def _progress(pct: int, msg: str) -> None:
|
||||
def _progress(pct: int, msgKey: str, msgParams: Optional[Dict[str, Any]] = None) -> None:
|
||||
"""Forward to progressCb using the i18n contract.
|
||||
|
||||
`msgKey` is the German plaintext-as-key; the frontend translates
|
||||
it via `t(key, params)` when rendering.
|
||||
"""
|
||||
if progressCb is None:
|
||||
return
|
||||
try:
|
||||
progressCb(pct, msg)
|
||||
progressCb(pct, messageKey=msgKey, messageParams=msgParams or {})
|
||||
except Exception as ex:
|
||||
logger.warning(f"progressCb failed at {pct}%: {ex}")
|
||||
from modules.features.trustee.datamodelFeatureTrustee import (
|
||||
|
|
|
|||
|
|
@ -12,6 +12,27 @@ from modules.shared.i18nRegistry import t
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# i18n: register BackgroundJob progress message keys used by routeFeatureTrustee /
|
||||
# accountingDataSync. Walker call sites use `progressCb(..., messageKey="…")`
|
||||
# without going through `t()`, so we must register each key here as a
|
||||
# string-literal `t(...)` call -- per i18n convention `t()` MUST receive a
|
||||
# literal so static scanners and the boot-time `syncRegistryToDb` can pick
|
||||
# it up. Do NOT collapse these into a loop over a list of variables.
|
||||
t("Sync wird vorbereitet ({total} Position(en))...")
|
||||
t("Verbindungsaufbau fehlgeschlagen.")
|
||||
t("Keine aktive Buchhaltungs-Konfiguration gefunden.")
|
||||
t("Position {index}/{total} verarbeitet")
|
||||
t("Sync abgeschlossen.")
|
||||
t("Initialisiere Import...")
|
||||
t("Verbinde mit Buchhaltungssystem...")
|
||||
t("Import abgeschlossen.")
|
||||
t("Lade Kontenplan...")
|
||||
t("Lade Journaleintraege vom Buchhaltungssystem...")
|
||||
t("Lade Kunden...")
|
||||
t("Lade Lieferanten...")
|
||||
t("Lade Kontensaldi vom Buchhaltungssystem...")
|
||||
t("Speichere Kontensaldi...")
|
||||
|
||||
# Feature metadata
|
||||
FEATURE_CODE = "trustee"
|
||||
FEATURE_LABEL = t("Treuhand", context="UI")
|
||||
|
|
@ -463,8 +484,14 @@ TEMPLATE_WORKFLOWS = [
|
|||
"3. Kurzer Management-Summary-Absatz (3-5 Saetze) UNTER dem Chart "
|
||||
"mit den 3 groessten Abweichungen (>10%) und einer fachlichen "
|
||||
"Einschaetzung.\n\n"
|
||||
"Verwende die uebergebene Budget-Datei als Soll-Quelle und die im "
|
||||
"Kontext bereitgestellten Buchhaltungsdaten als Ist-Quelle.\n"
|
||||
"DATENQUELLEN:\n"
|
||||
"- SOLL (Budget): Aus der uebergebenen Budget-Datei (Excel).\n"
|
||||
"- IST (Buchhaltung): Verwende AUSSCHLIESSLICH das Feld "
|
||||
"\"closingBalance\" aus \"accountSummary\" im Kontext-JSON. "
|
||||
"Dort steht pro Konto GENAU EIN Ist-Wert (Jahresabschluss-Saldo). "
|
||||
"Fuer Quartals-Budgets stehen zusaetzlich Q1/Q2/Q3/Q4-Felder bereit. "
|
||||
"SUMMIERE NIEMALS mehrere Zeilen oder Journal-Eintraege auf -- der "
|
||||
"closingBalance in accountSummary ist bereits der korrekte Ist-Wert.\n\n"
|
||||
"WICHTIG: Erstelle KEINEN separaten Chart pro Konto. Nur EIN "
|
||||
"Uebersichts-Chart ueber alle Konten ist gewuenscht.\n\n"
|
||||
"Hinweis: Das documentTheme ist 'finance'. Wenn du ein Dokument erstellst, "
|
||||
|
|
|
|||
|
|
@ -1644,7 +1644,11 @@ async def _trusteeAccountingPushJobHandler(job: Dict[str, Any], progressCb) -> D
|
|||
|
||||
results = []
|
||||
total = len(positionIds)
|
||||
progressCb(2, f"Sync wird vorbereitet ({total} Position(en))...")
|
||||
progressCb(
|
||||
2,
|
||||
messageKey="Sync wird vorbereitet ({total} Position(en))...",
|
||||
messageParams={"total": total},
|
||||
)
|
||||
|
||||
# Resolve connector + plain config once to avoid decryption rate-limits
|
||||
# (mirrors the optimisation in pushBatchToAccounting). We push positions
|
||||
|
|
@ -1655,12 +1659,12 @@ async def _trusteeAccountingPushJobHandler(job: Dict[str, Any], progressCb) -> D
|
|||
connector, plainConfig, configRecord = await bridge._resolveConnectorAndConfig(instanceId)
|
||||
except Exception as resolveErr:
|
||||
logger.exception("Accounting push: failed to resolve connector/config")
|
||||
progressCb(100, "Verbindungsaufbau fehlgeschlagen.")
|
||||
progressCb(100, messageKey="Verbindungsaufbau fehlgeschlagen.")
|
||||
raise resolveErr
|
||||
|
||||
if not connector or not plainConfig:
|
||||
results = [SyncResult(success=False, errorMessage="No active accounting configuration found") for _ in positionIds]
|
||||
progressCb(100, "Keine aktive Buchhaltungs-Konfiguration gefunden.")
|
||||
progressCb(100, messageKey="Keine aktive Buchhaltungs-Konfiguration gefunden.")
|
||||
return {
|
||||
"total": len(results),
|
||||
"success": 0,
|
||||
|
|
@ -1680,7 +1684,11 @@ async def _trusteeAccountingPushJobHandler(job: Dict[str, Any], progressCb) -> D
|
|||
results.append(result)
|
||||
# Reserve 5..95% for the push loop, keep the tail for summary.
|
||||
pct = 5 + int(90 * index / total)
|
||||
progressCb(pct, f"Position {index}/{total} verarbeitet")
|
||||
progressCb(
|
||||
pct,
|
||||
messageKey="Position {index}/{total} verarbeitet",
|
||||
messageParams={"index": index, "total": total},
|
||||
)
|
||||
|
||||
skipped = [r for r in results if not r.success and r.errorMessage and "already synced" in r.errorMessage]
|
||||
failed = [r for r in results if not r.success and r not in skipped]
|
||||
|
|
@ -1693,7 +1701,7 @@ async def _trusteeAccountingPushJobHandler(job: Dict[str, Any], progressCb) -> D
|
|||
"; ".join(r.errorMessage or "unknown" for r in failed[:3]),
|
||||
)
|
||||
|
||||
progressCb(100, "Sync abgeschlossen.")
|
||||
progressCb(100, messageKey="Sync abgeschlossen.")
|
||||
return {
|
||||
"total": len(results),
|
||||
"success": sum(1 for r in results if r.success),
|
||||
|
|
@ -1823,10 +1831,10 @@ async def _trusteeAccountingSyncJobHandler(job: Dict[str, Any], progressCb) -> D
|
|||
payload = job.get("payload") or {}
|
||||
rootUser = getRootUser()
|
||||
|
||||
progressCb(5, "Initialisiere Import...")
|
||||
progressCb(5, messageKey="Initialisiere Import...")
|
||||
interface = getInterface(rootUser, mandateId=mandateId, featureInstanceId=instanceId)
|
||||
sync = AccountingDataSync(interface)
|
||||
progressCb(10, "Verbinde mit Buchhaltungssystem...")
|
||||
progressCb(10, messageKey="Verbinde mit Buchhaltungssystem...")
|
||||
result = await sync.importData(
|
||||
featureInstanceId=instanceId,
|
||||
mandateId=mandateId,
|
||||
|
|
@ -1834,7 +1842,7 @@ async def _trusteeAccountingSyncJobHandler(job: Dict[str, Any], progressCb) -> D
|
|||
dateTo=payload.get("dateTo"),
|
||||
progressCb=progressCb,
|
||||
)
|
||||
progressCb(100, "Import abgeschlossen.")
|
||||
progressCb(100, messageKey="Import abgeschlossen.")
|
||||
return result
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
# All rights reserved.
|
||||
"""Workspace feature data models — WorkspaceUserSettings."""
|
||||
|
||||
from typing import List, Optional
|
||||
from typing import Dict, List, Optional
|
||||
from pydantic import Field
|
||||
from modules.datamodels.datamodelBase import PowerOnModel
|
||||
from modules.shared.i18nRegistry import i18nModel
|
||||
|
|
@ -52,7 +52,7 @@ class WorkspaceUserSettings(PowerOnModel):
|
|||
description="Max agent rounds override (None = instance default)",
|
||||
json_schema_extra={"label": "Max. Agenten-Runden", "frontend_type": "number", "frontend_readonly": False, "frontend_required": False},
|
||||
)
|
||||
requireNeutralization: bool = Field(
|
||||
requireNeutralization: Optional[bool] = Field(
|
||||
default=False,
|
||||
description="Default neutralization setting for this user",
|
||||
json_schema_extra={"label": "Neutralisierung", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
||||
|
|
@ -67,3 +67,8 @@ class WorkspaceUserSettings(PowerOnModel):
|
|||
description="Allowed AI models (empty = all permitted)",
|
||||
json_schema_extra={"label": "Erlaubte Modelle", "frontend_type": "modelMultiSelect", "frontend_readonly": False, "frontend_required": False},
|
||||
)
|
||||
uiTreeExpansion: Dict[str, List[str]] = Field(
|
||||
default_factory=dict,
|
||||
description="Per-tab expanded tree-node ids for the UDB / FormGeneratorTree. Key = scope name (e.g. 'sources', 'filesOwn', 'filesShared').",
|
||||
json_schema_extra={"label": "Tree-Expand-Zustand", "frontend_type": "json", "frontend_readonly": True, "frontend_required": False},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1281,51 +1281,101 @@ async def listWorkspaceDataSources(
|
|||
try:
|
||||
from modules.datamodels.datamodelDataSource import DataSource
|
||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import buildEffectiveByConnection
|
||||
rootIf = getRootInterface()
|
||||
recordFilter: dict = {"featureInstanceId": instanceId}
|
||||
if wsMandateId:
|
||||
recordFilter["mandateId"] = wsMandateId
|
||||
dataSources = rootIf.db.getRecordset(DataSource, recordFilter=recordFilter)
|
||||
return JSONResponse({"dataSources": dataSources or []})
|
||||
if not dataSources:
|
||||
return JSONResponse({"dataSources": []})
|
||||
|
||||
# Group by connectionId and compute effective values in aggregate mode
|
||||
byConnection: dict = {}
|
||||
for ds in dataSources:
|
||||
connId = ds.get("connectionId") or ""
|
||||
byConnection.setdefault(connId, []).append(ds)
|
||||
|
||||
for connDs in byConnection.values():
|
||||
effNeutralize = buildEffectiveByConnection(connDs, "neutralize", mode="aggregate")
|
||||
effScope = buildEffectiveByConnection(connDs, "scope", mode="aggregate")
|
||||
effRag = buildEffectiveByConnection(connDs, "ragIndexEnabled", mode="aggregate")
|
||||
for ds in connDs:
|
||||
dsId = ds.get("id", "")
|
||||
ds["effectiveNeutralize"] = effNeutralize.get(dsId, False)
|
||||
ds["effectiveScope"] = effScope.get(dsId, "personal")
|
||||
ds["effectiveRagIndexEnabled"] = effRag.get(dsId, False)
|
||||
|
||||
return JSONResponse({"dataSources": dataSources})
|
||||
except Exception:
|
||||
return JSONResponse({"dataSources": []})
|
||||
|
||||
|
||||
@router.get("/{instanceId}/connections")
|
||||
class _TreeChildrenRequest(BaseModel):
    """Payload accepted by the generic tree-children endpoint."""

    # Each entry names one parent whose children should be returned; a null
    # entry asks for the top-level nodes. Omitting the field yields an empty list.
    parents: List[Optional[str]] = Field(
        description="List of parent keys to fetch children for. Use null for top-level.",
        default_factory=list,
    )
|
||||
|
||||
|
||||
@router.post("/{instanceId}/tree/children")
|
||||
@limiter.limit("300/minute")
|
||||
async def listWorkspaceConnections(
|
||||
async def getTreeChildren(
|
||||
request: Request,
|
||||
instanceId: str = Path(...),
|
||||
body: _TreeChildrenRequest = Body(...),
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
):
|
||||
"""Return the user's active connections (UserConnections)."""
|
||||
_mandateId, _ = _validateInstanceAccess(instanceId, context)
|
||||
from modules.serviceCenter import getService
|
||||
from modules.serviceCenter.context import ServiceCenterContext
|
||||
ctx = ServiceCenterContext(
|
||||
user=context.user,
|
||||
mandate_id=_mandateId or "",
|
||||
feature_instance_id=instanceId,
|
||||
"""Generic UDB tree children resolver.
|
||||
|
||||
The UI sends a list of parent keys (or null for top-level). The backend
|
||||
returns children for each requested parent, with all effective flag
|
||||
values pre-computed. The UI builds the visible tree from the resulting
|
||||
flat per-parent map.
|
||||
"""
|
||||
_validateInstanceAccess(instanceId, context)
|
||||
from modules.serviceCenter.services.serviceKnowledge._buildTree import getChildrenForParents
|
||||
|
||||
try:
|
||||
nodesByParent = await getChildrenForParents(instanceId, body.parents, context)
|
||||
except Exception as exc:
|
||||
logger.exception("Tree children build failed: %s", exc)
|
||||
raise HTTPException(status_code=500, detail=str(exc))
|
||||
return JSONResponse({"nodesByParent": nodesByParent})
|
||||
|
||||
|
||||
class _TreeAttributesRequest(BaseModel):
    """Payload accepted by the tree attribute-refresh endpoint."""

    # Node keys whose current attributes should be looked up; omitting the
    # field yields an empty list.
    keys: List[str] = Field(
        description="List of node keys to fetch current attributes for.",
        default_factory=list,
    )
|
||||
chatService = getService("chat", ctx)
|
||||
connections = chatService.getUserConnections()
|
||||
items = []
|
||||
for c in connections or []:
|
||||
conn = c if isinstance(c, dict) else (c.model_dump() if hasattr(c, "model_dump") else {})
|
||||
authority = conn.get("authority")
|
||||
if hasattr(authority, "value"):
|
||||
authority = authority.value
|
||||
status = conn.get("status")
|
||||
if hasattr(status, "value"):
|
||||
status = status.value
|
||||
items.append({
|
||||
"id": conn.get("id"),
|
||||
"authority": authority,
|
||||
"externalUsername": conn.get("externalUsername"),
|
||||
"externalEmail": conn.get("externalEmail"),
|
||||
"status": status,
|
||||
})
|
||||
return JSONResponse({"connections": items})
|
||||
|
||||
|
||||
@router.post("/{instanceId}/tree/attributes")
@limiter.limit("300/minute")
async def getTreeAttributes(
    request: Request,
    instanceId: str = Path(...),
    body: _TreeAttributesRequest = Body(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Look up the current effective attributes for a batch of tree nodes.

    Returns the effective values (neutralize, scope, ragIndexEnabled) for the
    node keys in ``body.keys``. Intended to be called after a toggle action so
    that only the visible nodes are refreshed and the tree structure itself is
    not reloaded.

    Raises:
        HTTPException: 400 when more than 500 keys are requested; 500 (with the
            underlying error text as detail) when the lookup itself fails.
    """
    _validateInstanceAccess(instanceId, context)
    from modules.serviceCenter.services.serviceKnowledge._buildTree import getAttributesForKeys

    # Reject oversized batches before doing any lookup work.
    requestedKeys = body.keys
    if len(requestedKeys) > 500:
        raise HTTPException(status_code=400, detail="Max 500 keys per request")

    try:
        attributesByKey = await getAttributesForKeys(instanceId, requestedKeys, context)
    except Exception as exc:
        # Log with traceback, then surface the failure to the client as a 500.
        logger.exception("Tree attributes failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    else:
        return JSONResponse({"attributes": attributesByKey})
|
||||
|
||||
|
||||
class CreateDataSourceRequest(BaseModel):
|
||||
|
|
@ -1390,303 +1440,6 @@ async def deleteWorkspaceDataSource(
|
|||
|
||||
# ---- Feature Connections & Feature Data Sources ----
|
||||
|
||||
@router.get("/{instanceId}/feature-connections")
@limiter.limit("120/minute")
async def listFeatureConnections(
    request: Request,
    instanceId: str = Path(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Return the feature instances the caller may use as data connections, grouped by mandate.

    The result is limited to the workspace mandate when the caller is a member
    of it; otherwise every mandate the caller belongs to is considered. An
    instance is listed only if it is enabled, its feature code is among the
    catalog's features with data objects, and the caller has an enabled feature
    access record for it. Groups are ordered case-insensitively by mandate
    label, and the connections inside each group by their own label.
    """
    wsMandateId, _ = _validateInstanceAccess(instanceId, context)
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.security.rbacCatalog import getCatalogService
    from modules.datamodels.datamodelUam import Mandate

    rootIf = getRootInterface()
    userId = str(context.user.id)

    catalog = getCatalogService()
    featureCodesWithData = catalog.getFeaturesWithDataObjects()

    userMandates = rootIf.getUserMandates(userId)
    if not userMandates:
        return JSONResponse({"featureConnectionsByMandate": []})

    # Narrow to the workspace mandate only when the caller actually belongs to it.
    memberMandateIds = {membership.mandateId for membership in userMandates}
    if wsMandateId and wsMandateId in memberMandateIds:
        allowedMandateIds = {wsMandateId}
    else:
        allowedMandateIds = memberMandateIds
    scopedMemberships = [
        membership for membership in userMandates
        if membership.mandateId in allowedMandateIds
    ]

    # Resolve a display label per mandate; a failed lookup falls back to the id.
    mandateLabels: dict = {}
    for membership in scopedMemberships:
        currentMandateId = membership.mandateId
        try:
            rows = rootIf.db.getRecordset(Mandate, recordFilter={"id": currentMandateId})
            if rows:
                record = rows[0]
                mandateLabels[currentMandateId] = (
                    record.get("label") or record.get("name") or currentMandateId
                )
        except Exception:
            mandateLabels[currentMandateId] = currentMandateId

    # Collect eligible instances; each instance id is evaluated at most once.
    byMandate: dict = {}
    seenIds: set = set()
    for membership in scopedMemberships:
        for inst in rootIf.getFeatureInstancesByMandate(membership.mandateId):
            if inst.id in seenIds:
                continue
            seenIds.add(inst.id)
            if not inst.enabled or inst.featureCode not in featureCodesWithData:
                continue
            featureAccess = rootIf.getFeatureAccess(userId, inst.id)
            if not (featureAccess and featureAccess.enabled):
                continue

            featureDef = catalog.getFeatureDefinition(inst.featureCode) or {}
            dataObjects = catalog.getDataObjects(inst.featureCode)
            byMandate.setdefault(inst.mandateId, []).append({
                "featureInstanceId": inst.id,
                "featureCode": inst.featureCode,
                "mandateId": inst.mandateId,
                "label": inst.label or inst.featureCode,
                "icon": featureDef.get("icon", "mdi-database"),
                "tableCount": len(dataObjects),
            })

    def _mandateSortKey(mandateKey) -> str:
        # Mandates without a resolved label sort by their id instead.
        return (mandateLabels.get(mandateKey, mandateKey) or "").lower()

    def _connectionSortKey(item: dict) -> str:
        return (item.get("label") or "").lower()

    groups = [
        {
            "mandateId": mandateKey,
            "mandateLabel": mandateLabels.get(mandateKey, mandateKey),
            "featureConnections": sorted(byMandate[mandateKey], key=_connectionSortKey),
        }
        for mandateKey in sorted(byMandate, key=_mandateSortKey)
    ]

    return JSONResponse({"featureConnectionsByMandate": groups})
|
||||
|
||||
|
||||
@router.get("/{instanceId}/feature-connections/{fiId}/tables")
@limiter.limit("120/minute")
async def listFeatureConnectionTables(
    request: Request,
    instanceId: str = Path(...),
    fiId: str = Path(..., description="Feature instance ID"),
    context: RequestContext = Depends(getRequestContext),
):
    """List data tables (DATA_OBJECTS) for a feature instance, filtered by RBAC.

    Returns a JSON body ``{"tables": [...]}`` with one node per catalog data
    object. Wildcard entries are never returned. Group entries are returned
    only when at least one accessible, non-group object references them via
    ``meta["group"]``; all other entries are returned only when their
    ``objectKey`` is in the accessible set.

    Raises:
        HTTPException 404: the feature instance does not exist.
        HTTPException 403: the feature instance belongs to a different
            mandate than the workspace.
    """
    wsMandateId, _ = _validateInstanceAccess(instanceId, context)
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.security.rbacCatalog import getCatalogService

    rootIf = getRootInterface()
    inst = rootIf.getFeatureInstance(fiId)
    if not inst:
        raise HTTPException(status_code=404, detail=routeApiMsg("Feature instance not found"))

    mandateId = str(inst.mandateId) if inst.mandateId else None
    if wsMandateId and mandateId and mandateId != wsMandateId:
        raise HTTPException(status_code=403, detail=routeApiMsg("Feature instance does not belong to workspace mandate"))
    catalog = getCatalogService()

    try:
        from modules.security.rbac import RbacClass
        from modules.security.rootAccess import getRootDbAppConnector
        dbApp = getRootDbAppConnector()
        rbac = RbacClass(dbApp, dbApp=dbApp)
        accessible = catalog.getAccessibleDataObjects(
            featureCode=inst.featureCode,
            rbacInstance=rbac,
            user=context.user,
            mandateId=mandateId or "",
            featureInstanceId=fiId,
        )
    except Exception as e:
        # NOTE(review): this fallback is fail-open -- when RBAC resolution
        # breaks, every catalog object is treated as accessible. The fallback
        # is kept for backward compatibility, but it is now logged so the
        # condition is visible instead of silently widening access.
        logger.warning(
            f"listFeatureConnectionTables({fiId}): RBAC filtering failed, "
            f"falling back to unfiltered catalog: {e}",
            exc_info=True,
        )
        accessible = catalog.getDataObjects(inst.featureCode)

    accessibleKeys = {obj.get("objectKey", "") for obj in accessible}

    # Groups referenced by at least one accessible, concrete (non-group,
    # non-wildcard) object.
    referencedGroups = set()
    for obj in accessible:
        meta = obj.get("meta") or {}
        if meta.get("wildcard") or meta.get("isGroup"):
            continue
        if meta.get("group"):
            referencedGroups.add(meta["group"])

    tables = []
    for obj in catalog.getDataObjects(inst.featureCode):
        meta = obj.get("meta") or {}
        if meta.get("wildcard"):
            continue
        objectKey = obj.get("objectKey", "")
        if meta.get("isGroup"):
            # Groups are metadata-only; include if at least one child is accessible
            # (regardless of whether the group itself was RBAC-granted).
            if objectKey not in referencedGroups:
                continue
        elif objectKey not in accessibleKeys:
            continue
        tables.append({
            "objectKey": objectKey,
            "tableName": meta.get("table", ""),
            "label": resolveText(obj.get("label", "")),
            "fields": meta.get("fields", []),
            "isParent": bool(meta.get("isParent", False)),
            "parentTable": meta.get("parentTable") or None,
            "parentKey": meta.get("parentKey") or None,
            "displayFields": meta.get("displayFields", []),
            "isGroup": bool(meta.get("isGroup", False)),
            "group": meta.get("group") or None,
        })

    return JSONResponse({"tables": tables})
|
||||
|
||||
|
||||
@router.get("/{instanceId}/feature-connections/{fiId}/parent-objects/{tableName}")
@limiter.limit("120/minute")
async def listParentObjects(
    request: Request,
    instanceId: str = Path(...),
    fiId: str = Path(..., description="Feature instance ID"),
    tableName: str = Path(..., description="Parent table name from DATA_OBJECTS"),
    parentKey: Optional[str] = Query(None, description="Optional FK column name to filter by ancestor record (nested parent rendering)"),
    parentValue: Optional[str] = Query(None, description="Optional FK value matching parentKey to filter children of a specific ancestor record"),
    context: RequestContext = Depends(getRequestContext),
):
    """List records from a parent table so the user can pick a specific record to scope data.

    When parentKey + parentValue are provided, results are additionally filtered by that FK,
    enabling nested record hierarchies (e.g. Sessions OF Context X).

    The table must be registered in the feature's data-object catalog with
    ``meta["isParent"]`` set. Rows are always filtered by the feature instance
    id, by the instance's mandate when it has one, and by the current user
    when the table has a ``userId`` column. At most 100 rows are returned,
    ordered by ``id`` descending.

    Raises:
        HTTPException 404: the feature instance does not exist.
        HTTPException 403: the feature instance belongs to another mandate.
        HTTPException 400: ``tableName`` is not a registered parent table.
        HTTPException 500: the database lookup failed (details are logged,
            not returned to the client).
    """
    wsMandateId, _ = _validateInstanceAccess(instanceId, context)
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.security.rbacCatalog import getCatalogService

    def _quoteIdent(name: str) -> str:
        # Double any embedded quote so an identifier can never terminate its
        # own quoting and inject SQL.
        return '"' + str(name).replace('"', '""') + '"'

    rootIf = getRootInterface()
    inst = rootIf.getFeatureInstance(fiId)
    if not inst:
        raise HTTPException(status_code=404, detail=routeApiMsg("Feature instance not found"))

    featureCode = inst.featureCode
    mandateId = str(inst.mandateId) if inst.mandateId else ""
    if wsMandateId and mandateId and mandateId != wsMandateId:
        raise HTTPException(status_code=403, detail=routeApiMsg("Feature instance does not belong to workspace mandate"))
    catalog = getCatalogService()

    # Only tables the catalog explicitly marks as parent tables may be
    # queried; this is also what makes interpolating tableName acceptable.
    parentObj = None
    for obj in catalog.getDataObjects(featureCode):
        meta = obj.get("meta") or {}
        if meta.get("table") == tableName and meta.get("isParent"):
            parentObj = obj
            break
    if not parentObj:
        raise HTTPException(status_code=400, detail=f"Table '{tableName}' is not a registered parent table")

    displayFields = parentObj["meta"].get("displayFields") or []
    selectCols = ', '.join(_quoteIdent(f) for f in (["id"] + displayFields)) if displayFields else "*"

    from modules.connectors.connectorDbPostgre import DatabaseConnector
    from modules.shared.configuration import APP_CONFIG
    featureDbName = f"poweron_{featureCode.lower()}"
    featureDbConn = None
    try:
        featureDbConn = DatabaseConnector(
            dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
            dbDatabase=featureDbName,
            dbUser=APP_CONFIG.get("DB_USER"),
            dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET"),
            dbPort=int(APP_CONFIG.get("DB_PORT", 5432)),
            userId=str(context.user.id),
        )
        conn = featureDbConn.connection
        with conn.cursor() as cur:
            # Tables name their instance FK either way; prefer featureInstanceId.
            cur.execute(
                "SELECT column_name FROM information_schema.columns "
                "WHERE table_schema = 'public' AND LOWER(table_name) = LOWER(%s) "
                "AND column_name IN ('featureInstanceId', 'instanceId')",
                [tableName],
            )
            instanceCols = [row["column_name"] for row in cur.fetchall()]
            instanceCol = "featureInstanceId" if "featureInstanceId" in instanceCols else "instanceId"

            cur.execute(
                "SELECT column_name FROM information_schema.columns "
                "WHERE table_schema = 'public' AND LOWER(table_name) = LOWER(%s) "
                "AND column_name = 'userId'",
                [tableName],
            )
            hasUserId = cur.fetchone() is not None

            sql = (
                f'SELECT {selectCols} FROM {_quoteIdent(tableName)} '
                f'WHERE {_quoteIdent(instanceCol)} = %s'
            )
            params = [fiId]
            if mandateId:
                sql += ' AND "mandateId" = %s'
                params.append(mandateId)
            if hasUserId:
                sql += ' AND "userId" = %s'
                params.append(str(context.user.id))

            if parentKey and parentValue:
                # parentKey is caller-supplied: only use it as a column name
                # after confirming the column really exists on this table.
                cur.execute(
                    "SELECT 1 FROM information_schema.columns "
                    "WHERE table_schema = 'public' AND LOWER(table_name) = LOWER(%s) "
                    "AND column_name = %s",
                    [tableName, parentKey],
                )
                if cur.fetchone() is not None:
                    sql += f' AND {_quoteIdent(parentKey)} = %s'
                    params.append(parentValue)
                else:
                    logger.warning(
                        f"listParentObjects({tableName}): ignoring parentKey '{parentKey}' (column does not exist)"
                    )

            sql += ' ORDER BY "id" DESC LIMIT 100'
            cur.execute(sql, params)
            rows = []
            for row in cur.fetchall():
                r = dict(row)
                # Make values JSON-serialisable: dates/times to ISO strings,
                # binary payloads to a size placeholder.
                for k, v in r.items():
                    if hasattr(v, "isoformat"):
                        r[k] = v.isoformat()
                    elif isinstance(v, (bytes, bytearray)):
                        r[k] = f"<binary {len(v)} bytes>"
                displayParts = [str(r.get(f, "")) for f in displayFields if r.get(f) is not None]
                rows.append({
                    "id": r.get("id", ""),
                    "displayLabel": " | ".join(displayParts) if displayParts else r.get("id", ""),
                    "fields": {f: r.get(f) for f in displayFields},
                })
    except Exception as e:
        logger.error(f"listParentObjects({tableName}) failed: {e}", exc_info=True)
        # Do not echo the raw exception to the client: it can contain SQL
        # text and database internals. Full details are in the log above.
        raise HTTPException(status_code=500, detail="Failed to list parent objects") from e
    finally:
        if featureDbConn:
            try:
                featureDbConn.close()
            except Exception:
                # Best-effort cleanup; a close failure must not mask the response.
                pass

    return JSONResponse({"parentObjects": rows})
|
||||
|
||||
|
||||
class CreateFeatureDataSourceRequest(BaseModel):
|
||||
"""Request body for adding a feature table as data source."""
|
||||
featureInstanceId: str = Field(description="Feature instance ID")
|
||||
|
|
@ -1705,16 +1458,35 @@ async def createFeatureDataSource(
|
|||
body: CreateFeatureDataSourceRequest = Body(...),
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
):
|
||||
"""Create a FeatureDataSource for this workspace instance."""
|
||||
"""Create a FeatureDataSource for this workspace instance.
|
||||
|
||||
The FDS lives under the WORKSPACE's mandate (not the feature's): that
|
||||
matches how the tree (`allFds = recordset where workspaceInstanceId =
|
||||
instanceId`) and the PATCH endpoints scope these records — by workspace,
|
||||
not by feature mandate. The user can legitimately reference a feature
|
||||
from another mandate they have access to (via the UDB mandate-group
|
||||
nodes), and a hard cross-mandate block here would silently 403 those
|
||||
toggles. Access to the referenced feature is verified by the user's
|
||||
`FeatureAccess` and the existing tree-children RBAC, which run before
|
||||
the user can ever click on this node.
|
||||
"""
|
||||
wsMandateId, _ = _validateInstanceAccess(instanceId, context)
|
||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
|
||||
|
||||
rootIf = getRootInterface()
|
||||
inst = rootIf.getFeatureInstance(body.featureInstanceId)
|
||||
mandateId = str(inst.mandateId) if inst else (str(context.mandateId) if context.mandateId else "")
|
||||
if wsMandateId and mandateId and mandateId != wsMandateId:
|
||||
raise HTTPException(status_code=403, detail=routeApiMsg("Feature instance does not belong to workspace mandate"))
|
||||
if not rootIf.getFeatureAccess(str(context.user.id), body.featureInstanceId):
|
||||
raise HTTPException(status_code=403, detail=routeApiMsg("Access denied to this feature instance"))
|
||||
|
||||
existing = rootIf.db.getRecordset(FeatureDataSource, recordFilter={
|
||||
"workspaceInstanceId": instanceId,
|
||||
"featureInstanceId": body.featureInstanceId,
|
||||
"tableName": body.tableName,
|
||||
}) or []
|
||||
targetFilter = body.recordFilter or None
|
||||
for rec in existing:
|
||||
if (rec.get("recordFilter") or None) == targetFilter:
|
||||
return JSONResponse(rec)
|
||||
|
||||
fds = FeatureDataSource(
|
||||
featureInstanceId=body.featureInstanceId,
|
||||
|
|
@ -1722,7 +1494,7 @@ async def createFeatureDataSource(
|
|||
tableName=body.tableName,
|
||||
objectKey=body.objectKey,
|
||||
label=body.label,
|
||||
mandateId=mandateId,
|
||||
mandateId=wsMandateId or "",
|
||||
userId=str(context.user.id),
|
||||
workspaceInstanceId=instanceId,
|
||||
recordFilter=body.recordFilter,
|
||||
|
|
@ -1742,13 +1514,26 @@ async def listFeatureDataSources(
|
|||
wsMandateId, _ = _validateInstanceAccess(instanceId, context)
|
||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
|
||||
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import buildEffectiveByWorkspaceFds
|
||||
|
||||
rootIf = getRootInterface()
|
||||
recordFilter: dict = {"workspaceInstanceId": instanceId}
|
||||
if wsMandateId:
|
||||
recordFilter["mandateId"] = wsMandateId
|
||||
records = rootIf.db.getRecordset(FeatureDataSource, recordFilter=recordFilter)
|
||||
return JSONResponse({"featureDataSources": records or []})
|
||||
if not records:
|
||||
return JSONResponse({"featureDataSources": []})
|
||||
|
||||
effNeutralize = buildEffectiveByWorkspaceFds(records, "neutralize", mode="aggregate")
|
||||
effScope = buildEffectiveByWorkspaceFds(records, "scope", mode="aggregate")
|
||||
effRag = buildEffectiveByWorkspaceFds(records, "ragIndexEnabled", mode="aggregate")
|
||||
for fds in records:
|
||||
fdsId = fds.get("id", "")
|
||||
fds["effectiveNeutralize"] = effNeutralize.get(fdsId, False)
|
||||
fds["effectiveScope"] = effScope.get(fdsId, "personal")
|
||||
fds["effectiveRagIndexEnabled"] = effRag.get(fdsId, False)
|
||||
|
||||
return JSONResponse({"featureDataSources": records})
|
||||
|
||||
|
||||
@router.delete("/{instanceId}/feature-datasources/{featureDataSourceId}")
|
||||
|
|
@ -1769,112 +1554,6 @@ async def deleteFeatureDataSource(
|
|||
return JSONResponse({"success": True})
|
||||
|
||||
|
||||
@router.get("/{instanceId}/connections/{connectionId}/services")
@limiter.limit("120/minute")
async def listConnectionServices(
    request: Request,
    instanceId: str = Path(...),
    connectionId: str = Path(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Return the services offered by one UserConnection.

    Responds with ``{"services": [{"service", "label", "icon"}, ...]}``.
    Services without a known presentation entry fall back to their own name
    as label and the ``"folder"`` icon. Any failure is logged and reported as
    an HTTP 400 with an empty service list plus the error text.
    """
    _mandateId, _ = _validateInstanceAccess(instanceId, context)
    try:
        from modules.connectors.connectorResolver import ConnectorResolver
        from modules.serviceCenter import getService as getSvc
        from modules.serviceCenter.context import ServiceCenterContext

        # service key -> (display label, icon name)
        presentation = {
            "sharepoint": ("SharePoint", "sharepoint"),
            "outlook": ("Outlook", "mail"),
            "teams": ("Teams", "chat"),
            "onedrive": ("OneDrive", "cloud"),
            "drive": ("Google Drive", "cloud"),
            "gmail": ("Gmail", "mail"),
            "files": ("Files (FTP)", "folder"),
            "kdrive": ("kDrive", "cloud"),
            "calendar": ("Calendar", "calendar"),
            "contact": ("Contacts", "contact"),
        }

        svcContext = ServiceCenterContext(
            user=context.user,
            mandate_id=_mandateId or "",
            feature_instance_id=instanceId,
        )
        chatService = getSvc("chat", svcContext)
        securityService = getSvc("security", svcContext)
        resolver = ConnectorResolver(securityService, _buildResolverDbInterface(chatService))
        provider = await resolver.resolve(connectionId)

        items = []
        for serviceKey in provider.getAvailableServices():
            label, icon = presentation.get(serviceKey, (serviceKey, "folder"))
            items.append({"service": serviceKey, "label": label, "icon": icon})
        return JSONResponse({"services": items})
    except Exception as e:
        logger.error(f"Error listing services for connection {connectionId}: {e}")
        return JSONResponse({"services": [], "error": str(e)}, status_code=400)
|
||||
|
||||
|
||||
@router.get("/{instanceId}/connections/{connectionId}/browse")
@limiter.limit("300/minute")
async def browseConnectionService(
    request: Request,
    instanceId: str = Path(...),
    connectionId: str = Path(...),
    service: str = Query(..., description="Service name (e.g. sharepoint, onedrive, outlook)"),
    path: str = Query("/", description="Path within the service to browse"),
    context: RequestContext = Depends(getRequestContext),
):
    """Browse the entries of one connection service at ``path``.

    Responds with ``{"items": [...], "path": path, "service": service}``,
    where each item carries name, path, folder flag, size, MIME type and
    metadata (``{}`` when the entry has no ``metadata`` attribute). Any
    failure is logged and reported as an HTTP 400 with an empty item list
    plus the error text.
    """
    _mandateId, _ = _validateInstanceAccess(instanceId, context)
    try:
        from modules.connectors.connectorResolver import ConnectorResolver
        from modules.serviceCenter import getService as getSvc
        from modules.serviceCenter.context import ServiceCenterContext

        svcContext = ServiceCenterContext(
            user=context.user,
            mandate_id=_mandateId or "",
            feature_instance_id=instanceId,
        )
        chatService = getSvc("chat", svcContext)
        securityService = getSvc("security", svcContext)
        resolver = ConnectorResolver(securityService, _buildResolverDbInterface(chatService))
        adapter = await resolver.resolveService(connectionId, service)
        listing = await adapter.browse(path, filter=None)

        items = [
            {
                "name": node.name,
                "path": node.path,
                "isFolder": node.isFolder,
                "size": node.size,
                "mimeType": node.mimeType,
                "metadata": node.metadata if hasattr(node, "metadata") else {},
            }
            for node in (listing or [])
        ]
        return JSONResponse({"items": items, "path": path, "service": service})
    except Exception as e:
        logger.error(f"Error browsing {service} for connection {connectionId} at '{path}': {e}")
        return JSONResponse({"items": [], "error": str(e)}, status_code=400)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Voice endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -2190,6 +1869,71 @@ async def putWorkspaceUserSettings(
|
|||
})
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Per-user UI state: tree expand/collapse (UDB + FilesTab)
|
||||
# Persisted on WorkspaceUserSettings.uiTreeExpansion as a {scope: [ids]} map.
|
||||
# Each FE tab uses its own scope key so collapse-state for one tab doesn't
|
||||
# bleed into another.
|
||||
|
||||
@router.get("/{instanceId}/ui-tree-expansion/{scope}")
@limiter.limit("300/minute")
async def getUiTreeExpansion(
    request: Request,
    instanceId: str = Path(...),
    scope: str = Path(..., description="UI scope key, e.g. 'sources', 'filesOwn', 'filesShared'"),
    context: RequestContext = Depends(getRequestContext),
):
    """Return the current user's expanded tree-node ids for one UI scope.

    The response is ``{"expandedNodes": null}`` when nothing was ever stored
    for this scope, so the frontend can fall back to the backend's
    `defaultExpanded` hints. It is ``{"expandedNodes": []}`` when the user
    explicitly collapsed everything.
    """
    _validateInstanceAccess(instanceId, context)
    userSettings = _getWorkspaceInterface(context, instanceId).getWorkspaceUserSettings(
        str(context.user.id)
    )

    storedMap = {}
    if userSettings and userSettings.uiTreeExpansion:
        storedMap = userSettings.uiTreeExpansion

    # A missing key means "never persisted"; a present-but-empty value means
    # "explicitly collapsed" and is normalised to an empty list.
    expandedNodes = list(storedMap[scope] or []) if scope in storedMap else None
    return JSONResponse({"expandedNodes": expandedNodes})
|
||||
|
||||
|
||||
@router.put("/{instanceId}/ui-tree-expansion/{scope}")
@limiter.limit("300/minute")
async def putUiTreeExpansion(
    request: Request,
    instanceId: str = Path(...),
    scope: str = Path(...),
    body: dict = Body(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Replace the expanded-node list for one scope.

    Body: `{"expandedNodes": List[str]}`. Empty list = explicit collapse-all.

    Only string and integer entries are kept (integers are stringified);
    every other entry, including booleans, is dropped. Expansion state
    stored for other scopes is preserved. Responds with the cleaned list
    that was saved.

    Raises:
        HTTPException 400: ``expandedNodes`` is missing or not a list.
    """
    _validateInstanceAccess(instanceId, context)
    wsInterface = _getWorkspaceInterface(context, instanceId)
    userId = str(context.user.id)
    nodes = body.get("expandedNodes")
    if not isinstance(nodes, list):
        raise HTTPException(status_code=400, detail=routeApiMsg("expandedNodes must be a list"))
    # bool is a subclass of int: without the explicit exclusion, True/False
    # would be persisted as the bogus node ids "True"/"False".
    cleaned = [
        str(n)
        for n in nodes
        if isinstance(n, (str, int)) and not isinstance(n, bool)
    ]

    existing = wsInterface.getWorkspaceUserSettings(userId)
    existingMap: Dict[str, List[str]] = (existing.uiTreeExpansion if existing else {}) or {}
    # Copy before mutating so the loaded settings object is left untouched.
    existingMap = dict(existingMap)
    existingMap[scope] = cleaned

    data = {
        "userId": userId,
        "mandateId": str(context.mandateId) if context.mandateId else "",
        "featureInstanceId": instanceId,
        "uiTreeExpansion": existingMap,
    }
    wsInterface.saveWorkspaceUserSettings(data)
    return JSONResponse({"expandedNodes": cleaned})
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
|
||||
|
||||
|
|
|
|||
|
|
@ -1659,7 +1659,7 @@ class BillingObjects:
|
|||
try:
|
||||
appInterface = getAppInterface(self.currentUser)
|
||||
appInterface.db._ensure_connection()
|
||||
with appInterface.db.connection.cursor() as cur:
|
||||
with appInterface.db.borrowCursor() as cur:
|
||||
if appInterface.db._ensureTableExists(UserInDB):
|
||||
cur.execute(
|
||||
'SELECT "id" FROM "UserInDB" WHERE '
|
||||
|
|
@ -1780,7 +1780,7 @@ class BillingObjects:
|
|||
|
||||
try:
|
||||
self.db._ensure_connection()
|
||||
with self.db.connection.cursor() as cur:
|
||||
with self.db.borrowCursor() as cur:
|
||||
countSql = f'SELECT COUNT(*) FROM "{table}"{whereClause}'
|
||||
cur.execute(countSql, whereValues)
|
||||
totalItems = cur.fetchone()["count"]
|
||||
|
|
@ -1797,10 +1797,7 @@ class BillingObjects:
|
|||
|
||||
except Exception as e:
|
||||
logger.error(f"_searchTransactionsPaginated SQL error: {e}", exc_info=True)
|
||||
try:
|
||||
self.db.connection.rollback()
|
||||
except Exception:
|
||||
pass
|
||||
# Rollback is handled by `borrowCursor()` context manager on exit.
|
||||
return {"items": [], "totalItems": 0, "totalPages": 0}
|
||||
|
||||
def _buildScopeFilter(
|
||||
|
|
@ -1872,7 +1869,7 @@ class BillingObjects:
|
|||
|
||||
result: Dict[str, Any] = {}
|
||||
|
||||
with self.db.connection.cursor() as cur:
|
||||
with self.db.borrowCursor() as cur:
|
||||
# 1) Totals
|
||||
cur.execute(
|
||||
f'SELECT COALESCE(SUM("amount"), 0) AS total, COUNT(*) AS cnt FROM "{table}"{whereClause}',
|
||||
|
|
@ -1947,17 +1944,12 @@ class BillingObjects:
|
|||
})
|
||||
result["timeSeries"] = timeSeries
|
||||
|
||||
self.db.connection.commit()
|
||||
|
||||
# Commit/rollback are handled by `borrowCursor()` context manager.
|
||||
result["_allAccounts"] = allAccounts
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in getTransactionStatisticsAggregated: {e}", exc_info=True)
|
||||
try:
|
||||
self.db.connection.rollback()
|
||||
except Exception:
|
||||
pass
|
||||
return self._emptyStats()
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -228,6 +228,22 @@ class KnowledgeObjects:
|
|||
"""Get all ContentChunks for a file."""
|
||||
return self.db.getRecordset(ContentChunk, recordFilter={"fileId": fileId})
|
||||
|
||||
def countChunksByFileIds(self, fileIds: List[str]) -> Dict[str, int]:
    """Count ContentChunk rows per file with a single aggregate query.

    Returns a ``{fileId: chunkCount}`` dict. File ids with no chunks do not
    appear in the result, so callers are expected to treat a missing key as
    zero. An empty ``fileIds`` or a missing ContentChunk table yields ``{}``.
    Only ids and counts are selected, so no chunk payloads are loaded.
    """
    if not fileIds or not self.db._ensureTableExists(ContentChunk):
        return {}

    query = 'SELECT "fileId", COUNT(*) AS cnt FROM "ContentChunk" WHERE "fileId" = ANY(%s) GROUP BY "fileId"'
    counts: Dict[str, int] = {}
    with self.db.borrowCursor() as cur:
        cur.execute(query, (list(fileIds),))
        for record in cur.fetchall():
            counts[record["fileId"]] = int(record["cnt"])
    return counts
|
||||
|
||||
def deleteContentChunks(self, fileId: str) -> int:
|
||||
"""Delete all ContentChunks for a file. Returns count of deleted chunks."""
|
||||
chunks = self.db.getRecordset(ContentChunk, recordFilter={"fileId": fileId})
|
||||
|
|
|
|||
|
|
@ -1221,22 +1221,17 @@ class ComponentObjects:
|
|||
for item in fileRows
|
||||
]
|
||||
|
||||
# Single transaction: delete FileData, FileItem, then FileFolder (children first)
|
||||
self.db._ensure_connection()
|
||||
try:
|
||||
with self.db.connection.cursor() as cursor:
|
||||
if fileIds:
|
||||
cursor.execute('DELETE FROM "FileData" WHERE "id" = ANY(%s)', (fileIds,))
|
||||
cursor.execute('DELETE FROM "FileItem" WHERE "id" = ANY(%s)', (fileIds,))
|
||||
orderedIds = list(folderIds)
|
||||
orderedIds.remove(folderId)
|
||||
orderedIds.append(folderId)
|
||||
if orderedIds:
|
||||
cursor.execute('DELETE FROM "FileFolder" WHERE "id" = ANY(%s)', (orderedIds,))
|
||||
self.db.connection.commit()
|
||||
except Exception:
|
||||
self.db.connection.rollback()
|
||||
raise
|
||||
# Single transaction: delete FileData, FileItem, then FileFolder (children first).
|
||||
# Commit/rollback are handled by `borrowCursor()` on exit.
|
||||
with self.db.borrowCursor() as cursor:
|
||||
if fileIds:
|
||||
cursor.execute('DELETE FROM "FileData" WHERE "id" = ANY(%s)', (fileIds,))
|
||||
cursor.execute('DELETE FROM "FileItem" WHERE "id" = ANY(%s)', (fileIds,))
|
||||
orderedIds = list(folderIds)
|
||||
orderedIds.remove(folderId)
|
||||
orderedIds.append(folderId)
|
||||
if orderedIds:
|
||||
cursor.execute('DELETE FROM "FileFolder" WHERE "id" = ANY(%s)', (orderedIds,))
|
||||
|
||||
return {"deletedFolders": len(folderIds), "deletedFiles": len(fileIds)}
|
||||
|
||||
|
|
@ -1507,7 +1502,7 @@ class ComponentObjects:
|
|||
|
||||
try:
|
||||
self.db._ensure_connection()
|
||||
with self.db.connection.cursor() as cursor:
|
||||
with self.db.borrowCursor() as cursor:
|
||||
cursor.execute(
|
||||
'SELECT "id", "sysCreatedBy" FROM "FileItem" WHERE "id" = ANY(%s)',
|
||||
(uniqueIds,),
|
||||
|
|
@ -1526,11 +1521,10 @@ class ComponentObjects:
|
|||
cursor.execute('DELETE FROM "FileItem" WHERE "id" = ANY(%s)', (accessibleIds,))
|
||||
deletedFiles = cursor.rowcount
|
||||
|
||||
self.db.connection.commit()
|
||||
# Commit/rollback are handled by `borrowCursor()` context manager.
|
||||
return {"deletedFiles": deletedFiles}
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting files in batch: {e}")
|
||||
self.db.connection.rollback()
|
||||
raise FileDeletionError(f"Error deleting files in batch: {str(e)}")
|
||||
|
||||
def _ensureFeatureInstanceGroup(self, featureInstanceId: str, contextKey: str = "files/list") -> Optional[str]:
|
||||
|
|
|
|||
|
|
@ -374,7 +374,7 @@ def getRecordsetWithRBAC(
|
|||
|
||||
query = f'SELECT * FROM "{table}"{whereClause}{orderByClause}{limitClause}'
|
||||
|
||||
with connector.connection.cursor() as cursor:
|
||||
with connector.borrowCursor() as cursor:
|
||||
cursor.execute(query, whereValues)
|
||||
records = [dict(row) for row in cursor.fetchall()]
|
||||
|
||||
|
|
@ -561,7 +561,7 @@ def getRecordsetPaginatedWithRBAC(
|
|||
offset = (pagination.page - 1) * pagination.pageSize
|
||||
limitClause = f" LIMIT {pagination.pageSize} OFFSET {offset}"
|
||||
|
||||
with connector.connection.cursor() as cursor:
|
||||
with connector.borrowCursor() as cursor:
|
||||
countSql = f'SELECT COUNT(*) FROM "{table}"{whereClause}'
|
||||
cursor.execute(countSql, countValues)
|
||||
totalItems = cursor.fetchone()["count"]
|
||||
|
|
@ -709,7 +709,7 @@ def getDistinctColumnValuesWithRBAC(
|
|||
|
||||
sql = f'SELECT DISTINCT "{column}"::TEXT AS val FROM "{table}"{nonNullWhere} ORDER BY val'
|
||||
|
||||
with connector.connection.cursor() as cursor:
|
||||
with connector.borrowCursor() as cursor:
|
||||
cursor.execute(sql, whereValues)
|
||||
result = [row["val"] for row in cursor.fetchall()]
|
||||
|
||||
|
|
@ -719,7 +719,7 @@ def getDistinctColumnValuesWithRBAC(
|
|||
emptySql = f'SELECT 1 FROM "{table}"{whereClause} AND {emptyCond} LIMIT 1'
|
||||
else:
|
||||
emptySql = f'SELECT 1 FROM "{table}" WHERE {emptyCond} LIMIT 1'
|
||||
with connector.connection.cursor() as cursor:
|
||||
with connector.borrowCursor() as cursor:
|
||||
cursor.execute(emptySql, whereValues)
|
||||
if cursor.fetchone():
|
||||
result.append(None)
|
||||
|
|
@ -967,7 +967,7 @@ def buildRbacWhereClause(
|
|||
# Multi-Tenant Design: Users do NOT have mandateId - they are linked via UserMandate
|
||||
if table == "UserInDB":
|
||||
try:
|
||||
with connector.connection.cursor() as cursor:
|
||||
with connector.borrowCursor() as cursor:
|
||||
# Get all user IDs that are members of the current mandate
|
||||
cursor.execute(
|
||||
'SELECT "userId" FROM "UserMandate" WHERE "mandateId" = %s AND "enabled" = true',
|
||||
|
|
@ -994,7 +994,7 @@ def buildRbacWhereClause(
|
|||
# For UserConnection: Filter via UserMandate junction table
|
||||
elif table == "UserConnection":
|
||||
try:
|
||||
with connector.connection.cursor() as cursor:
|
||||
with connector.borrowCursor() as cursor:
|
||||
# Get all user IDs that are members of the current mandate
|
||||
cursor.execute(
|
||||
'SELECT "userId" FROM "UserMandate" WHERE "mandateId" = %s AND "enabled" = true',
|
||||
|
|
|
|||
|
|
@ -68,9 +68,19 @@ def removeDemoConfig(
|
|||
request: Request,
|
||||
currentUser: User = Depends(requirePlatformAdmin),
|
||||
) -> dict:
|
||||
"""Remove all data created by a demo configuration."""
|
||||
"""Remove all data created by a demo configuration.
|
||||
|
||||
Requires X-Confirm-Destructive: true header as safety guard.
|
||||
"""
|
||||
from modules.demoConfigs import getDemoConfigByCode
|
||||
|
||||
confirmHeader = request.headers.get("X-Confirm-Destructive", "").lower()
|
||||
if confirmHeader != "true":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Destructive operation requires header X-Confirm-Destructive: true",
|
||||
)
|
||||
|
||||
config = getDemoConfigByCode(code)
|
||||
if not config:
|
||||
raise HTTPException(
|
||||
|
|
@ -79,7 +89,7 @@ def removeDemoConfig(
|
|||
)
|
||||
|
||||
db = getRootDbAppConnector()
|
||||
logger.info(f"Removing demo config '{code}' (user: {currentUser.username})")
|
||||
logger.info(f"Removing demo config '{code}' (user: {currentUser.username}, confirmed)")
|
||||
summary = config.remove(db)
|
||||
logger.info(f"Demo config '{code}' removed: {summary}")
|
||||
|
||||
|
|
|
|||
|
|
@ -778,7 +778,12 @@ async def _updateKnowledgeConsent(
|
|||
cancelled = cancelJobsByConnection(connectionId)
|
||||
else:
|
||||
from modules.datamodels.datamodelDataSource import DataSource
|
||||
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId, "ragIndexEnabled": True})
|
||||
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag
|
||||
allConnDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||
dataSources = [
|
||||
ds for ds in (allConnDs or [])
|
||||
if getEffectiveFlag(ds, "ragIndexEnabled", allConnDs, mode="walk") is True
|
||||
]
|
||||
if dataSources:
|
||||
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
|
||||
authority = connection.authority.value if hasattr(connection.authority, "value") else str(connection.authority or "")
|
||||
|
|
|
|||
|
|
@ -211,7 +211,7 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user, *, man
|
|||
|
||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||
|
||||
await knowledgeService.requestIngestion(
|
||||
handle = await knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="file",
|
||||
sourceId=fileId,
|
||||
|
|
@ -229,7 +229,10 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user, *, man
|
|||
# Re-acquire interface after await to avoid stale user context from the singleton
|
||||
mgmtInterface = interfaceDbManagement.getInterface(user)
|
||||
mgmtInterface.updateFile(fileId, {"status": "active"})
|
||||
logger.info(f"Auto-index complete for file {fileId} ({fileName})")
|
||||
if handle.status == "failed":
|
||||
logger.warning(f"Auto-index ingestion failed for file {fileId} ({fileName}): {handle.error}")
|
||||
else:
|
||||
logger.info(f"Auto-index complete for file {fileId} ({fileName})")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Auto-index failed for file {fileId}: {e}", exc_info=True)
|
||||
|
|
@ -256,6 +259,24 @@ router = APIRouter(
|
|||
)
|
||||
|
||||
|
||||
def _getInterfaceForOwnedItem(currentUser: User, context, itemId: str, modelClass) -> Any:
    """Build a management interface bound to the context stored on the item.

    The item is first fetched through an interface created without any
    mandate / feature-instance context, so the lookup does not depend on the
    page the request came from. The `mandateId` and `featureInstanceId` found
    on that record are then used to create the interface that is returned.

    Args:
        currentUser: User the interfaces are created for.
        context: Request context; accepted for call-site symmetry but not
            read by this helper.
        itemId: ID of the record to resolve.
        modelClass: Model passed to `db.getRecord` for the lookup.

    Raises:
        interfaceDbManagement.FileNotFoundError: no record exists for `itemId`.
    """
    lookupInterface = interfaceDbManagement.getInterface(currentUser)
    item = lookupInterface.db.getRecord(modelClass, itemId)
    if not item:
        raise interfaceDbManagement.FileNotFoundError(f"Item {itemId} not found")

    def _readField(fieldName: str) -> Any:
        # Records may arrive as plain dicts or as attribute-style objects.
        if isinstance(item, dict):
            return item.get(fieldName)
        return getattr(item, fieldName, None)

    ownerMandateId = _readField("mandateId")
    ownerInstanceId = _readField("featureInstanceId")

    # Falsy IDs are passed on as None rather than being stringified.
    scopedKwargs = {
        "mandateId": str(ownerMandateId) if ownerMandateId else None,
        "featureInstanceId": str(ownerInstanceId) if ownerInstanceId else None,
    }
    return interfaceDbManagement.getInterface(currentUser, **scopedKwargs)
|
||||
|
||||
|
||||
@router.get("/folders/tree")
|
||||
@limiter.limit("120/minute")
|
||||
def get_folder_tree(
|
||||
|
|
@ -272,10 +293,12 @@ def get_folder_tree(
|
|||
)
|
||||
o = (owner or "me").strip().lower()
|
||||
if o == "me":
|
||||
return managementInterface.getOwnFolderTree()
|
||||
if o == "shared":
|
||||
return managementInterface.getSharedFolderTree()
|
||||
raise HTTPException(status_code=400, detail="owner must be 'me' or 'shared'")
|
||||
folders = managementInterface.getOwnFolderTree()
|
||||
elif o == "shared":
|
||||
folders = managementInterface.getSharedFolderTree()
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail="owner must be 'me' or 'shared'")
|
||||
return folders
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
|
@ -283,6 +306,185 @@ def get_folder_tree(
|
|||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/attributes")
@limiter.limit("120/minute")
def getAttributesForIds(
    request: Request,
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Return the current `neutralize` and `scope` values for a list of node IDs.

    The request body carries the IDs under `ids`. Each ID is resolved against
    the folders and files created by the current user (`sysCreatedBy`):

    - IDs starting with `__filesRoot:` get the synthetic-root aggregate.
    - Folder IDs get the aggregate computed by `_computeFolderAttrs`.
    - File IDs get the file's own stored values.
    - IDs that match nothing are omitted from the response.

    Returns:
        Mapping of node ID to `{"neutralize": ..., "scope": ...}`; `{}` when
        `ids` is missing, empty or not a list.

    Raises:
        HTTPException 400: more than 500 IDs, or an entry that is not a string.
        HTTPException 500: any unexpected error while loading or aggregating.
    """
    ids = body.get("ids", [])
    if not isinstance(ids, list) or len(ids) == 0:
        return {}
    if len(ids) > 500:
        raise HTTPException(status_code=400, detail="Max 500 IDs per request")
    # Reject malformed entries up front: a non-string would otherwise fail in
    # `startswith` / the dict lookups below and be reported as a 500.
    if not all(isinstance(nodeId, str) for nodeId in ids):
        raise HTTPException(status_code=400, detail="ids must be a list of strings")

    try:
        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        db = managementInterface.db
        userId = str(currentUser.id)

        allFolders = db.getRecordset(FileFolder, recordFilter={"sysCreatedBy": userId}) or []
        allFiles = db.getRecordset(FileItem, recordFilter={"sysCreatedBy": userId}) or []

        folderById = {f["id"]: f for f in allFolders}
        fileById = {f["id"]: f for f in allFiles}

        logger.info(
            "getAttributesForIds: %d ids requested, %d folders found, %d files found",
            len(ids), len(allFolders), len(allFiles),
        )

        result: Dict[str, Dict[str, Any]] = {}
        # The synthetic-root aggregate does not depend on the requested ID,
        # so it is computed at most once per request.
        rootAttrs = None

        for nodeId in ids:
            if nodeId.startswith("__filesRoot:"):
                if rootAttrs is None:
                    rootAttrs = _computeSyntheticRootAttrs(allFolders, allFiles)
                result[nodeId] = dict(rootAttrs)
            elif nodeId in folderById:
                result[nodeId] = _computeFolderAttrs(folderById[nodeId], allFolders, allFiles)
            elif nodeId in fileById:
                fileRec = fileById[nodeId]
                result[nodeId] = {
                    "neutralize": bool(fileRec.get("neutralize", False)),
                    "scope": fileRec.get("scope", "personal"),
                }
            else:
                logger.debug("getAttributesForIds: unknown id=%s", nodeId)

        logger.info("getAttributesForIds: returning %d entries", len(result))
        return result
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"getAttributesForIds error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
def _computeFolderAttrs(
    folder: Dict[str, Any],
    allFolders: List[Dict[str, Any]],
    allFiles: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """Return the effective `neutralize` and `scope` values for one folder.

    Both values are delegated to the recursive helpers
    `_effectiveNeutralize` and `_effectiveScope`, which are called with the
    folder's `id` and the full folder / file lists.

    Args:
        folder: Folder record; must contain an `id` key.
        allFolders: Every folder record the helpers may traverse.
        allFiles: Every file record the helpers may traverse.

    Returns:
        `{"neutralize": <helper result>, "scope": <helper result>}`.
    """
    folderId = folder["id"]
    return {
        "neutralize": _effectiveNeutralize(folderId, allFolders, allFiles),
        "scope": _effectiveScope(folderId, allFolders, allFiles),
    }
|
||||
|
||||
|
||||
def _effectiveNeutralize(
|
||||
folderId: str,
|
||||
allFolders: List[Dict[str, Any]],
|
||||
allFiles: List[Dict[str, Any]],
|
||||
) -> Any:
|
||||
"""Recursively compute effective neutralize for a folder.
|
||||
Returns 'mixed' if any descendants diverge, otherwise the folder's own value."""
|
||||
childFolders = [f for f in allFolders if f.get("parentId") == folderId]
|
||||
childFiles = [f for f in allFiles if f.get("folderId") == folderId]
|
||||
|
||||
if not childFolders and not childFiles:
|
||||
folder = next((f for f in allFolders if f["id"] == folderId), None)
|
||||
return bool(folder.get("neutralize", False)) if folder else False
|
||||
|
||||
childVals = set()
|
||||
for cf in childFolders:
|
||||
effective = _effectiveNeutralize(cf["id"], allFolders, allFiles)
|
||||
if effective == "mixed":
|
||||
return "mixed"
|
||||
childVals.add(effective)
|
||||
for cf in childFiles:
|
||||
childVals.add(bool(cf.get("neutralize", False)))
|
||||
|
||||
if len(childVals) > 1:
|
||||
return "mixed"
|
||||
if not childVals:
|
||||
folder = next((f for f in allFolders if f["id"] == folderId), None)
|
||||
return bool(folder.get("neutralize", False)) if folder else False
|
||||
return childVals.pop()
|
||||
|
||||
|
||||
def _effectiveScope(
|
||||
folderId: str,
|
||||
allFolders: List[Dict[str, Any]],
|
||||
allFiles: List[Dict[str, Any]],
|
||||
) -> Any:
|
||||
"""Recursively compute effective scope for a folder.
|
||||
Returns 'mixed' if any descendants diverge, otherwise the folder's own value."""
|
||||
childFolders = [f for f in allFolders if f.get("parentId") == folderId]
|
||||
childFiles = [f for f in allFiles if f.get("folderId") == folderId]
|
||||
|
||||
if not childFolders and not childFiles:
|
||||
folder = next((f for f in allFolders if f["id"] == folderId), None)
|
||||
return folder.get("scope", "personal") if folder else "personal"
|
||||
|
||||
childVals = set()
|
||||
for cf in childFolders:
|
||||
effective = _effectiveScope(cf["id"], allFolders, allFiles)
|
||||
if effective == "mixed":
|
||||
return "mixed"
|
||||
childVals.add(effective)
|
||||
for cf in childFiles:
|
||||
childVals.add(cf.get("scope", "personal"))
|
||||
|
||||
if len(childVals) > 1:
|
||||
return "mixed"
|
||||
if not childVals:
|
||||
folder = next((f for f in allFolders if f["id"] == folderId), None)
|
||||
return folder.get("scope", "personal") if folder else "personal"
|
||||
return childVals.pop()
|
||||
|
||||
|
||||
def _computeSyntheticRootAttrs(
|
||||
allFolders: List[Dict[str, Any]],
|
||||
allFiles: List[Dict[str, Any]],
|
||||
) -> Dict[str, Any]:
|
||||
"""Compute attributes for the synthetic root by recursively checking the
|
||||
entire tree. If ANY item at any depth diverges, root shows 'mixed'."""
|
||||
topFolders = [f for f in allFolders if not f.get("parentId")]
|
||||
topFiles = [f for f in allFiles if not f.get("folderId")]
|
||||
|
||||
neutralizeVals = set()
|
||||
scopeVals = set()
|
||||
for cf in topFolders:
|
||||
nEff = _effectiveNeutralize(cf["id"], allFolders, allFiles)
|
||||
if nEff == "mixed":
|
||||
neutralizeVals.add(True)
|
||||
neutralizeVals.add(False)
|
||||
else:
|
||||
neutralizeVals.add(nEff)
|
||||
sEff = _effectiveScope(cf["id"], allFolders, allFiles)
|
||||
if sEff == "mixed":
|
||||
scopeVals.add("__mixed_a__")
|
||||
scopeVals.add("__mixed_b__")
|
||||
else:
|
||||
scopeVals.add(sEff)
|
||||
for cf in topFiles:
|
||||
neutralizeVals.add(bool(cf.get("neutralize", False)))
|
||||
scopeVals.add(cf.get("scope", "personal"))
|
||||
|
||||
if not neutralizeVals and not scopeVals:
|
||||
return {"neutralize": False, "scope": "personal"}
|
||||
|
||||
return {
|
||||
"neutralize": "mixed" if len(neutralizeVals) > 1 else (neutralizeVals.pop() if neutralizeVals else False),
|
||||
"scope": "mixed" if len(scopeVals) > 1 else (scopeVals.pop() if scopeVals else "personal"),
|
||||
}
|
||||
|
||||
|
||||
@router.post("/folders", status_code=status.HTTP_201_CREATED)
|
||||
@limiter.limit("30/minute")
|
||||
def create_folder(
|
||||
|
|
@ -353,7 +555,12 @@ def move_folder(
|
|||
context: RequestContext = Depends(getRequestContext),
|
||||
):
|
||||
try:
|
||||
# FE may send `parentId` or `targetParentId`. Accept both so the
|
||||
# FormGeneratorTree generic `provider.moveNodes(targetParentId)` API
|
||||
# remains consistent with the file-move (PUT /api/files/{id}) shape.
|
||||
newParentId = body.get("parentId")
|
||||
if newParentId is None:
|
||||
newParentId = body.get("targetParentId")
|
||||
managementInterface = interfaceDbManagement.getInterface(
|
||||
currentUser,
|
||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||||
|
|
@ -414,11 +621,7 @@ def patch_folder_scope(
|
|||
if not scope:
|
||||
raise HTTPException(status_code=400, detail="scope is required")
|
||||
cascadeToFiles = body.get("cascadeChildren", body.get("cascadeToFiles", False))
|
||||
managementInterface = interfaceDbManagement.getInterface(
|
||||
currentUser,
|
||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||
)
|
||||
managementInterface = _getInterfaceForOwnedItem(currentUser, context, folderId, FileFolder)
|
||||
return managementInterface.patchFolderScope(folderId, scope, cascadeToFiles)
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
|
|
@ -446,11 +649,7 @@ def patch_folder_neutralize(
|
|||
neutralize = body.get("neutralize")
|
||||
if neutralize is None:
|
||||
raise HTTPException(status_code=400, detail="neutralize is required")
|
||||
managementInterface = interfaceDbManagement.getInterface(
|
||||
currentUser,
|
||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||
)
|
||||
managementInterface = _getInterfaceForOwnedItem(currentUser, context, folderId, FileFolder)
|
||||
return managementInterface.patchFolderNeutralize(folderId, bool(neutralize))
|
||||
except PermissionError as e:
|
||||
raise HTTPException(status_code=403, detail=str(e))
|
||||
|
|
@ -1031,11 +1230,7 @@ def updateFileScope(
|
|||
if scope == "global" and not context.isSysAdmin:
|
||||
raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
|
||||
|
||||
managementInterface = interfaceDbManagement.getInterface(
|
||||
context.user,
|
||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||
)
|
||||
managementInterface = _getInterfaceForOwnedItem(context.user, context, fileId, FileItem)
|
||||
|
||||
managementInterface.updateFile(fileId, {"scope": scope})
|
||||
|
||||
|
|
@ -1093,11 +1288,7 @@ def updateFileNeutralize(
|
|||
fails the file simply has no index — no un-neutralized data can leak.
|
||||
"""
|
||||
try:
|
||||
managementInterface = interfaceDbManagement.getInterface(
|
||||
context.user,
|
||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||
)
|
||||
managementInterface = _getInterfaceForOwnedItem(context.user, context, fileId, FileItem)
|
||||
|
||||
managementInterface.updateFile(fileId, {"neutralize": neutralize})
|
||||
|
||||
|
|
@ -1212,7 +1403,8 @@ def update_file(
|
|||
request: Request,
|
||||
fileId: str = Path(..., description="ID of the file to update"),
|
||||
file_info: Dict[str, Any] = Body(...),
|
||||
currentUser: User = Depends(getCurrentUser)
|
||||
currentUser: User = Depends(getCurrentUser),
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
) -> FileItem:
|
||||
"""Update file info"""
|
||||
try:
|
||||
|
|
@ -1221,7 +1413,11 @@ def update_file(
|
|||
if not safeData:
|
||||
raise HTTPException(status_code=400, detail=routeApiMsg("No editable fields provided"))
|
||||
|
||||
managementInterface = interfaceDbManagement.getInterface(currentUser)
|
||||
managementInterface = interfaceDbManagement.getInterface(
|
||||
currentUser,
|
||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||
)
|
||||
|
||||
file = managementInterface.getFile(fileId)
|
||||
if not file:
|
||||
|
|
@ -1267,10 +1463,15 @@ def update_file(
|
|||
def delete_file(
|
||||
request: Request,
|
||||
fileId: str = Path(..., description="ID of the file to delete"),
|
||||
currentUser: User = Depends(getCurrentUser)
|
||||
currentUser: User = Depends(getCurrentUser),
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
) -> Dict[str, Any]:
|
||||
"""Delete a file"""
|
||||
managementInterface = interfaceDbManagement.getInterface(currentUser)
|
||||
managementInterface = interfaceDbManagement.getInterface(
|
||||
currentUser,
|
||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||
)
|
||||
|
||||
# Check if the file exists
|
||||
existingFile = managementInterface.getFile(fileId)
|
||||
|
|
|
|||
|
|
@ -9,11 +9,83 @@ from fastapi import APIRouter, HTTPException, Depends, Path, Request, Body
|
|||
from modules.auth import limiter, getRequestContext, RequestContext
|
||||
from modules.datamodels.datamodelDataSource import DataSource
|
||||
from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
|
||||
from modules.datamodels.datamodelUam import UserConnection
|
||||
from modules.shared.i18nRegistry import apiRouteContext
|
||||
routeApiMsg = apiRouteContext("routeDataSources")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _ensureConnectionKnowledgeFlag(rootIf, connectionId: str) -> None:
    """Switch on `knowledgeIngestionEnabled` for a connection if it is off.

    One-directional by design: this helper only ever sets the flag to True.
    Turning off the last DataSource does not clear it again, because the
    consent flag may have been set explicitly (Connections page / wizard)
    before any DataSource existed; clearing is left to the master switch
    (`/knowledge-consent`).

    Best effort: an empty `connectionId`, an unknown connection or an
    already-enabled flag is a no-op, and any exception is logged as a
    warning instead of being raised.
    """
    if not connectionId:
        return
    try:
        connRecord = rootIf.db.getRecord(UserConnection, connectionId)
        alreadyEnabled = bool(connRecord.get("knowledgeIngestionEnabled")) if connRecord else False
        if not connRecord or alreadyEnabled:
            return
        rootIf.db.recordModify(
            UserConnection,
            connectionId,
            {"knowledgeIngestionEnabled": True},
        )
        logger.info(
            "Auto-enabled knowledgeIngestionEnabled on UserConnection %s "
            "(triggered by first active DataSource).",
            connectionId,
        )
    except Exception as e:
        logger.warning("Could not auto-enable knowledgeIngestionEnabled for connection %s: %s", connectionId, e)
|
||||
|
||||
def _computeOwnEffective(rootIf, rec, model, sourceId: str, flag: str) -> Any:
    """Reload a source record and return its aggregate effective `flag` value.

    The record is fetched again by `sourceId` so the value reflects whatever
    was written just before this call; the `rec` argument is not read.
    For `DataSource` the peer set is every DataSource with the same
    `connectionId`; for any other model it is every FeatureDataSource with
    the same `workspaceInstanceId`.

    Returns:
        The result of `getEffectiveFlag` / `getEffectiveFlagFds` in
        `mode="aggregate"`, or None when the record no longer exists.
    """
    from modules.serviceCenter.services.serviceKnowledge._inheritFlags import (
        getEffectiveFlag, getEffectiveFlagFds,
    )

    reloaded = rootIf.db.getRecord(model, sourceId)
    if not reloaded:
        return None

    # Pick peer model, grouping field and resolver once, then run one query.
    if model is DataSource:
        peerModel, groupField, resolveEffective = DataSource, "connectionId", getEffectiveFlag
    else:
        peerModel, groupField, resolveEffective = FeatureDataSource, "workspaceInstanceId", getEffectiveFlagFds

    peers = rootIf.db.getRecordset(
        peerModel,
        recordFilter={groupField: reloaded.get(groupField, "")},
    )
    return resolveEffective(reloaded, flag, peers, mode="aggregate")
|
||||
|
||||
|
||||
def _computeAncestorEffectives(rootIf, rec, model, flag: str) -> List[Dict[str, Any]]:
    """Return the aggregate effective `flag` value for every ancestor of `rec`.

    Each entry has the ancestor's `id` plus a key named `effective<Flag>`
    (the flag name with its first letter upper-cased). For `DataSource` the
    ancestor chain is resolved among DataSources sharing `rec`'s
    `connectionId`; otherwise among FeatureDataSources sharing its
    `workspaceInstanceId`.
    """
    from modules.serviceCenter.services.serviceKnowledge._inheritFlags import (
        collectAncestorChain, collectAncestorChainFds,
        getEffectiveFlag, getEffectiveFlagFds,
    )

    effectiveKey = f"effective{flag[0].upper()}{flag[1:]}"

    if model is DataSource:
        peerModel, groupField = DataSource, "connectionId"
        chainOf, resolveEffective = collectAncestorChain, getEffectiveFlag
    else:
        peerModel, groupField = FeatureDataSource, "workspaceInstanceId"
        chainOf, resolveEffective = collectAncestorChainFds, getEffectiveFlagFds

    peers = rootIf.db.getRecordset(
        peerModel,
        recordFilter={groupField: rec.get(groupField, "")},
    )

    summaries: List[Dict[str, Any]] = []
    for ancestor in chainOf(rec, peers):
        ancestorId = ancestor.get("id") or getattr(ancestor, "id", "")
        summaries.append({
            "id": ancestorId,
            effectiveKey: resolveEffective(ancestor, flag, peers, mode="aggregate"),
        })
    return summaries
|
||||
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/api/datasources",
|
||||
tags=["Data Sources"],
|
||||
|
|
@ -45,26 +117,58 @@ def _findSourceRecord(db, sourceId: str):
|
|||
def _updateDataSourceScope(
|
||||
request: Request,
|
||||
sourceId: str = Path(..., description="ID of the DataSource or FeatureDataSource"),
|
||||
scope: str = Body(..., embed=True),
|
||||
scope: Optional[str] = Body(None, embed=True),
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
) -> Dict[str, Any]:
|
||||
"""Update the scope of a DataSource or FeatureDataSource. Global scope requires sysAdmin."""
|
||||
if scope not in _VALID_SCOPES:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid scope: {scope}. Must be one of {_VALID_SCOPES}")
|
||||
"""Update the scope of a DataSource. Cascade-resets explicit descendants.
|
||||
|
||||
if scope == "global" and not context.isSysAdmin:
|
||||
raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
|
||||
`scope=None` resets this node to inherit (no cascade). Global scope
|
||||
requires sysAdmin.
|
||||
"""
|
||||
if scope is not None:
|
||||
if scope not in _VALID_SCOPES:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid scope: {scope}. Must be one of {_VALID_SCOPES}")
|
||||
if scope == "global" and not context.isSysAdmin:
|
||||
raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
|
||||
|
||||
try:
|
||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import (
|
||||
cascadeResetDescendants, cascadeResetDescendantsFds,
|
||||
getEffectiveFlag, getEffectiveFlagFds,
|
||||
collectAncestorChain, collectAncestorChainFds,
|
||||
)
|
||||
rootIf = getRootInterface()
|
||||
rec, model = _findSourceRecord(rootIf.db, sourceId)
|
||||
if not rec:
|
||||
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
|
||||
|
||||
# 1. Cascade reset descendants bottom-up (before modifying master)
|
||||
resetIds: List[str] = []
|
||||
if scope is not None:
|
||||
if model is DataSource:
|
||||
resetIds = cascadeResetDescendants(rootIf, rec, "scope")
|
||||
else:
|
||||
resetIds = cascadeResetDescendantsFds(rootIf, rec, "scope")
|
||||
|
||||
# 2. Set master value last (crash-safe)
|
||||
rootIf.db.recordModify(model, sourceId, {"scope": scope})
|
||||
logger.info("Updated scope=%s for %s %s", scope, model.__name__, sourceId)
|
||||
return {"sourceId": sourceId, "scope": scope, "updated": True}
|
||||
|
||||
# 3. Compute effective + ancestor chain for response
|
||||
updatedAncestors = _computeAncestorEffectives(rootIf, rec, model, "scope")
|
||||
effectiveScope = _computeOwnEffective(rootIf, rec, model, sourceId, "scope")
|
||||
|
||||
logger.info(
|
||||
"Updated scope=%s for %s %s (cascade-reset %d descendants)",
|
||||
scope, model.__name__, sourceId, len(resetIds),
|
||||
)
|
||||
return {
|
||||
"sourceId": sourceId,
|
||||
"scope": scope,
|
||||
"effectiveScope": effectiveScope,
|
||||
"resetDescendantIds": resetIds,
|
||||
"updatedAncestors": updatedAncestors,
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
|
@ -77,20 +181,49 @@ def _updateDataSourceScope(
|
|||
def _updateDataSourceNeutralize(
|
||||
request: Request,
|
||||
sourceId: str = Path(..., description="ID of the DataSource or FeatureDataSource"),
|
||||
neutralize: bool = Body(..., embed=True),
|
||||
neutralize: Optional[bool] = Body(None, embed=True),
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
) -> Dict[str, Any]:
|
||||
"""Toggle the neutralization flag on a DataSource or FeatureDataSource."""
|
||||
"""Set neutralize flag on a DataSource. Cascade-resets explicit descendants.
|
||||
|
||||
`neutralize=None` resets this node to inherit (no cascade).
|
||||
"""
|
||||
try:
|
||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import (
|
||||
cascadeResetDescendants, cascadeResetDescendantsFds,
|
||||
)
|
||||
rootIf = getRootInterface()
|
||||
rec, model = _findSourceRecord(rootIf.db, sourceId)
|
||||
if not rec:
|
||||
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
|
||||
|
||||
# 1. Cascade reset descendants bottom-up (before modifying master)
|
||||
resetIds: List[str] = []
|
||||
if neutralize is not None:
|
||||
if model is DataSource:
|
||||
resetIds = cascadeResetDescendants(rootIf, rec, "neutralize")
|
||||
else:
|
||||
resetIds = cascadeResetDescendantsFds(rootIf, rec, "neutralize")
|
||||
|
||||
# 2. Set master value last (crash-safe)
|
||||
rootIf.db.recordModify(model, sourceId, {"neutralize": neutralize})
|
||||
logger.info("Updated neutralize=%s for %s %s", neutralize, model.__name__, sourceId)
|
||||
return {"sourceId": sourceId, "neutralize": neutralize, "updated": True}
|
||||
|
||||
# 3. Compute effective + ancestor chain for response
|
||||
updatedAncestors = _computeAncestorEffectives(rootIf, rec, model, "neutralize")
|
||||
effectiveNeutralize = _computeOwnEffective(rootIf, rec, model, sourceId, "neutralize")
|
||||
|
||||
logger.info(
|
||||
"Updated neutralize=%s for %s %s (cascade-reset %d descendants)",
|
||||
neutralize, model.__name__, sourceId, len(resetIds),
|
||||
)
|
||||
return {
|
||||
"sourceId": sourceId,
|
||||
"neutralize": neutralize,
|
||||
"effectiveNeutralize": effectiveNeutralize,
|
||||
"resetDescendantIds": resetIds,
|
||||
"updatedAncestors": updatedAncestors,
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
|
@ -132,47 +265,67 @@ def _updateNeutralizeFields(
|
|||
async def _updateDataSourceRagIndex(
|
||||
request: Request,
|
||||
sourceId: str = Path(..., description="ID of the DataSource"),
|
||||
ragIndexEnabled: bool = Body(..., embed=True),
|
||||
ragIndexEnabled: Optional[bool] = Body(None, embed=True),
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
) -> Dict[str, Any]:
|
||||
"""Toggle RAG indexing for a DataSource.
|
||||
"""Set RAG indexing flag on a DataSource. Cascade-resets explicit descendants.
|
||||
|
||||
true: sets flag + enqueues mini-bootstrap for this DataSource only.
|
||||
false: sets flag + synchronously purges all chunks from this DataSource.
|
||||
`ragIndexEnabled=None` resets this node to inherit (no cascade, no purge,
|
||||
no bootstrap — the node simply follows its ancestor chain afterwards).
|
||||
`True` enqueues a mini-bootstrap. `False` synchronously purges chunks.
|
||||
|
||||
Must be `async def` so `await startJob(...)` registers `_runJob` in the
|
||||
main event loop. Sync route → worker thread → temporary loop closes
|
||||
before the task runs → job stays stuck forever.
|
||||
main event loop.
|
||||
"""
|
||||
try:
|
||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import (
|
||||
cascadeResetDescendants, cascadeResetDescendantsFds,
|
||||
)
|
||||
rootIf = getRootInterface()
|
||||
rec = rootIf.db.getRecord(DataSource, sourceId)
|
||||
rec, model = _findSourceRecord(rootIf.db, sourceId)
|
||||
if not rec:
|
||||
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
|
||||
|
||||
rootIf.db.recordModify(DataSource, sourceId, {"ragIndexEnabled": ragIndexEnabled})
|
||||
logger.info("Updated ragIndexEnabled=%s for DataSource %s", ragIndexEnabled, sourceId)
|
||||
# 1. Cascade reset descendants bottom-up (before modifying master)
|
||||
resetIds: List[str] = []
|
||||
if ragIndexEnabled is not None:
|
||||
if model is DataSource:
|
||||
resetIds = cascadeResetDescendants(rootIf, rec, "ragIndexEnabled")
|
||||
else:
|
||||
resetIds = cascadeResetDescendantsFds(rootIf, rec, "ragIndexEnabled")
|
||||
|
||||
if ragIndexEnabled:
|
||||
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
|
||||
# 2. Set master value last (crash-safe)
|
||||
rootIf.db.recordModify(model, sourceId, {"ragIndexEnabled": ragIndexEnabled})
|
||||
|
||||
logger.info(
|
||||
"Updated ragIndexEnabled=%s for %s %s (cascade-reset %d descendants)",
|
||||
ragIndexEnabled, model.__name__, sourceId, len(resetIds),
|
||||
)
|
||||
|
||||
# Bootstrap / purge only for personal DataSource (file/folder-based RAG).
|
||||
# FDS RAG is handled by the feature pipeline; the flag alone is enough.
|
||||
if model is DataSource:
|
||||
connectionId = rec.get("connectionId") or rec.get("connection_id") or ""
|
||||
conn = rootIf.getUserConnectionById(connectionId) if connectionId else None
|
||||
authority = ""
|
||||
if conn:
|
||||
authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
|
||||
if ragIndexEnabled is True:
|
||||
_ensureConnectionKnowledgeFlag(rootIf, connectionId)
|
||||
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
|
||||
|
||||
await startJob(
|
||||
"connection.bootstrap",
|
||||
{"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": [sourceId]},
|
||||
triggeredBy=str(context.user.id),
|
||||
)
|
||||
else:
|
||||
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||||
purgeResult = getKnowledgeInterface(None).deleteFileContentIndexByDataSource(sourceId)
|
||||
logger.info("Purged %d index rows / %d chunks for DataSource %s",
|
||||
purgeResult.get("indexRows", 0), purgeResult.get("chunks", 0), sourceId)
|
||||
conn = rootIf.getUserConnectionById(connectionId) if connectionId else None
|
||||
authority = ""
|
||||
if conn:
|
||||
authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
|
||||
|
||||
await startJob(
|
||||
"connection.bootstrap",
|
||||
{"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": [sourceId]},
|
||||
triggeredBy=str(context.user.id),
|
||||
)
|
||||
elif ragIndexEnabled is False:
|
||||
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||||
purgeResult = getKnowledgeInterface(None).deleteFileContentIndexByDataSource(sourceId)
|
||||
logger.info("Purged %d index rows / %d chunks for DataSource %s",
|
||||
purgeResult.get("indexRows", 0), purgeResult.get("chunks", 0), sourceId)
|
||||
|
||||
import json
|
||||
from modules.shared.auditLogger import audit_logger
|
||||
|
|
@ -182,12 +335,184 @@ async def _updateDataSourceRagIndex(
|
|||
mandateId=context.mandateId,
|
||||
category=AuditCategory.PERMISSION.value,
|
||||
action="rag_index_toggled",
|
||||
details=json.dumps({"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled}),
|
||||
details=json.dumps({"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled, "resetDescendants": len(resetIds), "model": model.__name__}),
|
||||
)
|
||||
|
||||
return {"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled, "updated": True}
|
||||
# 3. Compute effective + ancestors for response
|
||||
updatedAncestors = _computeAncestorEffectives(rootIf, rec, model, "ragIndexEnabled")
|
||||
effectiveRag = _computeOwnEffective(rootIf, rec, model, sourceId, "ragIndexEnabled")
|
||||
|
||||
return {
|
||||
"sourceId": sourceId,
|
||||
"ragIndexEnabled": ragIndexEnabled,
|
||||
"effectiveRagIndexEnabled": effectiveRag,
|
||||
"resetDescendantIds": resetIds,
|
||||
"updatedAncestors": updatedAncestors,
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("Error updating datasource ragIndexEnabled: %s", e)
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
_CLICKUP_SOURCE_TYPES = {"clickup", "clickupList", "clickupSpace", "clickupFolder"}
|
||||
_ALLOWED_RAG_LIMIT_KEYS = {
|
||||
"files": {"maxItems", "maxBytes", "maxFileSize", "maxDepth"},
|
||||
"clickup": {"maxTasks", "maxWorkspaces", "maxListsPerWorkspace"},
|
||||
}
|
||||
|
||||
|
||||
def _kindForSource(rec: Dict[str, Any], model) -> str:
    """Classify a source record as RAG-limits kind `'files'` or `'clickup'`.

    FeatureDataSource records always report `'files'` so the same limit
    shape can be stored for them. For every other model the record's
    `sourceType` (stringified and stripped) decides: members of
    `_CLICKUP_SOURCE_TYPES` map to `'clickup'`, anything else to `'files'`.
    """
    if model is not FeatureDataSource:
        declaredType = str(rec.get("sourceType") or "").strip()
        if declaredType in _CLICKUP_SOURCE_TYPES:
            return "clickup"
    return "files"
|
||||
|
||||
|
||||
def _sanitizeRagLimits(kind: str, raw: Any) -> Dict[str, int]:
    """Coerce an incoming ``ragLimits`` object to ``{allowedKey: positive int}``.

    Keys that are not whitelisted for ``kind`` in ``_ALLOWED_RAG_LIMIT_KEYS``
    are silently dropped. Values may be integers, integral floats (``5.0``)
    or numeric strings (``"5"``).

    Args:
        kind: Source kind used to select the allowed key set.
        raw: The untrusted ``ragLimits`` value from the request body.

    Returns:
        A new dict containing only allowed keys mapped to positive ints.

    Raises:
        HTTPException: 400 when ``raw`` is not a dict, or when an allowed key
            carries a boolean, a fractional/non-finite float, a non-numeric
            value, or a value ``<= 0``.
    """
    if not isinstance(raw, dict):
        raise HTTPException(status_code=400, detail="ragLimits must be an object")
    allowed = _ALLOWED_RAG_LIMIT_KEYS.get(kind, set())
    cleaned: Dict[str, int] = {}
    for key, value in raw.items():
        if key not in allowed:
            continue
        # bool is a subclass of int, so int(True) == 1 would otherwise be
        # stored as a limit of 1. Fractional floats would be truncated and
        # inf/nan have no integer value at all -- reject all of these
        # instead of guessing what the caller meant.
        if isinstance(value, bool) or (isinstance(value, float) and not value.is_integer()):
            raise HTTPException(status_code=400, detail=f"ragLimits.{key} must be an integer")
        try:
            intValue = int(value)
        except (TypeError, ValueError, OverflowError) as exc:
            raise HTTPException(status_code=400, detail=f"ragLimits.{key} must be an integer") from exc
        if intValue <= 0:
            raise HTTPException(status_code=400, detail=f"ragLimits.{key} must be > 0")
        cleaned[key] = intValue
    return cleaned
|
||||
|
||||
|
||||
@router.patch("/{sourceId}/settings")
@limiter.limit("30/minute")
def _updateDataSourceSettings(
    request: Request,
    sourceId: str = Path(..., description="ID of the DataSource or FeatureDataSource"),
    settings: Dict[str, Any] = Body(..., embed=True),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Update `settings` on a DataSource or FeatureDataSource (merge per top-level key).

    Currently supports `ragLimits` only. Unknown top-level keys in the body are
    rejected with 400 so nothing is stored that no consumer reads. Incoming
    `ragLimits` entries are sanitized and merged over the stored ones; stored
    keys that are not part of the request are kept.

    Permission rule as implemented: the record owner (`userId`) and sysadmins
    may always modify. Any other caller needs `context.isMandateAdmin` AND the
    record's effective `scope` must not be "personal".
    NOTE(review): records without a `userId` skip the permission check
    entirely -- confirm that this fail-open behaviour is intended.

    Returns:
        ``{"sourceId", "settings", "updated"}`` with the full merged settings.

    Raises:
        HTTPException: 400 for malformed input, 403 when the caller may not
            modify the source, 404 when the source does not exist, 500 for
            any unexpected error.
    """
    if not isinstance(settings, dict):
        raise HTTPException(status_code=400, detail="settings must be an object")
    unknown = set(settings.keys()) - {"ragLimits"}
    if unknown:
        raise HTTPException(status_code=400, detail=f"Unknown settings keys: {sorted(unknown)}")

    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        rootIf = getRootInterface()
        rec, model = _findSourceRecord(rootIf.db, sourceId)
        if not rec:
            raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")

        ownerId = str(rec.get("userId") or "")
        currentUserId = str(context.user.id)
        if ownerId and ownerId != currentUserId and not context.isSysAdmin:
            # Non-owner: resolve the effective (inherited) scope of the record
            # among its siblings before deciding whether an admin may edit it.
            from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag
            if model is DataSource:
                connectionId = rec.get("connectionId", "")
                allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
                scope = str(getEffectiveFlag(rec, "scope", allDs, mode="walk"))
            else:
                from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource as FDS
                from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlagFds
                wsId = rec.get("workspaceInstanceId", "")
                allFds = rootIf.db.getRecordset(FDS, recordFilter={"workspaceInstanceId": wsId})
                scope = str(getEffectiveFlagFds(rec, "scope", allFds, mode="walk"))
            isMandateAdmin = getattr(context, "isMandateAdmin", False)
            if scope == "personal" or not isMandateAdmin:
                raise HTTPException(status_code=403, detail="Not allowed to modify this DataSource's settings")

        kind = _kindForSource(rec, model)

        currentSettings = rec.get("settings") or {}
        if not isinstance(currentSettings, dict):
            currentSettings = {}
        newSettings = dict(currentSettings)

        if "ragLimits" in settings:
            cleanedLimits = _sanitizeRagLimits(kind, settings["ragLimits"])
            # Apply the same tolerance to the nested stored value as to the
            # stored `settings` itself: a malformed (non-dict) stored
            # `ragLimits` is treated as empty instead of failing in dict().
            storedLimits = currentSettings.get("ragLimits")
            mergedLimits = dict(storedLimits) if isinstance(storedLimits, dict) else {}
            mergedLimits.update(cleanedLimits)
            newSettings["ragLimits"] = mergedLimits

        rootIf.db.recordModify(model, sourceId, {"settings": newSettings})

        import json
        from modules.shared.auditLogger import audit_logger
        from modules.datamodels.datamodelAudit import AuditCategory
        audit_logger.logEvent(
            userId=currentUserId,
            mandateId=context.mandateId,
            category=AuditCategory.PERMISSION.value,
            action="datasource_settings_changed",
            details=json.dumps({
                "sourceId": sourceId,
                "model": model.__name__,
                "oldSettings": currentSettings,
                "newSettings": newSettings,
            }),
        )
        logger.info("Updated settings on %s %s by user %s", model.__name__, sourceId, currentUserId)
        return {"sourceId": sourceId, "settings": newSettings, "updated": True}
    except HTTPException:
        # Deliberate 4xx responses raised above must pass through unchanged.
        raise
    except Exception as e:
        logger.error("Error updating datasource settings: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
|
||||
@router.get("/{sourceId}/cost-estimate")
@limiter.limit("60/minute")
def _getDataSourceCostEstimate(
    request: Request,
    sourceId: str = Path(..., description="ID of the DataSource or FeatureDataSource"),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Return an indicative full-sync cost estimate for one DataSource.

    The source's limits are resolved with ``_ragLimits.getRagLimits(rec, kind)``
    and handed to ``_costEstimate.estimateBootstrapCost``; the resulting dict
    is returned with ``sourceId`` added. According to the original author the
    estimate has the shape ``{estimatedTokens, estimatedUsd, basis}`` for
    every source kind.

    NOTE(review): unlike the settings endpoint, no owner/admin check is done
    here -- any authenticated caller can query any source id. Confirm intended.

    Raises:
        HTTPException: 404 when the source is unknown, 500 on unexpected errors.
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.serviceCenter.services.serviceKnowledge import _ragLimits, _costEstimate

        appInterface = getRootInterface()
        sourceRec, sourceModel = _findSourceRecord(appInterface.db, sourceId)
        if not sourceRec:
            raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")

        sourceKind = _kindForSource(sourceRec, sourceModel)
        effectiveLimits = _ragLimits.getRagLimits(sourceRec, sourceKind)
        costEstimate = _costEstimate.estimateBootstrapCost(effectiveLimits, kind=sourceKind)
        # Echo the id so the client can match the estimate to its request.
        costEstimate["sourceId"] = sourceId
        return costEstimate
    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error computing cost estimate: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ def resolveMandateLabels(ids: List[str]) -> Dict[str, Optional[str]]:
|
|||
m = mMap.get(mid)
|
||||
label = (getattr(m, "label", None) or getattr(m, "name", None)) if m else None
|
||||
if not label:
|
||||
logger.warning("resolveMandateLabels: no label for id=%s (found=%s)", mid, m is not None)
|
||||
logger.debug("resolveMandateLabels: no label for id=%s (found=%s)", mid, m is not None)
|
||||
result[mid] = label or None
|
||||
return result
|
||||
|
||||
|
|
@ -57,7 +57,7 @@ def resolveInstanceLabels(ids: List[str]) -> Dict[str, Optional[str]]:
|
|||
fi = featureIface.getFeatureInstance(iid)
|
||||
label = fi.label if fi and fi.label else None
|
||||
if not label:
|
||||
logger.warning("resolveInstanceLabels: no label for id=%s (found=%s)", iid, fi is not None)
|
||||
logger.debug("resolveInstanceLabels: no label for id=%s (found=%s)", iid, fi is not None)
|
||||
result[iid] = label
|
||||
return result
|
||||
|
||||
|
|
@ -104,7 +104,7 @@ def resolveRoleLabels(ids: List[str]) -> Dict[str, Optional[str]]:
|
|||
out[rid] = r.get("roleLabel") or None
|
||||
for rid in ids:
|
||||
if out.get(rid) is None:
|
||||
logger.warning("resolveRoleLabels: no label for id=%s", rid)
|
||||
logger.debug("resolveRoleLabels: no label for id=%s", rid)
|
||||
return out
|
||||
|
||||
|
||||
|
|
@ -305,7 +305,7 @@ def handleIdsMode(
|
|||
|
||||
sql = f'SELECT "{idField}"::TEXT AS val FROM "{table}"{where_clause} ORDER BY "{idField}"'
|
||||
|
||||
with db.connection.cursor() as cursor:
|
||||
with db.borrowCursor() as cursor:
|
||||
cursor.execute(sql, values)
|
||||
return JSONResponse(content=[row["val"] for row in cursor.fetchall()])
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ from modules.serviceCenter.services.serviceBackgroundJobs import (
|
|||
getJobStatus,
|
||||
listJobs,
|
||||
)
|
||||
from modules.shared.i18nRegistry import apiRouteContext
|
||||
from modules.shared.i18nRegistry import apiRouteContext, resolveJobMessage
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
routeApiMsg = apiRouteContext("routeJobs")
|
||||
|
|
@ -34,8 +34,20 @@ router = APIRouter(
|
|||
|
||||
|
||||
def _serialiseJob(job: Dict[str, Any]) -> Dict[str, Any]:
    """Strip system audit fields and render the progress message server-side.

    Walkers store progress as a structured payload (``progressMessageData =
    {key, params}``). The frontend never calls ``t()`` on backend-supplied
    keys (i18n convention #2), so the payload is resolved here via
    ``resolveJobMessage`` and, when that yields a non-empty string, written
    over ``progressMessage``. Older clients keep working because they read
    the same field.

    Args:
        job: Raw job record as a dict.

    Returns:
        A new dict without the keys starting with ``"sys"``; ``progressMessage``
        is replaced only when a translated message is available.
    """
    out = {k: v for k, v in job.items() if not k.startswith("sys")}
    translated = resolveJobMessage(out.get("progressMessageData"))
    if translated:
        out["progressMessage"] = translated
    return out
|
||||
|
||||
|
||||
def _userHasMandateAccess(context: RequestContext, mandateId: Optional[str]) -> bool:
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ from typing import Any, Dict, List, Optional
|
|||
from fastapi import APIRouter, HTTPException, Depends, Request
|
||||
from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext
|
||||
from modules.datamodels.datamodelUam import User
|
||||
from modules.shared.i18nRegistry import apiRouteContext
|
||||
from modules.shared.i18nRegistry import apiRouteContext, resolveJobMessage
|
||||
|
||||
routeApiMsg = apiRouteContext("routeRagInventory")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -24,9 +24,69 @@ router = APIRouter(
|
|||
)
|
||||
|
||||
|
||||
# Keys under which bootstrap handlers nest their per-service result dicts.
_SUB_RESULT_KEYS = ("sharepoint", "outlook", "drive", "gmail", "clickup", "kdrive")


def _flattenJobResult(result: Dict[str, Any]) -> Dict[str, Any]:
    """Collapse a nested per-service bootstrap result into one flat summary.

    Bootstrap handlers may nest results per service (e.g.
    ``{"sharepoint": {...}, "outlook": {...}}``). Counters are summed across
    all nested dicts found under ``_SUB_RESULT_KEYS``; ``durationMs`` is the
    maximum (sub-services run in parallel, so wall-clock is about the slowest
    one); ``stoppedAtLimit`` / ``limits`` are taken from the first sub-result,
    in ``_SUB_RESULT_KEYS`` order, that reports a limit stop.

    Args:
        result: Job result, either flat or keyed by sub-service.

    Returns:
        ``result`` itself (same object) when it contains no nested sub-result,
        otherwise a new dict with the keys ``indexed``, ``skippedDuplicate``,
        ``skippedPolicy``, ``failed``, ``bytesProcessed``, ``durationMs``,
        ``stoppedAtLimit`` and ``limits``.
    """
    perService = []
    for serviceKey in _SUB_RESULT_KEYS:
        candidate = result.get(serviceKey)
        if isinstance(candidate, dict):
            perService.append(candidate)

    if not perService:
        # Legacy path: a single-service handler returned a flat dict already.
        return result

    def _total(counterName: str) -> int:
        # Missing / None / 0 counters all contribute nothing to the sum.
        return sum(int(sub.get(counterName) or 0) for sub in perService)

    # One banner per connection in the UI: the first sub-service that hit a
    # limit is reported; the user re-runs after raising that budget.
    firstStopped = next((sub for sub in perService if sub.get("stoppedAtLimit")), None)
    if firstStopped is None:
        stopReason = None
        stopLimits: Dict[str, Any] = {}
    else:
        stopReason = firstStopped["stoppedAtLimit"]
        stopLimits = firstStopped.get("limits") or {}

    return {
        "indexed": _total("indexed"),
        "skippedDuplicate": _total("skippedDuplicate"),
        "skippedPolicy": _total("skippedPolicy"),
        "failed": _total("failed"),
        "bytesProcessed": _total("bytesProcessed"),
        "durationMs": max((int(sub.get("durationMs") or 0) for sub in perService), default=0),
        "stoppedAtLimit": stopReason,
        "limits": stopLimits,
    }
|
||||
|
||||
|
||||
def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> List[Dict[str, Any]]:
|
||||
"""Build per-connection RAG inventory rows.
|
||||
|
||||
Each DataSource row exposes BOTH numbers because they mean different things:
|
||||
* `fileCount` — distinct files indexed (== `FileContentIndex` rows)
|
||||
* `chunkCount` — embedding-sized text fragments (== `ContentChunk` rows,
|
||||
max `DEFAULT_CHUNK_TOKENS` tokens each, what the vector retrieval
|
||||
actually hits)
|
||||
|
||||
A single PDF typically yields 1 file × 5–100 chunks; legacy UI labelled
|
||||
`len(FileContentIndex)` as "chunks" which was off by 1–2 orders of
|
||||
magnitude and misleading.
|
||||
"""
|
||||
from modules.datamodels.datamodelDataSource import DataSource
|
||||
from modules.datamodels.datamodelKnowledge import FileContentIndex
|
||||
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag
|
||||
|
||||
out = []
|
||||
for conn in connections:
|
||||
|
|
@ -34,19 +94,35 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
|
|||
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||
|
||||
connIndexRows = knowledgeIf.db.getRecordset(FileContentIndex, recordFilter={"connectionId": connectionId})
|
||||
connChunkTotal = len(connIndexRows)
|
||||
connFileTotal = len(connIndexRows)
|
||||
|
||||
# Map fileId → real chunk count via 1 aggregate query (cheap even for
|
||||
# connections with thousands of files; we never load the vector body).
|
||||
fileIds = [
|
||||
(idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", ""))
|
||||
for idx in connIndexRows
|
||||
]
|
||||
fileIds = [fid for fid in fileIds if fid]
|
||||
chunkCountByFile = knowledgeIf.countChunksByFileIds(fileIds) if fileIds else {}
|
||||
connChunkTotal = sum(chunkCountByFile.values())
|
||||
|
||||
filesByDs: Dict[str, int] = {}
|
||||
chunksByDs: Dict[str, int] = {}
|
||||
unassigned = 0
|
||||
unassignedFiles = 0
|
||||
unassignedChunks = 0
|
||||
for idx in connIndexRows:
|
||||
fileId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", "")
|
||||
chunkCnt = chunkCountByFile.get(fileId, 0)
|
||||
struct = (idx.get("structure") if isinstance(idx, dict) else getattr(idx, "structure", None)) or {}
|
||||
ingestion = struct.get("_ingestion") or {} if isinstance(struct, dict) else {}
|
||||
prov = ingestion.get("provenance") or {} if isinstance(ingestion, dict) else {}
|
||||
dsIdRef = prov.get("dataSourceId", "") if isinstance(prov, dict) else ""
|
||||
if dsIdRef:
|
||||
chunksByDs[dsIdRef] = chunksByDs.get(dsIdRef, 0) + 1
|
||||
filesByDs[dsIdRef] = filesByDs.get(dsIdRef, 0) + 1
|
||||
chunksByDs[dsIdRef] = chunksByDs.get(dsIdRef, 0) + chunkCnt
|
||||
else:
|
||||
unassigned += 1
|
||||
unassignedFiles += 1
|
||||
unassignedChunks += chunkCnt
|
||||
|
||||
seen: Dict[str, bool] = {}
|
||||
dsItems = []
|
||||
|
|
@ -61,24 +137,39 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
|
|||
"label": ds.get("label") if isinstance(ds, dict) else getattr(ds, "label", ""),
|
||||
"path": dsPath,
|
||||
"sourceType": ds.get("sourceType") if isinstance(ds, dict) else getattr(ds, "sourceType", ""),
|
||||
"ragIndexEnabled": ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False),
|
||||
"neutralize": ds.get("neutralize") if isinstance(ds, dict) else getattr(ds, "neutralize", False),
|
||||
"ragIndexEnabled": getEffectiveFlag(ds, "ragIndexEnabled", dataSources, mode="walk"),
|
||||
"neutralize": getEffectiveFlag(ds, "neutralize", dataSources, mode="walk"),
|
||||
"lastIndexed": ds.get("lastIndexed") if isinstance(ds, dict) else getattr(ds, "lastIndexed", None),
|
||||
"fileCount": filesByDs.get(dsId, 0),
|
||||
"chunkCount": chunksByDs.get(dsId, 0),
|
||||
})
|
||||
|
||||
if unassigned > 0 and len(dsItems) > 0:
|
||||
perDs = unassigned // len(dsItems)
|
||||
remainder = unassigned % len(dsItems)
|
||||
# Spread orphan files (provenance lost) evenly so totals match.
|
||||
if unassignedFiles > 0 and len(dsItems) > 0:
|
||||
perFile = unassignedFiles // len(dsItems)
|
||||
remFile = unassignedFiles % len(dsItems)
|
||||
perChunk = unassignedChunks // len(dsItems)
|
||||
remChunk = unassignedChunks % len(dsItems)
|
||||
for i, item in enumerate(dsItems):
|
||||
item["chunkCount"] += perDs + (1 if i < remainder else 0)
|
||||
item["fileCount"] += perFile + (1 if i < remFile else 0)
|
||||
item["chunkCount"] += perChunk + (1 if i < remChunk else 0)
|
||||
|
||||
# Pull a wider window than the previous 5 so the "last successful
|
||||
# sync" is found even if a connection has many recent jobs queued.
|
||||
jobs = jobService.listJobs(jobType="connection.bootstrap", limit=50)
|
||||
connJobs = [j for j in jobs if (j.get("payload") or {}).get("connectionId") == connectionId]
|
||||
runningJobs = [
|
||||
{"jobId": j["id"], "progress": j.get("progress", 0), "progressMessage": j.get("progressMessage", "")}
|
||||
{
|
||||
"jobId": j["id"],
|
||||
"progress": j.get("progress", 0),
|
||||
# Server-side translate the structured walker payload into
|
||||
# the request-context language; frontend renders 1:1 (no
|
||||
# `t()` on backend-supplied keys).
|
||||
"progressMessage": (
|
||||
resolveJobMessage(j.get("progressMessageData"))
|
||||
or j.get("progressMessage", "")
|
||||
),
|
||||
}
|
||||
for j in connJobs
|
||||
if j.get("status") in ("PENDING", "RUNNING")
|
||||
]
|
||||
|
|
@ -93,7 +184,12 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
|
|||
"finishedAt": j.get("finishedAt"),
|
||||
}
|
||||
elif status == "SUCCESS" and lastSuccess is None:
|
||||
result = j.get("result") or {}
|
||||
# Bootstrap handlers may return either a flat dict (single
|
||||
# service) or a nested dict keyed by sub-service (e.g. msft
|
||||
# returns {"sharepoint": {...}, "outlook": {...}}). Flatten
|
||||
# so the UI always sees aggregated counters and the first
|
||||
# sub-service that hit a limit.
|
||||
result = _flattenJobResult(j.get("result") or {})
|
||||
lastSuccess = {
|
||||
"jobId": j["id"],
|
||||
"finishedAt": j.get("finishedAt"),
|
||||
|
|
@ -102,6 +198,12 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
|
|||
"skippedPolicy": result.get("skippedPolicy", 0),
|
||||
"failed": result.get("failed", 0),
|
||||
"durationMs": result.get("durationMs", 0),
|
||||
# Surface limit-stop reason so the UI can warn the user
|
||||
# that the index is provably incomplete (and which budget
|
||||
# to raise). None means the walker finished naturally.
|
||||
"stoppedAtLimit": result.get("stoppedAtLimit"),
|
||||
"limits": result.get("limits") or {},
|
||||
"bytesProcessed": result.get("bytesProcessed", 0),
|
||||
}
|
||||
if lastError and lastSuccess:
|
||||
break
|
||||
|
|
@ -113,6 +215,7 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
|
|||
"knowledgeIngestionEnabled": getattr(conn, "knowledgeIngestionEnabled", False),
|
||||
"preferences": getattr(conn, "knowledgePreferences", None) or {},
|
||||
"dataSources": dsItems,
|
||||
"totalFiles": connFileTotal,
|
||||
"totalChunks": connChunkTotal,
|
||||
"runningJobs": runningJobs,
|
||||
"lastError": lastError,
|
||||
|
|
@ -121,13 +224,165 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
|
|||
return out
|
||||
|
||||
|
||||
def _buildFeatureInstanceInventory(featureInstanceIds, rootIf, knowledgeIf) -> List[Dict[str, Any]]:
    """Build one RAG inventory row per enabled feature instance.

    For every id the function reads the instance's ``FileContentIndex`` rows
    (file count, per-status counts, chunk total via
    ``knowledgeIf.countChunksByFileIds``), its ``FeatureDataSource`` rows with
    their aggregated ``ragIndexEnabled`` flag, and the bootstrap jobs whose
    payload ``workspaceInstanceId`` matches the instance.

    Instances that are missing or disabled are skipped, as are instances that
    have neither index rows nor listable data sources. Data sources with
    ``tableName == "*"`` or without a ``featureCode`` are not listed.

    Args:
        featureInstanceIds: Iterable of feature-instance ids to report on.
        rootIf: App interface; its ``db`` is used for the record lookups.
        knowledgeIf: Knowledge interface used for index rows and chunk counts.

    Returns:
        List of inventory dicts, in the order of ``featureInstanceIds``.
    """
    from modules.datamodels.datamodelKnowledge import FileContentIndex
    from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
    from modules.interfaces.interfaceFeatures import getFeatureInterface
    from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlagFds
    from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
    from modules.serviceCenter.services.serviceKnowledge.subFeatureBootstrap import FEATURE_BOOTSTRAP_JOB_TYPE

    def _field(row, name, fallback):
        # Rows arrive either as dicts or as model objects. Dict rows use a
        # plain .get (missing -> None); `fallback` only applies to objects.
        if isinstance(row, dict):
            return row.get(name)
        return getattr(row, name, fallback)

    featureIf = getFeatureInterface(rootIf.db)

    # Fetched once and filtered per instance below.
    bootstrapJobs = jobService.listJobs(jobType=FEATURE_BOOTSTRAP_JOB_TYPE, limit=100)

    inventory = []
    for fiId in featureInstanceIds:
        instance = featureIf.getFeatureInstance(fiId)
        if not instance or not instance.enabled:
            continue

        indexRows = knowledgeIf.db.getRecordset(
            FileContentIndex, recordFilter={"featureInstanceId": fiId}
        )
        indexedFileIds = [fileId for fileId in (_field(row, "id", "") for row in indexRows) if fileId]
        chunkCounts = knowledgeIf.countChunksByFileIds(indexedFileIds) if indexedFileIds else {}

        statusCounts: Dict[str, int] = {}
        for row in indexRows:
            rowStatus = _field(row, "status", "unknown") or "unknown"
            statusCounts[rowStatus] = statusCounts.get(rowStatus, 0) + 1

        allFds = rootIf.db.getRecordset(FeatureDataSource, recordFilter={"workspaceInstanceId": fiId})
        dsItems = []
        anyRagEnabled = False
        for fds in allFds:
            tableName = _field(fds, "tableName", "") or ""
            featureCode = _field(fds, "featureCode", "") or ""
            if tableName == "*" or not featureCode:
                continue
            fdsId = _field(fds, "id", "")
            ragEnabled = getEffectiveFlagFds(fds, "ragIndexEnabled", allFds, mode="aggregate")
            if ragEnabled:
                anyRagEnabled = True
            dsItems.append({
                "id": fdsId,
                "label": _field(fds, "label", "") or "",
                "tableName": tableName,
                "featureCode": featureCode,
                "ragIndexEnabled": ragEnabled,
            })

        instanceJobs = [
            job for job in bootstrapJobs
            if (job.get("payload") or {}).get("workspaceInstanceId") == fiId
        ]

        runningJobs = []
        for job in instanceJobs:
            if job.get("status") not in ("PENDING", "RUNNING"):
                continue
            # Prefer the server-side translated payload; fall back to the
            # stored plain message.
            message = resolveJobMessage(job.get("progressMessageData")) or job.get("progressMessage", "")
            runningJobs.append({
                "jobId": job["id"],
                "progress": job.get("progress", 0),
                "progressMessage": message,
            })

        # Keep the first ERROR and the first SUCCESS in list order; stop as
        # soon as both are known.
        lastError: Optional[Dict[str, Any]] = None
        lastSuccess: Optional[Dict[str, Any]] = None
        for job in instanceJobs:
            jobStatus = job.get("status")
            if jobStatus == "ERROR" and lastError is None:
                lastError = {
                    "jobId": job["id"],
                    "errorMessage": job.get("errorMessage", ""),
                    "finishedAt": job.get("finishedAt"),
                }
            elif jobStatus == "SUCCESS" and lastSuccess is None:
                jobResult = job.get("result") or {}
                lastSuccess = {
                    "jobId": job["id"],
                    "finishedAt": job.get("finishedAt"),
                    "indexed": jobResult.get("indexed", 0),
                    "skippedDuplicate": jobResult.get("skippedDuplicate", 0),
                    "failed": jobResult.get("failed", 0),
                }
            if lastError and lastSuccess:
                break

        if not (indexRows or dsItems):
            # Nothing indexed and nothing configured: no row for this instance.
            continue

        inventory.append({
            "featureInstanceId": fiId,
            "featureCode": instance.featureCode,
            "label": instance.label or instance.featureCode,
            "mandateId": str(instance.mandateId) if instance.mandateId else "",
            "fileCount": len(indexRows),
            "chunkCount": sum(chunkCounts.values()),
            "statusCounts": statusCounts,
            "dataSources": dsItems,
            "ragEnabled": anyRagEnabled,
            "runningJobs": runningJobs,
            "lastSuccess": lastSuccess,
            "lastError": lastError,
        })
    return inventory
|
||||
|
||||
|
||||
@router.get("/my-mandates")
@limiter.limit("30/minute")
def _getMyMandates(
    request: Request,
    currentUser: User = Depends(getCurrentUser),
) -> List[Dict[str, Any]]:
    """List the mandates in which the current user has an enabled membership.

    Feeds the mandate dropdown of the RAG inventory frontend without
    requiring admin rights (unlike GET /api/mandates/). Memberships that are
    disabled, and mandates that cannot be loaded or are disabled, are left out.

    Returns:
        One ``{"id", "name", "label"}`` dict per mandate; ``label`` falls back
        to the mandate name when no label is set.

    Raises:
        HTTPException: 500 on any unexpected error.
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface

        rootIf = getRootInterface()
        memberships = rootIf.getUserMandates(str(currentUser.id))

        mandates = []
        for membership in memberships:
            if not membership.enabled:
                continue
            mandateId = str(membership.mandateId)
            mandate = rootIf.getMandate(mandateId)
            if mandate and getattr(mandate, "enabled", True):
                mandateName = getattr(mandate, "name", "")
                mandates.append({
                    "id": mandateId,
                    "name": mandateName,
                    "label": getattr(mandate, "label", None) or mandateName,
                })
        return mandates
    except Exception as e:
        logger.error("Error in RAG inventory /my-mandates: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/me")
|
||||
@limiter.limit("30/minute")
|
||||
def _getInventoryMe(
|
||||
request: Request,
|
||||
currentUser: User = Depends(getCurrentUser),
|
||||
) -> Dict[str, Any]:
|
||||
"""Personal RAG inventory: own connections + DataSources + chunk counts."""
|
||||
"""Personal RAG inventory: own connections + DataSources + chunk counts + feature uploads."""
|
||||
try:
|
||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||||
|
|
@ -139,8 +394,22 @@ def _getInventoryMe(
|
|||
|
||||
items = _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService)
|
||||
totalChunks = sum(c.get("totalChunks", 0) for c in items)
|
||||
totalFiles = sum(c.get("totalFiles", 0) for c in items)
|
||||
|
||||
return {"connections": items, "totals": {"chunks": totalChunks}}
|
||||
featureAccesses = rootIf.getFeatureAccessesForUser(str(currentUser.id))
|
||||
fiIds = [
|
||||
str(fa.featureInstanceId) for fa in featureAccesses
|
||||
if fa.enabled and fa.featureInstanceId
|
||||
]
|
||||
fiItems = _buildFeatureInstanceInventory(fiIds, rootIf, knowledgeIf)
|
||||
totalFiles += sum(fi.get("fileCount", 0) for fi in fiItems)
|
||||
totalChunks += sum(fi.get("chunkCount", 0) for fi in fiItems)
|
||||
|
||||
return {
|
||||
"connections": items,
|
||||
"featureInstances": fiItems,
|
||||
"totals": {"files": totalFiles, "chunks": totalChunks},
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error("Error in RAG inventory /me: %s", e, exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
|
@ -159,20 +428,43 @@ def _getInventoryMandate(
|
|||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface, aggregateMandateRagTotalBytes
|
||||
from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
|
||||
|
||||
rootIf = getRootInterface()
|
||||
knowledgeIf = getKnowledgeInterface(None)
|
||||
mandateId = str(context.mandateId) if context.mandateId else ""
|
||||
mandateId = str(context.mandateId)
|
||||
userId = str(context.user.id)
|
||||
|
||||
from modules.datamodels.datamodelUam import UserConnection
|
||||
allConnections = rootIf.db.getRecordset(UserConnection, recordFilter={"mandateId": mandateId})
|
||||
connectionObjects = [type("C", (), row)() if isinstance(row, dict) else row for row in allConnections]
|
||||
userMandates = rootIf.getUserMandates(userId)
|
||||
isMember = any(
|
||||
getattr(um, "mandateId", None) == mandateId and um.enabled
|
||||
for um in userMandates
|
||||
)
|
||||
if not isMember and not context.isSysAdmin:
|
||||
raise HTTPException(status_code=403, detail=routeApiMsg("No membership in this mandate"))
|
||||
|
||||
items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService)
|
||||
mandateMembers = rootIf.getUserMandatesByMandate(mandateId)
|
||||
memberUserIds = {getattr(um, "userId", None) for um in mandateMembers}
|
||||
memberUserIds.discard(None)
|
||||
|
||||
allConnections = []
|
||||
for uid in memberUserIds:
|
||||
allConnections.extend(rootIf.getUserConnections(uid))
|
||||
|
||||
items = _buildConnectionInventory(allConnections, rootIf, knowledgeIf, jobService)
|
||||
totalChunks = sum(c.get("totalChunks", 0) for c in items)
|
||||
totalFiles = sum(c.get("totalFiles", 0) for c in items)
|
||||
totalBytes = aggregateMandateRagTotalBytes(mandateId)
|
||||
|
||||
return {"connections": items, "totals": {"chunks": totalChunks, "bytes": totalBytes}}
|
||||
mandateInstances = rootIf.getFeatureInstancesByMandate(mandateId, enabledOnly=True)
|
||||
fiIds = [str(inst.id) for inst in mandateInstances if inst.id]
|
||||
fiItems = _buildFeatureInstanceInventory(fiIds, rootIf, knowledgeIf)
|
||||
totalFiles += sum(fi.get("fileCount", 0) for fi in fiItems)
|
||||
totalChunks += sum(fi.get("chunkCount", 0) for fi in fiItems)
|
||||
|
||||
return {
|
||||
"connections": items,
|
||||
"featureInstances": fiItems,
|
||||
"totals": {"files": totalFiles, "chunks": totalChunks, "bytes": totalBytes},
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
|
@ -202,8 +494,24 @@ def _getInventoryPlatform(
|
|||
|
||||
items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService)
|
||||
totalChunks = sum(c.get("totalChunks", 0) for c in items)
|
||||
totalFiles = sum(c.get("totalFiles", 0) for c in items)
|
||||
|
||||
return {"connections": items, "totals": {"chunks": totalChunks}}
|
||||
from modules.datamodels.datamodelFeatures import FeatureInstance
|
||||
allInstances = rootIf.db.getRecordset(FeatureInstance, recordFilter={"enabled": True})
|
||||
fiIds = [
|
||||
(r.get("id") if isinstance(r, dict) else getattr(r, "id", ""))
|
||||
for r in allInstances
|
||||
]
|
||||
fiIds = [fid for fid in fiIds if fid]
|
||||
fiItems = _buildFeatureInstanceInventory(fiIds, rootIf, knowledgeIf)
|
||||
totalFiles += sum(fi.get("fileCount", 0) for fi in fiItems)
|
||||
totalChunks += sum(fi.get("chunkCount", 0) for fi in fiItems)
|
||||
|
||||
return {
|
||||
"connections": items,
|
||||
"featureInstances": fiItems,
|
||||
"totals": {"files": totalFiles, "chunks": totalChunks},
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
|
@ -240,8 +548,9 @@ async def _reindexConnection(
|
|||
if str(conn.userId) != str(currentUser.id):
|
||||
raise HTTPException(status_code=403, detail="Not your connection")
|
||||
|
||||
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag
|
||||
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||
ragDs = [ds for ds in dataSources if (ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False))]
|
||||
ragDs = [ds for ds in dataSources if getEffectiveFlag(ds, "ragIndexEnabled", dataSources, mode="walk") is True]
|
||||
if not ragDs:
|
||||
return {"status": "skipped", "reason": "no_rag_enabled_datasources"}
|
||||
|
||||
|
|
@ -263,6 +572,47 @@ async def _reindexConnection(
|
|||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/reindex-feature/{workspaceInstanceId}")
@limiter.limit("10/minute")
async def _reindexFeature(
    request: Request,
    workspaceInstanceId: str,
    currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
    """Queue a feature-data bootstrap job for one workspace instance.

    The caller must either hold an enabled feature access for
    ``workspaceInstanceId`` or be flagged ``isSysAdmin``; otherwise a 403 is
    raised. On success a background job of type ``FEATURE_BOOTSTRAP_JOB_TYPE``
    is started and its id is returned.

    This handler is a coroutine because ``startJob(...)`` is awaited.

    Returns:
        ``{"status": "queued", "workspaceInstanceId": ..., "jobId": ...}``.

    Raises:
        HTTPException: 403 when access is denied; 500 for any other failure
            (the original exception text is passed through as ``detail``).
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.serviceCenter.services.serviceBackgroundJobs import startJob
        from modules.serviceCenter.services.serviceKnowledge.subFeatureBootstrap import FEATURE_BOOTSTRAP_JOB_TYPE

        rootIf = getRootInterface()
        accessRows = rootIf.getFeatureAccessesForUser(str(currentUser.id))

        # Look for one enabled access row that targets this workspace instance.
        hasAccess = False
        for accessRow in accessRows:
            if str(accessRow.featureInstanceId) == workspaceInstanceId and accessRow.enabled:
                hasAccess = True
                break

        # Sys-admins may trigger a reindex without an explicit access row.
        if not (hasAccess or getattr(currentUser, "isSysAdmin", False)):
            raise HTTPException(status_code=403, detail="No access to this feature instance")

        jobPayload = {"workspaceInstanceId": workspaceInstanceId}
        jobId = await startJob(
            FEATURE_BOOTSTRAP_JOB_TYPE,
            jobPayload,
            triggeredBy=str(currentUser.id),
        )

        logger.info("Feature reindex triggered for workspace %s (jobId=%s)", workspaceInstanceId, jobId)
        return {"status": "queued", "workspaceInstanceId": workspaceInstanceId, "jobId": jobId}
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 403 above) must reach the client unchanged.
        raise
    except Exception as e:
        logger.error("Error triggering feature reindex: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/jobs")
|
||||
@limiter.limit("60/minute")
|
||||
def _getActiveJobs(
|
||||
|
|
@ -294,7 +644,10 @@ def _getActiveJobs(
|
|||
"connectionLabel": getattr(conn, "displayLabel", None) or getattr(conn, "authority", connId),
|
||||
"jobType": j.get("jobType", "connection.bootstrap"),
|
||||
"progress": j.get("progress", 0),
|
||||
"progressMessage": j.get("progressMessage", ""),
|
||||
"progressMessage": (
|
||||
resolveJobMessage(j.get("progressMessageData"))
|
||||
or j.get("progressMessage", "")
|
||||
),
|
||||
})
|
||||
return active
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -227,7 +227,7 @@ WHERE "workflowId" = ANY(%s)
|
|||
GROUP BY "workflowId"
|
||||
"""
|
||||
out: dict = {}
|
||||
with db.connection.cursor() as cursor:
|
||||
with db.borrowCursor() as cursor:
|
||||
cursor.execute(sql, (workflowIds,))
|
||||
for row in cursor.fetchall():
|
||||
r = dict(row)
|
||||
|
|
@ -480,7 +480,7 @@ def _getWorkflowsJoinedPaginated(
|
|||
dataSql = f"SELECT w.*, rs.\"lastStartedAt\", rs.\"runCount\", rs.\"activeRunId\" FROM {fromSql}{whereClause}{orderClause}{limitClause}"
|
||||
|
||||
db._ensure_connection()
|
||||
with db.connection.cursor() as cursor:
|
||||
with db.borrowCursor() as cursor:
|
||||
cursor.execute(countSql, countValues)
|
||||
totalItems = int(cursor.fetchone()["cnt"])
|
||||
|
||||
|
|
|
|||
|
|
@ -341,11 +341,10 @@ class RbacClass:
|
|||
return []
|
||||
|
||||
try:
|
||||
conn = self.dbApp.connection
|
||||
roleIds = set()
|
||||
|
||||
|
||||
# 1. Mandant-Rollen via UserMandate → UserMandateRole (SINGLE Query)
|
||||
with conn.cursor() as cursor:
|
||||
with self.dbApp.borrowCursor() as cursor:
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT umr."roleId"
|
||||
|
|
@ -357,10 +356,10 @@ class RbacClass:
|
|||
)
|
||||
mandateRoles = cursor.fetchall()
|
||||
roleIds.update(r["roleId"] for r in mandateRoles if r.get("roleId"))
|
||||
|
||||
|
||||
# 2. Instanz-Rollen via FeatureAccess → FeatureAccessRole (SINGLE Query)
|
||||
if featureInstanceId:
|
||||
with conn.cursor() as cursor:
|
||||
with self.dbApp.borrowCursor() as cursor:
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT far."roleId"
|
||||
|
|
@ -372,14 +371,13 @@ class RbacClass:
|
|||
)
|
||||
instanceRoles = cursor.fetchall()
|
||||
roleIds.update(r["roleId"] for r in instanceRoles if r.get("roleId"))
|
||||
|
||||
|
||||
if not roleIds:
|
||||
return []
|
||||
|
||||
|
||||
# 3. BULK Query: Alle Regeln für alle Rollen + zugehörige Role-Daten
|
||||
# SINGLE Query mit JOIN statt N+1
|
||||
roleIdsList = list(roleIds)
|
||||
with conn.cursor() as cursor:
|
||||
with self.dbApp.borrowCursor() as cursor:
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT ar.*, r."mandateId" as "roleMandateId",
|
||||
|
|
|
|||
|
|
@ -67,7 +67,12 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
|||
sourceType = ds.get("sourceType", "")
|
||||
path = ds.get("path", "/")
|
||||
label = ds.get("label", "")
|
||||
neutralize = bool(ds.get("neutralize", False))
|
||||
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag
|
||||
from modules.datamodels.datamodelDataSource import DataSource
|
||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
rootIf = getRootInterface()
|
||||
allConnDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||
neutralize = bool(getEffectiveFlag(ds, "neutralize", allConnDs or [ds], mode="walk"))
|
||||
service = _SOURCE_TYPE_TO_SERVICE.get(sourceType, sourceType)
|
||||
if not connectionId:
|
||||
raise ValueError(f"DataSource '{dsId}' has no connectionId")
|
||||
|
|
|
|||
|
|
@ -25,15 +25,14 @@ _CACHE_TTL_SECONDS = 300
|
|||
|
||||
|
||||
def _getOrCreateFeatureDbConnector(featureDbName: str, userId: str):
|
||||
"""Reuse a pooled DB connector for the given feature database."""
|
||||
"""Reuse a pooled DB connector for the given feature database.
|
||||
|
||||
The underlying psycopg2 connections live in the central pool
|
||||
(`_PoolRegistry`) and are recreated on demand if they go stale; we just
|
||||
need to keep the lightweight connector wrapper around.
|
||||
"""
|
||||
if featureDbName in _featureDbConnPool:
|
||||
conn = _featureDbConnPool[featureDbName]
|
||||
try:
|
||||
if conn.connection and not conn.connection.closed:
|
||||
return conn
|
||||
except Exception as e:
|
||||
logger.warning(f"Feature DB connection check failed for {featureDbName}: {e}")
|
||||
_featureDbConnPool.pop(featureDbName, None)
|
||||
return _featureDbConnPool[featureDbName]
|
||||
|
||||
from modules.connectors.connectorDbPostgre import DatabaseConnector
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
|
@ -111,9 +110,11 @@ def _registerFeatureSubAgentTools(registry: ToolRegistry, services):
|
|||
recordFilter={"featureInstanceId": featureInstanceId, "workspaceInstanceId": workspaceInstanceId},
|
||||
)
|
||||
|
||||
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlagFds
|
||||
_fdsAll = featureDataSources or []
|
||||
_anySourceNeutralize = any(
|
||||
bool(ds.get("neutralize", False) if isinstance(ds, dict) else getattr(ds, "neutralize", False))
|
||||
for ds in (featureDataSources or [])
|
||||
getEffectiveFlagFds(ds, "neutralize", _fdsAll, mode="walk") is True
|
||||
for ds in _fdsAll
|
||||
)
|
||||
|
||||
neutralizeFieldsPerTable: Dict[str, List[str]] = {}
|
||||
|
|
|
|||
|
|
@ -95,8 +95,7 @@ class FeatureDataProvider:
|
|||
def getActualColumns(self, tableName: str) -> List[str]:
|
||||
"""Read real column names from PostgreSQL information_schema."""
|
||||
try:
|
||||
conn = self._db.connection
|
||||
with conn.cursor() as cur:
|
||||
with self._db.borrowCursor() as cur:
|
||||
cur.execute(
|
||||
"SELECT column_name FROM information_schema.columns "
|
||||
"WHERE table_schema = 'public' AND LOWER(table_name) = LOWER(%s) "
|
||||
|
|
@ -131,7 +130,6 @@ class FeatureDataProvider:
|
|||
Returns ``{"rows": [...], "total": N, "limit": L, "offset": O}``.
|
||||
"""
|
||||
_validateTableName(tableName)
|
||||
conn = self._db.connection
|
||||
|
||||
if fields:
|
||||
invalid = [f for f in fields if not _isValidIdentifier(f)]
|
||||
|
|
@ -141,7 +139,7 @@ class FeatureDataProvider:
|
|||
"error": f"Invalid field name(s): {', '.join(invalid)}. Use getActualColumns to discover valid column names.",
|
||||
}
|
||||
|
||||
scopeFilter = _buildScopeFilter(tableName, featureInstanceId, mandateId, dbConnection=conn)
|
||||
scopeFilter = _buildScopeFilter(tableName, featureInstanceId, mandateId, db=self._db)
|
||||
extraWhere, extraParams = _buildFilterClauses(extraFilters)
|
||||
|
||||
fullWhere = scopeFilter["where"]
|
||||
|
|
@ -152,7 +150,7 @@ class FeatureDataProvider:
|
|||
|
||||
t0 = time.time()
|
||||
try:
|
||||
with conn.cursor() as cur:
|
||||
with self._db.borrowCursor() as cur:
|
||||
countSql = f'SELECT COUNT(*) FROM "{tableName}" WHERE {fullWhere}'
|
||||
cur.execute(countSql, allParams)
|
||||
total = cur.fetchone()["count"] if cur.rowcount else 0
|
||||
|
|
@ -179,10 +177,6 @@ class FeatureDataProvider:
|
|||
_debugQueryLog("browseTable", tableName, {
|
||||
"fields": fields, "limit": limit, "offset": offset,
|
||||
}, errResult, elapsed)
|
||||
try:
|
||||
conn.rollback()
|
||||
except Exception:
|
||||
pass
|
||||
return errResult
|
||||
|
||||
def aggregateTable(
|
||||
|
|
@ -208,8 +202,7 @@ class FeatureDataProvider:
|
|||
if groupBy and not _isValidIdentifier(groupBy):
|
||||
return {"rows": [], "error": f"Invalid groupBy field: {groupBy}"}
|
||||
|
||||
conn = self._db.connection
|
||||
scopeFilter = _buildScopeFilter(tableName, featureInstanceId, mandateId, dbConnection=conn)
|
||||
scopeFilter = _buildScopeFilter(tableName, featureInstanceId, mandateId, db=self._db)
|
||||
extraWhere, extraParams = _buildFilterClauses(extraFilters)
|
||||
|
||||
fullWhere = scopeFilter["where"]
|
||||
|
|
@ -220,7 +213,7 @@ class FeatureDataProvider:
|
|||
|
||||
t0 = time.time()
|
||||
try:
|
||||
with conn.cursor() as cur:
|
||||
with self._db.borrowCursor() as cur:
|
||||
if groupBy:
|
||||
sql = (
|
||||
f'SELECT "{groupBy}" AS "groupValue", {aggregate}("{field}") AS "result" '
|
||||
|
|
@ -253,10 +246,6 @@ class FeatureDataProvider:
|
|||
_debugQueryLog("aggregateTable", tableName, {
|
||||
"aggregate": aggregate, "field": field, "groupBy": groupBy,
|
||||
}, errResult, elapsed)
|
||||
try:
|
||||
conn.rollback()
|
||||
except Exception:
|
||||
pass
|
||||
return errResult
|
||||
|
||||
def queryTable(
|
||||
|
|
@ -277,7 +266,6 @@ class FeatureDataProvider:
|
|||
``extraFilters`` are mandatory record-level scoping filters injected by the pipeline.
|
||||
"""
|
||||
_validateTableName(tableName)
|
||||
conn = self._db.connection
|
||||
|
||||
if fields:
|
||||
invalid = [f for f in fields if not _isValidIdentifier(f)]
|
||||
|
|
@ -287,7 +275,7 @@ class FeatureDataProvider:
|
|||
"error": f"Invalid field name(s): {', '.join(invalid)}. Use getActualColumns to discover valid column names.",
|
||||
}
|
||||
|
||||
scopeFilter = _buildScopeFilter(tableName, featureInstanceId, mandateId, dbConnection=conn)
|
||||
scopeFilter = _buildScopeFilter(tableName, featureInstanceId, mandateId, db=self._db)
|
||||
|
||||
combinedFilters = list(filters or []) + list(extraFilters or [])
|
||||
extraWhere, extraParams = _buildFilterClauses(combinedFilters if combinedFilters else None)
|
||||
|
|
@ -300,7 +288,7 @@ class FeatureDataProvider:
|
|||
|
||||
t0 = time.time()
|
||||
try:
|
||||
with conn.cursor() as cur:
|
||||
with self._db.borrowCursor() as cur:
|
||||
countSql = f'SELECT COUNT(*) FROM "{tableName}" WHERE {fullWhere}'
|
||||
cur.execute(countSql, allParams)
|
||||
total = cur.fetchone()["count"] if cur.rowcount else 0
|
||||
|
|
@ -329,10 +317,6 @@ class FeatureDataProvider:
|
|||
"filters": filters, "fields": fields, "orderBy": orderBy,
|
||||
"limit": limit, "offset": offset,
|
||||
}, errResult, elapsed)
|
||||
try:
|
||||
conn.rollback()
|
||||
except Exception:
|
||||
pass
|
||||
return errResult
|
||||
|
||||
|
||||
|
|
@ -343,13 +327,13 @@ class FeatureDataProvider:
|
|||
_instanceColCache: Dict[str, str] = {}
|
||||
|
||||
|
||||
def _resolveInstanceColumn(tableName: str, dbConnection=None) -> str:
|
||||
def _resolveInstanceColumn(tableName: str, db=None) -> str:
|
||||
"""Detect whether the table uses ``instanceId`` or ``featureInstanceId``."""
|
||||
if tableName in _instanceColCache:
|
||||
return _instanceColCache[tableName]
|
||||
if dbConnection:
|
||||
if db:
|
||||
try:
|
||||
with dbConnection.cursor() as cur:
|
||||
with db.borrowCursor() as cur:
|
||||
cur.execute(
|
||||
"SELECT column_name FROM information_schema.columns "
|
||||
"WHERE table_schema = 'public' AND LOWER(table_name) = LOWER(%s) "
|
||||
|
|
@ -378,14 +362,14 @@ def _isValidIdentifier(name: str) -> bool:
|
|||
return name.isidentifier()
|
||||
|
||||
|
||||
def _buildScopeFilter(tableName: str, featureInstanceId: str, mandateId: str, dbConnection=None) -> Dict[str, Any]:
|
||||
def _buildScopeFilter(tableName: str, featureInstanceId: str, mandateId: str, db=None, dbConnection=None) -> Dict[str, Any]:
|
||||
"""Build the mandatory WHERE clause that scopes rows to the feature instance.
|
||||
|
||||
Feature tables use either ``instanceId`` (commcoach, teamsbot) or
|
||||
``featureInstanceId`` (trustee) as the FK. We detect the actual column
|
||||
from ``information_schema`` when a DB connection is provided.
|
||||
from ``information_schema`` when a DB connector is provided.
|
||||
"""
|
||||
instanceCol = _resolveInstanceColumn(tableName, dbConnection)
|
||||
instanceCol = _resolveInstanceColumn(tableName, db or dbConnection)
|
||||
|
||||
conditions = []
|
||||
params = []
|
||||
|
|
|
|||
|
|
@ -54,19 +54,53 @@ _CANCEL_CHECK_INTERVAL_S = 3.0
|
|||
|
||||
|
||||
class JobProgressCallback:
|
||||
"""Callable progress reporter with cooperative cancel-check for long-running walkers."""
|
||||
"""Callable progress reporter with cooperative cancel-check for long-running walkers.
|
||||
|
||||
Two ways to set a progress message:
|
||||
progressCb(50, "145 Dateien verarbeitet") # legacy plaintext (DE)
|
||||
progressCb(50, messageKey="{n} Dateien verarbeitet",
|
||||
messageParams={"n": 145}) # i18n-friendly
|
||||
|
||||
When `messageKey` is given the structured payload is written to
|
||||
`BackgroundJob.progressMessageData` so the frontend can render it via
|
||||
`t(key, params)` in the user's UI language. A best-effort rendered
|
||||
fallback is also stored in `progressMessage` for older clients, logs,
|
||||
and audit trails.
|
||||
"""
|
||||
|
||||
def __init__(self, jobId: str):
|
||||
self._jobId = jobId
|
||||
self._cancelledCache: Optional[bool] = None
|
||||
self._lastCheckedAt: float = 0.0
|
||||
|
||||
def __call__(self, progress: int, message: Optional[str] = None) -> None:
|
||||
def __call__(
|
||||
self,
|
||||
progress: int,
|
||||
message: Optional[str] = None,
|
||||
*,
|
||||
messageKey: Optional[str] = None,
|
||||
messageParams: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
try:
|
||||
clamped = max(0, min(100, int(progress)))
|
||||
fields: Dict[str, Any] = {"progress": clamped}
|
||||
if message is not None:
|
||||
|
||||
if messageKey is not None:
|
||||
params = messageParams or {}
|
||||
try:
|
||||
fallback = messageKey.format(**params)
|
||||
except (KeyError, IndexError, ValueError) as fmtErr:
|
||||
fallback = message or messageKey
|
||||
logger.warning(
|
||||
"progressCb message format failed for job %s key=%r params=%r: %s",
|
||||
self._jobId, messageKey, params, fmtErr,
|
||||
)
|
||||
fields["progressMessageData"] = {"key": messageKey, "params": params}
|
||||
fields["progressMessage"] = (message or fallback)[:500]
|
||||
elif message is not None:
|
||||
fields["progressMessage"] = message[:500]
|
||||
fields["progressMessageData"] = None
|
||||
|
||||
_updateJob(self._jobId, fields)
|
||||
except Exception as ex:
|
||||
logger.warning("Progress update failed for job %s: %s", self._jobId, ex)
|
||||
|
|
|
|||
|
|
@ -534,11 +534,17 @@ class ChatService:
|
|||
) -> Dict[str, Any]:
|
||||
"""Create a new external data source reference.
|
||||
|
||||
Returns existing record if connectionId + path already exists (upsert semantics).
|
||||
Upsert key is `(connectionId, sourceType, path)`. The same `path='/'`
|
||||
can carry multiple DataSources discriminated by sourceType: the
|
||||
Connection-Root (sourceType=<authority>, e.g. 'msft') plus one per
|
||||
service (sourceType='sharepointFolder', 'outlookFolder', ...). The
|
||||
sourceType filter MUST be present, otherwise a Service-Root POST
|
||||
returns the Connection-Root and toggles cascade onto every sibling.
|
||||
"""
|
||||
from modules.datamodels.datamodelDataSource import DataSource
|
||||
existing = self.interfaceDbApp.db.getRecordset(
|
||||
DataSource, recordFilter={"connectionId": connectionId, "path": path}
|
||||
DataSource,
|
||||
recordFilter={"connectionId": connectionId, "sourceType": sourceType, "path": path},
|
||||
)
|
||||
if existing:
|
||||
return existing[0] if isinstance(existing[0], dict) else existing[0].model_dump()
|
||||
|
|
|
|||
1020
modules/serviceCenter/services/serviceKnowledge/_buildTree.py
Normal file
1020
modules/serviceCenter/services/serviceKnowledge/_buildTree.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,86 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Indicative cost estimation for a RAG bootstrap run.
|
||||
|
||||
This is **not** a billing-grade forecast: it gives the user a back-of-the-envelope
|
||||
USD figure for the worst-case full sync, so they can sanity-check before raising
|
||||
`maxBytes`/`maxItems`. The output always carries the underlying assumptions
|
||||
(`basis`) so the user can judge plausibility.
|
||||
|
||||
Heuristic:
|
||||
estimatedTokens = ceil(maxBytes * EXTRACTABLE_FRACTION / BYTES_PER_TOKEN_TEXT_FACTOR)
|
||||
estimatedUsd = estimatedTokens / 1_000_000 * EMBEDDING_USD_PER_MTOKEN
|
||||
|
||||
Defaults match OpenAI `text-embedding-3-small` pricing (2026-Q2).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
# Reported in the `assumptions` payload so the user can judge plausibility.
CHARS_PER_TOKEN = 4
# Price per one million embedding tokens, in USD.
EMBEDDING_USD_PER_MTOKEN = 0.02
# Flat token budget assumed per item for item-count based sources (clickup).
DEFAULT_TOKENS_PER_ITEM = 1500
# Divisor used to turn extractable bytes into tokens for file sources.
BYTES_PER_TOKEN_TEXT_FACTOR = 4
# Share of the raw byte volume assumed to be extractable text.
EXTRACTABLE_FRACTION = 0.4


def estimateBootstrapCost(limits: Dict[str, int], kind: str = "files") -> Dict[str, Any]:
    """Return an indicative cost estimate dict for a DataSource bootstrap.

    Args:
        limits: Bootstrap limits. ``kind="files"`` reads ``maxBytes``;
            ``kind="clickup"`` reads ``maxTasks`` and ``maxWorkspaces``.
            ``None`` or an empty mapping is treated as "no limits" and
            yields a zero estimate. Missing, falsy, or negative values
            count as 0 (``maxWorkspaces`` has a floor of 1).
        kind: ``"files"`` or ``"clickup"``. Any other value produces a zero
            estimate with an explanatory ``formula`` note.

    Returns:
        A dict of the shape::

            {
                "estimatedTokens": int,
                "estimatedUsd": float,   # rounded to 4 decimals
                "basis": {
                    "kind": "files"|"clickup",
                    "limits": {...},     # shallow copy of the input
                    "assumptions": {
                        "embeddingUsdPerMToken": 0.02,
                        "charsPerToken": 4,
                        "extractableFraction": 0.4,  # files only
                        "tokensPerItem": 1500,       # clickup only
                        "formula": "..."
                    },
                    "notes": "non-binding, depends on real file content..."
                }
            }
    """
    # Copy up front: tolerates `None` and keeps the caller's dict untouched.
    safeLimits: Dict[str, Any] = dict(limits or {})

    assumptions: Dict[str, Any] = {
        "embeddingUsdPerMToken": EMBEDDING_USD_PER_MTOKEN,
        "charsPerToken": CHARS_PER_TOKEN,
    }

    if kind == "files":
        # Negative limits are meaningless; clamp so the estimate never goes below zero.
        maxBytes = max(0, int(safeLimits.get("maxBytes") or 0))
        extractableBytes = maxBytes * EXTRACTABLE_FRACTION
        estimatedTokens = int(math.ceil(extractableBytes / BYTES_PER_TOKEN_TEXT_FACTOR))
        assumptions["extractableFraction"] = EXTRACTABLE_FRACTION
        # Derived from the constants so the reported formula cannot drift from the math.
        assumptions["formula"] = (
            f"ceil(maxBytes * {EXTRACTABLE_FRACTION} / {BYTES_PER_TOKEN_TEXT_FACTOR})"
        )
    elif kind == "clickup":
        maxTasks = max(0, int(safeLimits.get("maxTasks") or 0))
        maxWorkspaces = max(1, int(safeLimits.get("maxWorkspaces") or 1))
        estimatedTokens = maxTasks * maxWorkspaces * DEFAULT_TOKENS_PER_ITEM
        assumptions["tokensPerItem"] = DEFAULT_TOKENS_PER_ITEM
        assumptions["formula"] = f"maxTasks * maxWorkspaces * {DEFAULT_TOKENS_PER_ITEM}"
    else:
        estimatedTokens = 0
        assumptions["formula"] = "unknown kind, returning zero"

    estimatedUsd = round(estimatedTokens / 1_000_000 * EMBEDDING_USD_PER_MTOKEN, 4)

    return {
        "estimatedTokens": estimatedTokens,
        "estimatedUsd": estimatedUsd,
        "basis": {
            "kind": kind,
            "limits": safeLimits,
            "assumptions": assumptions,
            "notes": (
                "Indicative only. Actual cost depends on file types, extractable text "
                "ratio, dedup hit-rate, retries, and current embedding model pricing."
            ),
        },
    }
|
||||
631
modules/serviceCenter/services/serviceKnowledge/_inheritFlags.py
Normal file
631
modules/serviceCenter/services/serviceKnowledge/_inheritFlags.py
Normal file
|
|
@ -0,0 +1,631 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Cascade-inherit semantics for DataSource flags (neutralize, ragIndexEnabled, scope).
|
||||
|
||||
Three-state flags allow tree elements to either set an explicit value or
|
||||
inherit the value from their nearest ancestor in the path hierarchy.
|
||||
|
||||
Modes:
|
||||
- 'walk' (default): resolves the *concrete* effective value per-item
|
||||
(never returns 'mixed'). Used by backend consumers (RAG walker,
|
||||
neutralization pipeline, scope filter, etc.).
|
||||
- 'aggregate': resolves the *display* effective value per-item. If the
|
||||
item has descendants with differing walk-effective values, returns
|
||||
'mixed'. Used by listing endpoints and PATCH responses for the UI.
|
||||
|
||||
Path-traversal rules:
|
||||
- A DataSource is identified by `(connectionId, sourceType, path)`.
|
||||
- The root of a service tree is `path == '/'`.
|
||||
- Sub-elements have paths like `/folder1/sub`. Their parent path is the
|
||||
longest prefix path that exists as a DataSource record (string-based).
|
||||
- If no ancestor with an explicit value exists, the default is `False`
|
||||
(or `'personal'` for scope) — matching the legacy behavior of NULL = inherit.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_INHERITABLE_FLAGS = ("neutralize", "ragIndexEnabled", "scope")
|
||||
_INHERITABLE_FDS_FLAGS = ("neutralize", "ragIndexEnabled", "scope")
|
||||
|
||||
# Connection-root DataSources carry the authority as their sourceType
|
||||
# (e.g. 'msft', 'google'). They sit one level above all service DataSources
|
||||
# of the same connection in the visual tree, so flag inheritance must
|
||||
# cross sourceType boundaries — but ONLY from these authority roots.
|
||||
_AUTHORITY_SOURCE_TYPES = frozenset({"local", "google", "msft", "clickup", "infomaniak"})
|
||||
|
||||
Mode = Literal["walk", "aggregate"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _normalisePath(path: Optional[str]) -> str:
|
||||
"""Normalize a DataSource path to '/'-prefixed, no trailing slash (except root)."""
|
||||
if not path:
|
||||
return "/"
|
||||
p = str(path).strip()
|
||||
if not p.startswith("/"):
|
||||
p = "/" + p
|
||||
if len(p) > 1 and p.endswith("/"):
|
||||
p = p.rstrip("/")
|
||||
return p
|
||||
|
||||
|
||||
def _flagDefault(flag: str) -> Any:
|
||||
if flag == "scope":
|
||||
return "personal"
|
||||
return False
|
||||
|
||||
|
||||
def _isExplicit(value: Any) -> bool:
|
||||
"""A flag value is explicit when it is not None/empty-string."""
|
||||
if value is None:
|
||||
return False
|
||||
if isinstance(value, str) and value == "":
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _getRecordValue(rec: Any, key: str) -> Any:
|
||||
if isinstance(rec, dict):
|
||||
return rec.get(key)
|
||||
return getattr(rec, key, None)
|
||||
|
||||
|
||||
def _isAncestorPath(ancestor: str, descendant: str) -> bool:
|
||||
"""True iff `ancestor` is a strict path-prefix of `descendant`."""
|
||||
if ancestor == descendant:
|
||||
return False
|
||||
if ancestor == "/":
|
||||
return descendant != "/"
|
||||
return descendant.startswith(ancestor + "/")
|
||||
|
||||
|
||||
def _pathDepth(path: str) -> int:
|
||||
if path == "/":
|
||||
return 0
|
||||
return path.count("/")
|
||||
|
||||
|
||||
def _findAncestorChain(
    rec: Dict[str, Any],
    allDs: Iterable[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Collect the ancestors of `rec` within its connection, nearest first.

    Two kinds of ancestor are recognised:
      1) path ancestors: records with the same sourceType whose normalized
         path is a strict prefix of `rec`'s path, ordered by descending
         path length (longest prefix = nearest);
      2) the connection root: a record with `path='/'` whose sourceType is
         one of `_AUTHORITY_SOURCE_TYPES`. It is considered only when `rec`
         is not itself a connection root, and is always appended last. If
         several such records exist, the last one encountered is used.

    Records from other connections and `rec` itself (matched by id) are ignored.
    """
    ownId = _getRecordValue(rec, "id")
    ownPath = _normalisePath(_getRecordValue(rec, "path"))
    ownType = _getRecordValue(rec, "sourceType")
    ownConnection = _getRecordValue(rec, "connectionId")
    ownIsRoot = ownType in _AUTHORITY_SOURCE_TYPES and ownPath == "/"

    ranked: List[Tuple[int, Dict[str, Any]]] = []
    authorityRoot: Optional[Dict[str, Any]] = None

    for other in allDs:
        if _getRecordValue(other, "id") == ownId:
            continue
        if _getRecordValue(other, "connectionId") != ownConnection:
            continue

        otherType = _getRecordValue(other, "sourceType")
        otherPath = _normalisePath(_getRecordValue(other, "path"))

        if otherType == ownType:
            # Same service tree: keep only strict path prefixes.
            if _isAncestorPath(otherPath, ownPath):
                ranked.append((len(otherPath), other))
            continue

        # Different sourceType: only an authority root may act as an ancestor.
        if not ownIsRoot and otherType in _AUTHORITY_SOURCE_TYPES and otherPath == "/":
            authorityRoot = other

    # Longest path first, so the nearest ancestor leads the chain.
    ordered = sorted(ranked, key=lambda pair: pair[0], reverse=True)
    chain = [ds for _, ds in ordered]
    if authorityRoot is not None:
        chain.append(authorityRoot)
    return chain
|
||||
|
||||
|
||||
def _isDescendantDs(parentRec: Dict[str, Any], candidate: Dict[str, Any]) -> bool:
    """Decide whether `candidate` sits below `parentRec` in the DataSource hierarchy.

    A record is never its own descendant, and records from a different
    connection never match. When `parentRec` is a connection root (authority
    sourceType at path '/'), every other record of the connection is a
    descendant. Otherwise the candidate must share the parent's sourceType
    and have the parent's path as a strict prefix.
    """
    if _getRecordValue(candidate, "id") == _getRecordValue(parentRec, "id"):
        return False
    if _getRecordValue(candidate, "connectionId") != _getRecordValue(parentRec, "connectionId"):
        return False

    parentType = _getRecordValue(parentRec, "sourceType")
    parentPath = _normalisePath(_getRecordValue(parentRec, "path"))

    # A connection root is the parent of everything else in the connection.
    if parentType in _AUTHORITY_SOURCE_TYPES and parentPath == "/":
        return True

    if _getRecordValue(candidate, "sourceType") != parentType:
        return False

    childPath = _normalisePath(_getRecordValue(candidate, "path"))
    return _isAncestorPath(parentPath, childPath)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DataSource: getEffectiveFlag
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def getEffectiveFlag(
    rec: Dict[str, Any],
    flag: str,
    sameConnectionDs: Iterable[Dict[str, Any]],
    mode: Mode = "walk",
) -> Any:
    """Compute the effective value of `flag` for `rec`.

    mode='walk':      the record's own explicit value, else the nearest
                      ancestor's explicit value, else the flag default.
                      Never yields 'mixed'.
    mode='aggregate': starts from the walk value; if any descendant of
                      `rec` resolves (in walk mode) to a different value,
                      the result is the string 'mixed'.

    Raises:
        ValueError: if `flag` is not one of the inheritable flags.
    """
    if flag not in _INHERITABLE_FLAGS:
        raise ValueError(f"Unknown inheritable flag: {flag}")

    # Materialize once: the iterable is traversed several times below.
    records = list(sameConnectionDs)
    ownEffective = _resolveWalkValue(rec, flag, records)

    if mode == "walk":
        return ownEffective

    # Aggregate mode: look for a divergent value anywhere in the subtree.
    subtree = [other for other in records if _isDescendantDs(rec, other)]
    if not subtree:
        return ownEffective

    seenValues = {_normaliseForComparison(ownEffective)}
    for child in subtree:
        childEffective = _resolveWalkValue(child, flag, records)
        seenValues.add(_normaliseForComparison(childEffective))
        if len(seenValues) <= 1:
            continue
        # First divergence settles the answer; log the offender and stop.
        logger.info(
            "DS aggregate MIXED for rec=%s flag=%s: walkValue=%s, "
            "divergent desc=%s (own=%s, effective=%s), subtreeValues=%s",
            _getRecordValue(rec, "id"),
            flag,
            ownEffective,
            _getRecordValue(child, "id"),
            _getRecordValue(child, flag),
            childEffective,
            seenValues,
        )
        return "mixed"

    return ownEffective
|
||||
|
||||
|
||||
def _resolveWalkValue(rec: Dict[str, Any], flag: str, allDs: List[Dict[str, Any]]) -> Any:
    """Resolve `flag` for `rec` in walk mode.

    Precedence: the record's own explicit value, then the first explicit
    value found along its ancestor chain (nearest first), then the flag default.
    """
    ownValue = _getRecordValue(rec, flag)
    if _isExplicit(ownValue):
        return ownValue

    inherited = (
        _getRecordValue(node, flag) for node in _findAncestorChain(rec, allDs)
    )
    return next(
        (value for value in inherited if _isExplicit(value)),
        _flagDefault(flag),
    )
|
||||
|
||||
|
||||
def _normaliseForComparison(value: Any) -> Any:
|
||||
"""Normalize values for set-comparison (bool as int to avoid hash issues)."""
|
||||
if isinstance(value, bool):
|
||||
return int(value)
|
||||
return value
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DataSource: cascadeResetDescendants (bottom-up)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cascadeResetDescendants(
    rootIf: Any,
    parentRec: Dict[str, Any],
    flag: str,
) -> List[str]:
    """Clear `flag` (set it to NULL = inherit) on every descendant that has it set explicitly.

    Descendants are processed deepest path first. `parentRec` itself is not
    written here. A failed write for one record is logged as a warning and
    does not stop the remaining resets.

    Args:
        rootIf: Interface exposing `db.getRecordset` and `db.recordModify`.
        parentRec: The record whose descendants are reset.
        flag: One of the inheritable flag names.

    Returns:
        The ids of the records that were reset successfully, in processing
        order. Empty when `parentRec` has no connectionId.

    Raises:
        ValueError: if `flag` is not one of the inheritable flags.
    """
    if flag not in _INHERITABLE_FLAGS:
        raise ValueError(f"Unknown inheritable flag: {flag}")
    from modules.datamodels.datamodelDataSource import DataSource

    connectionId = _getRecordValue(parentRec, "connectionId")
    parentId = _getRecordValue(parentRec, "id")
    if not connectionId:
        return []

    connectionRecords = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})

    # (depth, id) for each descendant that carries an explicit value.
    ranked: List[Tuple[int, str]] = [
        (
            _pathDepth(_normalisePath(_getRecordValue(ds, "path"))),
            _getRecordValue(ds, "id"),
        )
        for ds in connectionRecords
        if _isDescendantDs(parentRec, ds) and _isExplicit(_getRecordValue(ds, flag))
    ]

    resetIds: List[str] = []
    # Deepest first (bottom-up).
    for _, dsId in sorted(ranked, key=lambda entry: entry[0], reverse=True):
        try:
            rootIf.db.recordModify(DataSource, dsId, {flag: None})
        except Exception as exc:
            logger.warning("Cascade-reset failed for DataSource %s flag=%s: %s", dsId, flag, exc)
        else:
            resetIds.append(dsId)

    if resetIds:
        logger.info(
            "Cascade-reset %s on %d descendants of DataSource %s (bottom-up)",
            flag, len(resetIds), parentId,
        )
    return resetIds
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DataSource: collectAncestorChain (for updatedAncestors in PATCH response)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def collectAncestorChain(
    rec: Dict[str, Any],
    sameConnectionDs: Iterable[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Public accessor for the ancestor chain of a DataSource record.

    Thin delegate to the module-private ``_findAncestorChain`` helper, so the
    ordering of the result (documented as nearest-first) is whatever that
    helper produces. PATCH endpoints use it to compute ``updatedAncestors``.
    """
    ancestorChain = _findAncestorChain(rec, sameConnectionDs)
    return ancestorChain
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DataSource: buildEffectiveByConnection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def buildEffectiveByConnection(
    dataSources: Iterable[Dict[str, Any]],
    flag: str,
    mode: Mode = "walk",
) -> Dict[str, Any]:
    """Map every DataSource id to the effective value of `flag`.

    The iterable is materialised once so it can be handed to
    `getEffectiveFlag` for every record; each record is resolved against the
    complete set, which is why the cost grows quadratically with the number
    of records.

    Raises ValueError when `flag` is not one of the inheritable flags.
    """
    if flag not in _INHERITABLE_FLAGS:
        raise ValueError(f"Unknown inheritable flag: {flag}")
    records = list(dataSources)
    return {
        _getRecordValue(record, "id"): getEffectiveFlag(record, flag, records, mode=mode)
        for record in records
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# FeatureDataSource helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _fdsClassify(fds: Dict[str, Any]) -> str:
    """Classify an FDS as 'workspace', 'table' or 'record'.

    A tableName of '*' marks the workspace-level entry. Otherwise an
    empty or missing recordFilter means 'table' and anything else 'record'.
    """
    table = _getRecordValue(fds, "tableName") or ""
    rowFilter = _getRecordValue(fds, "recordFilter")
    if table == "*":
        return "workspace"
    return "record" if rowFilter else "table"
|
||||
|
||||
|
||||
def _fdsIsAncestor(parent: Dict[str, Any], child: Dict[str, Any]) -> bool:
    """Tell whether `parent` is a strict ancestor of `child`.

    Both entries must share a non-empty featureInstanceId and must not be the
    same record (compared by id). Within one feature instance:

      * a workspace entry (tableName='*') is an ancestor of every table and
        record entry;
      * a table entry is an ancestor of record entries with the same
        tableName;
      * a record entry is never an ancestor.
    """
    parentInstance = _getRecordValue(parent, "featureInstanceId")
    childInstance = _getRecordValue(child, "featureInstanceId")
    if not (parentInstance and parentInstance == childInstance):
        return False
    if _getRecordValue(parent, "id") == _getRecordValue(child, "id"):
        # A record is never its own ancestor.
        return False

    upperKind = _fdsClassify(parent)
    lowerKind = _fdsClassify(child)
    if upperKind == "workspace":
        return lowerKind in ("table", "record")
    if upperKind == "table" and lowerKind == "record":
        return _getRecordValue(parent, "tableName") == _getRecordValue(child, "tableName")
    return False
|
||||
|
||||
|
||||
def _fdsDepth(fds: Dict[str, Any]) -> int:
    """Return the hierarchy depth of an FDS: workspace=0, table=1, anything else=2."""
    depthByKind = {"workspace": 0, "table": 1}
    return depthByKind.get(_fdsClassify(fds), 2)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# FeatureDataSource: getEffectiveFlagFds
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def getEffectiveFlagFds(
    rec: Dict[str, Any],
    flag: str,
    sameWorkspaceFds: Iterable[Dict[str, Any]],
    mode: Mode = "walk",
) -> Any:
    """Compute the effective value of an inheritable FeatureDataSource flag.

    With mode='walk' the result is the plain walk resolution of `rec`
    (own explicit value, then ancestors, then the flag default).

    Any other mode is treated as 'aggregate': the walk value of `rec` is
    compared with the walk value of each of its descendants, and 'mixed' is
    returned as soon as two distinct (normalised) values have been seen.
    Without descendants, or when all values agree, the walk value is returned.

    Raises ValueError when `flag` is not an inheritable FDS flag.
    """
    if flag not in _INHERITABLE_FDS_FLAGS:
        raise ValueError(f"Unknown inheritable FDS flag: {flag}")

    pool = list(sameWorkspaceFds)
    ownValue = _resolveWalkValueFds(rec, flag, pool)
    if mode == "walk":
        return ownValue

    # Aggregate mode: inspect everything below `rec`.
    subtree = [candidate for candidate in pool if _fdsIsAncestor(rec, candidate)]
    if subtree:
        distinct = {_normaliseForComparison(ownValue)}
        for member in subtree:
            memberValue = _resolveWalkValueFds(member, flag, pool)
            distinct.add(_normaliseForComparison(memberValue))
            if len(distinct) <= 1:
                continue
            # First divergence decides the outcome; log which descendant caused it.
            logger.info(
                "FDS aggregate MIXED for rec=%s flag=%s: walkValue=%s, "
                "divergent desc=%s (own=%s, effective=%s), subtreeValues=%s",
                _getRecordValue(rec, "id"),
                flag,
                ownValue,
                _getRecordValue(member, "id"),
                _getRecordValue(member, flag),
                memberValue,
                distinct,
            )
            return "mixed"
    return ownValue
|
||||
|
||||
|
||||
def _resolveWalkValueFds(rec: Dict[str, Any], flag: str, allFds: List[Dict[str, Any]]) -> Any:
    """Resolve `flag` for one FDS by walking up its ancestors.

    Order of precedence: the record's own explicit value, then explicit
    values of its ancestors with table-level entries checked before all
    other ancestors, and finally the flag's default.
    """
    ownValue = _getRecordValue(rec, flag)
    if _isExplicit(ownValue):
        return ownValue

    # Stable sort: table-level ancestors first, the rest keep their order.
    rankedAncestors = sorted(
        (candidate for candidate in allFds if _fdsIsAncestor(candidate, rec)),
        key=lambda candidate: 0 if _fdsClassify(candidate) == "table" else 1,
    )
    for candidate in rankedAncestors:
        inherited = _getRecordValue(candidate, flag)
        if _isExplicit(inherited):
            return inherited
    return _flagDefault(flag)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# FeatureDataSource: cascadeResetDescendantsFds (bottom-up)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cascadeResetDescendantsFds(
    rootIf: Any,
    parentRec: Dict[str, Any],
    flag: str,
) -> List[str]:
    """Clear explicit `flag` values on all descendant FDS of `parentRec`.

    All FeatureDataSource rows sharing the parent's workspaceInstanceId are
    loaded; those that are descendants of `parentRec` and carry an explicit
    value get the flag written back as None. Writes happen deepest-first
    (bottom-up). A failing write is logged as a warning and skipped, so the
    remaining records are still processed.

    Returns the ids that were actually reset, in the order they were written.
    Returns an empty list when the parent has no workspaceInstanceId.

    Raises ValueError when `flag` is not an inheritable FDS flag.
    """
    if flag not in _INHERITABLE_FDS_FLAGS:
        raise ValueError(f"Unknown inheritable FDS flag: {flag}")
    from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource

    workspaceInstanceId = _getRecordValue(parentRec, "workspaceInstanceId")
    if not workspaceInstanceId:
        return []
    candidates = rootIf.db.getRecordset(
        FeatureDataSource, recordFilter={"workspaceInstanceId": workspaceInstanceId}
    )

    # Collect (depth, id) for every descendant that overrides the flag.
    pending: List[Tuple[int, str]] = []
    for candidate in candidates:
        if not _fdsIsAncestor(parentRec, candidate):
            continue
        if not _isExplicit(_getRecordValue(candidate, flag)):
            continue
        candidateId = _getRecordValue(candidate, "id")
        pending.append((_fdsDepth(candidate), candidateId))

    # Deepest entries first (bottom-up); equal depths keep their load order.
    ordered = sorted(pending, key=lambda entry: entry[0], reverse=True)

    resetIds: List[str] = []
    for _depth, targetId in ordered:
        try:
            rootIf.db.recordModify(FeatureDataSource, targetId, {flag: None})
        except Exception as exc:
            logger.warning("FDS cascade-reset failed for %s flag=%s: %s", targetId, flag, exc)
        else:
            resetIds.append(targetId)

    if resetIds:
        logger.info(
            "FDS cascade-reset %s on %d descendants of FDS %s (bottom-up)",
            flag, len(resetIds), _getRecordValue(parentRec, "id"),
        )
    return resetIds
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# FeatureDataSource: collectAncestorChainFds
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def collectAncestorChainFds(
    rec: Dict[str, Any],
    sameWorkspaceFds: Iterable[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """List the FDS ancestors of `rec`, table-level entries first.

    Every entry of `sameWorkspaceFds` that is a strict ancestor of `rec` is
    returned; table-level ancestors sort ahead of all others, which yields a
    nearest-first chain. PATCH endpoints use it to compute updatedAncestors.
    """
    return sorted(
        [candidate for candidate in list(sameWorkspaceFds) if _fdsIsAncestor(candidate, rec)],
        key=lambda candidate: 0 if _fdsClassify(candidate) == "table" else 1,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# FeatureDataSource: buildEffectiveByWorkspaceFds
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def buildEffectiveByWorkspaceFds(
    fdses: Iterable[Dict[str, Any]],
    flag: str,
    mode: Mode = "walk",
) -> Dict[str, Any]:
    """Map every FDS id to the effective value of `flag`.

    The iterable is materialised once and every record is resolved against
    the complete set via `getEffectiveFlagFds`.

    Raises ValueError when `flag` is not an inheritable FDS flag.
    """
    if flag not in _INHERITABLE_FDS_FLAGS:
        raise ValueError(f"Unknown inheritable FDS flag: {flag}")
    records = list(fdses)
    return {
        _getRecordValue(record, "id"): getEffectiveFlagFds(record, flag, records, mode=mode)
        for record in records
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bulk resolve: effective flags for arbitrary paths (even without DB record)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def resolveEffectiveForPath(
    connectionId: str,
    sourceType: str,
    path: str,
    allDs: List[Dict[str, Any]],
    mode: Mode = "aggregate",
) -> Dict[str, Any]:
    """Resolve the effective flags for a (connectionId, sourceType, path) tuple.

    When a DataSource with exactly this coordinate exists (paths are compared
    in normalised form), its flags are resolved with `getEffectiveFlag` using
    `mode`. Otherwise a virtual record without explicit values is resolved by
    the plain walk, so `mode` has no effect in that case.

    Returns a dict with the keys effectiveNeutralize, effectiveScope and
    effectiveRagIndexEnabled.
    """
    normPath = _normalisePath(path)
    # Output key -> flag name, in the order the result dict is built.
    outputFlags = (
        ("effectiveNeutralize", "neutralize"),
        ("effectiveScope", "scope"),
        ("effectiveRagIndexEnabled", "ragIndexEnabled"),
    )

    exactRecord = next(
        (
            ds
            for ds in allDs
            if _getRecordValue(ds, "connectionId") == connectionId
            and _getRecordValue(ds, "sourceType") == sourceType
            and _normalisePath(_getRecordValue(ds, "path")) == normPath
        ),
        None,
    )
    if exactRecord:
        return {
            outKey: getEffectiveFlag(exactRecord, flagName, allDs, mode=mode)
            for outKey, flagName in outputFlags
        }

    # No stored record: resolve a stand-in that inherits everything.
    virtualRec = {
        "id": "__virtual__",
        "connectionId": connectionId,
        "sourceType": sourceType,
        "path": normPath,
        "neutralize": None,
        "scope": None,
        "ragIndexEnabled": None,
    }
    return {
        outKey: _resolveWalkValue(virtualRec, flagName, allDs)
        for outKey, flagName in outputFlags
    }
|
||||
|
||||
|
||||
def resolveEffectiveForFds(
    featureInstanceId: str,
    tableName: str,
    recordFilter: Optional[Dict[str, str]],
    allFds: List[Dict[str, Any]],
    mode: Mode = "aggregate",
) -> Dict[str, Any]:
    """Resolve effective flags for ANY FDS tuple (even without DB record).

    `allFds` is pre-scoped to a single workspace (loaded with a
    workspaceInstanceId filter). Within that set, the coordinate is
    featureInstanceId + tableName + recordFilter.

    When an FDS with exactly this coordinate exists, its flags are resolved
    with `getEffectiveFlagFds` using `mode`. Otherwise a virtual record without
    explicit values is resolved by the plain walk.

    An empty recordFilter ({}) and a missing one (None) are treated as the
    same coordinate, matching `_fdsClassify`, which classifies both as the
    table-level entry. Without this, a stored table entry with `{}` would
    not be found for a `None` query and its explicit values would be skipped.

    Returns a dict with effectiveNeutralize, effectiveScope and
    effectiveRagIndexEnabled.
    """
    wantedFilter = recordFilter or None
    exactRecord = None
    for fds in allFds:
        if _getRecordValue(fds, "featureInstanceId") != featureInstanceId:
            continue
        if (_getRecordValue(fds, "tableName") or "") != tableName:
            continue
        # Normalise empty filters so {} and None compare equal.
        if (_getRecordValue(fds, "recordFilter") or None) == wantedFilter:
            exactRecord = fds
            break

    if exactRecord is not None:
        return {
            "effectiveNeutralize": getEffectiveFlagFds(exactRecord, "neutralize", allFds, mode=mode),
            "effectiveScope": getEffectiveFlagFds(exactRecord, "scope", allFds, mode=mode),
            "effectiveRagIndexEnabled": getEffectiveFlagFds(exactRecord, "ragIndexEnabled", allFds, mode=mode),
        }

    # No stored record: resolve a stand-in that inherits everything.
    virtualRec = {
        "id": "__virtual__",
        "featureInstanceId": featureInstanceId,
        "tableName": tableName,
        "recordFilter": recordFilter,
        "neutralize": None,
        "scope": None,
        "ragIndexEnabled": None,
    }
    return {
        "effectiveNeutralize": _resolveWalkValueFds(virtualRec, "neutralize", allFds),
        "effectiveScope": _resolveWalkValueFds(virtualRec, "scope", allFds),
        "effectiveRagIndexEnabled": _resolveWalkValueFds(virtualRec, "ragIndexEnabled", allFds),
    }
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
"""Central i18n registration for BackgroundJob progress messages.
|
||||
|
||||
Walkers and consumers report progress via ``progressCb(..., messageKey="…",
|
||||
messageParams={...})``. Those keys are not seen by ``t()`` at call time, so
|
||||
without a stub registration they would never make it into the boot-time
|
||||
``UiLanguageSet(xx)`` sync. Importing this module is enough to register
|
||||
every known key — call sites stay clean while translators can still find
|
||||
the texts in the standard i18n table.
|
||||
|
||||
Keep this list in lockstep with the ``messageKey=`` arguments used in
|
||||
``subConnectorSync*.py`` and ``subConnectorIngestConsumer.py``.
|
||||
"""
|
||||
|
||||
from modules.shared.i18nRegistry import t
|
||||
|
||||
# Progress counters reported by the bootstrap walkers
# (files, tasks and mails respectively).
t("{n} Dateien verarbeitet, {indexed} indexiert")
t("{n} Tasks verarbeitet, {indexed} indexiert")
t("{n} Mails verarbeitet, {indexed} indexiert")

# Status messages emitted by the ingestion consumer before handing
# over to a walker.
t("Verbindung wird aufgebaut ({authority})")
t("Synchronisierung läuft...")
|
||||
107
modules/serviceCenter/services/serviceKnowledge/_ragLimits.py
Normal file
107
modules/serviceCenter/services/serviceKnowledge/_ragLimits.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Centralized RAG bootstrap limits + DataSource-scoped resolution.
|
||||
|
||||
The original walkers (SharePoint, kDrive, gDrive, ClickUp) each carried their
|
||||
own module-level `MAX_*_DEFAULT` constants and silently stopped indexing once
|
||||
they were exceeded. That made it impossible for a user with a 500 MB folder to
|
||||
override the 200 MB cap without a code change.
|
||||
|
||||
This module is the single source of truth for two things:
|
||||
|
||||
1. The canonical default budget per source kind (`FILES_LIMITS_DEFAULT`,
|
||||
`CLICKUP_LIMITS_DEFAULT`). Walkers fall back to these when a DataSource has
|
||||
no `settings.ragLimits` yet.
|
||||
|
||||
2. The pure read/lazy-fill helpers that walkers and the API use to merge a
|
||||
DataSource's stored settings with the defaults. No override layers, no
|
||||
resolver chain: what is in `DataSource.settings.ragLimits` is what the
|
||||
walker uses.
|
||||
|
||||
Lazy fill: the first time a DataSource is processed, the defaults are written
|
||||
to its `settings.ragLimits` so the UI shows real values immediately, even if
|
||||
the user has never opened the settings modal.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
FILES_LIMITS_DEFAULT: Dict[str, int] = {
|
||||
"maxItems": 500,
|
||||
"maxBytes": 200 * 1024 * 1024,
|
||||
"maxFileSize": 25 * 1024 * 1024,
|
||||
"maxDepth": 4,
|
||||
}
|
||||
|
||||
|
||||
CLICKUP_LIMITS_DEFAULT: Dict[str, int] = {
|
||||
"maxTasks": 500,
|
||||
"maxWorkspaces": 3,
|
||||
"maxListsPerWorkspace": 20,
|
||||
}
|
||||
|
||||
|
||||
_LIMITS_BY_KIND: Dict[str, Dict[str, int]] = {
|
||||
"files": FILES_LIMITS_DEFAULT,
|
||||
"clickup": CLICKUP_LIMITS_DEFAULT,
|
||||
}
|
||||
|
||||
|
||||
def getDefaults(kind: str) -> Dict[str, int]:
    """Look up the default budget for a walker kind and return a copy of it.

    Known kinds are the keys of `_LIMITS_BY_KIND` ("files" for the
    file-based walkers, "clickup" for ClickUp). The result is a new dict, so
    callers may modify it without touching the module-level defaults.

    Raises ValueError for an unknown kind.
    """
    if kind not in _LIMITS_BY_KIND:
        raise ValueError(f"Unknown RAG limit kind: {kind!r}")
    return dict(_LIMITS_BY_KIND[kind])
|
||||
|
||||
|
||||
def getStoredOverrides(dataSource: Optional[Dict[str, Any]], kind: str) -> Dict[str, int]:
    """Return ONLY the limits explicitly set on `dataSource.settings.ragLimits`.

    Missing keys are NOT filled with defaults — that is the caller's job (so
    a programmatically supplied `limits=` from a caller still wins when the
    DataSource has no override). Pure read, no DB writes.

    Keys that are not defined for `kind`, and entries whose value is None,
    are dropped. Values are coerced with `int()`; a value that cannot be
    converted is logged and ignored. An unknown `kind` yields an empty dict.
    Anything that is not a dict (the DataSource itself, its `settings`, or
    `ragLimits`) also yields an empty dict.
    """
    if not isinstance(dataSource, dict):
        return {}
    settings = dataSource.get("settings") or {}
    if not isinstance(settings, dict):
        return {}
    stored = settings.get("ragLimits")
    if not isinstance(stored, dict):
        return {}

    allowed = set(_LIMITS_BY_KIND.get(kind, {}).keys())
    out: Dict[str, int] = {}
    for key, raw in stored.items():
        if key not in allowed or raw is None:
            continue
        try:
            out[key] = int(raw)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers int(float("inf")): an infinite value must be
            # ignored like any other unusable value instead of aborting the
            # whole lookup.
            logger.warning(
                "Ignoring non-int ragLimits[%s]=%r on DataSource %s",
                key, raw, dataSource.get("id"),
            )
    return out
|
||||
|
||||
|
||||
def getRagLimits(dataSource: Optional[Dict[str, Any]], kind: str) -> Dict[str, int]:
    """Merge the defaults for `kind` with the DataSource's stored overrides.

    Intended for the API / cost-estimate use-case: every limit of the kind is
    present in the result, with stored values replacing the defaults. Walkers
    are expected to use `getStoredOverrides` instead and fall back to their
    own caller-supplied limits, so that a runtime `limits=` argument is
    honoured.
    """
    return {**getDefaults(kind), **getStoredOverrides(dataSource, kind)}
|
||||
|
|
@ -147,7 +147,7 @@ class KnowledgeService:
|
|||
else getattr(existing, "status", "")
|
||||
) or ""
|
||||
if existingMeta.get("hash") == contentHash and existingStatus == "indexed":
|
||||
logger.info(
|
||||
logger.debug(
|
||||
"ingestion.skipped.duplicate sourceKind=%s sourceId=%s hash=%s",
|
||||
job.sourceKind, job.sourceId, contentHash[:12],
|
||||
extra={
|
||||
|
|
|
|||
|
|
@ -141,18 +141,39 @@ _SOURCE_TYPE_MAP = {
|
|||
|
||||
|
||||
def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list] = None):
|
||||
"""Load DataSource rows with ragIndexEnabled=true for a connection.
|
||||
"""Load DataSource rows whose *effective* ragIndexEnabled is True.
|
||||
|
||||
If dataSourceIds is provided (mini-bootstrap), filter to only those IDs.
|
||||
Cascade-inherit semantics: a DataSource with `ragIndexEnabled=None`
|
||||
follows its nearest ancestor's value (path-traversal). Walker iterates
|
||||
over all DataSources whose effective value resolves to True, including
|
||||
inherited ones.
|
||||
|
||||
Returned dicts carry **resolved** flags (`neutralize`, `scope`) so the
|
||||
downstream walkers can keep reading `ds.get("neutralize")` directly
|
||||
without having to know about the inheritance chain.
|
||||
|
||||
If `dataSourceIds` is provided (mini-bootstrap), the explicit set is
|
||||
intersected with the effective-true set.
|
||||
"""
|
||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
from modules.datamodels.datamodelDataSource import DataSource
|
||||
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag
|
||||
|
||||
rootIf = getRootInterface()
|
||||
allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||
resolved = []
|
||||
for ds in allDs:
|
||||
effRagIndex = getEffectiveFlag(ds, "ragIndexEnabled", allDs)
|
||||
if effRagIndex is not True:
|
||||
continue
|
||||
dsCopy = dict(ds) if isinstance(ds, dict) else {**ds.__dict__}
|
||||
dsCopy["neutralize"] = getEffectiveFlag(ds, "neutralize", allDs)
|
||||
dsCopy["scope"] = getEffectiveFlag(ds, "scope", allDs)
|
||||
dsCopy["ragIndexEnabled"] = True
|
||||
resolved.append(dsCopy)
|
||||
if dataSourceIds:
|
||||
return [ds for ds in allDs if ds.get("id") in dataSourceIds and ds.get("ragIndexEnabled")]
|
||||
return [ds for ds in allDs if ds.get("ragIndexEnabled")]
|
||||
resolved = [ds for ds in resolved if ds.get("id") in dataSourceIds]
|
||||
return resolved
|
||||
|
||||
|
||||
async def _bootstrapJobHandler(
|
||||
|
|
@ -167,7 +188,11 @@ async def _bootstrapJobHandler(
|
|||
if not connectionId:
|
||||
raise ValueError("connection.bootstrap requires payload.connectionId")
|
||||
|
||||
progressCb(5, f"resolving {authority} connection")
|
||||
progressCb(
|
||||
5,
|
||||
messageKey="Verbindung wird aufgebaut ({authority})",
|
||||
messageParams={"authority": authority},
|
||||
)
|
||||
|
||||
# Defensive consent check
|
||||
try:
|
||||
|
|
@ -225,7 +250,7 @@ async def _bootstrapJobHandler(
|
|||
bootstrapOutlook,
|
||||
)
|
||||
|
||||
progressCb(0, "Synchronisierung läuft...")
|
||||
progressCb(0, messageKey="Synchronisierung läuft...")
|
||||
spDs = _filterDs("sharepoint")
|
||||
olDs = _filterDs("outlook")
|
||||
async def _noopResult():
|
||||
|
|
@ -251,7 +276,7 @@ async def _bootstrapJobHandler(
|
|||
bootstrapGmail,
|
||||
)
|
||||
|
||||
progressCb(0, "Synchronisierung läuft...")
|
||||
progressCb(0, messageKey="Synchronisierung läuft...")
|
||||
gdDs = _filterDs("drive")
|
||||
gmDs = _filterDs("gmail")
|
||||
async def _noopResult():
|
||||
|
|
@ -274,7 +299,7 @@ async def _bootstrapJobHandler(
|
|||
bootstrapClickup,
|
||||
)
|
||||
|
||||
progressCb(0, "Synchronisierung läuft...")
|
||||
progressCb(0, messageKey="Synchronisierung läuft...")
|
||||
cuDs = _filterDs("clickup")
|
||||
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb, dataSources=cuDs) if cuDs else {"skipped": True, "reason": "no_datasources"}
|
||||
return {
|
||||
|
|
@ -288,7 +313,7 @@ async def _bootstrapJobHandler(
|
|||
bootstrapKdrive,
|
||||
)
|
||||
|
||||
progressCb(0, "Synchronisierung läuft...")
|
||||
progressCb(0, messageKey="Synchronisierung läuft...")
|
||||
kdDs = _filterDs("kdrive")
|
||||
kdResult = await bootstrapKdrive(connectionId=connectionId, progressCb=progressCb, dataSources=kdDs) if kdDs else {"skipped": True, "reason": "no_datasources"}
|
||||
return {
|
||||
|
|
@ -406,6 +431,15 @@ def registerKnowledgeIngestionConsumer() -> None:
|
|||
callbackRegistry.register("connection.established", _onConnectionEstablished)
|
||||
callbackRegistry.register("connection.revoked", _onConnectionRevoked)
|
||||
registerJobHandler(BOOTSTRAP_JOB_TYPE, _bootstrapJobHandler)
|
||||
|
||||
from modules.serviceCenter.services.serviceKnowledge.subFeatureBootstrap import (
|
||||
FEATURE_BOOTSTRAP_JOB_TYPE, _featureBootstrapHandler,
|
||||
)
|
||||
registerJobHandler(FEATURE_BOOTSTRAP_JOB_TYPE, _featureBootstrapHandler)
|
||||
|
||||
registerDailyResyncScheduler()
|
||||
_registered = True
|
||||
logger.info("KnowledgeIngestionConsumer registered (established/revoked + %s handler + daily resync)", BOOTSTRAP_JOB_TYPE)
|
||||
logger.info(
|
||||
"KnowledgeIngestionConsumer registered (established/revoked + %s + %s handler + daily resync)",
|
||||
BOOTSTRAP_JOB_TYPE, FEATURE_BOOTSTRAP_JOB_TYPE,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -33,13 +33,21 @@ from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_TASKS_DEFAULT = 500
|
||||
MAX_WORKSPACES_DEFAULT = 3
|
||||
MAX_LISTS_PER_WORKSPACE_DEFAULT = 20
|
||||
from modules.serviceCenter.services.serviceKnowledge import _ragLimits as _ragLimitsHelper
|
||||
|
||||
_CLICKUP_DEFAULTS = _ragLimitsHelper.CLICKUP_LIMITS_DEFAULT
|
||||
MAX_TASKS_DEFAULT = _CLICKUP_DEFAULTS["maxTasks"]
|
||||
MAX_WORKSPACES_DEFAULT = _CLICKUP_DEFAULTS["maxWorkspaces"]
|
||||
MAX_LISTS_PER_WORKSPACE_DEFAULT = _CLICKUP_DEFAULTS["maxListsPerWorkspace"]
|
||||
MAX_DESCRIPTION_CHARS_DEFAULT = 8000
|
||||
MAX_AGE_DAYS_DEFAULT = 180
|
||||
|
||||
|
||||
def _resolveDataSourceLimits(dsId: str, ds: Dict[str, Any]) -> Dict[str, int]:
    """Fetch the ClickUp RAG-limit overrides stored on `ds`.

    Only explicitly stored values are returned (possibly an empty dict).
    `dsId` is accepted but not used by the lookup.
    """
    storedOverrides = _ragLimitsHelper.getStoredOverrides(ds, "clickup")
    return storedOverrides
|
||||
|
||||
|
||||
@dataclass
|
||||
class ClickupBootstrapLimits:
|
||||
maxTasks: int = MAX_TASKS_DEFAULT
|
||||
|
|
@ -68,6 +76,9 @@ class ClickupBootstrapResult:
|
|||
workspaces: int = 0
|
||||
lists: int = 0
|
||||
errors: List[str] = field(default_factory=list)
|
||||
# First budget exhausted: "maxTasks" | "maxWorkspaces" | "maxListsPerWorkspace" | None.
|
||||
# Drives the same UI banner as the file-walker bootstraps.
|
||||
stoppedAtLimit: Optional[str] = None
|
||||
|
||||
|
||||
def _syntheticTaskId(connectionId: str, taskId: str) -> str:
|
||||
|
|
@ -225,6 +236,7 @@ async def bootstrapClickup(
|
|||
cancelled = False
|
||||
for ds in dataSources:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
||||
_recordLimitStop(result, "maxTasks", "dataSource", limits)
|
||||
break
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
cancelled = True
|
||||
|
|
@ -232,10 +244,11 @@ async def bootstrapClickup(
|
|||
|
||||
dsId = ds.get("id", "")
|
||||
dsNeutralize = ds.get("neutralize", False)
|
||||
eff = _resolveDataSourceLimits(dsId, ds)
|
||||
dsLimits = ClickupBootstrapLimits(
|
||||
maxTasks=limits.maxTasks,
|
||||
maxWorkspaces=limits.maxWorkspaces,
|
||||
maxListsPerWorkspace=limits.maxListsPerWorkspace,
|
||||
maxTasks=eff.get("maxTasks", limits.maxTasks),
|
||||
maxWorkspaces=eff.get("maxWorkspaces", limits.maxWorkspaces),
|
||||
maxListsPerWorkspace=eff.get("maxListsPerWorkspace", limits.maxListsPerWorkspace),
|
||||
maxDescriptionChars=limits.maxDescriptionChars,
|
||||
maxAgeDays=limits.maxAgeDays,
|
||||
includeClosed=limits.includeClosed,
|
||||
|
|
@ -243,8 +256,11 @@ async def bootstrapClickup(
|
|||
clickupScope=limits.clickupScope,
|
||||
)
|
||||
|
||||
if len(teams) > dsLimits.maxWorkspaces:
|
||||
_recordLimitStop(result, "maxWorkspaces", "teams", dsLimits, hard=False)
|
||||
for team in teams[:dsLimits.maxWorkspaces]:
|
||||
if result.indexed + result.skippedDuplicate >= dsLimits.maxTasks:
|
||||
_recordLimitStop(result, "maxTasks", f"team={team.get('id','')}", dsLimits)
|
||||
break
|
||||
teamId = str(team.get("id", "") or "")
|
||||
if not teamId:
|
||||
|
|
@ -351,6 +367,7 @@ async def _walkTeam(
|
|||
|
||||
for lst in listsCollected:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
||||
_recordLimitStop(result, "maxTasks", f"team={teamId}", limits)
|
||||
return
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
return
|
||||
|
|
@ -407,6 +424,7 @@ async def _walkList(
|
|||
|
||||
for task in tasks:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
||||
_recordLimitStop(result, "maxTasks", f"list={listId}", limits)
|
||||
return
|
||||
if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
|
||||
result.skippedPolicy += 1
|
||||
|
|
@ -511,7 +529,11 @@ async def _ingestTask(
|
|||
if hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
return
|
||||
try:
|
||||
progressCb(0, f"{processed} Tasks verarbeitet, {result.indexed} indexiert")
|
||||
progressCb(
|
||||
0,
|
||||
messageKey="{n} Tasks verarbeitet, {indexed} indexiert",
|
||||
messageParams={"n": processed, "indexed": result.indexed},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
if processed % 50 == 0:
|
||||
|
|
@ -529,13 +551,37 @@ async def _ingestTask(
|
|||
)
|
||||
|
||||
|
||||
def _recordLimitStop(
    result: ClickupBootstrapResult,
    limitName: str,
    where: str,
    limits: ClickupBootstrapLimits,
    *,
    hard: bool = True,
) -> None:
    """Note on `result` that a budget was exhausted and log a warning.

    A hard stop always overwrites `result.stoppedAtLimit`. A soft stop
    (hard=False) only sets it when no limit has been recorded yet. The
    warning carries the configured budget for `limitName` (None for names
    other than maxTasks / maxWorkspaces / maxListsPerWorkspace), the location
    `where`, and the counters collected so far.
    """
    keepRecorded = not hard and result.stoppedAtLimit is not None
    if not keepRecorded:
        result.stoppedAtLimit = limitName

    configuredBudget = {
        "maxTasks": limits.maxTasks,
        "maxWorkspaces": limits.maxWorkspaces,
        "maxListsPerWorkspace": limits.maxListsPerWorkspace,
    }.get(limitName)
    logger.warning(
        "clickup walker hit %s=%s at %s — partial index (indexed=%d, skippedDup=%d).",
        limitName, configuredBudget, where,
        result.indexed, result.skippedDuplicate,
    )
|
||||
|
||||
|
||||
def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs: float) -> Dict[str, Any]:
|
||||
durationMs = int((time.time() - startMs) * 1000)
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.done part=clickup connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d workspaces=%d lists=%d durationMs=%d",
|
||||
"ingestion.connection.bootstrap.done part=clickup connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d workspaces=%d lists=%d durationMs=%d stoppedAtLimit=%s",
|
||||
connectionId,
|
||||
result.indexed, result.skippedDuplicate, result.skippedPolicy,
|
||||
result.failed, result.workspaces, result.lists, durationMs,
|
||||
result.stoppedAtLimit or "none",
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.done",
|
||||
"part": "clickup",
|
||||
|
|
@ -547,6 +593,7 @@ def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs:
|
|||
"workspaces": result.workspaces,
|
||||
"lists": result.lists,
|
||||
"durationMs": durationMs,
|
||||
"stoppedAtLimit": result.stoppedAtLimit,
|
||||
},
|
||||
)
|
||||
return {
|
||||
|
|
@ -559,4 +606,11 @@ def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs:
|
|||
"lists": result.lists,
|
||||
"durationMs": durationMs,
|
||||
"errors": result.errors[:20],
|
||||
"stoppedAtLimit": result.stoppedAtLimit,
|
||||
"limits": {
|
||||
"maxTasks": MAX_TASKS_DEFAULT,
|
||||
"maxWorkspaces": MAX_WORKSPACES_DEFAULT,
|
||||
"maxListsPerWorkspace": MAX_LISTS_PER_WORKSPACE_DEFAULT,
|
||||
"maxAgeDays": MAX_AGE_DAYS_DEFAULT,
|
||||
},
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,13 +31,21 @@ from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_ITEMS_DEFAULT = 500
|
||||
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
|
||||
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
|
||||
from modules.serviceCenter.services.serviceKnowledge import _ragLimits as _ragLimitsHelper
|
||||
|
||||
_FILES_DEFAULTS = _ragLimitsHelper.FILES_LIMITS_DEFAULT
|
||||
MAX_ITEMS_DEFAULT = _FILES_DEFAULTS["maxItems"]
|
||||
MAX_BYTES_DEFAULT = _FILES_DEFAULTS["maxBytes"]
|
||||
MAX_FILE_SIZE_DEFAULT = _FILES_DEFAULTS["maxFileSize"]
|
||||
MAX_DEPTH_DEFAULT = _FILES_DEFAULTS["maxDepth"]
|
||||
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
|
||||
MAX_DEPTH_DEFAULT = 4
|
||||
MAX_AGE_DAYS_DEFAULT = 365
|
||||
|
||||
|
||||
def _resolveDataSourceLimits(dsId: str, ds: Dict[str, Any]) -> Dict[str, int]:
    """Fetch the file-walker RAG-limit overrides stored on `ds`.

    Only explicitly stored values are returned (possibly an empty dict).
    `dsId` is accepted but not used by the lookup.
    """
    storedOverrides = _ragLimitsHelper.getStoredOverrides(ds, "files")
    return storedOverrides
|
||||
|
||||
FOLDER_MIME = "application/vnd.google-apps.folder"
|
||||
|
||||
|
||||
|
|
@ -61,6 +69,8 @@ class GdriveBootstrapResult:
|
|||
failed: int = 0
|
||||
bytesProcessed: int = 0
|
||||
errors: List[str] = field(default_factory=list)
|
||||
# See SharepointBootstrapResult.stoppedAtLimit — same semantics.
|
||||
stoppedAtLimit: Optional[str] = None
|
||||
|
||||
|
||||
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
|
||||
|
|
@ -173,12 +183,13 @@ async def bootstrapGdrive(
|
|||
dsId = ds.get("id", "")
|
||||
dsNeutralize = ds.get("neutralize", False)
|
||||
dsMaxAgeDays = ds.get("maxAgeDays", limits.maxAgeDays)
|
||||
eff = _resolveDataSourceLimits(dsId, ds)
|
||||
dsLimits = GdriveBootstrapLimits(
|
||||
maxItems=limits.maxItems,
|
||||
maxBytes=limits.maxBytes,
|
||||
maxFileSize=limits.maxFileSize,
|
||||
maxItems=eff.get("maxItems", limits.maxItems),
|
||||
maxBytes=eff.get("maxBytes", limits.maxBytes),
|
||||
maxFileSize=eff.get("maxFileSize", limits.maxFileSize),
|
||||
skipMimePrefixes=limits.skipMimePrefixes,
|
||||
maxDepth=limits.maxDepth,
|
||||
maxDepth=eff.get("maxDepth", limits.maxDepth),
|
||||
maxAgeDays=dsMaxAgeDays,
|
||||
neutralize=dsNeutralize,
|
||||
)
|
||||
|
|
@ -265,8 +276,10 @@ async def _walkFolder(
|
|||
|
||||
for entry in entries:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
||||
_recordLimitStop(result, "maxItems", folderPath, limits)
|
||||
return
|
||||
if result.bytesProcessed >= limits.maxBytes:
|
||||
_recordLimitStop(result, "maxBytes", folderPath, limits)
|
||||
return
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||
return
|
||||
|
|
@ -276,6 +289,9 @@ async def _walkFolder(
|
|||
mimeType = getattr(entry, "mimeType", None) or metadata.get("mimeType")
|
||||
|
||||
if getattr(entry, "isFolder", False) or mimeType == FOLDER_MIME:
|
||||
if depth + 1 > limits.maxDepth:
|
||||
_recordLimitStop(result, "maxDepth", entryPath, limits, hard=False)
|
||||
continue
|
||||
await _walkFolder(
|
||||
adapter=adapter,
|
||||
knowledgeService=knowledgeService,
|
||||
|
|
@ -298,6 +314,7 @@ async def _walkFolder(
|
|||
continue
|
||||
size = int(getattr(entry, "size", 0) or 0)
|
||||
if size and size > limits.maxFileSize:
|
||||
_recordLimitStop(result, "maxFileSize", entryPath, limits, hard=False)
|
||||
result.skippedPolicy += 1
|
||||
continue
|
||||
modifiedTime = metadata.get("modifiedTime")
|
||||
|
|
@ -451,7 +468,11 @@ async def _ingestOne(
|
|||
processed = result.indexed + result.skippedDuplicate
|
||||
if progressCb is not None and processed % 5 == 0:
|
||||
try:
|
||||
progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert")
|
||||
progressCb(
|
||||
0,
|
||||
messageKey="{n} Dateien verarbeitet, {indexed} indexiert",
|
||||
messageParams={"n": processed, "indexed": result.indexed},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
logger.info(
|
||||
|
|
@ -470,13 +491,38 @@ async def _ingestOne(
|
|||
await asyncio.sleep(0)
|
||||
|
||||
|
||||
def _recordLimitStop(
    result: GdriveBootstrapResult,
    limitName: str,
    where: str,
    limits: GdriveBootstrapLimits,
    *,
    hard: bool = True,
) -> None:
    """Remember which budget cut the gdrive walk short and log a warning.

    A hard hit always stores ``limitName`` in ``result.stoppedAtLimit``; a soft
    hit (``hard=False``) only stores it while nothing has been recorded yet.
    The warning is emitted on every call, whether or not the field changed.

    Args:
        result: Bootstrap result whose ``stoppedAtLimit`` may be updated.
        limitName: Name of the budget that was hit (e.g. ``"maxItems"``).
        where: Path at which the limit was hit; used for the log line only.
        limits: Effective limits; supplies the budget value shown in the log.
        hard: Whether this hit overrides a previously recorded reason.
    """
    # Only a soft hit arriving after some reason was stored is ignored.
    softHitAlreadyCovered = not hard and result.stoppedAtLimit is not None
    if not softHitAlreadyCovered:
        result.stoppedAtLimit = limitName

    configuredBudgets = {
        "maxItems": limits.maxItems,
        "maxBytes": limits.maxBytes,
        "maxDepth": limits.maxDepth,
        "maxFileSize": limits.maxFileSize,
    }
    # Unknown limit names log ``None`` as the budget value.
    budgetValue = configuredBudgets.get(limitName)
    logger.warning(
        "gdrive walker hit %s=%s at %s — partial index (indexed=%d, bytesProcessed=%d).",
        limitName,
        budgetValue,
        where,
        result.indexed,
        result.bytesProcessed,
    )
|
||||
|
||||
|
||||
def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
|
||||
durationMs = int((time.time() - startMs) * 1000)
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.done part=gdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d bytes=%d durationMs=%d",
|
||||
"ingestion.connection.bootstrap.done part=gdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d bytes=%d durationMs=%d stoppedAtLimit=%s",
|
||||
connectionId,
|
||||
result.indexed, result.skippedDuplicate, result.skippedPolicy,
|
||||
result.failed, result.bytesProcessed, durationMs,
|
||||
result.stoppedAtLimit or "none",
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.done",
|
||||
"part": "gdrive",
|
||||
|
|
@ -487,6 +533,7 @@ def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: f
|
|||
"failed": result.failed,
|
||||
"bytes": result.bytesProcessed,
|
||||
"durationMs": durationMs,
|
||||
"stoppedAtLimit": result.stoppedAtLimit,
|
||||
},
|
||||
)
|
||||
return {
|
||||
|
|
@ -498,4 +545,11 @@ def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: f
|
|||
"bytesProcessed": result.bytesProcessed,
|
||||
"durationMs": durationMs,
|
||||
"errors": result.errors[:20],
|
||||
"stoppedAtLimit": result.stoppedAtLimit,
|
||||
"limits": {
|
||||
"maxItems": MAX_ITEMS_DEFAULT,
|
||||
"maxBytes": MAX_BYTES_DEFAULT,
|
||||
"maxFileSize": MAX_FILE_SIZE_DEFAULT,
|
||||
"maxDepth": MAX_DEPTH_DEFAULT,
|
||||
},
|
||||
}
|
||||
|
|
|
|||
|
|
@ -474,7 +474,11 @@ async def _ingestMessage(
|
|||
processed = result.indexed + result.skippedDuplicate
|
||||
if progressCb is not None and processed % 5 == 0:
|
||||
try:
|
||||
progressCb(0, f"{processed} Mails verarbeitet, {result.indexed} indexiert")
|
||||
progressCb(
|
||||
0,
|
||||
messageKey="{n} Mails verarbeitet, {indexed} indexiert",
|
||||
messageParams={"n": processed, "indexed": result.indexed},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
if processed % 50 == 0:
|
||||
|
|
|
|||
|
|
@ -27,11 +27,19 @@ from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_ITEMS_DEFAULT = 500
|
||||
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
|
||||
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
|
||||
from modules.serviceCenter.services.serviceKnowledge import _ragLimits as _ragLimitsHelper
|
||||
|
||||
_FILES_DEFAULTS = _ragLimitsHelper.FILES_LIMITS_DEFAULT
|
||||
MAX_ITEMS_DEFAULT = _FILES_DEFAULTS["maxItems"]
|
||||
MAX_BYTES_DEFAULT = _FILES_DEFAULTS["maxBytes"]
|
||||
MAX_FILE_SIZE_DEFAULT = _FILES_DEFAULTS["maxFileSize"]
|
||||
MAX_DEPTH_DEFAULT = _FILES_DEFAULTS["maxDepth"]
|
||||
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
|
||||
MAX_DEPTH_DEFAULT = 4
|
||||
|
||||
|
||||
def _resolveDataSourceLimits(dsId: str, ds: Dict[str, Any]) -> Dict[str, int]:
    """Fetch the RAG-limit overrides stored on a DataSource for the "files" walkers.

    Thin delegate to ``_ragLimitsHelper.getStoredOverrides``; the kdrive
    bootstrap merges the result over its runtime limits with
    ``.get(name, fallback)``.

    Args:
        dsId: DataSource id. Not used by the lookup itself.
        ds: DataSource record handed to the limits helper.

    Returns:
        Whatever ``getStoredOverrides(ds, "files")`` yields — presumably an
        empty dict when nothing is stored (confirm against the helper).
    """
    overridesForFiles = _ragLimitsHelper.getStoredOverrides(ds, "files")
    return overridesForFiles
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -53,6 +61,8 @@ class KdriveBootstrapResult:
|
|||
failed: int = 0
|
||||
bytesProcessed: int = 0
|
||||
errors: List[str] = field(default_factory=list)
|
||||
# See SharepointBootstrapResult.stoppedAtLimit — same semantics.
|
||||
stoppedAtLimit: Optional[str] = None
|
||||
|
||||
|
||||
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
|
||||
|
|
@ -141,12 +151,13 @@ async def bootstrapKdrive(
|
|||
dsPath = ds.get("path", "")
|
||||
dsId = ds.get("id", "")
|
||||
dsNeutralize = ds.get("neutralize", False)
|
||||
eff = _resolveDataSourceLimits(dsId, ds)
|
||||
dsLimits = KdriveBootstrapLimits(
|
||||
maxItems=limits.maxItems,
|
||||
maxBytes=limits.maxBytes,
|
||||
maxFileSize=limits.maxFileSize,
|
||||
maxItems=eff.get("maxItems", limits.maxItems),
|
||||
maxBytes=eff.get("maxBytes", limits.maxBytes),
|
||||
maxFileSize=eff.get("maxFileSize", limits.maxFileSize),
|
||||
skipMimePrefixes=limits.skipMimePrefixes,
|
||||
maxDepth=limits.maxDepth,
|
||||
maxDepth=eff.get("maxDepth", limits.maxDepth),
|
||||
neutralize=dsNeutralize,
|
||||
)
|
||||
|
||||
|
|
@ -232,14 +243,19 @@ async def _walkFolder(
|
|||
|
||||
for entry in entries:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
||||
_recordLimitStop(result, "maxItems", folderPath, limits)
|
||||
return
|
||||
if result.bytesProcessed >= limits.maxBytes:
|
||||
_recordLimitStop(result, "maxBytes", folderPath, limits)
|
||||
return
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||
return
|
||||
|
||||
entryPath = getattr(entry, "path", "") or ""
|
||||
if getattr(entry, "isFolder", False):
|
||||
if depth + 1 > limits.maxDepth:
|
||||
_recordLimitStop(result, "maxDepth", entryPath, limits, hard=False)
|
||||
continue
|
||||
await _walkFolder(
|
||||
adapter=adapter,
|
||||
knowledgeService=knowledgeService,
|
||||
|
|
@ -262,6 +278,7 @@ async def _walkFolder(
|
|||
continue
|
||||
size = int(getattr(entry, "size", 0) or 0)
|
||||
if size and size > limits.maxFileSize:
|
||||
_recordLimitStop(result, "maxFileSize", entryPath, limits, hard=False)
|
||||
result.skippedPolicy += 1
|
||||
continue
|
||||
|
||||
|
|
@ -408,24 +425,53 @@ async def _ingestOne(
|
|||
processed = result.indexed + result.skippedDuplicate
|
||||
if progressCb is not None and processed % 5 == 0:
|
||||
try:
|
||||
progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert")
|
||||
progressCb(
|
||||
0,
|
||||
messageKey="{n} Dateien verarbeitet, {indexed} indexiert",
|
||||
messageParams={"n": processed, "indexed": result.indexed},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
await asyncio.sleep(0)
|
||||
|
||||
|
||||
def _recordLimitStop(
    result: KdriveBootstrapResult,
    limitName: str,
    where: str,
    limits: KdriveBootstrapLimits,
    *,
    hard: bool = True,
) -> None:
    """Remember which budget cut the kdrive walk short and log a warning.

    A hard hit always stores ``limitName`` in ``result.stoppedAtLimit``; a soft
    hit (``hard=False``) only stores it while nothing has been recorded yet.
    The warning is emitted on every call, whether or not the field changed.

    Args:
        result: Bootstrap result whose ``stoppedAtLimit`` may be updated.
        limitName: Name of the budget that was hit (e.g. ``"maxBytes"``).
        where: Path at which the limit was hit; used for the log line only.
        limits: Effective limits; supplies the budget value shown in the log.
        hard: Whether this hit overrides a previously recorded reason.
    """
    if hard:
        result.stoppedAtLimit = limitName
    elif result.stoppedAtLimit is None:
        # First soft hit of the run: nothing more important is recorded yet.
        result.stoppedAtLimit = limitName

    knownBudgets = {
        "maxItems": limits.maxItems,
        "maxBytes": limits.maxBytes,
        "maxDepth": limits.maxDepth,
        "maxFileSize": limits.maxFileSize,
    }
    logArgs = (
        limitName,
        knownBudgets.get(limitName),  # None for a name outside the four budgets
        where,
        result.indexed,
        result.bytesProcessed,
    )
    logger.warning(
        "kdrive walker hit %s=%s at %s — partial index (indexed=%d, bytesProcessed=%d).",
        *logArgs,
    )
|
||||
|
||||
|
||||
def _finalizeResult(connectionId: str, result: KdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
|
||||
durationMs = int((time.time() - startMs) * 1000)
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.done part=kdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d",
|
||||
"ingestion.connection.bootstrap.done part=kdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d stoppedAtLimit=%s",
|
||||
connectionId,
|
||||
result.indexed, result.skippedDuplicate, result.skippedPolicy, result.failed,
|
||||
durationMs,
|
||||
durationMs, result.stoppedAtLimit or "none",
|
||||
extra={"event": "ingestion.connection.bootstrap.done", "part": "kdrive",
|
||||
"connectionId": connectionId, "indexed": result.indexed,
|
||||
"skippedDup": result.skippedDuplicate, "skippedPolicy": result.skippedPolicy,
|
||||
"failed": result.failed, "durationMs": durationMs},
|
||||
"failed": result.failed, "durationMs": durationMs,
|
||||
"stoppedAtLimit": result.stoppedAtLimit},
|
||||
)
|
||||
return {
|
||||
"connectionId": result.connectionId,
|
||||
|
|
@ -436,4 +482,11 @@ def _finalizeResult(connectionId: str, result: KdriveBootstrapResult, startMs: f
|
|||
"bytesProcessed": result.bytesProcessed,
|
||||
"durationMs": durationMs,
|
||||
"errors": result.errors[:20],
|
||||
"stoppedAtLimit": result.stoppedAtLimit,
|
||||
"limits": {
|
||||
"maxItems": MAX_ITEMS_DEFAULT,
|
||||
"maxBytes": MAX_BYTES_DEFAULT,
|
||||
"maxFileSize": MAX_FILE_SIZE_DEFAULT,
|
||||
"maxDepth": MAX_DEPTH_DEFAULT,
|
||||
},
|
||||
}
|
||||
|
|
|
|||
|
|
@ -460,7 +460,11 @@ async def _ingestMessage(
|
|||
processed = result.indexed + result.skippedDuplicate
|
||||
if progressCb is not None and processed % 5 == 0:
|
||||
try:
|
||||
progressCb(0, f"{processed} Mails verarbeitet, {result.indexed} indexiert")
|
||||
progressCb(
|
||||
0,
|
||||
messageKey="{n} Mails verarbeitet, {indexed} indexiert",
|
||||
messageParams={"n": processed, "indexed": result.indexed},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
if processed % 50 == 0:
|
||||
|
|
|
|||
|
|
@ -30,14 +30,27 @@ from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_ITEMS_DEFAULT = 500
|
||||
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
|
||||
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
|
||||
from modules.serviceCenter.services.serviceKnowledge import _ragLimits as _ragLimitsHelper
|
||||
|
||||
_FILES_DEFAULTS = _ragLimitsHelper.FILES_LIMITS_DEFAULT
|
||||
MAX_ITEMS_DEFAULT = _FILES_DEFAULTS["maxItems"]
|
||||
MAX_BYTES_DEFAULT = _FILES_DEFAULTS["maxBytes"]
|
||||
MAX_FILE_SIZE_DEFAULT = _FILES_DEFAULTS["maxFileSize"]
|
||||
MAX_DEPTH_DEFAULT = _FILES_DEFAULTS["maxDepth"]
|
||||
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
|
||||
MAX_DEPTH_DEFAULT = 4
|
||||
MAX_SITES_DEFAULT = 3
|
||||
|
||||
|
||||
def _resolveDataSourceLimits(dsId: str, ds: Dict[str, Any]) -> Dict[str, int]:
    """Fetch the explicit RAG-limit overrides stored on a DataSource.

    The result is layered on top of the walker's runtime limits by the caller
    (``eff.get(name, limits.<name>)``), so a missing key means "keep the
    caller-supplied limit" — defaults are never injected here.

    Args:
        dsId: DataSource id. Not used by the lookup itself.
        ds: DataSource record handed to the limits helper.

    Returns:
        Whatever ``_ragLimitsHelper.getStoredOverrides(ds, "files")`` yields —
        presumably an empty dict when nothing is stored (confirm against the
        helper).
    """
    explicitOverrides = _ragLimitsHelper.getStoredOverrides(ds, "files")
    return explicitOverrides
|
||||
|
||||
|
||||
@dataclass
|
||||
class SharepointBootstrapLimits:
|
||||
maxItems: int = MAX_ITEMS_DEFAULT
|
||||
|
|
@ -59,6 +72,10 @@ class SharepointBootstrapResult:
|
|||
failed: int = 0
|
||||
bytesProcessed: int = 0
|
||||
errors: List[str] = field(default_factory=list)
|
||||
# First budget that hit zero; None means the walk completed naturally.
|
||||
# Surfaces in the bootstrap result so the RAG inventory UI can warn the
|
||||
# user that the corpus is incomplete and tell them which knob to turn.
|
||||
stoppedAtLimit: Optional[str] = None # "maxItems" | "maxBytes" | "maxDepth" | "maxFileSize" | None
|
||||
|
||||
|
||||
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
|
||||
|
|
@ -161,12 +178,13 @@ async def bootstrapSharepoint(
|
|||
dsPath = ds.get("path", "")
|
||||
dsId = ds.get("id", "")
|
||||
dsNeutralize = ds.get("neutralize", False)
|
||||
eff = _resolveDataSourceLimits(dsId, ds)
|
||||
dsLimits = SharepointBootstrapLimits(
|
||||
maxItems=limits.maxItems,
|
||||
maxBytes=limits.maxBytes,
|
||||
maxFileSize=limits.maxFileSize,
|
||||
maxItems=eff.get("maxItems", limits.maxItems),
|
||||
maxBytes=eff.get("maxBytes", limits.maxBytes),
|
||||
maxFileSize=eff.get("maxFileSize", limits.maxFileSize),
|
||||
skipMimePrefixes=limits.skipMimePrefixes,
|
||||
maxDepth=limits.maxDepth,
|
||||
maxDepth=eff.get("maxDepth", limits.maxDepth),
|
||||
maxSites=limits.maxSites,
|
||||
neutralize=dsNeutralize,
|
||||
)
|
||||
|
|
@ -259,14 +277,22 @@ async def _walkFolder(
|
|||
|
||||
for entry in entries:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
||||
_recordLimitStop(result, "maxItems", folderPath, limits)
|
||||
return
|
||||
if result.bytesProcessed >= limits.maxBytes:
|
||||
_recordLimitStop(result, "maxBytes", folderPath, limits)
|
||||
return
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||
return
|
||||
|
||||
entryPath = getattr(entry, "path", "") or ""
|
||||
if getattr(entry, "isFolder", False):
|
||||
if depth + 1 > limits.maxDepth:
|
||||
# We stop descending here but keep walking siblings.
|
||||
# Record once per bootstrap so the UI shows "maxDepth" even
|
||||
# if other budgets aren't exhausted yet.
|
||||
_recordLimitStop(result, "maxDepth", entryPath, limits, hard=False)
|
||||
continue
|
||||
await _walkFolder(
|
||||
adapter=adapter,
|
||||
knowledgeService=knowledgeService,
|
||||
|
|
@ -289,6 +315,7 @@ async def _walkFolder(
|
|||
continue
|
||||
size = int(getattr(entry, "size", 0) or 0)
|
||||
if size and size > limits.maxFileSize:
|
||||
_recordLimitStop(result, "maxFileSize", entryPath, limits, hard=False)
|
||||
result.skippedPolicy += 1
|
||||
continue
|
||||
|
||||
|
|
@ -428,7 +455,11 @@ async def _ingestOne(
|
|||
processed = result.indexed + result.skippedDuplicate
|
||||
if progressCb is not None and processed % 5 == 0:
|
||||
try:
|
||||
progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert")
|
||||
progressCb(
|
||||
0,
|
||||
messageKey="{n} Dateien verarbeitet, {indexed} indexiert",
|
||||
messageParams={"n": processed, "indexed": result.indexed},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
if processed % 50 == 0:
|
||||
|
|
@ -443,13 +474,44 @@ async def _ingestOne(
|
|||
await asyncio.sleep(0)
|
||||
|
||||
|
||||
def _recordLimitStop(
    result: SharepointBootstrapResult,
    limitName: str,
    where: str,
    limits: SharepointBootstrapLimits,
    *,
    hard: bool = True,
) -> None:
    """Record why the SharePoint walk produced a partial index and log it.

    Soft hits (per-file ``maxFileSize``, per-folder ``maxDepth``; passed with
    ``hard=False``) are stored only while no reason has been recorded yet.
    Hard hits (``maxItems`` / ``maxBytes``) always replace whatever is stored,
    because once a hard cap is reached the corpus is provably incomplete.
    The warning is logged on every call, whether or not the field changed.

    Args:
        result: Bootstrap result whose ``stoppedAtLimit`` may be updated.
        limitName: Name of the budget that was hit.
        where: Path at which the limit was hit; used for the log line only.
        limits: Effective limits; supplies the budget value shown in the log.
        hard: Whether this hit overrides a previously recorded reason.
    """
    nothingRecordedYet = result.stoppedAtLimit is None
    if hard or nothingRecordedYet:
        result.stoppedAtLimit = limitName

    budgetByName = {
        "maxItems": limits.maxItems,
        "maxBytes": limits.maxBytes,
        "maxDepth": limits.maxDepth,
        "maxFileSize": limits.maxFileSize,
    }
    message = (
        "sharepoint walker hit %s=%s at %s — partial index "
        "(indexed=%d, bytesProcessed=%d). Raise the limit or split the data source."
    )
    logger.warning(
        message,
        limitName,
        budgetByName.get(limitName),  # None for a name outside the four budgets
        where,
        result.indexed,
        result.bytesProcessed,
    )
|
||||
|
||||
|
||||
def _finalizeResult(connectionId: str, result: SharepointBootstrapResult, startMs: float) -> Dict[str, Any]:
|
||||
durationMs = int((time.time() - startMs) * 1000)
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.done part=sharepoint connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d",
|
||||
"ingestion.connection.bootstrap.done part=sharepoint connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d stoppedAtLimit=%s",
|
||||
connectionId,
|
||||
result.indexed, result.skippedDuplicate, result.skippedPolicy, result.failed,
|
||||
durationMs,
|
||||
durationMs, result.stoppedAtLimit or "none",
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.done",
|
||||
"part": "sharepoint",
|
||||
|
|
@ -459,6 +521,7 @@ def _finalizeResult(connectionId: str, result: SharepointBootstrapResult, startM
|
|||
"skippedPolicy": result.skippedPolicy,
|
||||
"failed": result.failed,
|
||||
"durationMs": durationMs,
|
||||
"stoppedAtLimit": result.stoppedAtLimit,
|
||||
},
|
||||
)
|
||||
return {
|
||||
|
|
@ -470,4 +533,11 @@ def _finalizeResult(connectionId: str, result: SharepointBootstrapResult, startM
|
|||
"bytesProcessed": result.bytesProcessed,
|
||||
"durationMs": durationMs,
|
||||
"errors": result.errors[:20],
|
||||
"stoppedAtLimit": result.stoppedAtLimit,
|
||||
"limits": {
|
||||
"maxItems": MAX_ITEMS_DEFAULT,
|
||||
"maxBytes": MAX_BYTES_DEFAULT,
|
||||
"maxFileSize": MAX_FILE_SIZE_DEFAULT,
|
||||
"maxDepth": MAX_DEPTH_DEFAULT,
|
||||
},
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,289 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Feature-data RAG bootstrap: indexes FeatureDataSource rows into the knowledge store.
|
||||
|
||||
Analogous to connection.bootstrap for external connections (Google, Microsoft),
|
||||
this handler reads FeatureDataSource records with ragIndexEnabled=True, queries
|
||||
the underlying feature tables via FeatureDataProvider, serialises each row into
|
||||
text, and feeds it through KnowledgeService.requestIngestion so the data
|
||||
appears in ContentChunk embeddings for semantic RAG search.
|
||||
|
||||
Job type: ``feature.bootstrap``
|
||||
Payload: ``{"workspaceInstanceId": "...", "featureDataSourceIds": [...] (optional)}``
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
FEATURE_BOOTSTRAP_JOB_TYPE = "feature.bootstrap"
|
||||
|
||||
|
||||
def _loadRagEnabledFds(workspaceInstanceId: str, featureDataSourceIds: Optional[List[str]] = None):
    """Collect the workspace's FeatureDataSource rows that are effectively RAG-enabled.

    All FeatureDataSource records of the workspace are loaded; wildcard rows
    (``tableName == "*"``) and rows lacking a table name or feature code are
    dropped. A row is kept only when its effective ``ragIndexEnabled`` flag
    (resolved with ``mode="aggregate"`` against all rows) is exactly ``True``.

    Args:
        workspaceInstanceId: Workspace whose FeatureDataSources are inspected.
        featureDataSourceIds: Optional allow-list; when non-empty, only rows
            whose ``id`` is in it are returned.

    Returns:
        A list of dict copies of the kept rows, each extended with
        ``_effectiveNeutralize``, ``_effectiveScope`` (falls back to
        ``"featureInstance"``) and ``ragIndexEnabled`` forced to ``True``.
    """
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
    from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlagFds

    def _readText(record, attrName):
        # Records are handled both as plain dicts and as attribute-style objects.
        if isinstance(record, dict):
            raw = record.get(attrName)
        else:
            raw = getattr(record, attrName, "")
        return raw or ""

    rootInterface = getRootInterface()
    everyFds = rootInterface.db.getRecordset(
        FeatureDataSource, recordFilter={"workspaceInstanceId": workspaceInstanceId}
    )

    enabledRows = []
    for record in everyFds:
        tableName = _readText(record, "tableName")
        featureCode = _readText(record, "featureCode")
        if tableName == "*" or not tableName or not featureCode:
            continue

        ragFlag = getEffectiveFlagFds(record, "ragIndexEnabled", everyFds, mode="aggregate")
        if ragFlag is not True:
            continue

        flattened = dict(record) if isinstance(record, dict) else {**record.__dict__}
        flattened["_effectiveNeutralize"] = getEffectiveFlagFds(
            record, "neutralize", everyFds, mode="aggregate"
        )
        effectiveScope = getEffectiveFlagFds(record, "scope", everyFds, mode="aggregate")
        flattened["_effectiveScope"] = effectiveScope or "featureInstance"
        flattened["ragIndexEnabled"] = True
        enabledRows.append(flattened)

    if not featureDataSourceIds:
        return enabledRows

    wantedIds = set(featureDataSourceIds)
    return [entry for entry in enabledRows if entry.get("id") in wantedIds]
|
||||
|
||||
|
||||
def _serializeRowToText(row: Dict[str, Any], neutralizeFields: Optional[List[str]] = None) -> str:
    """Convert a feature-table row into readable ``key: value`` text for embedding.

    Entries are skipped when:
      * the key starts with ``_`` or ``sys`` (internal / bookkeeping columns),
      * the key is ``id``,
      * the value is ``None``, an empty string, or an empty container
        (list, tuple, dict, set, frozenset).

    Args:
        row: Column name -> value mapping of one table row.
        neutralizeFields: Column names whose values are replaced by
            ``[REDACTED]`` instead of being emitted.

    Returns:
        One ``key: value`` line per remaining entry, joined with newlines, in
        the row's iteration order. Dicts and lists are rendered as JSON
        (non-ASCII preserved, non-serialisable leaves via ``str``); all other
        values via ``str``. Returns ``""`` when nothing remains.
    """
    neutralizeSet = set(neutralizeFields or [])
    lines = []
    for key, value in row.items():
        if key == "id" or key.startswith(("_", "sys")):
            continue
        if value is None or value == "":
            continue
        # Every empty container is skipped, not just [] — "key: {}" carries no
        # information and only adds noise to the embedded text.
        if isinstance(value, (list, tuple, dict, set, frozenset)) and not value:
            continue
        if key in neutralizeSet:
            rendered = "[REDACTED]"
        elif isinstance(value, (dict, list)):
            rendered = json.dumps(value, ensure_ascii=False, default=str)
        else:
            rendered = str(value)
        lines.append(f"{key}: {rendered}")
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _getFeatureDbConnector(featureCode: str):
    """Build a ``DatabaseConnector`` for the database of one feature.

    The database name is ``poweron_`` followed by the lower-cased feature code.
    Host, port and credentials are read from ``APP_CONFIG``; the connector is
    created under the fixed user id ``system.feature_bootstrap``.

    Args:
        featureCode: Feature code that selects the database.

    Returns:
        A new ``DatabaseConnector`` instance.
    """
    from modules.connectors.connectorDbPostgre import DatabaseConnector
    from modules.shared.configuration import APP_CONFIG

    featureDatabase = f"poweron_{featureCode.lower()}"
    connectionSettings = {
        "dbHost": APP_CONFIG.get("DB_HOST", "localhost"),
        "dbDatabase": featureDatabase,
        "dbUser": APP_CONFIG.get("DB_USER"),
        "dbPassword": APP_CONFIG.get("DB_PASSWORD_SECRET"),
        "dbPort": int(APP_CONFIG.get("DB_PORT", 5432)),
        "userId": "system.feature_bootstrap",
    }
    return DatabaseConnector(**connectionSettings)
|
||||
|
||||
|
||||
async def _featureBootstrapHandler(
    job: Dict[str, Any],
    progressCb,
) -> Dict[str, Any]:
    """Walk RAG-enabled FeatureDataSources and index their rows.

    For every RAG-enabled FeatureDataSource of the workspace, the underlying
    feature table is paged in batches of 200 rows; each row with an ``id`` is
    serialised to text and submitted through ``KnowledgeService.requestIngestion``.
    A failure while processing one data source is logged and reported in the
    result without aborting the remaining data sources.

    Progress reporting is best-effort: a missing (``None``) or raising
    ``progressCb`` never interrupts indexing, matching the connector walkers.

    Args:
        job: Job record; ``job["payload"]`` must contain ``workspaceInstanceId``
            and may contain ``featureDataSourceIds`` to restrict the run.
        progressCb: Callable invoked as
            ``progressCb(percent, messageKey=..., messageParams=...)``.

    Returns:
        ``{"workspaceInstanceId", "skipped": True, "reason"}`` when no
        RAG-enabled data source exists; otherwise a summary with the totals
        ``indexed`` / ``skippedDuplicate`` / ``failed`` and a per-data-source
        list under ``dataSources``. ``skippedDuplicate`` also counts rows whose
        serialised text was empty.

    Raises:
        ValueError: If the payload has no ``workspaceInstanceId``.
    """
    payload = job.get("payload") or {}
    workspaceInstanceId = payload.get("workspaceInstanceId")
    featureDataSourceIds = payload.get("featureDataSourceIds")
    if not workspaceInstanceId:
        raise ValueError("feature.bootstrap requires payload.workspaceInstanceId")

    def _reportProgress(percent, messageKey, messageParams=None):
        # Progress is purely informational; a broken or absent callback must
        # not abort the indexing run.
        if progressCb is None:
            return
        try:
            if messageParams is None:
                progressCb(percent, messageKey=messageKey)
            else:
                progressCb(percent, messageKey=messageKey, messageParams=messageParams)
        except Exception:
            logger.debug("feature.bootstrap: progress callback failed", exc_info=True)

    _reportProgress(5, "Feature-Datenquellen werden geladen...")

    fdsList = _loadRagEnabledFds(workspaceInstanceId, featureDataSourceIds)
    if not fdsList:
        logger.info(
            "feature.bootstrap.skipped — no rag-enabled FDS for workspace %s",
            workspaceInstanceId,
        )
        return {"workspaceInstanceId": workspaceInstanceId, "skipped": True, "reason": "no_rag_enabled_fds"}

    from modules.serviceCenter.services.serviceAgent.featureDataProvider import FeatureDataProvider
    from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.serviceCenter import getService
    from modules.security.rootAccess import getRootUser

    totalIndexed = 0
    totalSkipped = 0
    totalFailed = 0
    fdsResults = []

    for fdsIdx, fds in enumerate(fdsList):
        fdsId = fds.get("id", "")
        featureCode = fds.get("featureCode", "")
        tableName = fds.get("tableName", "")
        featureInstanceId = fds.get("featureInstanceId", "")
        mandateId = fds.get("mandateId", "")
        neutralizeFields = fds.get("neutralizeFields") or []
        recordFilter = fds.get("recordFilter") or {}
        effectiveScope = fds.get("_effectiveScope", "featureInstance")
        effectiveNeutralize = bool(fds.get("_effectiveNeutralize", False))

        # 5% is spent on loading, the last 5% on the completion message.
        progressPct = 5 + int(90 * fdsIdx / len(fdsList))
        _reportProgress(
            progressPct,
            "Indexiere {table} ({n}/{total})...",
            {"table": tableName, "n": fdsIdx + 1, "total": len(fdsList)},
        )

        if not featureCode or not tableName or not featureInstanceId:
            logger.warning("feature.bootstrap: skipping FDS %s — missing featureCode/tableName/fiId", fdsId)
            continue

        try:
            dbConnector = _getFeatureDbConnector(featureCode)
            provider = FeatureDataProvider(dbConnector)

            rootUser = getRootUser()
            ctx = ServiceCenterContext(
                user=rootUser,
                mandate_id=mandateId,
                feature_instance_id=workspaceInstanceId,
            )
            knowledgeService = getService("knowledge", ctx)

            # Each stored recordFilter entry becomes an equality filter.
            extraFilters = [
                {"field": k, "op": "=", "value": v}
                for k, v in recordFilter.items()
            ] if recordFilter else None

            batchSize = 200
            offset = 0
            fdsIndexed = 0
            fdsSkipped = 0
            fdsFailed = 0

            while True:
                result = provider.browseTable(
                    tableName=tableName,
                    featureInstanceId=featureInstanceId,
                    mandateId=mandateId,
                    limit=batchSize,
                    offset=offset,
                    extraFilters=extraFilters,
                )
                rows = result.get("rows", [])
                if not rows:
                    break

                for row in rows:
                    rowId = row.get("id", "")
                    if not rowId:
                        continue

                    # Field-level redaction only applies when neutralisation is effective.
                    textContent = _serializeRowToText(row, neutralizeFields if effectiveNeutralize else None)
                    if not textContent.strip():
                        fdsSkipped += 1
                        continue

                    # The row's last-modified stamp serves as the content version.
                    contentVersion = str(row.get("sysUpdatedAt") or row.get("sysCreatedAt") or "")

                    ingestionJob = IngestionJob(
                        sourceKind="feature_record",
                        sourceId=f"{workspaceInstanceId}:{tableName}:{rowId}",
                        fileName=f"{tableName}-{rowId}",
                        mimeType="application/vnd.poweron.feature-record+json",
                        userId=fds.get("userId") or "system",
                        featureInstanceId=workspaceInstanceId,
                        mandateId=mandateId,
                        contentObjects=[{
                            "contentType": "text",
                            "data": textContent,
                            "contextRef": {
                                "table": tableName,
                                "featureCode": featureCode,
                                "featureInstanceId": featureInstanceId,
                                "rowId": rowId,
                            },
                            "contentObjectId": f"{tableName}:{rowId}",
                        }],
                        structure={"sourceTable": tableName, "featureCode": featureCode},
                        contentVersion=contentVersion,
                        provenance={
                            "featureDataSourceId": fdsId,
                            "tableName": tableName,
                            "featureCode": featureCode,
                            "featureInstanceId": featureInstanceId,
                        },
                        neutralize=effectiveNeutralize,
                    )

                    try:
                        handle = await knowledgeService.requestIngestion(ingestionJob)
                        if handle.status == "failed":
                            fdsFailed += 1
                            logger.warning(
                                "feature.bootstrap: ingestion failed fds=%s table=%s row=%s error=%s",
                                fdsId, tableName, rowId, handle.error,
                            )
                        elif handle.status == "duplicate":
                            fdsSkipped += 1
                        else:
                            fdsIndexed += 1
                    except Exception as ingErr:
                        # One bad row must not stop the rest of the table.
                        fdsFailed += 1
                        logger.error(
                            "feature.bootstrap: ingestion error fds=%s row=%s: %s",
                            fdsId, rowId, ingErr,
                        )

                offset += batchSize
                # A short page means the table is exhausted.
                if len(rows) < batchSize:
                    break

            totalIndexed += fdsIndexed
            totalSkipped += fdsSkipped
            totalFailed += fdsFailed

            fdsResults.append({
                "featureDataSourceId": fdsId,
                "tableName": tableName,
                "featureCode": featureCode,
                "indexed": fdsIndexed,
                "skippedDuplicate": fdsSkipped,
                "failed": fdsFailed,
            })

        except Exception as fdsErr:
            # Isolate failures per data source; counts of a failed data source
            # are not added to the totals.
            logger.error(
                "feature.bootstrap: error processing FDS %s (%s.%s): %s",
                fdsId, featureCode, tableName, fdsErr, exc_info=True,
            )
            fdsResults.append({
                "featureDataSourceId": fdsId,
                "tableName": tableName,
                "featureCode": featureCode,
                "error": str(fdsErr),
            })

    _reportProgress(100, "Feature-Daten-Sync abgeschlossen.")

    return {
        "workspaceInstanceId": workspaceInstanceId,
        "indexed": totalIndexed,
        "skippedDuplicate": totalSkipped,
        "failed": totalFailed,
        "dataSources": fdsResults,
    }
|
||||
|
|
@ -1,78 +0,0 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Resolve effective policies (neutralize, ragIndexEnabled) for DataSource tree hierarchies.
|
||||
|
||||
Tree-inheritance rule: nearest ancestor DataSource with an explicit value wins.
|
||||
If no ancestor has a value, the default (False) is used.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def resolveEffectiveNeutralize(
    ds: Dict[str, Any],
    allDataSources: List[Dict[str, Any]],
) -> bool:
    """Resolve the effective ``neutralize`` policy of a DataSource.

    The DataSource's own value decides when it is set: exactly ``False``
    yields ``False``, any other non-``None`` value yields ``True``. Only when
    the value is ``None`` (or absent) is the policy inherited from an ancestor,
    e.g. ``/sites/HR/Documents`` inherits from ``/sites/HR``.

    Args:
        ds: The DataSource whose policy is resolved.
        allDataSources: All DataSources to search for ancestors.

    Returns:
        The effective neutralize flag.
    """
    explicit = ds.get("neutralize")
    if explicit is False:
        return False
    if explicit is not None:
        # Any explicit value other than False counts as "neutralize on".
        return True
    return _findAncestorPolicy(ds, allDataSources, "neutralize")
|
||||
|
||||
|
||||
def resolveEffectiveRagIndexEnabled(
    ds: Dict[str, Any],
    allDataSources: List[Dict[str, Any]],
) -> bool:
    """Resolve the effective ``ragIndexEnabled`` policy of a DataSource.

    An own value of exactly ``True`` or ``False`` is returned as-is; any other
    value (including ``None`` or a missing key) defers to the ancestor lookup.

    Args:
        ds: The DataSource whose policy is resolved.
        allDataSources: All DataSources to search for ancestors.

    Returns:
        The effective ragIndexEnabled flag.
    """
    explicit = ds.get("ragIndexEnabled")
    # Identity checks on purpose: truthy non-bool values do not count as explicit.
    if explicit is True or explicit is False:
        return explicit
    return _findAncestorPolicy(ds, allDataSources, "ragIndexEnabled")
|
||||
|
||||
|
||||
def _findAncestorPolicy(
|
||||
ds: Dict[str, Any],
|
||||
allDataSources: List[Dict[str, Any]],
|
||||
field: str,
|
||||
) -> bool:
|
||||
"""Walk ancestors (longest-prefix match) to find an inherited policy value."""
|
||||
dsPath = ds.get("path", "")
|
||||
connectionId = ds.get("connectionId", "")
|
||||
if not dsPath:
|
||||
return False
|
||||
|
||||
ancestors = []
|
||||
for candidate in allDataSources:
|
||||
if candidate.get("id") == ds.get("id"):
|
||||
continue
|
||||
if candidate.get("connectionId") != connectionId:
|
||||
continue
|
||||
candidatePath = candidate.get("path", "")
|
||||
if not candidatePath:
|
||||
continue
|
||||
if dsPath.startswith(candidatePath) and len(candidatePath) < len(dsPath):
|
||||
ancestors.append(candidate)
|
||||
|
||||
ancestors.sort(key=lambda a: len(a.get("path", "")), reverse=True)
|
||||
|
||||
for ancestor in ancestors:
|
||||
val = ancestor.get(field)
|
||||
if val is True:
|
||||
return True
|
||||
if val is False:
|
||||
return False
|
||||
return False
|
||||
|
|
@ -15,8 +15,9 @@ up with "Job stuck at 10% for 10h" zombies.
|
|||
|
||||
These helpers wrap each phase in `asyncio.wait_for`. Sync extraction runs
|
||||
on a worker thread so the loop stays responsive. Every wrapped call also
|
||||
emits a short start/done log line, so when something hangs we know the
|
||||
exact item that caused it (path, size, mime).
|
||||
emits start/done log lines at DEBUG so normal INFO logs stay quiet; for
|
||||
stuck-job triage, enable DEBUG for this module — the last
|
||||
``walker.item.start`` before a hang still pinpoints the item (path, size, mime).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -48,7 +49,7 @@ async def downloadWithTimeout(
|
|||
used in log messages so we can pinpoint the offending item in case of a
|
||||
hang or timeout.
|
||||
"""
|
||||
logger.info("walker.download.start %s timeout=%ds", label, timeoutSeconds)
|
||||
logger.debug("walker.download.start %s timeout=%ds", label, timeoutSeconds)
|
||||
try:
|
||||
result = await asyncio.wait_for(awaitable, timeout=timeoutSeconds)
|
||||
logger.debug("walker.download.done %s", label)
|
||||
|
|
@ -71,7 +72,7 @@ async def extractWithTimeout(
|
|||
keep running until the process exits — but at least the walker proceeds
|
||||
to the next item instead of freezing forever.
|
||||
"""
|
||||
logger.info("walker.extract.start %s timeout=%ds", label, timeoutSeconds)
|
||||
logger.debug("walker.extract.start %s timeout=%ds", label, timeoutSeconds)
|
||||
try:
|
||||
result = await asyncio.wait_for(
|
||||
asyncio.to_thread(syncFn, *args),
|
||||
|
|
@ -102,15 +103,15 @@ async def ingestWithTimeout(
|
|||
|
||||
|
||||
def logItemStart(service: str, label: str, *, sizeBytes: Optional[int] = None, mime: Optional[str] = None) -> None:
|
||||
"""Log that processing of one item is about to begin.
|
||||
"""Log that processing of one item is about to begin (DEBUG).
|
||||
|
||||
When the worker hangs, the LAST `walker.item.start` line in the log
|
||||
points to the exact item that caused the freeze. This is the single
|
||||
most valuable diagnostic for stuck-job triage.
|
||||
points to the exact item that caused the freeze. Enable DEBUG for this
|
||||
module during triage.
|
||||
"""
|
||||
parts = [f"walker.item.start service={service} path={label}"]
|
||||
if sizeBytes is not None:
|
||||
parts.append(f"size={sizeBytes}")
|
||||
if mime:
|
||||
parts.append(f"mime={mime}")
|
||||
logger.info(" ".join(parts))
|
||||
logger.debug(" ".join(parts))
|
||||
|
|
|
|||
|
|
@ -327,27 +327,20 @@ class SharepointService:
|
|||
return None
|
||||
|
||||
async def uploadFile(self, siteId: str, folderPath: str, fileName: str, content: bytes) -> Dict[str, Any]:
|
||||
"""Upload a file to SharePoint."""
|
||||
try:
|
||||
# Clean the path
|
||||
cleanPath = folderPath.lstrip('/')
|
||||
uploadPath = f"{cleanPath.rstrip('/')}/{fileName}"
|
||||
endpoint = f"sites/{siteId}/drive/root:/{uploadPath}:/content"
|
||||
"""Upload a file to SharePoint. Raises on failure."""
|
||||
cleanPath = folderPath.lstrip('/')
|
||||
uploadPath = f"{cleanPath.rstrip('/')}/{fileName}"
|
||||
endpoint = f"sites/{siteId}/drive/root:/{uploadPath}:/content"
|
||||
|
||||
logger.info(f"Uploading file to: {endpoint}")
|
||||
logger.info(f"Uploading file to: {endpoint}")
|
||||
|
||||
result = await self._makeGraphApiCall(endpoint, method="PUT", data=content)
|
||||
result = await self._makeGraphApiCall(endpoint, method="PUT", data=content)
|
||||
|
||||
if "error" in result:
|
||||
logger.error(f"Upload failed: {result['error']}")
|
||||
return result
|
||||
if "error" in result:
|
||||
raise Exception(f"Upload failed: {result['error']}")
|
||||
|
||||
logger.info(f"File uploaded successfully: {fileName}")
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error uploading file: {str(e)}")
|
||||
return {"error": f"Error uploading file: {str(e)}"}
|
||||
logger.info(f"File uploaded successfully: {fileName}")
|
||||
return result
|
||||
|
||||
async def downloadFile(self, siteId: str, fileId: str) -> Optional[bytes]:
|
||||
"""Download a file from SharePoint."""
|
||||
|
|
|
|||
|
|
@ -12,7 +12,8 @@ import logging
|
|||
import json
|
||||
import base64
|
||||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
import threading
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
from pathlib import Path
|
||||
from cryptography.fernet import Fernet
|
||||
from cryptography.hazmat.primitives import hashes
|
||||
|
|
@ -286,6 +287,16 @@ def handleSecretJson(value: str, userId: str = "system", keyName: str = "unknown
|
|||
# Structure: {user_id: {key_name: [timestamps]}}
|
||||
_decryption_attempts = {}
|
||||
|
||||
# Process-wide plaintext cache for decrypted secrets.
|
||||
# Key: the encrypted ciphertext (which already includes env prefix).
|
||||
# Value: (expiresAtMonotonic, plaintext).
|
||||
# TTL is short enough that key rotation propagates quickly, long enough that
|
||||
# hot DB-init paths (every API call building a connector) don't blow the
|
||||
# decryption rate limit. 60s is a deliberate compromise.
|
||||
_DECRYPTION_CACHE_TTL_S = 60.0
|
||||
_decryption_cache: Dict[str, Tuple[float, str]] = {}
|
||||
_decryption_cache_lock = threading.Lock()
|
||||
|
||||
def _getMasterKey(envType: str = None) -> bytes:
|
||||
"""
|
||||
Get the master key for the specified environment.
|
||||
|
|
@ -486,25 +497,43 @@ def encryptValue(value: str, envType: str = None, userId: str = "system", keyNam
|
|||
def decryptValue(encryptedValue: str, userId: str = "system", keyName: str = "unknown") -> str:
|
||||
"""
|
||||
Decrypt a value using the master key for the current environment.
|
||||
|
||||
|
||||
A short-lived plaintext cache (TTL `_DECRYPTION_CACHE_TTL_S`) is consulted
|
||||
first. The 10/sec rate-limit on cache misses still protects against
|
||||
brute-force attacks; cache HITS bypass it because they are not actual
|
||||
cryptographic operations — they just return the result of an earlier
|
||||
successful decrypt. Without this cache, hot paths like
|
||||
`mainBackgroundJobService._getDb()` (called per RAG inventory poll AND
|
||||
per walker DB call) trigger the rate limit and surface as
|
||||
"Decryption rate limit exceeded for user 'system' key 'DB_PASSWORD_SECRET'"
|
||||
ERRORs in the RAG inventory UI route.
|
||||
|
||||
Args:
|
||||
encryptedValue: The encrypted value with prefix
|
||||
userId: The user ID making the request (default: "system")
|
||||
keyName: The name of the key being decrypted (default: "unknown")
|
||||
|
||||
|
||||
Returns:
|
||||
str: The decrypted plain text value
|
||||
|
||||
|
||||
Raises:
|
||||
ValueError: If decryption fails
|
||||
"""
|
||||
if not _isEncryptedValue(encryptedValue):
|
||||
return encryptedValue # Return as-is if not encrypted
|
||||
|
||||
# Check rate limiting (10 per second per user per key)
|
||||
|
||||
# Cache lookup BEFORE the rate-limit check: a cache hit is not a new
|
||||
# cryptographic operation and must not be throttled.
|
||||
now = time.monotonic()
|
||||
with _decryption_cache_lock:
|
||||
cached = _decryption_cache.get(encryptedValue)
|
||||
if cached is not None and cached[0] > now:
|
||||
return cached[1]
|
||||
|
||||
# Cache miss → real decrypt → apply rate limit.
|
||||
if not _checkDecryptionRateLimit(userId, keyName, maxPerSecond=10):
|
||||
raise ValueError(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' (10/sec)")
|
||||
|
||||
|
||||
try:
|
||||
# Extract environment type from prefix
|
||||
if encryptedValue.startswith('DEV_ENC:'):
|
||||
|
|
@ -536,7 +565,7 @@ def decryptValue(encryptedValue: str, userId: str = "system", keyName: str = "un
|
|||
encryptedBytes = base64.urlsafe_b64decode(encryptedPart.encode('utf-8'))
|
||||
decryptedBytes = fernet.decrypt(encryptedBytes)
|
||||
decryptedValue = decryptedBytes.decode('utf-8')
|
||||
|
||||
|
||||
# Log audit event for decryption
|
||||
try:
|
||||
from modules.shared.auditLogger import audit_logger
|
||||
|
|
@ -549,11 +578,25 @@ def decryptValue(encryptedValue: str, userId: str = "system", keyName: str = "un
|
|||
except Exception:
|
||||
# Don't fail if audit logging fails
|
||||
pass
|
||||
|
||||
|
||||
# Populate cache so subsequent reads of the same ciphertext don't
|
||||
# re-decrypt (and don't consume rate-limit budget).
|
||||
with _decryption_cache_lock:
|
||||
_decryption_cache[encryptedValue] = (
|
||||
time.monotonic() + _DECRYPTION_CACHE_TTL_S,
|
||||
decryptedValue,
|
||||
)
|
||||
|
||||
return decryptedValue
|
||||
|
||||
|
||||
except Exception as e:
|
||||
raise ValueError(f"Decryption failed: {e}")
|
||||
|
||||
|
||||
def clearDecryptionCache() -> None:
    """Remove every entry from the process-wide decrypted-secret cache.

    Intended for use after a key rotation or between tests, so that stale
    plaintext is not served until its TTL expires. The cache lock is held
    while the dictionary is emptied.
    """
    with _decryption_cache_lock:
        _decryption_cache.clear()
|
||||
|
||||
# Create the global APP_CONFIG instance
|
||||
APP_CONFIG = Configuration()
|
||||
|
|
@ -33,20 +33,35 @@ def _ensureUamTablesMatchModels(dbConnector) -> None:
|
|||
logger.debug(f"_ensureUamTablesMatchModels: {e}")
|
||||
|
||||
|
||||
def _getConnection(dbConnector):
|
||||
"""Get a connection from the DatabaseConnector.
|
||||
|
||||
Ensures the connection is alive and returns it.
|
||||
Commits any pending transaction first to avoid blocking.
|
||||
from contextlib import contextmanager
|
||||
|
||||
|
||||
@contextmanager
|
||||
def _borrowDbConn(dbConnector):
|
||||
"""Borrow a pooled connection from the DatabaseConnector.
|
||||
|
||||
Index/trigger/FK creation traditionally ran with `conn.autocommit = True`
|
||||
so each CREATE statement is its own transaction (DDL on a managed
|
||||
connection blocks waiting for COMMIT). This helper preserves that
|
||||
behaviour on top of the pool: borrow a connection, flip it to autocommit,
|
||||
yield it, and restore the previous state before returning it to the pool.
|
||||
"""
|
||||
dbConnector._ensure_connection()
|
||||
conn = dbConnector.connection
|
||||
# Commit any pending transaction to avoid blocking
|
||||
try:
|
||||
conn.commit()
|
||||
except Exception:
|
||||
pass # Ignore if nothing to commit
|
||||
return conn
|
||||
with dbConnector.borrowConn() as conn:
|
||||
try:
|
||||
previousAutocommit = conn.autocommit
|
||||
except Exception:
|
||||
previousAutocommit = False
|
||||
try:
|
||||
conn.autocommit = True
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not set autocommit on borrowed connection: {e}")
|
||||
try:
|
||||
yield conn
|
||||
finally:
|
||||
try:
|
||||
conn.autocommit = previousAutocommit
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
|
@ -174,73 +189,42 @@ def applyMultiTenantOptimizations(dbConnector, tables: Optional[List[str]] = Non
|
|||
}
|
||||
|
||||
try:
|
||||
# Get a connection from the connector
|
||||
conn = _getConnection(dbConnector)
|
||||
|
||||
# Save and set autocommit state
|
||||
try:
|
||||
originalAutocommit = conn.autocommit
|
||||
except Exception:
|
||||
originalAutocommit = False
|
||||
|
||||
try:
|
||||
conn.autocommit = True
|
||||
except Exception as autoErr:
|
||||
logger.debug(f"Could not set autocommit: {autoErr}")
|
||||
|
||||
try:
|
||||
_ensureUamTablesMatchModels(dbConnector)
|
||||
except Exception as preIdxErr:
|
||||
logger.debug(f"Pre-index table ensure: {preIdxErr}")
|
||||
|
||||
try:
|
||||
|
||||
with _borrowDbConn(dbConnector) as conn:
|
||||
with conn.cursor() as cursor:
|
||||
# Apply indexes
|
||||
results["indexesCreated"] = _applyIndexes(cursor, tables)
|
||||
|
||||
# Apply foreign keys
|
||||
results["foreignKeysCreated"] = _applyForeignKeys(cursor, tables)
|
||||
|
||||
# Apply immutable triggers
|
||||
results["triggersCreated"] = _applyImmutableTriggers(cursor, tables)
|
||||
|
||||
logger.info(
|
||||
f"Multi-tenant optimizations applied: "
|
||||
f"{results['indexesCreated']} indexes, "
|
||||
f"{results['triggersCreated']} triggers, "
|
||||
f"{results['foreignKeysCreated']} foreign keys"
|
||||
)
|
||||
finally:
|
||||
# Restore original autocommit state
|
||||
try:
|
||||
conn.autocommit = originalAutocommit
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
logger.info(
|
||||
f"Multi-tenant optimizations applied: "
|
||||
f"{results['indexesCreated']} indexes, "
|
||||
f"{results['triggersCreated']} triggers, "
|
||||
f"{results['foreignKeysCreated']} foreign keys"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error applying multi-tenant optimizations: {type(e).__name__}: {e}")
|
||||
results["errors"].append(str(e))
|
||||
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def applyIndexesOnly(dbConnector, tables: Optional[List[str]] = None) -> int:
|
||||
"""Apply only indexes (lighter operation, safe for frequent calls)."""
|
||||
try:
|
||||
conn = _getConnection(dbConnector)
|
||||
originalAutocommit = conn.autocommit
|
||||
conn.autocommit = True
|
||||
|
||||
try:
|
||||
_ensureUamTablesMatchModels(dbConnector)
|
||||
except Exception as preIdxErr:
|
||||
logger.debug(f"Pre-index table ensure: {preIdxErr}")
|
||||
|
||||
try:
|
||||
|
||||
with _borrowDbConn(dbConnector) as conn:
|
||||
with conn.cursor() as cursor:
|
||||
return _applyIndexes(cursor, tables)
|
||||
finally:
|
||||
conn.autocommit = originalAutocommit
|
||||
except Exception as e:
|
||||
logger.error(f"Error applying indexes: {e}")
|
||||
return 0
|
||||
|
|
@ -514,8 +498,7 @@ def getOptimizationStatus(dbConnector) -> dict:
|
|||
}
|
||||
|
||||
try:
|
||||
conn = _getConnection(dbConnector)
|
||||
with conn.cursor() as cursor:
|
||||
with _borrowDbConn(dbConnector) as conn, conn.cursor() as cursor:
|
||||
# Check regular indexes
|
||||
for tableName, indexName, _ in _INDEXES:
|
||||
if _tableExists(cursor, tableName):
|
||||
|
|
|
|||
|
|
@ -60,11 +60,9 @@ def _getTableColumns(dbConnector, tableName: str) -> List[str]:
|
|||
ORDER BY ordinal_position
|
||||
"""
|
||||
|
||||
cursor = dbConnector.connection.cursor()
|
||||
cursor.execute(query, (tableName,))
|
||||
columns = [row[0] for row in cursor.fetchall()]
|
||||
cursor.close()
|
||||
|
||||
with dbConnector.borrowCursor() as cursor:
|
||||
cursor.execute(query, (tableName,))
|
||||
columns = [row[0] for row in cursor.fetchall()]
|
||||
return columns
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting columns for table {tableName}: {e}")
|
||||
|
|
@ -92,29 +90,26 @@ def _getAllTables(dbConnector) -> List[str]:
|
|||
ORDER BY table_name
|
||||
"""
|
||||
|
||||
cursor = dbConnector.connection.cursor()
|
||||
cursor.execute(query)
|
||||
allTables = [row[0] for row in cursor.fetchall()]
|
||||
|
||||
# Get foreign key relationships to determine dependency order
|
||||
fkQuery = """
|
||||
SELECT
|
||||
tc.table_name,
|
||||
ccu.table_name AS foreign_table_name
|
||||
FROM information_schema.table_constraints AS tc
|
||||
JOIN information_schema.key_column_usage AS kcu
|
||||
ON tc.constraint_name = kcu.constraint_name
|
||||
AND tc.table_schema = kcu.table_schema
|
||||
JOIN information_schema.constraint_column_usage AS ccu
|
||||
ON ccu.constraint_name = tc.constraint_name
|
||||
AND ccu.table_schema = tc.table_schema
|
||||
WHERE tc.constraint_type = 'FOREIGN KEY'
|
||||
AND tc.table_schema = 'public'
|
||||
"""
|
||||
|
||||
cursor.execute(fkQuery)
|
||||
foreignKeys = cursor.fetchall()
|
||||
cursor.close()
|
||||
with dbConnector.borrowCursor() as cursor:
|
||||
cursor.execute(query)
|
||||
allTables = [row[0] for row in cursor.fetchall()]
|
||||
|
||||
fkQuery = """
|
||||
SELECT
|
||||
tc.table_name,
|
||||
ccu.table_name AS foreign_table_name
|
||||
FROM information_schema.table_constraints AS tc
|
||||
JOIN information_schema.key_column_usage AS kcu
|
||||
ON tc.constraint_name = kcu.constraint_name
|
||||
AND tc.table_schema = kcu.table_schema
|
||||
JOIN information_schema.constraint_column_usage AS ccu
|
||||
ON ccu.constraint_name = tc.constraint_name
|
||||
AND ccu.table_schema = tc.table_schema
|
||||
WHERE tc.constraint_type = 'FOREIGN KEY'
|
||||
AND tc.table_schema = 'public'
|
||||
"""
|
||||
cursor.execute(fkQuery)
|
||||
foreignKeys = cursor.fetchall()
|
||||
|
||||
# Build dependency graph (child -> parent mapping)
|
||||
dependencies = {}
|
||||
|
|
@ -154,10 +149,9 @@ def _getAllTables(dbConnector) -> List[str]:
|
|||
# Fallback: return simple list without ordering
|
||||
try:
|
||||
query = "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'BASE TABLE'"
|
||||
cursor = dbConnector.connection.cursor()
|
||||
cursor.execute(query)
|
||||
tables = [row[0] for row in cursor.fetchall()]
|
||||
cursor.close()
|
||||
with dbConnector.borrowCursor() as cursor:
|
||||
cursor.execute(query)
|
||||
tables = [row[0] for row in cursor.fetchall()]
|
||||
return [t for t in tables if t not in PROTECTED_TABLES]
|
||||
except Exception:
|
||||
return []
|
||||
|
|
@ -184,11 +178,9 @@ def _getPrimaryKeyColumns(dbConnector, tableName: str) -> List[str]:
|
|||
AND i.indisprimary
|
||||
"""
|
||||
|
||||
cursor = dbConnector.connection.cursor()
|
||||
cursor.execute(query, (tableName,))
|
||||
pkColumns = [row[0] for row in cursor.fetchall()]
|
||||
cursor.close()
|
||||
|
||||
with dbConnector.borrowCursor() as cursor:
|
||||
cursor.execute(query, (tableName,))
|
||||
pkColumns = [row[0] for row in cursor.fetchall()]
|
||||
return pkColumns
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get primary key for {tableName}: {e}")
|
||||
|
|
@ -229,21 +221,15 @@ def _findUserReferencesInTable(
|
|||
return {}
|
||||
|
||||
references = {}
|
||||
cursor = dbConnector.connection.cursor()
|
||||
|
||||
for userColumn in userColumns:
|
||||
# Build SELECT for primary key columns
|
||||
pkSelect = ", ".join([f'"{pk}"' for pk in pkColumns])
|
||||
query = f'SELECT {pkSelect} FROM "{tableName}" WHERE "{userColumn}" = %s'
|
||||
|
||||
cursor.execute(query, (userId,))
|
||||
recordKeys = cursor.fetchall()
|
||||
|
||||
if recordKeys:
|
||||
references[userColumn] = recordKeys
|
||||
logger.debug(f"Found {len(recordKeys)} records in {tableName}.{userColumn} for user {userId}")
|
||||
|
||||
cursor.close()
|
||||
with dbConnector.borrowCursor() as cursor:
|
||||
for userColumn in userColumns:
|
||||
pkSelect = ", ".join([f'"{pk}"' for pk in pkColumns])
|
||||
query = f'SELECT {pkSelect} FROM "{tableName}" WHERE "{userColumn}" = %s'
|
||||
cursor.execute(query, (userId,))
|
||||
recordKeys = cursor.fetchall()
|
||||
if recordKeys:
|
||||
references[userColumn] = recordKeys
|
||||
logger.debug(f"Found {len(recordKeys)} records in {tableName}.{userColumn} for user {userId}")
|
||||
return references
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -277,42 +263,35 @@ def _anonymizeRecords(
|
|||
return 0
|
||||
|
||||
try:
|
||||
cursor = dbConnector.connection.cursor()
|
||||
# Resolve column metadata once outside the borrow block (it borrows its
|
||||
# own connection internally).
|
||||
columns = _getTableColumns(dbConnector, tableName)
|
||||
hasModifiedAt = "sysModifiedAt" in columns
|
||||
|
||||
count = 0
|
||||
|
||||
for recordKey in recordKeys:
|
||||
# Build WHERE clause for primary key
|
||||
whereClause = " AND ".join([f'"{pk}" = %s' for pk in pkColumns])
|
||||
|
||||
# Check if table has sysModifiedAt column
|
||||
columns = _getTableColumns(dbConnector, tableName)
|
||||
hasModifiedAt = "sysModifiedAt" in columns
|
||||
|
||||
if hasModifiedAt:
|
||||
query = f'UPDATE "{tableName}" SET "{columnName}" = %s, "sysModifiedAt" = %s WHERE {whereClause}'
|
||||
params = [anonymousValue, getUtcTimestamp()]
|
||||
else:
|
||||
query = f'UPDATE "{tableName}" SET "{columnName}" = %s WHERE {whereClause}'
|
||||
params = [anonymousValue]
|
||||
|
||||
# Add primary key values to params
|
||||
if isinstance(recordKey, tuple):
|
||||
params.extend(recordKey)
|
||||
else:
|
||||
params.append(recordKey)
|
||||
|
||||
cursor.execute(query, params)
|
||||
count += cursor.rowcount
|
||||
|
||||
dbConnector.connection.commit()
|
||||
cursor.close()
|
||||
|
||||
with dbConnector.borrowCursor() as cursor:
|
||||
for recordKey in recordKeys:
|
||||
whereClause = " AND ".join([f'"{pk}" = %s' for pk in pkColumns])
|
||||
if hasModifiedAt:
|
||||
query = f'UPDATE "{tableName}" SET "{columnName}" = %s, "sysModifiedAt" = %s WHERE {whereClause}'
|
||||
params = [anonymousValue, getUtcTimestamp()]
|
||||
else:
|
||||
query = f'UPDATE "{tableName}" SET "{columnName}" = %s WHERE {whereClause}'
|
||||
params = [anonymousValue]
|
||||
|
||||
if isinstance(recordKey, tuple):
|
||||
params.extend(recordKey)
|
||||
else:
|
||||
params.append(recordKey)
|
||||
|
||||
cursor.execute(query, params)
|
||||
count += cursor.rowcount
|
||||
|
||||
logger.info(f"Anonymized {count} records in {tableName}.{columnName}")
|
||||
return count
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error anonymizing records in {tableName}.{columnName}: {e}")
|
||||
dbConnector.connection.rollback()
|
||||
return 0
|
||||
|
||||
|
||||
|
|
@ -338,32 +317,23 @@ def _deleteRecords(
|
|||
return 0
|
||||
|
||||
try:
|
||||
cursor = dbConnector.connection.cursor()
|
||||
count = 0
|
||||
|
||||
for recordKey in recordKeys:
|
||||
# Build WHERE clause for primary key
|
||||
whereClause = " AND ".join([f'"{pk}" = %s' for pk in pkColumns])
|
||||
query = f'DELETE FROM "{tableName}" WHERE {whereClause}'
|
||||
|
||||
# Prepare params
|
||||
if isinstance(recordKey, tuple):
|
||||
params = list(recordKey)
|
||||
else:
|
||||
params = [recordKey]
|
||||
|
||||
cursor.execute(query, params)
|
||||
count += cursor.rowcount
|
||||
|
||||
dbConnector.connection.commit()
|
||||
cursor.close()
|
||||
|
||||
with dbConnector.borrowCursor() as cursor:
|
||||
for recordKey in recordKeys:
|
||||
whereClause = " AND ".join([f'"{pk}" = %s' for pk in pkColumns])
|
||||
query = f'DELETE FROM "{tableName}" WHERE {whereClause}'
|
||||
if isinstance(recordKey, tuple):
|
||||
params = list(recordKey)
|
||||
else:
|
||||
params = [recordKey]
|
||||
cursor.execute(query, params)
|
||||
count += cursor.rowcount
|
||||
|
||||
logger.info(f"Deleted {count} records from {tableName}")
|
||||
return count
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting records from {tableName}: {e}")
|
||||
dbConnector.connection.rollback()
|
||||
return 0
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -124,6 +124,48 @@ def t(key: str, context: str = "api", value: str = "") -> str:
|
|||
return _CACHE.get(lang, {}).get(key, f"[{key}]")
|
||||
|
||||
|
||||
def resolveJobMessage(messageData: Optional[Dict[str, Any]], lang: Optional[str] = None) -> Optional[str]:
    """Translate a structured BackgroundJob progress payload.

    ``messageData`` shape (written by ``JobProgressCallback`` when callers
    pass ``messageKey`` / ``messageParams``)::

        {"key": "{n} Dateien verarbeitet, {indexed} indexiert",
         "params": {"n": 145, "indexed": 106}}

    The walker call sites use a string-literal ``messageKey=``; the matching
    ``t("…")`` literal lives in the feature's progress-key registration
    module (e.g. ``serviceKnowledge/_progressMessages.py``,
    ``features/trustee/mainTrustee.py``) so the boot sync picks it up.

    This helper is the **server-side** translation hop so route handlers can
    deliver a fully rendered ``progressMessage`` string to the frontend --
    the frontend never calls ``t()`` on backend-supplied keys.

    Args:
        messageData: Payload with a non-empty string ``key`` and optional
            ``params`` dict; anything else yields ``None``.
        lang: Optional language override. When given, the current-language
            variable is switched for the duration of the lookup and restored
            afterwards; otherwise the ambient language is used.

    Returns:
        The translated template with ``params`` substituted, the bare
        translated template when substitution is not possible, or ``None``
        when ``messageData`` is missing or malformed.
    """
    if not messageData or not isinstance(messageData, dict):
        return None
    key = messageData.get("key")
    if not isinstance(key, str) or not key:
        return None
    params = messageData.get("params") or {}

    if lang is not None:
        # Temporarily override the language; always restore the previous one.
        token = _CURRENT_LANGUAGE.set(lang)
        try:
            template = t(key)
        finally:
            _CURRENT_LANGUAGE.reset(token)
    else:
        template = t(key)

    if not isinstance(params, dict) or not params:
        return template
    try:
        return template.format(**params)
    except (KeyError, IndexError, ValueError, TypeError, AttributeError):
        # Best-effort rendering: a placeholder/param mismatch must never break
        # the caller. TypeError covers non-string param keys and "{a[0]}" on a
        # non-subscriptable value; AttributeError covers "{a.b}" lookups.
        return template
|
||||
|
||||
|
||||
def resolveText(value: Any, lang: Optional[str] = None) -> str:
|
||||
"""Resolve any value to a translated string for the current request language.
|
||||
|
||||
|
|
|
|||
|
|
@ -247,16 +247,29 @@ def _resolveDocumentList(documentListParam, services) -> List[tuple]:
|
|||
if isinstance(first, dict) and ("documentData" in first or "documentName" in first):
|
||||
for doc in documentListParam:
|
||||
rawData = doc.get("documentData")
|
||||
logger.debug("_resolveDocumentList: doc keys=%s documentData type=%s documentData truthy=%s", list(doc.keys()), type(rawData).__name__, bool(rawData))
|
||||
fileId = (doc.get("validationMetadata") or {}).get("fileId") or doc.get("fileId", "")
|
||||
fileName = doc.get("documentName") or doc.get("fileName") or "document"
|
||||
mimeType = doc.get("mimeType") or doc.get("documentMimeType") or "application/json"
|
||||
|
||||
# When documentData was persisted as binary (_hasBinaryData), read it
|
||||
# back from file storage via the chat service.
|
||||
if not rawData and doc.get("_hasBinaryData") and fileId:
|
||||
chatService = getattr(services, "chat", None)
|
||||
if chatService:
|
||||
try:
|
||||
rawBytes = chatService.getFileData(fileId)
|
||||
if rawBytes:
|
||||
rawData = rawBytes.decode("utf-8") if isinstance(rawBytes, bytes) else rawBytes
|
||||
except Exception as e:
|
||||
logger.debug("_resolveDocumentList: failed to read binary for fileId=%s: %s", fileId, e)
|
||||
|
||||
logger.debug("_resolveDocumentList: doc keys=%s documentData type=%s documentData truthy=%s", list(doc.keys()), type(rawData).__name__ if rawData else "NoneType", bool(rawData))
|
||||
if not rawData:
|
||||
continue
|
||||
try:
|
||||
data = json.loads(rawData) if isinstance(rawData, str) else rawData
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
continue
|
||||
fileId = (doc.get("validationMetadata") or {}).get("fileId") or doc.get("fileId", "")
|
||||
fileName = doc.get("documentName") or doc.get("fileName") or "document"
|
||||
mimeType = doc.get("mimeType") or doc.get("documentMimeType") or "application/json"
|
||||
results.append((data, fileId, fileName, mimeType))
|
||||
if results:
|
||||
return results
|
||||
|
|
|
|||
|
|
@ -38,6 +38,52 @@ def _tsToIso(ts) -> Optional[str]:
|
|||
_SYNC_THRESHOLD_SECONDS = 3600
|
||||
|
||||
|
||||
def _buildAccountSummary(accountMap: Dict[str, dict], balances: list, year: int) -> list:
|
||||
"""Aggregate balance records into one row per account for *year*.
|
||||
|
||||
For each account the annual balance record (``periodMonth == 0``) of
|
||||
*year* is preferred. If that row is missing, we also check the
|
||||
previous year's annual record so that YTD carry-forwards are visible.
|
||||
Additionally, quarterly closing balances (Q1-Q4) are derived from the
|
||||
monthly records so the AI can compare against quarterly budgets.
|
||||
"""
|
||||
bestClosing: Dict[str, float] = {}
|
||||
quarterClosing: Dict[str, Dict[str, float]] = {}
|
||||
|
||||
for b in balances:
|
||||
acct = b.get("accountNumber", "")
|
||||
bYear = b.get("periodYear", 0)
|
||||
bMonth = b.get("periodMonth", 0)
|
||||
closing = b.get("closingBalance", 0) or 0
|
||||
|
||||
if bYear == year and bMonth == 0:
|
||||
bestClosing[acct] = closing
|
||||
|
||||
if bYear == year and bMonth in (3, 6, 9, 12):
|
||||
qLabel = f"Q{bMonth // 3}"
|
||||
quarterClosing.setdefault(acct, {})[qLabel] = closing
|
||||
|
||||
if acct not in bestClosing and bYear == year - 1 and bMonth == 0:
|
||||
bestClosing[acct] = closing
|
||||
|
||||
summary = []
|
||||
for nr in sorted(accountMap.keys()):
|
||||
info = accountMap[nr]
|
||||
row = {
|
||||
"account": nr,
|
||||
"label": info.get("label", ""),
|
||||
"type": info.get("type", ""),
|
||||
"group": info.get("group", ""),
|
||||
"closingBalance": round(bestClosing.get(nr, 0), 2),
|
||||
}
|
||||
qData = quarterClosing.get(nr, {})
|
||||
for q in ("Q1", "Q2", "Q3", "Q4"):
|
||||
if q in qData:
|
||||
row[q] = round(qData[q], 2)
|
||||
summary.append(row)
|
||||
return summary
|
||||
|
||||
|
||||
async def refreshAccountingData(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""Import/refresh accounting data from the configured external system.
|
||||
|
||||
|
|
@ -133,7 +179,13 @@ async def refreshAccountingData(self, parameters: Dict[str, Any]) -> ActionResul
|
|||
|
||||
|
||||
def _exportAccountingData(trusteeInterface, featureInstanceId: str, dateFrom: str = None, dateTo: str = None) -> str:
|
||||
"""Export accounting data (accounts, balances, journal entries+lines) as compact JSON for downstream AI nodes."""
|
||||
"""Export accounting data as compact JSON for downstream AI nodes.
|
||||
|
||||
Produces a pre-aggregated ``accountSummary`` (one row per account with
|
||||
a single *Ist* value) so the AI does not have to navigate thousands of
|
||||
raw balance records. Raw per-month balances are deliberately omitted to
|
||||
avoid confusion and reduce payload size.
|
||||
"""
|
||||
from modules.features.trustee.datamodelFeatureTrustee import (
|
||||
TrusteeDataAccount,
|
||||
TrusteeDataJournalEntry,
|
||||
|
|
@ -155,17 +207,9 @@ def _exportAccountingData(trusteeInterface, featureInstanceId: str, dateFrom: st
|
|||
}
|
||||
|
||||
balances = trusteeInterface.db.getRecordset(TrusteeDataAccountBalance, recordFilter=baseFilter) or []
|
||||
balanceList = []
|
||||
for b in balances:
|
||||
balanceList.append({
|
||||
"account": b.get("accountNumber", ""),
|
||||
"year": b.get("periodYear", 0),
|
||||
"month": b.get("periodMonth", 0),
|
||||
"opening": b.get("openingBalance", 0),
|
||||
"debit": b.get("debitTotal", 0),
|
||||
"credit": b.get("creditTotal", 0),
|
||||
"closing": b.get("closingBalance", 0),
|
||||
})
|
||||
|
||||
currentYear = _dt.now(tz=_tz.utc).year
|
||||
accountSummary = _buildAccountSummary(accountMap, balances, currentYear)
|
||||
|
||||
entries = trusteeInterface.db.getRecordset(TrusteeDataJournalEntry, recordFilter=baseFilter) or []
|
||||
fromTs = _isoToTs(dateFrom)
|
||||
|
|
@ -205,21 +249,26 @@ def _exportAccountingData(trusteeInterface, featureInstanceId: str, dateFrom: st
|
|||
})
|
||||
|
||||
export = {
|
||||
"accounts": list(accountMap.values()),
|
||||
"balances": balanceList,
|
||||
"accountSummary": accountSummary,
|
||||
"journalLines": lineList,
|
||||
"meta": {
|
||||
"accountCount": len(accountMap),
|
||||
"entryCount": len(entryMap),
|
||||
"lineCount": len(lineList),
|
||||
"balanceCount": len(balanceList),
|
||||
"summaryYear": currentYear,
|
||||
"dateFrom": dateFrom,
|
||||
"dateTo": dateTo,
|
||||
"hint": (
|
||||
"accountSummary contains ONE row per account with the "
|
||||
"current-year closing balance (Ist). Use this for "
|
||||
"budget comparisons. journalLines lists individual "
|
||||
"bookings for drill-down."
|
||||
),
|
||||
},
|
||||
}
|
||||
result = json.dumps(export, ensure_ascii=False, default=str)
|
||||
logger.info("Exported accounting data: %d accounts, %d entries, %d lines, %d balances (%d bytes)",
|
||||
len(accountMap), len(entryMap), len(lineList), len(balanceList), len(result))
|
||||
logger.info("Exported accounting data: %d accounts (summary), %d entries, %d lines (%d bytes)",
|
||||
len(accountSummary), len(entryMap), len(lineList), len(result))
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.warning("Could not export accounting data: %s", e)
|
||||
|
|
|
|||
70
scripts/debug_rag_job_result.py
Normal file
70
scripts/debug_rag_job_result.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
"""Diagnose: read a connection.bootstrap job result and print its keys.
|
||||
|
||||
Usage (from repo root):
|
||||
python gateway/scripts/debug_rag_job_result.py
|
||||
|
||||
Prints the most recent SUCCESS connection.bootstrap job per UserConnection so
|
||||
we can see whether the `stoppedAtLimit` key actually landed in the JSONB
|
||||
`result` column. If it is missing here, the bug is in the writer (handler or
|
||||
_markSuccess); if it is present here but absent in the HTTP response, the bug
|
||||
is in routeRagInventory.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
_HERE = Path(__file__).resolve()
|
||||
sys.path.insert(0, str(_HERE.parent.parent)) # gateway/
|
||||
os.chdir(_HERE.parent.parent)
|
||||
|
||||
from modules.shared.configuration import APP_CONFIG # noqa: E402
|
||||
from modules.connectors.connectorDbPostgre import getCachedConnector # noqa: E402
|
||||
from modules.datamodels.datamodelBackgroundJob import BackgroundJob # noqa: E402
|
||||
from modules.routes.routeRagInventory import _flattenJobResult # noqa: E402
|
||||
|
||||
|
||||
def _main() -> None:
    """Print the stored result of the newest SUCCESS bootstrap job per connection.

    Loads every ``BackgroundJob`` row, keeps the ``connection.bootstrap`` jobs
    whose status is ``SUCCESS``, orders them newest-first by ``createdAt`` and
    prints the first job seen for each ``payload.connectionId``: the raw
    result keys plus the values produced by ``_flattenJobResult``.
    """
    db = getCachedConnector(
        dbDatabase=APP_CONFIG.get("DB_DATABASE", "poweron_app"),
        dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
        dbPort=int(APP_CONFIG.get("DB_PORT", "5432")),
        dbUser=APP_CONFIG.get("DB_USER"),
        dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET"),
    )

    jobs = [
        job
        for job in db.getRecordset(BackgroundJob)
        if job.get("jobType") == "connection.bootstrap" and job.get("status") == "SUCCESS"
    ]
    # Newest first, so the first hit per connection below is the latest job.
    jobs.sort(key=lambda job: job.get("createdAt") or 0, reverse=True)

    if not jobs:
        print("No SUCCESS connection.bootstrap jobs found.")
        return

    reportedConnections: set[str] = set()
    for job in jobs:
        payload = job.get("payload") or {}
        connId = payload.get("connectionId", "<unknown>")
        if connId in reportedConnections:
            continue
        reportedConnections.add(connId)

        result = job.get("result") or {}
        # Anything that is not a dict cannot be flattened; report it as empty.
        flat = _flattenJobResult(result) if isinstance(result, dict) else {}

        print("=" * 80)
        print(f"jobId = {job.get('id')}")
        print(f"connectionId = {connId}")
        print(f"finishedAt = {job.get('finishedAt')}")
        print(f"raw keys = {sorted(result.keys()) if isinstance(result, dict) else 'N/A'}")
        print("--- flattened (what the API will return now) ---")
        print(f" indexed = {flat.get('indexed')}")
        print(f" skippedDuplicate= {flat.get('skippedDuplicate')}")
        print(f" skippedPolicy = {flat.get('skippedPolicy')}")
        print(f" stoppedAtLimit = {flat.get('stoppedAtLimit')!r} <-- KEY CHECK")
        print(f" limits = {flat.get('limits')}")
        print(f" bytesProcessed = {flat.get('bytesProcessed')}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_main()
|
||||
97
scripts/script_db_migrate_backgroundjob_progress_data.py
Normal file
97
scripts/script_db_migrate_backgroundjob_progress_data.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Migration: Add `progressMessageData` JSONB column to BackgroundJob.
|
||||
|
||||
Carries the structured i18n payload that lets the frontend translate
|
||||
walker progress messages (e.g. "{n} Dateien verarbeitet, {indexed}
|
||||
indexiert") into the user's UI language. `progressMessage` stays around
|
||||
as the rendered fallback for older clients and audit logs.
|
||||
|
||||
Safe to run multiple times (checks column existence before acting).
|
||||
|
||||
Usage:
|
||||
python scripts/script_db_migrate_backgroundjob_progress_data.py [--dry-run]
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
scriptPath = Path(__file__).resolve()
|
||||
gatewayPath = scriptPath.parent.parent
|
||||
sys.path.insert(0, str(gatewayPath))
|
||||
os.chdir(str(gatewayPath))
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import psycopg2
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
|
||||
def _getConnection():
    """Open a new psycopg2 connection from the ``DB_*`` entries in ``APP_CONFIG``.

    Host, port and database fall back to ``localhost``, ``5432`` and
    ``poweron_app``; user and password are passed through as configured.
    """
    connectArgs = {
        "host": APP_CONFIG.get("DB_HOST", "localhost"),
        "port": int(APP_CONFIG.get("DB_PORT", "5432")),
        "database": APP_CONFIG.get("DB_DATABASE", "poweron_app"),
        "user": APP_CONFIG.get("DB_USER"),
        "password": APP_CONFIG.get("DB_PASSWORD_SECRET"),
    }
    return psycopg2.connect(**connectArgs)
|
||||
|
||||
|
||||
def _columnExists(cur, table: str, column: str) -> bool:
    """Return True when ``public.<table>`` has a column named ``column``.

    The lookup runs against ``information_schema.columns`` on the given cursor.
    """
    query = """SELECT 1 FROM information_schema.columns
               WHERE table_schema = 'public' AND table_name = %s AND column_name = %s"""
    cur.execute(query, (table, column))
    hit = cur.fetchone()
    return hit is not None
|
||||
|
||||
|
||||
def _tableExists(cur, table: str) -> bool:
    """Return True when a table named ``table`` exists in the ``public`` schema.

    The lookup runs against ``information_schema.tables`` on the given cursor.
    """
    query = """SELECT 1 FROM information_schema.tables
               WHERE table_schema = 'public' AND table_name = %s"""
    cur.execute(query, (table,))
    hit = cur.fetchone()
    return hit is not None
|
||||
|
||||
|
||||
def migrate(dryRun: bool = False):
    """Add the ``progressMessageData`` JSONB column to ``BackgroundJob`` if missing.

    Args:
        dryRun: When True the ALTER statement is only logged, never executed,
            and the transaction is rolled back.

    The statement is skipped when the table does not exist yet or the column
    is already present, so repeated runs are harmless. The cursor and the
    connection are released even when a query raises.
    """
    table, column = "BackgroundJob", "progressMessageData"
    executed = []

    conn = _getConnection()
    try:
        conn.autocommit = False
        cur = conn.cursor()
        try:
            if not _tableExists(cur, table):
                logger.warning("SKIP: table %s does not exist yet (will be created on next ORM init)", table)
            elif _columnExists(cur, table, column):
                logger.info("SKIP: %s.%s already exists", table, column)
            else:
                sql = f'ALTER TABLE public."{table}" ADD COLUMN "{column}" JSONB DEFAULT NULL;'
                logger.info("EXEC: %s", sql)
                if not dryRun:
                    cur.execute(sql)
                executed.append(sql)

            if not executed:
                logger.info("Nothing to do -- schema already up to date")
            elif dryRun:
                conn.rollback()
                logger.info("DRY RUN -- would execute %d statements", len(executed))
            else:
                conn.commit()
                logger.info("Migration committed (%d statements)", len(executed))
        finally:
            cur.close()
    finally:
        # Closing without a commit discards any pending transaction, so a
        # failure above never leaves a half-applied change behind.
        conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point; the module docstring doubles as the --help description.
    cliParser = argparse.ArgumentParser(description=__doc__)
    cliParser.add_argument("--dry-run", action="store_true", help="Print SQL without executing")
    cliArgs = cliParser.parse_args()
    migrate(dryRun=cliArgs.dry_run)
|
||||
110
scripts/script_db_migrate_datasource_inherit.py
Normal file
110
scripts/script_db_migrate_datasource_inherit.py
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Migration: Drop NOT NULL on DataSource/FeatureDataSource cascade-inherit flags.
|
||||
|
||||
Switches to three-valued semantics (NULL = inherit, True/False = explicit) for:
|
||||
- DataSource.neutralize, ragIndexEnabled, scope
|
||||
- FeatureDataSource.neutralize, scope
|
||||
|
||||
Existing rows keep their explicit values; only new records (or explicit reset
|
||||
via cascade) start with NULL. Migration is non-destructive and idempotent.
|
||||
|
||||
Safe to run multiple times.
|
||||
|
||||
Usage:
|
||||
python scripts/script_db_migrate_datasource_inherit.py [--dry-run]
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
scriptPath = Path(__file__).resolve()
|
||||
gatewayPath = scriptPath.parent.parent
|
||||
sys.path.insert(0, str(gatewayPath))
|
||||
os.chdir(str(gatewayPath))
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import psycopg2
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
|
||||
def _getConnection():
    """Open a new psycopg2 connection from the ``DB_*`` entries in ``APP_CONFIG``.

    Host, port and database fall back to ``localhost``, ``5432`` and
    ``poweron_app``; user and password are passed through as configured.
    """
    host = APP_CONFIG.get("DB_HOST", "localhost")
    port = int(APP_CONFIG.get("DB_PORT", "5432"))
    database = APP_CONFIG.get("DB_DATABASE", "poweron_app")
    user = APP_CONFIG.get("DB_USER")
    password = APP_CONFIG.get("DB_PASSWORD_SECRET")
    return psycopg2.connect(host=host, port=port, database=database, user=user, password=password)
|
||||
|
||||
|
||||
def _tableExists(cur, table: str) -> bool:
    """Return True when a table named ``table`` exists in the ``public`` schema.

    The lookup runs against ``information_schema.tables`` on the given cursor.
    """
    lookupSql = """SELECT 1 FROM information_schema.tables
                   WHERE table_schema = 'public' AND table_name = %s"""
    cur.execute(lookupSql, (table,))
    found = cur.fetchone()
    return found is not None
|
||||
|
||||
|
||||
def _columnIsNullable(cur, table: str, column: str) -> bool:
    """Return True when ``public.<table>.<column>`` is declared nullable.

    Reads ``is_nullable`` from ``information_schema.columns``. A column that
    is not listed there yields False.
    """
    lookupSql = """SELECT is_nullable FROM information_schema.columns
                   WHERE table_schema = 'public' AND table_name = %s AND column_name = %s"""
    cur.execute(lookupSql, (table, column))
    row = cur.fetchone()
    # information_schema reports nullability as the strings 'YES' / 'NO'.
    return bool(row) and row[0] == "YES"
|
||||
|
||||
|
||||
def _columnExists(cur, table: str, column: str) -> bool:
    """Return True when ``public.<table>`` has a column named ``column``."""
    cur.execute(
        """SELECT 1 FROM information_schema.columns
           WHERE table_schema = 'public' AND table_name = %s AND column_name = %s""",
        (table, column),
    )
    return cur.fetchone() is not None


def migrate(dryRun: bool = False):
    """Drop NOT NULL on the cascade-inherit columns of DataSource/FeatureDataSource.

    Args:
        dryRun: When True the ALTER statements are only logged, never
            executed, and the transaction is rolled back.

    A target is skipped when its table or column does not exist yet, or when
    the column is already nullable, so repeated runs are harmless. All
    statements run in one transaction; the cursor and the connection are
    released even when a query raises.
    """
    targets = [
        ("DataSource", "neutralize"),
        ("DataSource", "ragIndexEnabled"),
        ("DataSource", "scope"),
        ("FeatureDataSource", "neutralize"),
        ("FeatureDataSource", "scope"),
    ]
    executed = []

    conn = _getConnection()
    try:
        conn.autocommit = False
        cur = conn.cursor()
        try:
            for table, column in targets:
                if not _tableExists(cur, table):
                    logger.warning("SKIP: table %s does not exist yet", table)
                    continue
                # _columnIsNullable() reports False for a missing column, which
                # would make the ALTER below fail and abort the whole run.
                if not _columnExists(cur, table, column):
                    logger.warning("SKIP: column %s.%s does not exist yet", table, column)
                    continue
                if _columnIsNullable(cur, table, column):
                    logger.info("SKIP: %s.%s already nullable", table, column)
                    continue
                sql = f'ALTER TABLE public."{table}" ALTER COLUMN "{column}" DROP NOT NULL;'
                logger.info("EXEC: %s", sql)
                if not dryRun:
                    cur.execute(sql)
                executed.append(sql)

            if not executed:
                logger.info("Nothing to do -- schema already nullable")
            elif dryRun:
                conn.rollback()
                logger.info("DRY RUN -- would execute %d statements", len(executed))
            else:
                conn.commit()
                logger.info("Migration committed (%d statements)", len(executed))
        finally:
            cur.close()
    finally:
        # Closing without a commit discards any pending transaction, so a
        # failure above never leaves a half-applied change behind.
        conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point; the module docstring doubles as the --help description.
    cliParser = argparse.ArgumentParser(description=__doc__)
    cliParser.add_argument("--dry-run", action="store_true", help="Print SQL without executing")
    cliArgs = cliParser.parse_args()
    migrate(dryRun=cliArgs.dry_run)
|
||||
102
scripts/script_db_migrate_datasource_settings.py
Normal file
102
scripts/script_db_migrate_datasource_settings.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Migration: Add `settings` JSONB column to DataSource and FeatureDataSource.
|
||||
|
||||
This is a one-off migration for the UDB DataSource Settings (Settings-Icon)
|
||||
feature: walkers read RAG limits (maxBytes, maxFileSize, maxItems, maxDepth)
|
||||
from this JSON blob, the UI edits them. Existing rows get NULL until the
|
||||
next bootstrap lazy-fills sensible defaults from `_ragLimits.RAG_LIMITS_DEFAULT`.
|
||||
|
||||
Safe to run multiple times (checks column existence before acting).
|
||||
|
||||
Usage:
|
||||
python scripts/script_db_migrate_datasource_settings.py [--dry-run]
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
scriptPath = Path(__file__).resolve()
|
||||
gatewayPath = scriptPath.parent.parent
|
||||
sys.path.insert(0, str(gatewayPath))
|
||||
os.chdir(str(gatewayPath))
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import psycopg2
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
|
||||
def _getConnection():
    """Open a new psycopg2 connection from the ``DB_*`` entries in ``APP_CONFIG``.

    Host, port and database fall back to ``localhost``, ``5432`` and
    ``poweron_app``; user and password are passed through as configured.
    """
    connectArgs = {
        "host": APP_CONFIG.get("DB_HOST", "localhost"),
        "port": int(APP_CONFIG.get("DB_PORT", "5432")),
        "database": APP_CONFIG.get("DB_DATABASE", "poweron_app"),
        "user": APP_CONFIG.get("DB_USER"),
        "password": APP_CONFIG.get("DB_PASSWORD_SECRET"),
    }
    return psycopg2.connect(**connectArgs)
|
||||
|
||||
|
||||
def _columnExists(cur, table: str, column: str) -> bool:
    """Return True when ``public.<table>`` has a column named ``column``.

    The lookup runs against ``information_schema.columns`` on the given cursor.
    """
    lookupSql = """SELECT 1 FROM information_schema.columns
                   WHERE table_schema = 'public' AND table_name = %s AND column_name = %s"""
    cur.execute(lookupSql, (table, column))
    found = cur.fetchone()
    return found is not None
|
||||
|
||||
|
||||
def _tableExists(cur, table: str) -> bool:
    """Return True when a table named ``table`` exists in the ``public`` schema.

    The lookup runs against ``information_schema.tables`` on the given cursor.
    """
    lookupSql = """SELECT 1 FROM information_schema.tables
                   WHERE table_schema = 'public' AND table_name = %s"""
    cur.execute(lookupSql, (table,))
    found = cur.fetchone()
    return found is not None
|
||||
|
||||
|
||||
def migrate(dryRun: bool = False):
    """Add the ``settings`` JSONB column to DataSource and FeatureDataSource.

    Args:
        dryRun: When True the ALTER statements are only logged, never
            executed, and the transaction is rolled back.

    A target is skipped when its table does not exist yet or the column is
    already present, so repeated runs are harmless. All statements run in one
    transaction; the cursor and the connection are released even when a query
    raises.
    """
    targets = [
        ("DataSource", "settings"),
        ("FeatureDataSource", "settings"),
    ]
    executed = []

    conn = _getConnection()
    try:
        conn.autocommit = False
        cur = conn.cursor()
        try:
            for table, column in targets:
                if not _tableExists(cur, table):
                    logger.warning("SKIP: table %s does not exist yet (will be created on next ORM init)", table)
                    continue
                if _columnExists(cur, table, column):
                    logger.info("SKIP: %s.%s already exists", table, column)
                    continue
                sql = f'ALTER TABLE public."{table}" ADD COLUMN "{column}" JSONB DEFAULT NULL;'
                logger.info("EXEC: %s", sql)
                if not dryRun:
                    cur.execute(sql)
                executed.append(sql)

            if not executed:
                logger.info("Nothing to do -- schema already up to date")
            elif dryRun:
                conn.rollback()
                logger.info("DRY RUN -- would execute %d statements", len(executed))
            else:
                conn.commit()
                logger.info("Migration committed (%d statements)", len(executed))
        finally:
            cur.close()
    finally:
        # Closing without a commit discards any pending transaction, so a
        # failure above never leaves a half-applied change behind.
        conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point; the module docstring doubles as the --help description.
    cliParser = argparse.ArgumentParser(description=__doc__)
    cliParser.add_argument("--dry-run", action="store_true", help="Print SQL without executing")
    cliArgs = cliParser.parse_args()
    migrate(dryRun=cliArgs.dry_run)
|
||||
274
scripts/script_migrate_user_uid.py
Normal file
274
scripts/script_migrate_user_uid.py
Normal file
|
|
@ -0,0 +1,274 @@
|
|||
#!/usr/bin/env python3
|
||||
"""One-time migration: Reassign all DB references from an old user UID to a new UID.
|
||||
|
||||
When a user is re-created in PORTA (same username, new UUID), all existing records
|
||||
still reference the old UUID. This script scans ALL registered databases and tables
|
||||
for VARCHAR columns containing the old UID and updates them to the new UID.
|
||||
|
||||
Affected columns include:
|
||||
- sysCreatedBy / sysModifiedBy (on every table via PowerOnModel)
|
||||
- userId, revokedBy, createdByUserId, publishedBy, triggeredBy, assignedTo, etc.
|
||||
|
||||
The script auto-detects the new UID from the UserInDB table by username.
|
||||
|
||||
Usage:
|
||||
# Dry-run (default) — shows what would change, no writes:
|
||||
python scripts/script_migrate_user_uid.py --username patrick.helvetia --old-uid <OLD_UUID>
|
||||
|
||||
# Execute for real:
|
||||
python scripts/script_migrate_user_uid.py --username patrick.helvetia --old-uid <OLD_UUID> --execute
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
scriptPath = Path(__file__).resolve()
|
||||
gatewayPath = scriptPath.parent.parent
|
||||
sys.path.insert(0, str(gatewayPath))
|
||||
os.chdir(str(gatewayPath))
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
|
||||
ALL_DATABASES = [
|
||||
"poweron_app",
|
||||
"poweron_chat",
|
||||
"poweron_management",
|
||||
"poweron_knowledge",
|
||||
"poweron_billing",
|
||||
"poweron_workspace",
|
||||
"poweron_graphicaleditor",
|
||||
"poweron_chatbot",
|
||||
"poweron_trustee",
|
||||
"poweron_commcoach",
|
||||
"poweron_neutralization",
|
||||
"poweron_realestate",
|
||||
"poweron_teamsbot",
|
||||
]
|
||||
|
||||
|
||||
def _getConnection(dbName: str):
    """Open a new psycopg2 connection to the database ``dbName``.

    Host, port, user and password come from the ``DB_*`` entries in
    ``APP_CONFIG`` (host and port fall back to ``localhost`` / ``5432``);
    the client encoding is set to ``utf8``.
    """
    connectArgs = {
        "host": APP_CONFIG.get("DB_HOST", "localhost"),
        "port": int(APP_CONFIG.get("DB_PORT", "5432")),
        "database": dbName,
        "user": APP_CONFIG.get("DB_USER"),
        "password": APP_CONFIG.get("DB_PASSWORD_SECRET"),
        "client_encoding": "utf8",
    }
    return psycopg2.connect(**connectArgs)
|
||||
|
||||
|
||||
def _getTablesInDb(conn) -> List[str]:
    """List the base tables of the ``public`` schema, ordered by name.

    Tables whose name starts with an underscore are excluded by the
    ``NOT LIKE`` filter.
    """
    query = """
        SELECT table_name FROM information_schema.tables
        WHERE table_schema = 'public'
          AND table_type = 'BASE TABLE'
          AND table_name NOT LIKE '\\_%%'
        ORDER BY table_name
    """
    with conn.cursor() as cur:
        cur.execute(query)
        rows = cur.fetchall()
    return [row[0] for row in rows]
|
||||
|
||||
|
||||
def _getVarcharColumns(conn, tableName: str) -> List[str]:
    """Return the VARCHAR/TEXT column names of ``public.<tableName>``.

    These are the columns that may carry a user ID as a plain string. Names
    come back in ordinal (declaration) order.
    """
    query = """
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = 'public'
          AND table_name = %s
          AND data_type IN ('character varying', 'text')
        ORDER BY ordinal_position
    """
    with conn.cursor() as cur:
        cur.execute(query, (tableName,))
        rows = cur.fetchall()
    return [row[0] for row in rows]
|
||||
|
||||
|
||||
def _countMatches(conn, tableName: str, columnName: str, oldUid: str) -> int:
    """Count the rows of ``tableName`` whose ``columnName`` equals ``oldUid``."""
    query = f'SELECT COUNT(*) FROM "{tableName}" WHERE "{columnName}" = %s'
    with conn.cursor() as cur:
        cur.execute(query, (oldUid,))
        countRow = cur.fetchone()
    return countRow[0]
|
||||
|
||||
|
||||
def _updateColumn(conn, tableName: str, columnName: str, oldUid: str, newUid: str) -> int:
    """Rewrite ``columnName`` from ``oldUid`` to ``newUid`` in ``tableName``.

    Returns the cursor's row count for the UPDATE. The statement is not
    committed here; that is left to the caller.
    """
    statement = f'UPDATE "{tableName}" SET "{columnName}" = %s WHERE "{columnName}" = %s'
    with conn.cursor() as cur:
        cur.execute(statement, (newUid, oldUid))
        affected = cur.rowcount
    return affected
|
||||
|
||||
|
||||
def _lookupNewUid(username: str) -> Optional[str]:
    """Look up the current ``id`` for ``username`` in ``poweron_app.UserInDB``.

    Returns the ``id`` of the first matching row, or None when no row matches.
    The connection opened for the lookup is always closed again.
    """
    conn = _getConnection("poweron_app")
    try:
        with conn.cursor() as cur:
            cur.execute('SELECT "id" FROM "UserInDB" WHERE "username" = %s', (username,))
            row = cur.fetchone()
        if row:
            return row[0]
        return None
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _scanJsonbForUid(conn, tableName: str, columnName: str, oldUid: str) -> int:
    """Count rows whose JSONB ``columnName`` contains ``oldUid`` anywhere in its text.

    The column is cast to text and searched with ``strpos`` for the literal
    substring. A ``LIKE '%...%'`` pattern would treat ``%``, ``_`` and ``\\``
    inside ``oldUid`` as wildcards/escapes and could count rows that do not
    actually contain the value.
    """
    with conn.cursor() as cur:
        cur.execute(
            f"""SELECT COUNT(*) FROM "{tableName}"
                WHERE strpos("{columnName}"::text, %s) > 0""",
            (oldUid,),
        )
        return cur.fetchone()[0]
|
||||
|
||||
|
||||
def _updateJsonbColumn(conn, tableName: str, columnName: str, oldUid: str, newUid: str) -> int:
    """Replace ``oldUid`` with ``newUid`` inside a JSONB column via text replacement.

    The column is cast to text, every literal occurrence of ``oldUid`` is
    replaced, and the result is cast back to JSONB. Rows are selected with
    ``strpos`` (literal substring search) so the WHERE clause matches exactly
    the rows REPLACE will change; a ``LIKE`` pattern would interpret ``%``,
    ``_`` and ``\\`` in ``oldUid`` as wildcards/escapes.

    Returns the cursor's row count for the UPDATE. The statement is not
    committed here; that is left to the caller.
    """
    with conn.cursor() as cur:
        cur.execute(
            f"""UPDATE "{tableName}"
                SET "{columnName}" = REPLACE("{columnName}"::text, %s, %s)::jsonb
                WHERE strpos("{columnName}"::text, %s) > 0""",
            (oldUid, newUid, oldUid),
        )
        return cur.rowcount
|
||||
|
||||
|
||||
def _getJsonbColumns(conn, tableName: str) -> List[str]:
    """Return the JSONB column names of ``public.<tableName>`` in ordinal order."""
    query = """
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = 'public'
          AND table_name = %s
          AND data_type = 'jsonb'
        ORDER BY ordinal_position
    """
    with conn.cursor() as cur:
        cur.execute(query, (tableName,))
        rows = cur.fetchall()
    return [row[0] for row in rows]
|
||||
|
||||
|
||||
def migrate(username: str, oldUid: str, execute: bool = False):
    """Reassign every reference to ``oldUid`` to the user's current UID.

    Args:
        username: Username used to look up the new UID in ``UserInDB``.
        oldUid: The orphaned UID to search for.
        execute: When False (default) nothing is written; matches are only
            reported and each transaction is rolled back.

    Every database in ``ALL_DATABASES`` is scanned table by table: VARCHAR/TEXT
    columns for an exact match, JSONB columns for the UID anywhere in their
    text. Each database is handled in its own transaction. Row totals are
    added to the summary only after that database's commit succeeded, so a
    rolled-back database is never reported as updated.

    Exits the process with status 1 when the username is unknown or the old
    and new UID are identical.
    """
    newUid = _lookupNewUid(username)
    if not newUid:
        logger.error(f"User '{username}' not found in UserInDB. Cannot determine new UID.")
        sys.exit(1)

    if newUid == oldUid:
        logger.error(f"Old UID and new UID are identical ({oldUid}). Nothing to migrate.")
        sys.exit(1)

    logger.info(f"Migration: user '{username}'")
    logger.info(f" Old UID: {oldUid}")
    logger.info(f" New UID: {newUid}")
    logger.info(f" Mode: {'EXECUTE' if execute else 'DRY-RUN'}")
    logger.info("")

    totalUpdated = 0
    findings: List[Tuple[str, str, str, int]] = []

    for dbName in ALL_DATABASES:
        try:
            conn = _getConnection(dbName)
        except Exception as e:
            logger.warning(f" Cannot connect to {dbName}: {e}")
            continue

        # Rows changed in this database; only counted once the commit succeeds.
        dbUpdated = 0
        try:
            conn.autocommit = False

            for tableName in _getTablesInDb(conn):
                for col in _getVarcharColumns(conn, tableName):
                    count = _countMatches(conn, tableName, col, oldUid)
                    if count <= 0:
                        continue
                    findings.append((dbName, tableName, col, count))
                    if execute:
                        updated = _updateColumn(conn, tableName, col, oldUid, newUid)
                        dbUpdated += updated
                        logger.info(f" [UPDATED] {dbName}.{tableName}.{col}: {updated} rows")
                    else:
                        logger.info(f" [DRY-RUN] {dbName}.{tableName}.{col}: {count} rows would be updated")

                for col in _getJsonbColumns(conn, tableName):
                    count = _scanJsonbForUid(conn, tableName, col, oldUid)
                    if count <= 0:
                        continue
                    findings.append((dbName, tableName, f"{col} (JSONB)", count))
                    if execute:
                        # Report what the UPDATE actually touched, not the pre-scan count.
                        updated = _updateJsonbColumn(conn, tableName, col, oldUid, newUid)
                        dbUpdated += updated
                        logger.info(f" [UPDATED] {dbName}.{tableName}.{col} (JSONB): {updated} rows")
                    else:
                        logger.info(f" [DRY-RUN] {dbName}.{tableName}.{col} (JSONB): {count} rows would be updated")

            if execute:
                conn.commit()
                totalUpdated += dbUpdated
            else:
                conn.rollback()
        except Exception as e:
            logger.error(f" Error processing {dbName}: {e}")
            if dbUpdated:
                logger.error(f" {dbName}: {dbUpdated} pending row update(s) rolled back, not counted in total")
            try:
                conn.rollback()
            except Exception as rollbackError:
                # A dead connection must not stop the remaining databases.
                logger.warning(f" Rollback on {dbName} failed: {rollbackError}")
        finally:
            conn.close()

    logger.info("")
    logger.info("=" * 70)
    logger.info("SUMMARY")
    logger.info("=" * 70)
    if not findings:
        logger.info(" No references to old UID found in any database.")
    else:
        logger.info(f" Found {len(findings)} column(s) with references to old UID:")
        for dbName, tableName, col, count in findings:
            logger.info(f" {dbName}.{tableName}.{col}: {count} rows")
        logger.info("")
        if execute:
            logger.info(f" Total rows updated: {totalUpdated}")
        else:
            logger.info(f" Total rows that would be updated: {sum(c for _, _, _, c in findings)}")
            logger.info("")
            logger.info(" To apply changes, re-run with --execute")
|
||||
|
||||
|
||||
def main():
    """Parse the command line and run :func:`migrate` with the given options.

    ``--username`` and ``--old-uid`` are mandatory; without ``--execute`` the
    migration runs as a dry-run.
    """
    cli = argparse.ArgumentParser(
        description="Migrate all DB references from old user UID to new UID."
    )
    cli.add_argument(
        "--username",
        required=True,
        help="Username to migrate (e.g. 'patrick.helvetia'). Used to look up the new UID.",
    )
    cli.add_argument(
        "--old-uid",
        required=True,
        help="The old UUID that is orphaned in the database.",
    )
    cli.add_argument(
        "--execute",
        action="store_true",
        default=False,
        help="Actually perform the migration. Without this flag, only a dry-run is done.",
    )
    options = cli.parse_args()

    migrate(username=options.username, oldUid=options.old_uid, execute=options.execute)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -25,7 +25,7 @@ if not c or not c.connection:
|
|||
print("STAGE0: DB_CONNECTION=none (check config.ini / .env)")
|
||||
raise SystemExit(2)
|
||||
|
||||
cur = c.connection.cursor()
|
||||
cur = c.borrowCursor()
|
||||
|
||||
|
||||
def _scalar(cur):
|
||||
|
|
|
|||
|
|
@ -12,11 +12,16 @@ broken query into "no rows found". That hid bugs like:
|
|||
|
||||
These tests pin the new contract: empty result sets still return ``[]`` /
|
||||
``None`` (normal), but any exception inside the query path propagates as
|
||||
``DatabaseQueryError`` with the table name attached. The transaction is
|
||||
rolled back so the connection is usable for subsequent queries.
|
||||
``DatabaseQueryError`` with the table name attached.
|
||||
|
||||
Since the 2026-05-17 pool refactor (`c-work/2-build/2026-05-postgres-connection-pool.md`)
|
||||
the connector borrows a connection from `_PoolRegistry` on every call via the
|
||||
`borrowConn()` context manager. The tests mock that context manager so the
|
||||
fast-fail contract is exercised without requiring a live Postgres server.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import contextmanager
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
|
@ -25,7 +30,7 @@ import psycopg2.errors
|
|||
from modules.connectors.connectorDbPostgre import (
|
||||
DatabaseConnector,
|
||||
DatabaseQueryError,
|
||||
_rollbackQuietly,
|
||||
_stripNulBytesFromStr,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -39,26 +44,44 @@ class DummyTable:
|
|||
|
||||
|
||||
def _makeConnector(cursorBehavior):
|
||||
"""Build a ``DatabaseConnector`` skeleton with mocked connection/cursor.
|
||||
"""Build a ``DatabaseConnector`` skeleton with a mocked pool borrow.
|
||||
|
||||
``cursorBehavior`` is a callable invoked with the cursor mock so the test
|
||||
can configure ``execute``/``fetchall``/``fetchone`` per scenario.
|
||||
|
||||
Returns ``(connector, conn, cursor)``:
|
||||
* ``conn`` exposes ``commit`` / ``rollback`` MagicMocks so tests can
|
||||
assert that the borrow lifecycle did the right thing.
|
||||
* ``cursor`` is the per-test cursor mock.
|
||||
"""
|
||||
connector = DatabaseConnector.__new__(DatabaseConnector)
|
||||
|
||||
cursor = MagicMock()
|
||||
cursorBehavior(cursor)
|
||||
|
||||
cursorContext = MagicMock()
|
||||
cursorContext.__enter__ = MagicMock(return_value=cursor)
|
||||
cursorContext.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
connection = MagicMock()
|
||||
connection.cursor.return_value = cursorContext
|
||||
connector.connection = connection
|
||||
conn = MagicMock()
|
||||
conn.cursor.return_value = cursorContext
|
||||
|
||||
@contextmanager
|
||||
def fakeBorrow():
|
||||
try:
|
||||
yield conn
|
||||
except Exception:
|
||||
conn.rollback()
|
||||
raise
|
||||
else:
|
||||
conn.commit()
|
||||
|
||||
connector.borrowConn = fakeBorrow
|
||||
|
||||
connector._ensureTableExists = MagicMock(return_value=True)
|
||||
connector._systemTableName = "_system"
|
||||
|
||||
cursorBehavior(cursor)
|
||||
return connector, connection, cursor
|
||||
return connector, conn, cursor
|
||||
|
||||
|
||||
class TestGetRecordsetFailLoud:
|
||||
|
|
@ -67,11 +90,12 @@ class TestGetRecordsetFailLoud:
|
|||
def behavior(cursor):
|
||||
cursor.execute.return_value = None
|
||||
cursor.fetchall.return_value = []
|
||||
connector, connection, _ = _makeConnector(behavior)
|
||||
connector, conn, _ = _makeConnector(behavior)
|
||||
|
||||
result = connector.getRecordset(DummyTable)
|
||||
assert result == []
|
||||
connection.rollback.assert_not_called()
|
||||
conn.rollback.assert_not_called()
|
||||
conn.commit.assert_called_once()
|
||||
|
||||
def test_dictAdaptErrorRaisesDatabaseQueryError(self):
|
||||
"""Reproduces the Trustee bug: passing a dict in WHERE → can't adapt → raise."""
|
||||
|
|
@ -79,7 +103,7 @@ class TestGetRecordsetFailLoud:
|
|||
cursor.execute.side_effect = psycopg2.ProgrammingError(
|
||||
"can't adapt type 'dict'"
|
||||
)
|
||||
connector, connection, _ = _makeConnector(behavior)
|
||||
connector, conn, _ = _makeConnector(behavior)
|
||||
|
||||
with pytest.raises(DatabaseQueryError) as excinfo:
|
||||
connector.getRecordset(
|
||||
|
|
@ -90,30 +114,30 @@ class TestGetRecordsetFailLoud:
|
|||
assert excinfo.value.table == "DummyTable"
|
||||
assert "can't adapt type 'dict'" in str(excinfo.value)
|
||||
assert isinstance(excinfo.value.original, psycopg2.ProgrammingError)
|
||||
connection.rollback.assert_called_once()
|
||||
conn.rollback.assert_called_once()
|
||||
|
||||
def test_missingColumnRaisesDatabaseQueryError(self):
|
||||
def behavior(cursor):
|
||||
cursor.execute.side_effect = psycopg2.errors.UndefinedColumn(
|
||||
'column "wat" does not exist'
|
||||
)
|
||||
connector, connection, _ = _makeConnector(behavior)
|
||||
connector, conn, _ = _makeConnector(behavior)
|
||||
|
||||
with pytest.raises(DatabaseQueryError) as excinfo:
|
||||
connector.getRecordset(DummyTable, recordFilter={"wat": "x"})
|
||||
|
||||
assert "wat" in str(excinfo.value)
|
||||
connection.rollback.assert_called_once()
|
||||
conn.rollback.assert_called_once()
|
||||
|
||||
def test_operationalErrorRaisesDatabaseQueryError(self):
|
||||
"""Connection lost mid-query is also a real failure that must propagate."""
|
||||
def behavior(cursor):
|
||||
cursor.execute.side_effect = psycopg2.OperationalError("connection lost")
|
||||
connector, connection, _ = _makeConnector(behavior)
|
||||
connector, conn, _ = _makeConnector(behavior)
|
||||
|
||||
with pytest.raises(DatabaseQueryError):
|
||||
connector.getRecordset(DummyTable)
|
||||
connection.rollback.assert_called_once()
|
||||
conn.rollback.assert_called_once()
|
||||
|
||||
|
||||
class TestGetRecordFailLoud:
|
||||
|
|
@ -122,37 +146,31 @@ class TestGetRecordFailLoud:
|
|||
def behavior(cursor):
|
||||
cursor.execute.return_value = None
|
||||
cursor.fetchone.return_value = None
|
||||
connector, connection, _ = _makeConnector(behavior)
|
||||
connector, conn, _ = _makeConnector(behavior)
|
||||
|
||||
result = connector.getRecord(DummyTable, "missing-id")
|
||||
assert result is None
|
||||
connection.rollback.assert_not_called()
|
||||
conn.rollback.assert_not_called()
|
||||
conn.commit.assert_called_once()
|
||||
|
||||
def test_queryErrorRaisesDatabaseQueryError(self):
|
||||
def behavior(cursor):
|
||||
cursor.execute.side_effect = psycopg2.errors.UndefinedTable(
|
||||
'relation "DummyTable" does not exist'
|
||||
)
|
||||
connector, connection, _ = _makeConnector(behavior)
|
||||
connector, conn, _ = _makeConnector(behavior)
|
||||
|
||||
with pytest.raises(DatabaseQueryError) as excinfo:
|
||||
connector.getRecord(DummyTable, "any-id")
|
||||
|
||||
assert excinfo.value.table == "DummyTable"
|
||||
connection.rollback.assert_called_once()
|
||||
conn.rollback.assert_called_once()
|
||||
|
||||
|
||||
class TestRollbackQuietly:
|
||||
def test_rollsBackOnLiveConnection(self):
|
||||
connection = MagicMock()
|
||||
_rollbackQuietly(connection)
|
||||
connection.rollback.assert_called_once()
|
||||
class TestStripNulBytesFromStr:
|
||||
def test_removesNul(self):
|
||||
assert _stripNulBytesFromStr("a\x00b") == "ab"
|
||||
|
||||
def test_swallowsRollbackError(self):
|
||||
"""Rollback failure must not mask the original query error."""
|
||||
connection = MagicMock()
|
||||
connection.rollback.side_effect = RuntimeError("rollback failed")
|
||||
_rollbackQuietly(connection)
|
||||
|
||||
def test_noopOnNoneConnection(self):
|
||||
_rollbackQuietly(None)
|
||||
def test_passthroughNonStr(self):
|
||||
assert _stripNulBytesFromStr(None) is None
|
||||
assert _stripNulBytesFromStr(7) == 7
|
||||
|
|
|
|||
304
tests/unit/connectors/test_connectorDbPostgre_pool.py
Normal file
304
tests/unit/connectors/test_connectorDbPostgre_pool.py
Normal file
|
|
@ -0,0 +1,304 @@
|
|||
# Copyright (c) 2026 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Concurrency tests for the PostgreSQL connection pool.
|
||||
|
||||
These tests pin the contract that the `c-work/2-build/2026-05-postgres-connection-pool.md`
|
||||
refactor delivered:
|
||||
|
||||
* T1a — 50 threads × 20 reads in parallel produce 0 errors (no
  `OperationalError`), even though the threads outnumber the pool.
* T1b — 20 threads × 50 reads complete without errors and with p99 < 5 s.
* T2 — Threads running `getRecord` and `recordModify` against the same
  connector do not corrupt each other's cursors.
* T3 — `statement_timeout` cancels a runaway `pg_sleep` (the test uses a
  500 ms timeout against `pg_sleep(5)`) and releases the connection back
  into the pool cleanly.
|
||||
|
||||
The tests need a real PostgreSQL server because the bug they guard against
|
||||
only materialises with real psycopg2 sockets — a mocked connection never
|
||||
hangs in `recv()`. They read DB credentials from `APP_CONFIG` (which loads
|
||||
`.env`) and are auto-skipped when the connection fails (no local Postgres,
|
||||
wrong creds, etc.) so `pytest` keeps working in CI-only environments.
|
||||
|
||||
To run them locally:
|
||||
|
||||
pytest gateway/tests/unit/connectors/test_connectorDbPostgre_pool.py -v
|
||||
|
||||
They use a throwaway database name (`poweron_pool_test_<uuid>`) and drop it
|
||||
in fixture teardown so they leave nothing behind.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
import uuid
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import psycopg2
|
||||
import psycopg2.errors
|
||||
import pytest
|
||||
from pydantic import Field
|
||||
|
||||
from modules.connectors.connectorDbPostgre import (
|
||||
DatabaseConnector,
|
||||
_PoolRegistry,
|
||||
closeAllPools,
|
||||
)
|
||||
from modules.datamodels.datamodelBase import PowerOnModel
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
|
||||
def _dbConfig():
    """Collect the PostgreSQL connection settings from ``APP_CONFIG``.

    Returns:
        A dict with ``host``, ``user``, ``password`` and ``port`` (converted
        to ``int``; 5432 when ``DB_PORT`` is not configured), or ``None``
        when the host or user is empty or the password is unset, so the
        module can skip its tests instead of failing while being imported.
    """
    settings = {
        "host": APP_CONFIG.get("DB_HOST"),
        "user": APP_CONFIG.get("DB_USER"),
        "password": APP_CONFIG.get("DB_PASSWORD_SECRET"),
    }
    rawPort = APP_CONFIG.get("DB_PORT", 5432)

    # An empty password is acceptable; only a missing one disqualifies.
    hasCredentials = (
        bool(settings["host"])
        and bool(settings["user"])
        and settings["password"] is not None
    )
    if not hasCredentials:
        return None

    settings["port"] = int(rawPort)
    return settings
|
||||
|
||||
|
||||
def _canReachPostgres(cfg) -> bool:
    """Report whether a short-lived connection to the ``postgres`` admin DB works.

    Args:
        cfg: Mapping with ``host``, ``port``, ``user`` and ``password`` keys.

    Returns:
        ``True`` when a connection could be opened (it is closed again
        immediately), ``False`` on any failure. The connect attempt is
        limited to 2 seconds so an unreachable server does not stall
        test collection.
    """
    try:
        probe = psycopg2.connect(
            host=cfg["host"],
            port=cfg["port"],
            database="postgres",
            user=cfg["user"],
            password=cfg["password"],
            connect_timeout=2,
        )
        probe.close()
    except Exception:  # noqa: BLE001
        # Any failure (bad credentials, no server, malformed cfg) means "skip".
        return False
    else:
        return True
|
||||
|
||||
|
||||
# Resolved once at import time; the fixtures and tests below reuse this dict.
_DB_CFG = _dbConfig()

# Module-wide skip: run the live tests only when a config exists AND the
# server answers. The reachability probe is not attempted without a config.
pytestmark = pytest.mark.skipif(
    not (_DB_CFG is not None and _canReachPostgres(_DB_CFG)),
    reason="No reachable PostgreSQL — skipping live-Postgres pool tests",
)
|
||||
|
||||
|
||||
class PoolTestRow(PowerOnModel):
    """Minimal table model used to exercise the connection pool.

    Declares a single string column, ``payload``. The ``id`` the tests write
    and read is presumably provided by ``PowerOnModel`` — TODO confirm.
    """

    payload: str = Field("", description="Test payload")
|
||||
|
||||
|
||||
@pytest.fixture
def liveConnector():
    """Yield a `DatabaseConnector` bound to a throwaway database.

    The database name is unique per test (``poweron_pool_test_<uuid>``). The
    fixture itself never issues ``CREATE DATABASE``; the connector is
    expected to create it on first use — NOTE(review): confirm against
    `DatabaseConnector`.

    The pool registry is wiped before and after each test so state from one
    test cannot mask a bug in another. Teardown runs in a ``finally`` block,
    so the database and the pools are also cleaned up when constructing or
    seeding the connector fails before the test body ever runs.

    Yields:
        A connector whose `PoolTestRow` table holds exactly one row with
        ``id == "seed"``.
    """
    cfg = _DB_CFG
    host = cfg["host"]
    user = cfg["user"]
    password = cfg["password"]
    port = cfg["port"]
    dbName = f"poweron_pool_test_{uuid.uuid4().hex[:8]}"

    def _dropDatabase(*, terminateBackends: bool) -> None:
        """Drop the throwaway database through a short-lived admin connection."""
        adminConn = psycopg2.connect(
            host=host, port=port, database="postgres", user=user, password=password
        )
        # DROP DATABASE cannot run inside a transaction block.
        adminConn.autocommit = True
        try:
            with adminConn.cursor() as cur:
                if terminateBackends:
                    # Kick out any session still attached to the test DB,
                    # otherwise the DROP below is refused.
                    cur.execute(
                        'SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = %s',
                        (dbName,),
                    )
                cur.execute(f'DROP DATABASE IF EXISTS "{dbName}"')
        finally:
            adminConn.close()

    # Pre-clean: drop any orphan test DB with the same name (shouldn't happen
    # because we use a unique uuid, but be defensive).
    _dropDatabase(terminateBackends=False)
    closeAllPools()

    try:
        connector = DatabaseConnector(
            dbHost=host,
            dbDatabase=dbName,
            dbUser=user,
            dbPassword=password,
            dbPort=port,
        )
        # Seed exactly one row so every concurrent read has a stable target.
        connector.recordCreate(PoolTestRow, {"id": "seed", "payload": "hello"})

        yield connector
    finally:
        # Teardown: tear pools down first so no pooled session blocks the drop.
        closeAllPools()
        _dropDatabase(terminateBackends=True)
|
||||
|
||||
|
||||
class TestPoolConcurrency:
    """Live-Postgres tests for concurrent use of one `DatabaseConnector`."""

    def _runWorkers(self, liveConnector, *, threadCount: int, callsPerThread: int):
        """Run N worker threads, each issuing M reads. Return (errors, latencies).

        Args:
            liveConnector: Connector shared by all worker threads.
            threadCount: Number of threads (also the executor size, so all
                of them run at once).
            callsPerThread: Sequential `getRecordset` calls per thread.

        Returns:
            ``(errors, latencies)``: every exception raised by a call
            (including a failed seed-row check) and the per-call wall-clock
            durations in seconds, sorted ascending.
        """
        errors: list = []
        latencies: list = []
        lock = threading.Lock()

        def worker():
            for _ in range(callsPerThread):
                t0 = time.perf_counter()
                try:
                    rows = liveConnector.getRecordset(PoolTestRow)
                    assert any(r["id"] == "seed" for r in rows)
                except Exception as e:  # noqa: BLE001 — we want every failure mode
                    with lock:
                        errors.append(e)
                finally:
                    # Record the latency of failed calls too.
                    with lock:
                        latencies.append(time.perf_counter() - t0)

        with ThreadPoolExecutor(max_workers=threadCount) as ex:
            futures = [ex.submit(worker) for _ in range(threadCount)]
            for f in as_completed(futures):
                # Re-raise anything that escaped the worker's own handler.
                f.result()
        latencies.sort()
        return errors, latencies

    def test_50_threads_x_20_reads_no_errors(self, liveConnector):
        """T1a — STRESS: 50 threads × 20 reads each → 0 errors.

        Pre-pool, this scenario produced either
        `OperationalError: another command is already in progress` or a
        deadlock in `recv()` because the threadpool shared one psycopg2
        socket. With the pool plus `borrowConn`'s bounded wait, every
        thread eventually gets a connection and completes — even with 30
        threads queued waiting at any moment (pool max=20).
        """
        errors, _ = self._runWorkers(liveConnector, threadCount=50, callsPerThread=20)
        assert not errors, f"got {len(errors)} errors; first: {errors[0]!r}"

    def test_20_threads_x_50_reads_latency_budget(self, liveConnector):
        """T1b — DESIGN CAPACITY: 20 threads × 50 reads, p99 < 5 s.

        20 threads matches the pool's `max=20` so there is no queueing —
        every borrow returns immediately. This pins a sanity-level per-call
        latency budget; pre-pool it was unbounded (recv() never returned).

        The 5 s ceiling is generous on purpose: `getRecordset` calls
        `_ensureTableExists` which runs two `information_schema` queries
        for column-additive migration, and under 20-way concurrency on a
        single Postgres instance that produces a long tail. The hard
        assertion is `not errors` — the latency check just guarantees
        nothing hangs indefinitely.
        """
        errors, latencies = self._runWorkers(
            liveConnector, threadCount=20, callsPerThread=50
        )
        assert not errors, f"got {len(errors)} errors; first: {errors[0]!r}"
        # `latencies` is sorted ascending, so this index is the 99th percentile.
        p99 = latencies[int(len(latencies) * 0.99)]
        assert p99 < 5.0, f"p99 latency {p99:.2f}s exceeds 5s budget"

    def test_interleaved_load_and_save_no_collision(self, liveConnector):
        """T2: parallel reads + writes on the same connector → no cursor mix-up.

        Pre-pool the reader could observe a row in mid-write or vice versa
        because both shared the same cursor. With one connection per borrow,
        the database's own row-locking is the only contention, so the test
        asserts that no call raised and that every thread actually stopped
        once asked to: a thread still alive after the join timeout is stuck
        inside a DB call, which is exactly the hang this suite guards against.
        """
        stopFlag = threading.Event()
        errors: list = []
        lock = threading.Lock()

        def reader():
            while not stopFlag.is_set():
                try:
                    liveConnector.getRecord(PoolTestRow, "seed")
                except Exception as e:  # noqa: BLE001
                    with lock:
                        errors.append(("read", e))

        def writer():
            i = 0
            while not stopFlag.is_set():
                try:
                    liveConnector.recordModify(
                        PoolTestRow,
                        "seed",
                        {"id": "seed", "payload": f"v{i}"},
                    )
                    i += 1
                except Exception as e:  # noqa: BLE001
                    with lock:
                        errors.append(("write", e))

        # Daemon threads so a wedged worker cannot block interpreter exit.
        threads = [
            threading.Thread(target=reader, name="reader-0", daemon=True),
            threading.Thread(target=reader, name="reader-1", daemon=True),
            threading.Thread(target=writer, name="writer-0", daemon=True),
            threading.Thread(target=writer, name="writer-1", daemon=True),
        ]
        for t in threads:
            t.start()
        time.sleep(2.0)
        stopFlag.set()
        for t in threads:
            t.join(timeout=3.0)

        # `join(timeout=...)` returns silently on timeout, so check liveness.
        stuck = [t.name for t in threads if t.is_alive()]
        assert not stuck, f"threads still running after stop was requested: {stuck}"
        assert not errors, f"got {len(errors)} errors; first: {errors[0]!r}"

    def test_statement_timeout_releases_connection(self, liveConnector):
        """T3: `pg_sleep` past statement_timeout → QueryCanceled, pool intact.

        The bug we are guarding against: a runaway query with no timeout
        hung `recv()` forever, the psycopg2 connection was poisoned, and the
        whole backend became unresponsive once that connection was reused.
        With `statement_timeout=30000` configured at pool construction the
        query is cancelled by the server, the borrow context manager rolls
        back, and the connection returns to the pool — proven by the fact
        that a follow-up call still succeeds quickly.
        """
        # Use a short timeout to keep the test fast — override the pool's
        # session statement_timeout for one borrow via SET LOCAL.
        with liveConnector.borrowConn() as conn:
            with conn.cursor() as cursor:
                cursor.execute("SET LOCAL statement_timeout = 500")
                with pytest.raises(psycopg2.errors.QueryCanceled):
                    cursor.execute("SELECT pg_sleep(5)")

        # Follow-up call must succeed quickly: connection is back in the pool.
        t0 = time.perf_counter()
        rows = liveConnector.getRecordset(PoolTestRow)
        elapsed = time.perf_counter() - t0
        assert any(r["id"] == "seed" for r in rows)
        assert elapsed < 1.0, f"follow-up call took {elapsed:.2f}s — pool may be wedged"
|
||||
|
||||
|
||||
class TestPoolRegistry:
    """Checks on `_PoolRegistry`: pool sharing and `closeAllPools()`."""

    def test_one_pool_per_database_identity(self, liveConnector):
        """Two connectors against the same (host, db, port) share one pool."""
        identity = {
            "dbHost": _DB_CFG["host"],
            "dbDatabase": liveConnector.dbDatabase,
            "dbUser": _DB_CFG["user"],
            "dbPassword": _DB_CFG["password"],
            "dbPort": _DB_CFG["port"],
        }
        firstPool = _PoolRegistry.getPool(**identity)
        secondPool = _PoolRegistry.getPool(**identity)
        assert firstPool is secondPool

    def test_close_all_clears_registry(self, liveConnector):
        """`closeAllPools()` empties the registry so the next call rebuilds."""
        # A real query guarantees the pool has been registered.
        liveConnector.getRecordset(PoolTestRow)
        assert _PoolRegistry._pools, "pool should exist after a real call"

        closeAllPools()

        assert _PoolRegistry._pools == {}, "registry should be empty after closeAllPools()"
|
||||
|
|
@ -68,6 +68,16 @@ class _FakeDb:
|
|||
def _ensureTableExists(self, modelClass):
|
||||
return True
|
||||
|
||||
def borrowCursor(self):
    """Stand in for `DatabaseConnector.borrowCursor()`.

    Returns:
        A single-use context manager whose ``with`` target is a fresh
        ``MagicMock`` acting as the cursor.
    """
    from contextlib import contextmanager
    from unittest.mock import MagicMock

    def _yieldFakeCursor():
        yield MagicMock()

    # Wrap and instantiate in one step; each call gets its own manager.
    return contextmanager(_yieldFakeCursor)()
|
||||
|
||||
def seed(self, modelClass, record: Dict[str, Any]):
|
||||
tableName = modelClass.__name__
|
||||
self._tables.setdefault(tableName, {})
|
||||
|
|
|
|||
|
|
@ -69,6 +69,16 @@ class _FakeDb:
|
|||
def _ensureTableExists(self, modelClass):
|
||||
return True
|
||||
|
||||
def borrowCursor(self):
    """Stand in for `DatabaseConnector.borrowCursor()` in the cascade test.

    Returns:
        A single-use context manager whose ``with`` target is a fresh
        ``MagicMock`` acting as the cursor.
    """
    from contextlib import contextmanager
    from unittest.mock import MagicMock

    def _yieldFakeCursor():
        yield MagicMock()

    # Wrap and instantiate in one step; each call gets its own manager.
    return contextmanager(_yieldFakeCursor)()
|
||||
|
||||
def seed(self, modelClass, record: Dict[str, Any]):
|
||||
tableName = modelClass.__name__
|
||||
self._tables.setdefault(tableName, {})
|
||||
|
|
|
|||
359
tests/unit/services/test_buildTree.py
Normal file
359
tests/unit/services/test_buildTree.py
Normal file
|
|
@ -0,0 +1,359 @@
|
|||
"""Unit tests for the generic UDB tree builder.
|
||||
|
||||
Verifies key encoding/decoding and that children for parent keys with
|
||||
existing handlers (top-level, conn, mgrp, feat) are produced with the
|
||||
correct effective-flag triplet.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from modules.serviceCenter.services.serviceKnowledge import _buildTree
|
||||
|
||||
|
||||
class TestKeyCoding(unittest.TestCase):
    """Encoding and decoding of pipe-separated tree keys."""

    def test_encode_decode_roundtrip(self):
        """Decoding an encoded key yields the original kind and parts."""
        segments = ["conn-1", "sharepointFolder", "/sites/x"]
        encoded = _buildTree._encode("ds", *segments)

        decodedKind, decodedParts = _buildTree._decode(encoded)

        self.assertEqual(decodedKind, "ds")
        self.assertEqual(decodedParts, segments)

    def test_top_level_kinds(self):
        """The first segment is the kind; the remainder is the parts list."""
        for rawKey, expectedKind in (("conn|abc", "conn"), ("mgrp|m1", "mgrp")):
            self.assertEqual(_buildTree._decode(rawKey)[0], expectedKind)
        _, featParts = _buildTree._decode("feat|m1|trustee|fi-1")
        self.assertEqual(featParts, ["m1", "trustee", "fi-1"])
|
||||
|
||||
|
||||
class TestEffectiveTriplets(unittest.TestCase):
    """Effective neutralize/scope/RAG triplets for data-source nodes."""

    def _assertMandateTriplet(self, triplet):
        """Assert the triplet inherited neutralize=True, scope='mandate', rag=True."""
        self.assertEqual(triplet["effectiveNeutralize"], True)
        self.assertEqual(triplet["effectiveScope"], "mandate")
        self.assertEqual(triplet["effectiveRagIndexEnabled"], True)

    def test_ds_triplet_no_record_returns_defaults(self):
        """With no records at all the triplet falls back to the defaults."""
        expectedDefaults = {
            "effectiveNeutralize": False,
            "effectiveScope": "personal",
            "effectiveRagIndexEnabled": False,
        }
        self.assertEqual(
            _buildTree._effectiveTripletDs("c", "msft", "/", []),
            expectedDefaults,
        )

    def test_ds_triplet_inherits_from_root(self):
        """A folder without its own record takes the flags of the root record."""
        rootRecord = {
            "id": "r", "connectionId": "c", "sourceType": "msft", "path": "/",
            "neutralize": True, "scope": "mandate", "ragIndexEnabled": True,
        }
        triplet = _buildTree._effectiveTripletDs(
            "c", "sharepointFolder", "/sites/x", [rootRecord]
        )
        self._assertMandateTriplet(triplet)

    def test_fds_triplet_inherits_from_workspace_wildcard(self):
        """A table without its own record takes the flags of the ``*`` record."""
        wildcardRecord = {
            "id": "ws", "workspaceInstanceId": "ws-inst", "featureInstanceId": "fi1",
            "tableName": "*", "recordFilter": None, "neutralize": True,
            "scope": "mandate", "ragIndexEnabled": True,
        }
        triplet = _buildTree._effectiveTripletFds("fi1", "Pos", None, [wildcardRecord])
        self._assertMandateTriplet(triplet)
|
||||
|
||||
|
||||
class TestRecordLookup(unittest.TestCase):
    """Lookup of stored DataSource / FeatureDataSource records."""

    def test_finds_ds_record_by_normalised_path(self):
        """A trailing slash on the queried path does not prevent a match."""
        stored = [{"id": "x", "connectionId": "c", "sourceType": "msft", "path": "/folder"}]

        hit = _buildTree._findDsRecord(stored, "c", "msft", "/folder/")
        miss = _buildTree._findDsRecord(stored, "c", "msft", "/other")

        self.assertEqual(hit.get("id"), "x")
        self.assertIsNone(miss)

    def test_finds_fds_record_with_matching_filter(self):
        """Only an equal ``recordFilter`` matches the stored record."""
        stored = [{
            "id": "f", "workspaceInstanceId": "ws", "featureInstanceId": "fi1",
            "tableName": "Pos", "recordFilter": {"id": "5"},
        }]

        hit = _buildTree._findFdsRecord(stored, "fi1", "Pos", {"id": "5"})
        miss = _buildTree._findFdsRecord(stored, "fi1", "Pos", {"id": "99"})

        self.assertEqual(hit.get("id"), "f")
        self.assertIsNone(miss)

    def test_fds_record_with_none_filter_matches_only_none(self):
        """A record stored without a filter is not returned for a filtered query."""
        stored = [{
            "id": "f", "workspaceInstanceId": "ws", "featureInstanceId": "fi1",
            "tableName": "*", "recordFilter": None,
        }]

        hit = _buildTree._findFdsRecord(stored, "fi1", "*", None)
        miss = _buildTree._findFdsRecord(stored, "fi1", "*", {"id": "1"})

        self.assertEqual(hit.get("id"), "f")
        self.assertIsNone(miss)
|
||||
|
||||
|
||||
class TestGetChildrenForParents(unittest.TestCase):
    """End-to-end orchestrator test with mocked dependencies."""

    def _runAsync(self, coro):
        """Run ``coro`` to completion on a private event loop and return its result.

        A fresh loop is created and closed per call rather than fetched with
        ``asyncio.get_event_loop()``. That call is deprecated when no loop is
        running, and it fails when an earlier test in the same process has
        cleared or closed the thread's loop (``asyncio.run`` does so). The
        private loop is never installed as the thread's current loop, so
        other tests are unaffected.
        """
        loop = asyncio.new_event_loop()
        try:
            return loop.run_until_complete(coro)
        finally:
            loop.close()

    def test_unknown_parent_key_returns_empty_list(self):
        """A parent key of an unknown kind maps to an empty child list."""
        with patch("modules.interfaces.interfaceDbApp.getRootInterface") as mockRoot:
            rootIf = MagicMock()
            rootIf.db.getRecordset.return_value = []
            mockRoot.return_value = rootIf

            ctx = MagicMock()
            ctx.user.id = "u1"
            ctx.mandateId = "m1"

            result = self._runAsync(
                _buildTree.getChildrenForParents("inst-1", ["bogus|key"], ctx)
            )
            self.assertEqual(result["bogus|key"], [])

    def test_top_level_emits_personal_root_first(self):
        """Top-level emits personalRoot first, then mandate-group nodes inline."""
        with patch("modules.interfaces.interfaceDbApp.getRootInterface") as mockRoot:
            rootIf = MagicMock()
            rootIf.db.getRecordset.return_value = []
            rootIf.getUserMandates.return_value = []
            mockRoot.return_value = rootIf

            ctx = MagicMock()
            ctx.user.id = "u1"
            ctx.mandateId = "m1"

            # A parent key of ``None`` requests the top level; its children
            # are reported under the "__root__" key.
            result = self._runAsync(
                _buildTree.getChildrenForParents("inst-1", [None], ctx)
            )
            children = result["__root__"]
            self.assertGreaterEqual(len(children), 1)
            personalRoot = children[0]
            self.assertEqual(personalRoot["key"], "personalRoot")
            self.assertEqual(personalRoot["kind"], "synthRoot")
            self.assertIsNone(personalRoot["parentKey"])
            self.assertTrue(personalRoot["hasChildren"])
            self.assertTrue(personalRoot["defaultExpanded"])
|
||||
|
||||
|
||||
class TestTopLevelLayout(unittest.TestCase):
    """Tests for the flat top-level layout (personalRoot + mandate groups)."""

    _ROOT_IF_TARGET = "modules.interfaces.interfaceDbApp.getRootInterface"
    _SERVICE_TARGET = "modules.serviceCenter.getService"
    _CATALOG_TARGET = "modules.security.rbacCatalog.getCatalogService"

    def _runAsync(self, coro):
        """Run ``coro`` to completion on a private event loop and return its result.

        A fresh loop is created and closed per call rather than fetched with
        ``asyncio.get_event_loop()``. That call is deprecated when no loop is
        running, and it fails when an earlier test in the same process has
        cleared or closed the thread's loop (``asyncio.run`` does so). The
        private loop is never installed as the thread's current loop, so
        other tests are unaffected.
        """
        loop = asyncio.new_event_loop()
        try:
            return loop.run_until_complete(coro)
        finally:
            loop.close()

    @staticmethod
    def _makeCtx(mandateId):
        """Build a mocked request context for user ``u1`` with the given mandate."""
        ctx = MagicMock()
        ctx.user.id = "u1"
        ctx.mandateId = mandateId
        return ctx

    @staticmethod
    def _makeRootIf():
        """Build a mocked root interface whose DB returns no records."""
        rootIf = MagicMock()
        rootIf.db.getRecordset.return_value = []
        return rootIf

    @classmethod
    def _makeMandateRootIf(cls, *, withTrusteeFeature: bool):
        """Build a root interface for a user who belongs to mandate ``m1``.

        With ``withTrusteeFeature`` the mandate owns one enabled ``trustee``
        feature instance (``fi-1``) the user may access; otherwise the
        mandate has no feature instances at all.
        """
        rootIf = cls._makeRootIf()
        userMandate = MagicMock()
        userMandate.mandateId = "m1"
        rootIf.getUserMandates.return_value = [userMandate]
        if not withTrusteeFeature:
            rootIf.getFeatureInstancesByMandate.return_value = []
            return rootIf

        featureInst = MagicMock()
        featureInst.id = "fi-1"
        featureInst.featureCode = "trustee"
        featureInst.enabled = True
        rootIf.getFeatureInstancesByMandate.return_value = [featureInst]
        featureAccess = MagicMock()
        featureAccess.enabled = True
        rootIf.getFeatureAccess.return_value = featureAccess
        return rootIf

    @staticmethod
    def _makeCatalog():
        """Build a mocked RBAC catalog that lists ``trustee`` as a data feature."""
        catalog = MagicMock()
        catalog.getFeaturesWithDataObjects.return_value = ["trustee"]
        return catalog

    @staticmethod
    def _makeChatService(connections):
        """Build a mocked service whose `getUserConnections` returns ``connections``."""
        chatService = MagicMock()
        chatService.getUserConnections.return_value = connections
        return chatService

    def test_personal_root_carries_neutral_default_triplet(self):
        """personalRoot is a neutral synthetic node: default flags, no RAG, no source."""
        rootIf = self._makeRootIf()
        rootIf.getUserMandates.return_value = []
        with patch(self._ROOT_IF_TARGET, return_value=rootIf):
            result = self._runAsync(
                _buildTree.getChildrenForParents("inst-1", [None], self._makeCtx("m1"))
            )
        personalRoot = result["__root__"][0]
        self.assertFalse(personalRoot["effectiveNeutralize"])
        self.assertEqual(personalRoot["effectiveScope"], "personal")
        self.assertFalse(personalRoot["effectiveRagIndexEnabled"])
        self.assertFalse(personalRoot["supportsRag"])
        self.assertFalse(personalRoot["canBeAdded"])
        self.assertIsNone(personalRoot["dataSourceId"])
        self.assertIsNone(personalRoot["modelType"])

    def test_personal_root_emits_active_connection_with_correct_parent(self):
        """An active connection appears as a ``conn|<id>`` child of personalRoot."""
        chatService = self._makeChatService([{
            "id": "conn-1",
            "status": "active",
            "authority": "msft",
            "externalEmail": "user@example.com",
        }])
        with patch(self._ROOT_IF_TARGET, return_value=self._makeRootIf()), \
                patch(self._SERVICE_TARGET, return_value=chatService):
            result = self._runAsync(
                _buildTree.getChildrenForParents(
                    "inst-1", ["personalRoot"], self._makeCtx("m1")
                )
            )
        children = result["personalRoot"]
        self.assertEqual(len(children), 1)
        self.assertEqual(children[0]["key"], "conn|conn-1")
        self.assertEqual(children[0]["kind"], "connection")
        self.assertEqual(children[0]["parentKey"], "personalRoot")
        self.assertEqual(children[0]["label"], "user@example.com")
        self.assertTrue(children[0]["supportsRag"])

    def test_personal_root_skips_inactive_connection(self):
        """Connections whose status is not ``active`` are left out."""
        chatService = self._makeChatService([
            {"id": "c1", "status": "active", "authority": "msft", "externalEmail": "a"},
            {"id": "c2", "status": "expired", "authority": "google", "externalEmail": "b"},
        ])
        with patch(self._ROOT_IF_TARGET, return_value=self._makeRootIf()), \
                patch(self._SERVICE_TARGET, return_value=chatService):
            result = self._runAsync(
                _buildTree.getChildrenForParents(
                    "inst-1", ["personalRoot"], self._makeCtx("m1")
                )
            )
        self.assertEqual(len(result["personalRoot"]), 1)
        self.assertEqual(result["personalRoot"][0]["connectionId"], "c1")

    def test_mandate_groups_emitted_inline_at_top_level(self):
        """A mandate with a data feature shows up as a top-level ``mgrp|<id>`` node."""
        rootIf = self._makeMandateRootIf(withTrusteeFeature=True)
        with patch(self._ROOT_IF_TARGET, return_value=rootIf), \
                patch(self._CATALOG_TARGET, return_value=self._makeCatalog()):
            result = self._runAsync(
                _buildTree.getChildrenForParents("inst-1", [None], self._makeCtx(None))
            )
        children = result["__root__"]
        byKey = {c["key"]: c for c in children}
        self.assertIn("personalRoot", byKey)
        self.assertIn("mgrp|m1", byKey)
        mgroup = byKey["mgrp|m1"]
        self.assertEqual(mgroup["kind"], "mandateGroup")
        self.assertIsNone(mgroup["parentKey"])
        self.assertEqual(mgroup["mandateId"], "m1")
        self.assertTrue(mgroup["defaultExpanded"])
        self.assertFalse(mgroup["supportsRag"])

    def test_top_level_omits_mandates_without_data_features(self):
        """A mandate without feature instances produces no group node."""
        rootIf = self._makeMandateRootIf(withTrusteeFeature=False)
        with patch(self._ROOT_IF_TARGET, return_value=rootIf), \
                patch(self._CATALOG_TARGET, return_value=self._makeCatalog()):
            result = self._runAsync(
                _buildTree.getChildrenForParents("inst-1", [None], self._makeCtx(None))
            )
        keys = [c["key"] for c in result["__root__"]]
        self.assertEqual(keys, ["personalRoot"])

    def test_personal_root_listed_first_via_display_order(self):
        """personalRoot stays first (displayOrder 0) even when groups exist."""
        rootIf = self._makeMandateRootIf(withTrusteeFeature=True)
        with patch(self._ROOT_IF_TARGET, return_value=rootIf), \
                patch(self._CATALOG_TARGET, return_value=self._makeCatalog()):
            result = self._runAsync(
                _buildTree.getChildrenForParents("inst-1", [None], self._makeCtx(None))
            )
        children = result["__root__"]
        self.assertEqual(children[0]["key"], "personalRoot")
        self.assertEqual(children[0]["displayOrder"], 0)
|
||||
|
||||
|
||||
class TestFeatureTableFields(unittest.TestCase):
    """Per-column field expansion under a feature data-source table."""

    _PARENT_KEY = "fdstbl|fi-1|TrusteePosition"

    def _expand(self, fieldNames, allFds):
        """Expand ``fieldNames`` of table ``TrusteePosition`` in instance ``fi-1``."""
        return _buildTree._featureTableFields(
            parentKey=self._PARENT_KEY,
            featureInstanceId="fi-1",
            tableName="TrusteePosition",
            fieldNames=fieldNames,
            allFds=allFds,
        )

    @staticmethod
    def _tableRecord(*, neutralize, neutralizeFields):
        """Build the stored table-level record with the given neutralize settings."""
        return {
            "id": "f", "workspaceInstanceId": "ws-1", "featureInstanceId": "fi-1",
            "tableName": "TrusteePosition", "recordFilter": None,
            "neutralize": neutralize, "neutralizeFields": neutralizeFields,
            "scope": None, "ragIndexEnabled": False,
        }

    def test_emits_one_node_per_field(self):
        """Each field becomes a leaf ``fdsField`` node keyed by table and field."""
        nodes = self._expand(["id", "valuta", "company"], [])
        self.assertEqual(len(nodes), 3)
        firstNode = nodes[0]
        self.assertEqual(firstNode["kind"], "fdsField")
        self.assertEqual(firstNode["fieldName"], "id")
        self.assertEqual(firstNode["parentKey"], "fdstbl|fi-1|TrusteePosition")
        self.assertEqual(firstNode["key"], "fdsfld|fi-1|TrusteePosition|id")
        self.assertFalse(firstNode["hasChildren"])
        self.assertFalse(firstNode["supportsRag"])

    def test_field_neutralize_inherits_from_table_blanket(self):
        """Table-level ``neutralize=True`` applies to every field."""
        blanket = self._tableRecord(neutralize=True, neutralizeFields=None)
        nodes = self._expand(["email", "company"], [blanket])
        self.assertTrue(nodes[0]["effectiveNeutralize"])
        self.assertTrue(nodes[1]["effectiveNeutralize"])

    def test_field_neutralize_explicit_via_neutralize_fields(self):
        """Only fields listed in ``neutralizeFields`` are neutralized."""
        selective = self._tableRecord(neutralize=False, neutralizeFields=["email"])
        nodes = self._expand(["email", "company"], [selective])
        byField = {node["fieldName"]: node for node in nodes}
        self.assertTrue(byField["email"]["effectiveNeutralize"])
        self.assertFalse(byField["company"]["effectiveNeutralize"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
55
tests/unit/services/test_costEstimate.py
Normal file
55
tests/unit/services/test_costEstimate.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
"""Unit tests for `_costEstimate` heuristic.
|
||||
|
||||
Validates the output shape, basic formulas, and that 'basis' annotations
|
||||
are always present (the user-facing transparency contract).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import unittest
|
||||
|
||||
from modules.serviceCenter.services.serviceKnowledge import _costEstimate
|
||||
|
||||
|
||||
class TestCostEstimate(unittest.TestCase):
    """Output shape and basic arithmetic of `estimateBootstrapCost`."""

    _MIB = 1024 * 1024

    def test_files_shape(self):
        """A file estimate exposes tokens, USD and a fully annotated basis."""
        estimate = _costEstimate.estimateBootstrapCost(
            {"maxBytes": 200 * self._MIB}, kind="files",
        )
        for topLevelKey in ("estimatedTokens", "estimatedUsd", "basis"):
            self.assertIn(topLevelKey, estimate)
        basis = estimate["basis"]
        self.assertIn("assumptions", basis)
        self.assertIn("formula", basis["assumptions"])
        self.assertIn("notes", basis)

    def test_files_doubling_maxBytes_doubles_tokens(self):
        """Token estimates scale linearly with ``maxBytes``."""
        smaller = _costEstimate.estimateBootstrapCost(
            {"maxBytes": 100 * self._MIB}, kind="files"
        )
        larger = _costEstimate.estimateBootstrapCost(
            {"maxBytes": 200 * self._MIB}, kind="files"
        )
        self.assertEqual(larger["estimatedTokens"], smaller["estimatedTokens"] * 2)

    def test_clickup_uses_tasks_and_workspaces(self):
        """ClickUp tokens are tasks × workspaces × tokens-per-item."""
        limits = {"maxTasks": 100, "maxWorkspaces": 2, "maxListsPerWorkspace": 10}
        estimate = _costEstimate.estimateBootstrapCost(limits, kind="clickup")
        expectedTokens = 100 * 2 * _costEstimate.DEFAULT_TOKENS_PER_ITEM
        self.assertEqual(estimate["estimatedTokens"], expectedTokens)

    def test_unknown_kind_returns_zero(self):
        """An unrecognised kind yields a zero estimate."""
        estimate = _costEstimate.estimateBootstrapCost({}, kind="totally-unknown")
        self.assertEqual(estimate["estimatedTokens"], 0)
        self.assertEqual(estimate["estimatedUsd"], 0.0)

    def test_usd_is_rounded_4_decimals(self):
        """The USD figure carries no more than four decimal places."""
        usd = _costEstimate.estimateBootstrapCost(
            {"maxBytes": self._MIB}, kind="files"
        )["estimatedUsd"]
        self.assertEqual(usd, round(usd, 4))

    def test_basis_includes_input_limits(self):
        """The caller's limits are echoed back under ``basis['limits']``."""
        estimate = _costEstimate.estimateBootstrapCost({"maxBytes": 42}, kind="files")
        self.assertEqual(estimate["basis"]["limits"]["maxBytes"], 42)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
655
tests/unit/services/test_inheritFlags.py
Normal file
655
tests/unit/services/test_inheritFlags.py
Normal file
|
|
@ -0,0 +1,655 @@
|
|||
"""Unit tests for `_inheritFlags` cascade-inherit helpers.
|
||||
|
||||
Verifies:
|
||||
- getEffectiveFlag mode='walk': walks ancestors via path-prefix matching
|
||||
- getEffectiveFlag mode='aggregate': returns 'mixed' when subtree diverges
|
||||
- cascadeResetDescendants: bottom-up reset returning List[str]
|
||||
- cascadeResetDescendantsFds: same for FeatureDataSource
|
||||
- collectAncestorChain / collectAncestorChainFds: ancestor discovery
|
||||
- buildEffectiveByConnection / buildEffectiveByWorkspaceFds: batch compute
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import unittest
|
||||
from typing import List
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from modules.serviceCenter.services.serviceKnowledge import _inheritFlags
|
||||
|
||||
|
||||
def _ds(idVal: str, path: str, **flags) -> dict:
|
||||
"""Build a DataSource dict with sensible defaults for a fixture."""
|
||||
base = {
|
||||
"id": idVal,
|
||||
"connectionId": "conn-1",
|
||||
"sourceType": "sharepointFolder",
|
||||
"path": path,
|
||||
"neutralize": None,
|
||||
"ragIndexEnabled": None,
|
||||
"scope": None,
|
||||
}
|
||||
base.update(flags)
|
||||
return base
|
||||
|
||||
|
||||
def _fds(idVal: str, *, tableName: str, recordFilter=None, featureInstanceId="fi-1", **flags) -> dict:
|
||||
"""Build a FeatureDataSource dict fixture."""
|
||||
base = {
|
||||
"id": idVal,
|
||||
"workspaceInstanceId": "ws-1",
|
||||
"featureInstanceId": featureInstanceId,
|
||||
"tableName": tableName,
|
||||
"recordFilter": recordFilter,
|
||||
"neutralize": None,
|
||||
"scope": None,
|
||||
}
|
||||
base.update(flags)
|
||||
return base
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# DataSource: getEffectiveFlag mode='walk'
|
||||
# ===========================================================================
|
||||
|
||||
class TestEffectiveFlagWalk(unittest.TestCase):
    """`getEffectiveFlag` in its default (walk) mode for DataSource fixtures."""

    def test_explicit_own_value_wins(self):
        """A record's own explicit flag beats the root's value."""
        top = _ds("r", "/", neutralize=False)
        node = _ds("l", "/folder/sub", neutralize=True)
        effective = _inheritFlags.getEffectiveFlag(node, "neutralize", [top, node])
        self.assertTrue(effective)

    def test_inherits_from_root_when_own_is_none(self):
        """With no own value the root's flag applies."""
        top = _ds("r", "/", neutralize=True)
        node = _ds("l", "/folder/sub")
        effective = _inheritFlags.getEffectiveFlag(node, "neutralize", [top, node])
        self.assertTrue(effective)

    def test_default_false_when_chain_empty(self):
        """Without any ancestor, `neutralize` resolves to a falsy default."""
        node = _ds("l", "/folder/sub")
        effective = _inheritFlags.getEffectiveFlag(node, "neutralize", [node])
        self.assertFalse(effective)

    def test_nearest_ancestor_wins_over_distant(self):
        """The closer ancestor's value shadows the root's."""
        top = _ds("r", "/", neutralize=False)
        middle = _ds("m", "/folder", neutralize=True)
        node = _ds("l", "/folder/sub")
        effective = _inheritFlags.getEffectiveFlag(node, "neutralize", [top, middle, node])
        self.assertTrue(effective)

    def test_different_connection_ignored(self):
        """Records on another connection do not act as ancestors."""
        foreign = _ds("o", "/", connectionId="conn-2", neutralize=True)
        node = _ds("l", "/folder")
        effective = _inheritFlags.getEffectiveFlag(node, "neutralize", [foreign, node])
        self.assertFalse(effective)

    def test_different_sourcetype_ignored(self):
        """A root of another (non-authority) source type is not an ancestor."""
        foreign = _ds("o", "/", sourceType="outlookFolder", neutralize=True)
        node = _ds("l", "/folder")
        effective = _inheritFlags.getEffectiveFlag(node, "neutralize", [foreign, node])
        self.assertFalse(effective)

    def test_path_separator_required(self):
        """`/foo` is not an ancestor of `/foobar`."""
        lookalike = _ds("a", "/foo", neutralize=True)
        node = _ds("l", "/foobar")
        effective = _inheritFlags.getEffectiveFlag(node, "neutralize", [lookalike, node])
        self.assertFalse(effective)

    def test_root_is_ancestor_of_everything(self):
        """The `/` record is an ancestor of any deeper path."""
        top = _ds("r", "/", neutralize=True)
        node = _ds("l", "/anything/anywhere")
        effective = _inheritFlags.getEffectiveFlag(node, "neutralize", [top, node])
        self.assertTrue(effective)

    def test_scope_inheritance_with_string_default(self):
        """String-valued `scope` is inherited just like the boolean flags."""
        top = _ds("r", "/", scope="mandate")
        node = _ds("l", "/folder")
        effective = _inheritFlags.getEffectiveFlag(node, "scope", [top, node])
        self.assertEqual(effective, "mandate")

    def test_scope_default_personal_when_empty(self):
        """Without any ancestor, `scope` resolves to 'personal'."""
        node = _ds("l", "/folder")
        effective = _inheritFlags.getEffectiveFlag(node, "scope", [node])
        self.assertEqual(effective, "personal")

    def test_unknown_flag_raises(self):
        """An unsupported flag name raises ValueError."""
        node = _ds("l", "/")
        self.assertRaises(
            ValueError, _inheritFlags.getEffectiveFlag, node, "unknownFlag", [node]
        )

    def test_explicit_false_overrides_inherited_true(self):
        """An explicit False is a real value, not 'unset'."""
        top = _ds("r", "/", neutralize=True)
        node = _ds("l", "/folder", neutralize=False)
        effective = _inheritFlags.getEffectiveFlag(node, "neutralize", [top, node])
        self.assertFalse(effective)

    def test_connection_root_inherits_cross_sourcetype(self):
        """An 'msft' root at `/` feeds both SharePoint and Outlook roots."""
        authorityRoot = _ds("conn", "/", sourceType="msft", neutralize=True)
        sharepoint = _ds("sp", "/", sourceType="sharepointFolder")
        outlook = _ds("ol", "/", sourceType="outlookFolder")
        everything = [authorityRoot, sharepoint, outlook]
        for service in (sharepoint, outlook):
            self.assertTrue(_inheritFlags.getEffectiveFlag(service, "neutralize", everything))

    def test_same_sourcetype_ancestor_wins_over_connection_root(self):
        """A same-type ancestor takes precedence over the 'msft' root."""
        authorityRoot = _ds("conn", "/", sourceType="msft", neutralize=True)
        sharepointRoot = _ds("sp", "/", sourceType="sharepointFolder", neutralize=False)
        sharepointLeaf = _ds("spl", "/sites/x", sourceType="sharepointFolder")
        everything = [authorityRoot, sharepointRoot, sharepointLeaf]
        effective = _inheritFlags.getEffectiveFlag(sharepointLeaf, "neutralize", everything)
        self.assertFalse(effective)

    def test_connection_root_does_not_self_inherit(self):
        """A lone 'msft' root with no value resolves to the falsy default."""
        authorityRoot = _ds("conn", "/", sourceType="msft")
        effective = _inheritFlags.getEffectiveFlag(authorityRoot, "neutralize", [authorityRoot])
        self.assertFalse(effective)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# DataSource: getEffectiveFlag mode='aggregate'
|
||||
# ===========================================================================
|
||||
|
||||
class TestEffectiveFlagAggregate(unittest.TestCase):
    """`getEffectiveFlag(..., mode="aggregate")` for DataSource fixtures.

    The function under test may return the string ``"mixed"``, which is
    truthy. Tests that expect a concrete ``True`` therefore compare by
    equality instead of using ``assertTrue``; otherwise a wrong ``"mixed"``
    result would pass unnoticed.
    """

    def test_leaf_without_descendants_returns_concrete(self):
        """A record with no descendants reports its own concrete value."""
        leaf = _ds("l", "/folder", neutralize=True)
        effective = _inheritFlags.getEffectiveFlag(leaf, "neutralize", [leaf], mode="aggregate")
        # Equality, not truthiness: "mixed" must fail here.
        self.assertEqual(effective, True)

    def test_all_descendants_same_returns_concrete(self):
        """A uniform subtree reports the shared concrete value."""
        root = _ds("r", "/", neutralize=True)
        child1 = _ds("c1", "/a", neutralize=True)
        child2 = _ds("c2", "/b")  # inherits True from root
        allDs = [root, child1, child2]
        effective = _inheritFlags.getEffectiveFlag(root, "neutralize", allDs, mode="aggregate")
        # Equality, not truthiness: "mixed" must fail here.
        self.assertEqual(effective, True)

    def test_divergent_descendants_returns_mixed(self):
        """A subtree with differing effective values reports 'mixed'."""
        root = _ds("r", "/", neutralize=True)
        child1 = _ds("c1", "/a", neutralize=False)
        child2 = _ds("c2", "/b")  # inherits True from root
        allDs = [root, child1, child2]
        self.assertEqual(_inheritFlags.getEffectiveFlag(root, "neutralize", allDs, mode="aggregate"), "mixed")

    def test_mixed_scope(self):
        """Divergent `scope` values also aggregate to 'mixed'."""
        root = _ds("r", "/", scope="personal")
        child1 = _ds("c1", "/a", scope="team")
        child2 = _ds("c2", "/b")  # inherits personal from root
        allDs = [root, child1, child2]
        self.assertEqual(_inheritFlags.getEffectiveFlag(root, "scope", allDs, mode="aggregate"), "mixed")

    def test_all_scope_same_explicit_returns_concrete(self):
        """A uniform `scope` subtree reports the shared scope string."""
        root = _ds("r", "/", scope="team")
        child1 = _ds("c1", "/a", scope="team")
        child2 = _ds("c2", "/b")  # inherits team
        allDs = [root, child1, child2]
        self.assertEqual(_inheritFlags.getEffectiveFlag(root, "scope", allDs, mode="aggregate"), "team")

    def test_connection_root_aggregate_cross_sourcetype(self):
        """The 'msft' root aggregates over roots of other source types."""
        connRoot = _ds("conn", "/", sourceType="msft", neutralize=True)
        spExplicit = _ds("sp", "/", sourceType="sharepointFolder", neutralize=False)
        olInherit = _ds("ol", "/", sourceType="outlookFolder")  # inherits True
        allDs = [connRoot, spExplicit, olInherit]
        self.assertEqual(
            _inheritFlags.getEffectiveFlag(connRoot, "neutralize", allDs, mode="aggregate"),
            "mixed",
        )

    def test_mid_level_aggregate_only_considers_own_subtree(self):
        """Siblings outside a node's subtree do not affect its aggregate."""
        root = _ds("r", "/", neutralize=True)
        mid = _ds("m", "/folder", neutralize=True)
        midChild = _ds("mc", "/folder/sub", neutralize=True)
        sibling = _ds("s", "/other", neutralize=False)  # not under mid
        allDs = [root, mid, midChild, sibling]
        # mid's subtree is just midChild(True) + mid(True) = uniform.
        # Equality, not truthiness: "mixed" must fail here.
        self.assertEqual(
            _inheritFlags.getEffectiveFlag(mid, "neutralize", allDs, mode="aggregate"),
            True,
        )
        # root's subtree includes sibling(False) = mixed
        self.assertEqual(
            _inheritFlags.getEffectiveFlag(root, "neutralize", allDs, mode="aggregate"),
            "mixed",
        )

    def test_walk_mode_never_returns_mixed(self):
        """Walk mode reports the root's own value even if a child diverges."""
        root = _ds("r", "/", neutralize=True)
        child = _ds("c", "/a", neutralize=False)
        allDs = [root, child]
        effective = _inheritFlags.getEffectiveFlag(root, "neutralize", allDs, mode="walk")
        # assertTrue would accept the truthy string "mixed" and defeat the
        # purpose of this test, so pin the exact concrete value.
        self.assertEqual(effective, True)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# DataSource: cascadeResetDescendants (bottom-up, List[str])
|
||||
# ===========================================================================
|
||||
|
||||
class TestCascadeReset(unittest.TestCase):
    """`cascadeResetDescendants` against a mocked root interface."""

    def _makeRootIf(self, dataSources: List[dict]):
        """Return `(rootIf, writes)`.

        `rootIf.db.getRecordset` returns `dataSources`, and every
        `rootIf.db.recordModify` call appends `(recordId, fields)` to `writes`.
        """
        writes = []
        fakeRoot = MagicMock()
        fakeRoot.db.getRecordset = MagicMock(return_value=dataSources)
        fakeRoot.db.recordModify = MagicMock(
            side_effect=lambda model, recordId, fields: writes.append((recordId, fields))
        )
        return fakeRoot, writes

    def test_returns_list_of_ids(self):
        """The return value is a list holding the reset record ids."""
        folder = _ds("p", "/sites", neutralize=True)
        override = _ds("c1", "/sites/folder1", neutralize=False)
        fakeRoot, _ = self._makeRootIf([folder, override])
        resetIds = _inheritFlags.cascadeResetDescendants(fakeRoot, folder, "neutralize")
        self.assertIsInstance(resetIds, list)
        self.assertEqual(resetIds, ["c1"])

    def test_resets_only_explicit_descendants(self):
        """Only descendants with an explicit value are written."""
        folder = _ds("p", "/sites", neutralize=True)
        override = _ds("c1", "/sites/folder1", neutralize=False)
        inheriting = _ds("c2", "/sites/folder2")
        outsider = _ds("s", "/other", neutralize=True)
        fakeRoot, writes = self._makeRootIf([folder, override, inheriting, outsider])

        resetIds = _inheritFlags.cascadeResetDescendants(fakeRoot, folder, "neutralize")

        self.assertEqual(resetIds, ["c1"])
        self.assertEqual(writes, [("c1", {"neutralize": None})])

    def test_bottom_up_order(self):
        """Deepest items are reset first."""
        top = _ds("p", "/", neutralize=True)
        depth1 = _ds("l1", "/a", neutralize=False)
        depth2 = _ds("l2", "/a/b", neutralize=False)
        depth3 = _ds("l3", "/a/b/c", neutralize=False)
        fakeRoot, writes = self._makeRootIf([top, depth1, depth2, depth3])

        resetIds = _inheritFlags.cascadeResetDescendants(fakeRoot, top, "neutralize")

        self.assertEqual(resetIds, ["l3", "l2", "l1"])

    def test_deep_cascade_through_null_items(self):
        """null items are skipped (no DB write) but cascade continues deeper."""
        top = _ds("p", "/", neutralize=True)
        unset = _ds("n", "/a")  # no explicit value: no write, yet not a barrier
        deepOverride = _ds("d", "/a/b", neutralize=False)
        fakeRoot, writes = self._makeRootIf([top, unset, deepOverride])

        resetIds = _inheritFlags.cascadeResetDescendants(fakeRoot, top, "neutralize")

        self.assertEqual(resetIds, ["d"])
        self.assertEqual(writes, [("d", {"neutralize": None})])

    def test_does_not_modify_parent(self):
        """The record the cascade starts from is never written."""
        top = _ds("p", "/", neutralize=True)
        override = _ds("c", "/a", neutralize=False)
        fakeRoot, writes = self._makeRootIf([top, override])
        _inheritFlags.cascadeResetDescendants(fakeRoot, top, "neutralize")
        touchedIds = [recordId for recordId, _ in writes]
        self.assertNotIn("p", touchedIds)

    def test_connection_root_cascades_cross_sourcetype(self):
        """An 'msft' root resets explicit values across source types."""
        authorityRoot = _ds("conn", "/", sourceType="msft", neutralize=True)
        sharepointRoot = _ds("sp", "/", sourceType="sharepointFolder", neutralize=False)
        outlookRoot = _ds("ol", "/", sourceType="outlookFolder")
        sharepointLeaf = _ds("sp-leaf", "/sites/x", sourceType="sharepointFolder", neutralize=True)
        fakeRoot, writes = self._makeRootIf(
            [authorityRoot, sharepointRoot, outlookRoot, sharepointLeaf]
        )

        resetIds = _inheritFlags.cascadeResetDescendants(fakeRoot, authorityRoot, "neutralize")

        self.assertEqual(set(resetIds), {"sp", "sp-leaf"})
        # sp-leaf is deeper, should come first
        self.assertEqual(resetIds[0], "sp-leaf")

    def test_does_not_cross_sourcetype_for_non_authority(self):
        """A SharePoint root does not reset Outlook records."""
        sharepointRoot = _ds("p", "/", neutralize=True, sourceType="sharepointFolder")
        outlookItem = _ds("o", "/anything", neutralize=False, sourceType="outlookFolder")
        fakeRoot, writes = self._makeRootIf([sharepointRoot, outlookItem])
        resetIds = _inheritFlags.cascadeResetDescendants(fakeRoot, sharepointRoot, "neutralize")
        self.assertEqual(resetIds, [])

    def test_unknown_flag_raises(self):
        """An unsupported flag name raises ValueError."""
        top = _ds("p", "/", neutralize=True)
        fakeRoot, _ = self._makeRootIf([top])
        self.assertRaises(
            ValueError, _inheritFlags.cascadeResetDescendants, fakeRoot, top, "unknownFlag"
        )
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# DataSource: collectAncestorChain
|
||||
# ===========================================================================
|
||||
|
||||
class TestCollectAncestorChain(unittest.TestCase):
    """`collectAncestorChain` ordering for DataSource fixtures."""

    @staticmethod
    def _idsOf(chain):
        """Read each chain entry's id through the module's own accessor."""
        return [_inheritFlags._getRecordValue(entry, "id") for entry in chain]

    def test_returns_nearest_first(self):
        """The chain lists the closest ancestor first and the root last."""
        top = _ds("r", "/", neutralize=True)
        middle = _ds("m", "/a")
        node = _ds("l", "/a/b")
        ancestors = _inheritFlags.collectAncestorChain(node, [top, middle, node])
        self.assertEqual(self._idsOf(ancestors), ["m", "r"])

    def test_connection_root_is_last(self):
        """The 'msft' root comes after the same-type ancestor."""
        authorityRoot = _ds("conn", "/", sourceType="msft")
        sharepointRoot = _ds("sp", "/", sourceType="sharepointFolder")
        sharepointLeaf = _ds("spl", "/sub", sourceType="sharepointFolder")
        ancestors = _inheritFlags.collectAncestorChain(
            sharepointLeaf, [authorityRoot, sharepointRoot, sharepointLeaf]
        )
        self.assertEqual(self._idsOf(ancestors), ["sp", "conn"])

    def test_root_has_no_ancestors(self):
        """A lone root has an empty ancestor chain."""
        top = _ds("r", "/")
        ancestors = _inheritFlags.collectAncestorChain(top, [top])
        self.assertEqual(ancestors, [])
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# DataSource: buildEffectiveByConnection
|
||||
# ===========================================================================
|
||||
|
||||
class TestBuildEffectiveByConnection(unittest.TestCase):
    """`buildEffectiveByConnection` batch results keyed by record id."""

    def test_walk_mode(self):
        """Walk mode maps every id to its own or inherited value."""
        records = [
            _ds("r", "/", neutralize=True),
            _ds("c", "/a", neutralize=False),
            _ds("l", "/a/b"),  # inherits False from "c"
        ]
        byId = _inheritFlags.buildEffectiveByConnection(records, "neutralize", mode="walk")
        self.assertEqual(byId, {"r": True, "c": False, "l": False})

    def test_aggregate_mode(self):
        """Aggregate mode marks the root 'mixed' when its subtree diverges."""
        records = [
            _ds("r", "/", neutralize=True),
            _ds("c", "/a", neutralize=False),
            _ds("l", "/a/b"),  # inherits False from "c"
        ]
        byId = _inheritFlags.buildEffectiveByConnection(records, "neutralize", mode="aggregate")
        self.assertEqual(byId["r"], "mixed")
        self.assertEqual(byId["c"], False)
        self.assertEqual(byId["l"], False)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# FeatureDataSource: getEffectiveFlagFds
|
||||
# ===========================================================================
|
||||
|
||||
class TestFdsEffectiveFlagWalk(unittest.TestCase):
    """`getEffectiveFlagFds` in its default (walk) mode."""

    def test_own_explicit_wins(self):
        """A record's explicit value beats the workspace wildcard."""
        workspace = _fds("ws", tableName="*", neutralize=False)
        record = _fds("r", tableName="Pos", recordFilter={"id": "1"}, neutralize=True)
        effective = _inheritFlags.getEffectiveFlagFds(record, "neutralize", [workspace, record])
        self.assertTrue(effective)

    def test_inherits_from_table_wildcard(self):
        """A record without a value takes its table entry's value."""
        table = _fds("t", tableName="Pos", neutralize=True)
        record = _fds("r", tableName="Pos", recordFilter={"id": "1"})
        effective = _inheritFlags.getEffectiveFlagFds(record, "neutralize", [table, record])
        self.assertTrue(effective)

    def test_table_wildcard_beats_workspace_wildcard(self):
        """The table entry shadows the workspace wildcard."""
        workspace = _fds("ws", tableName="*", neutralize=False)
        table = _fds("t", tableName="Pos", neutralize=True)
        record = _fds("r", tableName="Pos", recordFilter={"id": "1"})
        everything = [workspace, table, record]
        effective = _inheritFlags.getEffectiveFlagFds(record, "neutralize", everything)
        self.assertTrue(effective)

    def test_workspace_wildcard_inherits_when_no_table(self):
        """With no table entry the workspace wildcard applies."""
        workspace = _fds("ws", tableName="*", neutralize=True)
        record = _fds("r", tableName="Pos", recordFilter={"id": "1"})
        effective = _inheritFlags.getEffectiveFlagFds(record, "neutralize", [workspace, record])
        self.assertTrue(effective)

    def test_default_false_when_chain_empty(self):
        """Without any ancestor, `neutralize` resolves to a falsy default."""
        record = _fds("r", tableName="Pos", recordFilter={"id": "1"})
        effective = _inheritFlags.getEffectiveFlagFds(record, "neutralize", [record])
        self.assertFalse(effective)

    def test_unknown_flag_raises(self):
        """An unsupported flag name raises ValueError."""
        workspace = _fds("r", tableName="*")
        self.assertRaises(
            ValueError, _inheritFlags.getEffectiveFlagFds, workspace, "doesNotExist", [workspace]
        )
|
||||
|
||||
|
||||
class TestFdsEffectiveFlagAggregate(unittest.TestCase):
    """`getEffectiveFlagFds(..., mode="aggregate")`.

    The function under test may return the string ``"mixed"``, which is
    truthy. Tests that expect a concrete ``True`` therefore compare by
    equality instead of using ``assertTrue``; otherwise a wrong ``"mixed"``
    result would pass unnoticed.
    """

    def test_leaf_without_descendants(self):
        """A record with no descendants reports its own concrete value."""
        rec = _fds("r", tableName="Pos", recordFilter={"id": "1"}, neutralize=True)
        effective = _inheritFlags.getEffectiveFlagFds(rec, "neutralize", [rec], mode="aggregate")
        # Equality, not truthiness: "mixed" must fail here.
        self.assertEqual(effective, True)

    def test_all_descendants_same(self):
        """A uniform subtree reports the shared concrete value."""
        ws = _fds("ws", tableName="*", neutralize=True)
        tbl = _fds("t", tableName="Pos")  # inherits True
        rec = _fds("r", tableName="Pos", recordFilter={"id": "1"})  # inherits True
        allFds = [ws, tbl, rec]
        effective = _inheritFlags.getEffectiveFlagFds(ws, "neutralize", allFds, mode="aggregate")
        # Equality, not truthiness: "mixed" must fail here.
        self.assertEqual(effective, True)

    def test_divergent_descendants_returns_mixed(self):
        """A subtree with differing effective values reports 'mixed'."""
        ws = _fds("ws", tableName="*", neutralize=True)
        tbl = _fds("t", tableName="Pos", neutralize=False)
        rec = _fds("r", tableName="Pos", recordFilter={"id": "1"})  # inherits False from tbl
        allFds = [ws, tbl, rec]
        self.assertEqual(
            _inheritFlags.getEffectiveFlagFds(ws, "neutralize", allFds, mode="aggregate"),
            "mixed",
        )

    def test_table_aggregate_own_subtree_only(self):
        """A table's aggregate ignores entries belonging to other tables."""
        ws = _fds("ws", tableName="*", neutralize=True)
        tblA = _fds("tA", tableName="A", neutralize=True)
        recA = _fds("rA", tableName="A", recordFilter={"id": "1"}, neutralize=True)
        tblB = _fds("tB", tableName="B", neutralize=False)
        allFds = [ws, tblA, recA, tblB]
        # tblA subtree: all True.
        # Equality, not truthiness: "mixed" must fail here.
        self.assertEqual(
            _inheritFlags.getEffectiveFlagFds(tblA, "neutralize", allFds, mode="aggregate"),
            True,
        )
        # ws subtree: mixed (tblB is False)
        self.assertEqual(
            _inheritFlags.getEffectiveFlagFds(ws, "neutralize", allFds, mode="aggregate"),
            "mixed",
        )
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# FeatureDataSource: cascadeResetDescendantsFds (bottom-up, List[str])
|
||||
# ===========================================================================
|
||||
|
||||
class TestFdsCascadeReset(unittest.TestCase):
    """`cascadeResetDescendantsFds` against a mocked root interface."""

    def _makeRootIf(self, fdses):
        """Return `(rootIf, writes)`.

        `rootIf.db.getRecordset` returns `fdses`, and every
        `rootIf.db.recordModify` call appends `(recordId, fields)` to `writes`.
        """
        writes = []
        fakeRoot = MagicMock()
        fakeRoot.db.getRecordset = MagicMock(return_value=fdses)
        fakeRoot.db.recordModify = MagicMock(
            side_effect=lambda model, recordId, fields: writes.append((recordId, fields))
        )
        return fakeRoot, writes

    def test_returns_list_of_ids(self):
        """The return value is a list holding the reset record ids."""
        workspace = _fds("ws", tableName="*", neutralize=True)
        table = _fds("t", tableName="Pos", neutralize=False)
        fakeRoot, _ = self._makeRootIf([workspace, table])
        resetIds = _inheritFlags.cascadeResetDescendantsFds(fakeRoot, workspace, "neutralize")
        self.assertIsInstance(resetIds, list)
        self.assertEqual(resetIds, ["t"])

    def test_workspace_cascades_to_all_explicit_descendants(self):
        """The workspace wildcard resets explicit tables and records."""
        workspace = _fds("ws", tableName="*", neutralize=True)
        tableOverride = _fds("t", tableName="Pos", neutralize=False)
        tableUnset = _fds("t2", tableName="Other")
        recordOverride = _fds("r", tableName="Pos", recordFilter={"id": "1"}, neutralize=True)
        fakeRoot, writes = self._makeRootIf(
            [workspace, tableOverride, tableUnset, recordOverride]
        )

        resetIds = _inheritFlags.cascadeResetDescendantsFds(fakeRoot, workspace, "neutralize")

        self.assertEqual(set(resetIds), {"t", "r"})
        # record is deeper (depth 2) than table (depth 1), should come first
        self.assertEqual(resetIds[0], "r")

    def test_table_cascades_only_to_same_table_records(self):
        """A table entry only resets records of its own table."""
        table = _fds("t", tableName="Pos", neutralize=True)
        sameTable = _fds("r1", tableName="Pos", recordFilter={"id": "1"}, neutralize=False)
        otherTable = _fds("r2", tableName="Other", recordFilter={"id": "1"}, neutralize=False)
        fakeRoot, writes = self._makeRootIf([table, sameTable, otherTable])

        resetIds = _inheritFlags.cascadeResetDescendantsFds(fakeRoot, table, "neutralize")

        self.assertEqual(resetIds, ["r1"])
        self.assertEqual(writes, [("r1", {"neutralize": None})])

    def test_record_has_no_cascade(self):
        """A record-level entry has nothing below it to reset."""
        record = _fds("r", tableName="Pos", recordFilter={"id": "1"}, neutralize=True)
        fakeRoot, writes = self._makeRootIf([record])
        resetIds = _inheritFlags.cascadeResetDescendantsFds(fakeRoot, record, "neutralize")
        self.assertEqual(resetIds, [])

    def test_unknown_flag_raises(self):
        """An unsupported flag name raises ValueError."""
        workspace = _fds("ws", tableName="*", neutralize=True)
        fakeRoot, _ = self._makeRootIf([workspace])
        self.assertRaises(
            ValueError,
            _inheritFlags.cascadeResetDescendantsFds,
            fakeRoot,
            workspace,
            "doesNotExist",
        )
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# FeatureDataSource: collectAncestorChainFds
|
||||
# ===========================================================================
|
||||
|
||||
class TestCollectAncestorChainFds(unittest.TestCase):
    """`collectAncestorChainFds` ordering: table first, workspace last."""

    def test_record_has_table_then_workspace(self):
        """A record's chain is its table entry followed by the workspace."""
        workspace = _fds("ws", tableName="*")
        table = _fds("t", tableName="Pos")
        record = _fds("r", tableName="Pos", recordFilter={"id": "1"})
        ancestors = _inheritFlags.collectAncestorChainFds(record, [workspace, table, record])
        self.assertEqual([entry["id"] for entry in ancestors], ["t", "ws"])

    def test_table_has_only_workspace(self):
        """A table entry's only ancestor is the workspace wildcard."""
        workspace = _fds("ws", tableName="*")
        table = _fds("t", tableName="Pos")
        ancestors = _inheritFlags.collectAncestorChainFds(table, [workspace, table])
        ancestorIds = [entry["id"] for entry in ancestors]
        self.assertEqual(ancestorIds, ["ws"])

    def test_workspace_has_no_ancestors(self):
        """The workspace wildcard has an empty ancestor chain."""
        workspace = _fds("ws", tableName="*")
        ancestors = _inheritFlags.collectAncestorChainFds(workspace, [workspace])
        self.assertEqual(ancestors, [])
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# FeatureDataSource: buildEffectiveByWorkspaceFds
|
||||
# ===========================================================================
|
||||
|
||||
class TestBuildEffectiveByWorkspaceFds(unittest.TestCase):
    """`buildEffectiveByWorkspaceFds` batch results keyed by record id."""

    def test_walk_mode(self):
        """Walk mode maps every id to its own or inherited value."""
        entries = [
            _fds("ws", tableName="*", neutralize=True),
            _fds("t", tableName="Pos", neutralize=False),
            _fds("r", tableName="Pos", recordFilter={"id": "1"}),  # inherits False from "t"
        ]
        byId = _inheritFlags.buildEffectiveByWorkspaceFds(entries, "neutralize", mode="walk")
        self.assertEqual(byId, {"ws": True, "t": False, "r": False})

    def test_aggregate_mode(self):
        """Aggregate mode marks the workspace 'mixed' when its subtree diverges."""
        entries = [
            _fds("ws", tableName="*", neutralize=True),
            _fds("t", tableName="Pos", neutralize=False),
            _fds("r", tableName="Pos", recordFilter={"id": "1"}),
        ]
        byId = _inheritFlags.buildEffectiveByWorkspaceFds(entries, "neutralize", mode="aggregate")
        self.assertEqual(byId["ws"], "mixed")
        self.assertEqual(byId["t"], False)
        self.assertEqual(byId["r"], False)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# resolveEffectiveForPath (with and without own record)
|
||||
# ===========================================================================
|
||||
|
||||
class TestResolveEffectiveForPath(unittest.TestCase):
    """`resolveEffectiveForPath` for paths with and without a stored record."""

    def test_with_exact_record(self):
        """An exact record supplies its own flag and inherits the others."""
        records = [
            _ds("r", "/", neutralize=True, scope="mandate", ragIndexEnabled=False),
            _ds("l", "/folder/sub", neutralize=False),
        ]
        resolved = _inheritFlags.resolveEffectiveForPath(
            "conn-1", "sharepointFolder", "/folder/sub", records
        )
        self.assertEqual(resolved["effectiveNeutralize"], False)
        self.assertEqual(resolved["effectiveScope"], "mandate")
        self.assertEqual(resolved["effectiveRagIndexEnabled"], False)

    def test_without_record_inherits_from_ancestor(self):
        """A path with no record of its own takes every flag from the root."""
        records = [_ds("r", "/", neutralize=True, scope="mandate", ragIndexEnabled=True)]
        resolved = _inheritFlags.resolveEffectiveForPath(
            "conn-1", "sharepointFolder", "/deep/path/file.txt", records
        )
        self.assertEqual(resolved["effectiveNeutralize"], True)
        self.assertEqual(resolved["effectiveScope"], "mandate")
        self.assertEqual(resolved["effectiveRagIndexEnabled"], True)

    def test_without_record_inherits_from_closest_ancestor(self):
        """The nearest stored ancestor wins over the root."""
        records = [
            _ds("r", "/", neutralize=True, ragIndexEnabled=True),
            _ds("m", "/folder", neutralize=False, ragIndexEnabled=False),
        ]
        resolved = _inheritFlags.resolveEffectiveForPath(
            "conn-1", "sharepointFolder", "/folder/sub/file.txt", records
        )
        self.assertEqual(resolved["effectiveNeutralize"], False)
        self.assertEqual(resolved["effectiveRagIndexEnabled"], False)

    def test_without_record_no_ancestors_returns_defaults(self):
        """With no records at all the defaults are returned."""
        records: list = []
        resolved = _inheritFlags.resolveEffectiveForPath(
            "conn-1", "sharepointFolder", "/path", records
        )
        self.assertEqual(resolved["effectiveNeutralize"], False)
        self.assertEqual(resolved["effectiveScope"], "personal")
        self.assertEqual(resolved["effectiveRagIndexEnabled"], False)

    def test_connection_root_covers_service_subtree(self):
        """An 'msft' root applies to a SharePoint path below it."""
        records = [_ds("cr", "/", neutralize=True, sourceType="msft")]
        resolved = _inheritFlags.resolveEffectiveForPath(
            "conn-1", "sharepointFolder", "/sites/intranet", records
        )
        self.assertEqual(resolved["effectiveNeutralize"], True)

    def test_exact_record_with_aggregate_mixed(self):
        """Aggregate mode on the root reports 'mixed' for a divergent child."""
        records = [
            _ds("r", "/", neutralize=True),
            _ds("l", "/sub", neutralize=False),
        ]
        resolved = _inheritFlags.resolveEffectiveForPath(
            "conn-1", "sharepointFolder", "/", records, mode="aggregate"
        )
        self.assertEqual(resolved["effectiveNeutralize"], "mixed")
|
||||
|
||||
|
||||
class TestResolveEffectiveForFds(unittest.TestCase):
    """`resolveEffectiveForFds` for table lookups with and without a record."""

    def test_with_exact_record(self):
        """An exact table entry supplies its own neutralize and scope."""
        entries = [
            _fds("ws", tableName="*", neutralize=True, scope="mandate"),
            _fds("t", tableName="Pos", neutralize=False, scope="personal"),
        ]
        resolved = _inheritFlags.resolveEffectiveForFds("fi-1", "Pos", None, entries)
        self.assertEqual(resolved["effectiveNeutralize"], False)
        self.assertEqual(resolved["effectiveScope"], "personal")
        self.assertEqual(resolved["effectiveRagIndexEnabled"], False)

    def test_without_record_inherits_from_workspace_wildcard(self):
        """A table without an entry takes every flag from the workspace."""
        entries = [
            _fds("ws", tableName="*", neutralize=True, scope="mandate", ragIndexEnabled=True),
        ]
        resolved = _inheritFlags.resolveEffectiveForFds("fi-1", "Unknown", None, entries)
        self.assertEqual(resolved["effectiveNeutralize"], True)
        self.assertEqual(resolved["effectiveScope"], "mandate")
        self.assertEqual(resolved["effectiveRagIndexEnabled"], True)

    def test_without_record_no_ancestors_returns_defaults(self):
        """With no entries at all the defaults are returned."""
        entries: list = []
        resolved = _inheritFlags.resolveEffectiveForFds("fi-1", "Pos", None, entries)
        self.assertEqual(resolved["effectiveNeutralize"], False)
        self.assertEqual(resolved["effectiveScope"], "personal")
        self.assertEqual(resolved["effectiveRagIndexEnabled"], False)

    def test_rag_inherits_when_table_overrides_neutralize_only(self):
        """Tables that override only neutralize must still inherit RAG from parent."""
        entries = [
            _fds("ws", tableName="*", ragIndexEnabled=True),
            _fds("t", tableName="Pos", neutralize=False),
        ]
        resolved = _inheritFlags.resolveEffectiveForFds("fi-1", "Pos", None, entries)
        self.assertEqual(resolved["effectiveRagIndexEnabled"], True)

    def test_rag_aggregate_mixed_when_descendants_diverge(self):
        """Aggregate mode on the workspace reports 'mixed' RAG for a divergent table."""
        entries = [
            _fds("ws", tableName="*", ragIndexEnabled=True),
            _fds("t", tableName="Pos", ragIndexEnabled=False),
        ]
        resolved = _inheritFlags.resolveEffectiveForFds(
            "fi-1", "*", None, entries, mode="aggregate"
        )
        self.assertEqual(resolved["effectiveRagIndexEnabled"], "mixed")

    def test_inheritable_fds_flags_includes_rag(self):
        """The inheritable-flag set lists RAG, neutralize and scope."""
        for flagName in ("ragIndexEnabled", "neutralize", "scope"):
            self.assertIn(flagName, _inheritFlags._INHERITABLE_FDS_FLAGS)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# FDS cascade resets RAG (in addition to neutralize and scope)
|
||||
# ===========================================================================
|
||||
|
||||
class TestCascadeResetFdsRag(unittest.TestCase):
    """`cascadeResetDescendantsFds` also handles the `ragIndexEnabled` flag."""

    def test_cascade_resets_rag_on_descendants(self):
        """An explicit RAG value on a table is reset from the workspace."""
        workspace = _fds("ws", tableName="*")
        table = _fds("t", tableName="Pos", ragIndexEnabled=False)
        fakeRoot = MagicMock()
        fakeRoot.db.getRecordset.return_value = [workspace, table]
        fakeRoot.db.recordModify = MagicMock()
        resetIds = _inheritFlags.cascadeResetDescendantsFds(fakeRoot, workspace, "ragIndexEnabled")
        self.assertIn("t", resetIds)
        fakeRoot.db.recordModify.assert_called()
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Path normalization
|
||||
# ===========================================================================
|
||||
|
||||
class TestPathNormalization(unittest.TestCase):
    """`_normalisePath` canonical form: leading slash, no trailing slash."""

    def test_empty_path_normalises_to_root(self):
        """Empty string and None both become '/'."""
        for blank in ("", None):
            self.assertEqual(_inheritFlags._normalisePath(blank), "/")

    def test_trailing_slash_stripped(self):
        """A trailing slash is removed, except for the bare root."""
        for raw, wanted in (("/foo/", "/foo"), ("/", "/")):
            self.assertEqual(_inheritFlags._normalisePath(raw), wanted)

    def test_leading_slash_added(self):
        """A relative-looking path gains a leading slash."""
        normalised = _inheritFlags._normalisePath("foo/bar")
        self.assertEqual(normalised, "/foo/bar")
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
|
|
@ -99,11 +99,18 @@ def test_onConnectionRevoked_ignores_missing_id(monkeypatch):
|
|||
assert seen == []
|
||||
|
||||
|
||||
def _stubRagEnabledDs(monkeypatch, dataSources):
    """Replace `consumer._loadRagEnabledDataSources` with a canned loader.

    The replacement ignores every positional and keyword argument and always
    returns `dataSources`, so tests don't need a live DB.
    """

    def _fakeLoader(*_args, **_kwargs):
        return dataSources

    monkeypatch.setattr(consumer, "_loadRagEnabledDataSources", _fakeLoader)
|
||||
|
||||
|
||||
def test_bootstrap_job_skips_unsupported_authority(monkeypatch):
|
||||
_stubRagEnabledDs(monkeypatch, [{"id": "ds1", "sourceType": "unknownType"}])
|
||||
|
||||
async def _run():
|
||||
result = await consumer._bootstrapJobHandler(
|
||||
{"payload": {"connectionId": "c1", "authority": "slack"}},
|
||||
lambda *_: None,
|
||||
lambda *_, **__: None,
|
||||
)
|
||||
return result
|
||||
|
||||
|
|
@ -114,13 +121,18 @@ def test_bootstrap_job_skips_unsupported_authority(monkeypatch):
|
|||
|
||||
|
||||
def test_bootstrap_job_dispatches_msft_parts(monkeypatch):
|
||||
_stubRagEnabledDs(monkeypatch, [
|
||||
{"id": "ds1", "sourceType": "sharepointFolder"},
|
||||
{"id": "ds2", "sourceType": "outlookFolder"},
|
||||
])
|
||||
|
||||
calls = {"sp": 0, "ol": 0}
|
||||
|
||||
async def _fakeSp(connectionId, progressCb=None):
|
||||
async def _fakeSp(connectionId, progressCb=None, dataSources=None):
|
||||
calls["sp"] += 1
|
||||
return {"indexed": 1}
|
||||
|
||||
async def _fakeOl(connectionId, progressCb=None):
|
||||
async def _fakeOl(connectionId, progressCb=None, dataSources=None):
|
||||
calls["ol"] += 1
|
||||
return {"indexed": 2}
|
||||
|
||||
|
|
@ -142,7 +154,7 @@ def test_bootstrap_job_dispatches_msft_parts(monkeypatch):
|
|||
async def _run():
|
||||
return await consumer._bootstrapJobHandler(
|
||||
{"payload": {"connectionId": "c1", "authority": "msft"}},
|
||||
lambda *_: None,
|
||||
lambda *_, **__: None,
|
||||
)
|
||||
|
||||
result = asyncio.run(_run())
|
||||
|
|
@ -152,13 +164,18 @@ def test_bootstrap_job_dispatches_msft_parts(monkeypatch):
|
|||
|
||||
|
||||
def test_bootstrap_job_dispatches_google_parts(monkeypatch):
|
||||
_stubRagEnabledDs(monkeypatch, [
|
||||
{"id": "ds1", "sourceType": "googleDriveFolder"},
|
||||
{"id": "ds2", "sourceType": "gmailFolder"},
|
||||
])
|
||||
|
||||
calls = {"gd": 0, "gm": 0}
|
||||
|
||||
async def _fakeGd(connectionId, progressCb=None):
|
||||
async def _fakeGd(connectionId, progressCb=None, dataSources=None):
|
||||
calls["gd"] += 1
|
||||
return {"indexed": 7}
|
||||
|
||||
async def _fakeGm(connectionId, progressCb=None):
|
||||
async def _fakeGm(connectionId, progressCb=None, dataSources=None):
|
||||
calls["gm"] += 1
|
||||
return {"indexed": 11}
|
||||
|
||||
|
|
@ -180,7 +197,7 @@ def test_bootstrap_job_dispatches_google_parts(monkeypatch):
|
|||
async def _run():
|
||||
return await consumer._bootstrapJobHandler(
|
||||
{"payload": {"connectionId": "c1", "authority": "google"}},
|
||||
lambda *_: None,
|
||||
lambda *_, **__: None,
|
||||
)
|
||||
|
||||
result = asyncio.run(_run())
|
||||
|
|
@ -190,9 +207,13 @@ def test_bootstrap_job_dispatches_google_parts(monkeypatch):
|
|||
|
||||
|
||||
def test_bootstrap_job_dispatches_clickup_part(monkeypatch):
|
||||
_stubRagEnabledDs(monkeypatch, [
|
||||
{"id": "ds1", "sourceType": "clickupList"},
|
||||
])
|
||||
|
||||
calls = {"cu": 0}
|
||||
|
||||
async def _fakeCu(connectionId, progressCb=None):
|
||||
async def _fakeCu(connectionId, progressCb=None, dataSources=None):
|
||||
calls["cu"] += 1
|
||||
return {"indexed": 4}
|
||||
|
||||
|
|
@ -207,7 +228,7 @@ def test_bootstrap_job_dispatches_clickup_part(monkeypatch):
|
|||
async def _run():
|
||||
return await consumer._bootstrapJobHandler(
|
||||
{"payload": {"connectionId": "c1", "authority": "clickup"}},
|
||||
lambda *_: None,
|
||||
lambda *_, **__: None,
|
||||
)
|
||||
|
||||
result = asyncio.run(_run())
|
||||
|
|
|
|||
79
tests/unit/services/test_ragLimits.py
Normal file
79
tests/unit/services/test_ragLimits.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
"""Unit tests for `_ragLimits` central helpers.
|
||||
|
||||
Verifies:
|
||||
- defaults are returned as fresh copies (no mutation leakage)
|
||||
- getStoredOverrides returns ONLY explicit overrides (walker contract)
|
||||
- getRagLimits merges defaults with overrides (API/cost-estimate contract)
|
||||
- non-int values in stored settings are dropped, not silently coerced
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import unittest
|
||||
|
||||
from modules.serviceCenter.services.serviceKnowledge import _ragLimits
|
||||
|
||||
|
||||
class TestGetDefaults(unittest.TestCase):
    """Checks on `_ragLimits.getDefaults` for the known limit kinds."""

    def test_files_defaults_have_all_keys(self):
        """The "files" defaults expose exactly four keys; maxBytes is 200 MiB."""
        filesDefaults = _ragLimits.getDefaults("files")
        expectedKeys = {"maxItems", "maxBytes", "maxFileSize", "maxDepth"}
        self.assertEqual(set(filesDefaults.keys()), expectedKeys)
        self.assertEqual(filesDefaults["maxBytes"], 200 * 1024 * 1024)

    def test_clickup_defaults(self):
        """The "clickup" defaults expose exactly the three ClickUp keys."""
        clickupDefaults = _ragLimits.getDefaults("clickup")
        expectedKeys = {"maxTasks", "maxWorkspaces", "maxListsPerWorkspace"}
        self.assertEqual(set(clickupDefaults.keys()), expectedKeys)

    def test_defaults_are_a_fresh_copy(self):
        """Mutating one returned dict must not affect the next call's result."""
        firstCopy = _ragLimits.getDefaults("files")
        firstCopy["maxBytes"] = 1

        secondCopy = _ragLimits.getDefaults("files")
        self.assertEqual(secondCopy["maxBytes"], 200 * 1024 * 1024)

    def test_unknown_kind_raises(self):
        """An unrecognised kind is rejected with `ValueError`."""
        self.assertRaises(ValueError, _ragLimits.getDefaults, "unknown")
|
||||
|
||||
|
||||
class TestGetStoredOverrides(unittest.TestCase):
    """Checks on `_ragLimits.getStoredOverrides` for the "files" kind."""

    def test_no_settings_returns_empty_dict(self):
        """A data source whose `settings` is None yields no overrides."""
        dataSource = {"id": "x", "settings": None}
        self.assertEqual(_ragLimits.getStoredOverrides(dataSource, "files"), {})

    def test_only_explicit_overrides_returned(self):
        """Only the key stored under `ragLimits` comes back."""
        dataSource = {"id": "x", "settings": {"ragLimits": {"maxBytes": 999}}}
        overrides = _ragLimits.getStoredOverrides(dataSource, "files")
        self.assertEqual(overrides, {"maxBytes": 999})

    def test_unknown_keys_dropped(self):
        """A key that is not a "files" limit ("bogus") is filtered out."""
        storedLimits = {"maxBytes": 999, "bogus": 1}
        dataSource = {"id": "x", "settings": {"ragLimits": storedLimits}}
        overrides = _ragLimits.getStoredOverrides(dataSource, "files")
        self.assertEqual(overrides, {"maxBytes": 999})

    def test_non_int_dropped(self):
        """A string value stored for `maxBytes` is discarded."""
        dataSource = {"id": "x", "settings": {"ragLimits": {"maxBytes": "not-a-number"}}}
        overrides = _ragLimits.getStoredOverrides(dataSource, "files")
        self.assertEqual(overrides, {})

    def test_none_or_garbage_settings_safe(self):
        """A None data source and a string `settings` both give `{}`."""
        self.assertEqual(_ragLimits.getStoredOverrides(None, "files"), {})

        garbageSource = {"id": "x", "settings": "garbage"}
        self.assertEqual(_ragLimits.getStoredOverrides(garbageSource, "files"), {})
|
||||
|
||||
|
||||
class TestGetRagLimits(unittest.TestCase):
    """Checks on `_ragLimits.getRagLimits` and its relation to stored overrides."""

    def test_no_settings_returns_defaults(self):
        """With `settings` set to None the result equals `FILES_LIMITS_DEFAULT`."""
        dataSource = {"id": "x", "settings": None}
        limits = _ragLimits.getRagLimits(dataSource, "files")
        self.assertEqual(limits, _ragLimits.FILES_LIMITS_DEFAULT)

    def test_partial_override_merges_with_defaults(self):
        """An overridden key wins; a key that is not overridden keeps its default."""
        dataSource = {"id": "x", "settings": {"ragLimits": {"maxBytes": 999}}}
        limits = _ragLimits.getRagLimits(dataSource, "files")

        self.assertEqual(limits["maxBytes"], 999)
        defaultMaxItems = _ragLimits.FILES_LIMITS_DEFAULT["maxItems"]
        self.assertEqual(limits["maxItems"], defaultMaxItems)

    def test_caller_can_distinguish_unset_from_set(self):
        """Walker contract: an unset key MUST NOT appear in `getStoredOverrides`."""
        dataSource = {"id": "x", "settings": {"ragLimits": {"maxBytes": 999}}}
        storedOverrides = _ragLimits.getStoredOverrides(dataSource, "files")

        self.assertIn("maxBytes", storedOverrides)
        self.assertNotIn("maxItems", storedOverrides)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
@ -42,7 +42,7 @@ from modules.features.teamsbot.datamodelTeamsbot import (
|
|||
from modules.features.teamsbot.service import (
|
||||
TeamsbotService,
|
||||
_activeServices,
|
||||
_sessionEvents,
|
||||
sessionEvents,
|
||||
getActiveService,
|
||||
)
|
||||
|
||||
|
|
@ -152,10 +152,10 @@ def _buildService() -> TeamsbotService:
|
|||
def _resetGlobals():
|
||||
"""Avoid cross-test bleed in module-level globals."""
|
||||
_activeServices.clear()
|
||||
_sessionEvents.clear()
|
||||
sessionEvents.clear()
|
||||
yield
|
||||
_activeServices.clear()
|
||||
_sessionEvents.clear()
|
||||
sessionEvents.clear()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
|
|
@ -251,7 +251,7 @@ class TestBuildPersistentDirectorContext:
|
|||
]
|
||||
rendered = svc._buildPersistentDirectorContext()
|
||||
assert "OPERATOR_DIRECTIVES" in rendered
|
||||
assert "- Antworte immer in Englisch." in rendered
|
||||
assert "Antworte immer in Englisch." in rendered
|
||||
assert "private" in rendered
|
||||
|
||||
def test_skipsBlankText(self):
|
||||
|
|
@ -261,7 +261,7 @@ class TestBuildPersistentDirectorContext:
|
|||
{"id": "p2", "text": "Sei hoeflich."},
|
||||
]
|
||||
rendered = svc._buildPersistentDirectorContext()
|
||||
assert "- Sei hoeflich." in rendered
|
||||
assert "Sei hoeflich." in rendered
|
||||
assert "p1" not in rendered # the blank one is filtered out
|
||||
|
||||
def test_allBlankPromptsResultInEmpty(self):
|
||||
|
|
|
|||
Loading…
Reference in a new issue