commit
45091dc596
70 changed files with 7203 additions and 1590 deletions
12
app.py
12
app.py
|
|
@ -418,6 +418,9 @@ async def lifespan(app: FastAPI):
|
||||||
registerKnowledgeIngestionConsumer,
|
registerKnowledgeIngestionConsumer,
|
||||||
)
|
)
|
||||||
registerKnowledgeIngestionConsumer()
|
registerKnowledgeIngestionConsumer()
|
||||||
|
# Side-effect import: registers all walker progress message keys
|
||||||
|
# in the i18n registry so `syncRegistryToDb` picks them up.
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge import _progressMessages # noqa: F401
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"KnowledgeIngestionConsumer registration failed (non-critical): {e}")
|
logger.warning(f"KnowledgeIngestionConsumer registration failed (non-critical): {e}")
|
||||||
|
|
||||||
|
|
@ -439,6 +442,15 @@ async def lifespan(app: FastAPI):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Could not shutdown feature containers: {e}")
|
logger.warning(f"Could not shutdown feature containers: {e}")
|
||||||
|
|
||||||
|
# --- Close all PostgreSQL connection pools ---
|
||||||
|
# Must run LAST: feature `onStop` hooks may still issue DB calls during
|
||||||
|
# shutdown. Once we tear down the pools, no more borrows are possible.
|
||||||
|
try:
|
||||||
|
from modules.connectors.connectorDbPostgre import closeAllPools
|
||||||
|
closeAllPools()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Closing DB connection pools failed: {e}")
|
||||||
|
|
||||||
logger.info("Application has been shut down")
|
logger.info("Application has been shut down")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -96,6 +96,17 @@ class BackgroundJob(PowerOnModel):
|
||||||
description="Human-readable current step (e.g. 'Importing journal entries...')",
|
description="Human-readable current step (e.g. 'Importing journal entries...')",
|
||||||
json_schema_extra={"label": "Fortschritts-Nachricht"},
|
json_schema_extra={"label": "Fortschritts-Nachricht"},
|
||||||
)
|
)
|
||||||
|
progressMessageData: Optional[Dict[str, Any]] = Field(
|
||||||
|
None,
|
||||||
|
description=(
|
||||||
|
"Structured i18n payload for `progressMessage`. Shape: "
|
||||||
|
"{'key': '<de-text-with-{placeholders}>', 'params': {...}}. "
|
||||||
|
"Frontend renders via `t(key, params)`; older clients fall back "
|
||||||
|
"to `progressMessage`. Single source of truth — keep `progressMessage` "
|
||||||
|
"as the rendered fallback in the producing language."
|
||||||
|
),
|
||||||
|
json_schema_extra={"label": "Fortschritts-Nachricht (i18n)"},
|
||||||
|
)
|
||||||
|
|
||||||
payload: Dict[str, Any] = Field(
|
payload: Dict[str, Any] = Field(
|
||||||
default_factory=dict,
|
default_factory=dict,
|
||||||
|
|
|
||||||
|
|
@ -62,9 +62,14 @@ class DataSource(PowerOnModel):
|
||||||
description="Owner user ID",
|
description="Owner user ID",
|
||||||
json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "UserInDB", "labelField": "username"}},
|
json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "UserInDB", "labelField": "username"}},
|
||||||
)
|
)
|
||||||
ragIndexEnabled: bool = Field(
|
ragIndexEnabled: Optional[bool] = Field(
|
||||||
default=False,
|
default=None,
|
||||||
description="When true this tree element is indexed into the RAG knowledge store",
|
description=(
|
||||||
|
"Three-state RAG indexing flag with cascade-inherit semantics. "
|
||||||
|
"None = inherit from nearest ancestor DataSource (path-traversal); "
|
||||||
|
"True/False = explicit override that propagates to descendants. "
|
||||||
|
"Walker computes effective value via getEffectiveFlag()."
|
||||||
|
),
|
||||||
json_schema_extra={"label": "Im RAG indexieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
json_schema_extra={"label": "Im RAG indexieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
||||||
)
|
)
|
||||||
lastIndexed: Optional[float] = Field(
|
lastIndexed: Optional[float] = Field(
|
||||||
|
|
@ -72,9 +77,13 @@ class DataSource(PowerOnModel):
|
||||||
description="Timestamp of last successful RAG indexing run",
|
description="Timestamp of last successful RAG indexing run",
|
||||||
json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"},
|
json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"},
|
||||||
)
|
)
|
||||||
scope: str = Field(
|
scope: Optional[str] = Field(
|
||||||
default="personal",
|
default=None,
|
||||||
description="Data visibility scope: personal, featureInstance, mandate, global",
|
description=(
|
||||||
|
"Data visibility scope with inherit semantics. "
|
||||||
|
"None = inherit; values: personal, featureInstance, mandate, global. "
|
||||||
|
"Cascade-reset on parent toggle."
|
||||||
|
),
|
||||||
json_schema_extra={"label": "Sichtbarkeit", "frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [
|
json_schema_extra={"label": "Sichtbarkeit", "frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [
|
||||||
{"value": "personal", "label": "Persönlich"},
|
{"value": "personal", "label": "Persönlich"},
|
||||||
{"value": "featureInstance", "label": "Feature-Instanz"},
|
{"value": "featureInstance", "label": "Feature-Instanz"},
|
||||||
|
|
@ -82,11 +91,25 @@ class DataSource(PowerOnModel):
|
||||||
{"value": "global", "label": "Global"},
|
{"value": "global", "label": "Global"},
|
||||||
]},
|
]},
|
||||||
)
|
)
|
||||||
neutralize: bool = Field(
|
neutralize: Optional[bool] = Field(
|
||||||
default=False,
|
default=None,
|
||||||
description="Whether this data source should be neutralized before AI processing",
|
description=(
|
||||||
|
"Three-state neutralization flag with cascade-inherit semantics. "
|
||||||
|
"None = inherit from nearest ancestor DataSource (path-traversal); "
|
||||||
|
"True/False = explicit override that propagates to descendants."
|
||||||
|
),
|
||||||
json_schema_extra={"label": "Neutralisieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
json_schema_extra={"label": "Neutralisieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
||||||
)
|
)
|
||||||
|
settings: Optional[Dict[str, Any]] = Field(
|
||||||
|
default=None,
|
||||||
|
description=(
|
||||||
|
"DataSource-scoped settings (JSON). Currently used keys: "
|
||||||
|
"ragLimits.{maxBytes,maxFileSize,maxItems,maxDepth}. "
|
||||||
|
"Walker reads these directly; missing keys fall back to RAG_LIMITS_DEFAULT "
|
||||||
|
"and are lazily persisted on next bootstrap."
|
||||||
|
),
|
||||||
|
json_schema_extra={"label": "Einstellungen", "frontend_type": "json", "frontend_readonly": True, "frontend_required": False},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ExternalEntry(BaseModel):
|
class ExternalEntry(BaseModel):
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ A FeatureDataSource links a FeatureInstance table (DATA_OBJECT) to a workspace
|
||||||
so the agent can query structured feature data (e.g. TrusteePosition rows).
|
so the agent can query structured feature data (e.g. TrusteePosition rows).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from modules.datamodels.datamodelBase import PowerOnModel
|
from modules.datamodels.datamodelBase import PowerOnModel
|
||||||
from modules.shared.i18nRegistry import i18nModel
|
from modules.shared.i18nRegistry import i18nModel
|
||||||
|
|
@ -55,9 +55,12 @@ class FeatureDataSource(PowerOnModel):
|
||||||
description="Workspace feature instance where this source is used",
|
description="Workspace feature instance where this source is used",
|
||||||
json_schema_extra={"label": "Workspace", "fk_target": {"db": "poweron_app", "table": "FeatureInstance", "labelField": "label"}},
|
json_schema_extra={"label": "Workspace", "fk_target": {"db": "poweron_app", "table": "FeatureInstance", "labelField": "label"}},
|
||||||
)
|
)
|
||||||
scope: str = Field(
|
scope: Optional[str] = Field(
|
||||||
default="personal",
|
default=None,
|
||||||
description="Data visibility scope: personal, featureInstance, mandate, global",
|
description=(
|
||||||
|
"Data visibility scope with inherit semantics. "
|
||||||
|
"None = inherit; values: personal, featureInstance, mandate, global."
|
||||||
|
),
|
||||||
json_schema_extra={"label": "Sichtbarkeit", "frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [
|
json_schema_extra={"label": "Sichtbarkeit", "frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [
|
||||||
{"value": "personal", "label": "Persönlich"},
|
{"value": "personal", "label": "Persönlich"},
|
||||||
{"value": "featureInstance", "label": "Feature-Instanz"},
|
{"value": "featureInstance", "label": "Feature-Instanz"},
|
||||||
|
|
@ -65,11 +68,22 @@ class FeatureDataSource(PowerOnModel):
|
||||||
{"value": "global", "label": "Global"},
|
{"value": "global", "label": "Global"},
|
||||||
]},
|
]},
|
||||||
)
|
)
|
||||||
neutralize: bool = Field(
|
neutralize: Optional[bool] = Field(
|
||||||
default=False,
|
default=None,
|
||||||
description="Whether this data source should be neutralized before AI processing",
|
description=(
|
||||||
|
"Three-state neutralization flag with cascade-inherit semantics. "
|
||||||
|
"None = inherit; True/False = explicit. Cascade-reset on parent toggle."
|
||||||
|
),
|
||||||
json_schema_extra={"label": "Neutralisieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
json_schema_extra={"label": "Neutralisieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
||||||
)
|
)
|
||||||
|
ragIndexEnabled: Optional[bool] = Field(
|
||||||
|
default=None,
|
||||||
|
description=(
|
||||||
|
"Three-state RAG-indexing flag with cascade-inherit semantics. "
|
||||||
|
"None = inherit; True/False = explicit. Cascade-reset on parent toggle."
|
||||||
|
),
|
||||||
|
json_schema_extra={"label": "RAG-Indexierung", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
||||||
|
)
|
||||||
neutralizeFields: Optional[List[str]] = Field(
|
neutralizeFields: Optional[List[str]] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Column names whose values are replaced with placeholders before AI processing",
|
description="Column names whose values are replaced with placeholders before AI processing",
|
||||||
|
|
@ -80,3 +94,12 @@ class FeatureDataSource(PowerOnModel):
|
||||||
description="Record-level filter applied when querying this table, e.g. {'sessionId': 'abc-123'}",
|
description="Record-level filter applied when querying this table, e.g. {'sessionId': 'abc-123'}",
|
||||||
json_schema_extra={"label": "Datensatzfilter"},
|
json_schema_extra={"label": "Datensatzfilter"},
|
||||||
)
|
)
|
||||||
|
settings: Optional[Dict[str, Any]] = Field(
|
||||||
|
default=None,
|
||||||
|
description=(
|
||||||
|
"FeatureDataSource-scoped settings (JSON). Currently used keys: "
|
||||||
|
"ragLimits.{maxBytes,maxFileSize,maxItems,maxDepth}. "
|
||||||
|
"Mirror of DataSource.settings so the UDB settings modal can target both."
|
||||||
|
),
|
||||||
|
json_schema_extra={"label": "Einstellungen", "frontend_type": "json", "frontend_readonly": True, "frontend_required": False},
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -124,6 +124,7 @@ class InvestorDemo2026(_BaseDemoConfig):
|
||||||
from modules.datamodels.datamodelUam import Mandate, UserInDB
|
from modules.datamodels.datamodelUam import Mandate, UserInDB
|
||||||
from modules.datamodels.datamodelMembership import UserMandate
|
from modules.datamodels.datamodelMembership import UserMandate
|
||||||
|
|
||||||
|
summary["_removedMandateIds"] = []
|
||||||
for mandateDef in [_MANDATE_HAPPYLIFE, _MANDATE_ALPINA]:
|
for mandateDef in [_MANDATE_HAPPYLIFE, _MANDATE_ALPINA]:
|
||||||
try:
|
try:
|
||||||
existing = db.getRecordset(Mandate, recordFilter={"name": mandateDef["name"]})
|
existing = db.getRecordset(Mandate, recordFilter={"name": mandateDef["name"]})
|
||||||
|
|
@ -132,28 +133,36 @@ class InvestorDemo2026(_BaseDemoConfig):
|
||||||
self._removeMandateData(db, mid, mandateDef["label"], summary)
|
self._removeMandateData(db, mid, mandateDef["label"], summary)
|
||||||
db.recordDelete(Mandate, mid)
|
db.recordDelete(Mandate, mid)
|
||||||
summary["removed"].append(f"Mandate {mandateDef['label']} ({mid})")
|
summary["removed"].append(f"Mandate {mandateDef['label']} ({mid})")
|
||||||
|
summary["_removedMandateIds"].append({"id": mid, "mandateId": mid})
|
||||||
logger.info(f"Removed mandate {mandateDef['label']} ({mid})")
|
logger.info(f"Removed mandate {mandateDef['label']} ({mid})")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
summary["errors"].append(f"Remove mandate {mandateDef['label']}: {e}")
|
summary["errors"].append(f"Remove mandate {mandateDef['label']}: {e}")
|
||||||
|
|
||||||
|
# SAFETY: NEVER delete the user record. The user may have connections,
|
||||||
|
# chats, workflows, files, and other data across multiple databases.
|
||||||
|
# Only remove the mandate memberships that THIS demo created.
|
||||||
try:
|
try:
|
||||||
existing = db.getRecordset(UserInDB, recordFilter={"username": _USER["username"]})
|
existing = db.getRecordset(UserInDB, recordFilter={"username": _USER["username"]})
|
||||||
for u in existing:
|
for u in existing:
|
||||||
uid = u.get("id")
|
uid = u.get("id")
|
||||||
|
removedMandateIds = {m.get("mandateId") for m in summary.get("_removedMandateIds", [])}
|
||||||
memberships = db.getRecordset(UserMandate, recordFilter={"userId": uid})
|
memberships = db.getRecordset(UserMandate, recordFilter={"userId": uid})
|
||||||
for mem in memberships:
|
for mem in memberships:
|
||||||
try:
|
if mem.get("mandateId") in removedMandateIds:
|
||||||
db.recordDelete(UserMandate, mem.get("id"))
|
try:
|
||||||
except Exception:
|
db.recordDelete(UserMandate, mem.get("id"))
|
||||||
pass
|
except Exception:
|
||||||
db.recordDelete(UserInDB, uid)
|
pass
|
||||||
summary["removed"].append(f"User {_USER['username']} ({uid})")
|
summary["skipped"].append(
|
||||||
logger.info(f"Removed user {_USER['username']} ({uid})")
|
f"User {_USER['username']} ({uid}) preserved (only demo mandate memberships removed)"
|
||||||
|
)
|
||||||
|
logger.info(f"Preserved user {_USER['username']} ({uid}) - removed demo mandate memberships only")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
summary["errors"].append(f"Remove user: {e}")
|
summary["errors"].append(f"Remove user memberships: {e}")
|
||||||
|
|
||||||
self._removeLanguageSet(db, "es", summary)
|
self._removeLanguageSet(db, "es", summary)
|
||||||
|
|
||||||
|
summary.pop("_removedMandateIds", None)
|
||||||
return summary
|
return summary
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -121,32 +121,39 @@ class PwgDemo2026(_BaseDemoConfig):
|
||||||
from modules.datamodels.datamodelMembership import UserMandate
|
from modules.datamodels.datamodelMembership import UserMandate
|
||||||
from modules.datamodels.datamodelUam import Mandate, UserInDB
|
from modules.datamodels.datamodelUam import Mandate, UserInDB
|
||||||
|
|
||||||
|
removedMandateIds = set()
|
||||||
try:
|
try:
|
||||||
existing = db.getRecordset(Mandate, recordFilter={"name": _MANDATE_PWG["name"]})
|
existing = db.getRecordset(Mandate, recordFilter={"name": _MANDATE_PWG["name"]})
|
||||||
for m in existing:
|
for m in existing:
|
||||||
mid = m.get("id")
|
mid = m.get("id")
|
||||||
self._removeMandateData(db, mid, _MANDATE_PWG["label"], summary)
|
self._removeMandateData(db, mid, _MANDATE_PWG["label"], summary)
|
||||||
db.recordDelete(Mandate, mid)
|
db.recordDelete(Mandate, mid)
|
||||||
|
removedMandateIds.add(mid)
|
||||||
summary["removed"].append(f"Mandate {_MANDATE_PWG['label']} ({mid})")
|
summary["removed"].append(f"Mandate {_MANDATE_PWG['label']} ({mid})")
|
||||||
logger.info(f"Removed mandate {_MANDATE_PWG['label']} ({mid})")
|
logger.info(f"Removed mandate {_MANDATE_PWG['label']} ({mid})")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
summary["errors"].append(f"Remove mandate {_MANDATE_PWG['label']}: {e}")
|
summary["errors"].append(f"Remove mandate {_MANDATE_PWG['label']}: {e}")
|
||||||
|
|
||||||
|
# SAFETY: NEVER delete the user record. The user may have connections,
|
||||||
|
# chats, workflows, files, and other data across multiple databases.
|
||||||
|
# Only remove the mandate memberships that THIS demo created.
|
||||||
try:
|
try:
|
||||||
existing = db.getRecordset(UserInDB, recordFilter={"username": _USER["username"]})
|
existing = db.getRecordset(UserInDB, recordFilter={"username": _USER["username"]})
|
||||||
for u in existing:
|
for u in existing:
|
||||||
uid = u.get("id")
|
uid = u.get("id")
|
||||||
memberships = db.getRecordset(UserMandate, recordFilter={"userId": uid}) or []
|
memberships = db.getRecordset(UserMandate, recordFilter={"userId": uid}) or []
|
||||||
for mem in memberships:
|
for mem in memberships:
|
||||||
try:
|
if mem.get("mandateId") in removedMandateIds:
|
||||||
db.recordDelete(UserMandate, mem.get("id"))
|
try:
|
||||||
except Exception:
|
db.recordDelete(UserMandate, mem.get("id"))
|
||||||
pass
|
except Exception:
|
||||||
db.recordDelete(UserInDB, uid)
|
pass
|
||||||
summary["removed"].append(f"User {_USER['username']} ({uid})")
|
summary["skipped"].append(
|
||||||
logger.info(f"Removed user {_USER['username']} ({uid})")
|
f"User {_USER['username']} ({uid}) preserved (only demo mandate memberships removed)"
|
||||||
|
)
|
||||||
|
logger.info(f"Preserved user {_USER['username']} ({uid}) - removed demo mandate memberships only")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
summary["errors"].append(f"Remove user: {e}")
|
summary["errors"].append(f"Remove user memberships: {e}")
|
||||||
|
|
||||||
return summary
|
return summary
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -342,7 +342,7 @@ class RealEstateObjects:
|
||||||
# If no exact match, try case-insensitive search via SQL query
|
# If no exact match, try case-insensitive search via SQL query
|
||||||
# This handles cases where the name might have different casing
|
# This handles cases where the name might have different casing
|
||||||
self.db._ensure_connection()
|
self.db._ensure_connection()
|
||||||
with self.db.connection.cursor() as cursor:
|
with self.db.borrowCursor() as cursor:
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
'SELECT "id" FROM "Gemeinde" WHERE LOWER("label") = LOWER(%s) LIMIT 1',
|
'SELECT "id" FROM "Gemeinde" WHERE LOWER("label") = LOWER(%s) LIMIT 1',
|
||||||
(name,)
|
(name,)
|
||||||
|
|
@ -375,7 +375,7 @@ class RealEstateObjects:
|
||||||
|
|
||||||
# Try case-insensitive search
|
# Try case-insensitive search
|
||||||
self.db._ensure_connection()
|
self.db._ensure_connection()
|
||||||
with self.db.connection.cursor() as cursor:
|
with self.db.borrowCursor() as cursor:
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
'SELECT "id" FROM "Kanton" WHERE LOWER("label") = LOWER(%s) LIMIT 1',
|
'SELECT "id" FROM "Kanton" WHERE LOWER("label") = LOWER(%s) LIMIT 1',
|
||||||
(name,)
|
(name,)
|
||||||
|
|
@ -408,7 +408,7 @@ class RealEstateObjects:
|
||||||
|
|
||||||
# Try case-insensitive search
|
# Try case-insensitive search
|
||||||
self.db._ensure_connection()
|
self.db._ensure_connection()
|
||||||
with self.db.connection.cursor() as cursor:
|
with self.db.borrowCursor() as cursor:
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
'SELECT "id" FROM "Land" WHERE LOWER("label") = LOWER(%s) LIMIT 1',
|
'SELECT "id" FROM "Land" WHERE LOWER("label") = LOWER(%s) LIMIT 1',
|
||||||
(name,)
|
(name,)
|
||||||
|
|
@ -840,7 +840,7 @@ class RealEstateObjects:
|
||||||
# Ensure connection is alive
|
# Ensure connection is alive
|
||||||
self.db._ensure_connection()
|
self.db._ensure_connection()
|
||||||
|
|
||||||
with self.db.connection.cursor() as cursor:
|
with self.db.borrowCursor() as cursor:
|
||||||
# Execute query
|
# Execute query
|
||||||
if parameters:
|
if parameters:
|
||||||
# Use parameterized query for safety
|
# Use parameterized query for safety
|
||||||
|
|
|
||||||
|
|
@ -205,11 +205,16 @@ class AccountingDataSync:
|
||||||
boundary so the UI poll on ``GET /api/jobs/{jobId}`` shows real
|
boundary so the UI poll on ``GET /api/jobs/{jobId}`` shows real
|
||||||
movement instead of jumping from 10 % to 100 %. Safe to omit.
|
movement instead of jumping from 10 % to 100 %. Safe to omit.
|
||||||
"""
|
"""
|
||||||
def _progress(pct: int, msg: str) -> None:
|
def _progress(pct: int, msgKey: str, msgParams: Optional[Dict[str, Any]] = None) -> None:
|
||||||
|
"""Forward to progressCb using the i18n contract.
|
||||||
|
|
||||||
|
`msgKey` is the German plaintext-as-key; the frontend translates
|
||||||
|
it via `t(key, params)` when rendering.
|
||||||
|
"""
|
||||||
if progressCb is None:
|
if progressCb is None:
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
progressCb(pct, msg)
|
progressCb(pct, messageKey=msgKey, messageParams=msgParams or {})
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
logger.warning(f"progressCb failed at {pct}%: {ex}")
|
logger.warning(f"progressCb failed at {pct}%: {ex}")
|
||||||
from modules.features.trustee.datamodelFeatureTrustee import (
|
from modules.features.trustee.datamodelFeatureTrustee import (
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,27 @@ from modules.shared.i18nRegistry import t
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# i18n: register BackgroundJob progress message keys used by routeFeatureTrustee /
|
||||||
|
# accountingDataSync. Walker call sites use `progressCb(..., messageKey="…")`
|
||||||
|
# without going through `t()`, so we must register each key here as a
|
||||||
|
# string-literal `t(...)` call -- per i18n convention `t()` MUST receive a
|
||||||
|
# literal so static scanners and the boot-time `syncRegistryToDb` can pick
|
||||||
|
# it up. Do NOT collapse these into a loop over a list of variables.
|
||||||
|
t("Sync wird vorbereitet ({total} Position(en))...")
|
||||||
|
t("Verbindungsaufbau fehlgeschlagen.")
|
||||||
|
t("Keine aktive Buchhaltungs-Konfiguration gefunden.")
|
||||||
|
t("Position {index}/{total} verarbeitet")
|
||||||
|
t("Sync abgeschlossen.")
|
||||||
|
t("Initialisiere Import...")
|
||||||
|
t("Verbinde mit Buchhaltungssystem...")
|
||||||
|
t("Import abgeschlossen.")
|
||||||
|
t("Lade Kontenplan...")
|
||||||
|
t("Lade Journaleintraege vom Buchhaltungssystem...")
|
||||||
|
t("Lade Kunden...")
|
||||||
|
t("Lade Lieferanten...")
|
||||||
|
t("Lade Kontensaldi vom Buchhaltungssystem...")
|
||||||
|
t("Speichere Kontensaldi...")
|
||||||
|
|
||||||
# Feature metadata
|
# Feature metadata
|
||||||
FEATURE_CODE = "trustee"
|
FEATURE_CODE = "trustee"
|
||||||
FEATURE_LABEL = t("Treuhand", context="UI")
|
FEATURE_LABEL = t("Treuhand", context="UI")
|
||||||
|
|
@ -463,8 +484,14 @@ TEMPLATE_WORKFLOWS = [
|
||||||
"3. Kurzer Management-Summary-Absatz (3-5 Saetze) UNTER dem Chart "
|
"3. Kurzer Management-Summary-Absatz (3-5 Saetze) UNTER dem Chart "
|
||||||
"mit den 3 groessten Abweichungen (>10%) und einer fachlichen "
|
"mit den 3 groessten Abweichungen (>10%) und einer fachlichen "
|
||||||
"Einschaetzung.\n\n"
|
"Einschaetzung.\n\n"
|
||||||
"Verwende die uebergebene Budget-Datei als Soll-Quelle und die im "
|
"DATENQUELLEN:\n"
|
||||||
"Kontext bereitgestellten Buchhaltungsdaten als Ist-Quelle.\n"
|
"- SOLL (Budget): Aus der uebergebenen Budget-Datei (Excel).\n"
|
||||||
|
"- IST (Buchhaltung): Verwende AUSSCHLIESSLICH das Feld "
|
||||||
|
"\"closingBalance\" aus \"accountSummary\" im Kontext-JSON. "
|
||||||
|
"Dort steht pro Konto GENAU EIN Ist-Wert (Jahresabschluss-Saldo). "
|
||||||
|
"Fuer Quartals-Budgets stehen zusaetzlich Q1/Q2/Q3/Q4-Felder bereit. "
|
||||||
|
"SUMMIERE NIEMALS mehrere Zeilen oder Journal-Eintraege auf -- der "
|
||||||
|
"closingBalance in accountSummary ist bereits der korrekte Ist-Wert.\n\n"
|
||||||
"WICHTIG: Erstelle KEINEN separaten Chart pro Konto. Nur EIN "
|
"WICHTIG: Erstelle KEINEN separaten Chart pro Konto. Nur EIN "
|
||||||
"Uebersichts-Chart ueber alle Konten ist gewuenscht.\n\n"
|
"Uebersichts-Chart ueber alle Konten ist gewuenscht.\n\n"
|
||||||
"Hinweis: Das documentTheme ist 'finance'. Wenn du ein Dokument erstellst, "
|
"Hinweis: Das documentTheme ist 'finance'. Wenn du ein Dokument erstellst, "
|
||||||
|
|
|
||||||
|
|
@ -1644,7 +1644,11 @@ async def _trusteeAccountingPushJobHandler(job: Dict[str, Any], progressCb) -> D
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
total = len(positionIds)
|
total = len(positionIds)
|
||||||
progressCb(2, f"Sync wird vorbereitet ({total} Position(en))...")
|
progressCb(
|
||||||
|
2,
|
||||||
|
messageKey="Sync wird vorbereitet ({total} Position(en))...",
|
||||||
|
messageParams={"total": total},
|
||||||
|
)
|
||||||
|
|
||||||
# Resolve connector + plain config once to avoid decryption rate-limits
|
# Resolve connector + plain config once to avoid decryption rate-limits
|
||||||
# (mirrors the optimisation in pushBatchToAccounting). We push positions
|
# (mirrors the optimisation in pushBatchToAccounting). We push positions
|
||||||
|
|
@ -1655,12 +1659,12 @@ async def _trusteeAccountingPushJobHandler(job: Dict[str, Any], progressCb) -> D
|
||||||
connector, plainConfig, configRecord = await bridge._resolveConnectorAndConfig(instanceId)
|
connector, plainConfig, configRecord = await bridge._resolveConnectorAndConfig(instanceId)
|
||||||
except Exception as resolveErr:
|
except Exception as resolveErr:
|
||||||
logger.exception("Accounting push: failed to resolve connector/config")
|
logger.exception("Accounting push: failed to resolve connector/config")
|
||||||
progressCb(100, "Verbindungsaufbau fehlgeschlagen.")
|
progressCb(100, messageKey="Verbindungsaufbau fehlgeschlagen.")
|
||||||
raise resolveErr
|
raise resolveErr
|
||||||
|
|
||||||
if not connector or not plainConfig:
|
if not connector or not plainConfig:
|
||||||
results = [SyncResult(success=False, errorMessage="No active accounting configuration found") for _ in positionIds]
|
results = [SyncResult(success=False, errorMessage="No active accounting configuration found") for _ in positionIds]
|
||||||
progressCb(100, "Keine aktive Buchhaltungs-Konfiguration gefunden.")
|
progressCb(100, messageKey="Keine aktive Buchhaltungs-Konfiguration gefunden.")
|
||||||
return {
|
return {
|
||||||
"total": len(results),
|
"total": len(results),
|
||||||
"success": 0,
|
"success": 0,
|
||||||
|
|
@ -1680,7 +1684,11 @@ async def _trusteeAccountingPushJobHandler(job: Dict[str, Any], progressCb) -> D
|
||||||
results.append(result)
|
results.append(result)
|
||||||
# Reserve 5..95% for the push loop, keep the tail for summary.
|
# Reserve 5..95% for the push loop, keep the tail for summary.
|
||||||
pct = 5 + int(90 * index / total)
|
pct = 5 + int(90 * index / total)
|
||||||
progressCb(pct, f"Position {index}/{total} verarbeitet")
|
progressCb(
|
||||||
|
pct,
|
||||||
|
messageKey="Position {index}/{total} verarbeitet",
|
||||||
|
messageParams={"index": index, "total": total},
|
||||||
|
)
|
||||||
|
|
||||||
skipped = [r for r in results if not r.success and r.errorMessage and "already synced" in r.errorMessage]
|
skipped = [r for r in results if not r.success and r.errorMessage and "already synced" in r.errorMessage]
|
||||||
failed = [r for r in results if not r.success and r not in skipped]
|
failed = [r for r in results if not r.success and r not in skipped]
|
||||||
|
|
@ -1693,7 +1701,7 @@ async def _trusteeAccountingPushJobHandler(job: Dict[str, Any], progressCb) -> D
|
||||||
"; ".join(r.errorMessage or "unknown" for r in failed[:3]),
|
"; ".join(r.errorMessage or "unknown" for r in failed[:3]),
|
||||||
)
|
)
|
||||||
|
|
||||||
progressCb(100, "Sync abgeschlossen.")
|
progressCb(100, messageKey="Sync abgeschlossen.")
|
||||||
return {
|
return {
|
||||||
"total": len(results),
|
"total": len(results),
|
||||||
"success": sum(1 for r in results if r.success),
|
"success": sum(1 for r in results if r.success),
|
||||||
|
|
@ -1823,10 +1831,10 @@ async def _trusteeAccountingSyncJobHandler(job: Dict[str, Any], progressCb) -> D
|
||||||
payload = job.get("payload") or {}
|
payload = job.get("payload") or {}
|
||||||
rootUser = getRootUser()
|
rootUser = getRootUser()
|
||||||
|
|
||||||
progressCb(5, "Initialisiere Import...")
|
progressCb(5, messageKey="Initialisiere Import...")
|
||||||
interface = getInterface(rootUser, mandateId=mandateId, featureInstanceId=instanceId)
|
interface = getInterface(rootUser, mandateId=mandateId, featureInstanceId=instanceId)
|
||||||
sync = AccountingDataSync(interface)
|
sync = AccountingDataSync(interface)
|
||||||
progressCb(10, "Verbinde mit Buchhaltungssystem...")
|
progressCb(10, messageKey="Verbinde mit Buchhaltungssystem...")
|
||||||
result = await sync.importData(
|
result = await sync.importData(
|
||||||
featureInstanceId=instanceId,
|
featureInstanceId=instanceId,
|
||||||
mandateId=mandateId,
|
mandateId=mandateId,
|
||||||
|
|
@ -1834,7 +1842,7 @@ async def _trusteeAccountingSyncJobHandler(job: Dict[str, Any], progressCb) -> D
|
||||||
dateTo=payload.get("dateTo"),
|
dateTo=payload.get("dateTo"),
|
||||||
progressCb=progressCb,
|
progressCb=progressCb,
|
||||||
)
|
)
|
||||||
progressCb(100, "Import abgeschlossen.")
|
progressCb(100, messageKey="Import abgeschlossen.")
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
"""Workspace feature data models — WorkspaceUserSettings."""
|
"""Workspace feature data models — WorkspaceUserSettings."""
|
||||||
|
|
||||||
from typing import List, Optional
|
from typing import Dict, List, Optional
|
||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
from modules.datamodels.datamodelBase import PowerOnModel
|
from modules.datamodels.datamodelBase import PowerOnModel
|
||||||
from modules.shared.i18nRegistry import i18nModel
|
from modules.shared.i18nRegistry import i18nModel
|
||||||
|
|
@ -52,7 +52,7 @@ class WorkspaceUserSettings(PowerOnModel):
|
||||||
description="Max agent rounds override (None = instance default)",
|
description="Max agent rounds override (None = instance default)",
|
||||||
json_schema_extra={"label": "Max. Agenten-Runden", "frontend_type": "number", "frontend_readonly": False, "frontend_required": False},
|
json_schema_extra={"label": "Max. Agenten-Runden", "frontend_type": "number", "frontend_readonly": False, "frontend_required": False},
|
||||||
)
|
)
|
||||||
requireNeutralization: bool = Field(
|
requireNeutralization: Optional[bool] = Field(
|
||||||
default=False,
|
default=False,
|
||||||
description="Default neutralization setting for this user",
|
description="Default neutralization setting for this user",
|
||||||
json_schema_extra={"label": "Neutralisierung", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
json_schema_extra={"label": "Neutralisierung", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
||||||
|
|
@ -67,3 +67,8 @@ class WorkspaceUserSettings(PowerOnModel):
|
||||||
description="Allowed AI models (empty = all permitted)",
|
description="Allowed AI models (empty = all permitted)",
|
||||||
json_schema_extra={"label": "Erlaubte Modelle", "frontend_type": "modelMultiSelect", "frontend_readonly": False, "frontend_required": False},
|
json_schema_extra={"label": "Erlaubte Modelle", "frontend_type": "modelMultiSelect", "frontend_readonly": False, "frontend_required": False},
|
||||||
)
|
)
|
||||||
|
uiTreeExpansion: Dict[str, List[str]] = Field(
|
||||||
|
default_factory=dict,
|
||||||
|
description="Per-tab expanded tree-node ids for the UDB / FormGeneratorTree. Key = scope name (e.g. 'sources', 'filesOwn', 'filesShared').",
|
||||||
|
json_schema_extra={"label": "Tree-Expand-Zustand", "frontend_type": "json", "frontend_readonly": True, "frontend_required": False},
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -1281,51 +1281,101 @@ async def listWorkspaceDataSources(
|
||||||
try:
|
try:
|
||||||
from modules.datamodels.datamodelDataSource import DataSource
|
from modules.datamodels.datamodelDataSource import DataSource
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import buildEffectiveByConnection
|
||||||
rootIf = getRootInterface()
|
rootIf = getRootInterface()
|
||||||
recordFilter: dict = {"featureInstanceId": instanceId}
|
recordFilter: dict = {"featureInstanceId": instanceId}
|
||||||
if wsMandateId:
|
if wsMandateId:
|
||||||
recordFilter["mandateId"] = wsMandateId
|
recordFilter["mandateId"] = wsMandateId
|
||||||
dataSources = rootIf.db.getRecordset(DataSource, recordFilter=recordFilter)
|
dataSources = rootIf.db.getRecordset(DataSource, recordFilter=recordFilter)
|
||||||
return JSONResponse({"dataSources": dataSources or []})
|
if not dataSources:
|
||||||
|
return JSONResponse({"dataSources": []})
|
||||||
|
|
||||||
|
# Group by connectionId and compute effective values in aggregate mode
|
||||||
|
byConnection: dict = {}
|
||||||
|
for ds in dataSources:
|
||||||
|
connId = ds.get("connectionId") or ""
|
||||||
|
byConnection.setdefault(connId, []).append(ds)
|
||||||
|
|
||||||
|
for connDs in byConnection.values():
|
||||||
|
effNeutralize = buildEffectiveByConnection(connDs, "neutralize", mode="aggregate")
|
||||||
|
effScope = buildEffectiveByConnection(connDs, "scope", mode="aggregate")
|
||||||
|
effRag = buildEffectiveByConnection(connDs, "ragIndexEnabled", mode="aggregate")
|
||||||
|
for ds in connDs:
|
||||||
|
dsId = ds.get("id", "")
|
||||||
|
ds["effectiveNeutralize"] = effNeutralize.get(dsId, False)
|
||||||
|
ds["effectiveScope"] = effScope.get(dsId, "personal")
|
||||||
|
ds["effectiveRagIndexEnabled"] = effRag.get(dsId, False)
|
||||||
|
|
||||||
|
return JSONResponse({"dataSources": dataSources})
|
||||||
except Exception:
|
except Exception:
|
||||||
return JSONResponse({"dataSources": []})
|
return JSONResponse({"dataSources": []})
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{instanceId}/connections")
|
class _TreeChildrenRequest(BaseModel):
|
||||||
|
"""Request body for the generic tree children endpoint."""
|
||||||
|
parents: List[Optional[str]] = Field(
|
||||||
|
default_factory=list,
|
||||||
|
description="List of parent keys to fetch children for. Use null for top-level.",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/{instanceId}/tree/children")
|
||||||
@limiter.limit("300/minute")
|
@limiter.limit("300/minute")
|
||||||
async def listWorkspaceConnections(
|
async def getTreeChildren(
|
||||||
request: Request,
|
request: Request,
|
||||||
instanceId: str = Path(...),
|
instanceId: str = Path(...),
|
||||||
|
body: _TreeChildrenRequest = Body(...),
|
||||||
context: RequestContext = Depends(getRequestContext),
|
context: RequestContext = Depends(getRequestContext),
|
||||||
):
|
):
|
||||||
"""Return the user's active connections (UserConnections)."""
|
"""Generic UDB tree children resolver.
|
||||||
_mandateId, _ = _validateInstanceAccess(instanceId, context)
|
|
||||||
from modules.serviceCenter import getService
|
The UI sends a list of parent keys (or null for top-level). The backend
|
||||||
from modules.serviceCenter.context import ServiceCenterContext
|
returns children for each requested parent, with all effective flag
|
||||||
ctx = ServiceCenterContext(
|
values pre-computed. The UI builds the visible tree from the resulting
|
||||||
user=context.user,
|
flat per-parent map.
|
||||||
mandate_id=_mandateId or "",
|
"""
|
||||||
feature_instance_id=instanceId,
|
_validateInstanceAccess(instanceId, context)
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._buildTree import getChildrenForParents
|
||||||
|
|
||||||
|
try:
|
||||||
|
nodesByParent = await getChildrenForParents(instanceId, body.parents, context)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.exception("Tree children build failed: %s", exc)
|
||||||
|
raise HTTPException(status_code=500, detail=str(exc))
|
||||||
|
return JSONResponse({"nodesByParent": nodesByParent})
|
||||||
|
|
||||||
|
|
||||||
|
class _TreeAttributesRequest(BaseModel):
|
||||||
|
"""Request body for the attribute-refresh endpoint."""
|
||||||
|
keys: List[str] = Field(
|
||||||
|
default_factory=list,
|
||||||
|
description="List of node keys to fetch current attributes for.",
|
||||||
)
|
)
|
||||||
chatService = getService("chat", ctx)
|
|
||||||
connections = chatService.getUserConnections()
|
|
||||||
items = []
|
@router.post("/{instanceId}/tree/attributes")
|
||||||
for c in connections or []:
|
@limiter.limit("300/minute")
|
||||||
conn = c if isinstance(c, dict) else (c.model_dump() if hasattr(c, "model_dump") else {})
|
async def getTreeAttributes(
|
||||||
authority = conn.get("authority")
|
request: Request,
|
||||||
if hasattr(authority, "value"):
|
instanceId: str = Path(...),
|
||||||
authority = authority.value
|
body: _TreeAttributesRequest = Body(...),
|
||||||
status = conn.get("status")
|
context: RequestContext = Depends(getRequestContext),
|
||||||
if hasattr(status, "value"):
|
):
|
||||||
status = status.value
|
"""Return current effective attribute values (neutralize, scope,
|
||||||
items.append({
|
ragIndexEnabled) for a list of node keys. Used after a toggle action
|
||||||
"id": conn.get("id"),
|
to refresh only the visible nodes without reloading tree structure."""
|
||||||
"authority": authority,
|
_validateInstanceAccess(instanceId, context)
|
||||||
"externalUsername": conn.get("externalUsername"),
|
from modules.serviceCenter.services.serviceKnowledge._buildTree import getAttributesForKeys
|
||||||
"externalEmail": conn.get("externalEmail"),
|
|
||||||
"status": status,
|
if len(body.keys) > 500:
|
||||||
})
|
raise HTTPException(status_code=400, detail="Max 500 keys per request")
|
||||||
return JSONResponse({"connections": items})
|
|
||||||
|
try:
|
||||||
|
attrs = await getAttributesForKeys(instanceId, body.keys, context)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.exception("Tree attributes failed: %s", exc)
|
||||||
|
raise HTTPException(status_code=500, detail=str(exc))
|
||||||
|
return JSONResponse({"attributes": attrs})
|
||||||
|
|
||||||
|
|
||||||
class CreateDataSourceRequest(BaseModel):
|
class CreateDataSourceRequest(BaseModel):
|
||||||
|
|
@ -1390,303 +1440,6 @@ async def deleteWorkspaceDataSource(
|
||||||
|
|
||||||
# ---- Feature Connections & Feature Data Sources ----
|
# ---- Feature Connections & Feature Data Sources ----
|
||||||
|
|
||||||
@router.get("/{instanceId}/feature-connections")
|
|
||||||
@limiter.limit("120/minute")
|
|
||||||
async def listFeatureConnections(
|
|
||||||
request: Request,
|
|
||||||
instanceId: str = Path(...),
|
|
||||||
context: RequestContext = Depends(getRequestContext),
|
|
||||||
):
|
|
||||||
"""List feature instances the user has access to, scoped to the workspace mandate."""
|
|
||||||
wsMandateId, _ = _validateInstanceAccess(instanceId, context)
|
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
|
||||||
from modules.security.rbacCatalog import getCatalogService
|
|
||||||
from modules.datamodels.datamodelUam import Mandate
|
|
||||||
|
|
||||||
rootIf = getRootInterface()
|
|
||||||
userId = str(context.user.id)
|
|
||||||
|
|
||||||
catalog = getCatalogService()
|
|
||||||
featureCodesWithData = catalog.getFeaturesWithDataObjects()
|
|
||||||
|
|
||||||
userMandates = rootIf.getUserMandates(userId)
|
|
||||||
if not userMandates:
|
|
||||||
return JSONResponse({"featureConnectionsByMandate": []})
|
|
||||||
|
|
||||||
allowedMandateIds = {um.mandateId for um in userMandates}
|
|
||||||
if wsMandateId and wsMandateId in allowedMandateIds:
|
|
||||||
allowedMandateIds = {wsMandateId}
|
|
||||||
|
|
||||||
mandateLabels: dict = {}
|
|
||||||
for um in userMandates:
|
|
||||||
if um.mandateId not in allowedMandateIds:
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
rows = rootIf.db.getRecordset(Mandate, recordFilter={"id": um.mandateId})
|
|
||||||
if rows:
|
|
||||||
m = rows[0]
|
|
||||||
mandateLabels[um.mandateId] = m.get("label") or m.get("name") or um.mandateId
|
|
||||||
except Exception:
|
|
||||||
mandateLabels[um.mandateId] = um.mandateId
|
|
||||||
|
|
||||||
byMandate: dict = {}
|
|
||||||
seenIds: set = set()
|
|
||||||
for um in userMandates:
|
|
||||||
if um.mandateId not in allowedMandateIds:
|
|
||||||
continue
|
|
||||||
allInstances = rootIf.getFeatureInstancesByMandate(um.mandateId)
|
|
||||||
for inst in allInstances:
|
|
||||||
if inst.id in seenIds:
|
|
||||||
continue
|
|
||||||
seenIds.add(inst.id)
|
|
||||||
if not inst.enabled:
|
|
||||||
continue
|
|
||||||
if inst.featureCode not in featureCodesWithData:
|
|
||||||
continue
|
|
||||||
featureAccess = rootIf.getFeatureAccess(userId, inst.id)
|
|
||||||
if not featureAccess or not featureAccess.enabled:
|
|
||||||
continue
|
|
||||||
|
|
||||||
featureDef = catalog.getFeatureDefinition(inst.featureCode) or {}
|
|
||||||
dataObjects = catalog.getDataObjects(inst.featureCode)
|
|
||||||
label = inst.label or inst.featureCode
|
|
||||||
mid = inst.mandateId
|
|
||||||
connItem = {
|
|
||||||
"featureInstanceId": inst.id,
|
|
||||||
"featureCode": inst.featureCode,
|
|
||||||
"mandateId": mid,
|
|
||||||
"label": label,
|
|
||||||
"icon": featureDef.get("icon", "mdi-database"),
|
|
||||||
"tableCount": len(dataObjects),
|
|
||||||
}
|
|
||||||
if mid not in byMandate:
|
|
||||||
byMandate[mid] = []
|
|
||||||
byMandate[mid].append(connItem)
|
|
||||||
|
|
||||||
def _sortKeyLabel(x: dict) -> str:
|
|
||||||
return (x.get("label") or "").lower()
|
|
||||||
|
|
||||||
groups = []
|
|
||||||
for mid in sorted(byMandate.keys(), key=lambda m: (mandateLabels.get(m, m) or "").lower()):
|
|
||||||
conns = sorted(byMandate[mid], key=_sortKeyLabel)
|
|
||||||
groups.append({
|
|
||||||
"mandateId": mid,
|
|
||||||
"mandateLabel": mandateLabels.get(mid, mid),
|
|
||||||
"featureConnections": conns,
|
|
||||||
})
|
|
||||||
|
|
||||||
return JSONResponse({"featureConnectionsByMandate": groups})
|
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{instanceId}/feature-connections/{fiId}/tables")
|
|
||||||
@limiter.limit("120/minute")
|
|
||||||
async def listFeatureConnectionTables(
|
|
||||||
request: Request,
|
|
||||||
instanceId: str = Path(...),
|
|
||||||
fiId: str = Path(..., description="Feature instance ID"),
|
|
||||||
context: RequestContext = Depends(getRequestContext),
|
|
||||||
):
|
|
||||||
"""List data tables (DATA_OBJECTS) for a feature instance, filtered by RBAC."""
|
|
||||||
wsMandateId, _ = _validateInstanceAccess(instanceId, context)
|
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
|
||||||
from modules.security.rbacCatalog import getCatalogService
|
|
||||||
|
|
||||||
rootIf = getRootInterface()
|
|
||||||
inst = rootIf.getFeatureInstance(fiId)
|
|
||||||
if not inst:
|
|
||||||
raise HTTPException(status_code=404, detail=routeApiMsg("Feature instance not found"))
|
|
||||||
|
|
||||||
mandateId = str(inst.mandateId) if inst.mandateId else None
|
|
||||||
if wsMandateId and mandateId and mandateId != wsMandateId:
|
|
||||||
raise HTTPException(status_code=403, detail=routeApiMsg("Feature instance does not belong to workspace mandate"))
|
|
||||||
catalog = getCatalogService()
|
|
||||||
|
|
||||||
try:
|
|
||||||
from modules.security.rbac import RbacClass
|
|
||||||
from modules.security.rootAccess import getRootDbAppConnector
|
|
||||||
dbApp = getRootDbAppConnector()
|
|
||||||
rbac = RbacClass(dbApp, dbApp=dbApp)
|
|
||||||
accessible = catalog.getAccessibleDataObjects(
|
|
||||||
featureCode=inst.featureCode,
|
|
||||||
rbacInstance=rbac,
|
|
||||||
user=context.user,
|
|
||||||
mandateId=mandateId or "",
|
|
||||||
featureInstanceId=fiId,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
accessible = catalog.getDataObjects(inst.featureCode)
|
|
||||||
|
|
||||||
accessibleKeys = {obj.get("objectKey", "") for obj in accessible}
|
|
||||||
referencedGroups = set()
|
|
||||||
for obj in accessible:
|
|
||||||
meta = obj.get("meta", {})
|
|
||||||
if meta.get("wildcard") or meta.get("isGroup"):
|
|
||||||
continue
|
|
||||||
if meta.get("group"):
|
|
||||||
referencedGroups.add(meta["group"])
|
|
||||||
|
|
||||||
tables = []
|
|
||||||
for obj in catalog.getDataObjects(inst.featureCode):
|
|
||||||
meta = obj.get("meta", {})
|
|
||||||
if meta.get("wildcard"):
|
|
||||||
continue
|
|
||||||
objectKey = obj.get("objectKey", "")
|
|
||||||
if meta.get("isGroup"):
|
|
||||||
# Groups are metadata-only; include if at least one child is accessible
|
|
||||||
# (regardless of whether the group itself was RBAC-granted).
|
|
||||||
if objectKey not in referencedGroups:
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
if objectKey not in accessibleKeys:
|
|
||||||
continue
|
|
||||||
node = {
|
|
||||||
"objectKey": objectKey,
|
|
||||||
"tableName": meta.get("table", ""),
|
|
||||||
"label": resolveText(obj.get("label", "")),
|
|
||||||
"fields": meta.get("fields", []),
|
|
||||||
"isParent": bool(meta.get("isParent", False)),
|
|
||||||
"parentTable": meta.get("parentTable") or None,
|
|
||||||
"parentKey": meta.get("parentKey") or None,
|
|
||||||
"displayFields": meta.get("displayFields", []),
|
|
||||||
"isGroup": bool(meta.get("isGroup", False)),
|
|
||||||
"group": meta.get("group") or None,
|
|
||||||
}
|
|
||||||
tables.append(node)
|
|
||||||
|
|
||||||
return JSONResponse({"tables": tables})
|
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{instanceId}/feature-connections/{fiId}/parent-objects/{tableName}")
|
|
||||||
@limiter.limit("120/minute")
|
|
||||||
async def listParentObjects(
|
|
||||||
request: Request,
|
|
||||||
instanceId: str = Path(...),
|
|
||||||
fiId: str = Path(..., description="Feature instance ID"),
|
|
||||||
tableName: str = Path(..., description="Parent table name from DATA_OBJECTS"),
|
|
||||||
parentKey: Optional[str] = Query(None, description="Optional FK column name to filter by ancestor record (nested parent rendering)"),
|
|
||||||
parentValue: Optional[str] = Query(None, description="Optional FK value matching parentKey to filter children of a specific ancestor record"),
|
|
||||||
context: RequestContext = Depends(getRequestContext),
|
|
||||||
):
|
|
||||||
"""List records from a parent table so the user can pick a specific record to scope data.
|
|
||||||
|
|
||||||
When parentKey + parentValue are provided, results are additionally filtered by that FK,
|
|
||||||
enabling nested record hierarchies (e.g. Sessions OF Context X).
|
|
||||||
"""
|
|
||||||
wsMandateId, _ = _validateInstanceAccess(instanceId, context)
|
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
|
||||||
from modules.security.rbacCatalog import getCatalogService
|
|
||||||
|
|
||||||
rootIf = getRootInterface()
|
|
||||||
inst = rootIf.getFeatureInstance(fiId)
|
|
||||||
if not inst:
|
|
||||||
raise HTTPException(status_code=404, detail=routeApiMsg("Feature instance not found"))
|
|
||||||
|
|
||||||
featureCode = inst.featureCode
|
|
||||||
mandateId = str(inst.mandateId) if inst.mandateId else ""
|
|
||||||
if wsMandateId and mandateId and mandateId != wsMandateId:
|
|
||||||
raise HTTPException(status_code=403, detail=routeApiMsg("Feature instance does not belong to workspace mandate"))
|
|
||||||
catalog = getCatalogService()
|
|
||||||
|
|
||||||
parentObj = None
|
|
||||||
for obj in catalog.getDataObjects(featureCode):
|
|
||||||
meta = obj.get("meta", {})
|
|
||||||
if meta.get("table") == tableName and meta.get("isParent"):
|
|
||||||
parentObj = obj
|
|
||||||
break
|
|
||||||
if not parentObj:
|
|
||||||
raise HTTPException(status_code=400, detail=f"Table '{tableName}' is not a registered parent table")
|
|
||||||
|
|
||||||
displayFields = parentObj["meta"].get("displayFields", [])
|
|
||||||
selectCols = ', '.join(f'"{f}"' for f in (["id"] + displayFields)) if displayFields else "*"
|
|
||||||
|
|
||||||
from modules.connectors.connectorDbPostgre import DatabaseConnector
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
|
||||||
featureDbName = f"poweron_{featureCode.lower()}"
|
|
||||||
featureDbConn = None
|
|
||||||
try:
|
|
||||||
featureDbConn = DatabaseConnector(
|
|
||||||
dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
|
|
||||||
dbDatabase=featureDbName,
|
|
||||||
dbUser=APP_CONFIG.get("DB_USER"),
|
|
||||||
dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET"),
|
|
||||||
dbPort=int(APP_CONFIG.get("DB_PORT", 5432)),
|
|
||||||
userId=str(context.user.id),
|
|
||||||
)
|
|
||||||
conn = featureDbConn.connection
|
|
||||||
with conn.cursor() as cur:
|
|
||||||
cur.execute(
|
|
||||||
"SELECT column_name FROM information_schema.columns "
|
|
||||||
"WHERE table_schema = 'public' AND LOWER(table_name) = LOWER(%s) "
|
|
||||||
"AND column_name IN ('featureInstanceId', 'instanceId')",
|
|
||||||
[tableName],
|
|
||||||
)
|
|
||||||
instanceCols = [row["column_name"] for row in cur.fetchall()]
|
|
||||||
instanceCol = "featureInstanceId" if "featureInstanceId" in instanceCols else "instanceId"
|
|
||||||
|
|
||||||
cur.execute(
|
|
||||||
"SELECT column_name FROM information_schema.columns "
|
|
||||||
"WHERE table_schema = 'public' AND LOWER(table_name) = LOWER(%s) "
|
|
||||||
"AND column_name = 'userId'",
|
|
||||||
[tableName],
|
|
||||||
)
|
|
||||||
hasUserId = cur.rowcount > 0
|
|
||||||
|
|
||||||
sql = (
|
|
||||||
f'SELECT {selectCols} FROM "{tableName}" '
|
|
||||||
f'WHERE "{instanceCol}" = %s'
|
|
||||||
)
|
|
||||||
params = [fiId]
|
|
||||||
if mandateId:
|
|
||||||
sql += ' AND "mandateId" = %s'
|
|
||||||
params.append(mandateId)
|
|
||||||
if hasUserId:
|
|
||||||
sql += ' AND "userId" = %s'
|
|
||||||
params.append(str(context.user.id))
|
|
||||||
|
|
||||||
if parentKey and parentValue:
|
|
||||||
cur.execute(
|
|
||||||
"SELECT 1 FROM information_schema.columns "
|
|
||||||
"WHERE table_schema = 'public' AND LOWER(table_name) = LOWER(%s) "
|
|
||||||
"AND column_name = %s",
|
|
||||||
[tableName, parentKey],
|
|
||||||
)
|
|
||||||
if cur.rowcount > 0:
|
|
||||||
sql += f' AND "{parentKey}" = %s'
|
|
||||||
params.append(parentValue)
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
f"listParentObjects({tableName}): ignoring parentKey '{parentKey}' (column does not exist)"
|
|
||||||
)
|
|
||||||
|
|
||||||
sql += ' ORDER BY "id" DESC LIMIT 100'
|
|
||||||
cur.execute(sql, params)
|
|
||||||
rows = []
|
|
||||||
for row in cur.fetchall():
|
|
||||||
r = dict(row)
|
|
||||||
for k, v in r.items():
|
|
||||||
if hasattr(v, "isoformat"):
|
|
||||||
r[k] = v.isoformat()
|
|
||||||
elif isinstance(v, (bytes, bytearray)):
|
|
||||||
r[k] = f"<binary {len(v)} bytes>"
|
|
||||||
displayParts = [str(r.get(f, "")) for f in displayFields if r.get(f) is not None]
|
|
||||||
rows.append({
|
|
||||||
"id": r.get("id", ""),
|
|
||||||
"displayLabel": " | ".join(displayParts) if displayParts else r.get("id", ""),
|
|
||||||
"fields": {f: r.get(f) for f in displayFields},
|
|
||||||
})
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"listParentObjects({tableName}) failed: {e}", exc_info=True)
|
|
||||||
raise HTTPException(status_code=500, detail=f"Failed to list parent objects: {e}")
|
|
||||||
finally:
|
|
||||||
if featureDbConn:
|
|
||||||
try:
|
|
||||||
featureDbConn.close()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return JSONResponse({"parentObjects": rows})
|
|
||||||
|
|
||||||
|
|
||||||
class CreateFeatureDataSourceRequest(BaseModel):
|
class CreateFeatureDataSourceRequest(BaseModel):
|
||||||
"""Request body for adding a feature table as data source."""
|
"""Request body for adding a feature table as data source."""
|
||||||
featureInstanceId: str = Field(description="Feature instance ID")
|
featureInstanceId: str = Field(description="Feature instance ID")
|
||||||
|
|
@ -1705,16 +1458,35 @@ async def createFeatureDataSource(
|
||||||
body: CreateFeatureDataSourceRequest = Body(...),
|
body: CreateFeatureDataSourceRequest = Body(...),
|
||||||
context: RequestContext = Depends(getRequestContext),
|
context: RequestContext = Depends(getRequestContext),
|
||||||
):
|
):
|
||||||
"""Create a FeatureDataSource for this workspace instance."""
|
"""Create a FeatureDataSource for this workspace instance.
|
||||||
|
|
||||||
|
The FDS lives under the WORKSPACE's mandate (not the feature's): that
|
||||||
|
matches how the tree (`allFds = recordset where workspaceInstanceId =
|
||||||
|
instanceId`) and the PATCH endpoints scope these records — by workspace,
|
||||||
|
not by feature mandate. The user can legitimately reference a feature
|
||||||
|
from another mandate they have access to (via the UDB mandate-group
|
||||||
|
nodes), and a hard cross-mandate block here would silently 403 those
|
||||||
|
toggles. Access to the referenced feature is verified by the user's
|
||||||
|
`FeatureAccess` and the existing tree-children RBAC, which run before
|
||||||
|
the user can ever click on this node.
|
||||||
|
"""
|
||||||
wsMandateId, _ = _validateInstanceAccess(instanceId, context)
|
wsMandateId, _ = _validateInstanceAccess(instanceId, context)
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
|
from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
|
||||||
|
|
||||||
rootIf = getRootInterface()
|
rootIf = getRootInterface()
|
||||||
inst = rootIf.getFeatureInstance(body.featureInstanceId)
|
if not rootIf.getFeatureAccess(str(context.user.id), body.featureInstanceId):
|
||||||
mandateId = str(inst.mandateId) if inst else (str(context.mandateId) if context.mandateId else "")
|
raise HTTPException(status_code=403, detail=routeApiMsg("Access denied to this feature instance"))
|
||||||
if wsMandateId and mandateId and mandateId != wsMandateId:
|
|
||||||
raise HTTPException(status_code=403, detail=routeApiMsg("Feature instance does not belong to workspace mandate"))
|
existing = rootIf.db.getRecordset(FeatureDataSource, recordFilter={
|
||||||
|
"workspaceInstanceId": instanceId,
|
||||||
|
"featureInstanceId": body.featureInstanceId,
|
||||||
|
"tableName": body.tableName,
|
||||||
|
}) or []
|
||||||
|
targetFilter = body.recordFilter or None
|
||||||
|
for rec in existing:
|
||||||
|
if (rec.get("recordFilter") or None) == targetFilter:
|
||||||
|
return JSONResponse(rec)
|
||||||
|
|
||||||
fds = FeatureDataSource(
|
fds = FeatureDataSource(
|
||||||
featureInstanceId=body.featureInstanceId,
|
featureInstanceId=body.featureInstanceId,
|
||||||
|
|
@ -1722,7 +1494,7 @@ async def createFeatureDataSource(
|
||||||
tableName=body.tableName,
|
tableName=body.tableName,
|
||||||
objectKey=body.objectKey,
|
objectKey=body.objectKey,
|
||||||
label=body.label,
|
label=body.label,
|
||||||
mandateId=mandateId,
|
mandateId=wsMandateId or "",
|
||||||
userId=str(context.user.id),
|
userId=str(context.user.id),
|
||||||
workspaceInstanceId=instanceId,
|
workspaceInstanceId=instanceId,
|
||||||
recordFilter=body.recordFilter,
|
recordFilter=body.recordFilter,
|
||||||
|
|
@ -1742,13 +1514,26 @@ async def listFeatureDataSources(
|
||||||
wsMandateId, _ = _validateInstanceAccess(instanceId, context)
|
wsMandateId, _ = _validateInstanceAccess(instanceId, context)
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
|
from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import buildEffectiveByWorkspaceFds
|
||||||
|
|
||||||
rootIf = getRootInterface()
|
rootIf = getRootInterface()
|
||||||
recordFilter: dict = {"workspaceInstanceId": instanceId}
|
recordFilter: dict = {"workspaceInstanceId": instanceId}
|
||||||
if wsMandateId:
|
if wsMandateId:
|
||||||
recordFilter["mandateId"] = wsMandateId
|
recordFilter["mandateId"] = wsMandateId
|
||||||
records = rootIf.db.getRecordset(FeatureDataSource, recordFilter=recordFilter)
|
records = rootIf.db.getRecordset(FeatureDataSource, recordFilter=recordFilter)
|
||||||
return JSONResponse({"featureDataSources": records or []})
|
if not records:
|
||||||
|
return JSONResponse({"featureDataSources": []})
|
||||||
|
|
||||||
|
effNeutralize = buildEffectiveByWorkspaceFds(records, "neutralize", mode="aggregate")
|
||||||
|
effScope = buildEffectiveByWorkspaceFds(records, "scope", mode="aggregate")
|
||||||
|
effRag = buildEffectiveByWorkspaceFds(records, "ragIndexEnabled", mode="aggregate")
|
||||||
|
for fds in records:
|
||||||
|
fdsId = fds.get("id", "")
|
||||||
|
fds["effectiveNeutralize"] = effNeutralize.get(fdsId, False)
|
||||||
|
fds["effectiveScope"] = effScope.get(fdsId, "personal")
|
||||||
|
fds["effectiveRagIndexEnabled"] = effRag.get(fdsId, False)
|
||||||
|
|
||||||
|
return JSONResponse({"featureDataSources": records})
|
||||||
|
|
||||||
|
|
||||||
@router.delete("/{instanceId}/feature-datasources/{featureDataSourceId}")
|
@router.delete("/{instanceId}/feature-datasources/{featureDataSourceId}")
|
||||||
|
|
@ -1769,112 +1554,6 @@ async def deleteFeatureDataSource(
|
||||||
return JSONResponse({"success": True})
|
return JSONResponse({"success": True})
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{instanceId}/connections/{connectionId}/services")
|
|
||||||
@limiter.limit("120/minute")
|
|
||||||
async def listConnectionServices(
|
|
||||||
request: Request,
|
|
||||||
instanceId: str = Path(...),
|
|
||||||
connectionId: str = Path(...),
|
|
||||||
context: RequestContext = Depends(getRequestContext),
|
|
||||||
):
|
|
||||||
"""Return the available services for a specific UserConnection."""
|
|
||||||
_mandateId, _ = _validateInstanceAccess(instanceId, context)
|
|
||||||
try:
|
|
||||||
from modules.connectors.connectorResolver import ConnectorResolver
|
|
||||||
from modules.serviceCenter import getService as getSvc
|
|
||||||
from modules.serviceCenter.context import ServiceCenterContext
|
|
||||||
ctx = ServiceCenterContext(
|
|
||||||
user=context.user,
|
|
||||||
mandate_id=_mandateId or "",
|
|
||||||
feature_instance_id=instanceId,
|
|
||||||
)
|
|
||||||
chatService = getSvc("chat", ctx)
|
|
||||||
securityService = getSvc("security", ctx)
|
|
||||||
dbInterface = _buildResolverDbInterface(chatService)
|
|
||||||
resolver = ConnectorResolver(securityService, dbInterface)
|
|
||||||
provider = await resolver.resolve(connectionId)
|
|
||||||
services = provider.getAvailableServices()
|
|
||||||
_serviceLabels = {
|
|
||||||
"sharepoint": "SharePoint",
|
|
||||||
"outlook": "Outlook",
|
|
||||||
"teams": "Teams",
|
|
||||||
"onedrive": "OneDrive",
|
|
||||||
"drive": "Google Drive",
|
|
||||||
"gmail": "Gmail",
|
|
||||||
"files": "Files (FTP)",
|
|
||||||
"kdrive": "kDrive",
|
|
||||||
"calendar": "Calendar",
|
|
||||||
"contact": "Contacts",
|
|
||||||
}
|
|
||||||
_serviceIcons = {
|
|
||||||
"sharepoint": "sharepoint",
|
|
||||||
"outlook": "mail",
|
|
||||||
"teams": "chat",
|
|
||||||
"onedrive": "cloud",
|
|
||||||
"drive": "cloud",
|
|
||||||
"gmail": "mail",
|
|
||||||
"files": "folder",
|
|
||||||
"kdrive": "cloud",
|
|
||||||
"calendar": "calendar",
|
|
||||||
"contact": "contact",
|
|
||||||
}
|
|
||||||
items = [
|
|
||||||
{
|
|
||||||
"service": s,
|
|
||||||
"label": _serviceLabels.get(s, s),
|
|
||||||
"icon": _serviceIcons.get(s, "folder"),
|
|
||||||
}
|
|
||||||
for s in services
|
|
||||||
]
|
|
||||||
return JSONResponse({"services": items})
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error listing services for connection {connectionId}: {e}")
|
|
||||||
return JSONResponse({"services": [], "error": str(e)}, status_code=400)
|
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{instanceId}/connections/{connectionId}/browse")
|
|
||||||
@limiter.limit("300/minute")
|
|
||||||
async def browseConnectionService(
|
|
||||||
request: Request,
|
|
||||||
instanceId: str = Path(...),
|
|
||||||
connectionId: str = Path(...),
|
|
||||||
service: str = Query(..., description="Service name (e.g. sharepoint, onedrive, outlook)"),
|
|
||||||
path: str = Query("/", description="Path within the service to browse"),
|
|
||||||
context: RequestContext = Depends(getRequestContext),
|
|
||||||
):
|
|
||||||
"""Browse folders/items within a connection's service at a given path."""
|
|
||||||
_mandateId, _ = _validateInstanceAccess(instanceId, context)
|
|
||||||
try:
|
|
||||||
from modules.connectors.connectorResolver import ConnectorResolver
|
|
||||||
from modules.serviceCenter import getService as getSvc
|
|
||||||
from modules.serviceCenter.context import ServiceCenterContext
|
|
||||||
ctx = ServiceCenterContext(
|
|
||||||
user=context.user,
|
|
||||||
mandate_id=_mandateId or "",
|
|
||||||
feature_instance_id=instanceId,
|
|
||||||
)
|
|
||||||
chatService = getSvc("chat", ctx)
|
|
||||||
securityService = getSvc("security", ctx)
|
|
||||||
dbInterface = _buildResolverDbInterface(chatService)
|
|
||||||
resolver = ConnectorResolver(securityService, dbInterface)
|
|
||||||
adapter = await resolver.resolveService(connectionId, service)
|
|
||||||
entries = await adapter.browse(path, filter=None)
|
|
||||||
items = []
|
|
||||||
for entry in (entries or []):
|
|
||||||
items.append({
|
|
||||||
"name": entry.name,
|
|
||||||
"path": entry.path,
|
|
||||||
"isFolder": entry.isFolder,
|
|
||||||
"size": entry.size,
|
|
||||||
"mimeType": entry.mimeType,
|
|
||||||
"metadata": entry.metadata if hasattr(entry, "metadata") else {},
|
|
||||||
})
|
|
||||||
return JSONResponse({"items": items, "path": path, "service": service})
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error browsing {service} for connection {connectionId} at '{path}': {e}")
|
|
||||||
return JSONResponse({"items": [], "error": str(e)}, status_code=400)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Voice endpoints
|
# Voice endpoints
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -2190,6 +1869,71 @@ async def putWorkspaceUserSettings(
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Per-user UI state: tree expand/collapse (UDB + FilesTab)
|
||||||
|
# Persisted on WorkspaceUserSettings.uiTreeExpansion as a {scope: [ids]} map.
|
||||||
|
# Each FE tab uses its own scope key so collapse-state for one tab doesn't
|
||||||
|
# bleed into another.
|
||||||
|
|
||||||
|
@router.get("/{instanceId}/ui-tree-expansion/{scope}")
|
||||||
|
@limiter.limit("300/minute")
|
||||||
|
async def getUiTreeExpansion(
|
||||||
|
request: Request,
|
||||||
|
instanceId: str = Path(...),
|
||||||
|
scope: str = Path(..., description="UI scope key, e.g. 'sources', 'filesOwn', 'filesShared'"),
|
||||||
|
context: RequestContext = Depends(getRequestContext),
|
||||||
|
):
|
||||||
|
"""Return the expanded tree-node ids for the current user + scope.
|
||||||
|
|
||||||
|
Returns `null` when the user has never persisted a state for this scope
|
||||||
|
(lets the FE fall back to backend `defaultExpanded` hints). Returns `[]`
|
||||||
|
when the user actively collapsed everything.
|
||||||
|
"""
|
||||||
|
_validateInstanceAccess(instanceId, context)
|
||||||
|
wsInterface = _getWorkspaceInterface(context, instanceId)
|
||||||
|
settings = wsInterface.getWorkspaceUserSettings(str(context.user.id))
|
||||||
|
expansion = (settings.uiTreeExpansion if settings else {}) or {}
|
||||||
|
if scope not in expansion:
|
||||||
|
return JSONResponse({"expandedNodes": None})
|
||||||
|
return JSONResponse({"expandedNodes": list(expansion.get(scope) or [])})
|
||||||
|
|
||||||
|
|
||||||
|
@router.put("/{instanceId}/ui-tree-expansion/{scope}")
|
||||||
|
@limiter.limit("300/minute")
|
||||||
|
async def putUiTreeExpansion(
|
||||||
|
request: Request,
|
||||||
|
instanceId: str = Path(...),
|
||||||
|
scope: str = Path(...),
|
||||||
|
body: dict = Body(...),
|
||||||
|
context: RequestContext = Depends(getRequestContext),
|
||||||
|
):
|
||||||
|
"""Replace the expanded-node list for one scope.
|
||||||
|
|
||||||
|
Body: `{"expandedNodes": List[str]}`. Empty list = explicit collapse-all.
|
||||||
|
"""
|
||||||
|
_validateInstanceAccess(instanceId, context)
|
||||||
|
wsInterface = _getWorkspaceInterface(context, instanceId)
|
||||||
|
userId = str(context.user.id)
|
||||||
|
nodes = body.get("expandedNodes")
|
||||||
|
if not isinstance(nodes, list):
|
||||||
|
raise HTTPException(status_code=400, detail=routeApiMsg("expandedNodes must be a list"))
|
||||||
|
cleaned = [str(n) for n in nodes if isinstance(n, (str, int))]
|
||||||
|
|
||||||
|
existing = wsInterface.getWorkspaceUserSettings(userId)
|
||||||
|
existingMap: Dict[str, List[str]] = (existing.uiTreeExpansion if existing else {}) or {}
|
||||||
|
existingMap = dict(existingMap)
|
||||||
|
existingMap[scope] = cleaned
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"userId": userId,
|
||||||
|
"mandateId": str(context.mandateId) if context.mandateId else "",
|
||||||
|
"featureInstanceId": instanceId,
|
||||||
|
"uiTreeExpansion": existingMap,
|
||||||
|
}
|
||||||
|
wsInterface.saveWorkspaceUserSettings(data)
|
||||||
|
return JSONResponse({"expandedNodes": cleaned})
|
||||||
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
|
# RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1659,7 +1659,7 @@ class BillingObjects:
|
||||||
try:
|
try:
|
||||||
appInterface = getAppInterface(self.currentUser)
|
appInterface = getAppInterface(self.currentUser)
|
||||||
appInterface.db._ensure_connection()
|
appInterface.db._ensure_connection()
|
||||||
with appInterface.db.connection.cursor() as cur:
|
with appInterface.db.borrowCursor() as cur:
|
||||||
if appInterface.db._ensureTableExists(UserInDB):
|
if appInterface.db._ensureTableExists(UserInDB):
|
||||||
cur.execute(
|
cur.execute(
|
||||||
'SELECT "id" FROM "UserInDB" WHERE '
|
'SELECT "id" FROM "UserInDB" WHERE '
|
||||||
|
|
@ -1780,7 +1780,7 @@ class BillingObjects:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.db._ensure_connection()
|
self.db._ensure_connection()
|
||||||
with self.db.connection.cursor() as cur:
|
with self.db.borrowCursor() as cur:
|
||||||
countSql = f'SELECT COUNT(*) FROM "{table}"{whereClause}'
|
countSql = f'SELECT COUNT(*) FROM "{table}"{whereClause}'
|
||||||
cur.execute(countSql, whereValues)
|
cur.execute(countSql, whereValues)
|
||||||
totalItems = cur.fetchone()["count"]
|
totalItems = cur.fetchone()["count"]
|
||||||
|
|
@ -1797,10 +1797,7 @@ class BillingObjects:
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"_searchTransactionsPaginated SQL error: {e}", exc_info=True)
|
logger.error(f"_searchTransactionsPaginated SQL error: {e}", exc_info=True)
|
||||||
try:
|
# Rollback is handled by `borrowCursor()` context manager on exit.
|
||||||
self.db.connection.rollback()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
return {"items": [], "totalItems": 0, "totalPages": 0}
|
return {"items": [], "totalItems": 0, "totalPages": 0}
|
||||||
|
|
||||||
def _buildScopeFilter(
|
def _buildScopeFilter(
|
||||||
|
|
@ -1872,7 +1869,7 @@ class BillingObjects:
|
||||||
|
|
||||||
result: Dict[str, Any] = {}
|
result: Dict[str, Any] = {}
|
||||||
|
|
||||||
with self.db.connection.cursor() as cur:
|
with self.db.borrowCursor() as cur:
|
||||||
# 1) Totals
|
# 1) Totals
|
||||||
cur.execute(
|
cur.execute(
|
||||||
f'SELECT COALESCE(SUM("amount"), 0) AS total, COUNT(*) AS cnt FROM "{table}"{whereClause}',
|
f'SELECT COALESCE(SUM("amount"), 0) AS total, COUNT(*) AS cnt FROM "{table}"{whereClause}',
|
||||||
|
|
@ -1947,17 +1944,12 @@ class BillingObjects:
|
||||||
})
|
})
|
||||||
result["timeSeries"] = timeSeries
|
result["timeSeries"] = timeSeries
|
||||||
|
|
||||||
self.db.connection.commit()
|
# Commit/rollback are handled by `borrowCursor()` context manager.
|
||||||
|
|
||||||
result["_allAccounts"] = allAccounts
|
result["_allAccounts"] = allAccounts
|
||||||
return result
|
return result
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in getTransactionStatisticsAggregated: {e}", exc_info=True)
|
logger.error(f"Error in getTransactionStatisticsAggregated: {e}", exc_info=True)
|
||||||
try:
|
|
||||||
self.db.connection.rollback()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
return self._emptyStats()
|
return self._emptyStats()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
||||||
|
|
@ -228,6 +228,22 @@ class KnowledgeObjects:
|
||||||
"""Get all ContentChunks for a file."""
|
"""Get all ContentChunks for a file."""
|
||||||
return self.db.getRecordset(ContentChunk, recordFilter={"fileId": fileId})
|
return self.db.getRecordset(ContentChunk, recordFilter={"fileId": fileId})
|
||||||
|
|
||||||
|
def countChunksByFileIds(self, fileIds: List[str]) -> Dict[str, int]:
|
||||||
|
"""Return a {fileId: chunkCount} mapping for the given file IDs.
|
||||||
|
|
||||||
|
One aggregate query instead of N round trips. Used by RAG inventory
|
||||||
|
to display real chunk counts per DataSource without loading the
|
||||||
|
embedding vectors. Missing file IDs map to 0 in the caller's logic.
|
||||||
|
"""
|
||||||
|
if not fileIds:
|
||||||
|
return {}
|
||||||
|
if not self.db._ensureTableExists(ContentChunk):
|
||||||
|
return {}
|
||||||
|
sql = 'SELECT "fileId", COUNT(*) AS cnt FROM "ContentChunk" WHERE "fileId" = ANY(%s) GROUP BY "fileId"'
|
||||||
|
with self.db.borrowCursor() as cursor:
|
||||||
|
cursor.execute(sql, (list(fileIds),))
|
||||||
|
return {row["fileId"]: int(row["cnt"]) for row in cursor.fetchall()}
|
||||||
|
|
||||||
def deleteContentChunks(self, fileId: str) -> int:
|
def deleteContentChunks(self, fileId: str) -> int:
|
||||||
"""Delete all ContentChunks for a file. Returns count of deleted chunks."""
|
"""Delete all ContentChunks for a file. Returns count of deleted chunks."""
|
||||||
chunks = self.db.getRecordset(ContentChunk, recordFilter={"fileId": fileId})
|
chunks = self.db.getRecordset(ContentChunk, recordFilter={"fileId": fileId})
|
||||||
|
|
|
||||||
|
|
@ -1221,22 +1221,17 @@ class ComponentObjects:
|
||||||
for item in fileRows
|
for item in fileRows
|
||||||
]
|
]
|
||||||
|
|
||||||
# Single transaction: delete FileData, FileItem, then FileFolder (children first)
|
# Single transaction: delete FileData, FileItem, then FileFolder (children first).
|
||||||
self.db._ensure_connection()
|
# Commit/rollback are handled by `borrowCursor()` on exit.
|
||||||
try:
|
with self.db.borrowCursor() as cursor:
|
||||||
with self.db.connection.cursor() as cursor:
|
if fileIds:
|
||||||
if fileIds:
|
cursor.execute('DELETE FROM "FileData" WHERE "id" = ANY(%s)', (fileIds,))
|
||||||
cursor.execute('DELETE FROM "FileData" WHERE "id" = ANY(%s)', (fileIds,))
|
cursor.execute('DELETE FROM "FileItem" WHERE "id" = ANY(%s)', (fileIds,))
|
||||||
cursor.execute('DELETE FROM "FileItem" WHERE "id" = ANY(%s)', (fileIds,))
|
orderedIds = list(folderIds)
|
||||||
orderedIds = list(folderIds)
|
orderedIds.remove(folderId)
|
||||||
orderedIds.remove(folderId)
|
orderedIds.append(folderId)
|
||||||
orderedIds.append(folderId)
|
if orderedIds:
|
||||||
if orderedIds:
|
cursor.execute('DELETE FROM "FileFolder" WHERE "id" = ANY(%s)', (orderedIds,))
|
||||||
cursor.execute('DELETE FROM "FileFolder" WHERE "id" = ANY(%s)', (orderedIds,))
|
|
||||||
self.db.connection.commit()
|
|
||||||
except Exception:
|
|
||||||
self.db.connection.rollback()
|
|
||||||
raise
|
|
||||||
|
|
||||||
return {"deletedFolders": len(folderIds), "deletedFiles": len(fileIds)}
|
return {"deletedFolders": len(folderIds), "deletedFiles": len(fileIds)}
|
||||||
|
|
||||||
|
|
@ -1507,7 +1502,7 @@ class ComponentObjects:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.db._ensure_connection()
|
self.db._ensure_connection()
|
||||||
with self.db.connection.cursor() as cursor:
|
with self.db.borrowCursor() as cursor:
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
'SELECT "id", "sysCreatedBy" FROM "FileItem" WHERE "id" = ANY(%s)',
|
'SELECT "id", "sysCreatedBy" FROM "FileItem" WHERE "id" = ANY(%s)',
|
||||||
(uniqueIds,),
|
(uniqueIds,),
|
||||||
|
|
@ -1526,11 +1521,10 @@ class ComponentObjects:
|
||||||
cursor.execute('DELETE FROM "FileItem" WHERE "id" = ANY(%s)', (accessibleIds,))
|
cursor.execute('DELETE FROM "FileItem" WHERE "id" = ANY(%s)', (accessibleIds,))
|
||||||
deletedFiles = cursor.rowcount
|
deletedFiles = cursor.rowcount
|
||||||
|
|
||||||
self.db.connection.commit()
|
# Commit/rollback are handled by `borrowCursor()` context manager.
|
||||||
return {"deletedFiles": deletedFiles}
|
return {"deletedFiles": deletedFiles}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error deleting files in batch: {e}")
|
logger.error(f"Error deleting files in batch: {e}")
|
||||||
self.db.connection.rollback()
|
|
||||||
raise FileDeletionError(f"Error deleting files in batch: {str(e)}")
|
raise FileDeletionError(f"Error deleting files in batch: {str(e)}")
|
||||||
|
|
||||||
def _ensureFeatureInstanceGroup(self, featureInstanceId: str, contextKey: str = "files/list") -> Optional[str]:
|
def _ensureFeatureInstanceGroup(self, featureInstanceId: str, contextKey: str = "files/list") -> Optional[str]:
|
||||||
|
|
|
||||||
|
|
@ -374,7 +374,7 @@ def getRecordsetWithRBAC(
|
||||||
|
|
||||||
query = f'SELECT * FROM "{table}"{whereClause}{orderByClause}{limitClause}'
|
query = f'SELECT * FROM "{table}"{whereClause}{orderByClause}{limitClause}'
|
||||||
|
|
||||||
with connector.connection.cursor() as cursor:
|
with connector.borrowCursor() as cursor:
|
||||||
cursor.execute(query, whereValues)
|
cursor.execute(query, whereValues)
|
||||||
records = [dict(row) for row in cursor.fetchall()]
|
records = [dict(row) for row in cursor.fetchall()]
|
||||||
|
|
||||||
|
|
@ -561,7 +561,7 @@ def getRecordsetPaginatedWithRBAC(
|
||||||
offset = (pagination.page - 1) * pagination.pageSize
|
offset = (pagination.page - 1) * pagination.pageSize
|
||||||
limitClause = f" LIMIT {pagination.pageSize} OFFSET {offset}"
|
limitClause = f" LIMIT {pagination.pageSize} OFFSET {offset}"
|
||||||
|
|
||||||
with connector.connection.cursor() as cursor:
|
with connector.borrowCursor() as cursor:
|
||||||
countSql = f'SELECT COUNT(*) FROM "{table}"{whereClause}'
|
countSql = f'SELECT COUNT(*) FROM "{table}"{whereClause}'
|
||||||
cursor.execute(countSql, countValues)
|
cursor.execute(countSql, countValues)
|
||||||
totalItems = cursor.fetchone()["count"]
|
totalItems = cursor.fetchone()["count"]
|
||||||
|
|
@ -709,7 +709,7 @@ def getDistinctColumnValuesWithRBAC(
|
||||||
|
|
||||||
sql = f'SELECT DISTINCT "{column}"::TEXT AS val FROM "{table}"{nonNullWhere} ORDER BY val'
|
sql = f'SELECT DISTINCT "{column}"::TEXT AS val FROM "{table}"{nonNullWhere} ORDER BY val'
|
||||||
|
|
||||||
with connector.connection.cursor() as cursor:
|
with connector.borrowCursor() as cursor:
|
||||||
cursor.execute(sql, whereValues)
|
cursor.execute(sql, whereValues)
|
||||||
result = [row["val"] for row in cursor.fetchall()]
|
result = [row["val"] for row in cursor.fetchall()]
|
||||||
|
|
||||||
|
|
@ -719,7 +719,7 @@ def getDistinctColumnValuesWithRBAC(
|
||||||
emptySql = f'SELECT 1 FROM "{table}"{whereClause} AND {emptyCond} LIMIT 1'
|
emptySql = f'SELECT 1 FROM "{table}"{whereClause} AND {emptyCond} LIMIT 1'
|
||||||
else:
|
else:
|
||||||
emptySql = f'SELECT 1 FROM "{table}" WHERE {emptyCond} LIMIT 1'
|
emptySql = f'SELECT 1 FROM "{table}" WHERE {emptyCond} LIMIT 1'
|
||||||
with connector.connection.cursor() as cursor:
|
with connector.borrowCursor() as cursor:
|
||||||
cursor.execute(emptySql, whereValues)
|
cursor.execute(emptySql, whereValues)
|
||||||
if cursor.fetchone():
|
if cursor.fetchone():
|
||||||
result.append(None)
|
result.append(None)
|
||||||
|
|
@ -967,7 +967,7 @@ def buildRbacWhereClause(
|
||||||
# Multi-Tenant Design: Users do NOT have mandateId - they are linked via UserMandate
|
# Multi-Tenant Design: Users do NOT have mandateId - they are linked via UserMandate
|
||||||
if table == "UserInDB":
|
if table == "UserInDB":
|
||||||
try:
|
try:
|
||||||
with connector.connection.cursor() as cursor:
|
with connector.borrowCursor() as cursor:
|
||||||
# Get all user IDs that are members of the current mandate
|
# Get all user IDs that are members of the current mandate
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
'SELECT "userId" FROM "UserMandate" WHERE "mandateId" = %s AND "enabled" = true',
|
'SELECT "userId" FROM "UserMandate" WHERE "mandateId" = %s AND "enabled" = true',
|
||||||
|
|
@ -994,7 +994,7 @@ def buildRbacWhereClause(
|
||||||
# For UserConnection: Filter via UserMandate junction table
|
# For UserConnection: Filter via UserMandate junction table
|
||||||
elif table == "UserConnection":
|
elif table == "UserConnection":
|
||||||
try:
|
try:
|
||||||
with connector.connection.cursor() as cursor:
|
with connector.borrowCursor() as cursor:
|
||||||
# Get all user IDs that are members of the current mandate
|
# Get all user IDs that are members of the current mandate
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
'SELECT "userId" FROM "UserMandate" WHERE "mandateId" = %s AND "enabled" = true',
|
'SELECT "userId" FROM "UserMandate" WHERE "mandateId" = %s AND "enabled" = true',
|
||||||
|
|
|
||||||
|
|
@ -68,9 +68,19 @@ def removeDemoConfig(
|
||||||
request: Request,
|
request: Request,
|
||||||
currentUser: User = Depends(requirePlatformAdmin),
|
currentUser: User = Depends(requirePlatformAdmin),
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Remove all data created by a demo configuration."""
|
"""Remove all data created by a demo configuration.
|
||||||
|
|
||||||
|
Requires X-Confirm-Destructive: true header as safety guard.
|
||||||
|
"""
|
||||||
from modules.demoConfigs import getDemoConfigByCode
|
from modules.demoConfigs import getDemoConfigByCode
|
||||||
|
|
||||||
|
confirmHeader = request.headers.get("X-Confirm-Destructive", "").lower()
|
||||||
|
if confirmHeader != "true":
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
detail="Destructive operation requires header X-Confirm-Destructive: true",
|
||||||
|
)
|
||||||
|
|
||||||
config = getDemoConfigByCode(code)
|
config = getDemoConfigByCode(code)
|
||||||
if not config:
|
if not config:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
|
@ -79,7 +89,7 @@ def removeDemoConfig(
|
||||||
)
|
)
|
||||||
|
|
||||||
db = getRootDbAppConnector()
|
db = getRootDbAppConnector()
|
||||||
logger.info(f"Removing demo config '{code}' (user: {currentUser.username})")
|
logger.info(f"Removing demo config '{code}' (user: {currentUser.username}, confirmed)")
|
||||||
summary = config.remove(db)
|
summary = config.remove(db)
|
||||||
logger.info(f"Demo config '{code}' removed: {summary}")
|
logger.info(f"Demo config '{code}' removed: {summary}")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -778,7 +778,12 @@ async def _updateKnowledgeConsent(
|
||||||
cancelled = cancelJobsByConnection(connectionId)
|
cancelled = cancelJobsByConnection(connectionId)
|
||||||
else:
|
else:
|
||||||
from modules.datamodels.datamodelDataSource import DataSource
|
from modules.datamodels.datamodelDataSource import DataSource
|
||||||
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId, "ragIndexEnabled": True})
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag
|
||||||
|
allConnDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||||
|
dataSources = [
|
||||||
|
ds for ds in (allConnDs or [])
|
||||||
|
if getEffectiveFlag(ds, "ragIndexEnabled", allConnDs, mode="walk") is True
|
||||||
|
]
|
||||||
if dataSources:
|
if dataSources:
|
||||||
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
|
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
|
||||||
authority = connection.authority.value if hasattr(connection.authority, "value") else str(connection.authority or "")
|
authority = connection.authority.value if hasattr(connection.authority, "value") else str(connection.authority or "")
|
||||||
|
|
|
||||||
|
|
@ -211,7 +211,7 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user, *, man
|
||||||
|
|
||||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
|
||||||
await knowledgeService.requestIngestion(
|
handle = await knowledgeService.requestIngestion(
|
||||||
IngestionJob(
|
IngestionJob(
|
||||||
sourceKind="file",
|
sourceKind="file",
|
||||||
sourceId=fileId,
|
sourceId=fileId,
|
||||||
|
|
@ -229,7 +229,10 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user, *, man
|
||||||
# Re-acquire interface after await to avoid stale user context from the singleton
|
# Re-acquire interface after await to avoid stale user context from the singleton
|
||||||
mgmtInterface = interfaceDbManagement.getInterface(user)
|
mgmtInterface = interfaceDbManagement.getInterface(user)
|
||||||
mgmtInterface.updateFile(fileId, {"status": "active"})
|
mgmtInterface.updateFile(fileId, {"status": "active"})
|
||||||
logger.info(f"Auto-index complete for file {fileId} ({fileName})")
|
if handle.status == "failed":
|
||||||
|
logger.warning(f"Auto-index ingestion failed for file {fileId} ({fileName}): {handle.error}")
|
||||||
|
else:
|
||||||
|
logger.info(f"Auto-index complete for file {fileId} ({fileName})")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Auto-index failed for file {fileId}: {e}", exc_info=True)
|
logger.error(f"Auto-index failed for file {fileId}: {e}", exc_info=True)
|
||||||
|
|
@ -256,6 +259,24 @@ router = APIRouter(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _getInterfaceForOwnedItem(currentUser: User, context, itemId: str, modelClass) -> Any:
|
||||||
|
"""Create a management interface scoped to the item's own context.
|
||||||
|
Looks up the item by ID (unscoped) to resolve its mandateId/featureInstanceId,
|
||||||
|
then creates the interface with THAT context. This ensures toggle operations
|
||||||
|
work regardless of which page the user is on."""
|
||||||
|
unscoped = interfaceDbManagement.getInterface(currentUser)
|
||||||
|
record = unscoped.db.getRecord(modelClass, itemId)
|
||||||
|
if not record:
|
||||||
|
raise interfaceDbManagement.FileNotFoundError(f"Item {itemId} not found")
|
||||||
|
itemMandateId = record.get("mandateId") if isinstance(record, dict) else getattr(record, "mandateId", None)
|
||||||
|
itemInstanceId = record.get("featureInstanceId") if isinstance(record, dict) else getattr(record, "featureInstanceId", None)
|
||||||
|
return interfaceDbManagement.getInterface(
|
||||||
|
currentUser,
|
||||||
|
mandateId=str(itemMandateId) if itemMandateId else None,
|
||||||
|
featureInstanceId=str(itemInstanceId) if itemInstanceId else None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@router.get("/folders/tree")
|
@router.get("/folders/tree")
|
||||||
@limiter.limit("120/minute")
|
@limiter.limit("120/minute")
|
||||||
def get_folder_tree(
|
def get_folder_tree(
|
||||||
|
|
@ -272,10 +293,12 @@ def get_folder_tree(
|
||||||
)
|
)
|
||||||
o = (owner or "me").strip().lower()
|
o = (owner or "me").strip().lower()
|
||||||
if o == "me":
|
if o == "me":
|
||||||
return managementInterface.getOwnFolderTree()
|
folders = managementInterface.getOwnFolderTree()
|
||||||
if o == "shared":
|
elif o == "shared":
|
||||||
return managementInterface.getSharedFolderTree()
|
folders = managementInterface.getSharedFolderTree()
|
||||||
raise HTTPException(status_code=400, detail="owner must be 'me' or 'shared'")
|
else:
|
||||||
|
raise HTTPException(status_code=400, detail="owner must be 'me' or 'shared'")
|
||||||
|
return folders
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -283,6 +306,185 @@ def get_folder_tree(
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/attributes")
|
||||||
|
@limiter.limit("120/minute")
|
||||||
|
def getAttributesForIds(
|
||||||
|
request: Request,
|
||||||
|
body: Dict[str, Any] = Body(...),
|
||||||
|
currentUser: User = Depends(getCurrentUser),
|
||||||
|
context: RequestContext = Depends(getRequestContext),
|
||||||
|
):
|
||||||
|
"""Return current attribute values (neutralize, scope, ragIndexEnabled) for
|
||||||
|
a list of node IDs. For folder IDs, computes 'mixed' by checking direct
|
||||||
|
children. The frontend sends this after every toggle to refresh visible
|
||||||
|
nodes without reloading the tree structure."""
|
||||||
|
ids = body.get("ids", [])
|
||||||
|
if not isinstance(ids, list) or len(ids) == 0:
|
||||||
|
return {}
|
||||||
|
if len(ids) > 500:
|
||||||
|
raise HTTPException(status_code=400, detail="Max 500 IDs per request")
|
||||||
|
|
||||||
|
try:
|
||||||
|
managementInterface = interfaceDbManagement.getInterface(
|
||||||
|
currentUser,
|
||||||
|
mandateId=str(context.mandateId) if context.mandateId else None,
|
||||||
|
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||||
|
)
|
||||||
|
db = managementInterface.db
|
||||||
|
userId = str(currentUser.id)
|
||||||
|
|
||||||
|
allFolders = db.getRecordset(FileFolder, recordFilter={"sysCreatedBy": userId}) or []
|
||||||
|
allFiles = db.getRecordset(FileItem, recordFilter={"sysCreatedBy": userId}) or []
|
||||||
|
|
||||||
|
folderById = {f["id"]: f for f in allFolders}
|
||||||
|
fileById = {f["id"]: f for f in allFiles}
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"getAttributesForIds: %d ids requested, %d folders found, %d files found",
|
||||||
|
len(ids), len(allFolders), len(allFiles),
|
||||||
|
)
|
||||||
|
|
||||||
|
result: Dict[str, Dict[str, Any]] = {}
|
||||||
|
|
||||||
|
for nodeId in ids:
|
||||||
|
if nodeId.startswith("__filesRoot:"):
|
||||||
|
attrs = _computeSyntheticRootAttrs(allFolders, allFiles)
|
||||||
|
result[nodeId] = attrs
|
||||||
|
elif nodeId in folderById:
|
||||||
|
folder = folderById[nodeId]
|
||||||
|
attrs = _computeFolderAttrs(folder, allFolders, allFiles)
|
||||||
|
result[nodeId] = attrs
|
||||||
|
elif nodeId in fileById:
|
||||||
|
f = fileById[nodeId]
|
||||||
|
result[nodeId] = {
|
||||||
|
"neutralize": bool(f.get("neutralize", False)),
|
||||||
|
"scope": f.get("scope", "personal"),
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
logger.debug("getAttributesForIds: unknown id=%s", nodeId)
|
||||||
|
|
||||||
|
logger.info("getAttributesForIds: returning %d entries", len(result))
|
||||||
|
return result
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"getAttributesForIds error: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
def _computeFolderAttrs(
|
||||||
|
folder: Dict[str, Any],
|
||||||
|
allFolders: List[Dict[str, Any]],
|
||||||
|
allFiles: List[Dict[str, Any]],
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Compute attributes for a folder. Recursively checks the entire subtree:
|
||||||
|
if ANY descendant at any depth has a different value, the folder shows 'mixed'.
|
||||||
|
This propagates up through all ancestor levels."""
|
||||||
|
fid = folder["id"]
|
||||||
|
neutralizeResult = _effectiveNeutralize(fid, allFolders, allFiles)
|
||||||
|
scopeResult = _effectiveScope(fid, allFolders, allFiles)
|
||||||
|
return {"neutralize": neutralizeResult, "scope": scopeResult}
|
||||||
|
|
||||||
|
|
||||||
|
def _effectiveNeutralize(
|
||||||
|
folderId: str,
|
||||||
|
allFolders: List[Dict[str, Any]],
|
||||||
|
allFiles: List[Dict[str, Any]],
|
||||||
|
) -> Any:
|
||||||
|
"""Recursively compute effective neutralize for a folder.
|
||||||
|
Returns 'mixed' if any descendants diverge, otherwise the folder's own value."""
|
||||||
|
childFolders = [f for f in allFolders if f.get("parentId") == folderId]
|
||||||
|
childFiles = [f for f in allFiles if f.get("folderId") == folderId]
|
||||||
|
|
||||||
|
if not childFolders and not childFiles:
|
||||||
|
folder = next((f for f in allFolders if f["id"] == folderId), None)
|
||||||
|
return bool(folder.get("neutralize", False)) if folder else False
|
||||||
|
|
||||||
|
childVals = set()
|
||||||
|
for cf in childFolders:
|
||||||
|
effective = _effectiveNeutralize(cf["id"], allFolders, allFiles)
|
||||||
|
if effective == "mixed":
|
||||||
|
return "mixed"
|
||||||
|
childVals.add(effective)
|
||||||
|
for cf in childFiles:
|
||||||
|
childVals.add(bool(cf.get("neutralize", False)))
|
||||||
|
|
||||||
|
if len(childVals) > 1:
|
||||||
|
return "mixed"
|
||||||
|
if not childVals:
|
||||||
|
folder = next((f for f in allFolders if f["id"] == folderId), None)
|
||||||
|
return bool(folder.get("neutralize", False)) if folder else False
|
||||||
|
return childVals.pop()
|
||||||
|
|
||||||
|
|
||||||
|
def _effectiveScope(
|
||||||
|
folderId: str,
|
||||||
|
allFolders: List[Dict[str, Any]],
|
||||||
|
allFiles: List[Dict[str, Any]],
|
||||||
|
) -> Any:
|
||||||
|
"""Recursively compute effective scope for a folder.
|
||||||
|
Returns 'mixed' if any descendants diverge, otherwise the folder's own value."""
|
||||||
|
childFolders = [f for f in allFolders if f.get("parentId") == folderId]
|
||||||
|
childFiles = [f for f in allFiles if f.get("folderId") == folderId]
|
||||||
|
|
||||||
|
if not childFolders and not childFiles:
|
||||||
|
folder = next((f for f in allFolders if f["id"] == folderId), None)
|
||||||
|
return folder.get("scope", "personal") if folder else "personal"
|
||||||
|
|
||||||
|
childVals = set()
|
||||||
|
for cf in childFolders:
|
||||||
|
effective = _effectiveScope(cf["id"], allFolders, allFiles)
|
||||||
|
if effective == "mixed":
|
||||||
|
return "mixed"
|
||||||
|
childVals.add(effective)
|
||||||
|
for cf in childFiles:
|
||||||
|
childVals.add(cf.get("scope", "personal"))
|
||||||
|
|
||||||
|
if len(childVals) > 1:
|
||||||
|
return "mixed"
|
||||||
|
if not childVals:
|
||||||
|
folder = next((f for f in allFolders if f["id"] == folderId), None)
|
||||||
|
return folder.get("scope", "personal") if folder else "personal"
|
||||||
|
return childVals.pop()
|
||||||
|
|
||||||
|
|
||||||
|
def _computeSyntheticRootAttrs(
|
||||||
|
allFolders: List[Dict[str, Any]],
|
||||||
|
allFiles: List[Dict[str, Any]],
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Compute attributes for the synthetic root by recursively checking the
|
||||||
|
entire tree. If ANY item at any depth diverges, root shows 'mixed'."""
|
||||||
|
topFolders = [f for f in allFolders if not f.get("parentId")]
|
||||||
|
topFiles = [f for f in allFiles if not f.get("folderId")]
|
||||||
|
|
||||||
|
neutralizeVals = set()
|
||||||
|
scopeVals = set()
|
||||||
|
for cf in topFolders:
|
||||||
|
nEff = _effectiveNeutralize(cf["id"], allFolders, allFiles)
|
||||||
|
if nEff == "mixed":
|
||||||
|
neutralizeVals.add(True)
|
||||||
|
neutralizeVals.add(False)
|
||||||
|
else:
|
||||||
|
neutralizeVals.add(nEff)
|
||||||
|
sEff = _effectiveScope(cf["id"], allFolders, allFiles)
|
||||||
|
if sEff == "mixed":
|
||||||
|
scopeVals.add("__mixed_a__")
|
||||||
|
scopeVals.add("__mixed_b__")
|
||||||
|
else:
|
||||||
|
scopeVals.add(sEff)
|
||||||
|
for cf in topFiles:
|
||||||
|
neutralizeVals.add(bool(cf.get("neutralize", False)))
|
||||||
|
scopeVals.add(cf.get("scope", "personal"))
|
||||||
|
|
||||||
|
if not neutralizeVals and not scopeVals:
|
||||||
|
return {"neutralize": False, "scope": "personal"}
|
||||||
|
|
||||||
|
return {
|
||||||
|
"neutralize": "mixed" if len(neutralizeVals) > 1 else (neutralizeVals.pop() if neutralizeVals else False),
|
||||||
|
"scope": "mixed" if len(scopeVals) > 1 else (scopeVals.pop() if scopeVals else "personal"),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@router.post("/folders", status_code=status.HTTP_201_CREATED)
|
@router.post("/folders", status_code=status.HTTP_201_CREATED)
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit("30/minute")
|
||||||
def create_folder(
|
def create_folder(
|
||||||
|
|
@ -353,7 +555,12 @@ def move_folder(
|
||||||
context: RequestContext = Depends(getRequestContext),
|
context: RequestContext = Depends(getRequestContext),
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
|
# FE may send `parentId` or `targetParentId`. Accept both so the
|
||||||
|
# FormGeneratorTree generic `provider.moveNodes(targetParentId)` API
|
||||||
|
# remains consistent with the file-move (PUT /api/files/{id}) shape.
|
||||||
newParentId = body.get("parentId")
|
newParentId = body.get("parentId")
|
||||||
|
if newParentId is None:
|
||||||
|
newParentId = body.get("targetParentId")
|
||||||
managementInterface = interfaceDbManagement.getInterface(
|
managementInterface = interfaceDbManagement.getInterface(
|
||||||
currentUser,
|
currentUser,
|
||||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
mandateId=str(context.mandateId) if context.mandateId else None,
|
||||||
|
|
@ -414,11 +621,7 @@ def patch_folder_scope(
|
||||||
if not scope:
|
if not scope:
|
||||||
raise HTTPException(status_code=400, detail="scope is required")
|
raise HTTPException(status_code=400, detail="scope is required")
|
||||||
cascadeToFiles = body.get("cascadeChildren", body.get("cascadeToFiles", False))
|
cascadeToFiles = body.get("cascadeChildren", body.get("cascadeToFiles", False))
|
||||||
managementInterface = interfaceDbManagement.getInterface(
|
managementInterface = _getInterfaceForOwnedItem(currentUser, context, folderId, FileFolder)
|
||||||
currentUser,
|
|
||||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
|
||||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
|
||||||
)
|
|
||||||
return managementInterface.patchFolderScope(folderId, scope, cascadeToFiles)
|
return managementInterface.patchFolderScope(folderId, scope, cascadeToFiles)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise HTTPException(status_code=400, detail=str(e))
|
raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|
@ -446,11 +649,7 @@ def patch_folder_neutralize(
|
||||||
neutralize = body.get("neutralize")
|
neutralize = body.get("neutralize")
|
||||||
if neutralize is None:
|
if neutralize is None:
|
||||||
raise HTTPException(status_code=400, detail="neutralize is required")
|
raise HTTPException(status_code=400, detail="neutralize is required")
|
||||||
managementInterface = interfaceDbManagement.getInterface(
|
managementInterface = _getInterfaceForOwnedItem(currentUser, context, folderId, FileFolder)
|
||||||
currentUser,
|
|
||||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
|
||||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
|
||||||
)
|
|
||||||
return managementInterface.patchFolderNeutralize(folderId, bool(neutralize))
|
return managementInterface.patchFolderNeutralize(folderId, bool(neutralize))
|
||||||
except PermissionError as e:
|
except PermissionError as e:
|
||||||
raise HTTPException(status_code=403, detail=str(e))
|
raise HTTPException(status_code=403, detail=str(e))
|
||||||
|
|
@ -1031,11 +1230,7 @@ def updateFileScope(
|
||||||
if scope == "global" and not context.isSysAdmin:
|
if scope == "global" and not context.isSysAdmin:
|
||||||
raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
|
raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
|
||||||
|
|
||||||
managementInterface = interfaceDbManagement.getInterface(
|
managementInterface = _getInterfaceForOwnedItem(context.user, context, fileId, FileItem)
|
||||||
context.user,
|
|
||||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
|
||||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
|
||||||
)
|
|
||||||
|
|
||||||
managementInterface.updateFile(fileId, {"scope": scope})
|
managementInterface.updateFile(fileId, {"scope": scope})
|
||||||
|
|
||||||
|
|
@ -1093,11 +1288,7 @@ def updateFileNeutralize(
|
||||||
fails the file simply has no index — no un-neutralized data can leak.
|
fails the file simply has no index — no un-neutralized data can leak.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
managementInterface = interfaceDbManagement.getInterface(
|
managementInterface = _getInterfaceForOwnedItem(context.user, context, fileId, FileItem)
|
||||||
context.user,
|
|
||||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
|
||||||
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
|
||||||
)
|
|
||||||
|
|
||||||
managementInterface.updateFile(fileId, {"neutralize": neutralize})
|
managementInterface.updateFile(fileId, {"neutralize": neutralize})
|
||||||
|
|
||||||
|
|
@ -1212,7 +1403,8 @@ def update_file(
|
||||||
request: Request,
|
request: Request,
|
||||||
fileId: str = Path(..., description="ID of the file to update"),
|
fileId: str = Path(..., description="ID of the file to update"),
|
||||||
file_info: Dict[str, Any] = Body(...),
|
file_info: Dict[str, Any] = Body(...),
|
||||||
currentUser: User = Depends(getCurrentUser)
|
currentUser: User = Depends(getCurrentUser),
|
||||||
|
context: RequestContext = Depends(getRequestContext),
|
||||||
) -> FileItem:
|
) -> FileItem:
|
||||||
"""Update file info"""
|
"""Update file info"""
|
||||||
try:
|
try:
|
||||||
|
|
@ -1221,7 +1413,11 @@ def update_file(
|
||||||
if not safeData:
|
if not safeData:
|
||||||
raise HTTPException(status_code=400, detail=routeApiMsg("No editable fields provided"))
|
raise HTTPException(status_code=400, detail=routeApiMsg("No editable fields provided"))
|
||||||
|
|
||||||
managementInterface = interfaceDbManagement.getInterface(currentUser)
|
managementInterface = interfaceDbManagement.getInterface(
|
||||||
|
currentUser,
|
||||||
|
mandateId=str(context.mandateId) if context.mandateId else None,
|
||||||
|
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||||
|
)
|
||||||
|
|
||||||
file = managementInterface.getFile(fileId)
|
file = managementInterface.getFile(fileId)
|
||||||
if not file:
|
if not file:
|
||||||
|
|
@ -1267,10 +1463,15 @@ def update_file(
|
||||||
def delete_file(
|
def delete_file(
|
||||||
request: Request,
|
request: Request,
|
||||||
fileId: str = Path(..., description="ID of the file to delete"),
|
fileId: str = Path(..., description="ID of the file to delete"),
|
||||||
currentUser: User = Depends(getCurrentUser)
|
currentUser: User = Depends(getCurrentUser),
|
||||||
|
context: RequestContext = Depends(getRequestContext),
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Delete a file"""
|
"""Delete a file"""
|
||||||
managementInterface = interfaceDbManagement.getInterface(currentUser)
|
managementInterface = interfaceDbManagement.getInterface(
|
||||||
|
currentUser,
|
||||||
|
mandateId=str(context.mandateId) if context.mandateId else None,
|
||||||
|
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
|
||||||
|
)
|
||||||
|
|
||||||
# Check if the file exists
|
# Check if the file exists
|
||||||
existingFile = managementInterface.getFile(fileId)
|
existingFile = managementInterface.getFile(fileId)
|
||||||
|
|
|
||||||
|
|
@ -9,11 +9,83 @@ from fastapi import APIRouter, HTTPException, Depends, Path, Request, Body
|
||||||
from modules.auth import limiter, getRequestContext, RequestContext
|
from modules.auth import limiter, getRequestContext, RequestContext
|
||||||
from modules.datamodels.datamodelDataSource import DataSource
|
from modules.datamodels.datamodelDataSource import DataSource
|
||||||
from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
|
from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
|
||||||
|
from modules.datamodels.datamodelUam import UserConnection
|
||||||
from modules.shared.i18nRegistry import apiRouteContext
|
from modules.shared.i18nRegistry import apiRouteContext
|
||||||
routeApiMsg = apiRouteContext("routeDataSources")
|
routeApiMsg = apiRouteContext("routeDataSources")
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _ensureConnectionKnowledgeFlag(rootIf, connectionId: str) -> None:
|
||||||
|
"""Forward-only sync: if a DataSource gets RAG-activated, ensure the parent
|
||||||
|
UserConnection.knowledgeIngestionEnabled is true.
|
||||||
|
|
||||||
|
Intentionally NOT bidirectional: disabling the last DataSource does NOT
|
||||||
|
auto-clear knowledgeIngestionEnabled, because the consent flag may have
|
||||||
|
been set explicitly via the Connections page / wizard even before any
|
||||||
|
DataSource exists. Only the master switch (`/knowledge-consent`) may
|
||||||
|
clear it.
|
||||||
|
"""
|
||||||
|
if not connectionId:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
currentConn = rootIf.db.getRecord(UserConnection, connectionId)
|
||||||
|
if not currentConn:
|
||||||
|
return
|
||||||
|
if bool(currentConn.get("knowledgeIngestionEnabled")):
|
||||||
|
return
|
||||||
|
rootIf.db.recordModify(UserConnection, connectionId, {"knowledgeIngestionEnabled": True})
|
||||||
|
logger.info(
|
||||||
|
"Auto-enabled knowledgeIngestionEnabled on UserConnection %s "
|
||||||
|
"(triggered by first active DataSource).",
|
||||||
|
connectionId,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Could not auto-enable knowledgeIngestionEnabled for connection %s: %s", connectionId, e)
|
||||||
|
|
||||||
|
def _computeOwnEffective(rootIf, rec, model, sourceId: str, flag: str) -> Any:
|
||||||
|
"""Re-load the record after modification and compute its aggregate effective value."""
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import (
|
||||||
|
getEffectiveFlag, getEffectiveFlagFds,
|
||||||
|
)
|
||||||
|
freshRec = rootIf.db.getRecord(model, sourceId)
|
||||||
|
if not freshRec:
|
||||||
|
return None
|
||||||
|
if model is DataSource:
|
||||||
|
connectionId = freshRec.get("connectionId", "")
|
||||||
|
allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||||
|
return getEffectiveFlag(freshRec, flag, allDs, mode="aggregate")
|
||||||
|
else:
|
||||||
|
wsId = freshRec.get("workspaceInstanceId", "")
|
||||||
|
allFds = rootIf.db.getRecordset(FeatureDataSource, recordFilter={"workspaceInstanceId": wsId})
|
||||||
|
return getEffectiveFlagFds(freshRec, flag, allFds, mode="aggregate")
|
||||||
|
|
||||||
|
|
||||||
|
def _computeAncestorEffectives(rootIf, rec, model, flag: str) -> List[Dict[str, Any]]:
|
||||||
|
"""Compute the aggregate effective value for all ancestors of `rec`."""
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import (
|
||||||
|
collectAncestorChain, collectAncestorChainFds,
|
||||||
|
getEffectiveFlag, getEffectiveFlagFds,
|
||||||
|
)
|
||||||
|
effectiveKey = f"effective{flag[0].upper()}{flag[1:]}"
|
||||||
|
if model is DataSource:
|
||||||
|
connectionId = rec.get("connectionId", "")
|
||||||
|
allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||||
|
ancestors = collectAncestorChain(rec, allDs)
|
||||||
|
return [
|
||||||
|
{"id": a.get("id") or getattr(a, "id", ""), effectiveKey: getEffectiveFlag(a, flag, allDs, mode="aggregate")}
|
||||||
|
for a in ancestors
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
wsId = rec.get("workspaceInstanceId", "")
|
||||||
|
allFds = rootIf.db.getRecordset(FeatureDataSource, recordFilter={"workspaceInstanceId": wsId})
|
||||||
|
ancestors = collectAncestorChainFds(rec, allFds)
|
||||||
|
return [
|
||||||
|
{"id": a.get("id") or getattr(a, "id", ""), effectiveKey: getEffectiveFlagFds(a, flag, allFds, mode="aggregate")}
|
||||||
|
for a in ancestors
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
router = APIRouter(
|
router = APIRouter(
|
||||||
prefix="/api/datasources",
|
prefix="/api/datasources",
|
||||||
tags=["Data Sources"],
|
tags=["Data Sources"],
|
||||||
|
|
@ -45,26 +117,58 @@ def _findSourceRecord(db, sourceId: str):
|
||||||
def _updateDataSourceScope(
|
def _updateDataSourceScope(
|
||||||
request: Request,
|
request: Request,
|
||||||
sourceId: str = Path(..., description="ID of the DataSource or FeatureDataSource"),
|
sourceId: str = Path(..., description="ID of the DataSource or FeatureDataSource"),
|
||||||
scope: str = Body(..., embed=True),
|
scope: Optional[str] = Body(None, embed=True),
|
||||||
context: RequestContext = Depends(getRequestContext),
|
context: RequestContext = Depends(getRequestContext),
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Update the scope of a DataSource or FeatureDataSource. Global scope requires sysAdmin."""
|
"""Update the scope of a DataSource. Cascade-resets explicit descendants.
|
||||||
if scope not in _VALID_SCOPES:
|
|
||||||
raise HTTPException(status_code=400, detail=f"Invalid scope: {scope}. Must be one of {_VALID_SCOPES}")
|
|
||||||
|
|
||||||
if scope == "global" and not context.isSysAdmin:
|
`scope=None` resets this node to inherit (no cascade). Global scope
|
||||||
raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
|
requires sysAdmin.
|
||||||
|
"""
|
||||||
|
if scope is not None:
|
||||||
|
if scope not in _VALID_SCOPES:
|
||||||
|
raise HTTPException(status_code=400, detail=f"Invalid scope: {scope}. Must be one of {_VALID_SCOPES}")
|
||||||
|
if scope == "global" and not context.isSysAdmin:
|
||||||
|
raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import (
|
||||||
|
cascadeResetDescendants, cascadeResetDescendantsFds,
|
||||||
|
getEffectiveFlag, getEffectiveFlagFds,
|
||||||
|
collectAncestorChain, collectAncestorChainFds,
|
||||||
|
)
|
||||||
rootIf = getRootInterface()
|
rootIf = getRootInterface()
|
||||||
rec, model = _findSourceRecord(rootIf.db, sourceId)
|
rec, model = _findSourceRecord(rootIf.db, sourceId)
|
||||||
if not rec:
|
if not rec:
|
||||||
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
|
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
|
||||||
|
|
||||||
|
# 1. Cascade reset descendants bottom-up (before modifying master)
|
||||||
|
resetIds: List[str] = []
|
||||||
|
if scope is not None:
|
||||||
|
if model is DataSource:
|
||||||
|
resetIds = cascadeResetDescendants(rootIf, rec, "scope")
|
||||||
|
else:
|
||||||
|
resetIds = cascadeResetDescendantsFds(rootIf, rec, "scope")
|
||||||
|
|
||||||
|
# 2. Set master value last (crash-safe)
|
||||||
rootIf.db.recordModify(model, sourceId, {"scope": scope})
|
rootIf.db.recordModify(model, sourceId, {"scope": scope})
|
||||||
logger.info("Updated scope=%s for %s %s", scope, model.__name__, sourceId)
|
|
||||||
return {"sourceId": sourceId, "scope": scope, "updated": True}
|
# 3. Compute effective + ancestor chain for response
|
||||||
|
updatedAncestors = _computeAncestorEffectives(rootIf, rec, model, "scope")
|
||||||
|
effectiveScope = _computeOwnEffective(rootIf, rec, model, sourceId, "scope")
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Updated scope=%s for %s %s (cascade-reset %d descendants)",
|
||||||
|
scope, model.__name__, sourceId, len(resetIds),
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"sourceId": sourceId,
|
||||||
|
"scope": scope,
|
||||||
|
"effectiveScope": effectiveScope,
|
||||||
|
"resetDescendantIds": resetIds,
|
||||||
|
"updatedAncestors": updatedAncestors,
|
||||||
|
}
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -77,20 +181,49 @@ def _updateDataSourceScope(
|
||||||
def _updateDataSourceNeutralize(
|
def _updateDataSourceNeutralize(
|
||||||
request: Request,
|
request: Request,
|
||||||
sourceId: str = Path(..., description="ID of the DataSource or FeatureDataSource"),
|
sourceId: str = Path(..., description="ID of the DataSource or FeatureDataSource"),
|
||||||
neutralize: bool = Body(..., embed=True),
|
neutralize: Optional[bool] = Body(None, embed=True),
|
||||||
context: RequestContext = Depends(getRequestContext),
|
context: RequestContext = Depends(getRequestContext),
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Toggle the neutralization flag on a DataSource or FeatureDataSource."""
|
"""Set neutralize flag on a DataSource. Cascade-resets explicit descendants.
|
||||||
|
|
||||||
|
`neutralize=None` resets this node to inherit (no cascade).
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import (
|
||||||
|
cascadeResetDescendants, cascadeResetDescendantsFds,
|
||||||
|
)
|
||||||
rootIf = getRootInterface()
|
rootIf = getRootInterface()
|
||||||
rec, model = _findSourceRecord(rootIf.db, sourceId)
|
rec, model = _findSourceRecord(rootIf.db, sourceId)
|
||||||
if not rec:
|
if not rec:
|
||||||
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
|
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
|
||||||
|
|
||||||
|
# 1. Cascade reset descendants bottom-up (before modifying master)
|
||||||
|
resetIds: List[str] = []
|
||||||
|
if neutralize is not None:
|
||||||
|
if model is DataSource:
|
||||||
|
resetIds = cascadeResetDescendants(rootIf, rec, "neutralize")
|
||||||
|
else:
|
||||||
|
resetIds = cascadeResetDescendantsFds(rootIf, rec, "neutralize")
|
||||||
|
|
||||||
|
# 2. Set master value last (crash-safe)
|
||||||
rootIf.db.recordModify(model, sourceId, {"neutralize": neutralize})
|
rootIf.db.recordModify(model, sourceId, {"neutralize": neutralize})
|
||||||
logger.info("Updated neutralize=%s for %s %s", neutralize, model.__name__, sourceId)
|
|
||||||
return {"sourceId": sourceId, "neutralize": neutralize, "updated": True}
|
# 3. Compute effective + ancestor chain for response
|
||||||
|
updatedAncestors = _computeAncestorEffectives(rootIf, rec, model, "neutralize")
|
||||||
|
effectiveNeutralize = _computeOwnEffective(rootIf, rec, model, sourceId, "neutralize")
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Updated neutralize=%s for %s %s (cascade-reset %d descendants)",
|
||||||
|
neutralize, model.__name__, sourceId, len(resetIds),
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"sourceId": sourceId,
|
||||||
|
"neutralize": neutralize,
|
||||||
|
"effectiveNeutralize": effectiveNeutralize,
|
||||||
|
"resetDescendantIds": resetIds,
|
||||||
|
"updatedAncestors": updatedAncestors,
|
||||||
|
}
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -132,47 +265,67 @@ def _updateNeutralizeFields(
|
||||||
async def _updateDataSourceRagIndex(
|
async def _updateDataSourceRagIndex(
|
||||||
request: Request,
|
request: Request,
|
||||||
sourceId: str = Path(..., description="ID of the DataSource"),
|
sourceId: str = Path(..., description="ID of the DataSource"),
|
||||||
ragIndexEnabled: bool = Body(..., embed=True),
|
ragIndexEnabled: Optional[bool] = Body(None, embed=True),
|
||||||
context: RequestContext = Depends(getRequestContext),
|
context: RequestContext = Depends(getRequestContext),
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Toggle RAG indexing for a DataSource.
|
"""Set RAG indexing flag on a DataSource. Cascade-resets explicit descendants.
|
||||||
|
|
||||||
true: sets flag + enqueues mini-bootstrap for this DataSource only.
|
`ragIndexEnabled=None` resets this node to inherit (no cascade, no purge,
|
||||||
false: sets flag + synchronously purges all chunks from this DataSource.
|
no bootstrap — the node simply follows its ancestor chain afterwards).
|
||||||
|
`True` enqueues a mini-bootstrap. `False` synchronously purges chunks.
|
||||||
|
|
||||||
Must be `async def` so `await startJob(...)` registers `_runJob` in the
|
Must be `async def` so `await startJob(...)` registers `_runJob` in the
|
||||||
main event loop. Sync route → worker thread → temporary loop closes
|
main event loop.
|
||||||
before the task runs → job stays stuck forever.
|
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import (
|
||||||
|
cascadeResetDescendants, cascadeResetDescendantsFds,
|
||||||
|
)
|
||||||
rootIf = getRootInterface()
|
rootIf = getRootInterface()
|
||||||
rec = rootIf.db.getRecord(DataSource, sourceId)
|
rec, model = _findSourceRecord(rootIf.db, sourceId)
|
||||||
if not rec:
|
if not rec:
|
||||||
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
|
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
|
||||||
|
|
||||||
rootIf.db.recordModify(DataSource, sourceId, {"ragIndexEnabled": ragIndexEnabled})
|
# 1. Cascade reset descendants bottom-up (before modifying master)
|
||||||
logger.info("Updated ragIndexEnabled=%s for DataSource %s", ragIndexEnabled, sourceId)
|
resetIds: List[str] = []
|
||||||
|
if ragIndexEnabled is not None:
|
||||||
|
if model is DataSource:
|
||||||
|
resetIds = cascadeResetDescendants(rootIf, rec, "ragIndexEnabled")
|
||||||
|
else:
|
||||||
|
resetIds = cascadeResetDescendantsFds(rootIf, rec, "ragIndexEnabled")
|
||||||
|
|
||||||
if ragIndexEnabled:
|
# 2. Set master value last (crash-safe)
|
||||||
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
|
rootIf.db.recordModify(model, sourceId, {"ragIndexEnabled": ragIndexEnabled})
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Updated ragIndexEnabled=%s for %s %s (cascade-reset %d descendants)",
|
||||||
|
ragIndexEnabled, model.__name__, sourceId, len(resetIds),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Bootstrap / purge only for personal DataSource (file/folder-based RAG).
|
||||||
|
# FDS RAG is handled by the feature pipeline; the flag alone is enough.
|
||||||
|
if model is DataSource:
|
||||||
connectionId = rec.get("connectionId") or rec.get("connection_id") or ""
|
connectionId = rec.get("connectionId") or rec.get("connection_id") or ""
|
||||||
conn = rootIf.getUserConnectionById(connectionId) if connectionId else None
|
if ragIndexEnabled is True:
|
||||||
authority = ""
|
_ensureConnectionKnowledgeFlag(rootIf, connectionId)
|
||||||
if conn:
|
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
|
||||||
authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
|
|
||||||
|
|
||||||
await startJob(
|
conn = rootIf.getUserConnectionById(connectionId) if connectionId else None
|
||||||
"connection.bootstrap",
|
authority = ""
|
||||||
{"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": [sourceId]},
|
if conn:
|
||||||
triggeredBy=str(context.user.id),
|
authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
|
||||||
)
|
|
||||||
else:
|
await startJob(
|
||||||
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
"connection.bootstrap",
|
||||||
purgeResult = getKnowledgeInterface(None).deleteFileContentIndexByDataSource(sourceId)
|
{"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": [sourceId]},
|
||||||
logger.info("Purged %d index rows / %d chunks for DataSource %s",
|
triggeredBy=str(context.user.id),
|
||||||
purgeResult.get("indexRows", 0), purgeResult.get("chunks", 0), sourceId)
|
)
|
||||||
|
elif ragIndexEnabled is False:
|
||||||
|
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||||||
|
purgeResult = getKnowledgeInterface(None).deleteFileContentIndexByDataSource(sourceId)
|
||||||
|
logger.info("Purged %d index rows / %d chunks for DataSource %s",
|
||||||
|
purgeResult.get("indexRows", 0), purgeResult.get("chunks", 0), sourceId)
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from modules.shared.auditLogger import audit_logger
|
from modules.shared.auditLogger import audit_logger
|
||||||
|
|
@ -182,12 +335,184 @@ async def _updateDataSourceRagIndex(
|
||||||
mandateId=context.mandateId,
|
mandateId=context.mandateId,
|
||||||
category=AuditCategory.PERMISSION.value,
|
category=AuditCategory.PERMISSION.value,
|
||||||
action="rag_index_toggled",
|
action="rag_index_toggled",
|
||||||
details=json.dumps({"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled}),
|
details=json.dumps({"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled, "resetDescendants": len(resetIds), "model": model.__name__}),
|
||||||
)
|
)
|
||||||
|
|
||||||
return {"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled, "updated": True}
|
# 3. Compute effective + ancestors for response
|
||||||
|
updatedAncestors = _computeAncestorEffectives(rootIf, rec, model, "ragIndexEnabled")
|
||||||
|
effectiveRag = _computeOwnEffective(rootIf, rec, model, sourceId, "ragIndexEnabled")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"sourceId": sourceId,
|
||||||
|
"ragIndexEnabled": ragIndexEnabled,
|
||||||
|
"effectiveRagIndexEnabled": effectiveRag,
|
||||||
|
"resetDescendantIds": resetIds,
|
||||||
|
"updatedAncestors": updatedAncestors,
|
||||||
|
}
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Error updating datasource ragIndexEnabled: %s", e)
|
logger.error("Error updating datasource ragIndexEnabled: %s", e)
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
_CLICKUP_SOURCE_TYPES = {"clickup", "clickupList", "clickupSpace", "clickupFolder"}
|
||||||
|
_ALLOWED_RAG_LIMIT_KEYS = {
|
||||||
|
"files": {"maxItems", "maxBytes", "maxFileSize", "maxDepth"},
|
||||||
|
"clickup": {"maxTasks", "maxWorkspaces", "maxListsPerWorkspace"},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _kindForSource(rec: Dict[str, Any], model) -> str:
|
||||||
|
"""Map a DataSource record to a RAG-limits kind ('files' or 'clickup').
|
||||||
|
|
||||||
|
FeatureDataSource (tables, not file walkers) reports as 'files' so the
|
||||||
|
same UI/limit shape works; the limits simply won't be consumed by any
|
||||||
|
walker today but are stored for forward-compat.
|
||||||
|
"""
|
||||||
|
if model is FeatureDataSource:
|
||||||
|
return "files"
|
||||||
|
sourceType = str(rec.get("sourceType") or "").strip()
|
||||||
|
return "clickup" if sourceType in _CLICKUP_SOURCE_TYPES else "files"
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitizeRagLimits(kind: str, raw: Any) -> Dict[str, int]:
|
||||||
|
"""Coerce an incoming ragLimits dict to {allowedKey: positive int}.
|
||||||
|
|
||||||
|
Unknown keys are silently dropped; non-positive or non-numeric values
|
||||||
|
are rejected with 400.
|
||||||
|
"""
|
||||||
|
if not isinstance(raw, dict):
|
||||||
|
raise HTTPException(status_code=400, detail="ragLimits must be an object")
|
||||||
|
allowed = _ALLOWED_RAG_LIMIT_KEYS.get(kind, set())
|
||||||
|
cleaned: Dict[str, int] = {}
|
||||||
|
for key, value in raw.items():
|
||||||
|
if key not in allowed:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
intValue = int(value)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
raise HTTPException(status_code=400, detail=f"ragLimits.{key} must be an integer")
|
||||||
|
if intValue <= 0:
|
||||||
|
raise HTTPException(status_code=400, detail=f"ragLimits.{key} must be > 0")
|
||||||
|
cleaned[key] = intValue
|
||||||
|
return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
@router.patch("/{sourceId}/settings")
|
||||||
|
@limiter.limit("30/minute")
|
||||||
|
def _updateDataSourceSettings(
|
||||||
|
request: Request,
|
||||||
|
sourceId: str = Path(..., description="ID of the DataSource or FeatureDataSource"),
|
||||||
|
settings: Dict[str, Any] = Body(..., embed=True),
|
||||||
|
context: RequestContext = Depends(getRequestContext),
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Replace `settings` on a DataSource or FeatureDataSource (partial merge per top-level key).
|
||||||
|
|
||||||
|
Currently supports `ragLimits` only. Unknown top-level keys in the body are
|
||||||
|
rejected to avoid silently storing garbage that no consumer reads.
|
||||||
|
|
||||||
|
Owner-only for personal DataSources; mandate/feature scopes additionally
|
||||||
|
accept the mandate or workspace admins of that scope.
|
||||||
|
"""
|
||||||
|
if not isinstance(settings, dict):
|
||||||
|
raise HTTPException(status_code=400, detail="settings must be an object")
|
||||||
|
unknown = set(settings.keys()) - {"ragLimits"}
|
||||||
|
if unknown:
|
||||||
|
raise HTTPException(status_code=400, detail=f"Unknown settings keys: {sorted(unknown)}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
rec, model = _findSourceRecord(rootIf.db, sourceId)
|
||||||
|
if not rec:
|
||||||
|
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
|
||||||
|
|
||||||
|
ownerId = str(rec.get("userId") or "")
|
||||||
|
currentUserId = str(context.user.id)
|
||||||
|
if ownerId and ownerId != currentUserId and not context.isSysAdmin:
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag
|
||||||
|
if model is DataSource:
|
||||||
|
connectionId = rec.get("connectionId", "")
|
||||||
|
allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||||
|
scope = str(getEffectiveFlag(rec, "scope", allDs, mode="walk"))
|
||||||
|
else:
|
||||||
|
from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource as FDS
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlagFds
|
||||||
|
wsId = rec.get("workspaceInstanceId", "")
|
||||||
|
allFds = rootIf.db.getRecordset(FDS, recordFilter={"workspaceInstanceId": wsId})
|
||||||
|
scope = str(getEffectiveFlagFds(rec, "scope", allFds, mode="walk"))
|
||||||
|
isMandateAdmin = getattr(context, "isMandateAdmin", False)
|
||||||
|
if scope == "personal" or not isMandateAdmin:
|
||||||
|
raise HTTPException(status_code=403, detail="Not allowed to modify this DataSource's settings")
|
||||||
|
|
||||||
|
kind = _kindForSource(rec, model)
|
||||||
|
|
||||||
|
currentSettings = rec.get("settings") or {}
|
||||||
|
if not isinstance(currentSettings, dict):
|
||||||
|
currentSettings = {}
|
||||||
|
newSettings = dict(currentSettings)
|
||||||
|
|
||||||
|
if "ragLimits" in settings:
|
||||||
|
cleanedLimits = _sanitizeRagLimits(kind, settings["ragLimits"])
|
||||||
|
mergedLimits = dict(currentSettings.get("ragLimits") or {})
|
||||||
|
mergedLimits.update(cleanedLimits)
|
||||||
|
newSettings["ragLimits"] = mergedLimits
|
||||||
|
|
||||||
|
rootIf.db.recordModify(model, sourceId, {"settings": newSettings})
|
||||||
|
|
||||||
|
import json
|
||||||
|
from modules.shared.auditLogger import audit_logger
|
||||||
|
from modules.datamodels.datamodelAudit import AuditCategory
|
||||||
|
audit_logger.logEvent(
|
||||||
|
userId=currentUserId,
|
||||||
|
mandateId=context.mandateId,
|
||||||
|
category=AuditCategory.PERMISSION.value,
|
||||||
|
action="datasource_settings_changed",
|
||||||
|
details=json.dumps({
|
||||||
|
"sourceId": sourceId,
|
||||||
|
"model": model.__name__,
|
||||||
|
"oldSettings": currentSettings,
|
||||||
|
"newSettings": newSettings,
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
logger.info("Updated settings on %s %s by user %s", model.__name__, sourceId, currentUserId)
|
||||||
|
return {"sourceId": sourceId, "settings": newSettings, "updated": True}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error updating datasource settings: %s", e, exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{sourceId}/cost-estimate")
|
||||||
|
@limiter.limit("60/minute")
|
||||||
|
def _getDataSourceCostEstimate(
|
||||||
|
request: Request,
|
||||||
|
sourceId: str = Path(..., description="ID of the DataSource or FeatureDataSource"),
|
||||||
|
context: RequestContext = Depends(getRequestContext),
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Return an indicative full-sync cost estimate for the given DataSource.
|
||||||
|
|
||||||
|
Uses the current effective ragLimits (DataSource.settings.ragLimits with
|
||||||
|
fallback to centralized defaults) as the basis. Returns the same
|
||||||
|
`{estimatedTokens, estimatedUsd, basis}` shape regardless of source kind.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge import _ragLimits, _costEstimate
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
rec, model = _findSourceRecord(rootIf.db, sourceId)
|
||||||
|
if not rec:
|
||||||
|
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
|
||||||
|
|
||||||
|
kind = _kindForSource(rec, model)
|
||||||
|
effective = _ragLimits.getRagLimits(rec, kind)
|
||||||
|
estimate = _costEstimate.estimateBootstrapCost(effective, kind=kind)
|
||||||
|
estimate["sourceId"] = sourceId
|
||||||
|
return estimate
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error computing cost estimate: %s", e, exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ def resolveMandateLabels(ids: List[str]) -> Dict[str, Optional[str]]:
|
||||||
m = mMap.get(mid)
|
m = mMap.get(mid)
|
||||||
label = (getattr(m, "label", None) or getattr(m, "name", None)) if m else None
|
label = (getattr(m, "label", None) or getattr(m, "name", None)) if m else None
|
||||||
if not label:
|
if not label:
|
||||||
logger.warning("resolveMandateLabels: no label for id=%s (found=%s)", mid, m is not None)
|
logger.debug("resolveMandateLabels: no label for id=%s (found=%s)", mid, m is not None)
|
||||||
result[mid] = label or None
|
result[mid] = label or None
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
@ -57,7 +57,7 @@ def resolveInstanceLabels(ids: List[str]) -> Dict[str, Optional[str]]:
|
||||||
fi = featureIface.getFeatureInstance(iid)
|
fi = featureIface.getFeatureInstance(iid)
|
||||||
label = fi.label if fi and fi.label else None
|
label = fi.label if fi and fi.label else None
|
||||||
if not label:
|
if not label:
|
||||||
logger.warning("resolveInstanceLabels: no label for id=%s (found=%s)", iid, fi is not None)
|
logger.debug("resolveInstanceLabels: no label for id=%s (found=%s)", iid, fi is not None)
|
||||||
result[iid] = label
|
result[iid] = label
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
@ -104,7 +104,7 @@ def resolveRoleLabels(ids: List[str]) -> Dict[str, Optional[str]]:
|
||||||
out[rid] = r.get("roleLabel") or None
|
out[rid] = r.get("roleLabel") or None
|
||||||
for rid in ids:
|
for rid in ids:
|
||||||
if out.get(rid) is None:
|
if out.get(rid) is None:
|
||||||
logger.warning("resolveRoleLabels: no label for id=%s", rid)
|
logger.debug("resolveRoleLabels: no label for id=%s", rid)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -305,7 +305,7 @@ def handleIdsMode(
|
||||||
|
|
||||||
sql = f'SELECT "{idField}"::TEXT AS val FROM "{table}"{where_clause} ORDER BY "{idField}"'
|
sql = f'SELECT "{idField}"::TEXT AS val FROM "{table}"{where_clause} ORDER BY "{idField}"'
|
||||||
|
|
||||||
with db.connection.cursor() as cursor:
|
with db.borrowCursor() as cursor:
|
||||||
cursor.execute(sql, values)
|
cursor.execute(sql, values)
|
||||||
return JSONResponse(content=[row["val"] for row in cursor.fetchall()])
|
return JSONResponse(content=[row["val"] for row in cursor.fetchall()])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ from modules.serviceCenter.services.serviceBackgroundJobs import (
|
||||||
getJobStatus,
|
getJobStatus,
|
||||||
listJobs,
|
listJobs,
|
||||||
)
|
)
|
||||||
from modules.shared.i18nRegistry import apiRouteContext
|
from modules.shared.i18nRegistry import apiRouteContext, resolveJobMessage
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
routeApiMsg = apiRouteContext("routeJobs")
|
routeApiMsg = apiRouteContext("routeJobs")
|
||||||
|
|
@ -34,8 +34,20 @@ router = APIRouter(
|
||||||
|
|
||||||
|
|
||||||
def _serialiseJob(job: Dict[str, Any]) -> Dict[str, Any]:
|
def _serialiseJob(job: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Strip system audit fields and ensure JSON-safe types."""
|
"""Strip system audit fields, ensure JSON-safe types, translate progress.
|
||||||
return {k: v for k, v in job.items() if not k.startswith("sys")}
|
|
||||||
|
Walkers store progress as a structured payload (``progressMessageData =
|
||||||
|
{key, params}``). The frontend never calls ``t()`` on backend-supplied
|
||||||
|
keys (i18n convention #2), so we resolve the payload here using the
|
||||||
|
request-context language and overwrite ``progressMessage`` with the
|
||||||
|
fully rendered string. Older clients keep working because they read
|
||||||
|
the same field.
|
||||||
|
"""
|
||||||
|
out = {k: v for k, v in job.items() if not k.startswith("sys")}
|
||||||
|
translated = resolveJobMessage(out.get("progressMessageData"))
|
||||||
|
if translated:
|
||||||
|
out["progressMessage"] = translated
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
def _userHasMandateAccess(context: RequestContext, mandateId: Optional[str]) -> bool:
|
def _userHasMandateAccess(context: RequestContext, mandateId: Optional[str]) -> bool:
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ from typing import Any, Dict, List, Optional
|
||||||
from fastapi import APIRouter, HTTPException, Depends, Request
|
from fastapi import APIRouter, HTTPException, Depends, Request
|
||||||
from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext
|
from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext
|
||||||
from modules.datamodels.datamodelUam import User
|
from modules.datamodels.datamodelUam import User
|
||||||
from modules.shared.i18nRegistry import apiRouteContext
|
from modules.shared.i18nRegistry import apiRouteContext, resolveJobMessage
|
||||||
|
|
||||||
routeApiMsg = apiRouteContext("routeRagInventory")
|
routeApiMsg = apiRouteContext("routeRagInventory")
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -24,9 +24,69 @@ router = APIRouter(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
_SUB_RESULT_KEYS = ("sharepoint", "outlook", "drive", "gmail", "clickup", "kdrive")
|
||||||
|
|
||||||
|
|
||||||
|
def _flattenJobResult(result: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Bootstrap handlers nest per-service results (e.g. msft returns
|
||||||
|
`{"sharepoint": {...}, "outlook": {...}}`). The UI needs per-connection
|
||||||
|
aggregates AND the first hit limit, so we sum the counters and pick the
|
||||||
|
most informative `stoppedAtLimit` across sub-services.
|
||||||
|
|
||||||
|
Returns a flat dict with the same keys the UI expects on `lastSuccess`.
|
||||||
|
"""
|
||||||
|
subResults = [result[k] for k in _SUB_RESULT_KEYS if isinstance(result.get(k), dict)]
|
||||||
|
if not subResults:
|
||||||
|
# Single-service handler that returns flat dict directly (legacy path).
|
||||||
|
return result
|
||||||
|
|
||||||
|
indexed = sum(int(r.get("indexed") or 0) for r in subResults)
|
||||||
|
skippedDup = sum(int(r.get("skippedDuplicate") or 0) for r in subResults)
|
||||||
|
skippedPol = sum(int(r.get("skippedPolicy") or 0) for r in subResults)
|
||||||
|
failed = sum(int(r.get("failed") or 0) for r in subResults)
|
||||||
|
bytes_ = sum(int(r.get("bytesProcessed") or 0) for r in subResults)
|
||||||
|
# Parallel sub-services: wall-clock ≈ slowest one.
|
||||||
|
durationMs = max((int(r.get("durationMs") or 0) for r in subResults), default=0)
|
||||||
|
|
||||||
|
# First sub-service that hit a limit wins — UI shows one banner per
|
||||||
|
# connection; if multiple stopped, the first one is informative enough
|
||||||
|
# and the user re-runs after raising that budget.
|
||||||
|
stoppedAtLimit: Optional[str] = None
|
||||||
|
limits: Dict[str, Any] = {}
|
||||||
|
for r in subResults:
|
||||||
|
if r.get("stoppedAtLimit"):
|
||||||
|
stoppedAtLimit = r["stoppedAtLimit"]
|
||||||
|
limits = r.get("limits") or {}
|
||||||
|
break
|
||||||
|
|
||||||
|
return {
|
||||||
|
"indexed": indexed,
|
||||||
|
"skippedDuplicate": skippedDup,
|
||||||
|
"skippedPolicy": skippedPol,
|
||||||
|
"failed": failed,
|
||||||
|
"bytesProcessed": bytes_,
|
||||||
|
"durationMs": durationMs,
|
||||||
|
"stoppedAtLimit": stoppedAtLimit,
|
||||||
|
"limits": limits,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> List[Dict[str, Any]]:
|
def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> List[Dict[str, Any]]:
|
||||||
|
"""Build per-connection RAG inventory rows.
|
||||||
|
|
||||||
|
Each DataSource row exposes BOTH numbers because they mean different things:
|
||||||
|
* `fileCount` — distinct files indexed (== `FileContentIndex` rows)
|
||||||
|
* `chunkCount` — embedding-sized text fragments (== `ContentChunk` rows,
|
||||||
|
max `DEFAULT_CHUNK_TOKENS` tokens each, what the vector retrieval
|
||||||
|
actually hits)
|
||||||
|
|
||||||
|
A single PDF typically yields 1 file × 5–100 chunks; legacy UI labelled
|
||||||
|
`len(FileContentIndex)` as "chunks" which was off by 1–2 orders of
|
||||||
|
magnitude and misleading.
|
||||||
|
"""
|
||||||
from modules.datamodels.datamodelDataSource import DataSource
|
from modules.datamodels.datamodelDataSource import DataSource
|
||||||
from modules.datamodels.datamodelKnowledge import FileContentIndex
|
from modules.datamodels.datamodelKnowledge import FileContentIndex
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag
|
||||||
|
|
||||||
out = []
|
out = []
|
||||||
for conn in connections:
|
for conn in connections:
|
||||||
|
|
@ -34,19 +94,35 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
|
||||||
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||||
|
|
||||||
connIndexRows = knowledgeIf.db.getRecordset(FileContentIndex, recordFilter={"connectionId": connectionId})
|
connIndexRows = knowledgeIf.db.getRecordset(FileContentIndex, recordFilter={"connectionId": connectionId})
|
||||||
connChunkTotal = len(connIndexRows)
|
connFileTotal = len(connIndexRows)
|
||||||
|
|
||||||
|
# Map fileId → real chunk count via 1 aggregate query (cheap even for
|
||||||
|
# connections with thousands of files; we never load the vector body).
|
||||||
|
fileIds = [
|
||||||
|
(idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", ""))
|
||||||
|
for idx in connIndexRows
|
||||||
|
]
|
||||||
|
fileIds = [fid for fid in fileIds if fid]
|
||||||
|
chunkCountByFile = knowledgeIf.countChunksByFileIds(fileIds) if fileIds else {}
|
||||||
|
connChunkTotal = sum(chunkCountByFile.values())
|
||||||
|
|
||||||
|
filesByDs: Dict[str, int] = {}
|
||||||
chunksByDs: Dict[str, int] = {}
|
chunksByDs: Dict[str, int] = {}
|
||||||
unassigned = 0
|
unassignedFiles = 0
|
||||||
|
unassignedChunks = 0
|
||||||
for idx in connIndexRows:
|
for idx in connIndexRows:
|
||||||
|
fileId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", "")
|
||||||
|
chunkCnt = chunkCountByFile.get(fileId, 0)
|
||||||
struct = (idx.get("structure") if isinstance(idx, dict) else getattr(idx, "structure", None)) or {}
|
struct = (idx.get("structure") if isinstance(idx, dict) else getattr(idx, "structure", None)) or {}
|
||||||
ingestion = struct.get("_ingestion") or {} if isinstance(struct, dict) else {}
|
ingestion = struct.get("_ingestion") or {} if isinstance(struct, dict) else {}
|
||||||
prov = ingestion.get("provenance") or {} if isinstance(ingestion, dict) else {}
|
prov = ingestion.get("provenance") or {} if isinstance(ingestion, dict) else {}
|
||||||
dsIdRef = prov.get("dataSourceId", "") if isinstance(prov, dict) else ""
|
dsIdRef = prov.get("dataSourceId", "") if isinstance(prov, dict) else ""
|
||||||
if dsIdRef:
|
if dsIdRef:
|
||||||
chunksByDs[dsIdRef] = chunksByDs.get(dsIdRef, 0) + 1
|
filesByDs[dsIdRef] = filesByDs.get(dsIdRef, 0) + 1
|
||||||
|
chunksByDs[dsIdRef] = chunksByDs.get(dsIdRef, 0) + chunkCnt
|
||||||
else:
|
else:
|
||||||
unassigned += 1
|
unassignedFiles += 1
|
||||||
|
unassignedChunks += chunkCnt
|
||||||
|
|
||||||
seen: Dict[str, bool] = {}
|
seen: Dict[str, bool] = {}
|
||||||
dsItems = []
|
dsItems = []
|
||||||
|
|
@ -61,24 +137,39 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
|
||||||
"label": ds.get("label") if isinstance(ds, dict) else getattr(ds, "label", ""),
|
"label": ds.get("label") if isinstance(ds, dict) else getattr(ds, "label", ""),
|
||||||
"path": dsPath,
|
"path": dsPath,
|
||||||
"sourceType": ds.get("sourceType") if isinstance(ds, dict) else getattr(ds, "sourceType", ""),
|
"sourceType": ds.get("sourceType") if isinstance(ds, dict) else getattr(ds, "sourceType", ""),
|
||||||
"ragIndexEnabled": ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False),
|
"ragIndexEnabled": getEffectiveFlag(ds, "ragIndexEnabled", dataSources, mode="walk"),
|
||||||
"neutralize": ds.get("neutralize") if isinstance(ds, dict) else getattr(ds, "neutralize", False),
|
"neutralize": getEffectiveFlag(ds, "neutralize", dataSources, mode="walk"),
|
||||||
"lastIndexed": ds.get("lastIndexed") if isinstance(ds, dict) else getattr(ds, "lastIndexed", None),
|
"lastIndexed": ds.get("lastIndexed") if isinstance(ds, dict) else getattr(ds, "lastIndexed", None),
|
||||||
|
"fileCount": filesByDs.get(dsId, 0),
|
||||||
"chunkCount": chunksByDs.get(dsId, 0),
|
"chunkCount": chunksByDs.get(dsId, 0),
|
||||||
})
|
})
|
||||||
|
|
||||||
if unassigned > 0 and len(dsItems) > 0:
|
# Spread orphan files (provenance lost) evenly so totals match.
|
||||||
perDs = unassigned // len(dsItems)
|
if unassignedFiles > 0 and len(dsItems) > 0:
|
||||||
remainder = unassigned % len(dsItems)
|
perFile = unassignedFiles // len(dsItems)
|
||||||
|
remFile = unassignedFiles % len(dsItems)
|
||||||
|
perChunk = unassignedChunks // len(dsItems)
|
||||||
|
remChunk = unassignedChunks % len(dsItems)
|
||||||
for i, item in enumerate(dsItems):
|
for i, item in enumerate(dsItems):
|
||||||
item["chunkCount"] += perDs + (1 if i < remainder else 0)
|
item["fileCount"] += perFile + (1 if i < remFile else 0)
|
||||||
|
item["chunkCount"] += perChunk + (1 if i < remChunk else 0)
|
||||||
|
|
||||||
# Pull a wider window than the previous 5 so the "last successful
|
# Pull a wider window than the previous 5 so the "last successful
|
||||||
# sync" is found even if a connection has many recent jobs queued.
|
# sync" is found even if a connection has many recent jobs queued.
|
||||||
jobs = jobService.listJobs(jobType="connection.bootstrap", limit=50)
|
jobs = jobService.listJobs(jobType="connection.bootstrap", limit=50)
|
||||||
connJobs = [j for j in jobs if (j.get("payload") or {}).get("connectionId") == connectionId]
|
connJobs = [j for j in jobs if (j.get("payload") or {}).get("connectionId") == connectionId]
|
||||||
runningJobs = [
|
runningJobs = [
|
||||||
{"jobId": j["id"], "progress": j.get("progress", 0), "progressMessage": j.get("progressMessage", "")}
|
{
|
||||||
|
"jobId": j["id"],
|
||||||
|
"progress": j.get("progress", 0),
|
||||||
|
# Server-side translate the structured walker payload into
|
||||||
|
# the request-context language; frontend renders 1:1 (no
|
||||||
|
# `t()` on backend-supplied keys).
|
||||||
|
"progressMessage": (
|
||||||
|
resolveJobMessage(j.get("progressMessageData"))
|
||||||
|
or j.get("progressMessage", "")
|
||||||
|
),
|
||||||
|
}
|
||||||
for j in connJobs
|
for j in connJobs
|
||||||
if j.get("status") in ("PENDING", "RUNNING")
|
if j.get("status") in ("PENDING", "RUNNING")
|
||||||
]
|
]
|
||||||
|
|
@ -93,7 +184,12 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
|
||||||
"finishedAt": j.get("finishedAt"),
|
"finishedAt": j.get("finishedAt"),
|
||||||
}
|
}
|
||||||
elif status == "SUCCESS" and lastSuccess is None:
|
elif status == "SUCCESS" and lastSuccess is None:
|
||||||
result = j.get("result") or {}
|
# Bootstrap handlers may return either a flat dict (single
|
||||||
|
# service) or a nested dict keyed by sub-service (e.g. msft
|
||||||
|
# returns {"sharepoint": {...}, "outlook": {...}}). Flatten
|
||||||
|
# so the UI always sees aggregated counters and the first
|
||||||
|
# sub-service that hit a limit.
|
||||||
|
result = _flattenJobResult(j.get("result") or {})
|
||||||
lastSuccess = {
|
lastSuccess = {
|
||||||
"jobId": j["id"],
|
"jobId": j["id"],
|
||||||
"finishedAt": j.get("finishedAt"),
|
"finishedAt": j.get("finishedAt"),
|
||||||
|
|
@ -102,6 +198,12 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
|
||||||
"skippedPolicy": result.get("skippedPolicy", 0),
|
"skippedPolicy": result.get("skippedPolicy", 0),
|
||||||
"failed": result.get("failed", 0),
|
"failed": result.get("failed", 0),
|
||||||
"durationMs": result.get("durationMs", 0),
|
"durationMs": result.get("durationMs", 0),
|
||||||
|
# Surface limit-stop reason so the UI can warn the user
|
||||||
|
# that the index is provably incomplete (and which budget
|
||||||
|
# to raise). None means the walker finished naturally.
|
||||||
|
"stoppedAtLimit": result.get("stoppedAtLimit"),
|
||||||
|
"limits": result.get("limits") or {},
|
||||||
|
"bytesProcessed": result.get("bytesProcessed", 0),
|
||||||
}
|
}
|
||||||
if lastError and lastSuccess:
|
if lastError and lastSuccess:
|
||||||
break
|
break
|
||||||
|
|
@ -113,6 +215,7 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
|
||||||
"knowledgeIngestionEnabled": getattr(conn, "knowledgeIngestionEnabled", False),
|
"knowledgeIngestionEnabled": getattr(conn, "knowledgeIngestionEnabled", False),
|
||||||
"preferences": getattr(conn, "knowledgePreferences", None) or {},
|
"preferences": getattr(conn, "knowledgePreferences", None) or {},
|
||||||
"dataSources": dsItems,
|
"dataSources": dsItems,
|
||||||
|
"totalFiles": connFileTotal,
|
||||||
"totalChunks": connChunkTotal,
|
"totalChunks": connChunkTotal,
|
||||||
"runningJobs": runningJobs,
|
"runningJobs": runningJobs,
|
||||||
"lastError": lastError,
|
"lastError": lastError,
|
||||||
|
|
@ -121,13 +224,165 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _buildFeatureInstanceInventory(featureInstanceIds, rootIf, knowledgeIf) -> List[Dict[str, Any]]:
|
||||||
|
"""Build per-feature-instance RAG inventory rows.
|
||||||
|
|
||||||
|
Feature-instance data lives in FileContentIndex with a non-empty
|
||||||
|
featureInstanceId. Additionally each feature instance may have
|
||||||
|
FeatureDataSource rows that define which tables/data are visible
|
||||||
|
as sources, with their own ragIndexEnabled flags.
|
||||||
|
Includes feature.bootstrap job status (running/success/error).
|
||||||
|
"""
|
||||||
|
from modules.datamodels.datamodelKnowledge import FileContentIndex
|
||||||
|
from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
|
||||||
|
from modules.interfaces.interfaceFeatures import getFeatureInterface
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlagFds
|
||||||
|
from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.subFeatureBootstrap import FEATURE_BOOTSTRAP_JOB_TYPE
|
||||||
|
|
||||||
|
featureIf = getFeatureInterface(rootIf.db)
|
||||||
|
|
||||||
|
allFeatureJobs = jobService.listJobs(jobType=FEATURE_BOOTSTRAP_JOB_TYPE, limit=100)
|
||||||
|
|
||||||
|
out = []
|
||||||
|
for fiId in featureInstanceIds:
|
||||||
|
instance = featureIf.getFeatureInstance(fiId)
|
||||||
|
if not instance or not instance.enabled:
|
||||||
|
continue
|
||||||
|
|
||||||
|
indexRows = knowledgeIf.db.getRecordset(
|
||||||
|
FileContentIndex, recordFilter={"featureInstanceId": fiId}
|
||||||
|
)
|
||||||
|
fileIds = [
|
||||||
|
(r.get("id") if isinstance(r, dict) else getattr(r, "id", ""))
|
||||||
|
for r in indexRows
|
||||||
|
]
|
||||||
|
fileIds = [fid for fid in fileIds if fid]
|
||||||
|
chunkCounts = knowledgeIf.countChunksByFileIds(fileIds) if fileIds else {}
|
||||||
|
|
||||||
|
statusCounts: Dict[str, int] = {}
|
||||||
|
for r in indexRows:
|
||||||
|
st = (r.get("status") if isinstance(r, dict) else getattr(r, "status", "unknown")) or "unknown"
|
||||||
|
statusCounts[st] = statusCounts.get(st, 0) + 1
|
||||||
|
|
||||||
|
allFds = rootIf.db.getRecordset(FeatureDataSource, recordFilter={"workspaceInstanceId": fiId})
|
||||||
|
dsItems = []
|
||||||
|
anyRagEnabled = False
|
||||||
|
for fds in allFds:
|
||||||
|
tblName = (fds.get("tableName") if isinstance(fds, dict) else getattr(fds, "tableName", "")) or ""
|
||||||
|
fCode = (fds.get("featureCode") if isinstance(fds, dict) else getattr(fds, "featureCode", "")) or ""
|
||||||
|
if tblName == "*" or not fCode:
|
||||||
|
continue
|
||||||
|
fdsId = fds.get("id") if isinstance(fds, dict) else getattr(fds, "id", "")
|
||||||
|
ragEnabled = getEffectiveFlagFds(fds, "ragIndexEnabled", allFds, mode="aggregate")
|
||||||
|
if ragEnabled:
|
||||||
|
anyRagEnabled = True
|
||||||
|
dsItems.append({
|
||||||
|
"id": fdsId,
|
||||||
|
"label": (fds.get("label") if isinstance(fds, dict) else getattr(fds, "label", "")) or "",
|
||||||
|
"tableName": tblName,
|
||||||
|
"featureCode": fCode,
|
||||||
|
"ragIndexEnabled": ragEnabled,
|
||||||
|
})
|
||||||
|
|
||||||
|
fiJobs = [
|
||||||
|
j for j in allFeatureJobs
|
||||||
|
if (j.get("payload") or {}).get("workspaceInstanceId") == fiId
|
||||||
|
]
|
||||||
|
runningJobs = [
|
||||||
|
{
|
||||||
|
"jobId": j["id"],
|
||||||
|
"progress": j.get("progress", 0),
|
||||||
|
"progressMessage": (
|
||||||
|
resolveJobMessage(j.get("progressMessageData"))
|
||||||
|
or j.get("progressMessage", "")
|
||||||
|
),
|
||||||
|
}
|
||||||
|
for j in fiJobs
|
||||||
|
if j.get("status") in ("PENDING", "RUNNING")
|
||||||
|
]
|
||||||
|
lastError: Optional[Dict[str, Any]] = None
|
||||||
|
lastSuccess: Optional[Dict[str, Any]] = None
|
||||||
|
for j in fiJobs:
|
||||||
|
jStatus = j.get("status")
|
||||||
|
if jStatus == "ERROR" and lastError is None:
|
||||||
|
lastError = {
|
||||||
|
"jobId": j["id"],
|
||||||
|
"errorMessage": j.get("errorMessage", ""),
|
||||||
|
"finishedAt": j.get("finishedAt"),
|
||||||
|
}
|
||||||
|
elif jStatus == "SUCCESS" and lastSuccess is None:
|
||||||
|
result = j.get("result") or {}
|
||||||
|
lastSuccess = {
|
||||||
|
"jobId": j["id"],
|
||||||
|
"finishedAt": j.get("finishedAt"),
|
||||||
|
"indexed": result.get("indexed", 0),
|
||||||
|
"skippedDuplicate": result.get("skippedDuplicate", 0),
|
||||||
|
"failed": result.get("failed", 0),
|
||||||
|
}
|
||||||
|
if lastError and lastSuccess:
|
||||||
|
break
|
||||||
|
|
||||||
|
if not indexRows and not dsItems:
|
||||||
|
continue
|
||||||
|
|
||||||
|
out.append({
|
||||||
|
"featureInstanceId": fiId,
|
||||||
|
"featureCode": instance.featureCode,
|
||||||
|
"label": instance.label or instance.featureCode,
|
||||||
|
"mandateId": str(instance.mandateId) if instance.mandateId else "",
|
||||||
|
"fileCount": len(indexRows),
|
||||||
|
"chunkCount": sum(chunkCounts.values()),
|
||||||
|
"statusCounts": statusCounts,
|
||||||
|
"dataSources": dsItems,
|
||||||
|
"ragEnabled": anyRagEnabled,
|
||||||
|
"runningJobs": runningJobs,
|
||||||
|
"lastSuccess": lastSuccess,
|
||||||
|
"lastError": lastError,
|
||||||
|
})
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/my-mandates")
|
||||||
|
@limiter.limit("30/minute")
|
||||||
|
def _getMyMandates(
|
||||||
|
request: Request,
|
||||||
|
currentUser: User = Depends(getCurrentUser),
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
"""Return mandates where the current user has an active membership.
|
||||||
|
|
||||||
|
Used by the RAG inventory frontend to populate the mandate dropdown
|
||||||
|
without requiring admin rights (unlike GET /api/mandates/).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
userMandates = rootIf.getUserMandates(str(currentUser.id))
|
||||||
|
result = []
|
||||||
|
for um in userMandates:
|
||||||
|
if not um.enabled:
|
||||||
|
continue
|
||||||
|
mandate = rootIf.getMandate(str(um.mandateId))
|
||||||
|
if not mandate or not getattr(mandate, "enabled", True):
|
||||||
|
continue
|
||||||
|
result.append({
|
||||||
|
"id": str(um.mandateId),
|
||||||
|
"name": getattr(mandate, "name", ""),
|
||||||
|
"label": getattr(mandate, "label", None) or getattr(mandate, "name", ""),
|
||||||
|
})
|
||||||
|
return result
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error in RAG inventory /my-mandates: %s", e, exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
@router.get("/me")
|
@router.get("/me")
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit("30/minute")
|
||||||
def _getInventoryMe(
|
def _getInventoryMe(
|
||||||
request: Request,
|
request: Request,
|
||||||
currentUser: User = Depends(getCurrentUser),
|
currentUser: User = Depends(getCurrentUser),
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Personal RAG inventory: own connections + DataSources + chunk counts."""
|
"""Personal RAG inventory: own connections + DataSources + chunk counts + feature uploads."""
|
||||||
try:
|
try:
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||||||
|
|
@ -139,8 +394,22 @@ def _getInventoryMe(
|
||||||
|
|
||||||
items = _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService)
|
items = _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService)
|
||||||
totalChunks = sum(c.get("totalChunks", 0) for c in items)
|
totalChunks = sum(c.get("totalChunks", 0) for c in items)
|
||||||
|
totalFiles = sum(c.get("totalFiles", 0) for c in items)
|
||||||
|
|
||||||
return {"connections": items, "totals": {"chunks": totalChunks}}
|
featureAccesses = rootIf.getFeatureAccessesForUser(str(currentUser.id))
|
||||||
|
fiIds = [
|
||||||
|
str(fa.featureInstanceId) for fa in featureAccesses
|
||||||
|
if fa.enabled and fa.featureInstanceId
|
||||||
|
]
|
||||||
|
fiItems = _buildFeatureInstanceInventory(fiIds, rootIf, knowledgeIf)
|
||||||
|
totalFiles += sum(fi.get("fileCount", 0) for fi in fiItems)
|
||||||
|
totalChunks += sum(fi.get("chunkCount", 0) for fi in fiItems)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"connections": items,
|
||||||
|
"featureInstances": fiItems,
|
||||||
|
"totals": {"files": totalFiles, "chunks": totalChunks},
|
||||||
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Error in RAG inventory /me: %s", e, exc_info=True)
|
logger.error("Error in RAG inventory /me: %s", e, exc_info=True)
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
@ -159,20 +428,43 @@ def _getInventoryMandate(
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface, aggregateMandateRagTotalBytes
|
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface, aggregateMandateRagTotalBytes
|
||||||
from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
|
from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
|
||||||
|
|
||||||
rootIf = getRootInterface()
|
rootIf = getRootInterface()
|
||||||
knowledgeIf = getKnowledgeInterface(None)
|
knowledgeIf = getKnowledgeInterface(None)
|
||||||
mandateId = str(context.mandateId) if context.mandateId else ""
|
mandateId = str(context.mandateId)
|
||||||
|
userId = str(context.user.id)
|
||||||
|
|
||||||
from modules.datamodels.datamodelUam import UserConnection
|
userMandates = rootIf.getUserMandates(userId)
|
||||||
allConnections = rootIf.db.getRecordset(UserConnection, recordFilter={"mandateId": mandateId})
|
isMember = any(
|
||||||
connectionObjects = [type("C", (), row)() if isinstance(row, dict) else row for row in allConnections]
|
getattr(um, "mandateId", None) == mandateId and um.enabled
|
||||||
|
for um in userMandates
|
||||||
|
)
|
||||||
|
if not isMember and not context.isSysAdmin:
|
||||||
|
raise HTTPException(status_code=403, detail=routeApiMsg("No membership in this mandate"))
|
||||||
|
|
||||||
items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService)
|
mandateMembers = rootIf.getUserMandatesByMandate(mandateId)
|
||||||
|
memberUserIds = {getattr(um, "userId", None) for um in mandateMembers}
|
||||||
|
memberUserIds.discard(None)
|
||||||
|
|
||||||
|
allConnections = []
|
||||||
|
for uid in memberUserIds:
|
||||||
|
allConnections.extend(rootIf.getUserConnections(uid))
|
||||||
|
|
||||||
|
items = _buildConnectionInventory(allConnections, rootIf, knowledgeIf, jobService)
|
||||||
totalChunks = sum(c.get("totalChunks", 0) for c in items)
|
totalChunks = sum(c.get("totalChunks", 0) for c in items)
|
||||||
|
totalFiles = sum(c.get("totalFiles", 0) for c in items)
|
||||||
totalBytes = aggregateMandateRagTotalBytes(mandateId)
|
totalBytes = aggregateMandateRagTotalBytes(mandateId)
|
||||||
|
|
||||||
return {"connections": items, "totals": {"chunks": totalChunks, "bytes": totalBytes}}
|
mandateInstances = rootIf.getFeatureInstancesByMandate(mandateId, enabledOnly=True)
|
||||||
|
fiIds = [str(inst.id) for inst in mandateInstances if inst.id]
|
||||||
|
fiItems = _buildFeatureInstanceInventory(fiIds, rootIf, knowledgeIf)
|
||||||
|
totalFiles += sum(fi.get("fileCount", 0) for fi in fiItems)
|
||||||
|
totalChunks += sum(fi.get("chunkCount", 0) for fi in fiItems)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"connections": items,
|
||||||
|
"featureInstances": fiItems,
|
||||||
|
"totals": {"files": totalFiles, "chunks": totalChunks, "bytes": totalBytes},
|
||||||
|
}
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -202,8 +494,24 @@ def _getInventoryPlatform(
|
||||||
|
|
||||||
items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService)
|
items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService)
|
||||||
totalChunks = sum(c.get("totalChunks", 0) for c in items)
|
totalChunks = sum(c.get("totalChunks", 0) for c in items)
|
||||||
|
totalFiles = sum(c.get("totalFiles", 0) for c in items)
|
||||||
|
|
||||||
return {"connections": items, "totals": {"chunks": totalChunks}}
|
from modules.datamodels.datamodelFeatures import FeatureInstance
|
||||||
|
allInstances = rootIf.db.getRecordset(FeatureInstance, recordFilter={"enabled": True})
|
||||||
|
fiIds = [
|
||||||
|
(r.get("id") if isinstance(r, dict) else getattr(r, "id", ""))
|
||||||
|
for r in allInstances
|
||||||
|
]
|
||||||
|
fiIds = [fid for fid in fiIds if fid]
|
||||||
|
fiItems = _buildFeatureInstanceInventory(fiIds, rootIf, knowledgeIf)
|
||||||
|
totalFiles += sum(fi.get("fileCount", 0) for fi in fiItems)
|
||||||
|
totalChunks += sum(fi.get("chunkCount", 0) for fi in fiItems)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"connections": items,
|
||||||
|
"featureInstances": fiItems,
|
||||||
|
"totals": {"files": totalFiles, "chunks": totalChunks},
|
||||||
|
}
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -240,8 +548,9 @@ async def _reindexConnection(
|
||||||
if str(conn.userId) != str(currentUser.id):
|
if str(conn.userId) != str(currentUser.id):
|
||||||
raise HTTPException(status_code=403, detail="Not your connection")
|
raise HTTPException(status_code=403, detail="Not your connection")
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag
|
||||||
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||||
ragDs = [ds for ds in dataSources if (ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False))]
|
ragDs = [ds for ds in dataSources if getEffectiveFlag(ds, "ragIndexEnabled", dataSources, mode="walk") is True]
|
||||||
if not ragDs:
|
if not ragDs:
|
||||||
return {"status": "skipped", "reason": "no_rag_enabled_datasources"}
|
return {"status": "skipped", "reason": "no_rag_enabled_datasources"}
|
||||||
|
|
||||||
|
|
@ -263,6 +572,47 @@ async def _reindexConnection(
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/reindex-feature/{workspaceInstanceId}")
|
||||||
|
@limiter.limit("10/minute")
|
||||||
|
async def _reindexFeature(
|
||||||
|
request: Request,
|
||||||
|
workspaceInstanceId: str,
|
||||||
|
currentUser: User = Depends(getCurrentUser),
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Re-trigger feature data bootstrap for a workspace instance.
|
||||||
|
|
||||||
|
Indexes all RAG-enabled FeatureDataSource rows into the knowledge store.
|
||||||
|
Must be ``async def`` so ``await startJob(...)`` registers in the main loop.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.subFeatureBootstrap import FEATURE_BOOTSTRAP_JOB_TYPE
|
||||||
|
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
featureAccesses = rootIf.getFeatureAccessesForUser(str(currentUser.id))
|
||||||
|
hasAccess = any(
|
||||||
|
str(fa.featureInstanceId) == workspaceInstanceId and fa.enabled
|
||||||
|
for fa in featureAccesses
|
||||||
|
)
|
||||||
|
if not hasAccess and not getattr(currentUser, "isSysAdmin", False):
|
||||||
|
raise HTTPException(status_code=403, detail="No access to this feature instance")
|
||||||
|
|
||||||
|
jobId = await startJob(
|
||||||
|
FEATURE_BOOTSTRAP_JOB_TYPE,
|
||||||
|
{"workspaceInstanceId": workspaceInstanceId},
|
||||||
|
triggeredBy=str(currentUser.id),
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info("Feature reindex triggered for workspace %s (jobId=%s)", workspaceInstanceId, jobId)
|
||||||
|
return {"status": "queued", "workspaceInstanceId": workspaceInstanceId, "jobId": jobId}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error triggering feature reindex: %s", e, exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
@router.get("/jobs")
|
@router.get("/jobs")
|
||||||
@limiter.limit("60/minute")
|
@limiter.limit("60/minute")
|
||||||
def _getActiveJobs(
|
def _getActiveJobs(
|
||||||
|
|
@ -294,7 +644,10 @@ def _getActiveJobs(
|
||||||
"connectionLabel": getattr(conn, "displayLabel", None) or getattr(conn, "authority", connId),
|
"connectionLabel": getattr(conn, "displayLabel", None) or getattr(conn, "authority", connId),
|
||||||
"jobType": j.get("jobType", "connection.bootstrap"),
|
"jobType": j.get("jobType", "connection.bootstrap"),
|
||||||
"progress": j.get("progress", 0),
|
"progress": j.get("progress", 0),
|
||||||
"progressMessage": j.get("progressMessage", ""),
|
"progressMessage": (
|
||||||
|
resolveJobMessage(j.get("progressMessageData"))
|
||||||
|
or j.get("progressMessage", "")
|
||||||
|
),
|
||||||
})
|
})
|
||||||
return active
|
return active
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -227,7 +227,7 @@ WHERE "workflowId" = ANY(%s)
|
||||||
GROUP BY "workflowId"
|
GROUP BY "workflowId"
|
||||||
"""
|
"""
|
||||||
out: dict = {}
|
out: dict = {}
|
||||||
with db.connection.cursor() as cursor:
|
with db.borrowCursor() as cursor:
|
||||||
cursor.execute(sql, (workflowIds,))
|
cursor.execute(sql, (workflowIds,))
|
||||||
for row in cursor.fetchall():
|
for row in cursor.fetchall():
|
||||||
r = dict(row)
|
r = dict(row)
|
||||||
|
|
@ -480,7 +480,7 @@ def _getWorkflowsJoinedPaginated(
|
||||||
dataSql = f"SELECT w.*, rs.\"lastStartedAt\", rs.\"runCount\", rs.\"activeRunId\" FROM {fromSql}{whereClause}{orderClause}{limitClause}"
|
dataSql = f"SELECT w.*, rs.\"lastStartedAt\", rs.\"runCount\", rs.\"activeRunId\" FROM {fromSql}{whereClause}{orderClause}{limitClause}"
|
||||||
|
|
||||||
db._ensure_connection()
|
db._ensure_connection()
|
||||||
with db.connection.cursor() as cursor:
|
with db.borrowCursor() as cursor:
|
||||||
cursor.execute(countSql, countValues)
|
cursor.execute(countSql, countValues)
|
||||||
totalItems = int(cursor.fetchone()["cnt"])
|
totalItems = int(cursor.fetchone()["cnt"])
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -341,11 +341,10 @@ class RbacClass:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
conn = self.dbApp.connection
|
|
||||||
roleIds = set()
|
roleIds = set()
|
||||||
|
|
||||||
# 1. Mandant-Rollen via UserMandate → UserMandateRole (SINGLE Query)
|
# 1. Mandant-Rollen via UserMandate → UserMandateRole (SINGLE Query)
|
||||||
with conn.cursor() as cursor:
|
with self.dbApp.borrowCursor() as cursor:
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""
|
"""
|
||||||
SELECT umr."roleId"
|
SELECT umr."roleId"
|
||||||
|
|
@ -360,7 +359,7 @@ class RbacClass:
|
||||||
|
|
||||||
# 2. Instanz-Rollen via FeatureAccess → FeatureAccessRole (SINGLE Query)
|
# 2. Instanz-Rollen via FeatureAccess → FeatureAccessRole (SINGLE Query)
|
||||||
if featureInstanceId:
|
if featureInstanceId:
|
||||||
with conn.cursor() as cursor:
|
with self.dbApp.borrowCursor() as cursor:
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""
|
"""
|
||||||
SELECT far."roleId"
|
SELECT far."roleId"
|
||||||
|
|
@ -377,9 +376,8 @@ class RbacClass:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# 3. BULK Query: Alle Regeln für alle Rollen + zugehörige Role-Daten
|
# 3. BULK Query: Alle Regeln für alle Rollen + zugehörige Role-Daten
|
||||||
# SINGLE Query mit JOIN statt N+1
|
|
||||||
roleIdsList = list(roleIds)
|
roleIdsList = list(roleIds)
|
||||||
with conn.cursor() as cursor:
|
with self.dbApp.borrowCursor() as cursor:
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""
|
"""
|
||||||
SELECT ar.*, r."mandateId" as "roleMandateId",
|
SELECT ar.*, r."mandateId" as "roleMandateId",
|
||||||
|
|
|
||||||
|
|
@ -67,7 +67,12 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
sourceType = ds.get("sourceType", "")
|
sourceType = ds.get("sourceType", "")
|
||||||
path = ds.get("path", "/")
|
path = ds.get("path", "/")
|
||||||
label = ds.get("label", "")
|
label = ds.get("label", "")
|
||||||
neutralize = bool(ds.get("neutralize", False))
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag
|
||||||
|
from modules.datamodels.datamodelDataSource import DataSource
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
allConnDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||||
|
neutralize = bool(getEffectiveFlag(ds, "neutralize", allConnDs or [ds], mode="walk"))
|
||||||
service = _SOURCE_TYPE_TO_SERVICE.get(sourceType, sourceType)
|
service = _SOURCE_TYPE_TO_SERVICE.get(sourceType, sourceType)
|
||||||
if not connectionId:
|
if not connectionId:
|
||||||
raise ValueError(f"DataSource '{dsId}' has no connectionId")
|
raise ValueError(f"DataSource '{dsId}' has no connectionId")
|
||||||
|
|
|
||||||
|
|
@ -25,15 +25,14 @@ _CACHE_TTL_SECONDS = 300
|
||||||
|
|
||||||
|
|
||||||
def _getOrCreateFeatureDbConnector(featureDbName: str, userId: str):
|
def _getOrCreateFeatureDbConnector(featureDbName: str, userId: str):
|
||||||
"""Reuse a pooled DB connector for the given feature database."""
|
"""Reuse a pooled DB connector for the given feature database.
|
||||||
|
|
||||||
|
The underlying psycopg2 connections live in the central pool
|
||||||
|
(`_PoolRegistry`) and are recreated on demand if they go stale; we just
|
||||||
|
need to keep the lightweight connector wrapper around.
|
||||||
|
"""
|
||||||
if featureDbName in _featureDbConnPool:
|
if featureDbName in _featureDbConnPool:
|
||||||
conn = _featureDbConnPool[featureDbName]
|
return _featureDbConnPool[featureDbName]
|
||||||
try:
|
|
||||||
if conn.connection and not conn.connection.closed:
|
|
||||||
return conn
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Feature DB connection check failed for {featureDbName}: {e}")
|
|
||||||
_featureDbConnPool.pop(featureDbName, None)
|
|
||||||
|
|
||||||
from modules.connectors.connectorDbPostgre import DatabaseConnector
|
from modules.connectors.connectorDbPostgre import DatabaseConnector
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
@ -111,9 +110,11 @@ def _registerFeatureSubAgentTools(registry: ToolRegistry, services):
|
||||||
recordFilter={"featureInstanceId": featureInstanceId, "workspaceInstanceId": workspaceInstanceId},
|
recordFilter={"featureInstanceId": featureInstanceId, "workspaceInstanceId": workspaceInstanceId},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlagFds
|
||||||
|
_fdsAll = featureDataSources or []
|
||||||
_anySourceNeutralize = any(
|
_anySourceNeutralize = any(
|
||||||
bool(ds.get("neutralize", False) if isinstance(ds, dict) else getattr(ds, "neutralize", False))
|
getEffectiveFlagFds(ds, "neutralize", _fdsAll, mode="walk") is True
|
||||||
for ds in (featureDataSources or [])
|
for ds in _fdsAll
|
||||||
)
|
)
|
||||||
|
|
||||||
neutralizeFieldsPerTable: Dict[str, List[str]] = {}
|
neutralizeFieldsPerTable: Dict[str, List[str]] = {}
|
||||||
|
|
|
||||||
|
|
@ -95,8 +95,7 @@ class FeatureDataProvider:
|
||||||
def getActualColumns(self, tableName: str) -> List[str]:
|
def getActualColumns(self, tableName: str) -> List[str]:
|
||||||
"""Read real column names from PostgreSQL information_schema."""
|
"""Read real column names from PostgreSQL information_schema."""
|
||||||
try:
|
try:
|
||||||
conn = self._db.connection
|
with self._db.borrowCursor() as cur:
|
||||||
with conn.cursor() as cur:
|
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"SELECT column_name FROM information_schema.columns "
|
"SELECT column_name FROM information_schema.columns "
|
||||||
"WHERE table_schema = 'public' AND LOWER(table_name) = LOWER(%s) "
|
"WHERE table_schema = 'public' AND LOWER(table_name) = LOWER(%s) "
|
||||||
|
|
@ -131,7 +130,6 @@ class FeatureDataProvider:
|
||||||
Returns ``{"rows": [...], "total": N, "limit": L, "offset": O}``.
|
Returns ``{"rows": [...], "total": N, "limit": L, "offset": O}``.
|
||||||
"""
|
"""
|
||||||
_validateTableName(tableName)
|
_validateTableName(tableName)
|
||||||
conn = self._db.connection
|
|
||||||
|
|
||||||
if fields:
|
if fields:
|
||||||
invalid = [f for f in fields if not _isValidIdentifier(f)]
|
invalid = [f for f in fields if not _isValidIdentifier(f)]
|
||||||
|
|
@ -141,7 +139,7 @@ class FeatureDataProvider:
|
||||||
"error": f"Invalid field name(s): {', '.join(invalid)}. Use getActualColumns to discover valid column names.",
|
"error": f"Invalid field name(s): {', '.join(invalid)}. Use getActualColumns to discover valid column names.",
|
||||||
}
|
}
|
||||||
|
|
||||||
scopeFilter = _buildScopeFilter(tableName, featureInstanceId, mandateId, dbConnection=conn)
|
scopeFilter = _buildScopeFilter(tableName, featureInstanceId, mandateId, db=self._db)
|
||||||
extraWhere, extraParams = _buildFilterClauses(extraFilters)
|
extraWhere, extraParams = _buildFilterClauses(extraFilters)
|
||||||
|
|
||||||
fullWhere = scopeFilter["where"]
|
fullWhere = scopeFilter["where"]
|
||||||
|
|
@ -152,7 +150,7 @@ class FeatureDataProvider:
|
||||||
|
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
try:
|
try:
|
||||||
with conn.cursor() as cur:
|
with self._db.borrowCursor() as cur:
|
||||||
countSql = f'SELECT COUNT(*) FROM "{tableName}" WHERE {fullWhere}'
|
countSql = f'SELECT COUNT(*) FROM "{tableName}" WHERE {fullWhere}'
|
||||||
cur.execute(countSql, allParams)
|
cur.execute(countSql, allParams)
|
||||||
total = cur.fetchone()["count"] if cur.rowcount else 0
|
total = cur.fetchone()["count"] if cur.rowcount else 0
|
||||||
|
|
@ -179,10 +177,6 @@ class FeatureDataProvider:
|
||||||
_debugQueryLog("browseTable", tableName, {
|
_debugQueryLog("browseTable", tableName, {
|
||||||
"fields": fields, "limit": limit, "offset": offset,
|
"fields": fields, "limit": limit, "offset": offset,
|
||||||
}, errResult, elapsed)
|
}, errResult, elapsed)
|
||||||
try:
|
|
||||||
conn.rollback()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
return errResult
|
return errResult
|
||||||
|
|
||||||
def aggregateTable(
|
def aggregateTable(
|
||||||
|
|
@ -208,8 +202,7 @@ class FeatureDataProvider:
|
||||||
if groupBy and not _isValidIdentifier(groupBy):
|
if groupBy and not _isValidIdentifier(groupBy):
|
||||||
return {"rows": [], "error": f"Invalid groupBy field: {groupBy}"}
|
return {"rows": [], "error": f"Invalid groupBy field: {groupBy}"}
|
||||||
|
|
||||||
conn = self._db.connection
|
scopeFilter = _buildScopeFilter(tableName, featureInstanceId, mandateId, db=self._db)
|
||||||
scopeFilter = _buildScopeFilter(tableName, featureInstanceId, mandateId, dbConnection=conn)
|
|
||||||
extraWhere, extraParams = _buildFilterClauses(extraFilters)
|
extraWhere, extraParams = _buildFilterClauses(extraFilters)
|
||||||
|
|
||||||
fullWhere = scopeFilter["where"]
|
fullWhere = scopeFilter["where"]
|
||||||
|
|
@ -220,7 +213,7 @@ class FeatureDataProvider:
|
||||||
|
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
try:
|
try:
|
||||||
with conn.cursor() as cur:
|
with self._db.borrowCursor() as cur:
|
||||||
if groupBy:
|
if groupBy:
|
||||||
sql = (
|
sql = (
|
||||||
f'SELECT "{groupBy}" AS "groupValue", {aggregate}("{field}") AS "result" '
|
f'SELECT "{groupBy}" AS "groupValue", {aggregate}("{field}") AS "result" '
|
||||||
|
|
@ -253,10 +246,6 @@ class FeatureDataProvider:
|
||||||
_debugQueryLog("aggregateTable", tableName, {
|
_debugQueryLog("aggregateTable", tableName, {
|
||||||
"aggregate": aggregate, "field": field, "groupBy": groupBy,
|
"aggregate": aggregate, "field": field, "groupBy": groupBy,
|
||||||
}, errResult, elapsed)
|
}, errResult, elapsed)
|
||||||
try:
|
|
||||||
conn.rollback()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
return errResult
|
return errResult
|
||||||
|
|
||||||
def queryTable(
|
def queryTable(
|
||||||
|
|
@ -277,7 +266,6 @@ class FeatureDataProvider:
|
||||||
``extraFilters`` are mandatory record-level scoping filters injected by the pipeline.
|
``extraFilters`` are mandatory record-level scoping filters injected by the pipeline.
|
||||||
"""
|
"""
|
||||||
_validateTableName(tableName)
|
_validateTableName(tableName)
|
||||||
conn = self._db.connection
|
|
||||||
|
|
||||||
if fields:
|
if fields:
|
||||||
invalid = [f for f in fields if not _isValidIdentifier(f)]
|
invalid = [f for f in fields if not _isValidIdentifier(f)]
|
||||||
|
|
@ -287,7 +275,7 @@ class FeatureDataProvider:
|
||||||
"error": f"Invalid field name(s): {', '.join(invalid)}. Use getActualColumns to discover valid column names.",
|
"error": f"Invalid field name(s): {', '.join(invalid)}. Use getActualColumns to discover valid column names.",
|
||||||
}
|
}
|
||||||
|
|
||||||
scopeFilter = _buildScopeFilter(tableName, featureInstanceId, mandateId, dbConnection=conn)
|
scopeFilter = _buildScopeFilter(tableName, featureInstanceId, mandateId, db=self._db)
|
||||||
|
|
||||||
combinedFilters = list(filters or []) + list(extraFilters or [])
|
combinedFilters = list(filters or []) + list(extraFilters or [])
|
||||||
extraWhere, extraParams = _buildFilterClauses(combinedFilters if combinedFilters else None)
|
extraWhere, extraParams = _buildFilterClauses(combinedFilters if combinedFilters else None)
|
||||||
|
|
@ -300,7 +288,7 @@ class FeatureDataProvider:
|
||||||
|
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
try:
|
try:
|
||||||
with conn.cursor() as cur:
|
with self._db.borrowCursor() as cur:
|
||||||
countSql = f'SELECT COUNT(*) FROM "{tableName}" WHERE {fullWhere}'
|
countSql = f'SELECT COUNT(*) FROM "{tableName}" WHERE {fullWhere}'
|
||||||
cur.execute(countSql, allParams)
|
cur.execute(countSql, allParams)
|
||||||
total = cur.fetchone()["count"] if cur.rowcount else 0
|
total = cur.fetchone()["count"] if cur.rowcount else 0
|
||||||
|
|
@ -329,10 +317,6 @@ class FeatureDataProvider:
|
||||||
"filters": filters, "fields": fields, "orderBy": orderBy,
|
"filters": filters, "fields": fields, "orderBy": orderBy,
|
||||||
"limit": limit, "offset": offset,
|
"limit": limit, "offset": offset,
|
||||||
}, errResult, elapsed)
|
}, errResult, elapsed)
|
||||||
try:
|
|
||||||
conn.rollback()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
return errResult
|
return errResult
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -343,13 +327,13 @@ class FeatureDataProvider:
|
||||||
_instanceColCache: Dict[str, str] = {}
|
_instanceColCache: Dict[str, str] = {}
|
||||||
|
|
||||||
|
|
||||||
def _resolveInstanceColumn(tableName: str, dbConnection=None) -> str:
|
def _resolveInstanceColumn(tableName: str, db=None) -> str:
|
||||||
"""Detect whether the table uses ``instanceId`` or ``featureInstanceId``."""
|
"""Detect whether the table uses ``instanceId`` or ``featureInstanceId``."""
|
||||||
if tableName in _instanceColCache:
|
if tableName in _instanceColCache:
|
||||||
return _instanceColCache[tableName]
|
return _instanceColCache[tableName]
|
||||||
if dbConnection:
|
if db:
|
||||||
try:
|
try:
|
||||||
with dbConnection.cursor() as cur:
|
with db.borrowCursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"SELECT column_name FROM information_schema.columns "
|
"SELECT column_name FROM information_schema.columns "
|
||||||
"WHERE table_schema = 'public' AND LOWER(table_name) = LOWER(%s) "
|
"WHERE table_schema = 'public' AND LOWER(table_name) = LOWER(%s) "
|
||||||
|
|
@ -378,14 +362,14 @@ def _isValidIdentifier(name: str) -> bool:
|
||||||
return name.isidentifier()
|
return name.isidentifier()
|
||||||
|
|
||||||
|
|
||||||
def _buildScopeFilter(tableName: str, featureInstanceId: str, mandateId: str, dbConnection=None) -> Dict[str, Any]:
|
def _buildScopeFilter(tableName: str, featureInstanceId: str, mandateId: str, db=None, dbConnection=None) -> Dict[str, Any]:
|
||||||
"""Build the mandatory WHERE clause that scopes rows to the feature instance.
|
"""Build the mandatory WHERE clause that scopes rows to the feature instance.
|
||||||
|
|
||||||
Feature tables use either ``instanceId`` (commcoach, teamsbot) or
|
Feature tables use either ``instanceId`` (commcoach, teamsbot) or
|
||||||
``featureInstanceId`` (trustee) as the FK. We detect the actual column
|
``featureInstanceId`` (trustee) as the FK. We detect the actual column
|
||||||
from ``information_schema`` when a DB connection is provided.
|
from ``information_schema`` when a DB connector is provided.
|
||||||
"""
|
"""
|
||||||
instanceCol = _resolveInstanceColumn(tableName, dbConnection)
|
instanceCol = _resolveInstanceColumn(tableName, db or dbConnection)
|
||||||
|
|
||||||
conditions = []
|
conditions = []
|
||||||
params = []
|
params = []
|
||||||
|
|
|
||||||
|
|
@ -54,19 +54,53 @@ _CANCEL_CHECK_INTERVAL_S = 3.0
|
||||||
|
|
||||||
|
|
||||||
class JobProgressCallback:
|
class JobProgressCallback:
|
||||||
"""Callable progress reporter with cooperative cancel-check for long-running walkers."""
|
"""Callable progress reporter with cooperative cancel-check for long-running walkers.
|
||||||
|
|
||||||
|
Two ways to set a progress message:
|
||||||
|
progressCb(50, "145 Dateien verarbeitet") # legacy plaintext (DE)
|
||||||
|
progressCb(50, messageKey="{n} Dateien verarbeitet",
|
||||||
|
messageParams={"n": 145}) # i18n-friendly
|
||||||
|
|
||||||
|
When `messageKey` is given the structured payload is written to
|
||||||
|
`BackgroundJob.progressMessageData` so the frontend can render it via
|
||||||
|
`t(key, params)` in the user's UI language. A best-effort rendered
|
||||||
|
fallback is also stored in `progressMessage` for older clients, logs,
|
||||||
|
and audit trails.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, jobId: str):
|
def __init__(self, jobId: str):
|
||||||
self._jobId = jobId
|
self._jobId = jobId
|
||||||
self._cancelledCache: Optional[bool] = None
|
self._cancelledCache: Optional[bool] = None
|
||||||
self._lastCheckedAt: float = 0.0
|
self._lastCheckedAt: float = 0.0
|
||||||
|
|
||||||
def __call__(self, progress: int, message: Optional[str] = None) -> None:
|
def __call__(
|
||||||
|
self,
|
||||||
|
progress: int,
|
||||||
|
message: Optional[str] = None,
|
||||||
|
*,
|
||||||
|
messageKey: Optional[str] = None,
|
||||||
|
messageParams: Optional[Dict[str, Any]] = None,
|
||||||
|
) -> None:
|
||||||
try:
|
try:
|
||||||
clamped = max(0, min(100, int(progress)))
|
clamped = max(0, min(100, int(progress)))
|
||||||
fields: Dict[str, Any] = {"progress": clamped}
|
fields: Dict[str, Any] = {"progress": clamped}
|
||||||
if message is not None:
|
|
||||||
|
if messageKey is not None:
|
||||||
|
params = messageParams or {}
|
||||||
|
try:
|
||||||
|
fallback = messageKey.format(**params)
|
||||||
|
except (KeyError, IndexError, ValueError) as fmtErr:
|
||||||
|
fallback = message or messageKey
|
||||||
|
logger.warning(
|
||||||
|
"progressCb message format failed for job %s key=%r params=%r: %s",
|
||||||
|
self._jobId, messageKey, params, fmtErr,
|
||||||
|
)
|
||||||
|
fields["progressMessageData"] = {"key": messageKey, "params": params}
|
||||||
|
fields["progressMessage"] = (message or fallback)[:500]
|
||||||
|
elif message is not None:
|
||||||
fields["progressMessage"] = message[:500]
|
fields["progressMessage"] = message[:500]
|
||||||
|
fields["progressMessageData"] = None
|
||||||
|
|
||||||
_updateJob(self._jobId, fields)
|
_updateJob(self._jobId, fields)
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
logger.warning("Progress update failed for job %s: %s", self._jobId, ex)
|
logger.warning("Progress update failed for job %s: %s", self._jobId, ex)
|
||||||
|
|
|
||||||
|
|
@ -534,11 +534,17 @@ class ChatService:
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Create a new external data source reference.
|
"""Create a new external data source reference.
|
||||||
|
|
||||||
Returns existing record if connectionId + path already exists (upsert semantics).
|
Upsert key is `(connectionId, sourceType, path)`. The same `path='/'`
|
||||||
|
can carry multiple DataSources discriminated by sourceType: the
|
||||||
|
Connection-Root (sourceType=<authority>, e.g. 'msft') plus one per
|
||||||
|
service (sourceType='sharepointFolder', 'outlookFolder', ...). The
|
||||||
|
sourceType filter MUST be present, otherwise a Service-Root POST
|
||||||
|
returns the Connection-Root and toggles cascade onto every sibling.
|
||||||
"""
|
"""
|
||||||
from modules.datamodels.datamodelDataSource import DataSource
|
from modules.datamodels.datamodelDataSource import DataSource
|
||||||
existing = self.interfaceDbApp.db.getRecordset(
|
existing = self.interfaceDbApp.db.getRecordset(
|
||||||
DataSource, recordFilter={"connectionId": connectionId, "path": path}
|
DataSource,
|
||||||
|
recordFilter={"connectionId": connectionId, "sourceType": sourceType, "path": path},
|
||||||
)
|
)
|
||||||
if existing:
|
if existing:
|
||||||
return existing[0] if isinstance(existing[0], dict) else existing[0].model_dump()
|
return existing[0] if isinstance(existing[0], dict) else existing[0].model_dump()
|
||||||
|
|
|
||||||
1020
modules/serviceCenter/services/serviceKnowledge/_buildTree.py
Normal file
1020
modules/serviceCenter/services/serviceKnowledge/_buildTree.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,86 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Indicative cost estimation for a RAG bootstrap run.
|
||||||
|
|
||||||
|
This is **not** a billing-grade forecast: it gives the user a back-of-the-envelope
|
||||||
|
USD figure for the worst-case full sync, so they can sanity-check before raising
|
||||||
|
`maxBytes`/`maxItems`. The output always carries the underlying assumptions
|
||||||
|
(`basis`) so the user can judge plausibility.
|
||||||
|
|
||||||
|
Heuristic:
|
||||||
|
estimatedTokens = ceil(maxBytes / CHARS_PER_TOKEN_BYTES_FACTOR)
|
||||||
|
estimatedUsd = estimatedTokens / 1_000_000 * EMBEDDING_USD_PER_MTOKEN
|
||||||
|
|
||||||
|
Defaults match OpenAI `text-embedding-3-small` pricing (2026-Q2).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
|
||||||
|
CHARS_PER_TOKEN = 4
|
||||||
|
EMBEDDING_USD_PER_MTOKEN = 0.02
|
||||||
|
DEFAULT_TOKENS_PER_ITEM = 1500
|
||||||
|
BYTES_PER_TOKEN_TEXT_FACTOR = 4
|
||||||
|
EXTRACTABLE_FRACTION = 0.4
|
||||||
|
|
||||||
|
|
||||||
|
def estimateBootstrapCost(limits: Dict[str, int], kind: str = "files") -> Dict[str, Any]:
|
||||||
|
"""Return an indicative cost estimate dict for a DataSource bootstrap.
|
||||||
|
|
||||||
|
Returned shape::
|
||||||
|
|
||||||
|
{
|
||||||
|
"estimatedTokens": int,
|
||||||
|
"estimatedUsd": float, # rounded to 4 decimals
|
||||||
|
"basis": {
|
||||||
|
"kind": "files"|"clickup",
|
||||||
|
"limits": {...},
|
||||||
|
"assumptions": {
|
||||||
|
"embeddingUsdPerMToken": 0.02,
|
||||||
|
"charsPerToken": 4,
|
||||||
|
"extractableFraction": 0.4,
|
||||||
|
"tokensPerItem": 1500 # only for clickup-like item counts
|
||||||
|
},
|
||||||
|
"notes": "non-binding, depends on real file content..."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
assumptions: Dict[str, Any] = {
|
||||||
|
"embeddingUsdPerMToken": EMBEDDING_USD_PER_MTOKEN,
|
||||||
|
"charsPerToken": CHARS_PER_TOKEN,
|
||||||
|
}
|
||||||
|
|
||||||
|
if kind == "files":
|
||||||
|
maxBytes = int(limits.get("maxBytes") or 0)
|
||||||
|
extractableBytes = maxBytes * EXTRACTABLE_FRACTION
|
||||||
|
estimatedTokens = int(math.ceil(extractableBytes / BYTES_PER_TOKEN_TEXT_FACTOR))
|
||||||
|
assumptions["extractableFraction"] = EXTRACTABLE_FRACTION
|
||||||
|
assumptions["formula"] = "ceil(maxBytes * 0.4 / 4)"
|
||||||
|
elif kind == "clickup":
|
||||||
|
maxTasks = int(limits.get("maxTasks") or 0)
|
||||||
|
maxWorkspaces = max(1, int(limits.get("maxWorkspaces") or 1))
|
||||||
|
estimatedTokens = maxTasks * maxWorkspaces * DEFAULT_TOKENS_PER_ITEM
|
||||||
|
assumptions["tokensPerItem"] = DEFAULT_TOKENS_PER_ITEM
|
||||||
|
assumptions["formula"] = "maxTasks * maxWorkspaces * 1500"
|
||||||
|
else:
|
||||||
|
estimatedTokens = 0
|
||||||
|
assumptions["formula"] = "unknown kind, returning zero"
|
||||||
|
|
||||||
|
estimatedUsd = round(estimatedTokens / 1_000_000 * EMBEDDING_USD_PER_MTOKEN, 4)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"estimatedTokens": estimatedTokens,
|
||||||
|
"estimatedUsd": estimatedUsd,
|
||||||
|
"basis": {
|
||||||
|
"kind": kind,
|
||||||
|
"limits": dict(limits),
|
||||||
|
"assumptions": assumptions,
|
||||||
|
"notes": (
|
||||||
|
"Indicative only. Actual cost depends on file types, extractable text "
|
||||||
|
"ratio, dedup hit-rate, retries, and current embedding model pricing."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
}
|
||||||
631
modules/serviceCenter/services/serviceKnowledge/_inheritFlags.py
Normal file
631
modules/serviceCenter/services/serviceKnowledge/_inheritFlags.py
Normal file
|
|
@ -0,0 +1,631 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Cascade-inherit semantics for DataSource flags (neutralize, ragIndexEnabled, scope).
|
||||||
|
|
||||||
|
Three-state flags allow tree elements to either set an explicit value or
|
||||||
|
inherit the value from their nearest ancestor in the path hierarchy.
|
||||||
|
|
||||||
|
Modes:
|
||||||
|
- 'walk' (default): resolves the *concrete* effective value per-item
|
||||||
|
(never returns 'mixed'). Used by backend consumers (RAG walker,
|
||||||
|
neutralization pipeline, scope filter, etc.).
|
||||||
|
- 'aggregate': resolves the *display* effective value per-item. If the
|
||||||
|
item has descendants with differing walk-effective values, returns
|
||||||
|
'mixed'. Used by listing endpoints and PATCH responses for the UI.
|
||||||
|
|
||||||
|
Path-traversal rules:
|
||||||
|
- A DataSource is identified by `(connectionId, sourceType, path)`.
|
||||||
|
- The root of a service tree is `path == '/'`.
|
||||||
|
- Sub-elements have paths like `/folder1/sub`. Their parent path is the
|
||||||
|
longest prefix path that exists as a DataSource record (string-based).
|
||||||
|
- If no ancestor with an explicit value exists, the default is `False`
|
||||||
|
(or `'personal'` for scope) — matching the legacy behavior of NULL = inherit.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_INHERITABLE_FLAGS = ("neutralize", "ragIndexEnabled", "scope")
|
||||||
|
_INHERITABLE_FDS_FLAGS = ("neutralize", "ragIndexEnabled", "scope")
|
||||||
|
|
||||||
|
# Connection-root DataSources carry the authority as their sourceType
|
||||||
|
# (e.g. 'msft', 'google'). They sit one level above all service DataSources
|
||||||
|
# of the same connection in the visual tree, so flag inheritance must
|
||||||
|
# cross sourceType boundaries — but ONLY from these authority roots.
|
||||||
|
_AUTHORITY_SOURCE_TYPES = frozenset({"local", "google", "msft", "clickup", "infomaniak"})
|
||||||
|
|
||||||
|
Mode = Literal["walk", "aggregate"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Internal helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _normalisePath(path: Optional[str]) -> str:
|
||||||
|
"""Normalize a DataSource path to '/'-prefixed, no trailing slash (except root)."""
|
||||||
|
if not path:
|
||||||
|
return "/"
|
||||||
|
p = str(path).strip()
|
||||||
|
if not p.startswith("/"):
|
||||||
|
p = "/" + p
|
||||||
|
if len(p) > 1 and p.endswith("/"):
|
||||||
|
p = p.rstrip("/")
|
||||||
|
return p
|
||||||
|
|
||||||
|
|
||||||
|
def _flagDefault(flag: str) -> Any:
|
||||||
|
if flag == "scope":
|
||||||
|
return "personal"
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _isExplicit(value: Any) -> bool:
|
||||||
|
"""A flag value is explicit when it is not None/empty-string."""
|
||||||
|
if value is None:
|
||||||
|
return False
|
||||||
|
if isinstance(value, str) and value == "":
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _getRecordValue(rec: Any, key: str) -> Any:
|
||||||
|
if isinstance(rec, dict):
|
||||||
|
return rec.get(key)
|
||||||
|
return getattr(rec, key, None)
|
||||||
|
|
||||||
|
|
||||||
|
def _isAncestorPath(ancestor: str, descendant: str) -> bool:
|
||||||
|
"""True iff `ancestor` is a strict path-prefix of `descendant`."""
|
||||||
|
if ancestor == descendant:
|
||||||
|
return False
|
||||||
|
if ancestor == "/":
|
||||||
|
return descendant != "/"
|
||||||
|
return descendant.startswith(ancestor + "/")
|
||||||
|
|
||||||
|
|
||||||
|
def _pathDepth(path: str) -> int:
|
||||||
|
if path == "/":
|
||||||
|
return 0
|
||||||
|
return path.count("/")
|
||||||
|
|
||||||
|
|
||||||
|
def _findAncestorChain(
|
||||||
|
rec: Dict[str, Any],
|
||||||
|
allDs: Iterable[Dict[str, Any]],
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
"""Return all ancestor DataSources of `rec` in the same connection,
|
||||||
|
ordered nearest-first.
|
||||||
|
|
||||||
|
Two ancestor relations are merged:
|
||||||
|
1) same-sourceType path-ancestor — strict path-prefix within the
|
||||||
|
same service tree.
|
||||||
|
2) connection-root ancestor — a DS with `path='/'` and
|
||||||
|
`sourceType` in authority set is the parent of every other DS
|
||||||
|
in that connection regardless of sourceType.
|
||||||
|
|
||||||
|
The connection-root is always the most distant ancestor.
|
||||||
|
"""
|
||||||
|
recPath = _normalisePath(_getRecordValue(rec, "path"))
|
||||||
|
recSourceType = _getRecordValue(rec, "sourceType")
|
||||||
|
recConnectionId = _getRecordValue(rec, "connectionId")
|
||||||
|
sameTypeCandidates: List[Tuple[int, Dict[str, Any]]] = []
|
||||||
|
connectionRoot: Optional[Dict[str, Any]] = None
|
||||||
|
recIsConnectionRoot = recSourceType in _AUTHORITY_SOURCE_TYPES and recPath == "/"
|
||||||
|
for cand in allDs:
|
||||||
|
if _getRecordValue(cand, "id") == _getRecordValue(rec, "id"):
|
||||||
|
continue
|
||||||
|
if _getRecordValue(cand, "connectionId") != recConnectionId:
|
||||||
|
continue
|
||||||
|
candSourceType = _getRecordValue(cand, "sourceType")
|
||||||
|
candPath = _normalisePath(_getRecordValue(cand, "path"))
|
||||||
|
if candSourceType == recSourceType:
|
||||||
|
if candPath == recPath or not _isAncestorPath(candPath, recPath):
|
||||||
|
continue
|
||||||
|
sameTypeCandidates.append((len(candPath), cand))
|
||||||
|
elif (
|
||||||
|
not recIsConnectionRoot
|
||||||
|
and candSourceType in _AUTHORITY_SOURCE_TYPES
|
||||||
|
and candPath == "/"
|
||||||
|
):
|
||||||
|
connectionRoot = cand
|
||||||
|
sameTypeCandidates.sort(key=lambda x: x[0], reverse=True)
|
||||||
|
chain = [c for _, c in sameTypeCandidates]
|
||||||
|
if connectionRoot is not None:
|
||||||
|
chain.append(connectionRoot)
|
||||||
|
return chain
|
||||||
|
|
||||||
|
|
||||||
|
def _isDescendantDs(parentRec: Dict[str, Any], candidate: Dict[str, Any]) -> bool:
|
||||||
|
"""True iff `candidate` is a descendant of `parentRec` in the DS hierarchy."""
|
||||||
|
parentSourceType = _getRecordValue(parentRec, "sourceType")
|
||||||
|
parentPath = _normalisePath(_getRecordValue(parentRec, "path"))
|
||||||
|
parentConnectionId = _getRecordValue(parentRec, "connectionId")
|
||||||
|
parentId = _getRecordValue(parentRec, "id")
|
||||||
|
|
||||||
|
candId = _getRecordValue(candidate, "id")
|
||||||
|
if candId == parentId:
|
||||||
|
return False
|
||||||
|
if _getRecordValue(candidate, "connectionId") != parentConnectionId:
|
||||||
|
return False
|
||||||
|
|
||||||
|
candSourceType = _getRecordValue(candidate, "sourceType")
|
||||||
|
candPath = _normalisePath(_getRecordValue(candidate, "path"))
|
||||||
|
|
||||||
|
parentIsConnectionRoot = (
|
||||||
|
parentSourceType in _AUTHORITY_SOURCE_TYPES and parentPath == "/"
|
||||||
|
)
|
||||||
|
if parentIsConnectionRoot:
|
||||||
|
return True
|
||||||
|
if candSourceType != parentSourceType:
|
||||||
|
return False
|
||||||
|
return _isAncestorPath(parentPath, candPath)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# DataSource: getEffectiveFlag
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def getEffectiveFlag(
|
||||||
|
rec: Dict[str, Any],
|
||||||
|
flag: str,
|
||||||
|
sameConnectionDs: Iterable[Dict[str, Any]],
|
||||||
|
mode: Mode = "walk",
|
||||||
|
) -> Any:
|
||||||
|
"""Resolve the effective value of a flag via path-traversal.
|
||||||
|
|
||||||
|
mode='walk': own explicit → nearest ancestor explicit → default.
|
||||||
|
Always returns a concrete value (never 'mixed').
|
||||||
|
mode='aggregate': same as walk for leaf value, but if the item has
|
||||||
|
descendants whose walk-effective values differ from
|
||||||
|
each other, returns 'mixed'.
|
||||||
|
"""
|
||||||
|
if flag not in _INHERITABLE_FLAGS:
|
||||||
|
raise ValueError(f"Unknown inheritable flag: {flag}")
|
||||||
|
|
||||||
|
allDs = list(sameConnectionDs)
|
||||||
|
|
||||||
|
walkValue = _resolveWalkValue(rec, flag, allDs)
|
||||||
|
|
||||||
|
if mode == "walk":
|
||||||
|
return walkValue
|
||||||
|
|
||||||
|
# mode == 'aggregate': check subtree for heterogeneous effective values
|
||||||
|
descendants = [d for d in allDs if _isDescendantDs(rec, d)]
|
||||||
|
if not descendants:
|
||||||
|
return walkValue
|
||||||
|
|
||||||
|
subtreeValues = set()
|
||||||
|
subtreeValues.add(_normaliseForComparison(walkValue))
|
||||||
|
for desc in descendants:
|
||||||
|
descEffective = _resolveWalkValue(desc, flag, allDs)
|
||||||
|
subtreeValues.add(_normaliseForComparison(descEffective))
|
||||||
|
if len(subtreeValues) > 1:
|
||||||
|
recId = _getRecordValue(rec, "id")
|
||||||
|
descId = _getRecordValue(desc, "id")
|
||||||
|
descOwnVal = _getRecordValue(desc, flag)
|
||||||
|
logger.info(
|
||||||
|
"DS aggregate MIXED for rec=%s flag=%s: walkValue=%s, "
|
||||||
|
"divergent desc=%s (own=%s, effective=%s), subtreeValues=%s",
|
||||||
|
recId, flag, walkValue, descId, descOwnVal, descEffective, subtreeValues,
|
||||||
|
)
|
||||||
|
return "mixed"
|
||||||
|
return walkValue
|
||||||
|
|
||||||
|
|
||||||
|
def _resolveWalkValue(rec: Dict[str, Any], flag: str, allDs: List[Dict[str, Any]]) -> Any:
|
||||||
|
"""Core walk resolution: own explicit → ancestor chain → default."""
|
||||||
|
own = _getRecordValue(rec, flag)
|
||||||
|
if _isExplicit(own):
|
||||||
|
return own
|
||||||
|
chain = _findAncestorChain(rec, allDs)
|
||||||
|
for ancestor in chain:
|
||||||
|
ancestorVal = _getRecordValue(ancestor, flag)
|
||||||
|
if _isExplicit(ancestorVal):
|
||||||
|
return ancestorVal
|
||||||
|
return _flagDefault(flag)
|
||||||
|
|
||||||
|
|
||||||
|
def _normaliseForComparison(value: Any) -> Any:
|
||||||
|
"""Normalize values for set-comparison (bool as int to avoid hash issues)."""
|
||||||
|
if isinstance(value, bool):
|
||||||
|
return int(value)
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# DataSource: cascadeResetDescendants (bottom-up)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def cascadeResetDescendants(
|
||||||
|
rootIf: Any,
|
||||||
|
parentRec: Dict[str, Any],
|
||||||
|
flag: str,
|
||||||
|
) -> List[str]:
|
||||||
|
"""Reset all explicit descendant values of `flag` to NULL (= inherit).
|
||||||
|
|
||||||
|
Reset order: bottom-up (deepest first) for crash safety.
|
||||||
|
The parent itself is NOT modified here — the caller sets the master value
|
||||||
|
after this function returns.
|
||||||
|
|
||||||
|
Returns list of reset record IDs in bottom-up order.
|
||||||
|
"""
|
||||||
|
if flag not in _INHERITABLE_FLAGS:
|
||||||
|
raise ValueError(f"Unknown inheritable flag: {flag}")
|
||||||
|
from modules.datamodels.datamodelDataSource import DataSource
|
||||||
|
|
||||||
|
connectionId = _getRecordValue(parentRec, "connectionId")
|
||||||
|
parentId = _getRecordValue(parentRec, "id")
|
||||||
|
if not connectionId:
|
||||||
|
return []
|
||||||
|
|
||||||
|
siblings = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||||
|
|
||||||
|
toReset: List[Tuple[int, str]] = []
|
||||||
|
for sib in siblings:
|
||||||
|
if not _isDescendantDs(parentRec, sib):
|
||||||
|
continue
|
||||||
|
sibVal = _getRecordValue(sib, flag)
|
||||||
|
if not _isExplicit(sibVal):
|
||||||
|
continue
|
||||||
|
sibId = _getRecordValue(sib, "id")
|
||||||
|
sibPath = _normalisePath(_getRecordValue(sib, "path"))
|
||||||
|
toReset.append((_pathDepth(sibPath), sibId))
|
||||||
|
|
||||||
|
# Sort deepest first (bottom-up)
|
||||||
|
toReset.sort(key=lambda x: x[0], reverse=True)
|
||||||
|
|
||||||
|
resetIds: List[str] = []
|
||||||
|
for _, sibId in toReset:
|
||||||
|
try:
|
||||||
|
rootIf.db.recordModify(DataSource, sibId, {flag: None})
|
||||||
|
resetIds.append(sibId)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("Cascade-reset failed for DataSource %s flag=%s: %s", sibId, flag, exc)
|
||||||
|
|
||||||
|
if resetIds:
|
||||||
|
logger.info(
|
||||||
|
"Cascade-reset %s on %d descendants of DataSource %s (bottom-up)",
|
||||||
|
flag, len(resetIds), parentId,
|
||||||
|
)
|
||||||
|
return resetIds
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# DataSource: collectAncestorChain (for updatedAncestors in PATCH response)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def collectAncestorChain(
|
||||||
|
rec: Dict[str, Any],
|
||||||
|
sameConnectionDs: Iterable[Dict[str, Any]],
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
"""Return ancestor chain of `rec` (nearest-first), same as internal helper.
|
||||||
|
|
||||||
|
Exposed for PATCH endpoints to compute updatedAncestors.
|
||||||
|
"""
|
||||||
|
return _findAncestorChain(rec, sameConnectionDs)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# DataSource: buildEffectiveByConnection
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def buildEffectiveByConnection(
|
||||||
|
dataSources: Iterable[Dict[str, Any]],
|
||||||
|
flag: str,
|
||||||
|
mode: Mode = "walk",
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Pre-compute the effective value of `flag` for every DataSource id.
|
||||||
|
|
||||||
|
Uses the specified mode. O(N^2) worst case but N is bounded per connection.
|
||||||
|
"""
|
||||||
|
if flag not in _INHERITABLE_FLAGS:
|
||||||
|
raise ValueError(f"Unknown inheritable flag: {flag}")
|
||||||
|
allDs = list(dataSources)
|
||||||
|
out: Dict[str, Any] = {}
|
||||||
|
for rec in allDs:
|
||||||
|
recId = _getRecordValue(rec, "id")
|
||||||
|
out[recId] = getEffectiveFlag(rec, flag, allDs, mode=mode)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# FeatureDataSource helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _fdsClassify(fds: Dict[str, Any]) -> str:
|
||||||
|
"""Return 'workspace' | 'table' | 'record' based on the FDS identifier shape."""
|
||||||
|
tableName = _getRecordValue(fds, "tableName") or ""
|
||||||
|
recordFilter = _getRecordValue(fds, "recordFilter")
|
||||||
|
if tableName == "*":
|
||||||
|
return "workspace"
|
||||||
|
if not recordFilter:
|
||||||
|
return "table"
|
||||||
|
return "record"
|
||||||
|
|
||||||
|
|
||||||
|
def _fdsIsAncestor(parent: Dict[str, Any], child: Dict[str, Any]) -> bool:
|
||||||
|
"""Return True iff `parent` FDS is a strict ancestor of `child` FDS.
|
||||||
|
|
||||||
|
Hierarchy within one featureInstanceId (allFds is already scoped to
|
||||||
|
a single workspace):
|
||||||
|
feature-wildcard (tableName='*') -> table-wildcard / record-fds
|
||||||
|
table-wildcard (tableName='X') -> record-fds (tableName='X')
|
||||||
|
"""
|
||||||
|
parentFiId = _getRecordValue(parent, "featureInstanceId")
|
||||||
|
childFiId = _getRecordValue(child, "featureInstanceId")
|
||||||
|
if not parentFiId or parentFiId != childFiId:
|
||||||
|
return False
|
||||||
|
if _getRecordValue(parent, "id") == _getRecordValue(child, "id"):
|
||||||
|
return False
|
||||||
|
parentKind = _fdsClassify(parent)
|
||||||
|
childKind = _fdsClassify(child)
|
||||||
|
if parentKind == "workspace":
|
||||||
|
return childKind in ("table", "record")
|
||||||
|
if parentKind == "table":
|
||||||
|
if childKind != "record":
|
||||||
|
return False
|
||||||
|
return _getRecordValue(parent, "tableName") == _getRecordValue(child, "tableName")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _fdsDepth(fds: Dict[str, Any]) -> int:
|
||||||
|
kind = _fdsClassify(fds)
|
||||||
|
if kind == "workspace":
|
||||||
|
return 0
|
||||||
|
if kind == "table":
|
||||||
|
return 1
|
||||||
|
return 2
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# FeatureDataSource: getEffectiveFlagFds
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def getEffectiveFlagFds(
|
||||||
|
rec: Dict[str, Any],
|
||||||
|
flag: str,
|
||||||
|
sameWorkspaceFds: Iterable[Dict[str, Any]],
|
||||||
|
mode: Mode = "walk",
|
||||||
|
) -> Any:
|
||||||
|
"""Resolve effective value of a FeatureDataSource flag.
|
||||||
|
|
||||||
|
mode='walk': own explicit -> table-wildcard -> workspace-wildcard -> default.
|
||||||
|
mode='aggregate': same but returns 'mixed' if descendants diverge.
|
||||||
|
"""
|
||||||
|
if flag not in _INHERITABLE_FDS_FLAGS:
|
||||||
|
raise ValueError(f"Unknown inheritable FDS flag: {flag}")
|
||||||
|
|
||||||
|
allFds = list(sameWorkspaceFds)
|
||||||
|
walkValue = _resolveWalkValueFds(rec, flag, allFds)
|
||||||
|
|
||||||
|
if mode == "walk":
|
||||||
|
return walkValue
|
||||||
|
|
||||||
|
# mode == 'aggregate'
|
||||||
|
descendants = [f for f in allFds if _fdsIsAncestor(rec, f)]
|
||||||
|
if not descendants:
|
||||||
|
return walkValue
|
||||||
|
|
||||||
|
subtreeValues = set()
|
||||||
|
subtreeValues.add(_normaliseForComparison(walkValue))
|
||||||
|
for desc in descendants:
|
||||||
|
descEffective = _resolveWalkValueFds(desc, flag, allFds)
|
||||||
|
subtreeValues.add(_normaliseForComparison(descEffective))
|
||||||
|
if len(subtreeValues) > 1:
|
||||||
|
recId = _getRecordValue(rec, "id")
|
||||||
|
descId = _getRecordValue(desc, "id")
|
||||||
|
descOwnVal = _getRecordValue(desc, flag)
|
||||||
|
logger.info(
|
||||||
|
"FDS aggregate MIXED for rec=%s flag=%s: walkValue=%s, "
|
||||||
|
"divergent desc=%s (own=%s, effective=%s), subtreeValues=%s",
|
||||||
|
recId, flag, walkValue, descId, descOwnVal, descEffective, subtreeValues,
|
||||||
|
)
|
||||||
|
return "mixed"
|
||||||
|
return walkValue
|
||||||
|
|
||||||
|
|
||||||
|
def _resolveWalkValueFds(rec: Dict[str, Any], flag: str, allFds: List[Dict[str, Any]]) -> Any:
|
||||||
|
"""Core walk resolution for FDS."""
|
||||||
|
own = _getRecordValue(rec, flag)
|
||||||
|
if _isExplicit(own):
|
||||||
|
return own
|
||||||
|
ancestors = [a for a in allFds if _fdsIsAncestor(a, rec)]
|
||||||
|
ancestors.sort(key=lambda a: 0 if _fdsClassify(a) == "table" else 1)
|
||||||
|
for ancestor in ancestors:
|
||||||
|
val = _getRecordValue(ancestor, flag)
|
||||||
|
if _isExplicit(val):
|
||||||
|
return val
|
||||||
|
return _flagDefault(flag)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# FeatureDataSource: cascadeResetDescendantsFds (bottom-up)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def cascadeResetDescendantsFds(
|
||||||
|
rootIf: Any,
|
||||||
|
parentRec: Dict[str, Any],
|
||||||
|
flag: str,
|
||||||
|
) -> List[str]:
|
||||||
|
"""Reset explicit `flag` to NULL on every descendant FDS of `parentRec`.
|
||||||
|
|
||||||
|
Reset order: bottom-up (deepest first) for crash safety.
|
||||||
|
Returns list of reset record IDs in bottom-up order.
|
||||||
|
"""
|
||||||
|
if flag not in _INHERITABLE_FDS_FLAGS:
|
||||||
|
raise ValueError(f"Unknown inheritable FDS flag: {flag}")
|
||||||
|
from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
|
||||||
|
|
||||||
|
workspaceInstanceId = _getRecordValue(parentRec, "workspaceInstanceId")
|
||||||
|
if not workspaceInstanceId:
|
||||||
|
return []
|
||||||
|
siblings = rootIf.db.getRecordset(
|
||||||
|
FeatureDataSource, recordFilter={"workspaceInstanceId": workspaceInstanceId}
|
||||||
|
)
|
||||||
|
|
||||||
|
toReset: List[Tuple[int, str]] = []
|
||||||
|
for sib in siblings:
|
||||||
|
if not _fdsIsAncestor(parentRec, sib):
|
||||||
|
continue
|
||||||
|
sibVal = _getRecordValue(sib, flag)
|
||||||
|
if not _isExplicit(sibVal):
|
||||||
|
continue
|
||||||
|
sibId = _getRecordValue(sib, "id")
|
||||||
|
toReset.append((_fdsDepth(sib), sibId))
|
||||||
|
|
||||||
|
# Sort deepest first (bottom-up)
|
||||||
|
toReset.sort(key=lambda x: x[0], reverse=True)
|
||||||
|
|
||||||
|
resetIds: List[str] = []
|
||||||
|
for _, sibId in toReset:
|
||||||
|
try:
|
||||||
|
rootIf.db.recordModify(FeatureDataSource, sibId, {flag: None})
|
||||||
|
resetIds.append(sibId)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("FDS cascade-reset failed for %s flag=%s: %s", sibId, flag, exc)
|
||||||
|
|
||||||
|
if resetIds:
|
||||||
|
logger.info(
|
||||||
|
"FDS cascade-reset %s on %d descendants of FDS %s (bottom-up)",
|
||||||
|
flag, len(resetIds), _getRecordValue(parentRec, "id"),
|
||||||
|
)
|
||||||
|
return resetIds
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# FeatureDataSource: collectAncestorChainFds
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def collectAncestorChainFds(
|
||||||
|
rec: Dict[str, Any],
|
||||||
|
sameWorkspaceFds: Iterable[Dict[str, Any]],
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
"""Return ancestor chain of `rec` FDS (nearest-first).
|
||||||
|
|
||||||
|
Exposed for PATCH endpoints to compute updatedAncestors.
|
||||||
|
"""
|
||||||
|
allFds = list(sameWorkspaceFds)
|
||||||
|
ancestors = [a for a in allFds if _fdsIsAncestor(a, rec)]
|
||||||
|
ancestors.sort(key=lambda a: 0 if _fdsClassify(a) == "table" else 1)
|
||||||
|
return ancestors
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# FeatureDataSource: buildEffectiveByWorkspaceFds
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def buildEffectiveByWorkspaceFds(
|
||||||
|
fdses: Iterable[Dict[str, Any]],
|
||||||
|
flag: str,
|
||||||
|
mode: Mode = "walk",
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Pre-compute the effective value of `flag` for every FDS id."""
|
||||||
|
if flag not in _INHERITABLE_FDS_FLAGS:
|
||||||
|
raise ValueError(f"Unknown inheritable FDS flag: {flag}")
|
||||||
|
allFds = list(fdses)
|
||||||
|
out: Dict[str, Any] = {}
|
||||||
|
for rec in allFds:
|
||||||
|
recId = _getRecordValue(rec, "id")
|
||||||
|
out[recId] = getEffectiveFlagFds(rec, flag, allFds, mode=mode)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Bulk resolve: effective flags for arbitrary paths (even without DB record)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def resolveEffectiveForPath(
|
||||||
|
connectionId: str,
|
||||||
|
sourceType: str,
|
||||||
|
path: str,
|
||||||
|
allDs: List[Dict[str, Any]],
|
||||||
|
mode: Mode = "aggregate",
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Resolve effective flags for ANY (connectionId, sourceType, path) tuple.
|
||||||
|
|
||||||
|
Works whether or not a DataSource record exists for this exact path.
|
||||||
|
Returns dict with effectiveNeutralize, effectiveScope, effectiveRagIndexEnabled.
|
||||||
|
"""
|
||||||
|
normPath = _normalisePath(path)
|
||||||
|
exactRecord = None
|
||||||
|
for ds in allDs:
|
||||||
|
if (
|
||||||
|
_getRecordValue(ds, "connectionId") == connectionId
|
||||||
|
and _getRecordValue(ds, "sourceType") == sourceType
|
||||||
|
and _normalisePath(_getRecordValue(ds, "path")) == normPath
|
||||||
|
):
|
||||||
|
exactRecord = ds
|
||||||
|
break
|
||||||
|
|
||||||
|
if exactRecord:
|
||||||
|
return {
|
||||||
|
"effectiveNeutralize": getEffectiveFlag(exactRecord, "neutralize", allDs, mode=mode),
|
||||||
|
"effectiveScope": getEffectiveFlag(exactRecord, "scope", allDs, mode=mode),
|
||||||
|
"effectiveRagIndexEnabled": getEffectiveFlag(exactRecord, "ragIndexEnabled", allDs, mode=mode),
|
||||||
|
}
|
||||||
|
|
||||||
|
virtualRec = {
|
||||||
|
"id": "__virtual__",
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"sourceType": sourceType,
|
||||||
|
"path": normPath,
|
||||||
|
"neutralize": None,
|
||||||
|
"scope": None,
|
||||||
|
"ragIndexEnabled": None,
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
"effectiveNeutralize": _resolveWalkValue(virtualRec, "neutralize", allDs),
|
||||||
|
"effectiveScope": _resolveWalkValue(virtualRec, "scope", allDs),
|
||||||
|
"effectiveRagIndexEnabled": _resolveWalkValue(virtualRec, "ragIndexEnabled", allDs),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def resolveEffectiveForFds(
|
||||||
|
featureInstanceId: str,
|
||||||
|
tableName: str,
|
||||||
|
recordFilter: Optional[Dict[str, str]],
|
||||||
|
allFds: List[Dict[str, Any]],
|
||||||
|
mode: Mode = "aggregate",
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Resolve effective flags for ANY FDS tuple (even without DB record).
|
||||||
|
|
||||||
|
`allFds` is pre-scoped to a single workspace (loaded with
|
||||||
|
workspaceInstanceId filter). Within that set, the coordinate is
|
||||||
|
featureInstanceId + tableName + recordFilter.
|
||||||
|
|
||||||
|
Returns dict with effectiveNeutralize, effectiveScope, effectiveRagIndexEnabled.
|
||||||
|
"""
|
||||||
|
exactRecord = None
|
||||||
|
for fds in allFds:
|
||||||
|
if _getRecordValue(fds, "featureInstanceId") != featureInstanceId:
|
||||||
|
continue
|
||||||
|
if (_getRecordValue(fds, "tableName") or "") != tableName:
|
||||||
|
continue
|
||||||
|
fdsFilter = _getRecordValue(fds, "recordFilter")
|
||||||
|
if fdsFilter == recordFilter:
|
||||||
|
exactRecord = fds
|
||||||
|
break
|
||||||
|
|
||||||
|
if exactRecord:
|
||||||
|
return {
|
||||||
|
"effectiveNeutralize": getEffectiveFlagFds(exactRecord, "neutralize", allFds, mode=mode),
|
||||||
|
"effectiveScope": getEffectiveFlagFds(exactRecord, "scope", allFds, mode=mode),
|
||||||
|
"effectiveRagIndexEnabled": getEffectiveFlagFds(exactRecord, "ragIndexEnabled", allFds, mode=mode),
|
||||||
|
}
|
||||||
|
|
||||||
|
virtualRec = {
|
||||||
|
"id": "__virtual__",
|
||||||
|
"featureInstanceId": featureInstanceId,
|
||||||
|
"tableName": tableName,
|
||||||
|
"recordFilter": recordFilter,
|
||||||
|
"neutralize": None,
|
||||||
|
"scope": None,
|
||||||
|
"ragIndexEnabled": None,
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
"effectiveNeutralize": _resolveWalkValueFds(virtualRec, "neutralize", allFds),
|
||||||
|
"effectiveScope": _resolveWalkValueFds(virtualRec, "scope", allFds),
|
||||||
|
"effectiveRagIndexEnabled": _resolveWalkValueFds(virtualRec, "ragIndexEnabled", allFds),
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
"""Central i18n registration for BackgroundJob progress messages.
|
||||||
|
|
||||||
|
Walkers and consumers report progress via ``progressCb(..., messageKey="…",
|
||||||
|
messageParams={...})``. Those keys are not seen by ``t()`` at call time, so
|
||||||
|
without a stub registration they would never make it into the boot-time
|
||||||
|
``UiLanguageSet(xx)`` sync. Importing this module is enough to register
|
||||||
|
every known key — call sites stay clean while translators can still find
|
||||||
|
the texts in the standard i18n table.
|
||||||
|
|
||||||
|
Keep this list in lockstep with the ``messageKey=`` arguments used in
|
||||||
|
``subConnectorSync*.py`` and ``subConnectorIngestConsumer.py``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from modules.shared.i18nRegistry import t
|
||||||
|
|
||||||
|
# Bootstrap walkers (one per connector family)
|
||||||
|
t("{n} Dateien verarbeitet, {indexed} indexiert")
|
||||||
|
t("{n} Tasks verarbeitet, {indexed} indexiert")
|
||||||
|
t("{n} Mails verarbeitet, {indexed} indexiert")
|
||||||
|
|
||||||
|
# Ingestion consumer hand-offs
|
||||||
|
t("Verbindung wird aufgebaut ({authority})")
|
||||||
|
t("Synchronisierung läuft...")
|
||||||
107
modules/serviceCenter/services/serviceKnowledge/_ragLimits.py
Normal file
107
modules/serviceCenter/services/serviceKnowledge/_ragLimits.py
Normal file
|
|
@ -0,0 +1,107 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Centralized RAG bootstrap limits + DataSource-scoped resolution.
|
||||||
|
|
||||||
|
The original walkers (SharePoint, kDrive, gDrive, ClickUp) each carried their
|
||||||
|
own module-level `MAX_*_DEFAULT` constants and silently stopped indexing once
|
||||||
|
they were exceeded. That made it impossible for a user with a 500 MB folder to
|
||||||
|
override the 200 MB cap without a code change.
|
||||||
|
|
||||||
|
This module is the single source of truth for two things:
|
||||||
|
|
||||||
|
1. The canonical default budget per source kind (`FILES_LIMITS_DEFAULT`,
|
||||||
|
`CLICKUP_LIMITS_DEFAULT`). Walkers fall back to these when a DataSource has
|
||||||
|
no `settings.ragLimits` yet.
|
||||||
|
|
||||||
|
2. The pure read/lazy-fill helpers that walkers and the API use to merge a
|
||||||
|
DataSource's stored settings with the defaults. No override layers, no
|
||||||
|
resolver chain: what is in `DataSource.settings.ragLimits` is what the
|
||||||
|
walker uses.
|
||||||
|
|
||||||
|
Lazy fill: the first time a DataSource is processed, the defaults are written
|
||||||
|
to its `settings.ragLimits` so the UI shows real values immediately, even if
|
||||||
|
the user has never opened the settings modal.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
FILES_LIMITS_DEFAULT: Dict[str, int] = {
|
||||||
|
"maxItems": 500,
|
||||||
|
"maxBytes": 200 * 1024 * 1024,
|
||||||
|
"maxFileSize": 25 * 1024 * 1024,
|
||||||
|
"maxDepth": 4,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
CLICKUP_LIMITS_DEFAULT: Dict[str, int] = {
|
||||||
|
"maxTasks": 500,
|
||||||
|
"maxWorkspaces": 3,
|
||||||
|
"maxListsPerWorkspace": 20,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_LIMITS_BY_KIND: Dict[str, Dict[str, int]] = {
|
||||||
|
"files": FILES_LIMITS_DEFAULT,
|
||||||
|
"clickup": CLICKUP_LIMITS_DEFAULT,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def getDefaults(kind: str) -> Dict[str, int]:
|
||||||
|
"""Return a fresh copy of the default budget for the given walker kind.
|
||||||
|
|
||||||
|
`kind` is either "files" (Sharepoint, kDrive, gDrive) or "clickup".
|
||||||
|
Returning a copy lets callers mutate the result safely.
|
||||||
|
"""
|
||||||
|
defaults = _LIMITS_BY_KIND.get(kind)
|
||||||
|
if defaults is None:
|
||||||
|
raise ValueError(f"Unknown RAG limit kind: {kind!r}")
|
||||||
|
return dict(defaults)
|
||||||
|
|
||||||
|
|
||||||
|
def getStoredOverrides(dataSource: Optional[Dict[str, Any]], kind: str) -> Dict[str, int]:
|
||||||
|
"""Return ONLY the limits explicitly set on `dataSource.settings.ragLimits`.
|
||||||
|
|
||||||
|
Missing keys are NOT filled with defaults — that is the caller's job (so
|
||||||
|
a programmatically supplied `limits=` from a Caller still wins when the
|
||||||
|
DataSource has no override). Pure read, no DB writes.
|
||||||
|
"""
|
||||||
|
if not isinstance(dataSource, dict):
|
||||||
|
return {}
|
||||||
|
settings = dataSource.get("settings") or {}
|
||||||
|
if not isinstance(settings, dict):
|
||||||
|
return {}
|
||||||
|
stored = settings.get("ragLimits")
|
||||||
|
if not isinstance(stored, dict):
|
||||||
|
return {}
|
||||||
|
allowed = set(_LIMITS_BY_KIND.get(kind, {}).keys())
|
||||||
|
out: Dict[str, int] = {}
|
||||||
|
for key, raw in stored.items():
|
||||||
|
if key not in allowed or raw is None:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
out[key] = int(raw)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
logger.warning(
|
||||||
|
"Ignoring non-int ragLimits[%s]=%r on DataSource %s",
|
||||||
|
key, raw, dataSource.get("id"),
|
||||||
|
)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def getRagLimits(dataSource: Optional[Dict[str, Any]], kind: str) -> Dict[str, int]:
|
||||||
|
"""Effective RAG limits for the API/cost-estimate use-case.
|
||||||
|
|
||||||
|
Stored overrides win over `getDefaults(kind)`. Walkers should NOT use this
|
||||||
|
function — they should pass their own caller-limits as the fallback so that
|
||||||
|
a runtime-supplied `limits=` parameter is honoured (see `getStoredOverrides`).
|
||||||
|
"""
|
||||||
|
base = getDefaults(kind)
|
||||||
|
base.update(getStoredOverrides(dataSource, kind))
|
||||||
|
return base
|
||||||
|
|
@ -147,7 +147,7 @@ class KnowledgeService:
|
||||||
else getattr(existing, "status", "")
|
else getattr(existing, "status", "")
|
||||||
) or ""
|
) or ""
|
||||||
if existingMeta.get("hash") == contentHash and existingStatus == "indexed":
|
if existingMeta.get("hash") == contentHash and existingStatus == "indexed":
|
||||||
logger.info(
|
logger.debug(
|
||||||
"ingestion.skipped.duplicate sourceKind=%s sourceId=%s hash=%s",
|
"ingestion.skipped.duplicate sourceKind=%s sourceId=%s hash=%s",
|
||||||
job.sourceKind, job.sourceId, contentHash[:12],
|
job.sourceKind, job.sourceId, contentHash[:12],
|
||||||
extra={
|
extra={
|
||||||
|
|
|
||||||
|
|
@ -141,18 +141,39 @@ _SOURCE_TYPE_MAP = {
|
||||||
|
|
||||||
|
|
||||||
def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list] = None):
|
def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list] = None):
|
||||||
"""Load DataSource rows with ragIndexEnabled=true for a connection.
|
"""Load DataSource rows whose *effective* ragIndexEnabled is True.
|
||||||
|
|
||||||
If dataSourceIds is provided (mini-bootstrap), filter to only those IDs.
|
Cascade-inherit semantics: a DataSource with `ragIndexEnabled=None`
|
||||||
|
follows its nearest ancestor's value (path-traversal). Walker iterates
|
||||||
|
over all DataSources whose effective value resolves to True, including
|
||||||
|
inherited ones.
|
||||||
|
|
||||||
|
Returned dicts carry **resolved** flags (`neutralize`, `scope`) so the
|
||||||
|
downstream walkers can keep reading `ds.get("neutralize")` directly
|
||||||
|
without having to know about the inheritance chain.
|
||||||
|
|
||||||
|
If `dataSourceIds` is provided (mini-bootstrap), the explicit set is
|
||||||
|
intersected with the effective-true set.
|
||||||
"""
|
"""
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
from modules.datamodels.datamodelDataSource import DataSource
|
from modules.datamodels.datamodelDataSource import DataSource
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag
|
||||||
|
|
||||||
rootIf = getRootInterface()
|
rootIf = getRootInterface()
|
||||||
allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||||
|
resolved = []
|
||||||
|
for ds in allDs:
|
||||||
|
effRagIndex = getEffectiveFlag(ds, "ragIndexEnabled", allDs)
|
||||||
|
if effRagIndex is not True:
|
||||||
|
continue
|
||||||
|
dsCopy = dict(ds) if isinstance(ds, dict) else {**ds.__dict__}
|
||||||
|
dsCopy["neutralize"] = getEffectiveFlag(ds, "neutralize", allDs)
|
||||||
|
dsCopy["scope"] = getEffectiveFlag(ds, "scope", allDs)
|
||||||
|
dsCopy["ragIndexEnabled"] = True
|
||||||
|
resolved.append(dsCopy)
|
||||||
if dataSourceIds:
|
if dataSourceIds:
|
||||||
return [ds for ds in allDs if ds.get("id") in dataSourceIds and ds.get("ragIndexEnabled")]
|
resolved = [ds for ds in resolved if ds.get("id") in dataSourceIds]
|
||||||
return [ds for ds in allDs if ds.get("ragIndexEnabled")]
|
return resolved
|
||||||
|
|
||||||
|
|
||||||
async def _bootstrapJobHandler(
|
async def _bootstrapJobHandler(
|
||||||
|
|
@ -167,7 +188,11 @@ async def _bootstrapJobHandler(
|
||||||
if not connectionId:
|
if not connectionId:
|
||||||
raise ValueError("connection.bootstrap requires payload.connectionId")
|
raise ValueError("connection.bootstrap requires payload.connectionId")
|
||||||
|
|
||||||
progressCb(5, f"resolving {authority} connection")
|
progressCb(
|
||||||
|
5,
|
||||||
|
messageKey="Verbindung wird aufgebaut ({authority})",
|
||||||
|
messageParams={"authority": authority},
|
||||||
|
)
|
||||||
|
|
||||||
# Defensive consent check
|
# Defensive consent check
|
||||||
try:
|
try:
|
||||||
|
|
@ -225,7 +250,7 @@ async def _bootstrapJobHandler(
|
||||||
bootstrapOutlook,
|
bootstrapOutlook,
|
||||||
)
|
)
|
||||||
|
|
||||||
progressCb(0, "Synchronisierung läuft...")
|
progressCb(0, messageKey="Synchronisierung läuft...")
|
||||||
spDs = _filterDs("sharepoint")
|
spDs = _filterDs("sharepoint")
|
||||||
olDs = _filterDs("outlook")
|
olDs = _filterDs("outlook")
|
||||||
async def _noopResult():
|
async def _noopResult():
|
||||||
|
|
@ -251,7 +276,7 @@ async def _bootstrapJobHandler(
|
||||||
bootstrapGmail,
|
bootstrapGmail,
|
||||||
)
|
)
|
||||||
|
|
||||||
progressCb(0, "Synchronisierung läuft...")
|
progressCb(0, messageKey="Synchronisierung läuft...")
|
||||||
gdDs = _filterDs("drive")
|
gdDs = _filterDs("drive")
|
||||||
gmDs = _filterDs("gmail")
|
gmDs = _filterDs("gmail")
|
||||||
async def _noopResult():
|
async def _noopResult():
|
||||||
|
|
@ -274,7 +299,7 @@ async def _bootstrapJobHandler(
|
||||||
bootstrapClickup,
|
bootstrapClickup,
|
||||||
)
|
)
|
||||||
|
|
||||||
progressCb(0, "Synchronisierung läuft...")
|
progressCb(0, messageKey="Synchronisierung läuft...")
|
||||||
cuDs = _filterDs("clickup")
|
cuDs = _filterDs("clickup")
|
||||||
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb, dataSources=cuDs) if cuDs else {"skipped": True, "reason": "no_datasources"}
|
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb, dataSources=cuDs) if cuDs else {"skipped": True, "reason": "no_datasources"}
|
||||||
return {
|
return {
|
||||||
|
|
@ -288,7 +313,7 @@ async def _bootstrapJobHandler(
|
||||||
bootstrapKdrive,
|
bootstrapKdrive,
|
||||||
)
|
)
|
||||||
|
|
||||||
progressCb(0, "Synchronisierung läuft...")
|
progressCb(0, messageKey="Synchronisierung läuft...")
|
||||||
kdDs = _filterDs("kdrive")
|
kdDs = _filterDs("kdrive")
|
||||||
kdResult = await bootstrapKdrive(connectionId=connectionId, progressCb=progressCb, dataSources=kdDs) if kdDs else {"skipped": True, "reason": "no_datasources"}
|
kdResult = await bootstrapKdrive(connectionId=connectionId, progressCb=progressCb, dataSources=kdDs) if kdDs else {"skipped": True, "reason": "no_datasources"}
|
||||||
return {
|
return {
|
||||||
|
|
@ -406,6 +431,15 @@ def registerKnowledgeIngestionConsumer() -> None:
|
||||||
callbackRegistry.register("connection.established", _onConnectionEstablished)
|
callbackRegistry.register("connection.established", _onConnectionEstablished)
|
||||||
callbackRegistry.register("connection.revoked", _onConnectionRevoked)
|
callbackRegistry.register("connection.revoked", _onConnectionRevoked)
|
||||||
registerJobHandler(BOOTSTRAP_JOB_TYPE, _bootstrapJobHandler)
|
registerJobHandler(BOOTSTRAP_JOB_TYPE, _bootstrapJobHandler)
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.subFeatureBootstrap import (
|
||||||
|
FEATURE_BOOTSTRAP_JOB_TYPE, _featureBootstrapHandler,
|
||||||
|
)
|
||||||
|
registerJobHandler(FEATURE_BOOTSTRAP_JOB_TYPE, _featureBootstrapHandler)
|
||||||
|
|
||||||
registerDailyResyncScheduler()
|
registerDailyResyncScheduler()
|
||||||
_registered = True
|
_registered = True
|
||||||
logger.info("KnowledgeIngestionConsumer registered (established/revoked + %s handler + daily resync)", BOOTSTRAP_JOB_TYPE)
|
logger.info(
|
||||||
|
"KnowledgeIngestionConsumer registered (established/revoked + %s + %s handler + daily resync)",
|
||||||
|
BOOTSTRAP_JOB_TYPE, FEATURE_BOOTSTRAP_JOB_TYPE,
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -33,13 +33,21 @@ from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
MAX_TASKS_DEFAULT = 500
|
from modules.serviceCenter.services.serviceKnowledge import _ragLimits as _ragLimitsHelper
|
||||||
MAX_WORKSPACES_DEFAULT = 3
|
|
||||||
MAX_LISTS_PER_WORKSPACE_DEFAULT = 20
|
_CLICKUP_DEFAULTS = _ragLimitsHelper.CLICKUP_LIMITS_DEFAULT
|
||||||
|
MAX_TASKS_DEFAULT = _CLICKUP_DEFAULTS["maxTasks"]
|
||||||
|
MAX_WORKSPACES_DEFAULT = _CLICKUP_DEFAULTS["maxWorkspaces"]
|
||||||
|
MAX_LISTS_PER_WORKSPACE_DEFAULT = _CLICKUP_DEFAULTS["maxListsPerWorkspace"]
|
||||||
MAX_DESCRIPTION_CHARS_DEFAULT = 8000
|
MAX_DESCRIPTION_CHARS_DEFAULT = 8000
|
||||||
MAX_AGE_DAYS_DEFAULT = 180
|
MAX_AGE_DAYS_DEFAULT = 180
|
||||||
|
|
||||||
|
|
||||||
|
def _resolveDataSourceLimits(dsId: str, ds: Dict[str, Any]) -> Dict[str, int]:
|
||||||
|
"""Return explicit RAG-limit overrides stored on the DataSource (or {})."""
|
||||||
|
return _ragLimitsHelper.getStoredOverrides(ds, "clickup")
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ClickupBootstrapLimits:
|
class ClickupBootstrapLimits:
|
||||||
maxTasks: int = MAX_TASKS_DEFAULT
|
maxTasks: int = MAX_TASKS_DEFAULT
|
||||||
|
|
@ -68,6 +76,9 @@ class ClickupBootstrapResult:
|
||||||
workspaces: int = 0
|
workspaces: int = 0
|
||||||
lists: int = 0
|
lists: int = 0
|
||||||
errors: List[str] = field(default_factory=list)
|
errors: List[str] = field(default_factory=list)
|
||||||
|
# First budget exhausted: "maxTasks" | "maxWorkspaces" | "maxListsPerWorkspace" | None.
|
||||||
|
# Drives the same UI banner as the file-walker bootstraps.
|
||||||
|
stoppedAtLimit: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
def _syntheticTaskId(connectionId: str, taskId: str) -> str:
|
def _syntheticTaskId(connectionId: str, taskId: str) -> str:
|
||||||
|
|
@ -225,6 +236,7 @@ async def bootstrapClickup(
|
||||||
cancelled = False
|
cancelled = False
|
||||||
for ds in dataSources:
|
for ds in dataSources:
|
||||||
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
||||||
|
_recordLimitStop(result, "maxTasks", "dataSource", limits)
|
||||||
break
|
break
|
||||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
cancelled = True
|
cancelled = True
|
||||||
|
|
@ -232,10 +244,11 @@ async def bootstrapClickup(
|
||||||
|
|
||||||
dsId = ds.get("id", "")
|
dsId = ds.get("id", "")
|
||||||
dsNeutralize = ds.get("neutralize", False)
|
dsNeutralize = ds.get("neutralize", False)
|
||||||
|
eff = _resolveDataSourceLimits(dsId, ds)
|
||||||
dsLimits = ClickupBootstrapLimits(
|
dsLimits = ClickupBootstrapLimits(
|
||||||
maxTasks=limits.maxTasks,
|
maxTasks=eff.get("maxTasks", limits.maxTasks),
|
||||||
maxWorkspaces=limits.maxWorkspaces,
|
maxWorkspaces=eff.get("maxWorkspaces", limits.maxWorkspaces),
|
||||||
maxListsPerWorkspace=limits.maxListsPerWorkspace,
|
maxListsPerWorkspace=eff.get("maxListsPerWorkspace", limits.maxListsPerWorkspace),
|
||||||
maxDescriptionChars=limits.maxDescriptionChars,
|
maxDescriptionChars=limits.maxDescriptionChars,
|
||||||
maxAgeDays=limits.maxAgeDays,
|
maxAgeDays=limits.maxAgeDays,
|
||||||
includeClosed=limits.includeClosed,
|
includeClosed=limits.includeClosed,
|
||||||
|
|
@ -243,8 +256,11 @@ async def bootstrapClickup(
|
||||||
clickupScope=limits.clickupScope,
|
clickupScope=limits.clickupScope,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if len(teams) > dsLimits.maxWorkspaces:
|
||||||
|
_recordLimitStop(result, "maxWorkspaces", "teams", dsLimits, hard=False)
|
||||||
for team in teams[:dsLimits.maxWorkspaces]:
|
for team in teams[:dsLimits.maxWorkspaces]:
|
||||||
if result.indexed + result.skippedDuplicate >= dsLimits.maxTasks:
|
if result.indexed + result.skippedDuplicate >= dsLimits.maxTasks:
|
||||||
|
_recordLimitStop(result, "maxTasks", f"team={team.get('id','')}", dsLimits)
|
||||||
break
|
break
|
||||||
teamId = str(team.get("id", "") or "")
|
teamId = str(team.get("id", "") or "")
|
||||||
if not teamId:
|
if not teamId:
|
||||||
|
|
@ -351,6 +367,7 @@ async def _walkTeam(
|
||||||
|
|
||||||
for lst in listsCollected:
|
for lst in listsCollected:
|
||||||
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
||||||
|
_recordLimitStop(result, "maxTasks", f"team={teamId}", limits)
|
||||||
return
|
return
|
||||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
return
|
return
|
||||||
|
|
@ -407,6 +424,7 @@ async def _walkList(
|
||||||
|
|
||||||
for task in tasks:
|
for task in tasks:
|
||||||
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
||||||
|
_recordLimitStop(result, "maxTasks", f"list={listId}", limits)
|
||||||
return
|
return
|
||||||
if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
|
if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
|
||||||
result.skippedPolicy += 1
|
result.skippedPolicy += 1
|
||||||
|
|
@ -511,7 +529,11 @@ async def _ingestTask(
|
||||||
if hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
if hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
progressCb(0, f"{processed} Tasks verarbeitet, {result.indexed} indexiert")
|
progressCb(
|
||||||
|
0,
|
||||||
|
messageKey="{n} Tasks verarbeitet, {indexed} indexiert",
|
||||||
|
messageParams={"n": processed, "indexed": result.indexed},
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
if processed % 50 == 0:
|
if processed % 50 == 0:
|
||||||
|
|
@ -529,13 +551,37 @@ async def _ingestTask(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _recordLimitStop(
|
||||||
|
result: ClickupBootstrapResult,
|
||||||
|
limitName: str,
|
||||||
|
where: str,
|
||||||
|
limits: ClickupBootstrapLimits,
|
||||||
|
*,
|
||||||
|
hard: bool = True,
|
||||||
|
) -> None:
|
||||||
|
"""See subConnectorSyncSharepoint._recordLimitStop for semantics."""
|
||||||
|
if hard or result.stoppedAtLimit is None:
|
||||||
|
result.stoppedAtLimit = limitName
|
||||||
|
budgetMap = {
|
||||||
|
"maxTasks": limits.maxTasks,
|
||||||
|
"maxWorkspaces": limits.maxWorkspaces,
|
||||||
|
"maxListsPerWorkspace": limits.maxListsPerWorkspace,
|
||||||
|
}
|
||||||
|
logger.warning(
|
||||||
|
"clickup walker hit %s=%s at %s — partial index (indexed=%d, skippedDup=%d).",
|
||||||
|
limitName, budgetMap.get(limitName), where,
|
||||||
|
result.indexed, result.skippedDuplicate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs: float) -> Dict[str, Any]:
|
def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs: float) -> Dict[str, Any]:
|
||||||
durationMs = int((time.time() - startMs) * 1000)
|
durationMs = int((time.time() - startMs) * 1000)
|
||||||
logger.info(
|
logger.info(
|
||||||
"ingestion.connection.bootstrap.done part=clickup connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d workspaces=%d lists=%d durationMs=%d",
|
"ingestion.connection.bootstrap.done part=clickup connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d workspaces=%d lists=%d durationMs=%d stoppedAtLimit=%s",
|
||||||
connectionId,
|
connectionId,
|
||||||
result.indexed, result.skippedDuplicate, result.skippedPolicy,
|
result.indexed, result.skippedDuplicate, result.skippedPolicy,
|
||||||
result.failed, result.workspaces, result.lists, durationMs,
|
result.failed, result.workspaces, result.lists, durationMs,
|
||||||
|
result.stoppedAtLimit or "none",
|
||||||
extra={
|
extra={
|
||||||
"event": "ingestion.connection.bootstrap.done",
|
"event": "ingestion.connection.bootstrap.done",
|
||||||
"part": "clickup",
|
"part": "clickup",
|
||||||
|
|
@ -547,6 +593,7 @@ def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs:
|
||||||
"workspaces": result.workspaces,
|
"workspaces": result.workspaces,
|
||||||
"lists": result.lists,
|
"lists": result.lists,
|
||||||
"durationMs": durationMs,
|
"durationMs": durationMs,
|
||||||
|
"stoppedAtLimit": result.stoppedAtLimit,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
return {
|
return {
|
||||||
|
|
@ -559,4 +606,11 @@ def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs:
|
||||||
"lists": result.lists,
|
"lists": result.lists,
|
||||||
"durationMs": durationMs,
|
"durationMs": durationMs,
|
||||||
"errors": result.errors[:20],
|
"errors": result.errors[:20],
|
||||||
|
"stoppedAtLimit": result.stoppedAtLimit,
|
||||||
|
"limits": {
|
||||||
|
"maxTasks": MAX_TASKS_DEFAULT,
|
||||||
|
"maxWorkspaces": MAX_WORKSPACES_DEFAULT,
|
||||||
|
"maxListsPerWorkspace": MAX_LISTS_PER_WORKSPACE_DEFAULT,
|
||||||
|
"maxAgeDays": MAX_AGE_DAYS_DEFAULT,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -31,13 +31,21 @@ from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
MAX_ITEMS_DEFAULT = 500
|
from modules.serviceCenter.services.serviceKnowledge import _ragLimits as _ragLimitsHelper
|
||||||
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
|
|
||||||
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
|
_FILES_DEFAULTS = _ragLimitsHelper.FILES_LIMITS_DEFAULT
|
||||||
|
MAX_ITEMS_DEFAULT = _FILES_DEFAULTS["maxItems"]
|
||||||
|
MAX_BYTES_DEFAULT = _FILES_DEFAULTS["maxBytes"]
|
||||||
|
MAX_FILE_SIZE_DEFAULT = _FILES_DEFAULTS["maxFileSize"]
|
||||||
|
MAX_DEPTH_DEFAULT = _FILES_DEFAULTS["maxDepth"]
|
||||||
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
|
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
|
||||||
MAX_DEPTH_DEFAULT = 4
|
|
||||||
MAX_AGE_DAYS_DEFAULT = 365
|
MAX_AGE_DAYS_DEFAULT = 365
|
||||||
|
|
||||||
|
|
||||||
|
def _resolveDataSourceLimits(dsId: str, ds: Dict[str, Any]) -> Dict[str, int]:
|
||||||
|
"""Return explicit RAG-limit overrides stored on the DataSource (or {})."""
|
||||||
|
return _ragLimitsHelper.getStoredOverrides(ds, "files")
|
||||||
|
|
||||||
FOLDER_MIME = "application/vnd.google-apps.folder"
|
FOLDER_MIME = "application/vnd.google-apps.folder"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -61,6 +69,8 @@ class GdriveBootstrapResult:
|
||||||
failed: int = 0
|
failed: int = 0
|
||||||
bytesProcessed: int = 0
|
bytesProcessed: int = 0
|
||||||
errors: List[str] = field(default_factory=list)
|
errors: List[str] = field(default_factory=list)
|
||||||
|
# See SharepointBootstrapResult.stoppedAtLimit — same semantics.
|
||||||
|
stoppedAtLimit: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
|
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
|
||||||
|
|
@ -173,12 +183,13 @@ async def bootstrapGdrive(
|
||||||
dsId = ds.get("id", "")
|
dsId = ds.get("id", "")
|
||||||
dsNeutralize = ds.get("neutralize", False)
|
dsNeutralize = ds.get("neutralize", False)
|
||||||
dsMaxAgeDays = ds.get("maxAgeDays", limits.maxAgeDays)
|
dsMaxAgeDays = ds.get("maxAgeDays", limits.maxAgeDays)
|
||||||
|
eff = _resolveDataSourceLimits(dsId, ds)
|
||||||
dsLimits = GdriveBootstrapLimits(
|
dsLimits = GdriveBootstrapLimits(
|
||||||
maxItems=limits.maxItems,
|
maxItems=eff.get("maxItems", limits.maxItems),
|
||||||
maxBytes=limits.maxBytes,
|
maxBytes=eff.get("maxBytes", limits.maxBytes),
|
||||||
maxFileSize=limits.maxFileSize,
|
maxFileSize=eff.get("maxFileSize", limits.maxFileSize),
|
||||||
skipMimePrefixes=limits.skipMimePrefixes,
|
skipMimePrefixes=limits.skipMimePrefixes,
|
||||||
maxDepth=limits.maxDepth,
|
maxDepth=eff.get("maxDepth", limits.maxDepth),
|
||||||
maxAgeDays=dsMaxAgeDays,
|
maxAgeDays=dsMaxAgeDays,
|
||||||
neutralize=dsNeutralize,
|
neutralize=dsNeutralize,
|
||||||
)
|
)
|
||||||
|
|
@ -265,8 +276,10 @@ async def _walkFolder(
|
||||||
|
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
||||||
|
_recordLimitStop(result, "maxItems", folderPath, limits)
|
||||||
return
|
return
|
||||||
if result.bytesProcessed >= limits.maxBytes:
|
if result.bytesProcessed >= limits.maxBytes:
|
||||||
|
_recordLimitStop(result, "maxBytes", folderPath, limits)
|
||||||
return
|
return
|
||||||
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||||
return
|
return
|
||||||
|
|
@ -276,6 +289,9 @@ async def _walkFolder(
|
||||||
mimeType = getattr(entry, "mimeType", None) or metadata.get("mimeType")
|
mimeType = getattr(entry, "mimeType", None) or metadata.get("mimeType")
|
||||||
|
|
||||||
if getattr(entry, "isFolder", False) or mimeType == FOLDER_MIME:
|
if getattr(entry, "isFolder", False) or mimeType == FOLDER_MIME:
|
||||||
|
if depth + 1 > limits.maxDepth:
|
||||||
|
_recordLimitStop(result, "maxDepth", entryPath, limits, hard=False)
|
||||||
|
continue
|
||||||
await _walkFolder(
|
await _walkFolder(
|
||||||
adapter=adapter,
|
adapter=adapter,
|
||||||
knowledgeService=knowledgeService,
|
knowledgeService=knowledgeService,
|
||||||
|
|
@ -298,6 +314,7 @@ async def _walkFolder(
|
||||||
continue
|
continue
|
||||||
size = int(getattr(entry, "size", 0) or 0)
|
size = int(getattr(entry, "size", 0) or 0)
|
||||||
if size and size > limits.maxFileSize:
|
if size and size > limits.maxFileSize:
|
||||||
|
_recordLimitStop(result, "maxFileSize", entryPath, limits, hard=False)
|
||||||
result.skippedPolicy += 1
|
result.skippedPolicy += 1
|
||||||
continue
|
continue
|
||||||
modifiedTime = metadata.get("modifiedTime")
|
modifiedTime = metadata.get("modifiedTime")
|
||||||
|
|
@ -451,7 +468,11 @@ async def _ingestOne(
|
||||||
processed = result.indexed + result.skippedDuplicate
|
processed = result.indexed + result.skippedDuplicate
|
||||||
if progressCb is not None and processed % 5 == 0:
|
if progressCb is not None and processed % 5 == 0:
|
||||||
try:
|
try:
|
||||||
progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert")
|
progressCb(
|
||||||
|
0,
|
||||||
|
messageKey="{n} Dateien verarbeitet, {indexed} indexiert",
|
||||||
|
messageParams={"n": processed, "indexed": result.indexed},
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
@ -470,13 +491,38 @@ async def _ingestOne(
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
|
||||||
|
def _recordLimitStop(
|
||||||
|
result: GdriveBootstrapResult,
|
||||||
|
limitName: str,
|
||||||
|
where: str,
|
||||||
|
limits: GdriveBootstrapLimits,
|
||||||
|
*,
|
||||||
|
hard: bool = True,
|
||||||
|
) -> None:
|
||||||
|
"""See subConnectorSyncSharepoint._recordLimitStop for semantics."""
|
||||||
|
if hard or result.stoppedAtLimit is None:
|
||||||
|
result.stoppedAtLimit = limitName
|
||||||
|
budgetMap = {
|
||||||
|
"maxItems": limits.maxItems,
|
||||||
|
"maxBytes": limits.maxBytes,
|
||||||
|
"maxDepth": limits.maxDepth,
|
||||||
|
"maxFileSize": limits.maxFileSize,
|
||||||
|
}
|
||||||
|
logger.warning(
|
||||||
|
"gdrive walker hit %s=%s at %s — partial index (indexed=%d, bytesProcessed=%d).",
|
||||||
|
limitName, budgetMap.get(limitName), where,
|
||||||
|
result.indexed, result.bytesProcessed,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
|
def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
|
||||||
durationMs = int((time.time() - startMs) * 1000)
|
durationMs = int((time.time() - startMs) * 1000)
|
||||||
logger.info(
|
logger.info(
|
||||||
"ingestion.connection.bootstrap.done part=gdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d bytes=%d durationMs=%d",
|
"ingestion.connection.bootstrap.done part=gdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d bytes=%d durationMs=%d stoppedAtLimit=%s",
|
||||||
connectionId,
|
connectionId,
|
||||||
result.indexed, result.skippedDuplicate, result.skippedPolicy,
|
result.indexed, result.skippedDuplicate, result.skippedPolicy,
|
||||||
result.failed, result.bytesProcessed, durationMs,
|
result.failed, result.bytesProcessed, durationMs,
|
||||||
|
result.stoppedAtLimit or "none",
|
||||||
extra={
|
extra={
|
||||||
"event": "ingestion.connection.bootstrap.done",
|
"event": "ingestion.connection.bootstrap.done",
|
||||||
"part": "gdrive",
|
"part": "gdrive",
|
||||||
|
|
@ -487,6 +533,7 @@ def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: f
|
||||||
"failed": result.failed,
|
"failed": result.failed,
|
||||||
"bytes": result.bytesProcessed,
|
"bytes": result.bytesProcessed,
|
||||||
"durationMs": durationMs,
|
"durationMs": durationMs,
|
||||||
|
"stoppedAtLimit": result.stoppedAtLimit,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
return {
|
return {
|
||||||
|
|
@ -498,4 +545,11 @@ def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: f
|
||||||
"bytesProcessed": result.bytesProcessed,
|
"bytesProcessed": result.bytesProcessed,
|
||||||
"durationMs": durationMs,
|
"durationMs": durationMs,
|
||||||
"errors": result.errors[:20],
|
"errors": result.errors[:20],
|
||||||
|
"stoppedAtLimit": result.stoppedAtLimit,
|
||||||
|
"limits": {
|
||||||
|
"maxItems": MAX_ITEMS_DEFAULT,
|
||||||
|
"maxBytes": MAX_BYTES_DEFAULT,
|
||||||
|
"maxFileSize": MAX_FILE_SIZE_DEFAULT,
|
||||||
|
"maxDepth": MAX_DEPTH_DEFAULT,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -474,7 +474,11 @@ async def _ingestMessage(
|
||||||
processed = result.indexed + result.skippedDuplicate
|
processed = result.indexed + result.skippedDuplicate
|
||||||
if progressCb is not None and processed % 5 == 0:
|
if progressCb is not None and processed % 5 == 0:
|
||||||
try:
|
try:
|
||||||
progressCb(0, f"{processed} Mails verarbeitet, {result.indexed} indexiert")
|
progressCb(
|
||||||
|
0,
|
||||||
|
messageKey="{n} Mails verarbeitet, {indexed} indexiert",
|
||||||
|
messageParams={"n": processed, "indexed": result.indexed},
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
if processed % 50 == 0:
|
if processed % 50 == 0:
|
||||||
|
|
|
||||||
|
|
@ -27,11 +27,19 @@ from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
MAX_ITEMS_DEFAULT = 500
|
from modules.serviceCenter.services.serviceKnowledge import _ragLimits as _ragLimitsHelper
|
||||||
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
|
|
||||||
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
|
_FILES_DEFAULTS = _ragLimitsHelper.FILES_LIMITS_DEFAULT
|
||||||
|
MAX_ITEMS_DEFAULT = _FILES_DEFAULTS["maxItems"]
|
||||||
|
MAX_BYTES_DEFAULT = _FILES_DEFAULTS["maxBytes"]
|
||||||
|
MAX_FILE_SIZE_DEFAULT = _FILES_DEFAULTS["maxFileSize"]
|
||||||
|
MAX_DEPTH_DEFAULT = _FILES_DEFAULTS["maxDepth"]
|
||||||
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
|
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
|
||||||
MAX_DEPTH_DEFAULT = 4
|
|
||||||
|
|
||||||
|
def _resolveDataSourceLimits(dsId: str, ds: Dict[str, Any]) -> Dict[str, int]:
|
||||||
|
"""Return explicit RAG-limit overrides stored on the DataSource (or {})."""
|
||||||
|
return _ragLimitsHelper.getStoredOverrides(ds, "files")
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -53,6 +61,8 @@ class KdriveBootstrapResult:
|
||||||
failed: int = 0
|
failed: int = 0
|
||||||
bytesProcessed: int = 0
|
bytesProcessed: int = 0
|
||||||
errors: List[str] = field(default_factory=list)
|
errors: List[str] = field(default_factory=list)
|
||||||
|
# See SharepointBootstrapResult.stoppedAtLimit — same semantics.
|
||||||
|
stoppedAtLimit: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
|
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
|
||||||
|
|
@ -141,12 +151,13 @@ async def bootstrapKdrive(
|
||||||
dsPath = ds.get("path", "")
|
dsPath = ds.get("path", "")
|
||||||
dsId = ds.get("id", "")
|
dsId = ds.get("id", "")
|
||||||
dsNeutralize = ds.get("neutralize", False)
|
dsNeutralize = ds.get("neutralize", False)
|
||||||
|
eff = _resolveDataSourceLimits(dsId, ds)
|
||||||
dsLimits = KdriveBootstrapLimits(
|
dsLimits = KdriveBootstrapLimits(
|
||||||
maxItems=limits.maxItems,
|
maxItems=eff.get("maxItems", limits.maxItems),
|
||||||
maxBytes=limits.maxBytes,
|
maxBytes=eff.get("maxBytes", limits.maxBytes),
|
||||||
maxFileSize=limits.maxFileSize,
|
maxFileSize=eff.get("maxFileSize", limits.maxFileSize),
|
||||||
skipMimePrefixes=limits.skipMimePrefixes,
|
skipMimePrefixes=limits.skipMimePrefixes,
|
||||||
maxDepth=limits.maxDepth,
|
maxDepth=eff.get("maxDepth", limits.maxDepth),
|
||||||
neutralize=dsNeutralize,
|
neutralize=dsNeutralize,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -232,14 +243,19 @@ async def _walkFolder(
|
||||||
|
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
||||||
|
_recordLimitStop(result, "maxItems", folderPath, limits)
|
||||||
return
|
return
|
||||||
if result.bytesProcessed >= limits.maxBytes:
|
if result.bytesProcessed >= limits.maxBytes:
|
||||||
|
_recordLimitStop(result, "maxBytes", folderPath, limits)
|
||||||
return
|
return
|
||||||
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||||
return
|
return
|
||||||
|
|
||||||
entryPath = getattr(entry, "path", "") or ""
|
entryPath = getattr(entry, "path", "") or ""
|
||||||
if getattr(entry, "isFolder", False):
|
if getattr(entry, "isFolder", False):
|
||||||
|
if depth + 1 > limits.maxDepth:
|
||||||
|
_recordLimitStop(result, "maxDepth", entryPath, limits, hard=False)
|
||||||
|
continue
|
||||||
await _walkFolder(
|
await _walkFolder(
|
||||||
adapter=adapter,
|
adapter=adapter,
|
||||||
knowledgeService=knowledgeService,
|
knowledgeService=knowledgeService,
|
||||||
|
|
@ -262,6 +278,7 @@ async def _walkFolder(
|
||||||
continue
|
continue
|
||||||
size = int(getattr(entry, "size", 0) or 0)
|
size = int(getattr(entry, "size", 0) or 0)
|
||||||
if size and size > limits.maxFileSize:
|
if size and size > limits.maxFileSize:
|
||||||
|
_recordLimitStop(result, "maxFileSize", entryPath, limits, hard=False)
|
||||||
result.skippedPolicy += 1
|
result.skippedPolicy += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -408,24 +425,53 @@ async def _ingestOne(
|
||||||
processed = result.indexed + result.skippedDuplicate
|
processed = result.indexed + result.skippedDuplicate
|
||||||
if progressCb is not None and processed % 5 == 0:
|
if progressCb is not None and processed % 5 == 0:
|
||||||
try:
|
try:
|
||||||
progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert")
|
progressCb(
|
||||||
|
0,
|
||||||
|
messageKey="{n} Dateien verarbeitet, {indexed} indexiert",
|
||||||
|
messageParams={"n": processed, "indexed": result.indexed},
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
|
||||||
|
def _recordLimitStop(
|
||||||
|
result: KdriveBootstrapResult,
|
||||||
|
limitName: str,
|
||||||
|
where: str,
|
||||||
|
limits: KdriveBootstrapLimits,
|
||||||
|
*,
|
||||||
|
hard: bool = True,
|
||||||
|
) -> None:
|
||||||
|
"""See subConnectorSyncSharepoint._recordLimitStop for semantics."""
|
||||||
|
if hard or result.stoppedAtLimit is None:
|
||||||
|
result.stoppedAtLimit = limitName
|
||||||
|
budgetMap = {
|
||||||
|
"maxItems": limits.maxItems,
|
||||||
|
"maxBytes": limits.maxBytes,
|
||||||
|
"maxDepth": limits.maxDepth,
|
||||||
|
"maxFileSize": limits.maxFileSize,
|
||||||
|
}
|
||||||
|
logger.warning(
|
||||||
|
"kdrive walker hit %s=%s at %s — partial index (indexed=%d, bytesProcessed=%d).",
|
||||||
|
limitName, budgetMap.get(limitName), where,
|
||||||
|
result.indexed, result.bytesProcessed,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _finalizeResult(connectionId: str, result: KdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
|
def _finalizeResult(connectionId: str, result: KdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
|
||||||
durationMs = int((time.time() - startMs) * 1000)
|
durationMs = int((time.time() - startMs) * 1000)
|
||||||
logger.info(
|
logger.info(
|
||||||
"ingestion.connection.bootstrap.done part=kdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d",
|
"ingestion.connection.bootstrap.done part=kdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d stoppedAtLimit=%s",
|
||||||
connectionId,
|
connectionId,
|
||||||
result.indexed, result.skippedDuplicate, result.skippedPolicy, result.failed,
|
result.indexed, result.skippedDuplicate, result.skippedPolicy, result.failed,
|
||||||
durationMs,
|
durationMs, result.stoppedAtLimit or "none",
|
||||||
extra={"event": "ingestion.connection.bootstrap.done", "part": "kdrive",
|
extra={"event": "ingestion.connection.bootstrap.done", "part": "kdrive",
|
||||||
"connectionId": connectionId, "indexed": result.indexed,
|
"connectionId": connectionId, "indexed": result.indexed,
|
||||||
"skippedDup": result.skippedDuplicate, "skippedPolicy": result.skippedPolicy,
|
"skippedDup": result.skippedDuplicate, "skippedPolicy": result.skippedPolicy,
|
||||||
"failed": result.failed, "durationMs": durationMs},
|
"failed": result.failed, "durationMs": durationMs,
|
||||||
|
"stoppedAtLimit": result.stoppedAtLimit},
|
||||||
)
|
)
|
||||||
return {
|
return {
|
||||||
"connectionId": result.connectionId,
|
"connectionId": result.connectionId,
|
||||||
|
|
@ -436,4 +482,11 @@ def _finalizeResult(connectionId: str, result: KdriveBootstrapResult, startMs: f
|
||||||
"bytesProcessed": result.bytesProcessed,
|
"bytesProcessed": result.bytesProcessed,
|
||||||
"durationMs": durationMs,
|
"durationMs": durationMs,
|
||||||
"errors": result.errors[:20],
|
"errors": result.errors[:20],
|
||||||
|
"stoppedAtLimit": result.stoppedAtLimit,
|
||||||
|
"limits": {
|
||||||
|
"maxItems": MAX_ITEMS_DEFAULT,
|
||||||
|
"maxBytes": MAX_BYTES_DEFAULT,
|
||||||
|
"maxFileSize": MAX_FILE_SIZE_DEFAULT,
|
||||||
|
"maxDepth": MAX_DEPTH_DEFAULT,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -460,7 +460,11 @@ async def _ingestMessage(
|
||||||
processed = result.indexed + result.skippedDuplicate
|
processed = result.indexed + result.skippedDuplicate
|
||||||
if progressCb is not None and processed % 5 == 0:
|
if progressCb is not None and processed % 5 == 0:
|
||||||
try:
|
try:
|
||||||
progressCb(0, f"{processed} Mails verarbeitet, {result.indexed} indexiert")
|
progressCb(
|
||||||
|
0,
|
||||||
|
messageKey="{n} Mails verarbeitet, {indexed} indexiert",
|
||||||
|
messageParams={"n": processed, "indexed": result.indexed},
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
if processed % 50 == 0:
|
if processed % 50 == 0:
|
||||||
|
|
|
||||||
|
|
@ -30,14 +30,27 @@ from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
MAX_ITEMS_DEFAULT = 500
|
from modules.serviceCenter.services.serviceKnowledge import _ragLimits as _ragLimitsHelper
|
||||||
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
|
|
||||||
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
|
_FILES_DEFAULTS = _ragLimitsHelper.FILES_LIMITS_DEFAULT
|
||||||
|
MAX_ITEMS_DEFAULT = _FILES_DEFAULTS["maxItems"]
|
||||||
|
MAX_BYTES_DEFAULT = _FILES_DEFAULTS["maxBytes"]
|
||||||
|
MAX_FILE_SIZE_DEFAULT = _FILES_DEFAULTS["maxFileSize"]
|
||||||
|
MAX_DEPTH_DEFAULT = _FILES_DEFAULTS["maxDepth"]
|
||||||
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
|
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
|
||||||
MAX_DEPTH_DEFAULT = 4
|
|
||||||
MAX_SITES_DEFAULT = 3
|
MAX_SITES_DEFAULT = 3
|
||||||
|
|
||||||
|
|
||||||
|
def _resolveDataSourceLimits(dsId: str, ds: Dict[str, Any]) -> Dict[str, int]:
|
||||||
|
"""Return explicit RAG-limit overrides stored on the DataSource.
|
||||||
|
|
||||||
|
Empty dict means "use caller-supplied limits" — never overrides them with
|
||||||
|
defaults. Used to merge per-DataSource user settings on top of the
|
||||||
|
walker's runtime limits.
|
||||||
|
"""
|
||||||
|
return _ragLimitsHelper.getStoredOverrides(ds, "files")
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class SharepointBootstrapLimits:
|
class SharepointBootstrapLimits:
|
||||||
maxItems: int = MAX_ITEMS_DEFAULT
|
maxItems: int = MAX_ITEMS_DEFAULT
|
||||||
|
|
@ -59,6 +72,10 @@ class SharepointBootstrapResult:
|
||||||
failed: int = 0
|
failed: int = 0
|
||||||
bytesProcessed: int = 0
|
bytesProcessed: int = 0
|
||||||
errors: List[str] = field(default_factory=list)
|
errors: List[str] = field(default_factory=list)
|
||||||
|
# First budget that hit zero; None means the walk completed naturally.
|
||||||
|
# Surfaces in the bootstrap result so the RAG inventory UI can warn the
|
||||||
|
# user that the corpus is incomplete and tell them which knob to turn.
|
||||||
|
stoppedAtLimit: Optional[str] = None # "maxItems" | "maxBytes" | "maxDepth" | "maxFileSize" | None
|
||||||
|
|
||||||
|
|
||||||
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
|
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
|
||||||
|
|
@ -161,12 +178,13 @@ async def bootstrapSharepoint(
|
||||||
dsPath = ds.get("path", "")
|
dsPath = ds.get("path", "")
|
||||||
dsId = ds.get("id", "")
|
dsId = ds.get("id", "")
|
||||||
dsNeutralize = ds.get("neutralize", False)
|
dsNeutralize = ds.get("neutralize", False)
|
||||||
|
eff = _resolveDataSourceLimits(dsId, ds)
|
||||||
dsLimits = SharepointBootstrapLimits(
|
dsLimits = SharepointBootstrapLimits(
|
||||||
maxItems=limits.maxItems,
|
maxItems=eff.get("maxItems", limits.maxItems),
|
||||||
maxBytes=limits.maxBytes,
|
maxBytes=eff.get("maxBytes", limits.maxBytes),
|
||||||
maxFileSize=limits.maxFileSize,
|
maxFileSize=eff.get("maxFileSize", limits.maxFileSize),
|
||||||
skipMimePrefixes=limits.skipMimePrefixes,
|
skipMimePrefixes=limits.skipMimePrefixes,
|
||||||
maxDepth=limits.maxDepth,
|
maxDepth=eff.get("maxDepth", limits.maxDepth),
|
||||||
maxSites=limits.maxSites,
|
maxSites=limits.maxSites,
|
||||||
neutralize=dsNeutralize,
|
neutralize=dsNeutralize,
|
||||||
)
|
)
|
||||||
|
|
@ -259,14 +277,22 @@ async def _walkFolder(
|
||||||
|
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
||||||
|
_recordLimitStop(result, "maxItems", folderPath, limits)
|
||||||
return
|
return
|
||||||
if result.bytesProcessed >= limits.maxBytes:
|
if result.bytesProcessed >= limits.maxBytes:
|
||||||
|
_recordLimitStop(result, "maxBytes", folderPath, limits)
|
||||||
return
|
return
|
||||||
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||||
return
|
return
|
||||||
|
|
||||||
entryPath = getattr(entry, "path", "") or ""
|
entryPath = getattr(entry, "path", "") or ""
|
||||||
if getattr(entry, "isFolder", False):
|
if getattr(entry, "isFolder", False):
|
||||||
|
if depth + 1 > limits.maxDepth:
|
||||||
|
# We stop descending here but keep walking siblings.
|
||||||
|
# Record once per bootstrap so the UI shows "maxDepth" even
|
||||||
|
# if other budgets aren't exhausted yet.
|
||||||
|
_recordLimitStop(result, "maxDepth", entryPath, limits, hard=False)
|
||||||
|
continue
|
||||||
await _walkFolder(
|
await _walkFolder(
|
||||||
adapter=adapter,
|
adapter=adapter,
|
||||||
knowledgeService=knowledgeService,
|
knowledgeService=knowledgeService,
|
||||||
|
|
@ -289,6 +315,7 @@ async def _walkFolder(
|
||||||
continue
|
continue
|
||||||
size = int(getattr(entry, "size", 0) or 0)
|
size = int(getattr(entry, "size", 0) or 0)
|
||||||
if size and size > limits.maxFileSize:
|
if size and size > limits.maxFileSize:
|
||||||
|
_recordLimitStop(result, "maxFileSize", entryPath, limits, hard=False)
|
||||||
result.skippedPolicy += 1
|
result.skippedPolicy += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -428,7 +455,11 @@ async def _ingestOne(
|
||||||
processed = result.indexed + result.skippedDuplicate
|
processed = result.indexed + result.skippedDuplicate
|
||||||
if progressCb is not None and processed % 5 == 0:
|
if progressCb is not None and processed % 5 == 0:
|
||||||
try:
|
try:
|
||||||
progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert")
|
progressCb(
|
||||||
|
0,
|
||||||
|
messageKey="{n} Dateien verarbeitet, {indexed} indexiert",
|
||||||
|
messageParams={"n": processed, "indexed": result.indexed},
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
if processed % 50 == 0:
|
if processed % 50 == 0:
|
||||||
|
|
@ -443,13 +474,44 @@ async def _ingestOne(
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
|
||||||
|
def _recordLimitStop(
|
||||||
|
result: SharepointBootstrapResult,
|
||||||
|
limitName: str,
|
||||||
|
where: str,
|
||||||
|
limits: SharepointBootstrapLimits,
|
||||||
|
*,
|
||||||
|
hard: bool = True,
|
||||||
|
) -> None:
|
||||||
|
"""Mark the FIRST limit that bit. Soft hits (per-file maxFileSize, per-folder
|
||||||
|
maxDepth) only record when no hard limit has yet stopped the run, so the UI
|
||||||
|
surfaces the most important reason.
|
||||||
|
|
||||||
|
Hard limits (maxItems / maxBytes) ALWAYS overwrite a previously recorded
|
||||||
|
soft limit — once a hard cap is hit, the corpus is provably incomplete.
|
||||||
|
"""
|
||||||
|
if hard or result.stoppedAtLimit is None:
|
||||||
|
result.stoppedAtLimit = limitName
|
||||||
|
budgetMap = {
|
||||||
|
"maxItems": limits.maxItems,
|
||||||
|
"maxBytes": limits.maxBytes,
|
||||||
|
"maxDepth": limits.maxDepth,
|
||||||
|
"maxFileSize": limits.maxFileSize,
|
||||||
|
}
|
||||||
|
logger.warning(
|
||||||
|
"sharepoint walker hit %s=%s at %s — partial index "
|
||||||
|
"(indexed=%d, bytesProcessed=%d). Raise the limit or split the data source.",
|
||||||
|
limitName, budgetMap.get(limitName), where,
|
||||||
|
result.indexed, result.bytesProcessed,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _finalizeResult(connectionId: str, result: SharepointBootstrapResult, startMs: float) -> Dict[str, Any]:
|
def _finalizeResult(connectionId: str, result: SharepointBootstrapResult, startMs: float) -> Dict[str, Any]:
|
||||||
durationMs = int((time.time() - startMs) * 1000)
|
durationMs = int((time.time() - startMs) * 1000)
|
||||||
logger.info(
|
logger.info(
|
||||||
"ingestion.connection.bootstrap.done part=sharepoint connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d",
|
"ingestion.connection.bootstrap.done part=sharepoint connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d stoppedAtLimit=%s",
|
||||||
connectionId,
|
connectionId,
|
||||||
result.indexed, result.skippedDuplicate, result.skippedPolicy, result.failed,
|
result.indexed, result.skippedDuplicate, result.skippedPolicy, result.failed,
|
||||||
durationMs,
|
durationMs, result.stoppedAtLimit or "none",
|
||||||
extra={
|
extra={
|
||||||
"event": "ingestion.connection.bootstrap.done",
|
"event": "ingestion.connection.bootstrap.done",
|
||||||
"part": "sharepoint",
|
"part": "sharepoint",
|
||||||
|
|
@ -459,6 +521,7 @@ def _finalizeResult(connectionId: str, result: SharepointBootstrapResult, startM
|
||||||
"skippedPolicy": result.skippedPolicy,
|
"skippedPolicy": result.skippedPolicy,
|
||||||
"failed": result.failed,
|
"failed": result.failed,
|
||||||
"durationMs": durationMs,
|
"durationMs": durationMs,
|
||||||
|
"stoppedAtLimit": result.stoppedAtLimit,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
return {
|
return {
|
||||||
|
|
@ -470,4 +533,11 @@ def _finalizeResult(connectionId: str, result: SharepointBootstrapResult, startM
|
||||||
"bytesProcessed": result.bytesProcessed,
|
"bytesProcessed": result.bytesProcessed,
|
||||||
"durationMs": durationMs,
|
"durationMs": durationMs,
|
||||||
"errors": result.errors[:20],
|
"errors": result.errors[:20],
|
||||||
|
"stoppedAtLimit": result.stoppedAtLimit,
|
||||||
|
"limits": {
|
||||||
|
"maxItems": MAX_ITEMS_DEFAULT,
|
||||||
|
"maxBytes": MAX_BYTES_DEFAULT,
|
||||||
|
"maxFileSize": MAX_FILE_SIZE_DEFAULT,
|
||||||
|
"maxDepth": MAX_DEPTH_DEFAULT,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,289 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Feature-data RAG bootstrap: indexes FeatureDataSource rows into the knowledge store.
|
||||||
|
|
||||||
|
Analogous to connection.bootstrap for external connections (Google, Microsoft),
|
||||||
|
this handler reads FeatureDataSource records with ragIndexEnabled=True, queries
|
||||||
|
the underlying feature tables via FeatureDataProvider, serialises each row into
|
||||||
|
text, and feeds it through KnowledgeService.requestIngestion so the data
|
||||||
|
appears in ContentChunk embeddings for semantic RAG search.
|
||||||
|
|
||||||
|
Job type: ``feature.bootstrap``
|
||||||
|
Payload: ``{"workspaceInstanceId": "...", "featureDataSourceIds": [...] (optional)}``
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
FEATURE_BOOTSTRAP_JOB_TYPE = "feature.bootstrap"
|
||||||
|
|
||||||
|
|
||||||
|
def _loadRagEnabledFds(workspaceInstanceId: str, featureDataSourceIds: Optional[List[str]] = None):
|
||||||
|
"""Load FeatureDataSource rows whose effective ragIndexEnabled is True.
|
||||||
|
|
||||||
|
Returns dicts with resolved flags so downstream code can read them directly.
|
||||||
|
"""
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.datamodels.datamodelFeatureDataSource import FeatureDataSource
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlagFds
|
||||||
|
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
allFds = rootIf.db.getRecordset(
|
||||||
|
FeatureDataSource, recordFilter={"workspaceInstanceId": workspaceInstanceId}
|
||||||
|
)
|
||||||
|
resolved = []
|
||||||
|
for fds in allFds:
|
||||||
|
tblName = (fds.get("tableName") if isinstance(fds, dict) else getattr(fds, "tableName", "")) or ""
|
||||||
|
fCode = (fds.get("featureCode") if isinstance(fds, dict) else getattr(fds, "featureCode", "")) or ""
|
||||||
|
if tblName == "*" or not tblName or not fCode:
|
||||||
|
continue
|
||||||
|
effRag = getEffectiveFlagFds(fds, "ragIndexEnabled", allFds, mode="aggregate")
|
||||||
|
if effRag is not True:
|
||||||
|
continue
|
||||||
|
row = dict(fds) if isinstance(fds, dict) else {**fds.__dict__}
|
||||||
|
row["_effectiveNeutralize"] = getEffectiveFlagFds(fds, "neutralize", allFds, mode="aggregate")
|
||||||
|
row["_effectiveScope"] = getEffectiveFlagFds(fds, "scope", allFds, mode="aggregate") or "featureInstance"
|
||||||
|
row["ragIndexEnabled"] = True
|
||||||
|
resolved.append(row)
|
||||||
|
|
||||||
|
if featureDataSourceIds:
|
||||||
|
idSet = set(featureDataSourceIds)
|
||||||
|
resolved = [r for r in resolved if r.get("id") in idSet]
|
||||||
|
return resolved
|
||||||
|
|
||||||
|
|
||||||
|
def _serializeRowToText(row: Dict[str, Any], neutralizeFields: Optional[List[str]] = None) -> str:
|
||||||
|
"""Convert a feature-table row into readable text for embedding.
|
||||||
|
|
||||||
|
Skips internal fields (starting with '_' or 'sys') and produces
|
||||||
|
``key: value`` lines that embed well semantically.
|
||||||
|
"""
|
||||||
|
neutralizeSet = set(neutralizeFields or [])
|
||||||
|
lines = []
|
||||||
|
for key, value in row.items():
|
||||||
|
if key.startswith("_") or key.startswith("sys"):
|
||||||
|
continue
|
||||||
|
if key == "id":
|
||||||
|
continue
|
||||||
|
if value is None or value == "" or value == []:
|
||||||
|
continue
|
||||||
|
if key in neutralizeSet:
|
||||||
|
value = "[REDACTED]"
|
||||||
|
elif isinstance(value, (dict, list)):
|
||||||
|
value = json.dumps(value, ensure_ascii=False, default=str)
|
||||||
|
else:
|
||||||
|
value = str(value)
|
||||||
|
lines.append(f"{key}: {value}")
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _getFeatureDbConnector(featureCode: str):
|
||||||
|
"""Create a lightweight DB connector to the feature database."""
|
||||||
|
from modules.connectors.connectorDbPostgre import DatabaseConnector
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
|
dbName = f"poweron_{featureCode.lower()}"
|
||||||
|
return DatabaseConnector(
|
||||||
|
dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
|
||||||
|
dbDatabase=dbName,
|
||||||
|
dbUser=APP_CONFIG.get("DB_USER"),
|
||||||
|
dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET"),
|
||||||
|
dbPort=int(APP_CONFIG.get("DB_PORT", 5432)),
|
||||||
|
userId="system.feature_bootstrap",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _featureBootstrapHandler(
|
||||||
|
job: Dict[str, Any],
|
||||||
|
progressCb,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Walk RAG-enabled FeatureDataSources and index their rows."""
|
||||||
|
payload = job.get("payload") or {}
|
||||||
|
workspaceInstanceId = payload.get("workspaceInstanceId")
|
||||||
|
featureDataSourceIds = payload.get("featureDataSourceIds")
|
||||||
|
if not workspaceInstanceId:
|
||||||
|
raise ValueError("feature.bootstrap requires payload.workspaceInstanceId")
|
||||||
|
|
||||||
|
progressCb(5, messageKey="Feature-Datenquellen werden geladen...")
|
||||||
|
|
||||||
|
fdsList = _loadRagEnabledFds(workspaceInstanceId, featureDataSourceIds)
|
||||||
|
if not fdsList:
|
||||||
|
logger.info(
|
||||||
|
"feature.bootstrap.skipped — no rag-enabled FDS for workspace %s",
|
||||||
|
workspaceInstanceId,
|
||||||
|
)
|
||||||
|
return {"workspaceInstanceId": workspaceInstanceId, "skipped": True, "reason": "no_rag_enabled_fds"}
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceAgent.featureDataProvider import FeatureDataProvider
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
from modules.serviceCenter.context import ServiceCenterContext
|
||||||
|
from modules.serviceCenter import getService
|
||||||
|
from modules.security.rootAccess import getRootUser
|
||||||
|
|
||||||
|
totalIndexed = 0
|
||||||
|
totalSkipped = 0
|
||||||
|
totalFailed = 0
|
||||||
|
fdsResults = []
|
||||||
|
|
||||||
|
for fdsIdx, fds in enumerate(fdsList):
|
||||||
|
fdsId = fds.get("id", "")
|
||||||
|
featureCode = fds.get("featureCode", "")
|
||||||
|
tableName = fds.get("tableName", "")
|
||||||
|
featureInstanceId = fds.get("featureInstanceId", "")
|
||||||
|
mandateId = fds.get("mandateId", "")
|
||||||
|
neutralizeFields = fds.get("neutralizeFields") or []
|
||||||
|
recordFilter = fds.get("recordFilter") or {}
|
||||||
|
effectiveScope = fds.get("_effectiveScope", "featureInstance")
|
||||||
|
effectiveNeutralize = bool(fds.get("_effectiveNeutralize", False))
|
||||||
|
|
||||||
|
progressPct = 5 + int(90 * fdsIdx / len(fdsList))
|
||||||
|
progressCb(
|
||||||
|
progressPct,
|
||||||
|
messageKey="Indexiere {table} ({n}/{total})...",
|
||||||
|
messageParams={"table": tableName, "n": fdsIdx + 1, "total": len(fdsList)},
|
||||||
|
)
|
||||||
|
|
||||||
|
if not featureCode or not tableName or not featureInstanceId:
|
||||||
|
logger.warning("feature.bootstrap: skipping FDS %s — missing featureCode/tableName/fiId", fdsId)
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
dbConnector = _getFeatureDbConnector(featureCode)
|
||||||
|
provider = FeatureDataProvider(dbConnector)
|
||||||
|
|
||||||
|
rootUser = getRootUser()
|
||||||
|
ctx = ServiceCenterContext(
|
||||||
|
user=rootUser,
|
||||||
|
mandate_id=mandateId,
|
||||||
|
feature_instance_id=workspaceInstanceId,
|
||||||
|
)
|
||||||
|
knowledgeService = getService("knowledge", ctx)
|
||||||
|
|
||||||
|
extraFilters = [
|
||||||
|
{"field": k, "op": "=", "value": v}
|
||||||
|
for k, v in recordFilter.items()
|
||||||
|
] if recordFilter else None
|
||||||
|
|
||||||
|
batchSize = 200
|
||||||
|
offset = 0
|
||||||
|
fdsIndexed = 0
|
||||||
|
fdsSkipped = 0
|
||||||
|
fdsFailed = 0
|
||||||
|
|
||||||
|
while True:
|
||||||
|
result = provider.browseTable(
|
||||||
|
tableName=tableName,
|
||||||
|
featureInstanceId=featureInstanceId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
limit=batchSize,
|
||||||
|
offset=offset,
|
||||||
|
extraFilters=extraFilters,
|
||||||
|
)
|
||||||
|
rows = result.get("rows", [])
|
||||||
|
if not rows:
|
||||||
|
break
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
rowId = row.get("id", "")
|
||||||
|
if not rowId:
|
||||||
|
continue
|
||||||
|
|
||||||
|
textContent = _serializeRowToText(row, neutralizeFields if effectiveNeutralize else None)
|
||||||
|
if not textContent.strip():
|
||||||
|
fdsSkipped += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
contentVersion = str(row.get("sysUpdatedAt") or row.get("sysCreatedAt") or "")
|
||||||
|
|
||||||
|
ingestionJob = IngestionJob(
|
||||||
|
sourceKind="feature_record",
|
||||||
|
sourceId=f"{workspaceInstanceId}:{tableName}:{rowId}",
|
||||||
|
fileName=f"{tableName}-{rowId}",
|
||||||
|
mimeType="application/vnd.poweron.feature-record+json",
|
||||||
|
userId=fds.get("userId") or "system",
|
||||||
|
featureInstanceId=workspaceInstanceId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
contentObjects=[{
|
||||||
|
"contentType": "text",
|
||||||
|
"data": textContent,
|
||||||
|
"contextRef": {
|
||||||
|
"table": tableName,
|
||||||
|
"featureCode": featureCode,
|
||||||
|
"featureInstanceId": featureInstanceId,
|
||||||
|
"rowId": rowId,
|
||||||
|
},
|
||||||
|
"contentObjectId": f"{tableName}:{rowId}",
|
||||||
|
}],
|
||||||
|
structure={"sourceTable": tableName, "featureCode": featureCode},
|
||||||
|
contentVersion=contentVersion,
|
||||||
|
provenance={
|
||||||
|
"featureDataSourceId": fdsId,
|
||||||
|
"tableName": tableName,
|
||||||
|
"featureCode": featureCode,
|
||||||
|
"featureInstanceId": featureInstanceId,
|
||||||
|
},
|
||||||
|
neutralize=effectiveNeutralize,
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
handle = await knowledgeService.requestIngestion(ingestionJob)
|
||||||
|
if handle.status == "failed":
|
||||||
|
fdsFailed += 1
|
||||||
|
logger.warning(
|
||||||
|
"feature.bootstrap: ingestion failed fds=%s table=%s row=%s error=%s",
|
||||||
|
fdsId, tableName, rowId, handle.error,
|
||||||
|
)
|
||||||
|
elif handle.status == "duplicate":
|
||||||
|
fdsSkipped += 1
|
||||||
|
else:
|
||||||
|
fdsIndexed += 1
|
||||||
|
except Exception as ingErr:
|
||||||
|
fdsFailed += 1
|
||||||
|
logger.error(
|
||||||
|
"feature.bootstrap: ingestion error fds=%s row=%s: %s",
|
||||||
|
fdsId, rowId, ingErr,
|
||||||
|
)
|
||||||
|
|
||||||
|
offset += batchSize
|
||||||
|
if len(rows) < batchSize:
|
||||||
|
break
|
||||||
|
|
||||||
|
totalIndexed += fdsIndexed
|
||||||
|
totalSkipped += fdsSkipped
|
||||||
|
totalFailed += fdsFailed
|
||||||
|
|
||||||
|
fdsResults.append({
|
||||||
|
"featureDataSourceId": fdsId,
|
||||||
|
"tableName": tableName,
|
||||||
|
"featureCode": featureCode,
|
||||||
|
"indexed": fdsIndexed,
|
||||||
|
"skippedDuplicate": fdsSkipped,
|
||||||
|
"failed": fdsFailed,
|
||||||
|
})
|
||||||
|
|
||||||
|
except Exception as fdsErr:
|
||||||
|
logger.error(
|
||||||
|
"feature.bootstrap: error processing FDS %s (%s.%s): %s",
|
||||||
|
fdsId, featureCode, tableName, fdsErr, exc_info=True,
|
||||||
|
)
|
||||||
|
fdsResults.append({
|
||||||
|
"featureDataSourceId": fdsId,
|
||||||
|
"tableName": tableName,
|
||||||
|
"featureCode": featureCode,
|
||||||
|
"error": str(fdsErr),
|
||||||
|
})
|
||||||
|
|
||||||
|
progressCb(100, messageKey="Feature-Daten-Sync abgeschlossen.")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"workspaceInstanceId": workspaceInstanceId,
|
||||||
|
"indexed": totalIndexed,
|
||||||
|
"skippedDuplicate": totalSkipped,
|
||||||
|
"failed": totalFailed,
|
||||||
|
"dataSources": fdsResults,
|
||||||
|
}
|
||||||
|
|
@ -1,78 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""Resolve effective policies (neutralize, ragIndexEnabled) for DataSource tree hierarchies.
|
|
||||||
|
|
||||||
Tree-inheritance rule: nearest ancestor DataSource with an explicit value wins.
|
|
||||||
If no ancestor has a value, the default (False) is used.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Any, Dict, List, Optional
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def resolveEffectiveNeutralize(
|
|
||||||
ds: Dict[str, Any],
|
|
||||||
allDataSources: List[Dict[str, Any]],
|
|
||||||
) -> bool:
|
|
||||||
"""Compute effective neutralize by walking up the path tree.
|
|
||||||
|
|
||||||
A DataSource at /sites/HR/Documents inherits from /sites/HR if
|
|
||||||
that ancestor has neutralize=True and the child has no explicit override.
|
|
||||||
"""
|
|
||||||
ownValue = ds.get("neutralize")
|
|
||||||
if ownValue is not None and ownValue is not False:
|
|
||||||
return True
|
|
||||||
if ownValue is False:
|
|
||||||
return False
|
|
||||||
return _findAncestorPolicy(ds, allDataSources, "neutralize")
|
|
||||||
|
|
||||||
|
|
||||||
def resolveEffectiveRagIndexEnabled(
|
|
||||||
ds: Dict[str, Any],
|
|
||||||
allDataSources: List[Dict[str, Any]],
|
|
||||||
) -> bool:
|
|
||||||
"""Compute effective ragIndexEnabled by walking up the path tree."""
|
|
||||||
ownValue = ds.get("ragIndexEnabled")
|
|
||||||
if ownValue is True:
|
|
||||||
return True
|
|
||||||
if ownValue is False:
|
|
||||||
return False
|
|
||||||
return _findAncestorPolicy(ds, allDataSources, "ragIndexEnabled")
|
|
||||||
|
|
||||||
|
|
||||||
def _findAncestorPolicy(
|
|
||||||
ds: Dict[str, Any],
|
|
||||||
allDataSources: List[Dict[str, Any]],
|
|
||||||
field: str,
|
|
||||||
) -> bool:
|
|
||||||
"""Walk ancestors (longest-prefix match) to find an inherited policy value."""
|
|
||||||
dsPath = ds.get("path", "")
|
|
||||||
connectionId = ds.get("connectionId", "")
|
|
||||||
if not dsPath:
|
|
||||||
return False
|
|
||||||
|
|
||||||
ancestors = []
|
|
||||||
for candidate in allDataSources:
|
|
||||||
if candidate.get("id") == ds.get("id"):
|
|
||||||
continue
|
|
||||||
if candidate.get("connectionId") != connectionId:
|
|
||||||
continue
|
|
||||||
candidatePath = candidate.get("path", "")
|
|
||||||
if not candidatePath:
|
|
||||||
continue
|
|
||||||
if dsPath.startswith(candidatePath) and len(candidatePath) < len(dsPath):
|
|
||||||
ancestors.append(candidate)
|
|
||||||
|
|
||||||
ancestors.sort(key=lambda a: len(a.get("path", "")), reverse=True)
|
|
||||||
|
|
||||||
for ancestor in ancestors:
|
|
||||||
val = ancestor.get(field)
|
|
||||||
if val is True:
|
|
||||||
return True
|
|
||||||
if val is False:
|
|
||||||
return False
|
|
||||||
return False
|
|
||||||
|
|
@ -15,8 +15,9 @@ up with "Job stuck at 10% for 10h" zombies.
|
||||||
|
|
||||||
These helpers wrap each phase in `asyncio.wait_for`. Sync extraction runs
|
These helpers wrap each phase in `asyncio.wait_for`. Sync extraction runs
|
||||||
on a worker thread so the loop stays responsive. Every wrapped call also
|
on a worker thread so the loop stays responsive. Every wrapped call also
|
||||||
emits a short start/done log line, so when something hangs we know the
|
emits start/done log lines at DEBUG so normal INFO logs stay quiet; for
|
||||||
exact item that caused it (path, size, mime).
|
stuck-job triage, enable DEBUG for this module — the last
|
||||||
|
``walker.item.start`` before a hang still pinpoints the item (path, size, mime).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -48,7 +49,7 @@ async def downloadWithTimeout(
|
||||||
used in log messages so we can pinpoint the offending item in case of a
|
used in log messages so we can pinpoint the offending item in case of a
|
||||||
hang or timeout.
|
hang or timeout.
|
||||||
"""
|
"""
|
||||||
logger.info("walker.download.start %s timeout=%ds", label, timeoutSeconds)
|
logger.debug("walker.download.start %s timeout=%ds", label, timeoutSeconds)
|
||||||
try:
|
try:
|
||||||
result = await asyncio.wait_for(awaitable, timeout=timeoutSeconds)
|
result = await asyncio.wait_for(awaitable, timeout=timeoutSeconds)
|
||||||
logger.debug("walker.download.done %s", label)
|
logger.debug("walker.download.done %s", label)
|
||||||
|
|
@ -71,7 +72,7 @@ async def extractWithTimeout(
|
||||||
keep running until the process exits — but at least the walker proceeds
|
keep running until the process exits — but at least the walker proceeds
|
||||||
to the next item instead of freezing forever.
|
to the next item instead of freezing forever.
|
||||||
"""
|
"""
|
||||||
logger.info("walker.extract.start %s timeout=%ds", label, timeoutSeconds)
|
logger.debug("walker.extract.start %s timeout=%ds", label, timeoutSeconds)
|
||||||
try:
|
try:
|
||||||
result = await asyncio.wait_for(
|
result = await asyncio.wait_for(
|
||||||
asyncio.to_thread(syncFn, *args),
|
asyncio.to_thread(syncFn, *args),
|
||||||
|
|
@ -102,15 +103,15 @@ async def ingestWithTimeout(
|
||||||
|
|
||||||
|
|
||||||
def logItemStart(service: str, label: str, *, sizeBytes: Optional[int] = None, mime: Optional[str] = None) -> None:
|
def logItemStart(service: str, label: str, *, sizeBytes: Optional[int] = None, mime: Optional[str] = None) -> None:
|
||||||
"""Log that processing of one item is about to begin.
|
"""Log that processing of one item is about to begin (DEBUG).
|
||||||
|
|
||||||
When the worker hangs, the LAST `walker.item.start` line in the log
|
When the worker hangs, the LAST `walker.item.start` line in the log
|
||||||
points to the exact item that caused the freeze. This is the single
|
points to the exact item that caused the freeze. Enable DEBUG for this
|
||||||
most valuable diagnostic for stuck-job triage.
|
module during triage.
|
||||||
"""
|
"""
|
||||||
parts = [f"walker.item.start service={service} path={label}"]
|
parts = [f"walker.item.start service={service} path={label}"]
|
||||||
if sizeBytes is not None:
|
if sizeBytes is not None:
|
||||||
parts.append(f"size={sizeBytes}")
|
parts.append(f"size={sizeBytes}")
|
||||||
if mime:
|
if mime:
|
||||||
parts.append(f"mime={mime}")
|
parts.append(f"mime={mime}")
|
||||||
logger.info(" ".join(parts))
|
logger.debug(" ".join(parts))
|
||||||
|
|
|
||||||
|
|
@ -327,27 +327,20 @@ class SharepointService:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def uploadFile(self, siteId: str, folderPath: str, fileName: str, content: bytes) -> Dict[str, Any]:
|
async def uploadFile(self, siteId: str, folderPath: str, fileName: str, content: bytes) -> Dict[str, Any]:
|
||||||
"""Upload a file to SharePoint."""
|
"""Upload a file to SharePoint. Raises on failure."""
|
||||||
try:
|
cleanPath = folderPath.lstrip('/')
|
||||||
# Clean the path
|
uploadPath = f"{cleanPath.rstrip('/')}/{fileName}"
|
||||||
cleanPath = folderPath.lstrip('/')
|
endpoint = f"sites/{siteId}/drive/root:/{uploadPath}:/content"
|
||||||
uploadPath = f"{cleanPath.rstrip('/')}/{fileName}"
|
|
||||||
endpoint = f"sites/{siteId}/drive/root:/{uploadPath}:/content"
|
|
||||||
|
|
||||||
logger.info(f"Uploading file to: {endpoint}")
|
logger.info(f"Uploading file to: {endpoint}")
|
||||||
|
|
||||||
result = await self._makeGraphApiCall(endpoint, method="PUT", data=content)
|
result = await self._makeGraphApiCall(endpoint, method="PUT", data=content)
|
||||||
|
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.error(f"Upload failed: {result['error']}")
|
raise Exception(f"Upload failed: {result['error']}")
|
||||||
return result
|
|
||||||
|
|
||||||
logger.info(f"File uploaded successfully: {fileName}")
|
logger.info(f"File uploaded successfully: {fileName}")
|
||||||
return result
|
return result
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error uploading file: {str(e)}")
|
|
||||||
return {"error": f"Error uploading file: {str(e)}"}
|
|
||||||
|
|
||||||
async def downloadFile(self, siteId: str, fileId: str) -> Optional[bytes]:
|
async def downloadFile(self, siteId: str, fileId: str) -> Optional[bytes]:
|
||||||
"""Download a file from SharePoint."""
|
"""Download a file from SharePoint."""
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,8 @@ import logging
|
||||||
import json
|
import json
|
||||||
import base64
|
import base64
|
||||||
import time
|
import time
|
||||||
from typing import Any, Dict, Optional
|
import threading
|
||||||
|
from typing import Any, Dict, Optional, Tuple
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from cryptography.fernet import Fernet
|
from cryptography.fernet import Fernet
|
||||||
from cryptography.hazmat.primitives import hashes
|
from cryptography.hazmat.primitives import hashes
|
||||||
|
|
@ -286,6 +287,16 @@ def handleSecretJson(value: str, userId: str = "system", keyName: str = "unknown
|
||||||
# Structure: {user_id: {key_name: [timestamps]}}
|
# Structure: {user_id: {key_name: [timestamps]}}
|
||||||
_decryption_attempts = {}
|
_decryption_attempts = {}
|
||||||
|
|
||||||
|
# Process-wide plaintext cache for decrypted secrets.
|
||||||
|
# Key: the encrypted ciphertext (which already includes env prefix).
|
||||||
|
# Value: (expiresAtMonotonic, plaintext).
|
||||||
|
# TTL is short enough that key rotation propagates quickly, long enough that
|
||||||
|
# hot DB-init paths (every API call building a connector) don't blow the
|
||||||
|
# decryption rate limit. 60s is a deliberate compromise.
|
||||||
|
_DECRYPTION_CACHE_TTL_S = 60.0
|
||||||
|
_decryption_cache: Dict[str, Tuple[float, str]] = {}
|
||||||
|
_decryption_cache_lock = threading.Lock()
|
||||||
|
|
||||||
def _getMasterKey(envType: str = None) -> bytes:
|
def _getMasterKey(envType: str = None) -> bytes:
|
||||||
"""
|
"""
|
||||||
Get the master key for the specified environment.
|
Get the master key for the specified environment.
|
||||||
|
|
@ -487,6 +498,16 @@ def decryptValue(encryptedValue: str, userId: str = "system", keyName: str = "un
|
||||||
"""
|
"""
|
||||||
Decrypt a value using the master key for the current environment.
|
Decrypt a value using the master key for the current environment.
|
||||||
|
|
||||||
|
A short-lived plaintext cache (TTL `_DECRYPTION_CACHE_TTL_S`) is consulted
|
||||||
|
first. The 10/sec rate-limit on cache misses still protects against
|
||||||
|
brute-force attacks; cache HITS bypass it because they are not actual
|
||||||
|
cryptographic operations — they just return the result of an earlier
|
||||||
|
successful decrypt. Without this cache, hot paths like
|
||||||
|
`mainBackgroundJobService._getDb()` (called per RAG inventory poll AND
|
||||||
|
per walker DB call) trigger the rate limit and surface as
|
||||||
|
"Decryption rate limit exceeded for user 'system' key 'DB_PASSWORD_SECRET'"
|
||||||
|
ERRORs in the RAG inventory UI route.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
encryptedValue: The encrypted value with prefix
|
encryptedValue: The encrypted value with prefix
|
||||||
userId: The user ID making the request (default: "system")
|
userId: The user ID making the request (default: "system")
|
||||||
|
|
@ -501,7 +522,15 @@ def decryptValue(encryptedValue: str, userId: str = "system", keyName: str = "un
|
||||||
if not _isEncryptedValue(encryptedValue):
|
if not _isEncryptedValue(encryptedValue):
|
||||||
return encryptedValue # Return as-is if not encrypted
|
return encryptedValue # Return as-is if not encrypted
|
||||||
|
|
||||||
# Check rate limiting (10 per second per user per key)
|
# Cache lookup BEFORE the rate-limit check: a cache hit is not a new
|
||||||
|
# cryptographic operation and must not be throttled.
|
||||||
|
now = time.monotonic()
|
||||||
|
with _decryption_cache_lock:
|
||||||
|
cached = _decryption_cache.get(encryptedValue)
|
||||||
|
if cached is not None and cached[0] > now:
|
||||||
|
return cached[1]
|
||||||
|
|
||||||
|
# Cache miss → real decrypt → apply rate limit.
|
||||||
if not _checkDecryptionRateLimit(userId, keyName, maxPerSecond=10):
|
if not _checkDecryptionRateLimit(userId, keyName, maxPerSecond=10):
|
||||||
raise ValueError(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' (10/sec)")
|
raise ValueError(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' (10/sec)")
|
||||||
|
|
||||||
|
|
@ -550,10 +579,24 @@ def decryptValue(encryptedValue: str, userId: str = "system", keyName: str = "un
|
||||||
# Don't fail if audit logging fails
|
# Don't fail if audit logging fails
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Populate cache so subsequent reads of the same ciphertext don't
|
||||||
|
# re-decrypt (and don't consume rate-limit budget).
|
||||||
|
with _decryption_cache_lock:
|
||||||
|
_decryption_cache[encryptedValue] = (
|
||||||
|
time.monotonic() + _DECRYPTION_CACHE_TTL_S,
|
||||||
|
decryptedValue,
|
||||||
|
)
|
||||||
|
|
||||||
return decryptedValue
|
return decryptedValue
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ValueError(f"Decryption failed: {e}")
|
raise ValueError(f"Decryption failed: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def clearDecryptionCache() -> None:
|
||||||
|
"""Drop all cached plaintext secrets. Call after key rotation or in tests."""
|
||||||
|
with _decryption_cache_lock:
|
||||||
|
_decryption_cache.clear()
|
||||||
|
|
||||||
# Create the global APP_CONFIG instance
|
# Create the global APP_CONFIG instance
|
||||||
APP_CONFIG = Configuration()
|
APP_CONFIG = Configuration()
|
||||||
|
|
@ -33,20 +33,35 @@ def _ensureUamTablesMatchModels(dbConnector) -> None:
|
||||||
logger.debug(f"_ensureUamTablesMatchModels: {e}")
|
logger.debug(f"_ensureUamTablesMatchModels: {e}")
|
||||||
|
|
||||||
|
|
||||||
def _getConnection(dbConnector):
|
from contextlib import contextmanager
|
||||||
"""Get a connection from the DatabaseConnector.
|
|
||||||
|
|
||||||
Ensures the connection is alive and returns it.
|
|
||||||
Commits any pending transaction first to avoid blocking.
|
@contextmanager
|
||||||
|
def _borrowDbConn(dbConnector):
|
||||||
|
"""Borrow a pooled connection from the DatabaseConnector.
|
||||||
|
|
||||||
|
Index/trigger/FK creation traditionally ran with `conn.autocommit = True`
|
||||||
|
so each CREATE statement is its own transaction (DDL on a managed
|
||||||
|
connection blocks waiting for COMMIT). This helper preserves that
|
||||||
|
behaviour on top of the pool: borrow a connection, flip it to autocommit,
|
||||||
|
yield it, and restore the previous state before returning it to the pool.
|
||||||
"""
|
"""
|
||||||
dbConnector._ensure_connection()
|
with dbConnector.borrowConn() as conn:
|
||||||
conn = dbConnector.connection
|
try:
|
||||||
# Commit any pending transaction to avoid blocking
|
previousAutocommit = conn.autocommit
|
||||||
try:
|
except Exception:
|
||||||
conn.commit()
|
previousAutocommit = False
|
||||||
except Exception:
|
try:
|
||||||
pass # Ignore if nothing to commit
|
conn.autocommit = True
|
||||||
return conn
|
except Exception as e:
|
||||||
|
logger.debug(f"Could not set autocommit on borrowed connection: {e}")
|
||||||
|
try:
|
||||||
|
yield conn
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
conn.autocommit = previousAutocommit
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
@ -174,48 +189,23 @@ def applyMultiTenantOptimizations(dbConnector, tables: Optional[List[str]] = Non
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Get a connection from the connector
|
|
||||||
conn = _getConnection(dbConnector)
|
|
||||||
|
|
||||||
# Save and set autocommit state
|
|
||||||
try:
|
|
||||||
originalAutocommit = conn.autocommit
|
|
||||||
except Exception:
|
|
||||||
originalAutocommit = False
|
|
||||||
|
|
||||||
try:
|
|
||||||
conn.autocommit = True
|
|
||||||
except Exception as autoErr:
|
|
||||||
logger.debug(f"Could not set autocommit: {autoErr}")
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_ensureUamTablesMatchModels(dbConnector)
|
_ensureUamTablesMatchModels(dbConnector)
|
||||||
except Exception as preIdxErr:
|
except Exception as preIdxErr:
|
||||||
logger.debug(f"Pre-index table ensure: {preIdxErr}")
|
logger.debug(f"Pre-index table ensure: {preIdxErr}")
|
||||||
|
|
||||||
try:
|
with _borrowDbConn(dbConnector) as conn:
|
||||||
with conn.cursor() as cursor:
|
with conn.cursor() as cursor:
|
||||||
# Apply indexes
|
|
||||||
results["indexesCreated"] = _applyIndexes(cursor, tables)
|
results["indexesCreated"] = _applyIndexes(cursor, tables)
|
||||||
|
|
||||||
# Apply foreign keys
|
|
||||||
results["foreignKeysCreated"] = _applyForeignKeys(cursor, tables)
|
results["foreignKeysCreated"] = _applyForeignKeys(cursor, tables)
|
||||||
|
|
||||||
# Apply immutable triggers
|
|
||||||
results["triggersCreated"] = _applyImmutableTriggers(cursor, tables)
|
results["triggersCreated"] = _applyImmutableTriggers(cursor, tables)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Multi-tenant optimizations applied: "
|
f"Multi-tenant optimizations applied: "
|
||||||
f"{results['indexesCreated']} indexes, "
|
f"{results['indexesCreated']} indexes, "
|
||||||
f"{results['triggersCreated']} triggers, "
|
f"{results['triggersCreated']} triggers, "
|
||||||
f"{results['foreignKeysCreated']} foreign keys"
|
f"{results['foreignKeysCreated']} foreign keys"
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
# Restore original autocommit state
|
|
||||||
try:
|
|
||||||
conn.autocommit = originalAutocommit
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error applying multi-tenant optimizations: {type(e).__name__}: {e}")
|
logger.error(f"Error applying multi-tenant optimizations: {type(e).__name__}: {e}")
|
||||||
|
|
@ -227,20 +217,14 @@ def applyMultiTenantOptimizations(dbConnector, tables: Optional[List[str]] = Non
|
||||||
def applyIndexesOnly(dbConnector, tables: Optional[List[str]] = None) -> int:
|
def applyIndexesOnly(dbConnector, tables: Optional[List[str]] = None) -> int:
|
||||||
"""Apply only indexes (lighter operation, safe for frequent calls)."""
|
"""Apply only indexes (lighter operation, safe for frequent calls)."""
|
||||||
try:
|
try:
|
||||||
conn = _getConnection(dbConnector)
|
|
||||||
originalAutocommit = conn.autocommit
|
|
||||||
conn.autocommit = True
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_ensureUamTablesMatchModels(dbConnector)
|
_ensureUamTablesMatchModels(dbConnector)
|
||||||
except Exception as preIdxErr:
|
except Exception as preIdxErr:
|
||||||
logger.debug(f"Pre-index table ensure: {preIdxErr}")
|
logger.debug(f"Pre-index table ensure: {preIdxErr}")
|
||||||
|
|
||||||
try:
|
with _borrowDbConn(dbConnector) as conn:
|
||||||
with conn.cursor() as cursor:
|
with conn.cursor() as cursor:
|
||||||
return _applyIndexes(cursor, tables)
|
return _applyIndexes(cursor, tables)
|
||||||
finally:
|
|
||||||
conn.autocommit = originalAutocommit
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error applying indexes: {e}")
|
logger.error(f"Error applying indexes: {e}")
|
||||||
return 0
|
return 0
|
||||||
|
|
@ -514,8 +498,7 @@ def getOptimizationStatus(dbConnector) -> dict:
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
conn = _getConnection(dbConnector)
|
with _borrowDbConn(dbConnector) as conn, conn.cursor() as cursor:
|
||||||
with conn.cursor() as cursor:
|
|
||||||
# Check regular indexes
|
# Check regular indexes
|
||||||
for tableName, indexName, _ in _INDEXES:
|
for tableName, indexName, _ in _INDEXES:
|
||||||
if _tableExists(cursor, tableName):
|
if _tableExists(cursor, tableName):
|
||||||
|
|
|
||||||
|
|
@ -60,11 +60,9 @@ def _getTableColumns(dbConnector, tableName: str) -> List[str]:
|
||||||
ORDER BY ordinal_position
|
ORDER BY ordinal_position
|
||||||
"""
|
"""
|
||||||
|
|
||||||
cursor = dbConnector.connection.cursor()
|
with dbConnector.borrowCursor() as cursor:
|
||||||
cursor.execute(query, (tableName,))
|
cursor.execute(query, (tableName,))
|
||||||
columns = [row[0] for row in cursor.fetchall()]
|
columns = [row[0] for row in cursor.fetchall()]
|
||||||
cursor.close()
|
|
||||||
|
|
||||||
return columns
|
return columns
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error getting columns for table {tableName}: {e}")
|
logger.error(f"Error getting columns for table {tableName}: {e}")
|
||||||
|
|
@ -92,29 +90,26 @@ def _getAllTables(dbConnector) -> List[str]:
|
||||||
ORDER BY table_name
|
ORDER BY table_name
|
||||||
"""
|
"""
|
||||||
|
|
||||||
cursor = dbConnector.connection.cursor()
|
with dbConnector.borrowCursor() as cursor:
|
||||||
cursor.execute(query)
|
cursor.execute(query)
|
||||||
allTables = [row[0] for row in cursor.fetchall()]
|
allTables = [row[0] for row in cursor.fetchall()]
|
||||||
|
|
||||||
# Get foreign key relationships to determine dependency order
|
fkQuery = """
|
||||||
fkQuery = """
|
SELECT
|
||||||
SELECT
|
tc.table_name,
|
||||||
tc.table_name,
|
ccu.table_name AS foreign_table_name
|
||||||
ccu.table_name AS foreign_table_name
|
FROM information_schema.table_constraints AS tc
|
||||||
FROM information_schema.table_constraints AS tc
|
JOIN information_schema.key_column_usage AS kcu
|
||||||
JOIN information_schema.key_column_usage AS kcu
|
ON tc.constraint_name = kcu.constraint_name
|
||||||
ON tc.constraint_name = kcu.constraint_name
|
AND tc.table_schema = kcu.table_schema
|
||||||
AND tc.table_schema = kcu.table_schema
|
JOIN information_schema.constraint_column_usage AS ccu
|
||||||
JOIN information_schema.constraint_column_usage AS ccu
|
ON ccu.constraint_name = tc.constraint_name
|
||||||
ON ccu.constraint_name = tc.constraint_name
|
AND ccu.table_schema = tc.table_schema
|
||||||
AND ccu.table_schema = tc.table_schema
|
WHERE tc.constraint_type = 'FOREIGN KEY'
|
||||||
WHERE tc.constraint_type = 'FOREIGN KEY'
|
AND tc.table_schema = 'public'
|
||||||
AND tc.table_schema = 'public'
|
"""
|
||||||
"""
|
cursor.execute(fkQuery)
|
||||||
|
foreignKeys = cursor.fetchall()
|
||||||
cursor.execute(fkQuery)
|
|
||||||
foreignKeys = cursor.fetchall()
|
|
||||||
cursor.close()
|
|
||||||
|
|
||||||
# Build dependency graph (child -> parent mapping)
|
# Build dependency graph (child -> parent mapping)
|
||||||
dependencies = {}
|
dependencies = {}
|
||||||
|
|
@ -154,10 +149,9 @@ def _getAllTables(dbConnector) -> List[str]:
|
||||||
# Fallback: return simple list without ordering
|
# Fallback: return simple list without ordering
|
||||||
try:
|
try:
|
||||||
query = "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'BASE TABLE'"
|
query = "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'BASE TABLE'"
|
||||||
cursor = dbConnector.connection.cursor()
|
with dbConnector.borrowCursor() as cursor:
|
||||||
cursor.execute(query)
|
cursor.execute(query)
|
||||||
tables = [row[0] for row in cursor.fetchall()]
|
tables = [row[0] for row in cursor.fetchall()]
|
||||||
cursor.close()
|
|
||||||
return [t for t in tables if t not in PROTECTED_TABLES]
|
return [t for t in tables if t not in PROTECTED_TABLES]
|
||||||
except Exception:
|
except Exception:
|
||||||
return []
|
return []
|
||||||
|
|
@ -184,11 +178,9 @@ def _getPrimaryKeyColumns(dbConnector, tableName: str) -> List[str]:
|
||||||
AND i.indisprimary
|
AND i.indisprimary
|
||||||
"""
|
"""
|
||||||
|
|
||||||
cursor = dbConnector.connection.cursor()
|
with dbConnector.borrowCursor() as cursor:
|
||||||
cursor.execute(query, (tableName,))
|
cursor.execute(query, (tableName,))
|
||||||
pkColumns = [row[0] for row in cursor.fetchall()]
|
pkColumns = [row[0] for row in cursor.fetchall()]
|
||||||
cursor.close()
|
|
||||||
|
|
||||||
return pkColumns
|
return pkColumns
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Could not get primary key for {tableName}: {e}")
|
logger.debug(f"Could not get primary key for {tableName}: {e}")
|
||||||
|
|
@ -229,21 +221,15 @@ def _findUserReferencesInTable(
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
references = {}
|
references = {}
|
||||||
cursor = dbConnector.connection.cursor()
|
with dbConnector.borrowCursor() as cursor:
|
||||||
|
for userColumn in userColumns:
|
||||||
for userColumn in userColumns:
|
pkSelect = ", ".join([f'"{pk}"' for pk in pkColumns])
|
||||||
# Build SELECT for primary key columns
|
query = f'SELECT {pkSelect} FROM "{tableName}" WHERE "{userColumn}" = %s'
|
||||||
pkSelect = ", ".join([f'"{pk}"' for pk in pkColumns])
|
cursor.execute(query, (userId,))
|
||||||
query = f'SELECT {pkSelect} FROM "{tableName}" WHERE "{userColumn}" = %s'
|
recordKeys = cursor.fetchall()
|
||||||
|
if recordKeys:
|
||||||
cursor.execute(query, (userId,))
|
references[userColumn] = recordKeys
|
||||||
recordKeys = cursor.fetchall()
|
logger.debug(f"Found {len(recordKeys)} records in {tableName}.{userColumn} for user {userId}")
|
||||||
|
|
||||||
if recordKeys:
|
|
||||||
references[userColumn] = recordKeys
|
|
||||||
logger.debug(f"Found {len(recordKeys)} records in {tableName}.{userColumn} for user {userId}")
|
|
||||||
|
|
||||||
cursor.close()
|
|
||||||
return references
|
return references
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -277,42 +263,35 @@ def _anonymizeRecords(
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cursor = dbConnector.connection.cursor()
|
# Resolve column metadata once outside the borrow block (it borrows its
|
||||||
|
# own connection internally).
|
||||||
|
columns = _getTableColumns(dbConnector, tableName)
|
||||||
|
hasModifiedAt = "sysModifiedAt" in columns
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
|
with dbConnector.borrowCursor() as cursor:
|
||||||
|
for recordKey in recordKeys:
|
||||||
|
whereClause = " AND ".join([f'"{pk}" = %s' for pk in pkColumns])
|
||||||
|
if hasModifiedAt:
|
||||||
|
query = f'UPDATE "{tableName}" SET "{columnName}" = %s, "sysModifiedAt" = %s WHERE {whereClause}'
|
||||||
|
params = [anonymousValue, getUtcTimestamp()]
|
||||||
|
else:
|
||||||
|
query = f'UPDATE "{tableName}" SET "{columnName}" = %s WHERE {whereClause}'
|
||||||
|
params = [anonymousValue]
|
||||||
|
|
||||||
for recordKey in recordKeys:
|
if isinstance(recordKey, tuple):
|
||||||
# Build WHERE clause for primary key
|
params.extend(recordKey)
|
||||||
whereClause = " AND ".join([f'"{pk}" = %s' for pk in pkColumns])
|
else:
|
||||||
|
params.append(recordKey)
|
||||||
|
|
||||||
# Check if table has sysModifiedAt column
|
cursor.execute(query, params)
|
||||||
columns = _getTableColumns(dbConnector, tableName)
|
count += cursor.rowcount
|
||||||
hasModifiedAt = "sysModifiedAt" in columns
|
|
||||||
|
|
||||||
if hasModifiedAt:
|
|
||||||
query = f'UPDATE "{tableName}" SET "{columnName}" = %s, "sysModifiedAt" = %s WHERE {whereClause}'
|
|
||||||
params = [anonymousValue, getUtcTimestamp()]
|
|
||||||
else:
|
|
||||||
query = f'UPDATE "{tableName}" SET "{columnName}" = %s WHERE {whereClause}'
|
|
||||||
params = [anonymousValue]
|
|
||||||
|
|
||||||
# Add primary key values to params
|
|
||||||
if isinstance(recordKey, tuple):
|
|
||||||
params.extend(recordKey)
|
|
||||||
else:
|
|
||||||
params.append(recordKey)
|
|
||||||
|
|
||||||
cursor.execute(query, params)
|
|
||||||
count += cursor.rowcount
|
|
||||||
|
|
||||||
dbConnector.connection.commit()
|
|
||||||
cursor.close()
|
|
||||||
|
|
||||||
logger.info(f"Anonymized {count} records in {tableName}.{columnName}")
|
logger.info(f"Anonymized {count} records in {tableName}.{columnName}")
|
||||||
return count
|
return count
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error anonymizing records in {tableName}.{columnName}: {e}")
|
logger.error(f"Error anonymizing records in {tableName}.{columnName}: {e}")
|
||||||
dbConnector.connection.rollback()
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -338,32 +317,23 @@ def _deleteRecords(
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cursor = dbConnector.connection.cursor()
|
|
||||||
count = 0
|
count = 0
|
||||||
|
with dbConnector.borrowCursor() as cursor:
|
||||||
for recordKey in recordKeys:
|
for recordKey in recordKeys:
|
||||||
# Build WHERE clause for primary key
|
whereClause = " AND ".join([f'"{pk}" = %s' for pk in pkColumns])
|
||||||
whereClause = " AND ".join([f'"{pk}" = %s' for pk in pkColumns])
|
query = f'DELETE FROM "{tableName}" WHERE {whereClause}'
|
||||||
query = f'DELETE FROM "{tableName}" WHERE {whereClause}'
|
if isinstance(recordKey, tuple):
|
||||||
|
params = list(recordKey)
|
||||||
# Prepare params
|
else:
|
||||||
if isinstance(recordKey, tuple):
|
params = [recordKey]
|
||||||
params = list(recordKey)
|
cursor.execute(query, params)
|
||||||
else:
|
count += cursor.rowcount
|
||||||
params = [recordKey]
|
|
||||||
|
|
||||||
cursor.execute(query, params)
|
|
||||||
count += cursor.rowcount
|
|
||||||
|
|
||||||
dbConnector.connection.commit()
|
|
||||||
cursor.close()
|
|
||||||
|
|
||||||
logger.info(f"Deleted {count} records from {tableName}")
|
logger.info(f"Deleted {count} records from {tableName}")
|
||||||
return count
|
return count
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error deleting records from {tableName}: {e}")
|
logger.error(f"Error deleting records from {tableName}: {e}")
|
||||||
dbConnector.connection.rollback()
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -124,6 +124,48 @@ def t(key: str, context: str = "api", value: str = "") -> str:
|
||||||
return _CACHE.get(lang, {}).get(key, f"[{key}]")
|
return _CACHE.get(lang, {}).get(key, f"[{key}]")
|
||||||
|
|
||||||
|
|
||||||
|
def resolveJobMessage(messageData: Optional[Dict[str, Any]], lang: Optional[str] = None) -> Optional[str]:
|
||||||
|
"""Translate a structured BackgroundJob progress payload.
|
||||||
|
|
||||||
|
``messageData`` shape (written by ``JobProgressCallback`` when callers
|
||||||
|
pass ``messageKey`` / ``messageParams``)::
|
||||||
|
|
||||||
|
{"key": "{n} Dateien verarbeitet, {indexed} indexiert",
|
||||||
|
"params": {"n": 145, "indexed": 106}}
|
||||||
|
|
||||||
|
The walker call sites use a string-literal ``messageKey=``; the matching
|
||||||
|
``t("…")`` literal lives in the feature's progress-key registration
|
||||||
|
module (e.g. ``serviceKnowledge/_progressMessages.py``,
|
||||||
|
``features/trustee/mainTrustee.py``) so the boot sync picks it up.
|
||||||
|
|
||||||
|
This helper is the **server-side** translation hop so route handlers can
|
||||||
|
deliver a fully rendered ``progressMessage`` string to the frontend --
|
||||||
|
the frontend never calls ``t()`` on backend-supplied keys.
|
||||||
|
"""
|
||||||
|
if not messageData or not isinstance(messageData, dict):
|
||||||
|
return None
|
||||||
|
key = messageData.get("key")
|
||||||
|
if not isinstance(key, str) or not key:
|
||||||
|
return None
|
||||||
|
params = messageData.get("params") or {}
|
||||||
|
|
||||||
|
if lang is not None:
|
||||||
|
token = _CURRENT_LANGUAGE.set(lang)
|
||||||
|
try:
|
||||||
|
template = t(key)
|
||||||
|
finally:
|
||||||
|
_CURRENT_LANGUAGE.reset(token)
|
||||||
|
else:
|
||||||
|
template = t(key)
|
||||||
|
|
||||||
|
if isinstance(params, dict) and params:
|
||||||
|
try:
|
||||||
|
return template.format(**params)
|
||||||
|
except (KeyError, IndexError, ValueError):
|
||||||
|
return template
|
||||||
|
return template
|
||||||
|
|
||||||
|
|
||||||
def resolveText(value: Any, lang: Optional[str] = None) -> str:
|
def resolveText(value: Any, lang: Optional[str] = None) -> str:
|
||||||
"""Resolve any value to a translated string for the current request language.
|
"""Resolve any value to a translated string for the current request language.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -247,16 +247,29 @@ def _resolveDocumentList(documentListParam, services) -> List[tuple]:
|
||||||
if isinstance(first, dict) and ("documentData" in first or "documentName" in first):
|
if isinstance(first, dict) and ("documentData" in first or "documentName" in first):
|
||||||
for doc in documentListParam:
|
for doc in documentListParam:
|
||||||
rawData = doc.get("documentData")
|
rawData = doc.get("documentData")
|
||||||
logger.debug("_resolveDocumentList: doc keys=%s documentData type=%s documentData truthy=%s", list(doc.keys()), type(rawData).__name__, bool(rawData))
|
fileId = (doc.get("validationMetadata") or {}).get("fileId") or doc.get("fileId", "")
|
||||||
|
fileName = doc.get("documentName") or doc.get("fileName") or "document"
|
||||||
|
mimeType = doc.get("mimeType") or doc.get("documentMimeType") or "application/json"
|
||||||
|
|
||||||
|
# When documentData was persisted as binary (_hasBinaryData), read it
|
||||||
|
# back from file storage via the chat service.
|
||||||
|
if not rawData and doc.get("_hasBinaryData") and fileId:
|
||||||
|
chatService = getattr(services, "chat", None)
|
||||||
|
if chatService:
|
||||||
|
try:
|
||||||
|
rawBytes = chatService.getFileData(fileId)
|
||||||
|
if rawBytes:
|
||||||
|
rawData = rawBytes.decode("utf-8") if isinstance(rawBytes, bytes) else rawBytes
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("_resolveDocumentList: failed to read binary for fileId=%s: %s", fileId, e)
|
||||||
|
|
||||||
|
logger.debug("_resolveDocumentList: doc keys=%s documentData type=%s documentData truthy=%s", list(doc.keys()), type(rawData).__name__ if rawData else "NoneType", bool(rawData))
|
||||||
if not rawData:
|
if not rawData:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
data = json.loads(rawData) if isinstance(rawData, str) else rawData
|
data = json.loads(rawData) if isinstance(rawData, str) else rawData
|
||||||
except (json.JSONDecodeError, TypeError):
|
except (json.JSONDecodeError, TypeError):
|
||||||
continue
|
continue
|
||||||
fileId = (doc.get("validationMetadata") or {}).get("fileId") or doc.get("fileId", "")
|
|
||||||
fileName = doc.get("documentName") or doc.get("fileName") or "document"
|
|
||||||
mimeType = doc.get("mimeType") or doc.get("documentMimeType") or "application/json"
|
|
||||||
results.append((data, fileId, fileName, mimeType))
|
results.append((data, fileId, fileName, mimeType))
|
||||||
if results:
|
if results:
|
||||||
return results
|
return results
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,52 @@ def _tsToIso(ts) -> Optional[str]:
|
||||||
_SYNC_THRESHOLD_SECONDS = 3600
|
_SYNC_THRESHOLD_SECONDS = 3600
|
||||||
|
|
||||||
|
|
||||||
|
def _buildAccountSummary(accountMap: Dict[str, dict], balances: list, year: int) -> list:
|
||||||
|
"""Aggregate balance records into one row per account for *year*.
|
||||||
|
|
||||||
|
For each account the annual balance record (``periodMonth == 0``) of
|
||||||
|
*year* is preferred. If that row is missing, we also check the
|
||||||
|
previous year's annual record so that YTD carry-forwards are visible.
|
||||||
|
Additionally, quarterly closing balances (Q1-Q4) are derived from the
|
||||||
|
monthly records so the AI can compare against quarterly budgets.
|
||||||
|
"""
|
||||||
|
bestClosing: Dict[str, float] = {}
|
||||||
|
quarterClosing: Dict[str, Dict[str, float]] = {}
|
||||||
|
|
||||||
|
for b in balances:
|
||||||
|
acct = b.get("accountNumber", "")
|
||||||
|
bYear = b.get("periodYear", 0)
|
||||||
|
bMonth = b.get("periodMonth", 0)
|
||||||
|
closing = b.get("closingBalance", 0) or 0
|
||||||
|
|
||||||
|
if bYear == year and bMonth == 0:
|
||||||
|
bestClosing[acct] = closing
|
||||||
|
|
||||||
|
if bYear == year and bMonth in (3, 6, 9, 12):
|
||||||
|
qLabel = f"Q{bMonth // 3}"
|
||||||
|
quarterClosing.setdefault(acct, {})[qLabel] = closing
|
||||||
|
|
||||||
|
if acct not in bestClosing and bYear == year - 1 and bMonth == 0:
|
||||||
|
bestClosing[acct] = closing
|
||||||
|
|
||||||
|
summary = []
|
||||||
|
for nr in sorted(accountMap.keys()):
|
||||||
|
info = accountMap[nr]
|
||||||
|
row = {
|
||||||
|
"account": nr,
|
||||||
|
"label": info.get("label", ""),
|
||||||
|
"type": info.get("type", ""),
|
||||||
|
"group": info.get("group", ""),
|
||||||
|
"closingBalance": round(bestClosing.get(nr, 0), 2),
|
||||||
|
}
|
||||||
|
qData = quarterClosing.get(nr, {})
|
||||||
|
for q in ("Q1", "Q2", "Q3", "Q4"):
|
||||||
|
if q in qData:
|
||||||
|
row[q] = round(qData[q], 2)
|
||||||
|
summary.append(row)
|
||||||
|
return summary
|
||||||
|
|
||||||
|
|
||||||
async def refreshAccountingData(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def refreshAccountingData(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""Import/refresh accounting data from the configured external system.
|
"""Import/refresh accounting data from the configured external system.
|
||||||
|
|
||||||
|
|
@ -133,7 +179,13 @@ async def refreshAccountingData(self, parameters: Dict[str, Any]) -> ActionResul
|
||||||
|
|
||||||
|
|
||||||
def _exportAccountingData(trusteeInterface, featureInstanceId: str, dateFrom: str = None, dateTo: str = None) -> str:
|
def _exportAccountingData(trusteeInterface, featureInstanceId: str, dateFrom: str = None, dateTo: str = None) -> str:
|
||||||
"""Export accounting data (accounts, balances, journal entries+lines) as compact JSON for downstream AI nodes."""
|
"""Export accounting data as compact JSON for downstream AI nodes.
|
||||||
|
|
||||||
|
Produces a pre-aggregated ``accountSummary`` (one row per account with
|
||||||
|
a single *Ist* value) so the AI does not have to navigate thousands of
|
||||||
|
raw balance records. Raw per-month balances are deliberately omitted to
|
||||||
|
avoid confusion and reduce payload size.
|
||||||
|
"""
|
||||||
from modules.features.trustee.datamodelFeatureTrustee import (
|
from modules.features.trustee.datamodelFeatureTrustee import (
|
||||||
TrusteeDataAccount,
|
TrusteeDataAccount,
|
||||||
TrusteeDataJournalEntry,
|
TrusteeDataJournalEntry,
|
||||||
|
|
@ -155,17 +207,9 @@ def _exportAccountingData(trusteeInterface, featureInstanceId: str, dateFrom: st
|
||||||
}
|
}
|
||||||
|
|
||||||
balances = trusteeInterface.db.getRecordset(TrusteeDataAccountBalance, recordFilter=baseFilter) or []
|
balances = trusteeInterface.db.getRecordset(TrusteeDataAccountBalance, recordFilter=baseFilter) or []
|
||||||
balanceList = []
|
|
||||||
for b in balances:
|
currentYear = _dt.now(tz=_tz.utc).year
|
||||||
balanceList.append({
|
accountSummary = _buildAccountSummary(accountMap, balances, currentYear)
|
||||||
"account": b.get("accountNumber", ""),
|
|
||||||
"year": b.get("periodYear", 0),
|
|
||||||
"month": b.get("periodMonth", 0),
|
|
||||||
"opening": b.get("openingBalance", 0),
|
|
||||||
"debit": b.get("debitTotal", 0),
|
|
||||||
"credit": b.get("creditTotal", 0),
|
|
||||||
"closing": b.get("closingBalance", 0),
|
|
||||||
})
|
|
||||||
|
|
||||||
entries = trusteeInterface.db.getRecordset(TrusteeDataJournalEntry, recordFilter=baseFilter) or []
|
entries = trusteeInterface.db.getRecordset(TrusteeDataJournalEntry, recordFilter=baseFilter) or []
|
||||||
fromTs = _isoToTs(dateFrom)
|
fromTs = _isoToTs(dateFrom)
|
||||||
|
|
@ -205,21 +249,26 @@ def _exportAccountingData(trusteeInterface, featureInstanceId: str, dateFrom: st
|
||||||
})
|
})
|
||||||
|
|
||||||
export = {
|
export = {
|
||||||
"accounts": list(accountMap.values()),
|
"accountSummary": accountSummary,
|
||||||
"balances": balanceList,
|
|
||||||
"journalLines": lineList,
|
"journalLines": lineList,
|
||||||
"meta": {
|
"meta": {
|
||||||
"accountCount": len(accountMap),
|
"accountCount": len(accountMap),
|
||||||
"entryCount": len(entryMap),
|
"entryCount": len(entryMap),
|
||||||
"lineCount": len(lineList),
|
"lineCount": len(lineList),
|
||||||
"balanceCount": len(balanceList),
|
"summaryYear": currentYear,
|
||||||
"dateFrom": dateFrom,
|
"dateFrom": dateFrom,
|
||||||
"dateTo": dateTo,
|
"dateTo": dateTo,
|
||||||
|
"hint": (
|
||||||
|
"accountSummary contains ONE row per account with the "
|
||||||
|
"current-year closing balance (Ist). Use this for "
|
||||||
|
"budget comparisons. journalLines lists individual "
|
||||||
|
"bookings for drill-down."
|
||||||
|
),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
result = json.dumps(export, ensure_ascii=False, default=str)
|
result = json.dumps(export, ensure_ascii=False, default=str)
|
||||||
logger.info("Exported accounting data: %d accounts, %d entries, %d lines, %d balances (%d bytes)",
|
logger.info("Exported accounting data: %d accounts (summary), %d entries, %d lines (%d bytes)",
|
||||||
len(accountMap), len(entryMap), len(lineList), len(balanceList), len(result))
|
len(accountSummary), len(entryMap), len(lineList), len(result))
|
||||||
return result
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Could not export accounting data: %s", e)
|
logger.warning("Could not export accounting data: %s", e)
|
||||||
|
|
|
||||||
70
scripts/debug_rag_job_result.py
Normal file
70
scripts/debug_rag_job_result.py
Normal file
|
|
@ -0,0 +1,70 @@
|
||||||
|
"""Diagnose: read a connection.bootstrap job result and print its keys.
|
||||||
|
|
||||||
|
Usage (from repo root):
|
||||||
|
python gateway\scripts\debug_rag_job_result.py
|
||||||
|
|
||||||
|
Prints the most recent SUCCESS connection.bootstrap job per UserConnection so
|
||||||
|
we can see whether the `stoppedAtLimit` key actually landed in the JSONB
|
||||||
|
`result` column. If it is missing here, the bug is in the writer (handler or
|
||||||
|
_markSuccess); if it is present here but absent in the HTTP response, the bug
|
||||||
|
is in routeRagInventory.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
_HERE = Path(__file__).resolve()
|
||||||
|
sys.path.insert(0, str(_HERE.parent.parent)) # gateway/
|
||||||
|
os.chdir(_HERE.parent.parent)
|
||||||
|
|
||||||
|
from modules.shared.configuration import APP_CONFIG # noqa: E402
|
||||||
|
from modules.connectors.connectorDbPostgre import getCachedConnector # noqa: E402
|
||||||
|
from modules.datamodels.datamodelBackgroundJob import BackgroundJob # noqa: E402
|
||||||
|
from modules.routes.routeRagInventory import _flattenJobResult # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
def _main() -> None:
|
||||||
|
db = getCachedConnector(
|
||||||
|
dbDatabase=APP_CONFIG.get("DB_DATABASE", "poweron_app"),
|
||||||
|
dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
|
||||||
|
dbPort=int(APP_CONFIG.get("DB_PORT", "5432")),
|
||||||
|
dbUser=APP_CONFIG.get("DB_USER"),
|
||||||
|
dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET"),
|
||||||
|
)
|
||||||
|
|
||||||
|
rows = db.getRecordset(BackgroundJob)
|
||||||
|
rows = [r for r in rows if r.get("jobType") == "connection.bootstrap"]
|
||||||
|
rows = [r for r in rows if r.get("status") == "SUCCESS"]
|
||||||
|
rows.sort(key=lambda r: r.get("createdAt") or 0, reverse=True)
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
print("No SUCCESS connection.bootstrap jobs found.")
|
||||||
|
return
|
||||||
|
|
||||||
|
seenConnections: set[str] = set()
|
||||||
|
for j in rows:
|
||||||
|
connId = (j.get("payload") or {}).get("connectionId", "<unknown>")
|
||||||
|
if connId in seenConnections:
|
||||||
|
continue
|
||||||
|
seenConnections.add(connId)
|
||||||
|
result = j.get("result") or {}
|
||||||
|
flat = _flattenJobResult(result) if isinstance(result, dict) else {}
|
||||||
|
print("=" * 80)
|
||||||
|
print(f"jobId = {j.get('id')}")
|
||||||
|
print(f"connectionId = {connId}")
|
||||||
|
print(f"finishedAt = {j.get('finishedAt')}")
|
||||||
|
print(f"raw keys = {sorted(result.keys()) if isinstance(result, dict) else 'N/A'}")
|
||||||
|
print("--- flattened (what the API will return now) ---")
|
||||||
|
print(f" indexed = {flat.get('indexed')}")
|
||||||
|
print(f" skippedDuplicate= {flat.get('skippedDuplicate')}")
|
||||||
|
print(f" skippedPolicy = {flat.get('skippedPolicy')}")
|
||||||
|
print(f" stoppedAtLimit = {flat.get('stoppedAtLimit')!r} <-- KEY CHECK")
|
||||||
|
print(f" limits = {flat.get('limits')}")
|
||||||
|
print(f" bytesProcessed = {flat.get('bytesProcessed')}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
_main()
|
||||||
97
scripts/script_db_migrate_backgroundjob_progress_data.py
Normal file
97
scripts/script_db_migrate_backgroundjob_progress_data.py
Normal file
|
|
@ -0,0 +1,97 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Migration: Add `progressMessageData` JSONB column to BackgroundJob.
|
||||||
|
|
||||||
|
Carries the structured i18n payload that lets the frontend translate
|
||||||
|
walker progress messages (e.g. "{n} Dateien verarbeitet, {indexed}
|
||||||
|
indexiert") into the user's UI language. `progressMessage` stays around
|
||||||
|
as the rendered fallback for older clients and audit logs.
|
||||||
|
|
||||||
|
Safe to run multiple times (checks column existence before acting).
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/script_db_migrate_backgroundjob_progress_data.py [--dry-run]
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
scriptPath = Path(__file__).resolve()
|
||||||
|
gatewayPath = scriptPath.parent.parent
|
||||||
|
sys.path.insert(0, str(gatewayPath))
|
||||||
|
os.chdir(str(gatewayPath))
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
|
|
||||||
|
def _getConnection():
|
||||||
|
return psycopg2.connect(
|
||||||
|
host=APP_CONFIG.get("DB_HOST", "localhost"),
|
||||||
|
port=int(APP_CONFIG.get("DB_PORT", "5432")),
|
||||||
|
database=APP_CONFIG.get("DB_DATABASE", "poweron_app"),
|
||||||
|
user=APP_CONFIG.get("DB_USER"),
|
||||||
|
password=APP_CONFIG.get("DB_PASSWORD_SECRET"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _columnExists(cur, table: str, column: str) -> bool:
|
||||||
|
cur.execute(
|
||||||
|
"""SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_schema = 'public' AND table_name = %s AND column_name = %s""",
|
||||||
|
(table, column),
|
||||||
|
)
|
||||||
|
return cur.fetchone() is not None
|
||||||
|
|
||||||
|
|
||||||
|
def _tableExists(cur, table: str) -> bool:
|
||||||
|
cur.execute(
|
||||||
|
"""SELECT 1 FROM information_schema.tables
|
||||||
|
WHERE table_schema = 'public' AND table_name = %s""",
|
||||||
|
(table,),
|
||||||
|
)
|
||||||
|
return cur.fetchone() is not None
|
||||||
|
|
||||||
|
|
||||||
|
def migrate(dryRun: bool = False):
|
||||||
|
conn = _getConnection()
|
||||||
|
conn.autocommit = False
|
||||||
|
cur = conn.cursor()
|
||||||
|
|
||||||
|
table, column = "BackgroundJob", "progressMessageData"
|
||||||
|
executed = []
|
||||||
|
|
||||||
|
if not _tableExists(cur, table):
|
||||||
|
logger.warning("SKIP: table %s does not exist yet (will be created on next ORM init)", table)
|
||||||
|
elif _columnExists(cur, table, column):
|
||||||
|
logger.info("SKIP: %s.%s already exists", table, column)
|
||||||
|
else:
|
||||||
|
sql = f'ALTER TABLE public."{table}" ADD COLUMN "{column}" JSONB DEFAULT NULL;'
|
||||||
|
logger.info("EXEC: %s", sql)
|
||||||
|
if not dryRun:
|
||||||
|
cur.execute(sql)
|
||||||
|
executed.append(sql)
|
||||||
|
|
||||||
|
if not dryRun and executed:
|
||||||
|
conn.commit()
|
||||||
|
logger.info("Migration committed (%d statements)", len(executed))
|
||||||
|
elif dryRun and executed:
|
||||||
|
conn.rollback()
|
||||||
|
logger.info("DRY RUN -- would execute %d statements", len(executed))
|
||||||
|
else:
|
||||||
|
logger.info("Nothing to do -- schema already up to date")
|
||||||
|
|
||||||
|
cur.close()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(description=__doc__)
|
||||||
|
parser.add_argument("--dry-run", action="store_true", help="Print SQL without executing")
|
||||||
|
args = parser.parse_args()
|
||||||
|
migrate(dryRun=args.dry_run)
|
||||||
110
scripts/script_db_migrate_datasource_inherit.py
Normal file
110
scripts/script_db_migrate_datasource_inherit.py
Normal file
|
|
@ -0,0 +1,110 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Migration: Drop NOT NULL on DataSource/FeatureDataSource cascade-inherit flags.
|
||||||
|
|
||||||
|
Switches three-valued semantics (NULL = inherit, True/False = explicit) for:
|
||||||
|
- DataSource.neutralize, ragIndexEnabled, scope
|
||||||
|
- FeatureDataSource.neutralize, scope
|
||||||
|
|
||||||
|
Existing rows keep their explicit values; only new records (or explicit reset
|
||||||
|
via cascade) start with NULL. Migration is non-destructive and idempotent.
|
||||||
|
|
||||||
|
Safe to run multiple times.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/script_db_migrate_datasource_inherit.py [--dry-run]
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
scriptPath = Path(__file__).resolve()
|
||||||
|
gatewayPath = scriptPath.parent.parent
|
||||||
|
sys.path.insert(0, str(gatewayPath))
|
||||||
|
os.chdir(str(gatewayPath))
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
|
|
||||||
|
def _getConnection():
|
||||||
|
return psycopg2.connect(
|
||||||
|
host=APP_CONFIG.get("DB_HOST", "localhost"),
|
||||||
|
port=int(APP_CONFIG.get("DB_PORT", "5432")),
|
||||||
|
database=APP_CONFIG.get("DB_DATABASE", "poweron_app"),
|
||||||
|
user=APP_CONFIG.get("DB_USER"),
|
||||||
|
password=APP_CONFIG.get("DB_PASSWORD_SECRET"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _tableExists(cur, table: str) -> bool:
|
||||||
|
cur.execute(
|
||||||
|
"""SELECT 1 FROM information_schema.tables
|
||||||
|
WHERE table_schema = 'public' AND table_name = %s""",
|
||||||
|
(table,),
|
||||||
|
)
|
||||||
|
return cur.fetchone() is not None
|
||||||
|
|
||||||
|
|
||||||
|
def _columnIsNullable(cur, table: str, column: str) -> bool:
|
||||||
|
cur.execute(
|
||||||
|
"""SELECT is_nullable FROM information_schema.columns
|
||||||
|
WHERE table_schema = 'public' AND table_name = %s AND column_name = %s""",
|
||||||
|
(table, column),
|
||||||
|
)
|
||||||
|
row = cur.fetchone()
|
||||||
|
if not row:
|
||||||
|
return False
|
||||||
|
return row[0] == "YES"
|
||||||
|
|
||||||
|
|
||||||
|
def migrate(dryRun: bool = False):
|
||||||
|
conn = _getConnection()
|
||||||
|
conn.autocommit = False
|
||||||
|
cur = conn.cursor()
|
||||||
|
|
||||||
|
targets = [
|
||||||
|
("DataSource", "neutralize"),
|
||||||
|
("DataSource", "ragIndexEnabled"),
|
||||||
|
("DataSource", "scope"),
|
||||||
|
("FeatureDataSource", "neutralize"),
|
||||||
|
("FeatureDataSource", "scope"),
|
||||||
|
]
|
||||||
|
|
||||||
|
executed = []
|
||||||
|
for table, column in targets:
|
||||||
|
if not _tableExists(cur, table):
|
||||||
|
logger.warning("SKIP: table %s does not exist yet", table)
|
||||||
|
continue
|
||||||
|
if _columnIsNullable(cur, table, column):
|
||||||
|
logger.info("SKIP: %s.%s already nullable", table, column)
|
||||||
|
continue
|
||||||
|
sql = f'ALTER TABLE public."{table}" ALTER COLUMN "{column}" DROP NOT NULL;'
|
||||||
|
logger.info("EXEC: %s", sql)
|
||||||
|
if not dryRun:
|
||||||
|
cur.execute(sql)
|
||||||
|
executed.append(sql)
|
||||||
|
|
||||||
|
if not dryRun and executed:
|
||||||
|
conn.commit()
|
||||||
|
logger.info("Migration committed (%d statements)", len(executed))
|
||||||
|
elif dryRun and executed:
|
||||||
|
conn.rollback()
|
||||||
|
logger.info("DRY RUN -- would execute %d statements", len(executed))
|
||||||
|
else:
|
||||||
|
logger.info("Nothing to do -- schema already nullable")
|
||||||
|
|
||||||
|
cur.close()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(description=__doc__)
|
||||||
|
parser.add_argument("--dry-run", action="store_true", help="Print SQL without executing")
|
||||||
|
args = parser.parse_args()
|
||||||
|
migrate(dryRun=args.dry_run)
|
||||||
102
scripts/script_db_migrate_datasource_settings.py
Normal file
102
scripts/script_db_migrate_datasource_settings.py
Normal file
|
|
@ -0,0 +1,102 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Migration: Add `settings` JSONB column to DataSource and FeatureDataSource.
|
||||||
|
|
||||||
|
This is a one-off migration for the UDB DataSource Settings (Settings-Icon)
|
||||||
|
feature: walkers read RAG limits (maxBytes, maxFileSize, maxItems, maxDepth)
|
||||||
|
from this JSON blob, the UI edits them. Existing rows get NULL until the
|
||||||
|
next bootstrap lazy-fills sensible defaults from `_ragLimits.RAG_LIMITS_DEFAULT`.
|
||||||
|
|
||||||
|
Safe to run multiple times (checks column existence before acting).
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/script_db_migrate_datasource_settings.py [--dry-run]
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
scriptPath = Path(__file__).resolve()
|
||||||
|
gatewayPath = scriptPath.parent.parent
|
||||||
|
sys.path.insert(0, str(gatewayPath))
|
||||||
|
os.chdir(str(gatewayPath))
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
|
|
||||||
|
def _getConnection():
|
||||||
|
return psycopg2.connect(
|
||||||
|
host=APP_CONFIG.get("DB_HOST", "localhost"),
|
||||||
|
port=int(APP_CONFIG.get("DB_PORT", "5432")),
|
||||||
|
database=APP_CONFIG.get("DB_DATABASE", "poweron_app"),
|
||||||
|
user=APP_CONFIG.get("DB_USER"),
|
||||||
|
password=APP_CONFIG.get("DB_PASSWORD_SECRET"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _columnExists(cur, table: str, column: str) -> bool:
|
||||||
|
cur.execute(
|
||||||
|
"""SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_schema = 'public' AND table_name = %s AND column_name = %s""",
|
||||||
|
(table, column),
|
||||||
|
)
|
||||||
|
return cur.fetchone() is not None
|
||||||
|
|
||||||
|
|
||||||
|
def _tableExists(cur, table: str) -> bool:
|
||||||
|
cur.execute(
|
||||||
|
"""SELECT 1 FROM information_schema.tables
|
||||||
|
WHERE table_schema = 'public' AND table_name = %s""",
|
||||||
|
(table,),
|
||||||
|
)
|
||||||
|
return cur.fetchone() is not None
|
||||||
|
|
||||||
|
|
||||||
|
def migrate(dryRun: bool = False):
|
||||||
|
conn = _getConnection()
|
||||||
|
conn.autocommit = False
|
||||||
|
cur = conn.cursor()
|
||||||
|
|
||||||
|
targets = [
|
||||||
|
("DataSource", "settings"),
|
||||||
|
("FeatureDataSource", "settings"),
|
||||||
|
]
|
||||||
|
|
||||||
|
executed = []
|
||||||
|
for table, column in targets:
|
||||||
|
if not _tableExists(cur, table):
|
||||||
|
logger.warning("SKIP: table %s does not exist yet (will be created on next ORM init)", table)
|
||||||
|
continue
|
||||||
|
if _columnExists(cur, table, column):
|
||||||
|
logger.info("SKIP: %s.%s already exists", table, column)
|
||||||
|
continue
|
||||||
|
sql = f'ALTER TABLE public."{table}" ADD COLUMN "{column}" JSONB DEFAULT NULL;'
|
||||||
|
logger.info("EXEC: %s", sql)
|
||||||
|
if not dryRun:
|
||||||
|
cur.execute(sql)
|
||||||
|
executed.append(sql)
|
||||||
|
|
||||||
|
if not dryRun and executed:
|
||||||
|
conn.commit()
|
||||||
|
logger.info("Migration committed (%d statements)", len(executed))
|
||||||
|
elif dryRun and executed:
|
||||||
|
conn.rollback()
|
||||||
|
logger.info("DRY RUN -- would execute %d statements", len(executed))
|
||||||
|
else:
|
||||||
|
logger.info("Nothing to do -- schema already up to date")
|
||||||
|
|
||||||
|
cur.close()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(description=__doc__)
|
||||||
|
parser.add_argument("--dry-run", action="store_true", help="Print SQL without executing")
|
||||||
|
args = parser.parse_args()
|
||||||
|
migrate(dryRun=args.dry_run)
|
||||||
274
scripts/script_migrate_user_uid.py
Normal file
274
scripts/script_migrate_user_uid.py
Normal file
|
|
@ -0,0 +1,274 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""One-time migration: Reassign all DB references from an old user UID to a new UID.
|
||||||
|
|
||||||
|
When a user is re-created in PORTA (same username, new UUID), all existing records
|
||||||
|
still reference the old UUID. This script scans ALL registered databases and tables
|
||||||
|
for VARCHAR columns containing the old UID and updates them to the new UID.
|
||||||
|
|
||||||
|
Affected columns include:
|
||||||
|
- sysCreatedBy / sysModifiedBy (on every table via PowerOnModel)
|
||||||
|
- userId, revokedBy, createdByUserId, publishedBy, triggeredBy, assignedTo, etc.
|
||||||
|
|
||||||
|
The script auto-detects the new UID from the UserInDB table by username.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Dry-run (default) — shows what would change, no writes:
|
||||||
|
python scripts/script_migrate_user_uid.py --username patrick.helvetia --old-uid <OLD_UUID>
|
||||||
|
|
||||||
|
# Execute for real:
|
||||||
|
python scripts/script_migrate_user_uid.py --username patrick.helvetia --old-uid <OLD_UUID> --execute
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
|
scriptPath = Path(__file__).resolve()
|
||||||
|
gatewayPath = scriptPath.parent.parent
|
||||||
|
sys.path.insert(0, str(gatewayPath))
|
||||||
|
os.chdir(str(gatewayPath))
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
import psycopg2.extras
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
|
|
||||||
|
ALL_DATABASES = [
|
||||||
|
"poweron_app",
|
||||||
|
"poweron_chat",
|
||||||
|
"poweron_management",
|
||||||
|
"poweron_knowledge",
|
||||||
|
"poweron_billing",
|
||||||
|
"poweron_workspace",
|
||||||
|
"poweron_graphicaleditor",
|
||||||
|
"poweron_chatbot",
|
||||||
|
"poweron_trustee",
|
||||||
|
"poweron_commcoach",
|
||||||
|
"poweron_neutralization",
|
||||||
|
"poweron_realestate",
|
||||||
|
"poweron_teamsbot",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _getConnection(dbName: str):
|
||||||
|
return psycopg2.connect(
|
||||||
|
host=APP_CONFIG.get("DB_HOST", "localhost"),
|
||||||
|
port=int(APP_CONFIG.get("DB_PORT", "5432")),
|
||||||
|
database=dbName,
|
||||||
|
user=APP_CONFIG.get("DB_USER"),
|
||||||
|
password=APP_CONFIG.get("DB_PASSWORD_SECRET"),
|
||||||
|
client_encoding="utf8",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _getTablesInDb(conn) -> List[str]:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute("""
|
||||||
|
SELECT table_name FROM information_schema.tables
|
||||||
|
WHERE table_schema = 'public'
|
||||||
|
AND table_type = 'BASE TABLE'
|
||||||
|
AND table_name NOT LIKE '\\_%%'
|
||||||
|
ORDER BY table_name
|
||||||
|
""")
|
||||||
|
return [row[0] for row in cur.fetchall()]
|
||||||
|
|
||||||
|
|
||||||
|
def _getVarcharColumns(conn, tableName: str) -> List[str]:
|
||||||
|
"""Get all VARCHAR/TEXT columns for a table (potential user-ID carriers)."""
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute("""
|
||||||
|
SELECT column_name FROM information_schema.columns
|
||||||
|
WHERE table_schema = 'public'
|
||||||
|
AND table_name = %s
|
||||||
|
AND data_type IN ('character varying', 'text')
|
||||||
|
ORDER BY ordinal_position
|
||||||
|
""", (tableName,))
|
||||||
|
return [row[0] for row in cur.fetchall()]
|
||||||
|
|
||||||
|
|
||||||
|
def _countMatches(conn, tableName: str, columnName: str, oldUid: str) -> int:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute(
|
||||||
|
f'SELECT COUNT(*) FROM "{tableName}" WHERE "{columnName}" = %s',
|
||||||
|
(oldUid,),
|
||||||
|
)
|
||||||
|
return cur.fetchone()[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _updateColumn(conn, tableName: str, columnName: str, oldUid: str, newUid: str) -> int:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute(
|
||||||
|
f'UPDATE "{tableName}" SET "{columnName}" = %s WHERE "{columnName}" = %s',
|
||||||
|
(newUid, oldUid),
|
||||||
|
)
|
||||||
|
return cur.rowcount
|
||||||
|
|
||||||
|
|
||||||
|
def _lookupNewUid(username: str) -> Optional[str]:
|
||||||
|
"""Find the current UID for a username in poweron_app.UserInDB."""
|
||||||
|
conn = _getConnection("poweron_app")
|
||||||
|
try:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute(
|
||||||
|
'SELECT "id" FROM "UserInDB" WHERE "username" = %s',
|
||||||
|
(username,),
|
||||||
|
)
|
||||||
|
row = cur.fetchone()
|
||||||
|
return row[0] if row else None
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _scanJsonbForUid(conn, tableName: str, columnName: str, oldUid: str) -> int:
|
||||||
|
"""Count JSONB fields that contain the old UID as a text value anywhere."""
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute(
|
||||||
|
f"""SELECT COUNT(*) FROM "{tableName}"
|
||||||
|
WHERE "{columnName}"::text LIKE %s""",
|
||||||
|
(f"%{oldUid}%",),
|
||||||
|
)
|
||||||
|
return cur.fetchone()[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _updateJsonbColumn(conn, tableName: str, columnName: str, oldUid: str, newUid: str) -> int:
|
||||||
|
"""Replace old UID inside JSONB columns using text replacement."""
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute(
|
||||||
|
f"""UPDATE "{tableName}"
|
||||||
|
SET "{columnName}" = REPLACE("{columnName}"::text, %s, %s)::jsonb
|
||||||
|
WHERE "{columnName}"::text LIKE %s""",
|
||||||
|
(oldUid, newUid, f"%{oldUid}%"),
|
||||||
|
)
|
||||||
|
return cur.rowcount
|
||||||
|
|
||||||
|
|
||||||
|
def _getJsonbColumns(conn, tableName: str) -> List[str]:
|
||||||
|
"""Get all JSONB columns for a table."""
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute("""
|
||||||
|
SELECT column_name FROM information_schema.columns
|
||||||
|
WHERE table_schema = 'public'
|
||||||
|
AND table_name = %s
|
||||||
|
AND data_type = 'jsonb'
|
||||||
|
ORDER BY ordinal_position
|
||||||
|
""", (tableName,))
|
||||||
|
return [row[0] for row in cur.fetchall()]
|
||||||
|
|
||||||
|
|
||||||
|
def migrate(username: str, oldUid: str, execute: bool = False):
|
||||||
|
newUid = _lookupNewUid(username)
|
||||||
|
if not newUid:
|
||||||
|
logger.error(f"User '{username}' not found in UserInDB. Cannot determine new UID.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
if newUid == oldUid:
|
||||||
|
logger.error(f"Old UID and new UID are identical ({oldUid}). Nothing to migrate.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
logger.info(f"Migration: user '{username}'")
|
||||||
|
logger.info(f" Old UID: {oldUid}")
|
||||||
|
logger.info(f" New UID: {newUid}")
|
||||||
|
logger.info(f" Mode: {'EXECUTE' if execute else 'DRY-RUN'}")
|
||||||
|
logger.info("")
|
||||||
|
|
||||||
|
totalUpdated = 0
|
||||||
|
findings: List[Tuple[str, str, str, int]] = []
|
||||||
|
|
||||||
|
for dbName in ALL_DATABASES:
|
||||||
|
try:
|
||||||
|
conn = _getConnection(dbName)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f" Cannot connect to {dbName}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
conn.autocommit = False
|
||||||
|
tables = _getTablesInDb(conn)
|
||||||
|
|
||||||
|
for tableName in tables:
|
||||||
|
varcharCols = _getVarcharColumns(conn, tableName)
|
||||||
|
for col in varcharCols:
|
||||||
|
count = _countMatches(conn, tableName, col, oldUid)
|
||||||
|
if count > 0:
|
||||||
|
findings.append((dbName, tableName, col, count))
|
||||||
|
if execute:
|
||||||
|
updated = _updateColumn(conn, tableName, col, oldUid, newUid)
|
||||||
|
totalUpdated += updated
|
||||||
|
logger.info(f" [UPDATED] {dbName}.{tableName}.{col}: {updated} rows")
|
||||||
|
else:
|
||||||
|
logger.info(f" [DRY-RUN] {dbName}.{tableName}.{col}: {count} rows would be updated")
|
||||||
|
|
||||||
|
jsonbCols = _getJsonbColumns(conn, tableName)
|
||||||
|
for col in jsonbCols:
|
||||||
|
count = _scanJsonbForUid(conn, tableName, col, oldUid)
|
||||||
|
if count > 0:
|
||||||
|
findings.append((dbName, tableName, f"{col} (JSONB)", count))
|
||||||
|
if execute:
|
||||||
|
_updateJsonbColumn(conn, tableName, col, oldUid, newUid)
|
||||||
|
totalUpdated += count
|
||||||
|
logger.info(f" [UPDATED] {dbName}.{tableName}.{col} (JSONB): {count} rows")
|
||||||
|
else:
|
||||||
|
logger.info(f" [DRY-RUN] {dbName}.{tableName}.{col} (JSONB): {count} rows would be updated")
|
||||||
|
|
||||||
|
if execute:
|
||||||
|
conn.commit()
|
||||||
|
else:
|
||||||
|
conn.rollback()
|
||||||
|
except Exception as e:
|
||||||
|
conn.rollback()
|
||||||
|
logger.error(f" Error processing {dbName}: {e}")
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
logger.info("")
|
||||||
|
logger.info("=" * 70)
|
||||||
|
logger.info("SUMMARY")
|
||||||
|
logger.info("=" * 70)
|
||||||
|
if not findings:
|
||||||
|
logger.info(" No references to old UID found in any database.")
|
||||||
|
else:
|
||||||
|
logger.info(f" Found {len(findings)} column(s) with references to old UID:")
|
||||||
|
for dbName, tableName, col, count in findings:
|
||||||
|
logger.info(f" {dbName}.{tableName}.{col}: {count} rows")
|
||||||
|
logger.info("")
|
||||||
|
if execute:
|
||||||
|
logger.info(f" Total rows updated: {totalUpdated}")
|
||||||
|
else:
|
||||||
|
logger.info(f" Total rows that would be updated: {sum(c for _, _, _, c in findings)}")
|
||||||
|
logger.info("")
|
||||||
|
logger.info(" To apply changes, re-run with --execute")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
description="Migrate all DB references from old user UID to new UID."
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--username",
|
||||||
|
required=True,
|
||||||
|
help="Username to migrate (e.g. 'patrick.helvetia'). Used to look up the new UID.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--old-uid",
|
||||||
|
required=True,
|
||||||
|
help="The old UUID that is orphaned in the database.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--execute",
|
||||||
|
action="store_true",
|
||||||
|
default=False,
|
||||||
|
help="Actually perform the migration. Without this flag, only a dry-run is done.",
|
||||||
|
)
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
migrate(username=args.username, oldUid=args.old_uid, execute=args.execute)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
@ -25,7 +25,7 @@ if not c or not c.connection:
|
||||||
print("STAGE0: DB_CONNECTION=none (check config.ini / .env)")
|
print("STAGE0: DB_CONNECTION=none (check config.ini / .env)")
|
||||||
raise SystemExit(2)
|
raise SystemExit(2)
|
||||||
|
|
||||||
cur = c.connection.cursor()
|
cur = c.borrowCursor()
|
||||||
|
|
||||||
|
|
||||||
def _scalar(cur):
|
def _scalar(cur):
|
||||||
|
|
|
||||||
|
|
@ -12,11 +12,16 @@ broken query into "no rows found". That hid bugs like:
|
||||||
|
|
||||||
These tests pin the new contract: empty result sets still return ``[]`` /
|
These tests pin the new contract: empty result sets still return ``[]`` /
|
||||||
``None`` (normal), but any exception inside the query path propagates as
|
``None`` (normal), but any exception inside the query path propagates as
|
||||||
``DatabaseQueryError`` with the table name attached. The transaction is
|
``DatabaseQueryError`` with the table name attached.
|
||||||
rolled back so the connection is usable for subsequent queries.
|
|
||||||
|
Since the 2026-05-17 pool refactor (`c-work/2-build/2026-05-postgres-connection-pool.md`)
|
||||||
|
the connector borrows a connection from `_PoolRegistry` on every call via the
|
||||||
|
`borrowConn()` context manager. The tests mock that context manager so the
|
||||||
|
fast-fail contract is exercised without requiring a live Postgres server.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from contextlib import contextmanager
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
@ -25,7 +30,7 @@ import psycopg2.errors
|
||||||
from modules.connectors.connectorDbPostgre import (
|
from modules.connectors.connectorDbPostgre import (
|
||||||
DatabaseConnector,
|
DatabaseConnector,
|
||||||
DatabaseQueryError,
|
DatabaseQueryError,
|
||||||
_rollbackQuietly,
|
_stripNulBytesFromStr,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -39,26 +44,44 @@ class DummyTable:
|
||||||
|
|
||||||
|
|
||||||
def _makeConnector(cursorBehavior):
|
def _makeConnector(cursorBehavior):
|
||||||
"""Build a ``DatabaseConnector`` skeleton with mocked connection/cursor.
|
"""Build a ``DatabaseConnector`` skeleton with a mocked pool borrow.
|
||||||
|
|
||||||
``cursorBehavior`` is a callable invoked with the cursor mock so the test
|
``cursorBehavior`` is a callable invoked with the cursor mock so the test
|
||||||
can configure ``execute``/``fetchall``/``fetchone`` per scenario.
|
can configure ``execute``/``fetchall``/``fetchone`` per scenario.
|
||||||
|
|
||||||
|
Returns ``(connector, conn, cursor)``:
|
||||||
|
* ``conn`` exposes ``commit`` / ``rollback`` MagicMocks so tests can
|
||||||
|
assert that the borrow lifecycle did the right thing.
|
||||||
|
* ``cursor`` is the per-test cursor mock.
|
||||||
"""
|
"""
|
||||||
connector = DatabaseConnector.__new__(DatabaseConnector)
|
connector = DatabaseConnector.__new__(DatabaseConnector)
|
||||||
|
|
||||||
cursor = MagicMock()
|
cursor = MagicMock()
|
||||||
|
cursorBehavior(cursor)
|
||||||
|
|
||||||
cursorContext = MagicMock()
|
cursorContext = MagicMock()
|
||||||
cursorContext.__enter__ = MagicMock(return_value=cursor)
|
cursorContext.__enter__ = MagicMock(return_value=cursor)
|
||||||
cursorContext.__exit__ = MagicMock(return_value=False)
|
cursorContext.__exit__ = MagicMock(return_value=False)
|
||||||
|
|
||||||
connection = MagicMock()
|
conn = MagicMock()
|
||||||
connection.cursor.return_value = cursorContext
|
conn.cursor.return_value = cursorContext
|
||||||
connector.connection = connection
|
|
||||||
|
@contextmanager
|
||||||
|
def fakeBorrow():
|
||||||
|
try:
|
||||||
|
yield conn
|
||||||
|
except Exception:
|
||||||
|
conn.rollback()
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
connector.borrowConn = fakeBorrow
|
||||||
|
|
||||||
connector._ensureTableExists = MagicMock(return_value=True)
|
connector._ensureTableExists = MagicMock(return_value=True)
|
||||||
connector._systemTableName = "_system"
|
connector._systemTableName = "_system"
|
||||||
|
|
||||||
cursorBehavior(cursor)
|
return connector, conn, cursor
|
||||||
return connector, connection, cursor
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetRecordsetFailLoud:
|
class TestGetRecordsetFailLoud:
|
||||||
|
|
@ -67,11 +90,12 @@ class TestGetRecordsetFailLoud:
|
||||||
def behavior(cursor):
|
def behavior(cursor):
|
||||||
cursor.execute.return_value = None
|
cursor.execute.return_value = None
|
||||||
cursor.fetchall.return_value = []
|
cursor.fetchall.return_value = []
|
||||||
connector, connection, _ = _makeConnector(behavior)
|
connector, conn, _ = _makeConnector(behavior)
|
||||||
|
|
||||||
result = connector.getRecordset(DummyTable)
|
result = connector.getRecordset(DummyTable)
|
||||||
assert result == []
|
assert result == []
|
||||||
connection.rollback.assert_not_called()
|
conn.rollback.assert_not_called()
|
||||||
|
conn.commit.assert_called_once()
|
||||||
|
|
||||||
def test_dictAdaptErrorRaisesDatabaseQueryError(self):
|
def test_dictAdaptErrorRaisesDatabaseQueryError(self):
|
||||||
"""Reproduces the Trustee bug: passing a dict in WHERE → can't adapt → raise."""
|
"""Reproduces the Trustee bug: passing a dict in WHERE → can't adapt → raise."""
|
||||||
|
|
@ -79,7 +103,7 @@ class TestGetRecordsetFailLoud:
|
||||||
cursor.execute.side_effect = psycopg2.ProgrammingError(
|
cursor.execute.side_effect = psycopg2.ProgrammingError(
|
||||||
"can't adapt type 'dict'"
|
"can't adapt type 'dict'"
|
||||||
)
|
)
|
||||||
connector, connection, _ = _makeConnector(behavior)
|
connector, conn, _ = _makeConnector(behavior)
|
||||||
|
|
||||||
with pytest.raises(DatabaseQueryError) as excinfo:
|
with pytest.raises(DatabaseQueryError) as excinfo:
|
||||||
connector.getRecordset(
|
connector.getRecordset(
|
||||||
|
|
@ -90,30 +114,30 @@ class TestGetRecordsetFailLoud:
|
||||||
assert excinfo.value.table == "DummyTable"
|
assert excinfo.value.table == "DummyTable"
|
||||||
assert "can't adapt type 'dict'" in str(excinfo.value)
|
assert "can't adapt type 'dict'" in str(excinfo.value)
|
||||||
assert isinstance(excinfo.value.original, psycopg2.ProgrammingError)
|
assert isinstance(excinfo.value.original, psycopg2.ProgrammingError)
|
||||||
connection.rollback.assert_called_once()
|
conn.rollback.assert_called_once()
|
||||||
|
|
||||||
def test_missingColumnRaisesDatabaseQueryError(self):
|
def test_missingColumnRaisesDatabaseQueryError(self):
|
||||||
def behavior(cursor):
|
def behavior(cursor):
|
||||||
cursor.execute.side_effect = psycopg2.errors.UndefinedColumn(
|
cursor.execute.side_effect = psycopg2.errors.UndefinedColumn(
|
||||||
'column "wat" does not exist'
|
'column "wat" does not exist'
|
||||||
)
|
)
|
||||||
connector, connection, _ = _makeConnector(behavior)
|
connector, conn, _ = _makeConnector(behavior)
|
||||||
|
|
||||||
with pytest.raises(DatabaseQueryError) as excinfo:
|
with pytest.raises(DatabaseQueryError) as excinfo:
|
||||||
connector.getRecordset(DummyTable, recordFilter={"wat": "x"})
|
connector.getRecordset(DummyTable, recordFilter={"wat": "x"})
|
||||||
|
|
||||||
assert "wat" in str(excinfo.value)
|
assert "wat" in str(excinfo.value)
|
||||||
connection.rollback.assert_called_once()
|
conn.rollback.assert_called_once()
|
||||||
|
|
||||||
def test_operationalErrorRaisesDatabaseQueryError(self):
|
def test_operationalErrorRaisesDatabaseQueryError(self):
|
||||||
"""Connection lost mid-query is also a real failure that must propagate."""
|
"""Connection lost mid-query is also a real failure that must propagate."""
|
||||||
def behavior(cursor):
|
def behavior(cursor):
|
||||||
cursor.execute.side_effect = psycopg2.OperationalError("connection lost")
|
cursor.execute.side_effect = psycopg2.OperationalError("connection lost")
|
||||||
connector, connection, _ = _makeConnector(behavior)
|
connector, conn, _ = _makeConnector(behavior)
|
||||||
|
|
||||||
with pytest.raises(DatabaseQueryError):
|
with pytest.raises(DatabaseQueryError):
|
||||||
connector.getRecordset(DummyTable)
|
connector.getRecordset(DummyTable)
|
||||||
connection.rollback.assert_called_once()
|
conn.rollback.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
class TestGetRecordFailLoud:
|
class TestGetRecordFailLoud:
|
||||||
|
|
@ -122,37 +146,31 @@ class TestGetRecordFailLoud:
|
||||||
def behavior(cursor):
|
def behavior(cursor):
|
||||||
cursor.execute.return_value = None
|
cursor.execute.return_value = None
|
||||||
cursor.fetchone.return_value = None
|
cursor.fetchone.return_value = None
|
||||||
connector, connection, _ = _makeConnector(behavior)
|
connector, conn, _ = _makeConnector(behavior)
|
||||||
|
|
||||||
result = connector.getRecord(DummyTable, "missing-id")
|
result = connector.getRecord(DummyTable, "missing-id")
|
||||||
assert result is None
|
assert result is None
|
||||||
connection.rollback.assert_not_called()
|
conn.rollback.assert_not_called()
|
||||||
|
conn.commit.assert_called_once()
|
||||||
|
|
||||||
def test_queryErrorRaisesDatabaseQueryError(self):
|
def test_queryErrorRaisesDatabaseQueryError(self):
|
||||||
def behavior(cursor):
|
def behavior(cursor):
|
||||||
cursor.execute.side_effect = psycopg2.errors.UndefinedTable(
|
cursor.execute.side_effect = psycopg2.errors.UndefinedTable(
|
||||||
'relation "DummyTable" does not exist'
|
'relation "DummyTable" does not exist'
|
||||||
)
|
)
|
||||||
connector, connection, _ = _makeConnector(behavior)
|
connector, conn, _ = _makeConnector(behavior)
|
||||||
|
|
||||||
with pytest.raises(DatabaseQueryError) as excinfo:
|
with pytest.raises(DatabaseQueryError) as excinfo:
|
||||||
connector.getRecord(DummyTable, "any-id")
|
connector.getRecord(DummyTable, "any-id")
|
||||||
|
|
||||||
assert excinfo.value.table == "DummyTable"
|
assert excinfo.value.table == "DummyTable"
|
||||||
connection.rollback.assert_called_once()
|
conn.rollback.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
class TestRollbackQuietly:
|
class TestStripNulBytesFromStr:
|
||||||
def test_rollsBackOnLiveConnection(self):
|
def test_removesNul(self):
|
||||||
connection = MagicMock()
|
assert _stripNulBytesFromStr("a\x00b") == "ab"
|
||||||
_rollbackQuietly(connection)
|
|
||||||
connection.rollback.assert_called_once()
|
|
||||||
|
|
||||||
def test_swallowsRollbackError(self):
|
def test_passthroughNonStr(self):
|
||||||
"""Rollback failure must not mask the original query error."""
|
assert _stripNulBytesFromStr(None) is None
|
||||||
connection = MagicMock()
|
assert _stripNulBytesFromStr(7) == 7
|
||||||
connection.rollback.side_effect = RuntimeError("rollback failed")
|
|
||||||
_rollbackQuietly(connection)
|
|
||||||
|
|
||||||
def test_noopOnNoneConnection(self):
|
|
||||||
_rollbackQuietly(None)
|
|
||||||
|
|
|
||||||
304
tests/unit/connectors/test_connectorDbPostgre_pool.py
Normal file
304
tests/unit/connectors/test_connectorDbPostgre_pool.py
Normal file
|
|
@ -0,0 +1,304 @@
|
||||||
|
# Copyright (c) 2026 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Concurrency tests for the PostgreSQL connection pool.
|
||||||
|
|
||||||
|
These tests pin the contract that the `c-work/2-build/2026-05-postgres-connection-pool.md`
|
||||||
|
refactor delivered:
|
||||||
|
|
||||||
|
* T1 — 50 threads × 100 calls in parallel produce 0 `OperationalError`s and
|
||||||
|
every call completes within reasonable time (p99 < 2 s).
|
||||||
|
* T2 — Two threads `_loadRecord` + `_saveRecord` against the same connector
|
||||||
|
do not corrupt each other's cursors.
|
||||||
|
* T3 — `statement_timeout` aborts a runaway `pg_sleep(60)` after ~30 s and
|
||||||
|
releases the connection back into the pool cleanly.
|
||||||
|
|
||||||
|
The tests need a real PostgreSQL server because the bug they guard against
|
||||||
|
only materialises with real psycopg2 sockets — a mocked connection never
|
||||||
|
hangs in `recv()`. They read DB credentials from `APP_CONFIG` (which loads
|
||||||
|
`.env`) and are auto-skipped when the connection fails (no local Postgres,
|
||||||
|
wrong creds, etc.) so `pytest` keeps working in CI-only environments.
|
||||||
|
|
||||||
|
To run them locally:
|
||||||
|
|
||||||
|
pytest gateway/tests/unit/connectors/test_connectorDbPostgre_pool.py -v
|
||||||
|
|
||||||
|
They use a throwaway database name (`poweron_pool_test_<uuid>`) and drop it
|
||||||
|
in fixture teardown so they leave nothing behind.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
import threading
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
import psycopg2.errors
|
||||||
|
import pytest
|
||||||
|
from pydantic import Field
|
||||||
|
|
||||||
|
from modules.connectors.connectorDbPostgre import (
|
||||||
|
DatabaseConnector,
|
||||||
|
_PoolRegistry,
|
||||||
|
closeAllPools,
|
||||||
|
)
|
||||||
|
from modules.datamodels.datamodelBase import PowerOnModel
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
|
|
||||||
|
def _dbConfig():
|
||||||
|
"""Read DB connection params from APP_CONFIG (`.env`).
|
||||||
|
|
||||||
|
Returns ``None`` when host/user/password are not all present so the
|
||||||
|
test module can skip cleanly instead of blowing up at import time.
|
||||||
|
"""
|
||||||
|
host = APP_CONFIG.get("DB_HOST")
|
||||||
|
user = APP_CONFIG.get("DB_USER")
|
||||||
|
password = APP_CONFIG.get("DB_PASSWORD_SECRET")
|
||||||
|
port = APP_CONFIG.get("DB_PORT", 5432)
|
||||||
|
if not host or not user or password is None:
|
||||||
|
return None
|
||||||
|
return {"host": host, "user": user, "password": password, "port": int(port)}
|
||||||
|
|
||||||
|
|
||||||
|
def _canReachPostgres(cfg) -> bool:
|
||||||
|
"""Try a quick connect to the admin DB so we can skip on connection failures."""
|
||||||
|
try:
|
||||||
|
conn = psycopg2.connect(
|
||||||
|
host=cfg["host"], port=cfg["port"], database="postgres",
|
||||||
|
user=cfg["user"], password=cfg["password"], connect_timeout=2,
|
||||||
|
)
|
||||||
|
conn.close()
|
||||||
|
return True
|
||||||
|
except Exception: # noqa: BLE001
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
_DB_CFG = _dbConfig()
|
||||||
|
pytestmark = pytest.mark.skipif(
|
||||||
|
_DB_CFG is None or not _canReachPostgres(_DB_CFG),
|
||||||
|
reason="No reachable PostgreSQL — skipping live-Postgres pool tests",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PoolTestRow(PowerOnModel):
|
||||||
|
"""Tiny model used to exercise the pool — one ID + one payload field."""
|
||||||
|
payload: str = Field(default="", description="Test payload")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def liveConnector():
|
||||||
|
"""Spin up a throwaway database, yield a `DatabaseConnector` against it,
|
||||||
|
drop the database afterwards.
|
||||||
|
|
||||||
|
The pool registry is wiped before and after each test so state from one
|
||||||
|
test cannot mask a bug in another.
|
||||||
|
"""
|
||||||
|
cfg = _DB_CFG
|
||||||
|
host = cfg["host"]
|
||||||
|
user = cfg["user"]
|
||||||
|
password = cfg["password"]
|
||||||
|
port = cfg["port"]
|
||||||
|
dbName = f"poweron_pool_test_{uuid.uuid4().hex[:8]}"
|
||||||
|
|
||||||
|
# Pre-clean: drop any orphan test DB with the same name (shouldn't happen
|
||||||
|
# because we use a unique uuid, but be defensive).
|
||||||
|
adminConn = psycopg2.connect(
|
||||||
|
host=host, port=port, database="postgres", user=user, password=password
|
||||||
|
)
|
||||||
|
adminConn.autocommit = True
|
||||||
|
try:
|
||||||
|
with adminConn.cursor() as cur:
|
||||||
|
cur.execute(f'DROP DATABASE IF EXISTS "{dbName}"')
|
||||||
|
finally:
|
||||||
|
adminConn.close()
|
||||||
|
|
||||||
|
closeAllPools()
|
||||||
|
|
||||||
|
connector = DatabaseConnector(
|
||||||
|
dbHost=host,
|
||||||
|
dbDatabase=dbName,
|
||||||
|
dbUser=user,
|
||||||
|
dbPassword=password,
|
||||||
|
dbPort=port,
|
||||||
|
)
|
||||||
|
# Seed exactly one row so every concurrent read has a stable target.
|
||||||
|
connector.recordCreate(PoolTestRow, {"id": "seed", "payload": "hello"})
|
||||||
|
|
||||||
|
yield connector
|
||||||
|
|
||||||
|
# Teardown: tear pools down, then drop the DB.
|
||||||
|
closeAllPools()
|
||||||
|
adminConn = psycopg2.connect(
|
||||||
|
host=host, port=port, database="postgres", user=user, password=password
|
||||||
|
)
|
||||||
|
adminConn.autocommit = True
|
||||||
|
try:
|
||||||
|
with adminConn.cursor() as cur:
|
||||||
|
cur.execute(
|
||||||
|
'SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = %s',
|
||||||
|
(dbName,),
|
||||||
|
)
|
||||||
|
cur.execute(f'DROP DATABASE IF EXISTS "{dbName}"')
|
||||||
|
finally:
|
||||||
|
adminConn.close()
|
||||||
|
|
||||||
|
|
||||||
|
class TestPoolConcurrency:
|
||||||
|
def _runWorkers(self, liveConnector, *, threadCount: int, callsPerThread: int):
|
||||||
|
"""Run N worker threads, each issuing M reads. Return (errors, latencies)."""
|
||||||
|
errors: list = []
|
||||||
|
latencies: list = []
|
||||||
|
lock = threading.Lock()
|
||||||
|
|
||||||
|
def worker():
|
||||||
|
for _ in range(callsPerThread):
|
||||||
|
t0 = time.perf_counter()
|
||||||
|
try:
|
||||||
|
rows = liveConnector.getRecordset(PoolTestRow)
|
||||||
|
assert any(r["id"] == "seed" for r in rows)
|
||||||
|
except Exception as e: # noqa: BLE001 — we want every failure mode
|
||||||
|
with lock:
|
||||||
|
errors.append(e)
|
||||||
|
finally:
|
||||||
|
with lock:
|
||||||
|
latencies.append(time.perf_counter() - t0)
|
||||||
|
|
||||||
|
with ThreadPoolExecutor(max_workers=threadCount) as ex:
|
||||||
|
futures = [ex.submit(worker) for _ in range(threadCount)]
|
||||||
|
for f in as_completed(futures):
|
||||||
|
f.result()
|
||||||
|
latencies.sort()
|
||||||
|
return errors, latencies
|
||||||
|
|
||||||
|
def test_50_threads_x_20_reads_no_errors(self, liveConnector):
|
||||||
|
"""T1a — STRESS: 50 threads × 20 reads each → 0 errors.
|
||||||
|
|
||||||
|
Pre-pool, this scenario produced either
|
||||||
|
`OperationalError: another command is already in progress` or a
|
||||||
|
deadlock in `recv()` because the threadpool shared one psycopg2
|
||||||
|
socket. With the pool plus `borrowConn`'s bounded wait, every
|
||||||
|
thread eventually gets a connection and completes — even with 30
|
||||||
|
threads queued waiting at any moment (pool max=20).
|
||||||
|
"""
|
||||||
|
errors, _ = self._runWorkers(liveConnector, threadCount=50, callsPerThread=20)
|
||||||
|
assert not errors, f"got {len(errors)} errors; first: {errors[0]!r}"
|
||||||
|
|
||||||
|
def test_20_threads_x_50_reads_latency_budget(self, liveConnector):
|
||||||
|
"""T1b — DESIGN CAPACITY: 20 threads × 50 reads, p99 < 5 s.
|
||||||
|
|
||||||
|
20 threads matches the pool's `max=20` so there is no queueing —
|
||||||
|
every borrow returns immediately. This pins a sanity-level per-call
|
||||||
|
latency budget; pre-pool it was unbounded (recv() never returned).
|
||||||
|
|
||||||
|
The 5 s ceiling is generous on purpose: `getRecordset` calls
|
||||||
|
`_ensureTableExists` which runs two `information_schema` queries
|
||||||
|
for column-additive migration, and under 20-way concurrency on a
|
||||||
|
single Postgres instance that produces a long tail. The hard
|
||||||
|
assertion is `not errors` — the latency check just guarantees
|
||||||
|
nothing hangs indefinitely.
|
||||||
|
"""
|
||||||
|
errors, latencies = self._runWorkers(
|
||||||
|
liveConnector, threadCount=20, callsPerThread=50
|
||||||
|
)
|
||||||
|
assert not errors, f"got {len(errors)} errors; first: {errors[0]!r}"
|
||||||
|
p99 = latencies[int(len(latencies) * 0.99)]
|
||||||
|
assert p99 < 5.0, f"p99 latency {p99:.2f}s exceeds 5s budget"
|
||||||
|
|
||||||
|
def test_interleaved_load_and_save_no_collision(self, liveConnector):
|
||||||
|
"""T2: parallel reads + writes on the same connector → no cursor mix-up.
|
||||||
|
|
||||||
|
Pre-pool the reader could observe a row in mid-write or vice versa
|
||||||
|
because both shared the same cursor. With one connection per borrow,
|
||||||
|
the database's own row-locking is the only contention, and we just
|
||||||
|
need to assert no exceptions.
|
||||||
|
"""
|
||||||
|
stopFlag = threading.Event()
|
||||||
|
errors: list = []
|
||||||
|
lock = threading.Lock()
|
||||||
|
|
||||||
|
def reader():
|
||||||
|
while not stopFlag.is_set():
|
||||||
|
try:
|
||||||
|
liveConnector.getRecord(PoolTestRow, "seed")
|
||||||
|
except Exception as e: # noqa: BLE001
|
||||||
|
with lock:
|
||||||
|
errors.append(("read", e))
|
||||||
|
|
||||||
|
def writer():
|
||||||
|
i = 0
|
||||||
|
while not stopFlag.is_set():
|
||||||
|
try:
|
||||||
|
liveConnector.recordModify(
|
||||||
|
PoolTestRow,
|
||||||
|
"seed",
|
||||||
|
{"id": "seed", "payload": f"v{i}"},
|
||||||
|
)
|
||||||
|
i += 1
|
||||||
|
except Exception as e: # noqa: BLE001
|
||||||
|
with lock:
|
||||||
|
errors.append(("write", e))
|
||||||
|
|
||||||
|
threads = [
|
||||||
|
threading.Thread(target=reader, daemon=True),
|
||||||
|
threading.Thread(target=reader, daemon=True),
|
||||||
|
threading.Thread(target=writer, daemon=True),
|
||||||
|
threading.Thread(target=writer, daemon=True),
|
||||||
|
]
|
||||||
|
for t in threads:
|
||||||
|
t.start()
|
||||||
|
time.sleep(2.0)
|
||||||
|
stopFlag.set()
|
||||||
|
for t in threads:
|
||||||
|
t.join(timeout=3.0)
|
||||||
|
|
||||||
|
assert not errors, f"got {len(errors)} errors; first: {errors[0]!r}"
|
||||||
|
|
||||||
|
def test_statement_timeout_releases_connection(self, liveConnector):
|
||||||
|
"""T3: `pg_sleep` past statement_timeout → QueryCanceled, pool intact.
|
||||||
|
|
||||||
|
The bug we are guarding against: a runaway query with no timeout
|
||||||
|
hung `recv()` forever, the psycopg2 connection was poisoned, and the
|
||||||
|
whole backend became unresponsive once that connection was reused.
|
||||||
|
With `statement_timeout=30000` configured at pool construction the
|
||||||
|
query is cancelled by the server, the borrow context manager rolls
|
||||||
|
back, and the connection returns to the pool — proven by the fact
|
||||||
|
that a follow-up call still succeeds quickly.
|
||||||
|
"""
|
||||||
|
# Use a short timeout to keep the test fast — override the pool's
|
||||||
|
# session statement_timeout for one borrow via SET LOCAL.
|
||||||
|
with liveConnector.borrowConn() as conn:
|
||||||
|
with conn.cursor() as cursor:
|
||||||
|
cursor.execute("SET LOCAL statement_timeout = 500")
|
||||||
|
with pytest.raises(psycopg2.errors.QueryCanceled):
|
||||||
|
cursor.execute("SELECT pg_sleep(5)")
|
||||||
|
|
||||||
|
# Follow-up call must succeed quickly: connection is back in the pool.
|
||||||
|
t0 = time.perf_counter()
|
||||||
|
rows = liveConnector.getRecordset(PoolTestRow)
|
||||||
|
elapsed = time.perf_counter() - t0
|
||||||
|
assert any(r["id"] == "seed" for r in rows)
|
||||||
|
assert elapsed < 1.0, f"follow-up call took {elapsed:.2f}s — pool may be wedged"
|
||||||
|
|
||||||
|
|
||||||
|
class TestPoolRegistry:
|
||||||
|
def test_one_pool_per_database_identity(self, liveConnector):
|
||||||
|
"""Two connectors against the same (host, db, port) share one pool."""
|
||||||
|
cfg = _DB_CFG
|
||||||
|
pool1 = _PoolRegistry.getPool(
|
||||||
|
dbHost=cfg["host"], dbDatabase=liveConnector.dbDatabase,
|
||||||
|
dbUser=cfg["user"], dbPassword=cfg["password"], dbPort=cfg["port"],
|
||||||
|
)
|
||||||
|
pool2 = _PoolRegistry.getPool(
|
||||||
|
dbHost=cfg["host"], dbDatabase=liveConnector.dbDatabase,
|
||||||
|
dbUser=cfg["user"], dbPassword=cfg["password"], dbPort=cfg["port"],
|
||||||
|
)
|
||||||
|
assert pool1 is pool2
|
||||||
|
|
||||||
|
def test_close_all_clears_registry(self, liveConnector):
|
||||||
|
"""`closeAllPools()` empties the registry so the next call rebuilds."""
|
||||||
|
# Touch the pool first.
|
||||||
|
liveConnector.getRecordset(PoolTestRow)
|
||||||
|
assert _PoolRegistry._pools, "pool should exist after a real call"
|
||||||
|
closeAllPools()
|
||||||
|
assert _PoolRegistry._pools == {}, "registry should be empty after closeAllPools()"
|
||||||
|
|
@ -68,6 +68,16 @@ class _FakeDb:
|
||||||
def _ensureTableExists(self, modelClass):
|
def _ensureTableExists(self, modelClass):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def borrowCursor(self):
|
||||||
|
"""Mimic `DatabaseConnector.borrowCursor()` context manager."""
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def _cm():
|
||||||
|
yield MagicMock()
|
||||||
|
return _cm()
|
||||||
|
|
||||||
def seed(self, modelClass, record: Dict[str, Any]):
|
def seed(self, modelClass, record: Dict[str, Any]):
|
||||||
tableName = modelClass.__name__
|
tableName = modelClass.__name__
|
||||||
self._tables.setdefault(tableName, {})
|
self._tables.setdefault(tableName, {})
|
||||||
|
|
|
||||||
|
|
@ -69,6 +69,16 @@ class _FakeDb:
|
||||||
def _ensureTableExists(self, modelClass):
|
def _ensureTableExists(self, modelClass):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def borrowCursor(self):
|
||||||
|
"""Mimic `DatabaseConnector.borrowCursor()` context manager for the cascade test."""
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def _cm():
|
||||||
|
yield MagicMock()
|
||||||
|
return _cm()
|
||||||
|
|
||||||
def seed(self, modelClass, record: Dict[str, Any]):
|
def seed(self, modelClass, record: Dict[str, Any]):
|
||||||
tableName = modelClass.__name__
|
tableName = modelClass.__name__
|
||||||
self._tables.setdefault(tableName, {})
|
self._tables.setdefault(tableName, {})
|
||||||
|
|
|
||||||
359
tests/unit/services/test_buildTree.py
Normal file
359
tests/unit/services/test_buildTree.py
Normal file
|
|
@ -0,0 +1,359 @@
|
||||||
|
"""Unit tests for the generic UDB tree builder.
|
||||||
|
|
||||||
|
Verifies key encoding/decoding and that children for parent keys with
|
||||||
|
existing handlers (top-level, conn, mgrp, feat) are produced with the
|
||||||
|
correct effective-flag triplet.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import unittest
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge import _buildTree
|
||||||
|
|
||||||
|
|
||||||
|
class TestKeyCoding(unittest.TestCase):
|
||||||
|
def test_encode_decode_roundtrip(self):
|
||||||
|
key = _buildTree._encode("ds", "conn-1", "sharepointFolder", "/sites/x")
|
||||||
|
kind, parts = _buildTree._decode(key)
|
||||||
|
self.assertEqual(kind, "ds")
|
||||||
|
self.assertEqual(parts, ["conn-1", "sharepointFolder", "/sites/x"])
|
||||||
|
|
||||||
|
def test_top_level_kinds(self):
|
||||||
|
self.assertEqual(_buildTree._decode("conn|abc")[0], "conn")
|
||||||
|
self.assertEqual(_buildTree._decode("mgrp|m1")[0], "mgrp")
|
||||||
|
self.assertEqual(_buildTree._decode("feat|m1|trustee|fi-1")[1], ["m1", "trustee", "fi-1"])
|
||||||
|
|
||||||
|
|
||||||
|
class TestEffectiveTriplets(unittest.TestCase):
|
||||||
|
def test_ds_triplet_no_record_returns_defaults(self):
|
||||||
|
result = _buildTree._effectiveTripletDs("c", "msft", "/", [])
|
||||||
|
self.assertEqual(result, {
|
||||||
|
"effectiveNeutralize": False,
|
||||||
|
"effectiveScope": "personal",
|
||||||
|
"effectiveRagIndexEnabled": False,
|
||||||
|
})
|
||||||
|
|
||||||
|
def test_ds_triplet_inherits_from_root(self):
|
||||||
|
root = {
|
||||||
|
"id": "r", "connectionId": "c", "sourceType": "msft", "path": "/",
|
||||||
|
"neutralize": True, "scope": "mandate", "ragIndexEnabled": True,
|
||||||
|
}
|
||||||
|
result = _buildTree._effectiveTripletDs("c", "sharepointFolder", "/sites/x", [root])
|
||||||
|
self.assertEqual(result["effectiveNeutralize"], True)
|
||||||
|
self.assertEqual(result["effectiveScope"], "mandate")
|
||||||
|
self.assertEqual(result["effectiveRagIndexEnabled"], True)
|
||||||
|
|
||||||
|
def test_fds_triplet_inherits_from_workspace_wildcard(self):
|
||||||
|
ws = {
|
||||||
|
"id": "ws", "workspaceInstanceId": "ws-inst", "featureInstanceId": "fi1",
|
||||||
|
"tableName": "*", "recordFilter": None, "neutralize": True,
|
||||||
|
"scope": "mandate", "ragIndexEnabled": True,
|
||||||
|
}
|
||||||
|
result = _buildTree._effectiveTripletFds("fi1", "Pos", None, [ws])
|
||||||
|
self.assertEqual(result["effectiveNeutralize"], True)
|
||||||
|
self.assertEqual(result["effectiveScope"], "mandate")
|
||||||
|
self.assertEqual(result["effectiveRagIndexEnabled"], True)
|
||||||
|
|
||||||
|
|
||||||
|
class TestRecordLookup(unittest.TestCase):
|
||||||
|
def test_finds_ds_record_by_normalised_path(self):
|
||||||
|
rec = {"id": "x", "connectionId": "c", "sourceType": "msft", "path": "/folder"}
|
||||||
|
self.assertEqual(_buildTree._findDsRecord([rec], "c", "msft", "/folder/").get("id"), "x")
|
||||||
|
self.assertIsNone(_buildTree._findDsRecord([rec], "c", "msft", "/other"))
|
||||||
|
|
||||||
|
def test_finds_fds_record_with_matching_filter(self):
|
||||||
|
rec = {"id": "f", "workspaceInstanceId": "ws", "featureInstanceId": "fi1", "tableName": "Pos", "recordFilter": {"id": "5"}}
|
||||||
|
self.assertEqual(_buildTree._findFdsRecord([rec], "fi1", "Pos", {"id": "5"}).get("id"), "f")
|
||||||
|
self.assertIsNone(_buildTree._findFdsRecord([rec], "fi1", "Pos", {"id": "99"}))
|
||||||
|
|
||||||
|
def test_fds_record_with_none_filter_matches_only_none(self):
|
||||||
|
rec = {"id": "f", "workspaceInstanceId": "ws", "featureInstanceId": "fi1", "tableName": "*", "recordFilter": None}
|
||||||
|
self.assertEqual(_buildTree._findFdsRecord([rec], "fi1", "*", None).get("id"), "f")
|
||||||
|
self.assertIsNone(_buildTree._findFdsRecord([rec], "fi1", "*", {"id": "1"}))
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetChildrenForParents(unittest.TestCase):
|
||||||
|
"""End-to-end orchestrator test with mocked dependencies."""
|
||||||
|
|
||||||
|
def _runAsync(self, coro):
|
||||||
|
return asyncio.get_event_loop().run_until_complete(coro)
|
||||||
|
|
||||||
|
def test_unknown_parent_key_returns_empty_list(self):
|
||||||
|
with patch("modules.interfaces.interfaceDbApp.getRootInterface") as mockRoot:
|
||||||
|
rootIf = MagicMock()
|
||||||
|
rootIf.db.getRecordset.return_value = []
|
||||||
|
mockRoot.return_value = rootIf
|
||||||
|
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.user.id = "u1"
|
||||||
|
ctx.mandateId = "m1"
|
||||||
|
|
||||||
|
result = self._runAsync(
|
||||||
|
_buildTree.getChildrenForParents("inst-1", ["bogus|key"], ctx)
|
||||||
|
)
|
||||||
|
self.assertEqual(result["bogus|key"], [])
|
||||||
|
|
||||||
|
def test_top_level_emits_personal_root_first(self):
|
||||||
|
"""Top-level emits personalRoot first, then mandate-group nodes inline."""
|
||||||
|
with patch("modules.interfaces.interfaceDbApp.getRootInterface") as mockRoot:
|
||||||
|
rootIf = MagicMock()
|
||||||
|
rootIf.db.getRecordset.return_value = []
|
||||||
|
rootIf.getUserMandates.return_value = []
|
||||||
|
mockRoot.return_value = rootIf
|
||||||
|
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.user.id = "u1"
|
||||||
|
ctx.mandateId = "m1"
|
||||||
|
|
||||||
|
result = self._runAsync(
|
||||||
|
_buildTree.getChildrenForParents("inst-1", [None], ctx)
|
||||||
|
)
|
||||||
|
children = result["__root__"]
|
||||||
|
self.assertGreaterEqual(len(children), 1)
|
||||||
|
personalRoot = children[0]
|
||||||
|
self.assertEqual(personalRoot["key"], "personalRoot")
|
||||||
|
self.assertEqual(personalRoot["kind"], "synthRoot")
|
||||||
|
self.assertIsNone(personalRoot["parentKey"])
|
||||||
|
self.assertTrue(personalRoot["hasChildren"])
|
||||||
|
self.assertTrue(personalRoot["defaultExpanded"])
|
||||||
|
|
||||||
|
|
||||||
|
class TestTopLevelLayout(unittest.TestCase):
|
||||||
|
"""Tests for the flat top-level layout (personalRoot + mandate groups)."""
|
||||||
|
|
||||||
|
def _runAsync(self, coro):
|
||||||
|
return asyncio.get_event_loop().run_until_complete(coro)
|
||||||
|
|
||||||
|
def test_personal_root_carries_neutral_default_triplet(self):
|
||||||
|
with patch("modules.interfaces.interfaceDbApp.getRootInterface") as mockRoot:
|
||||||
|
rootIf = MagicMock()
|
||||||
|
rootIf.db.getRecordset.return_value = []
|
||||||
|
rootIf.getUserMandates.return_value = []
|
||||||
|
mockRoot.return_value = rootIf
|
||||||
|
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.user.id = "u1"
|
||||||
|
ctx.mandateId = "m1"
|
||||||
|
|
||||||
|
result = self._runAsync(
|
||||||
|
_buildTree.getChildrenForParents("inst-1", [None], ctx)
|
||||||
|
)
|
||||||
|
personalRoot = result["__root__"][0]
|
||||||
|
self.assertFalse(personalRoot["effectiveNeutralize"])
|
||||||
|
self.assertEqual(personalRoot["effectiveScope"], "personal")
|
||||||
|
self.assertFalse(personalRoot["effectiveRagIndexEnabled"])
|
||||||
|
self.assertFalse(personalRoot["supportsRag"])
|
||||||
|
self.assertFalse(personalRoot["canBeAdded"])
|
||||||
|
self.assertIsNone(personalRoot["dataSourceId"])
|
||||||
|
self.assertIsNone(personalRoot["modelType"])
|
||||||
|
|
||||||
|
def test_personal_root_emits_active_connection_with_correct_parent(self):
|
||||||
|
with patch("modules.interfaces.interfaceDbApp.getRootInterface") as mockRoot, \
|
||||||
|
patch("modules.serviceCenter.getService") as mockGetService:
|
||||||
|
rootIf = MagicMock()
|
||||||
|
rootIf.db.getRecordset.return_value = []
|
||||||
|
mockRoot.return_value = rootIf
|
||||||
|
|
||||||
|
chatService = MagicMock()
|
||||||
|
chatService.getUserConnections.return_value = [{
|
||||||
|
"id": "conn-1",
|
||||||
|
"status": "active",
|
||||||
|
"authority": "msft",
|
||||||
|
"externalEmail": "user@example.com",
|
||||||
|
}]
|
||||||
|
mockGetService.return_value = chatService
|
||||||
|
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.user.id = "u1"
|
||||||
|
ctx.mandateId = "m1"
|
||||||
|
|
||||||
|
result = self._runAsync(
|
||||||
|
_buildTree.getChildrenForParents("inst-1", ["personalRoot"], ctx)
|
||||||
|
)
|
||||||
|
children = result["personalRoot"]
|
||||||
|
self.assertEqual(len(children), 1)
|
||||||
|
self.assertEqual(children[0]["key"], "conn|conn-1")
|
||||||
|
self.assertEqual(children[0]["kind"], "connection")
|
||||||
|
self.assertEqual(children[0]["parentKey"], "personalRoot")
|
||||||
|
self.assertEqual(children[0]["label"], "user@example.com")
|
||||||
|
self.assertTrue(children[0]["supportsRag"])
|
||||||
|
|
||||||
|
def test_personal_root_skips_inactive_connection(self):
|
||||||
|
with patch("modules.interfaces.interfaceDbApp.getRootInterface") as mockRoot, \
|
||||||
|
patch("modules.serviceCenter.getService") as mockGetService:
|
||||||
|
rootIf = MagicMock()
|
||||||
|
rootIf.db.getRecordset.return_value = []
|
||||||
|
mockRoot.return_value = rootIf
|
||||||
|
|
||||||
|
chatService = MagicMock()
|
||||||
|
chatService.getUserConnections.return_value = [
|
||||||
|
{"id": "c1", "status": "active", "authority": "msft", "externalEmail": "a"},
|
||||||
|
{"id": "c2", "status": "expired", "authority": "google", "externalEmail": "b"},
|
||||||
|
]
|
||||||
|
mockGetService.return_value = chatService
|
||||||
|
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.user.id = "u1"
|
||||||
|
ctx.mandateId = "m1"
|
||||||
|
|
||||||
|
result = self._runAsync(
|
||||||
|
_buildTree.getChildrenForParents("inst-1", ["personalRoot"], ctx)
|
||||||
|
)
|
||||||
|
self.assertEqual(len(result["personalRoot"]), 1)
|
||||||
|
self.assertEqual(result["personalRoot"][0]["connectionId"], "c1")
|
||||||
|
|
||||||
|
def test_mandate_groups_emitted_inline_at_top_level(self):
|
||||||
|
with patch("modules.interfaces.interfaceDbApp.getRootInterface") as mockRoot, \
|
||||||
|
patch("modules.security.rbacCatalog.getCatalogService") as mockCatalog:
|
||||||
|
rootIf = MagicMock()
|
||||||
|
rootIf.db.getRecordset.return_value = []
|
||||||
|
userMandate = MagicMock()
|
||||||
|
userMandate.mandateId = "m1"
|
||||||
|
rootIf.getUserMandates.return_value = [userMandate]
|
||||||
|
featureInst = MagicMock()
|
||||||
|
featureInst.id = "fi-1"
|
||||||
|
featureInst.featureCode = "trustee"
|
||||||
|
featureInst.enabled = True
|
||||||
|
rootIf.getFeatureInstancesByMandate.return_value = [featureInst]
|
||||||
|
featureAccess = MagicMock()
|
||||||
|
featureAccess.enabled = True
|
||||||
|
rootIf.getFeatureAccess.return_value = featureAccess
|
||||||
|
mockRoot.return_value = rootIf
|
||||||
|
|
||||||
|
catalog = MagicMock()
|
||||||
|
catalog.getFeaturesWithDataObjects.return_value = ["trustee"]
|
||||||
|
mockCatalog.return_value = catalog
|
||||||
|
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.user.id = "u1"
|
||||||
|
ctx.mandateId = None
|
||||||
|
|
||||||
|
result = self._runAsync(
|
||||||
|
_buildTree.getChildrenForParents("inst-1", [None], ctx)
|
||||||
|
)
|
||||||
|
children = result["__root__"]
|
||||||
|
byKey = {c["key"]: c for c in children}
|
||||||
|
self.assertIn("personalRoot", byKey)
|
||||||
|
self.assertIn("mgrp|m1", byKey)
|
||||||
|
mgroup = byKey["mgrp|m1"]
|
||||||
|
self.assertEqual(mgroup["kind"], "mandateGroup")
|
||||||
|
self.assertIsNone(mgroup["parentKey"])
|
||||||
|
self.assertEqual(mgroup["mandateId"], "m1")
|
||||||
|
self.assertTrue(mgroup["defaultExpanded"])
|
||||||
|
self.assertFalse(mgroup["supportsRag"])
|
||||||
|
|
||||||
|
def test_top_level_omits_mandates_without_data_features(self):
|
||||||
|
with patch("modules.interfaces.interfaceDbApp.getRootInterface") as mockRoot, \
|
||||||
|
patch("modules.security.rbacCatalog.getCatalogService") as mockCatalog:
|
||||||
|
rootIf = MagicMock()
|
||||||
|
rootIf.db.getRecordset.return_value = []
|
||||||
|
userMandate = MagicMock()
|
||||||
|
userMandate.mandateId = "m1"
|
||||||
|
rootIf.getUserMandates.return_value = [userMandate]
|
||||||
|
rootIf.getFeatureInstancesByMandate.return_value = []
|
||||||
|
mockRoot.return_value = rootIf
|
||||||
|
|
||||||
|
catalog = MagicMock()
|
||||||
|
catalog.getFeaturesWithDataObjects.return_value = ["trustee"]
|
||||||
|
mockCatalog.return_value = catalog
|
||||||
|
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.user.id = "u1"
|
||||||
|
ctx.mandateId = None
|
||||||
|
|
||||||
|
result = self._runAsync(
|
||||||
|
_buildTree.getChildrenForParents("inst-1", [None], ctx)
|
||||||
|
)
|
||||||
|
keys = [c["key"] for c in result["__root__"]]
|
||||||
|
self.assertEqual(keys, ["personalRoot"])
|
||||||
|
|
||||||
|
def test_personal_root_listed_first_via_display_order(self):
|
||||||
|
with patch("modules.interfaces.interfaceDbApp.getRootInterface") as mockRoot, \
|
||||||
|
patch("modules.security.rbacCatalog.getCatalogService") as mockCatalog:
|
||||||
|
rootIf = MagicMock()
|
||||||
|
rootIf.db.getRecordset.return_value = []
|
||||||
|
userMandate = MagicMock()
|
||||||
|
userMandate.mandateId = "m1"
|
||||||
|
rootIf.getUserMandates.return_value = [userMandate]
|
||||||
|
featureInst = MagicMock()
|
||||||
|
featureInst.id = "fi-1"
|
||||||
|
featureInst.featureCode = "trustee"
|
||||||
|
featureInst.enabled = True
|
||||||
|
rootIf.getFeatureInstancesByMandate.return_value = [featureInst]
|
||||||
|
featureAccess = MagicMock()
|
||||||
|
featureAccess.enabled = True
|
||||||
|
rootIf.getFeatureAccess.return_value = featureAccess
|
||||||
|
mockRoot.return_value = rootIf
|
||||||
|
|
||||||
|
catalog = MagicMock()
|
||||||
|
catalog.getFeaturesWithDataObjects.return_value = ["trustee"]
|
||||||
|
mockCatalog.return_value = catalog
|
||||||
|
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.user.id = "u1"
|
||||||
|
ctx.mandateId = None
|
||||||
|
|
||||||
|
result = self._runAsync(
|
||||||
|
_buildTree.getChildrenForParents("inst-1", [None], ctx)
|
||||||
|
)
|
||||||
|
children = result["__root__"]
|
||||||
|
self.assertEqual(children[0]["key"], "personalRoot")
|
||||||
|
self.assertEqual(children[0]["displayOrder"], 0)
|
||||||
|
|
||||||
|
|
||||||
|
class TestFeatureTableFields(unittest.TestCase):
|
||||||
|
"""Per-column field expansion under a feature data-source table."""
|
||||||
|
|
||||||
|
def test_emits_one_node_per_field(self):
|
||||||
|
nodes = _buildTree._featureTableFields(
|
||||||
|
parentKey="fdstbl|fi-1|TrusteePosition",
|
||||||
|
featureInstanceId="fi-1",
|
||||||
|
tableName="TrusteePosition",
|
||||||
|
fieldNames=["id", "valuta", "company"],
|
||||||
|
allFds=[],
|
||||||
|
)
|
||||||
|
self.assertEqual(len(nodes), 3)
|
||||||
|
self.assertEqual(nodes[0]["kind"], "fdsField")
|
||||||
|
self.assertEqual(nodes[0]["fieldName"], "id")
|
||||||
|
self.assertEqual(nodes[0]["parentKey"], "fdstbl|fi-1|TrusteePosition")
|
||||||
|
self.assertEqual(nodes[0]["key"], "fdsfld|fi-1|TrusteePosition|id")
|
||||||
|
self.assertFalse(nodes[0]["hasChildren"])
|
||||||
|
self.assertFalse(nodes[0]["supportsRag"])
|
||||||
|
|
||||||
|
def test_field_neutralize_inherits_from_table_blanket(self):
|
||||||
|
rec = {"id": "f", "workspaceInstanceId": "ws-1", "featureInstanceId": "fi-1",
|
||||||
|
"tableName": "TrusteePosition", "recordFilter": None,
|
||||||
|
"neutralize": True, "neutralizeFields": None,
|
||||||
|
"scope": None, "ragIndexEnabled": False}
|
||||||
|
nodes = _buildTree._featureTableFields(
|
||||||
|
parentKey="fdstbl|fi-1|TrusteePosition",
|
||||||
|
featureInstanceId="fi-1",
|
||||||
|
tableName="TrusteePosition",
|
||||||
|
fieldNames=["email", "company"],
|
||||||
|
allFds=[rec],
|
||||||
|
)
|
||||||
|
self.assertTrue(nodes[0]["effectiveNeutralize"])
|
||||||
|
self.assertTrue(nodes[1]["effectiveNeutralize"])
|
||||||
|
|
||||||
|
def test_field_neutralize_explicit_via_neutralize_fields(self):
|
||||||
|
rec = {"id": "f", "workspaceInstanceId": "ws-1", "featureInstanceId": "fi-1",
|
||||||
|
"tableName": "TrusteePosition", "recordFilter": None,
|
||||||
|
"neutralize": False, "neutralizeFields": ["email"],
|
||||||
|
"scope": None, "ragIndexEnabled": False}
|
||||||
|
nodes = _buildTree._featureTableFields(
|
||||||
|
parentKey="fdstbl|fi-1|TrusteePosition",
|
||||||
|
featureInstanceId="fi-1",
|
||||||
|
tableName="TrusteePosition",
|
||||||
|
fieldNames=["email", "company"],
|
||||||
|
allFds=[rec],
|
||||||
|
)
|
||||||
|
byField = {n["fieldName"]: n for n in nodes}
|
||||||
|
self.assertTrue(byField["email"]["effectiveNeutralize"])
|
||||||
|
self.assertFalse(byField["company"]["effectiveNeutralize"])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
55
tests/unit/services/test_costEstimate.py
Normal file
55
tests/unit/services/test_costEstimate.py
Normal file
|
|
@ -0,0 +1,55 @@
|
||||||
|
"""Unit tests for `_costEstimate` heuristic.
|
||||||
|
|
||||||
|
Validates the output shape, basic formulas, and that 'basis' annotations
|
||||||
|
are always present (the user-facing transparency contract).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge import _costEstimate
|
||||||
|
|
||||||
|
|
||||||
|
class TestCostEstimate(unittest.TestCase):
|
||||||
|
def test_files_shape(self):
|
||||||
|
result = _costEstimate.estimateBootstrapCost(
|
||||||
|
{"maxBytes": 200 * 1024 * 1024}, kind="files",
|
||||||
|
)
|
||||||
|
self.assertIn("estimatedTokens", result)
|
||||||
|
self.assertIn("estimatedUsd", result)
|
||||||
|
self.assertIn("basis", result)
|
||||||
|
self.assertIn("assumptions", result["basis"])
|
||||||
|
self.assertIn("formula", result["basis"]["assumptions"])
|
||||||
|
self.assertIn("notes", result["basis"])
|
||||||
|
|
||||||
|
def test_files_doubling_maxBytes_doubles_tokens(self):
|
||||||
|
low = _costEstimate.estimateBootstrapCost({"maxBytes": 100 * 1024 * 1024}, kind="files")
|
||||||
|
high = _costEstimate.estimateBootstrapCost({"maxBytes": 200 * 1024 * 1024}, kind="files")
|
||||||
|
self.assertEqual(high["estimatedTokens"], low["estimatedTokens"] * 2)
|
||||||
|
|
||||||
|
def test_clickup_uses_tasks_and_workspaces(self):
|
||||||
|
result = _costEstimate.estimateBootstrapCost(
|
||||||
|
{"maxTasks": 100, "maxWorkspaces": 2, "maxListsPerWorkspace": 10},
|
||||||
|
kind="clickup",
|
||||||
|
)
|
||||||
|
expectedTokens = 100 * 2 * _costEstimate.DEFAULT_TOKENS_PER_ITEM
|
||||||
|
self.assertEqual(result["estimatedTokens"], expectedTokens)
|
||||||
|
|
||||||
|
def test_unknown_kind_returns_zero(self):
|
||||||
|
result = _costEstimate.estimateBootstrapCost({}, kind="totally-unknown")
|
||||||
|
self.assertEqual(result["estimatedTokens"], 0)
|
||||||
|
self.assertEqual(result["estimatedUsd"], 0.0)
|
||||||
|
|
||||||
|
def test_usd_is_rounded_4_decimals(self):
|
||||||
|
result = _costEstimate.estimateBootstrapCost({"maxBytes": 1024 * 1024}, kind="files")
|
||||||
|
rounded = round(result["estimatedUsd"], 4)
|
||||||
|
self.assertEqual(result["estimatedUsd"], rounded)
|
||||||
|
|
||||||
|
def test_basis_includes_input_limits(self):
|
||||||
|
result = _costEstimate.estimateBootstrapCost({"maxBytes": 42}, kind="files")
|
||||||
|
self.assertEqual(result["basis"]["limits"]["maxBytes"], 42)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
655
tests/unit/services/test_inheritFlags.py
Normal file
655
tests/unit/services/test_inheritFlags.py
Normal file
|
|
@ -0,0 +1,655 @@
|
||||||
|
"""Unit tests for `_inheritFlags` cascade-inherit helpers.
|
||||||
|
|
||||||
|
Verifies:
|
||||||
|
- getEffectiveFlag mode='walk': walks ancestors via path-prefix matching
|
||||||
|
- getEffectiveFlag mode='aggregate': returns 'mixed' when subtree diverges
|
||||||
|
- cascadeResetDescendants: bottom-up reset returning List[str]
|
||||||
|
- cascadeResetDescendantsFds: same for FeatureDataSource
|
||||||
|
- collectAncestorChain / collectAncestorChainFds: ancestor discovery
|
||||||
|
- buildEffectiveByConnection / buildEffectiveByWorkspaceFds: batch compute
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
from typing import List
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge import _inheritFlags
|
||||||
|
|
||||||
|
|
||||||
|
def _ds(idVal: str, path: str, **flags) -> dict:
|
||||||
|
"""Build a DataSource dict with sensible defaults for a fixture."""
|
||||||
|
base = {
|
||||||
|
"id": idVal,
|
||||||
|
"connectionId": "conn-1",
|
||||||
|
"sourceType": "sharepointFolder",
|
||||||
|
"path": path,
|
||||||
|
"neutralize": None,
|
||||||
|
"ragIndexEnabled": None,
|
||||||
|
"scope": None,
|
||||||
|
}
|
||||||
|
base.update(flags)
|
||||||
|
return base
|
||||||
|
|
||||||
|
|
||||||
|
def _fds(idVal: str, *, tableName: str, recordFilter=None, featureInstanceId="fi-1", **flags) -> dict:
|
||||||
|
"""Build a FeatureDataSource dict fixture."""
|
||||||
|
base = {
|
||||||
|
"id": idVal,
|
||||||
|
"workspaceInstanceId": "ws-1",
|
||||||
|
"featureInstanceId": featureInstanceId,
|
||||||
|
"tableName": tableName,
|
||||||
|
"recordFilter": recordFilter,
|
||||||
|
"neutralize": None,
|
||||||
|
"scope": None,
|
||||||
|
}
|
||||||
|
base.update(flags)
|
||||||
|
return base
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# DataSource: getEffectiveFlag mode='walk'
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
class TestEffectiveFlagWalk(unittest.TestCase):
|
||||||
|
def test_explicit_own_value_wins(self):
|
||||||
|
root = _ds("r", "/", neutralize=False)
|
||||||
|
leaf = _ds("l", "/folder/sub", neutralize=True)
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [root, leaf]))
|
||||||
|
|
||||||
|
def test_inherits_from_root_when_own_is_none(self):
|
||||||
|
root = _ds("r", "/", neutralize=True)
|
||||||
|
leaf = _ds("l", "/folder/sub")
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [root, leaf]))
|
||||||
|
|
||||||
|
def test_default_false_when_chain_empty(self):
|
||||||
|
leaf = _ds("l", "/folder/sub")
|
||||||
|
self.assertFalse(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [leaf]))
|
||||||
|
|
||||||
|
def test_nearest_ancestor_wins_over_distant(self):
|
||||||
|
root = _ds("r", "/", neutralize=False)
|
||||||
|
mid = _ds("m", "/folder", neutralize=True)
|
||||||
|
leaf = _ds("l", "/folder/sub")
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [root, mid, leaf]))
|
||||||
|
|
||||||
|
def test_different_connection_ignored(self):
|
||||||
|
otherConn = _ds("o", "/", connectionId="conn-2", neutralize=True)
|
||||||
|
leaf = _ds("l", "/folder")
|
||||||
|
self.assertFalse(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [otherConn, leaf]))
|
||||||
|
|
||||||
|
def test_different_sourcetype_ignored(self):
|
||||||
|
otherType = _ds("o", "/", sourceType="outlookFolder", neutralize=True)
|
||||||
|
leaf = _ds("l", "/folder")
|
||||||
|
self.assertFalse(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [otherType, leaf]))
|
||||||
|
|
||||||
|
def test_path_separator_required(self):
|
||||||
|
notAncestor = _ds("a", "/foo", neutralize=True)
|
||||||
|
leaf = _ds("l", "/foobar")
|
||||||
|
self.assertFalse(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [notAncestor, leaf]))
|
||||||
|
|
||||||
|
def test_root_is_ancestor_of_everything(self):
|
||||||
|
root = _ds("r", "/", neutralize=True)
|
||||||
|
leaf = _ds("l", "/anything/anywhere")
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [root, leaf]))
|
||||||
|
|
||||||
|
def test_scope_inheritance_with_string_default(self):
|
||||||
|
root = _ds("r", "/", scope="mandate")
|
||||||
|
leaf = _ds("l", "/folder")
|
||||||
|
self.assertEqual(_inheritFlags.getEffectiveFlag(leaf, "scope", [root, leaf]), "mandate")
|
||||||
|
|
||||||
|
def test_scope_default_personal_when_empty(self):
|
||||||
|
leaf = _ds("l", "/folder")
|
||||||
|
self.assertEqual(_inheritFlags.getEffectiveFlag(leaf, "scope", [leaf]), "personal")
|
||||||
|
|
||||||
|
def test_unknown_flag_raises(self):
|
||||||
|
leaf = _ds("l", "/")
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
_inheritFlags.getEffectiveFlag(leaf, "unknownFlag", [leaf])
|
||||||
|
|
||||||
|
def test_explicit_false_overrides_inherited_true(self):
|
||||||
|
root = _ds("r", "/", neutralize=True)
|
||||||
|
leaf = _ds("l", "/folder", neutralize=False)
|
||||||
|
self.assertFalse(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [root, leaf]))
|
||||||
|
|
||||||
|
def test_connection_root_inherits_cross_sourcetype(self):
|
||||||
|
connRoot = _ds("conn", "/", sourceType="msft", neutralize=True)
|
||||||
|
spService = _ds("sp", "/", sourceType="sharepointFolder")
|
||||||
|
olService = _ds("ol", "/", sourceType="outlookFolder")
|
||||||
|
allDs = [connRoot, spService, olService]
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlag(spService, "neutralize", allDs))
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlag(olService, "neutralize", allDs))
|
||||||
|
|
||||||
|
def test_same_sourcetype_ancestor_wins_over_connection_root(self):
|
||||||
|
connRoot = _ds("conn", "/", sourceType="msft", neutralize=True)
|
||||||
|
spRoot = _ds("sp", "/", sourceType="sharepointFolder", neutralize=False)
|
||||||
|
spLeaf = _ds("spl", "/sites/x", sourceType="sharepointFolder")
|
||||||
|
self.assertFalse(_inheritFlags.getEffectiveFlag(spLeaf, "neutralize", [connRoot, spRoot, spLeaf]))
|
||||||
|
|
||||||
|
def test_connection_root_does_not_self_inherit(self):
|
||||||
|
connRoot = _ds("conn", "/", sourceType="msft")
|
||||||
|
self.assertFalse(_inheritFlags.getEffectiveFlag(connRoot, "neutralize", [connRoot]))
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# DataSource: getEffectiveFlag mode='aggregate'
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
class TestEffectiveFlagAggregate(unittest.TestCase):
|
||||||
|
def test_leaf_without_descendants_returns_concrete(self):
|
||||||
|
leaf = _ds("l", "/folder", neutralize=True)
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [leaf], mode="aggregate"))
|
||||||
|
|
||||||
|
def test_all_descendants_same_returns_concrete(self):
|
||||||
|
root = _ds("r", "/", neutralize=True)
|
||||||
|
child1 = _ds("c1", "/a", neutralize=True)
|
||||||
|
child2 = _ds("c2", "/b") # inherits True from root
|
||||||
|
allDs = [root, child1, child2]
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlag(root, "neutralize", allDs, mode="aggregate"))
|
||||||
|
|
||||||
|
def test_divergent_descendants_returns_mixed(self):
|
||||||
|
root = _ds("r", "/", neutralize=True)
|
||||||
|
child1 = _ds("c1", "/a", neutralize=False)
|
||||||
|
child2 = _ds("c2", "/b") # inherits True from root
|
||||||
|
allDs = [root, child1, child2]
|
||||||
|
self.assertEqual(_inheritFlags.getEffectiveFlag(root, "neutralize", allDs, mode="aggregate"), "mixed")
|
||||||
|
|
||||||
|
def test_mixed_scope(self):
|
||||||
|
root = _ds("r", "/", scope="personal")
|
||||||
|
child1 = _ds("c1", "/a", scope="team")
|
||||||
|
child2 = _ds("c2", "/b") # inherits personal from root
|
||||||
|
allDs = [root, child1, child2]
|
||||||
|
self.assertEqual(_inheritFlags.getEffectiveFlag(root, "scope", allDs, mode="aggregate"), "mixed")
|
||||||
|
|
||||||
|
def test_all_scope_same_explicit_returns_concrete(self):
|
||||||
|
root = _ds("r", "/", scope="team")
|
||||||
|
child1 = _ds("c1", "/a", scope="team")
|
||||||
|
child2 = _ds("c2", "/b") # inherits team
|
||||||
|
allDs = [root, child1, child2]
|
||||||
|
self.assertEqual(_inheritFlags.getEffectiveFlag(root, "scope", allDs, mode="aggregate"), "team")
|
||||||
|
|
||||||
|
def test_connection_root_aggregate_cross_sourcetype(self):
|
||||||
|
connRoot = _ds("conn", "/", sourceType="msft", neutralize=True)
|
||||||
|
spExplicit = _ds("sp", "/", sourceType="sharepointFolder", neutralize=False)
|
||||||
|
olInherit = _ds("ol", "/", sourceType="outlookFolder") # inherits True
|
||||||
|
allDs = [connRoot, spExplicit, olInherit]
|
||||||
|
self.assertEqual(
|
||||||
|
_inheritFlags.getEffectiveFlag(connRoot, "neutralize", allDs, mode="aggregate"),
|
||||||
|
"mixed",
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_mid_level_aggregate_only_considers_own_subtree(self):
|
||||||
|
root = _ds("r", "/", neutralize=True)
|
||||||
|
mid = _ds("m", "/folder", neutralize=True)
|
||||||
|
midChild = _ds("mc", "/folder/sub", neutralize=True)
|
||||||
|
sibling = _ds("s", "/other", neutralize=False) # not under mid
|
||||||
|
allDs = [root, mid, midChild, sibling]
|
||||||
|
# mid's subtree is just midChild(True) + mid(True) = uniform
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlag(mid, "neutralize", allDs, mode="aggregate"))
|
||||||
|
# root's subtree includes sibling(False) = mixed
|
||||||
|
self.assertEqual(
|
||||||
|
_inheritFlags.getEffectiveFlag(root, "neutralize", allDs, mode="aggregate"),
|
||||||
|
"mixed",
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_walk_mode_never_returns_mixed(self):
|
||||||
|
root = _ds("r", "/", neutralize=True)
|
||||||
|
child = _ds("c", "/a", neutralize=False)
|
||||||
|
allDs = [root, child]
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlag(root, "neutralize", allDs, mode="walk"))
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# DataSource: cascadeResetDescendants (bottom-up, List[str])
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
class TestCascadeReset(unittest.TestCase):
|
||||||
|
def _makeRootIf(self, dataSources: List[dict]):
|
||||||
|
rootIf = MagicMock()
|
||||||
|
rootIf.db.getRecordset = MagicMock(return_value=dataSources)
|
||||||
|
modified = []
|
||||||
|
|
||||||
|
def _modify(model, recordId, fields):
|
||||||
|
modified.append((recordId, fields))
|
||||||
|
rootIf.db.recordModify = MagicMock(side_effect=_modify)
|
||||||
|
return rootIf, modified
|
||||||
|
|
||||||
|
def test_returns_list_of_ids(self):
|
||||||
|
parent = _ds("p", "/sites", neutralize=True)
|
||||||
|
child = _ds("c1", "/sites/folder1", neutralize=False)
|
||||||
|
rootIf, _ = self._makeRootIf([parent, child])
|
||||||
|
result = _inheritFlags.cascadeResetDescendants(rootIf, parent, "neutralize")
|
||||||
|
self.assertIsInstance(result, list)
|
||||||
|
self.assertEqual(result, ["c1"])
|
||||||
|
|
||||||
|
def test_resets_only_explicit_descendants(self):
|
||||||
|
parent = _ds("p", "/sites", neutralize=True)
|
||||||
|
explicitChild = _ds("c1", "/sites/folder1", neutralize=False)
|
||||||
|
inheritChild = _ds("c2", "/sites/folder2")
|
||||||
|
sibling = _ds("s", "/other", neutralize=True)
|
||||||
|
rootIf, modified = self._makeRootIf([parent, explicitChild, inheritChild, sibling])
|
||||||
|
|
||||||
|
result = _inheritFlags.cascadeResetDescendants(rootIf, parent, "neutralize")
|
||||||
|
|
||||||
|
self.assertEqual(result, ["c1"])
|
||||||
|
self.assertEqual(modified, [("c1", {"neutralize": None})])
|
||||||
|
|
||||||
|
def test_bottom_up_order(self):
|
||||||
|
"""Deepest items are reset first."""
|
||||||
|
parent = _ds("p", "/", neutralize=True)
|
||||||
|
level1 = _ds("l1", "/a", neutralize=False)
|
||||||
|
level2 = _ds("l2", "/a/b", neutralize=False)
|
||||||
|
level3 = _ds("l3", "/a/b/c", neutralize=False)
|
||||||
|
rootIf, modified = self._makeRootIf([parent, level1, level2, level3])
|
||||||
|
|
||||||
|
result = _inheritFlags.cascadeResetDescendants(rootIf, parent, "neutralize")
|
||||||
|
|
||||||
|
self.assertEqual(result, ["l3", "l2", "l1"])
|
||||||
|
|
||||||
|
def test_deep_cascade_through_null_items(self):
|
||||||
|
"""null items are skipped (no DB write) but cascade continues deeper."""
|
||||||
|
parent = _ds("p", "/", neutralize=True)
|
||||||
|
nullChild = _ds("n", "/a") # null — no write, but not a barrier
|
||||||
|
deepExplicit = _ds("d", "/a/b", neutralize=False)
|
||||||
|
rootIf, modified = self._makeRootIf([parent, nullChild, deepExplicit])
|
||||||
|
|
||||||
|
result = _inheritFlags.cascadeResetDescendants(rootIf, parent, "neutralize")
|
||||||
|
|
||||||
|
self.assertEqual(result, ["d"])
|
||||||
|
self.assertEqual(modified, [("d", {"neutralize": None})])
|
||||||
|
|
||||||
|
def test_does_not_modify_parent(self):
|
||||||
|
parent = _ds("p", "/", neutralize=True)
|
||||||
|
child = _ds("c", "/a", neutralize=False)
|
||||||
|
rootIf, modified = self._makeRootIf([parent, child])
|
||||||
|
_inheritFlags.cascadeResetDescendants(rootIf, parent, "neutralize")
|
||||||
|
self.assertNotIn("p", [m[0] for m in modified])
|
||||||
|
|
||||||
|
def test_connection_root_cascades_cross_sourcetype(self):
|
||||||
|
connRoot = _ds("conn", "/", sourceType="msft", neutralize=True)
|
||||||
|
spExplicit = _ds("sp", "/", sourceType="sharepointFolder", neutralize=False)
|
||||||
|
olInherit = _ds("ol", "/", sourceType="outlookFolder")
|
||||||
|
spLeaf = _ds("sp-leaf", "/sites/x", sourceType="sharepointFolder", neutralize=True)
|
||||||
|
rootIf, modified = self._makeRootIf([connRoot, spExplicit, olInherit, spLeaf])
|
||||||
|
|
||||||
|
result = _inheritFlags.cascadeResetDescendants(rootIf, connRoot, "neutralize")
|
||||||
|
|
||||||
|
self.assertEqual(set(result), {"sp", "sp-leaf"})
|
||||||
|
# sp-leaf is deeper, should come first
|
||||||
|
self.assertEqual(result[0], "sp-leaf")
|
||||||
|
|
||||||
|
def test_does_not_cross_sourcetype_for_non_authority(self):
|
||||||
|
parent = _ds("p", "/", neutralize=True, sourceType="sharepointFolder")
|
||||||
|
otherType = _ds("o", "/anything", neutralize=False, sourceType="outlookFolder")
|
||||||
|
rootIf, modified = self._makeRootIf([parent, otherType])
|
||||||
|
result = _inheritFlags.cascadeResetDescendants(rootIf, parent, "neutralize")
|
||||||
|
self.assertEqual(result, [])
|
||||||
|
|
||||||
|
def test_unknown_flag_raises(self):
|
||||||
|
parent = _ds("p", "/", neutralize=True)
|
||||||
|
rootIf, _ = self._makeRootIf([parent])
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
_inheritFlags.cascadeResetDescendants(rootIf, parent, "unknownFlag")
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# DataSource: collectAncestorChain
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
class TestCollectAncestorChain(unittest.TestCase):
|
||||||
|
def test_returns_nearest_first(self):
|
||||||
|
root = _ds("r", "/", neutralize=True)
|
||||||
|
mid = _ds("m", "/a")
|
||||||
|
leaf = _ds("l", "/a/b")
|
||||||
|
chain = _inheritFlags.collectAncestorChain(leaf, [root, mid, leaf])
|
||||||
|
self.assertEqual([_inheritFlags._getRecordValue(c, "id") for c in chain], ["m", "r"])
|
||||||
|
|
||||||
|
def test_connection_root_is_last(self):
|
||||||
|
connRoot = _ds("conn", "/", sourceType="msft")
|
||||||
|
spRoot = _ds("sp", "/", sourceType="sharepointFolder")
|
||||||
|
spLeaf = _ds("spl", "/sub", sourceType="sharepointFolder")
|
||||||
|
chain = _inheritFlags.collectAncestorChain(spLeaf, [connRoot, spRoot, spLeaf])
|
||||||
|
ids = [_inheritFlags._getRecordValue(c, "id") for c in chain]
|
||||||
|
self.assertEqual(ids, ["sp", "conn"])
|
||||||
|
|
||||||
|
def test_root_has_no_ancestors(self):
|
||||||
|
root = _ds("r", "/")
|
||||||
|
chain = _inheritFlags.collectAncestorChain(root, [root])
|
||||||
|
self.assertEqual(chain, [])
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# DataSource: buildEffectiveByConnection
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
class TestBuildEffectiveByConnection(unittest.TestCase):
|
||||||
|
def test_walk_mode(self):
|
||||||
|
root = _ds("r", "/", neutralize=True)
|
||||||
|
child = _ds("c", "/a", neutralize=False)
|
||||||
|
leaf = _ds("l", "/a/b") # inherits False from child
|
||||||
|
result = _inheritFlags.buildEffectiveByConnection([root, child, leaf], "neutralize", mode="walk")
|
||||||
|
self.assertEqual(result, {"r": True, "c": False, "l": False})
|
||||||
|
|
||||||
|
def test_aggregate_mode(self):
|
||||||
|
root = _ds("r", "/", neutralize=True)
|
||||||
|
child = _ds("c", "/a", neutralize=False)
|
||||||
|
leaf = _ds("l", "/a/b") # inherits False from child
|
||||||
|
result = _inheritFlags.buildEffectiveByConnection([root, child, leaf], "neutralize", mode="aggregate")
|
||||||
|
self.assertEqual(result["r"], "mixed")
|
||||||
|
self.assertEqual(result["c"], False)
|
||||||
|
self.assertEqual(result["l"], False)
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# FeatureDataSource: getEffectiveFlagFds
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
class TestFdsEffectiveFlagWalk(unittest.TestCase):
|
||||||
|
def test_own_explicit_wins(self):
|
||||||
|
ws = _fds("ws", tableName="*", neutralize=False)
|
||||||
|
rec = _fds("r", tableName="Pos", recordFilter={"id": "1"}, neutralize=True)
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlagFds(rec, "neutralize", [ws, rec]))
|
||||||
|
|
||||||
|
def test_inherits_from_table_wildcard(self):
|
||||||
|
tbl = _fds("t", tableName="Pos", neutralize=True)
|
||||||
|
rec = _fds("r", tableName="Pos", recordFilter={"id": "1"})
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlagFds(rec, "neutralize", [tbl, rec]))
|
||||||
|
|
||||||
|
def test_table_wildcard_beats_workspace_wildcard(self):
|
||||||
|
ws = _fds("ws", tableName="*", neutralize=False)
|
||||||
|
tbl = _fds("t", tableName="Pos", neutralize=True)
|
||||||
|
rec = _fds("r", tableName="Pos", recordFilter={"id": "1"})
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlagFds(rec, "neutralize", [ws, tbl, rec]))
|
||||||
|
|
||||||
|
def test_workspace_wildcard_inherits_when_no_table(self):
|
||||||
|
ws = _fds("ws", tableName="*", neutralize=True)
|
||||||
|
rec = _fds("r", tableName="Pos", recordFilter={"id": "1"})
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlagFds(rec, "neutralize", [ws, rec]))
|
||||||
|
|
||||||
|
def test_default_false_when_chain_empty(self):
|
||||||
|
rec = _fds("r", tableName="Pos", recordFilter={"id": "1"})
|
||||||
|
self.assertFalse(_inheritFlags.getEffectiveFlagFds(rec, "neutralize", [rec]))
|
||||||
|
|
||||||
|
def test_unknown_flag_raises(self):
|
||||||
|
rec = _fds("r", tableName="*")
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
_inheritFlags.getEffectiveFlagFds(rec, "doesNotExist", [rec])
|
||||||
|
|
||||||
|
|
||||||
|
class TestFdsEffectiveFlagAggregate(unittest.TestCase):
|
||||||
|
def test_leaf_without_descendants(self):
|
||||||
|
rec = _fds("r", tableName="Pos", recordFilter={"id": "1"}, neutralize=True)
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlagFds(rec, "neutralize", [rec], mode="aggregate"))
|
||||||
|
|
||||||
|
def test_all_descendants_same(self):
|
||||||
|
ws = _fds("ws", tableName="*", neutralize=True)
|
||||||
|
tbl = _fds("t", tableName="Pos") # inherits True
|
||||||
|
rec = _fds("r", tableName="Pos", recordFilter={"id": "1"}) # inherits True
|
||||||
|
allFds = [ws, tbl, rec]
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlagFds(ws, "neutralize", allFds, mode="aggregate"))
|
||||||
|
|
||||||
|
def test_divergent_descendants_returns_mixed(self):
|
||||||
|
ws = _fds("ws", tableName="*", neutralize=True)
|
||||||
|
tbl = _fds("t", tableName="Pos", neutralize=False)
|
||||||
|
rec = _fds("r", tableName="Pos", recordFilter={"id": "1"}) # inherits False from tbl
|
||||||
|
allFds = [ws, tbl, rec]
|
||||||
|
self.assertEqual(
|
||||||
|
_inheritFlags.getEffectiveFlagFds(ws, "neutralize", allFds, mode="aggregate"),
|
||||||
|
"mixed",
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_table_aggregate_own_subtree_only(self):
|
||||||
|
ws = _fds("ws", tableName="*", neutralize=True)
|
||||||
|
tblA = _fds("tA", tableName="A", neutralize=True)
|
||||||
|
recA = _fds("rA", tableName="A", recordFilter={"id": "1"}, neutralize=True)
|
||||||
|
tblB = _fds("tB", tableName="B", neutralize=False)
|
||||||
|
allFds = [ws, tblA, recA, tblB]
|
||||||
|
# tblA subtree: all True
|
||||||
|
self.assertTrue(_inheritFlags.getEffectiveFlagFds(tblA, "neutralize", allFds, mode="aggregate"))
|
||||||
|
# ws subtree: mixed (tblB is False)
|
||||||
|
self.assertEqual(
|
||||||
|
_inheritFlags.getEffectiveFlagFds(ws, "neutralize", allFds, mode="aggregate"),
|
||||||
|
"mixed",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# FeatureDataSource: cascadeResetDescendantsFds (bottom-up, List[str])
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
class TestFdsCascadeReset(unittest.TestCase):
|
||||||
|
def _makeRootIf(self, fdses):
|
||||||
|
rootIf = MagicMock()
|
||||||
|
rootIf.db.getRecordset = MagicMock(return_value=fdses)
|
||||||
|
modified = []
|
||||||
|
|
||||||
|
def _modify(model, recordId, fields):
|
||||||
|
modified.append((recordId, fields))
|
||||||
|
rootIf.db.recordModify = MagicMock(side_effect=_modify)
|
||||||
|
return rootIf, modified
|
||||||
|
|
||||||
|
def test_returns_list_of_ids(self):
|
||||||
|
ws = _fds("ws", tableName="*", neutralize=True)
|
||||||
|
tbl = _fds("t", tableName="Pos", neutralize=False)
|
||||||
|
rootIf, _ = self._makeRootIf([ws, tbl])
|
||||||
|
result = _inheritFlags.cascadeResetDescendantsFds(rootIf, ws, "neutralize")
|
||||||
|
self.assertIsInstance(result, list)
|
||||||
|
self.assertEqual(result, ["t"])
|
||||||
|
|
||||||
|
def test_workspace_cascades_to_all_explicit_descendants(self):
|
||||||
|
ws = _fds("ws", tableName="*", neutralize=True)
|
||||||
|
tblExplicit = _fds("t", tableName="Pos", neutralize=False)
|
||||||
|
tblInherit = _fds("t2", tableName="Other")
|
||||||
|
recExplicit = _fds("r", tableName="Pos", recordFilter={"id": "1"}, neutralize=True)
|
||||||
|
rootIf, modified = self._makeRootIf([ws, tblExplicit, tblInherit, recExplicit])
|
||||||
|
|
||||||
|
result = _inheritFlags.cascadeResetDescendantsFds(rootIf, ws, "neutralize")
|
||||||
|
|
||||||
|
self.assertEqual(set(result), {"t", "r"})
|
||||||
|
# record is deeper (depth 2) than table (depth 1), should come first
|
||||||
|
self.assertEqual(result[0], "r")
|
||||||
|
|
||||||
|
def test_table_cascades_only_to_same_table_records(self):
|
||||||
|
tbl = _fds("t", tableName="Pos", neutralize=True)
|
||||||
|
recSame = _fds("r1", tableName="Pos", recordFilter={"id": "1"}, neutralize=False)
|
||||||
|
recOther = _fds("r2", tableName="Other", recordFilter={"id": "1"}, neutralize=False)
|
||||||
|
rootIf, modified = self._makeRootIf([tbl, recSame, recOther])
|
||||||
|
|
||||||
|
result = _inheritFlags.cascadeResetDescendantsFds(rootIf, tbl, "neutralize")
|
||||||
|
|
||||||
|
self.assertEqual(result, ["r1"])
|
||||||
|
self.assertEqual(modified, [("r1", {"neutralize": None})])
|
||||||
|
|
||||||
|
def test_record_has_no_cascade(self):
|
||||||
|
rec = _fds("r", tableName="Pos", recordFilter={"id": "1"}, neutralize=True)
|
||||||
|
rootIf, modified = self._makeRootIf([rec])
|
||||||
|
result = _inheritFlags.cascadeResetDescendantsFds(rootIf, rec, "neutralize")
|
||||||
|
self.assertEqual(result, [])
|
||||||
|
|
||||||
|
def test_unknown_flag_raises(self):
|
||||||
|
ws = _fds("ws", tableName="*", neutralize=True)
|
||||||
|
rootIf, _ = self._makeRootIf([ws])
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
_inheritFlags.cascadeResetDescendantsFds(rootIf, ws, "doesNotExist")
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# FeatureDataSource: collectAncestorChainFds
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
class TestCollectAncestorChainFds(unittest.TestCase):
|
||||||
|
def test_record_has_table_then_workspace(self):
|
||||||
|
ws = _fds("ws", tableName="*")
|
||||||
|
tbl = _fds("t", tableName="Pos")
|
||||||
|
rec = _fds("r", tableName="Pos", recordFilter={"id": "1"})
|
||||||
|
chain = _inheritFlags.collectAncestorChainFds(rec, [ws, tbl, rec])
|
||||||
|
ids = [c["id"] for c in chain]
|
||||||
|
self.assertEqual(ids, ["t", "ws"])
|
||||||
|
|
||||||
|
def test_table_has_only_workspace(self):
|
||||||
|
ws = _fds("ws", tableName="*")
|
||||||
|
tbl = _fds("t", tableName="Pos")
|
||||||
|
chain = _inheritFlags.collectAncestorChainFds(tbl, [ws, tbl])
|
||||||
|
self.assertEqual([c["id"] for c in chain], ["ws"])
|
||||||
|
|
||||||
|
def test_workspace_has_no_ancestors(self):
|
||||||
|
ws = _fds("ws", tableName="*")
|
||||||
|
chain = _inheritFlags.collectAncestorChainFds(ws, [ws])
|
||||||
|
self.assertEqual(chain, [])
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# FeatureDataSource: buildEffectiveByWorkspaceFds
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
class TestBuildEffectiveByWorkspaceFds(unittest.TestCase):
|
||||||
|
def test_walk_mode(self):
|
||||||
|
ws = _fds("ws", tableName="*", neutralize=True)
|
||||||
|
tbl = _fds("t", tableName="Pos", neutralize=False)
|
||||||
|
rec = _fds("r", tableName="Pos", recordFilter={"id": "1"}) # inherits False from tbl
|
||||||
|
result = _inheritFlags.buildEffectiveByWorkspaceFds([ws, tbl, rec], "neutralize", mode="walk")
|
||||||
|
self.assertEqual(result, {"ws": True, "t": False, "r": False})
|
||||||
|
|
||||||
|
def test_aggregate_mode(self):
|
||||||
|
ws = _fds("ws", tableName="*", neutralize=True)
|
||||||
|
tbl = _fds("t", tableName="Pos", neutralize=False)
|
||||||
|
rec = _fds("r", tableName="Pos", recordFilter={"id": "1"})
|
||||||
|
result = _inheritFlags.buildEffectiveByWorkspaceFds([ws, tbl, rec], "neutralize", mode="aggregate")
|
||||||
|
self.assertEqual(result["ws"], "mixed")
|
||||||
|
self.assertEqual(result["t"], False)
|
||||||
|
self.assertEqual(result["r"], False)
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# resolveEffectiveForPath (with and without own record)
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
class TestResolveEffectiveForPath(unittest.TestCase):
|
||||||
|
def test_with_exact_record(self):
|
||||||
|
root = _ds("r", "/", neutralize=True, scope="mandate", ragIndexEnabled=False)
|
||||||
|
leaf = _ds("l", "/folder/sub", neutralize=False)
|
||||||
|
allDs = [root, leaf]
|
||||||
|
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/folder/sub", allDs)
|
||||||
|
self.assertEqual(result["effectiveNeutralize"], False)
|
||||||
|
self.assertEqual(result["effectiveScope"], "mandate")
|
||||||
|
self.assertEqual(result["effectiveRagIndexEnabled"], False)
|
||||||
|
|
||||||
|
def test_without_record_inherits_from_ancestor(self):
|
||||||
|
root = _ds("r", "/", neutralize=True, scope="mandate", ragIndexEnabled=True)
|
||||||
|
allDs = [root]
|
||||||
|
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/deep/path/file.txt", allDs)
|
||||||
|
self.assertEqual(result["effectiveNeutralize"], True)
|
||||||
|
self.assertEqual(result["effectiveScope"], "mandate")
|
||||||
|
self.assertEqual(result["effectiveRagIndexEnabled"], True)
|
||||||
|
|
||||||
|
def test_without_record_inherits_from_closest_ancestor(self):
|
||||||
|
root = _ds("r", "/", neutralize=True, ragIndexEnabled=True)
|
||||||
|
mid = _ds("m", "/folder", neutralize=False, ragIndexEnabled=False)
|
||||||
|
allDs = [root, mid]
|
||||||
|
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/folder/sub/file.txt", allDs)
|
||||||
|
self.assertEqual(result["effectiveNeutralize"], False)
|
||||||
|
self.assertEqual(result["effectiveRagIndexEnabled"], False)
|
||||||
|
|
||||||
|
def test_without_record_no_ancestors_returns_defaults(self):
|
||||||
|
allDs: list = []
|
||||||
|
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/path", allDs)
|
||||||
|
self.assertEqual(result["effectiveNeutralize"], False)
|
||||||
|
self.assertEqual(result["effectiveScope"], "personal")
|
||||||
|
self.assertEqual(result["effectiveRagIndexEnabled"], False)
|
||||||
|
|
||||||
|
def test_connection_root_covers_service_subtree(self):
|
||||||
|
connRoot = _ds("cr", "/", neutralize=True, sourceType="msft")
|
||||||
|
allDs = [connRoot]
|
||||||
|
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/sites/intranet", allDs)
|
||||||
|
self.assertEqual(result["effectiveNeutralize"], True)
|
||||||
|
|
||||||
|
def test_exact_record_with_aggregate_mixed(self):
|
||||||
|
root = _ds("r", "/", neutralize=True)
|
||||||
|
leaf = _ds("l", "/sub", neutralize=False)
|
||||||
|
allDs = [root, leaf]
|
||||||
|
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/", allDs, mode="aggregate")
|
||||||
|
self.assertEqual(result["effectiveNeutralize"], "mixed")
|
||||||
|
|
||||||
|
|
||||||
|
class TestResolveEffectiveForFds(unittest.TestCase):
|
||||||
|
def test_with_exact_record(self):
|
||||||
|
ws = _fds("ws", tableName="*", neutralize=True, scope="mandate")
|
||||||
|
tbl = _fds("t", tableName="Pos", neutralize=False, scope="personal")
|
||||||
|
allFds = [ws, tbl]
|
||||||
|
result = _inheritFlags.resolveEffectiveForFds("fi-1", "Pos", None, allFds)
|
||||||
|
self.assertEqual(result["effectiveNeutralize"], False)
|
||||||
|
self.assertEqual(result["effectiveScope"], "personal")
|
||||||
|
self.assertEqual(result["effectiveRagIndexEnabled"], False)
|
||||||
|
|
||||||
|
def test_without_record_inherits_from_workspace_wildcard(self):
|
||||||
|
ws = _fds("ws", tableName="*", neutralize=True, scope="mandate", ragIndexEnabled=True)
|
||||||
|
allFds = [ws]
|
||||||
|
result = _inheritFlags.resolveEffectiveForFds("fi-1", "Unknown", None, allFds)
|
||||||
|
self.assertEqual(result["effectiveNeutralize"], True)
|
||||||
|
self.assertEqual(result["effectiveScope"], "mandate")
|
||||||
|
self.assertEqual(result["effectiveRagIndexEnabled"], True)
|
||||||
|
|
||||||
|
def test_without_record_no_ancestors_returns_defaults(self):
|
||||||
|
allFds: list = []
|
||||||
|
result = _inheritFlags.resolveEffectiveForFds("fi-1", "Pos", None, allFds)
|
||||||
|
self.assertEqual(result["effectiveNeutralize"], False)
|
||||||
|
self.assertEqual(result["effectiveScope"], "personal")
|
||||||
|
self.assertEqual(result["effectiveRagIndexEnabled"], False)
|
||||||
|
|
||||||
|
def test_rag_inherits_when_table_overrides_neutralize_only(self):
|
||||||
|
"""Tables that override only neutralize must still inherit RAG from parent."""
|
||||||
|
ws = _fds("ws", tableName="*", ragIndexEnabled=True)
|
||||||
|
tbl = _fds("t", tableName="Pos", neutralize=False)
|
||||||
|
allFds = [ws, tbl]
|
||||||
|
result = _inheritFlags.resolveEffectiveForFds("fi-1", "Pos", None, allFds)
|
||||||
|
self.assertEqual(result["effectiveRagIndexEnabled"], True)
|
||||||
|
|
||||||
|
def test_rag_aggregate_mixed_when_descendants_diverge(self):
|
||||||
|
ws = _fds("ws", tableName="*", ragIndexEnabled=True)
|
||||||
|
tbl = _fds("t", tableName="Pos", ragIndexEnabled=False)
|
||||||
|
allFds = [ws, tbl]
|
||||||
|
result = _inheritFlags.resolveEffectiveForFds("fi-1", "*", None, allFds, mode="aggregate")
|
||||||
|
self.assertEqual(result["effectiveRagIndexEnabled"], "mixed")
|
||||||
|
|
||||||
|
def test_inheritable_fds_flags_includes_rag(self):
|
||||||
|
self.assertIn("ragIndexEnabled", _inheritFlags._INHERITABLE_FDS_FLAGS)
|
||||||
|
self.assertIn("neutralize", _inheritFlags._INHERITABLE_FDS_FLAGS)
|
||||||
|
self.assertIn("scope", _inheritFlags._INHERITABLE_FDS_FLAGS)
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# FDS cascade resets RAG (in addition to neutralize and scope)
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
class TestCascadeResetFdsRag(unittest.TestCase):
|
||||||
|
def test_cascade_resets_rag_on_descendants(self):
|
||||||
|
ws = _fds("ws", tableName="*")
|
||||||
|
tbl = _fds("t", tableName="Pos", ragIndexEnabled=False)
|
||||||
|
allFds = [ws, tbl]
|
||||||
|
rootIf = MagicMock()
|
||||||
|
rootIf.db.getRecordset.return_value = allFds
|
||||||
|
rootIf.db.recordModify = MagicMock()
|
||||||
|
result = _inheritFlags.cascadeResetDescendantsFds(rootIf, ws, "ragIndexEnabled")
|
||||||
|
self.assertIn("t", result)
|
||||||
|
rootIf.db.recordModify.assert_called()
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# Path normalization
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
class TestPathNormalization(unittest.TestCase):
|
||||||
|
def test_empty_path_normalises_to_root(self):
|
||||||
|
self.assertEqual(_inheritFlags._normalisePath(""), "/")
|
||||||
|
self.assertEqual(_inheritFlags._normalisePath(None), "/")
|
||||||
|
|
||||||
|
def test_trailing_slash_stripped(self):
|
||||||
|
self.assertEqual(_inheritFlags._normalisePath("/foo/"), "/foo")
|
||||||
|
self.assertEqual(_inheritFlags._normalisePath("/"), "/")
|
||||||
|
|
||||||
|
def test_leading_slash_added(self):
|
||||||
|
self.assertEqual(_inheritFlags._normalisePath("foo/bar"), "/foo/bar")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
|
|
@ -99,11 +99,18 @@ def test_onConnectionRevoked_ignores_missing_id(monkeypatch):
|
||||||
assert seen == []
|
assert seen == []
|
||||||
|
|
||||||
|
|
||||||
|
def _stubRagEnabledDs(monkeypatch, dataSources):
|
||||||
|
"""Stub _loadRagEnabledDataSources so tests don't need a live DB."""
|
||||||
|
monkeypatch.setattr(consumer, "_loadRagEnabledDataSources", lambda *_, **__: dataSources)
|
||||||
|
|
||||||
|
|
||||||
def test_bootstrap_job_skips_unsupported_authority(monkeypatch):
|
def test_bootstrap_job_skips_unsupported_authority(monkeypatch):
|
||||||
|
_stubRagEnabledDs(monkeypatch, [{"id": "ds1", "sourceType": "unknownType"}])
|
||||||
|
|
||||||
async def _run():
|
async def _run():
|
||||||
result = await consumer._bootstrapJobHandler(
|
result = await consumer._bootstrapJobHandler(
|
||||||
{"payload": {"connectionId": "c1", "authority": "slack"}},
|
{"payload": {"connectionId": "c1", "authority": "slack"}},
|
||||||
lambda *_: None,
|
lambda *_, **__: None,
|
||||||
)
|
)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
@ -114,13 +121,18 @@ def test_bootstrap_job_skips_unsupported_authority(monkeypatch):
|
||||||
|
|
||||||
|
|
||||||
def test_bootstrap_job_dispatches_msft_parts(monkeypatch):
|
def test_bootstrap_job_dispatches_msft_parts(monkeypatch):
|
||||||
|
_stubRagEnabledDs(monkeypatch, [
|
||||||
|
{"id": "ds1", "sourceType": "sharepointFolder"},
|
||||||
|
{"id": "ds2", "sourceType": "outlookFolder"},
|
||||||
|
])
|
||||||
|
|
||||||
calls = {"sp": 0, "ol": 0}
|
calls = {"sp": 0, "ol": 0}
|
||||||
|
|
||||||
async def _fakeSp(connectionId, progressCb=None):
|
async def _fakeSp(connectionId, progressCb=None, dataSources=None):
|
||||||
calls["sp"] += 1
|
calls["sp"] += 1
|
||||||
return {"indexed": 1}
|
return {"indexed": 1}
|
||||||
|
|
||||||
async def _fakeOl(connectionId, progressCb=None):
|
async def _fakeOl(connectionId, progressCb=None, dataSources=None):
|
||||||
calls["ol"] += 1
|
calls["ol"] += 1
|
||||||
return {"indexed": 2}
|
return {"indexed": 2}
|
||||||
|
|
||||||
|
|
@ -142,7 +154,7 @@ def test_bootstrap_job_dispatches_msft_parts(monkeypatch):
|
||||||
async def _run():
|
async def _run():
|
||||||
return await consumer._bootstrapJobHandler(
|
return await consumer._bootstrapJobHandler(
|
||||||
{"payload": {"connectionId": "c1", "authority": "msft"}},
|
{"payload": {"connectionId": "c1", "authority": "msft"}},
|
||||||
lambda *_: None,
|
lambda *_, **__: None,
|
||||||
)
|
)
|
||||||
|
|
||||||
result = asyncio.run(_run())
|
result = asyncio.run(_run())
|
||||||
|
|
@ -152,13 +164,18 @@ def test_bootstrap_job_dispatches_msft_parts(monkeypatch):
|
||||||
|
|
||||||
|
|
||||||
def test_bootstrap_job_dispatches_google_parts(monkeypatch):
|
def test_bootstrap_job_dispatches_google_parts(monkeypatch):
|
||||||
|
_stubRagEnabledDs(monkeypatch, [
|
||||||
|
{"id": "ds1", "sourceType": "googleDriveFolder"},
|
||||||
|
{"id": "ds2", "sourceType": "gmailFolder"},
|
||||||
|
])
|
||||||
|
|
||||||
calls = {"gd": 0, "gm": 0}
|
calls = {"gd": 0, "gm": 0}
|
||||||
|
|
||||||
async def _fakeGd(connectionId, progressCb=None):
|
async def _fakeGd(connectionId, progressCb=None, dataSources=None):
|
||||||
calls["gd"] += 1
|
calls["gd"] += 1
|
||||||
return {"indexed": 7}
|
return {"indexed": 7}
|
||||||
|
|
||||||
async def _fakeGm(connectionId, progressCb=None):
|
async def _fakeGm(connectionId, progressCb=None, dataSources=None):
|
||||||
calls["gm"] += 1
|
calls["gm"] += 1
|
||||||
return {"indexed": 11}
|
return {"indexed": 11}
|
||||||
|
|
||||||
|
|
@ -180,7 +197,7 @@ def test_bootstrap_job_dispatches_google_parts(monkeypatch):
|
||||||
async def _run():
|
async def _run():
|
||||||
return await consumer._bootstrapJobHandler(
|
return await consumer._bootstrapJobHandler(
|
||||||
{"payload": {"connectionId": "c1", "authority": "google"}},
|
{"payload": {"connectionId": "c1", "authority": "google"}},
|
||||||
lambda *_: None,
|
lambda *_, **__: None,
|
||||||
)
|
)
|
||||||
|
|
||||||
result = asyncio.run(_run())
|
result = asyncio.run(_run())
|
||||||
|
|
@ -190,9 +207,13 @@ def test_bootstrap_job_dispatches_google_parts(monkeypatch):
|
||||||
|
|
||||||
|
|
||||||
def test_bootstrap_job_dispatches_clickup_part(monkeypatch):
|
def test_bootstrap_job_dispatches_clickup_part(monkeypatch):
|
||||||
|
_stubRagEnabledDs(monkeypatch, [
|
||||||
|
{"id": "ds1", "sourceType": "clickupList"},
|
||||||
|
])
|
||||||
|
|
||||||
calls = {"cu": 0}
|
calls = {"cu": 0}
|
||||||
|
|
||||||
async def _fakeCu(connectionId, progressCb=None):
|
async def _fakeCu(connectionId, progressCb=None, dataSources=None):
|
||||||
calls["cu"] += 1
|
calls["cu"] += 1
|
||||||
return {"indexed": 4}
|
return {"indexed": 4}
|
||||||
|
|
||||||
|
|
@ -207,7 +228,7 @@ def test_bootstrap_job_dispatches_clickup_part(monkeypatch):
|
||||||
async def _run():
|
async def _run():
|
||||||
return await consumer._bootstrapJobHandler(
|
return await consumer._bootstrapJobHandler(
|
||||||
{"payload": {"connectionId": "c1", "authority": "clickup"}},
|
{"payload": {"connectionId": "c1", "authority": "clickup"}},
|
||||||
lambda *_: None,
|
lambda *_, **__: None,
|
||||||
)
|
)
|
||||||
|
|
||||||
result = asyncio.run(_run())
|
result = asyncio.run(_run())
|
||||||
|
|
|
||||||
79
tests/unit/services/test_ragLimits.py
Normal file
79
tests/unit/services/test_ragLimits.py
Normal file
|
|
@ -0,0 +1,79 @@
|
||||||
|
"""Unit tests for `_ragLimits` central helpers.
|
||||||
|
|
||||||
|
Verifies:
|
||||||
|
- defaults are returned as fresh copies (no mutation leakage)
|
||||||
|
- getStoredOverrides returns ONLY explicit overrides (walker contract)
|
||||||
|
- getRagLimits merges defaults with overrides (API/cost-estimate contract)
|
||||||
|
- non-int values in stored settings are dropped, not silently coerced
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge import _ragLimits
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetDefaults(unittest.TestCase):
|
||||||
|
def test_files_defaults_have_all_keys(self):
|
||||||
|
d = _ragLimits.getDefaults("files")
|
||||||
|
self.assertEqual(set(d.keys()), {"maxItems", "maxBytes", "maxFileSize", "maxDepth"})
|
||||||
|
self.assertEqual(d["maxBytes"], 200 * 1024 * 1024)
|
||||||
|
|
||||||
|
def test_clickup_defaults(self):
|
||||||
|
d = _ragLimits.getDefaults("clickup")
|
||||||
|
self.assertEqual(set(d.keys()), {"maxTasks", "maxWorkspaces", "maxListsPerWorkspace"})
|
||||||
|
|
||||||
|
def test_defaults_are_a_fresh_copy(self):
|
||||||
|
d1 = _ragLimits.getDefaults("files")
|
||||||
|
d1["maxBytes"] = 1
|
||||||
|
d2 = _ragLimits.getDefaults("files")
|
||||||
|
self.assertEqual(d2["maxBytes"], 200 * 1024 * 1024)
|
||||||
|
|
||||||
|
def test_unknown_kind_raises(self):
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
_ragLimits.getDefaults("unknown")
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetStoredOverrides(unittest.TestCase):
|
||||||
|
def test_no_settings_returns_empty_dict(self):
|
||||||
|
self.assertEqual(_ragLimits.getStoredOverrides({"id": "x", "settings": None}, "files"), {})
|
||||||
|
|
||||||
|
def test_only_explicit_overrides_returned(self):
|
||||||
|
ds = {"id": "x", "settings": {"ragLimits": {"maxBytes": 999}}}
|
||||||
|
self.assertEqual(_ragLimits.getStoredOverrides(ds, "files"), {"maxBytes": 999})
|
||||||
|
|
||||||
|
def test_unknown_keys_dropped(self):
|
||||||
|
ds = {"id": "x", "settings": {"ragLimits": {"maxBytes": 999, "bogus": 1}}}
|
||||||
|
self.assertEqual(_ragLimits.getStoredOverrides(ds, "files"), {"maxBytes": 999})
|
||||||
|
|
||||||
|
def test_non_int_dropped(self):
|
||||||
|
ds = {"id": "x", "settings": {"ragLimits": {"maxBytes": "not-a-number"}}}
|
||||||
|
self.assertEqual(_ragLimits.getStoredOverrides(ds, "files"), {})
|
||||||
|
|
||||||
|
def test_none_or_garbage_settings_safe(self):
|
||||||
|
self.assertEqual(_ragLimits.getStoredOverrides(None, "files"), {})
|
||||||
|
self.assertEqual(_ragLimits.getStoredOverrides({"id": "x", "settings": "garbage"}, "files"), {})
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetRagLimits(unittest.TestCase):
|
||||||
|
def test_no_settings_returns_defaults(self):
|
||||||
|
result = _ragLimits.getRagLimits({"id": "x", "settings": None}, "files")
|
||||||
|
self.assertEqual(result, _ragLimits.FILES_LIMITS_DEFAULT)
|
||||||
|
|
||||||
|
def test_partial_override_merges_with_defaults(self):
|
||||||
|
ds = {"id": "x", "settings": {"ragLimits": {"maxBytes": 999}}}
|
||||||
|
result = _ragLimits.getRagLimits(ds, "files")
|
||||||
|
self.assertEqual(result["maxBytes"], 999)
|
||||||
|
self.assertEqual(result["maxItems"], _ragLimits.FILES_LIMITS_DEFAULT["maxItems"])
|
||||||
|
|
||||||
|
def test_caller_can_distinguish_unset_from_set(self):
|
||||||
|
"""Walker contract: an unset key MUST NOT appear in `getStoredOverrides`."""
|
||||||
|
ds = {"id": "x", "settings": {"ragLimits": {"maxBytes": 999}}}
|
||||||
|
overrides = _ragLimits.getStoredOverrides(ds, "files")
|
||||||
|
self.assertIn("maxBytes", overrides)
|
||||||
|
self.assertNotIn("maxItems", overrides)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
|
|
@ -42,7 +42,7 @@ from modules.features.teamsbot.datamodelTeamsbot import (
|
||||||
from modules.features.teamsbot.service import (
|
from modules.features.teamsbot.service import (
|
||||||
TeamsbotService,
|
TeamsbotService,
|
||||||
_activeServices,
|
_activeServices,
|
||||||
_sessionEvents,
|
sessionEvents,
|
||||||
getActiveService,
|
getActiveService,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -152,10 +152,10 @@ def _buildService() -> TeamsbotService:
|
||||||
def _resetGlobals():
|
def _resetGlobals():
|
||||||
"""Avoid cross-test bleed in module-level globals."""
|
"""Avoid cross-test bleed in module-level globals."""
|
||||||
_activeServices.clear()
|
_activeServices.clear()
|
||||||
_sessionEvents.clear()
|
sessionEvents.clear()
|
||||||
yield
|
yield
|
||||||
_activeServices.clear()
|
_activeServices.clear()
|
||||||
_sessionEvents.clear()
|
sessionEvents.clear()
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -251,7 +251,7 @@ class TestBuildPersistentDirectorContext:
|
||||||
]
|
]
|
||||||
rendered = svc._buildPersistentDirectorContext()
|
rendered = svc._buildPersistentDirectorContext()
|
||||||
assert "OPERATOR_DIRECTIVES" in rendered
|
assert "OPERATOR_DIRECTIVES" in rendered
|
||||||
assert "- Antworte immer in Englisch." in rendered
|
assert "Antworte immer in Englisch." in rendered
|
||||||
assert "private" in rendered
|
assert "private" in rendered
|
||||||
|
|
||||||
def test_skipsBlankText(self):
|
def test_skipsBlankText(self):
|
||||||
|
|
@ -261,7 +261,7 @@ class TestBuildPersistentDirectorContext:
|
||||||
{"id": "p2", "text": "Sei hoeflich."},
|
{"id": "p2", "text": "Sei hoeflich."},
|
||||||
]
|
]
|
||||||
rendered = svc._buildPersistentDirectorContext()
|
rendered = svc._buildPersistentDirectorContext()
|
||||||
assert "- Sei hoeflich." in rendered
|
assert "Sei hoeflich." in rendered
|
||||||
assert "p1" not in rendered # the blank one is filtered out
|
assert "p1" not in rendered # the blank one is filtered out
|
||||||
|
|
||||||
def test_allBlankPromptsResultInEmpty(self):
|
def test_allBlankPromptsResultInEmpty(self):
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue