From e942770ffc370ad38fd4b9d4ad8643a095d2c2da Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Thu, 16 Apr 2026 23:13:05 +0200
Subject: [PATCH] feat: add database-clean admin UI and unified content model (UDM)
---
app.py | 3 +
modules/datamodels/datamodelAiAudit.py | 8 +-
modules/datamodels/datamodelAudit.py | 24 +-
modules/datamodels/datamodelBase.py | 14 +-
modules/datamodels/datamodelBilling.py | 50 ++-
modules/datamodels/datamodelChat.py | 50 ++-
modules/datamodels/datamodelContent.py | 5 +-
modules/datamodels/datamodelDataSource.py | 8 +-
modules/datamodels/datamodelExtraction.py | 14 +
.../datamodels/datamodelFeatureDataSource.py | 19 +-
modules/datamodels/datamodelFeatures.py | 16 +-
modules/datamodels/datamodelFileFolder.py | 34 +-
modules/datamodels/datamodelFiles.py | 10 +-
modules/datamodels/datamodelInvitation.py | 24 +-
modules/datamodels/datamodelKnowledge.py | 18 +-
modules/datamodels/datamodelMembership.py | 58 ++-
modules/datamodels/datamodelMessaging.py | 6 +
modules/datamodels/datamodelNotification.py | 8 +-
modules/datamodels/datamodelRbac.py | 32 +-
modules/datamodels/datamodelSecurity.py | 14 +-
modules/datamodels/datamodelSubscription.py | 2 +-
modules/datamodels/datamodelUam.py | 15 +-
modules/datamodels/datamodelUdm.py | 316 ++++++++++++++
modules/datamodels/datamodelUtils.py | 8 +-
.../chatbot/interfaceFeatureChatbot.py | 6 +-
.../commcoach/interfaceFeatureCommcoach.py | 6 +-
.../datamodelFeatureGraphicalEditor.py | 91 +++-
.../interfaceFeatureGraphicalEditor.py | 8 +-
.../nodeDefinitions/__init__.py | 2 +
.../graphicalEditor/nodeDefinitions/ai.py | 34 +-
.../nodeDefinitions/clickup.py | 12 +-
.../nodeDefinitions/context.py | 30 ++
.../graphicalEditor/nodeDefinitions/data.py | 30 +-
.../graphicalEditor/nodeDefinitions/email.py | 6 +-
.../graphicalEditor/nodeDefinitions/file.py | 2 +-
.../graphicalEditor/nodeDefinitions/flow.py | 41 +-
.../graphicalEditor/nodeDefinitions/input.py | 14 +-
.../nodeDefinitions/sharepoint.py | 12 +-
.../nodeDefinitions/triggers.py | 6 +-
.../nodeDefinitions/trustee.py | 8 +-
.../features/graphicalEditor/nodeRegistry.py | 1 +
modules/features/graphicalEditor/portTypes.py | 48 +++
.../datamodelFeatureNeutralizer.py | 62 ++-
.../interfaceFeatureNeutralizer.py | 6 +-
.../realEstate/datamodelFeatureRealEstate.py | 71 +--
.../realEstate/interfaceFeatureRealEstate.py | 6 +-
.../teamsbot/interfaceFeatureTeamsbot.py | 6 +-
.../trustee/datamodelFeatureTrustee.py | 71 +--
.../trustee/interfaceFeatureTrustee.py | 6 +-
.../workspace/datamodelFeatureWorkspace.py | 24 +-
.../workspace/interfaceFeatureWorkspace.py | 6 +-
modules/interfaces/interfaceBootstrap.py | 3 +-
modules/interfaces/interfaceDbApp.py | 9 +-
modules/interfaces/interfaceDbBilling.py | 2 +
modules/interfaces/interfaceDbChat.py | 6 +-
modules/interfaces/interfaceDbKnowledge.py | 6 +-
modules/interfaces/interfaceDbManagement.py | 6 +-
modules/interfaces/interfaceDbSubscription.py | 2 +
modules/routes/routeAdminDatabaseHealth.py | 102 +++++
modules/routes/routeDataFiles.py | 120 ++++++
modules/routes/routeWorkflowDashboard.py | 5 +-
.../serviceAgent/coreTools/_documentTools.py | 156 +++++++
.../coreTools/_featureSubAgentTools.py | 12 +
.../services/serviceAgent/featureDataAgent.py | 5 +-
.../serviceAgent/featureDataProvider.py | 40 +-
.../extractors/extractorContainer.py | 47 +-
.../extractors/extractorPdf.py | 116 +++--
.../mainServiceExtraction.py | 7 +-
.../services/serviceExtraction/subPipeline.py | 21 +-
.../services/serviceExtraction/subRegistry.py | 32 +-
modules/shared/dbRegistry.py | 70 +++
modules/shared/fkRegistry.py | 243 +++++++++++
modules/system/databaseHealth.py | 405 ++++++++++++++++++
modules/system/mainSystem.py | 10 +
.../workflows/automation2/executionEngine.py | 156 +++++--
.../executors/actionNodeExecutor.py | 9 +
.../automation2/executors/dataExecutor.py | 76 +++-
.../automation2/executors/flowExecutor.py | 50 ++-
.../methods/methodAi/actions/__init__.py | 2 +
.../methods/methodAi/actions/consolidate.py | 83 ++++
.../workflows/methods/methodAi/methodAi.py | 35 +-
.../extraction/test_extract_udm_pipeline.py | 37 ++
...xecute_graph_loop_aggregate_consolidate.py | 123 ++++++
tests/unit/datamodels/test_udm_bridge.py | 69 +++
tests/unit/datamodels/test_udm_models.py | 34 ++
.../unit/nodeDefinitions/test_usesai_flag.py | 42 ++
.../unit/serviceAgent/test_udm_agent_tools.py | 65 +++
.../unit/workflow/test_phase3_context_node.py | 71 +++
.../workflow/test_phase4_workflow_nodes.py | 177 ++++++++
tests/unit/workflow/test_phase5_highvol.py | 45 ++
90 files changed, 3535 insertions(+), 335 deletions(-)
create mode 100644 modules/datamodels/datamodelUdm.py
create mode 100644 modules/features/graphicalEditor/nodeDefinitions/context.py
create mode 100644 modules/routes/routeAdminDatabaseHealth.py
create mode 100644 modules/shared/dbRegistry.py
create mode 100644 modules/shared/fkRegistry.py
create mode 100644 modules/system/databaseHealth.py
create mode 100644 modules/workflows/methods/methodAi/actions/consolidate.py
create mode 100644 tests/integration/extraction/test_extract_udm_pipeline.py
create mode 100644 tests/integration/workflows/test_execute_graph_loop_aggregate_consolidate.py
create mode 100644 tests/unit/datamodels/test_udm_bridge.py
create mode 100644 tests/unit/datamodels/test_udm_models.py
create mode 100644 tests/unit/nodeDefinitions/test_usesai_flag.py
create mode 100644 tests/unit/serviceAgent/test_udm_agent_tools.py
create mode 100644 tests/unit/workflow/test_phase3_context_node.py
create mode 100644 tests/unit/workflow/test_phase4_workflow_nodes.py
create mode 100644 tests/unit/workflow/test_phase5_highvol.py
diff --git a/app.py b/app.py
index 03d30364..ae409883 100644
--- a/app.py
+++ b/app.py
@@ -603,6 +603,9 @@ app.include_router(userAccessOverviewRouter)
from modules.routes.routeAdminDemoConfig import router as demoConfigRouter
app.include_router(demoConfigRouter)
+from modules.routes.routeAdminDatabaseHealth import router as adminDatabaseHealthRouter
+app.include_router(adminDatabaseHealthRouter)
+
from modules.routes.routeGdpr import router as gdprRouter
app.include_router(gdprRouter)
diff --git a/modules/datamodels/datamodelAiAudit.py b/modules/datamodels/datamodelAiAudit.py
index 6f914690..1ab1b360 100644
--- a/modules/datamodels/datamodelAiAudit.py
+++ b/modules/datamodels/datamodelAiAudit.py
@@ -34,7 +34,7 @@ class AiAuditLogEntry(BaseModel):
userId: str = Field(
description="ID of the user who triggered the AI call",
- json_schema_extra={"label": "Benutzer-ID"},
+ json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "User"}},
)
username: Optional[str] = Field(
default=None,
@@ -43,17 +43,17 @@ class AiAuditLogEntry(BaseModel):
)
mandateId: str = Field(
description="Mandate context of the call",
- json_schema_extra={"label": "Mandanten-ID"},
+ json_schema_extra={"label": "Mandanten-ID", "fk_target": {"db": "poweron_app", "table": "Mandate"}},
)
featureInstanceId: Optional[str] = Field(
default=None,
description="Feature instance context",
- json_schema_extra={"label": "Feature-Instanz-ID"},
+ json_schema_extra={"label": "Feature-Instanz-ID", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}},
)
featureCode: Optional[str] = Field(
default=None,
description="Feature code (e.g. workspace, trustee)",
- json_schema_extra={"label": "Feature"},
+ json_schema_extra={"label": "Feature", "fk_target": {"db": "poweron_app", "table": "Feature", "column": "code"}},
)
instanceLabel: Optional[str] = Field(
default=None,
diff --git a/modules/datamodels/datamodelAudit.py b/modules/datamodels/datamodelAudit.py
index f95b213d..705b87e8 100644
--- a/modules/datamodels/datamodelAudit.py
+++ b/modules/datamodels/datamodelAudit.py
@@ -106,7 +106,13 @@ class AuditLogEntry(BaseModel):
# Actor identification
userId: str = Field(
description="ID of the user who performed the action (or 'system' for system events)",
- json_schema_extra={"label": "Benutzer-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True}
+ json_schema_extra={
+ "label": "Benutzer-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "User"},
+ },
)
username: Optional[str] = Field(
@@ -119,13 +125,25 @@ class AuditLogEntry(BaseModel):
mandateId: Optional[str] = Field(
default=None,
description="Mandate context (if applicable)",
- json_schema_extra={"label": "Mandanten-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False}
+ json_schema_extra={
+ "label": "Mandanten-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
+ },
)
featureInstanceId: Optional[str] = Field(
default=None,
description="Feature instance context (if applicable)",
- json_schema_extra={"label": "Feature-Instanz-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False}
+ json_schema_extra={
+ "label": "Feature-Instanz-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
+ },
)
# Event classification
diff --git a/modules/datamodels/datamodelBase.py b/modules/datamodels/datamodelBase.py
index 854be75e..353f780b 100644
--- a/modules/datamodels/datamodelBase.py
+++ b/modules/datamodels/datamodelBase.py
@@ -2,16 +2,28 @@
# All rights reserved.
"""Base Pydantic model with system-managed fields (DB + API + UI metadata)."""
-from typing import Optional
+from typing import Dict, Optional, Type
from pydantic import BaseModel, Field
from modules.shared.i18nRegistry import i18nModel
+_MODEL_REGISTRY: Dict[str, Type["PowerOnModel"]] = {}
+
+
+def _getModelByTableName(tableName: str) -> Optional[Type["PowerOnModel"]]:
+ """Look up a PowerOnModel subclass by its table name (= class name)."""
+ return _MODEL_REGISTRY.get(tableName)
+
@i18nModel("Basisdatensatz")
class PowerOnModel(BaseModel):
"""Basis-Datenmodell mit System-Audit-Feldern fuer alle DB-Tabellen."""
+
+ def __init_subclass__(cls, **kwargs):
+ super().__init_subclass__(**kwargs)
+ _MODEL_REGISTRY[cls.__name__] = cls
+
sysCreatedAt: Optional[float] = Field(
default=None,
description="Record creation timestamp (UTC, set by system)",
diff --git a/modules/datamodels/datamodelBilling.py b/modules/datamodels/datamodelBilling.py
index fb1a1061..f662e28c 100644
--- a/modules/datamodels/datamodelBilling.py
+++ b/modules/datamodels/datamodelBilling.py
@@ -46,11 +46,15 @@ class BillingAccount(PowerOnModel):
description="Primary key",
json_schema_extra={"label": "ID"},
)
- mandateId: str = Field(..., description="Foreign key to Mandate", json_schema_extra={"label": "Mandanten-ID"})
+ mandateId: str = Field(
+ ...,
+ description="Foreign key to Mandate",
+ json_schema_extra={"label": "Mandanten-ID", "fk_target": {"db": "poweron_app", "table": "Mandate"}},
+ )
userId: Optional[str] = Field(
None,
description="Foreign key to User (None = mandate pool account, set = user audit account)",
- json_schema_extra={"label": "Benutzer-ID"},
+ json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "User"}},
)
balance: float = Field(default=0.0, description="Current balance in CHF", json_schema_extra={"label": "Guthaben (CHF)"})
warningThreshold: float = Field(
@@ -74,7 +78,11 @@ class BillingTransaction(PowerOnModel):
description="Primary key",
json_schema_extra={"label": "ID"},
)
- accountId: str = Field(..., description="Foreign key to BillingAccount", json_schema_extra={"label": "Konto-ID"})
+ accountId: str = Field(
+ ...,
+ description="Foreign key to BillingAccount",
+ json_schema_extra={"label": "Konto-ID", "fk_target": {"db": "poweron_billing", "table": "BillingAccount"}},
+ )
transactionType: TransactionTypeEnum = Field(..., description="Transaction type", json_schema_extra={"label": "Typ"})
amount: float = Field(..., description="Amount in CHF (always positive)", json_schema_extra={"label": "Betrag (CHF)"})
description: str = Field(..., description="Transaction description", json_schema_extra={"label": "Beschreibung"})
@@ -84,12 +92,28 @@ class BillingTransaction(PowerOnModel):
referenceId: Optional[str] = Field(None, description="Reference ID", json_schema_extra={"label": "Referenz-ID"})
# Context for workflow transactions
- workflowId: Optional[str] = Field(None, description="Workflow ID (for WORKFLOW transactions)", json_schema_extra={"label": "Workflow-ID"})
- featureInstanceId: Optional[str] = Field(None, description="Feature instance ID", json_schema_extra={"label": "Feature-Instanz-ID"})
- featureCode: Optional[str] = Field(None, description="Feature code (e.g., automation)", json_schema_extra={"label": "Feature-Code"})
+ workflowId: Optional[str] = Field(
+ None,
+ description="Workflow ID (for WORKFLOW transactions; may be Chat or Graphical Editor)",
+ json_schema_extra={"label": "Workflow-ID"},
+ )
+ featureInstanceId: Optional[str] = Field(
+ None,
+ description="Feature instance ID",
+ json_schema_extra={"label": "Feature-Instanz-ID", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}},
+ )
+ featureCode: Optional[str] = Field(
+ None,
+ description="Feature code (e.g., automation)",
+ json_schema_extra={"label": "Feature-Code", "fk_target": {"db": "poweron_app", "table": "Feature", "column": "code"}},
+ )
aicoreProvider: Optional[str] = Field(None, description="AICore provider (anthropic, openai, etc.)", json_schema_extra={"label": "AI-Anbieter"})
aicoreModel: Optional[str] = Field(None, description="AICore model name (e.g., claude-4-sonnet, gpt-4o)", json_schema_extra={"label": "AI-Modell"})
- createdByUserId: Optional[str] = Field(None, description="User who created/caused this transaction", json_schema_extra={"label": "Erstellt von Benutzer"})
+ createdByUserId: Optional[str] = Field(
+ None,
+ description="User who created/caused this transaction",
+ json_schema_extra={"label": "Erstellt von Benutzer", "fk_target": {"db": "poweron_app", "table": "User"}},
+ )
# AI call metadata (for per-call analytics)
processingTime: Optional[float] = Field(None, description="Processing time in seconds", json_schema_extra={"label": "Verarbeitungszeit (s)"})
@@ -106,7 +130,11 @@ class BillingSettings(BaseModel):
description="Primary key",
json_schema_extra={"label": "ID"},
)
- mandateId: str = Field(..., description="Foreign key to Mandate (UNIQUE)", json_schema_extra={"label": "Mandanten-ID"})
+ mandateId: str = Field(
+ ...,
+ description="Foreign key to Mandate (UNIQUE)",
+ json_schema_extra={"label": "Mandanten-ID", "fk_target": {"db": "poweron_app", "table": "Mandate"}},
+ )
warningThresholdPercent: float = Field(
default=10.0,
@@ -179,7 +207,11 @@ class UsageStatistics(BaseModel):
description="Primary key",
json_schema_extra={"label": "ID"},
)
- accountId: str = Field(..., description="Foreign key to BillingAccount", json_schema_extra={"label": "Konto-ID"})
+ accountId: str = Field(
+ ...,
+ description="Foreign key to BillingAccount",
+ json_schema_extra={"label": "Konto-ID", "fk_target": {"db": "poweron_billing", "table": "BillingAccount"}},
+ )
periodType: PeriodTypeEnum = Field(..., description="Period type", json_schema_extra={"label": "Periodentyp"})
periodStart: date = Field(..., description="Period start date", json_schema_extra={"label": "Periodenbeginn"})
diff --git a/modules/datamodels/datamodelChat.py b/modules/datamodels/datamodelChat.py
index 961f9ea0..6160e7c8 100644
--- a/modules/datamodels/datamodelChat.py
+++ b/modules/datamodels/datamodelChat.py
@@ -14,7 +14,10 @@ import uuid
class ChatLog(PowerOnModel):
"""Log entries for chat workflows. User-owned, no mandate context."""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", json_schema_extra={"label": "ID"})
- workflowId: str = Field(description="Foreign key to workflow", json_schema_extra={"label": "Workflow-ID"})
+ workflowId: str = Field(
+ description="Foreign key to workflow",
+ json_schema_extra={"label": "Workflow-ID", "fk_target": {"db": "poweron_chat", "table": "ChatWorkflow"}},
+ )
message: str = Field(description="Log message", json_schema_extra={"label": "Nachricht"})
type: str = Field(description="Log type (info, warning, error, etc.)", json_schema_extra={"label": "Typ"})
timestamp: float = Field(default_factory=getUtcTimestamp,
@@ -32,8 +35,14 @@ class ChatLog(PowerOnModel):
class ChatDocument(PowerOnModel):
"""Documents attached to chat messages. User-owned, no mandate context."""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", json_schema_extra={"label": "ID"})
- messageId: str = Field(description="Foreign key to message", json_schema_extra={"label": "Nachrichten-ID"})
- fileId: str = Field(description="Foreign key to file", json_schema_extra={"label": "Datei-ID"})
+ messageId: str = Field(
+ description="Foreign key to message",
+ json_schema_extra={"label": "Nachrichten-ID", "fk_target": {"db": "poweron_chat", "table": "ChatMessage"}},
+ )
+ fileId: str = Field(
+ description="Foreign key to file",
+ json_schema_extra={"label": "Datei-ID", "fk_target": {"db": "poweron_management", "table": "FileItem"}},
+ )
fileName: str = Field(description="Name of the file", json_schema_extra={"label": "Dateiname"})
fileSize: int = Field(description="Size of the file", json_schema_extra={"label": "Dateigröße"})
mimeType: str = Field(description="MIME type of the file", json_schema_extra={"label": "MIME-Typ"})
@@ -70,8 +79,15 @@ class ChatContentExtracted(BaseModel):
class ChatMessage(PowerOnModel):
"""Messages in chat workflows. User-owned, no mandate context."""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", json_schema_extra={"label": "ID"})
- workflowId: str = Field(description="Foreign key to workflow", json_schema_extra={"label": "Workflow-ID"})
- parentMessageId: Optional[str] = Field(None, description="Parent message ID for threading", json_schema_extra={"label": "Übergeordnete Nachrichten-ID"})
+ workflowId: str = Field(
+ description="Foreign key to workflow",
+ json_schema_extra={"label": "Workflow-ID", "fk_target": {"db": "poweron_chat", "table": "ChatWorkflow"}},
+ )
+ parentMessageId: Optional[str] = Field(
+ None,
+ description="Parent message ID for threading",
+ json_schema_extra={"label": "Übergeordnete Nachrichten-ID", "fk_target": {"db": "poweron_chat", "table": "ChatMessage"}},
+ )
documents: List[ChatDocument] = Field(default_factory=list, description="Associated documents", json_schema_extra={"label": "Dokumente"})
documentsLabel: Optional[str] = Field(None, description="Label for the set of documents", json_schema_extra={"label": "Dokumenten-Label"})
message: Optional[str] = Field(None, description="Message content", json_schema_extra={"label": "Nachricht"})
@@ -101,7 +117,17 @@ class WorkflowModeEnum(str, Enum):
class ChatWorkflow(PowerOnModel):
"""Chat workflow container. User-owned, no mandate context."""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", json_schema_extra={"label": "ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
- featureInstanceId: Optional[str] = Field(None, description="Feature instance ID for multi-tenancy isolation", json_schema_extra={"label": "Feature-Instanz-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
+ featureInstanceId: Optional[str] = Field(
+ None,
+ description="Feature instance ID for multi-tenancy isolation",
+ json_schema_extra={
+ "label": "Feature-Instanz-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
+ },
+ )
status: str = Field(default="running", description="Current status of the workflow", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [
{"value": "running", "label": "Running"},
{"value": "completed", "label": "Completed"},
@@ -169,7 +195,11 @@ class UserInputRequest(BaseModel):
prompt: str = Field(description="Prompt for the user", json_schema_extra={"label": "Eingabeaufforderung"})
listFileId: List[str] = Field(default_factory=list, description="List of file IDs", json_schema_extra={"label": "Datei-IDs"})
userLanguage: str = Field(default="en", description="User's preferred language", json_schema_extra={"label": "Benutzersprache"})
- workflowId: Optional[str] = Field(None, description="Optional ID of the workflow to continue", json_schema_extra={"label": "Workflow-ID"})
+ workflowId: Optional[str] = Field(
+ None,
+ description="Optional ID of the workflow to continue",
+ json_schema_extra={"label": "Workflow-ID", "fk_target": {"db": "poweron_chat", "table": "ChatWorkflow"}},
+ )
allowedProviders: Optional[List[str]] = Field(None, description="List of allowed AI providers (multiselect)", json_schema_extra={"label": "Erlaubte Anbieter"})
@i18nModel("Aktions-Dokument")
@@ -307,7 +337,11 @@ class ChatTaskResult(BaseModel):
@i18nModel("Aufgabe")
class TaskItem(BaseModel):
id: str = Field(..., description="Task ID", json_schema_extra={"label": "Aufgaben-ID"})
- workflowId: str = Field(..., description="Workflow ID", json_schema_extra={"label": "Workflow-ID"})
+ workflowId: str = Field(
+ ...,
+ description="Workflow ID",
+ json_schema_extra={"label": "Workflow-ID", "fk_target": {"db": "poweron_chat", "table": "ChatWorkflow"}},
+ )
userInput: str = Field(..., description="User input that triggered the task", json_schema_extra={"label": "Benutzereingabe"})
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status", json_schema_extra={"label": "Status"})
error: Optional[str] = Field(None, description="Error message if task failed", json_schema_extra={"label": "Fehler"})
diff --git a/modules/datamodels/datamodelContent.py b/modules/datamodels/datamodelContent.py
index b2c87ed8..fc9dc4b6 100644
--- a/modules/datamodels/datamodelContent.py
+++ b/modules/datamodels/datamodelContent.py
@@ -32,7 +32,10 @@ class ContentContextRef(BaseModel):
class ContentObject(BaseModel):
"""Scalar content object extracted from a file. No AI involved."""
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
- fileId: str = Field(description="FK to the physical file")
+ fileId: str = Field(
+ description="FK to the physical file",
+ json_schema_extra={"fk_target": {"db": "poweron_management", "table": "FileItem"}},
+ )
contentType: str = Field(description="text, image, videostream, audiostream, other")
data: str = Field(default="", description="Content data (text, base64, URL)")
contextRef: ContentContextRef = Field(default_factory=ContentContextRef)
diff --git a/modules/datamodels/datamodelDataSource.py b/modules/datamodels/datamodelDataSource.py
index 0e0a7d16..cad125ef 100644
--- a/modules/datamodels/datamodelDataSource.py
+++ b/modules/datamodels/datamodelDataSource.py
@@ -23,7 +23,7 @@ class DataSource(PowerOnModel):
)
connectionId: str = Field(
description="FK to UserConnection",
- json_schema_extra={"label": "Verbindungs-ID"},
+ json_schema_extra={"label": "Verbindungs-ID", "fk_target": {"db": "poweron_app", "table": "UserConnection"}},
)
sourceType: str = Field(
description="sharepointFolder, googleDriveFolder, outlookFolder, ftpFolder, clickupList (path under /team/...)",
@@ -45,17 +45,17 @@ class DataSource(PowerOnModel):
featureInstanceId: Optional[str] = Field(
default=None,
description="Scoped to feature instance",
- json_schema_extra={"label": "Feature-Instanz"},
+ json_schema_extra={"label": "Feature-Instanz", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}},
)
mandateId: Optional[str] = Field(
default=None,
description="Mandate scope",
- json_schema_extra={"label": "Mandanten-ID"},
+ json_schema_extra={"label": "Mandanten-ID", "fk_target": {"db": "poweron_app", "table": "Mandate"}},
)
userId: str = Field(
default="",
description="Owner user ID",
- json_schema_extra={"label": "Benutzer-ID"},
+ json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "User"}},
)
autoSync: bool = Field(
default=False,
diff --git a/modules/datamodels/datamodelExtraction.py b/modules/datamodels/datamodelExtraction.py
index 71918092..0aaaffd8 100644
--- a/modules/datamodels/datamodelExtraction.py
+++ b/modules/datamodels/datamodelExtraction.py
@@ -18,6 +18,7 @@ class ContentExtracted(BaseModel):
id: str = Field(description="Extraction id or source document id")
parts: List[ContentPart] = Field(default_factory=list, description="List of extracted parts")
summary: Optional[Dict[str, Any]] = Field(default=None, description="Optional extraction summary")
+ udm: Optional[Any] = Field(default=None, description="Optional UdmDocument (when outputFormat is udm or both)")
class ChunkResult(BaseModel):
@@ -75,6 +76,19 @@ class ExtractionOptions(BaseModel):
# Core extraction parameters
prompt: str = Field(default="", description="Extraction prompt for AI processing")
processDocumentsIndividually: bool = Field(default=True, description="Process each document separately")
+
+ outputFormat: Literal["parts", "udm", "both"] = Field(
+ default="parts",
+ description="Return flat parts only, UDM tree only, or both (parts always populated; udm when udm or both)",
+ )
+ outputDetail: Literal["full", "structure", "references"] = Field(
+ default="full",
+ description="Extraction detail: full inline data, skeleton without raw payloads, or file references only",
+ )
+ lazyContainer: bool = Field(
+ default=False,
+ description="For archives: emit file entries with metadata only (no nested extraction)",
+ )
# Image processing parameters
imageMaxPixels: int = Field(default=1024 * 1024, ge=1, description="Maximum pixels for image processing")
diff --git a/modules/datamodels/datamodelFeatureDataSource.py b/modules/datamodels/datamodelFeatureDataSource.py
index 3199a054..96b574a6 100644
--- a/modules/datamodels/datamodelFeatureDataSource.py
+++ b/modules/datamodels/datamodelFeatureDataSource.py
@@ -6,7 +6,7 @@ A FeatureDataSource links a FeatureInstance table (DATA_OBJECT) to a workspace
so the agent can query structured feature data (e.g. TrusteePosition rows).
"""
-from typing import Dict, Optional
+from typing import Dict, List, Optional
from pydantic import BaseModel, Field
from modules.datamodels.datamodelBase import PowerOnModel
from modules.shared.i18nRegistry import i18nModel
@@ -23,11 +23,11 @@ class FeatureDataSource(PowerOnModel):
)
featureInstanceId: str = Field(
description="FK to FeatureInstance",
- json_schema_extra={"label": "Feature-Instanz"},
+ json_schema_extra={"label": "Feature-Instanz", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}},
)
featureCode: str = Field(
description="Feature code (e.g. trustee, commcoach)",
- json_schema_extra={"label": "Feature"},
+ json_schema_extra={"label": "Feature", "fk_target": {"db": "poweron_app", "table": "Feature", "column": "code"}},
)
tableName: str = Field(
description="Table name from DATA_OBJECTS meta (e.g. TrusteePosition)",
@@ -44,16 +44,16 @@ class FeatureDataSource(PowerOnModel):
mandateId: str = Field(
default="",
description="Mandate scope",
- json_schema_extra={"label": "Mandant"},
+ json_schema_extra={"label": "Mandant", "fk_target": {"db": "poweron_app", "table": "Mandate"}},
)
userId: str = Field(
default="",
description="Owner user ID",
- json_schema_extra={"label": "Benutzer"},
+ json_schema_extra={"label": "Benutzer", "fk_target": {"db": "poweron_app", "table": "User"}},
)
workspaceInstanceId: str = Field(
- description="Workspace instance where this source is used",
- json_schema_extra={"label": "Workspace"},
+ description="Workspace feature instance where this source is used",
+ json_schema_extra={"label": "Workspace", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}},
)
scope: str = Field(
default="personal",
@@ -70,6 +70,11 @@ class FeatureDataSource(PowerOnModel):
description="Whether this data source should be neutralized before AI processing",
json_schema_extra={"label": "Neutralisieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
)
+ neutralizeFields: Optional[List[str]] = Field(
+ default=None,
+ description="Column names whose values are replaced with placeholders before AI processing",
+ json_schema_extra={"label": "Zu neutralisierende Felder", "frontend_type": "multiselect", "frontend_readonly": False, "frontend_required": False},
+ )
recordFilter: Optional[Dict[str, str]] = Field(
default=None,
description="Record-level filter applied when querying this table, e.g. {'sessionId': 'abc-123'}",
diff --git a/modules/datamodels/datamodelFeatures.py b/modules/datamodels/datamodelFeatures.py
index 93a7fae9..e8e51370 100644
--- a/modules/datamodels/datamodelFeatures.py
+++ b/modules/datamodels/datamodelFeatures.py
@@ -38,11 +38,23 @@ class FeatureInstance(PowerOnModel):
)
featureCode: str = Field(
description="FK -> Feature.code",
- json_schema_extra={"label": "Feature", "frontend_type": "select", "frontend_readonly": True, "frontend_required": True}
+ json_schema_extra={
+ "label": "Feature",
+ "frontend_type": "select",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "Feature", "column": "code"},
+ },
)
mandateId: str = Field(
description="FK -> Mandate.id (CASCADE DELETE)",
- json_schema_extra={"label": "Mandant", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True}
+ json_schema_extra={
+ "label": "Mandant",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
+ },
)
label: str = Field(
default="",
diff --git a/modules/datamodels/datamodelFileFolder.py b/modules/datamodels/datamodelFileFolder.py
index 73222e51..e3b0ba1a 100644
--- a/modules/datamodels/datamodelFileFolder.py
+++ b/modules/datamodels/datamodelFileFolder.py
@@ -24,15 +24,43 @@ class FileFolder(PowerOnModel):
parentId: Optional[str] = Field(
default=None,
description="Parent folder ID (null = root)",
- json_schema_extra={"label": "Uebergeordneter Ordner", "frontend_type": "text", "frontend_readonly": False, "frontend_required": False},
+ json_schema_extra={
+ "label": "Uebergeordneter Ordner",
+ "frontend_type": "text",
+ "frontend_readonly": False,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_management", "table": "FileFolder"},
+ },
)
mandateId: Optional[str] = Field(
default=None,
description="Mandate context",
- json_schema_extra={"label": "Mandanten-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False},
+ json_schema_extra={
+ "label": "Mandanten-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
+ },
)
featureInstanceId: Optional[str] = Field(
default=None,
description="Feature instance context",
- json_schema_extra={"label": "Feature-Instanz-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False},
+ json_schema_extra={
+ "label": "Feature-Instanz-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
+ },
+ )
+ neutralize: bool = Field(
+ default=False,
+ description="Whether files in this folder should be neutralized before AI processing. Inherited by new/moved files.",
+ json_schema_extra={
+ "label": "Neutralisieren",
+ "frontend_type": "checkbox",
+ "frontend_readonly": False,
+ "frontend_required": False,
+ },
)
diff --git a/modules/datamodels/datamodelFiles.py b/modules/datamodels/datamodelFiles.py
index c4072658..4cc6beba 100644
--- a/modules/datamodels/datamodelFiles.py
+++ b/modules/datamodels/datamodelFiles.py
@@ -33,6 +33,7 @@ class FileItem(PowerOnModel):
"frontend_fk_source": "/api/mandates/",
"frontend_fk_display_field": "label",
"fk_model": "Mandate",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
},
)
featureInstanceId: Optional[str] = Field(
@@ -46,6 +47,7 @@ class FileItem(PowerOnModel):
"frontend_fk_source": "/api/features/instances",
"frontend_fk_display_field": "label",
"fk_model": "FeatureInstance",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
},
)
mimeType: str = Field(
@@ -68,7 +70,13 @@ class FileItem(PowerOnModel):
folderId: Optional[str] = Field(
default=None,
description="ID of the parent folder",
- json_schema_extra={"label": "Ordner-ID", "frontend_type": "text", "frontend_readonly": False, "frontend_required": False},
+ json_schema_extra={
+ "label": "Ordner-ID",
+ "frontend_type": "text",
+ "frontend_readonly": False,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_management", "table": "FileFolder"},
+ },
)
description: Optional[str] = Field(
default=None,
diff --git a/modules/datamodels/datamodelInvitation.py b/modules/datamodels/datamodelInvitation.py
index 4808bd55..57efb9bb 100644
--- a/modules/datamodels/datamodelInvitation.py
+++ b/modules/datamodels/datamodelInvitation.py
@@ -32,12 +32,24 @@ class Invitation(PowerOnModel):
mandateId: str = Field(
description="FK → Mandate.id - Target mandate for the invitation",
- json_schema_extra={"label": "Mandant", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True}
+ json_schema_extra={
+ "label": "Mandant",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
+ },
)
featureInstanceId: Optional[str] = Field(
default=None,
description="Optional FK → FeatureInstance.id - Direct access to specific feature",
- json_schema_extra={"label": "Feature-Instanz", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False}
+ json_schema_extra={
+ "label": "Feature-Instanz",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
+ },
)
roleIds: List[str] = Field(
default_factory=list,
@@ -63,7 +75,13 @@ class Invitation(PowerOnModel):
usedBy: Optional[str] = Field(
default=None,
description="User ID of the person who used the invitation",
- json_schema_extra={"label": "Verwendet von", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False}
+ json_schema_extra={
+ "label": "Verwendet von",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_app", "table": "User"},
+ },
)
usedAt: Optional[float] = Field(
default=None,
diff --git a/modules/datamodels/datamodelKnowledge.py b/modules/datamodels/datamodelKnowledge.py
index 7432a30c..e440d657 100644
--- a/modules/datamodels/datamodelKnowledge.py
+++ b/modules/datamodels/datamodelKnowledge.py
@@ -30,17 +30,17 @@ class FileContentIndex(PowerOnModel):
)
userId: str = Field(
description="Owner user ID",
- json_schema_extra={"label": "Benutzer-ID"},
+ json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "User"}},
)
featureInstanceId: str = Field(
default="",
description="Feature instance scope",
- json_schema_extra={"label": "Feature-Instanz-ID"},
+ json_schema_extra={"label": "Feature-Instanz-ID", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}},
)
mandateId: str = Field(
default="",
description="Mandate scope",
- json_schema_extra={"label": "Mandanten-ID"},
+ json_schema_extra={"label": "Mandanten-ID", "fk_target": {"db": "poweron_app", "table": "Mandate"}},
)
fileName: str = Field(
description="Original file name",
@@ -116,16 +116,16 @@ class ContentChunk(PowerOnModel):
)
fileId: str = Field(
description="FK to the source file",
- json_schema_extra={"label": "Datei-ID"},
+ json_schema_extra={"label": "Datei-ID", "fk_target": {"db": "poweron_management", "table": "FileItem"}},
)
userId: str = Field(
description="Owner user ID",
- json_schema_extra={"label": "Benutzer-ID"},
+ json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "User"}},
)
featureInstanceId: str = Field(
default="",
description="Feature instance scope",
- json_schema_extra={"label": "Feature-Instanz-ID"},
+ json_schema_extra={"label": "Feature-Instanz-ID", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}},
)
contentType: str = Field(
description="Content type: text, image, videostream, audiostream, other",
@@ -214,16 +214,16 @@ class WorkflowMemory(PowerOnModel):
)
workflowId: str = Field(
description="FK to the workflow",
- json_schema_extra={"label": "Workflow-ID"},
+ json_schema_extra={"label": "Workflow-ID", "fk_target": {"db": "poweron_chat", "table": "ChatWorkflow"}},
)
userId: str = Field(
description="Owner user ID",
- json_schema_extra={"label": "Benutzer-ID"},
+ json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "User"}},
)
featureInstanceId: str = Field(
default="",
description="Feature instance scope",
- json_schema_extra={"label": "Feature-Instanz-ID"},
+ json_schema_extra={"label": "Feature-Instanz-ID", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}},
)
key: str = Field(
description="Key identifier (e.g. 'entity:companyName')",
diff --git a/modules/datamodels/datamodelMembership.py b/modules/datamodels/datamodelMembership.py
index ce13dbad..f70fe035 100644
--- a/modules/datamodels/datamodelMembership.py
+++ b/modules/datamodels/datamodelMembership.py
@@ -34,6 +34,7 @@ class UserMandate(PowerOnModel):
"frontend_fk_source": "/api/users/",
"frontend_fk_display_field": "username",
"fk_model": "User",
+ "fk_target": {"db": "poweron_app", "table": "User"},
},
)
mandateId: str = Field(
@@ -46,6 +47,7 @@ class UserMandate(PowerOnModel):
"frontend_fk_source": "/api/mandates/",
"frontend_fk_display_field": "label",
"fk_model": "Mandate",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
},
)
enabled: bool = Field(
@@ -68,11 +70,27 @@ class FeatureAccess(PowerOnModel):
)
userId: str = Field(
description="FK → User.id (CASCADE DELETE)",
- json_schema_extra={"label": "Benutzer", "frontend_type": "select", "frontend_readonly": False, "frontend_required": True, "frontend_fk_source": "/api/users/", "frontend_fk_display_field": "username"}
+ json_schema_extra={
+ "label": "Benutzer",
+ "frontend_type": "select",
+ "frontend_readonly": False,
+ "frontend_required": True,
+ "frontend_fk_source": "/api/users/",
+ "frontend_fk_display_field": "username",
+ "fk_target": {"db": "poweron_app", "table": "User"},
+ },
)
featureInstanceId: str = Field(
description="FK → FeatureInstance.id (CASCADE DELETE)",
- json_schema_extra={"label": "Feature-Instanz", "frontend_type": "select", "frontend_readonly": False, "frontend_required": True, "frontend_fk_source": "/api/features/instances", "frontend_fk_display_field": "label"}
+ json_schema_extra={
+ "label": "Feature-Instanz",
+ "frontend_type": "select",
+ "frontend_readonly": False,
+ "frontend_required": True,
+ "frontend_fk_source": "/api/features/instances",
+ "frontend_fk_display_field": "label",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
+ },
)
enabled: bool = Field(
default=True,
@@ -94,11 +112,25 @@ class UserMandateRole(PowerOnModel):
)
userMandateId: str = Field(
description="FK → UserMandate.id (CASCADE DELETE)",
- json_schema_extra={"label": "Benutzer-Mandant", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True}
+ json_schema_extra={
+ "label": "Benutzer-Mandant",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "UserMandate"},
+ },
)
roleId: str = Field(
description="FK → Role.id (CASCADE DELETE)",
- json_schema_extra={"label": "Rolle", "frontend_type": "select", "frontend_readonly": False, "frontend_required": True, "frontend_fk_source": "/api/rbac/roles", "frontend_fk_display_field": "roleLabel"}
+ json_schema_extra={
+ "label": "Rolle",
+ "frontend_type": "select",
+ "frontend_readonly": False,
+ "frontend_required": True,
+ "frontend_fk_source": "/api/rbac/roles",
+ "frontend_fk_display_field": "roleLabel",
+ "fk_target": {"db": "poweron_app", "table": "Role"},
+ },
)
@@ -115,9 +147,23 @@ class FeatureAccessRole(PowerOnModel):
)
featureAccessId: str = Field(
description="FK → FeatureAccess.id (CASCADE DELETE)",
- json_schema_extra={"label": "Feature-Zugang", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True}
+ json_schema_extra={
+ "label": "Feature-Zugang",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "FeatureAccess"},
+ },
)
roleId: str = Field(
description="FK → Role.id (CASCADE DELETE)",
- json_schema_extra={"label": "Rolle", "frontend_type": "select", "frontend_readonly": False, "frontend_required": True, "frontend_fk_source": "/api/rbac/roles", "frontend_fk_display_field": "roleLabel"}
+ json_schema_extra={
+ "label": "Rolle",
+ "frontend_type": "select",
+ "frontend_readonly": False,
+ "frontend_required": True,
+ "frontend_fk_source": "/api/rbac/roles",
+ "frontend_fk_display_field": "roleLabel",
+ "fk_target": {"db": "poweron_app", "table": "Role"},
+ },
)
diff --git a/modules/datamodels/datamodelMessaging.py b/modules/datamodels/datamodelMessaging.py
index 1a32a09e..87845da8 100644
--- a/modules/datamodels/datamodelMessaging.py
+++ b/modules/datamodels/datamodelMessaging.py
@@ -64,6 +64,7 @@ class MessagingSubscription(PowerOnModel):
"frontend_readonly": True,
"frontend_required": False,
"label": "Mandanten-ID",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
},
)
featureInstanceId: str = Field(
@@ -73,6 +74,7 @@ class MessagingSubscription(PowerOnModel):
"frontend_readonly": True,
"frontend_required": False,
"label": "Feature-Instanz-ID",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
},
)
description: Optional[str] = Field(
@@ -129,6 +131,7 @@ class MessagingSubscriptionRegistration(BaseModel):
"frontend_readonly": True,
"frontend_required": False,
"label": "Mandanten-ID",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
},
)
featureInstanceId: str = Field(
@@ -138,6 +141,7 @@ class MessagingSubscriptionRegistration(BaseModel):
"frontend_readonly": True,
"frontend_required": False,
"label": "Feature-Instanz-ID",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
},
)
subscriptionId: str = Field(
@@ -156,6 +160,7 @@ class MessagingSubscriptionRegistration(BaseModel):
"frontend_readonly": True,
"frontend_required": False,
"label": "Benutzer-ID",
+ "fk_target": {"db": "poweron_app", "table": "User"},
},
)
channel: MessagingChannel = Field(
@@ -244,6 +249,7 @@ class MessagingDelivery(BaseModel):
"frontend_readonly": True,
"frontend_required": False,
"label": "Benutzer-ID",
+ "fk_target": {"db": "poweron_app", "table": "User"},
},
)
channel: MessagingChannel = Field(
diff --git a/modules/datamodels/datamodelNotification.py b/modules/datamodels/datamodelNotification.py
index 6ff7b52e..3a8fb631 100644
--- a/modules/datamodels/datamodelNotification.py
+++ b/modules/datamodels/datamodelNotification.py
@@ -60,7 +60,13 @@ class UserNotification(PowerOnModel):
)
userId: str = Field(
description="Target user ID for this notification",
- json_schema_extra={"label": "Benutzer", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True}
+ json_schema_extra={
+ "label": "Benutzer",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "User"},
+ },
)
type: NotificationType = Field(
diff --git a/modules/datamodels/datamodelRbac.py b/modules/datamodels/datamodelRbac.py
index d43b825e..1214a96f 100644
--- a/modules/datamodels/datamodelRbac.py
+++ b/modules/datamodels/datamodelRbac.py
@@ -57,12 +57,30 @@ class Role(PowerOnModel):
mandateId: Optional[str] = Field(
default=None,
description="FK → Mandate.id (CASCADE DELETE). Null = Global/Template role.",
- json_schema_extra={"label": "Mandant", "frontend_type": "select", "frontend_readonly": True, "frontend_visible": True, "frontend_required": False, "frontend_fk_source": "/api/mandates/", "frontend_fk_display_field": "label"}
+ json_schema_extra={
+ "label": "Mandant",
+ "frontend_type": "select",
+ "frontend_readonly": True,
+ "frontend_visible": True,
+ "frontend_required": False,
+ "frontend_fk_source": "/api/mandates/",
+ "frontend_fk_display_field": "label",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
+ },
)
featureInstanceId: Optional[str] = Field(
default=None,
description="FK → FeatureInstance.id (CASCADE DELETE). Null = Mandate-level or Global role.",
- json_schema_extra={"label": "Feature-Instanz", "frontend_type": "select", "frontend_readonly": True, "frontend_visible": True, "frontend_required": False, "frontend_fk_source": "/api/features/instances", "frontend_fk_display_field": "label"}
+ json_schema_extra={
+ "label": "Feature-Instanz",
+ "frontend_type": "select",
+ "frontend_readonly": True,
+ "frontend_visible": True,
+ "frontend_required": False,
+ "frontend_fk_source": "/api/features/instances",
+ "frontend_fk_display_field": "label",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
+ },
)
featureCode: Optional[str] = Field(
default=None,
@@ -92,7 +110,15 @@ class AccessRule(PowerOnModel):
)
roleId: str = Field(
description="FK → Role.id (CASCADE DELETE!)",
- json_schema_extra={"label": "Rolle", "frontend_type": "select", "frontend_readonly": True, "frontend_required": True, "frontend_fk_source": "/api/rbac/roles", "frontend_fk_display_field": "roleLabel"}
+ json_schema_extra={
+ "label": "Rolle",
+ "frontend_type": "select",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "frontend_fk_source": "/api/rbac/roles",
+ "frontend_fk_display_field": "roleLabel",
+ "fk_target": {"db": "poweron_app", "table": "Role"},
+ },
)
context: AccessRuleContext = Field(
description="Context type: DATA (database), UI (interface), RESOURCE (system resources). IMMUTABLE!",
diff --git a/modules/datamodels/datamodelSecurity.py b/modules/datamodels/datamodelSecurity.py
index 52237226..cd48fb08 100644
--- a/modules/datamodels/datamodelSecurity.py
+++ b/modules/datamodels/datamodelSecurity.py
@@ -47,7 +47,7 @@ class Token(PowerOnModel):
)
userId: str = Field(
...,
- json_schema_extra={"label": "Benutzer-ID"},
+ json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "User"}},
)
authority: AuthAuthority = Field(
...,
@@ -56,7 +56,7 @@ class Token(PowerOnModel):
connectionId: Optional[str] = Field(
None,
description="ID of the connection this token belongs to",
- json_schema_extra={"label": "Verbindungs-ID"},
+ json_schema_extra={"label": "Verbindungs-ID", "fk_target": {"db": "poweron_app", "table": "UserConnection"}},
)
tokenPurpose: Optional[TokenPurpose] = Field(
default=None,
@@ -92,7 +92,7 @@ class Token(PowerOnModel):
revokedBy: Optional[str] = Field(
None,
description="User ID who revoked the token (admin/self)",
- json_schema_extra={"label": "Widerrufen von"},
+ json_schema_extra={"label": "Widerrufen von", "fk_target": {"db": "poweron_app", "table": "User"}},
)
reason: Optional[str] = Field(
None,
@@ -134,7 +134,13 @@ class AuthEvent(PowerOnModel):
)
userId: str = Field(
description="ID of the user this event belongs to",
- json_schema_extra={"label": "Benutzer-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True},
+ json_schema_extra={
+ "label": "Benutzer-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "User"},
+ },
)
eventType: str = Field(
description="Type of authentication event (e.g., 'login', 'logout', 'token_refresh')",
diff --git a/modules/datamodels/datamodelSubscription.py b/modules/datamodels/datamodelSubscription.py
index 5a377244..058f2e17 100644
--- a/modules/datamodels/datamodelSubscription.py
+++ b/modules/datamodels/datamodelSubscription.py
@@ -207,7 +207,7 @@ class MandateSubscription(PowerOnModel):
mandateId: str = Field(
...,
description="Foreign key to Mandate",
- json_schema_extra={"label": "Mandanten-ID"},
+ json_schema_extra={"label": "Mandanten-ID", "fk_target": {"db": "poweron_app", "table": "Mandate"}},
)
planKey: str = Field(
...,
diff --git a/modules/datamodels/datamodelUam.py b/modules/datamodels/datamodelUam.py
index 61e7c105..dc7824f6 100644
--- a/modules/datamodels/datamodelUam.py
+++ b/modules/datamodels/datamodelUam.py
@@ -114,7 +114,13 @@ class UserConnection(PowerOnModel):
)
userId: str = Field(
description="ID of the user this connection belongs to",
- json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False, "label": "Benutzer-ID"},
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "label": "Benutzer-ID",
+ "fk_target": {"db": "poweron_app", "table": "User"},
+ },
)
authority: AuthAuthority = Field(
description="Authentication authority",
@@ -369,11 +375,14 @@ class UserVoicePreferences(PowerOnModel):
description="Primary key",
json_schema_extra={"label": "ID"},
)
- userId: str = Field(description="User ID", json_schema_extra={"label": "Benutzer-ID"})
+ userId: str = Field(
+ description="User ID",
+ json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "User"}},
+ )
mandateId: Optional[str] = Field(
default=None,
description="Mandate scope (None = global for user)",
- json_schema_extra={"label": "Mandanten-ID"},
+ json_schema_extra={"label": "Mandanten-ID", "fk_target": {"db": "poweron_app", "table": "Mandate"}},
)
sttLanguage: str = Field(
default="de-DE",
diff --git a/modules/datamodels/datamodelUdm.py b/modules/datamodels/datamodelUdm.py
new file mode 100644
index 00000000..330467b4
--- /dev/null
+++ b/modules/datamodels/datamodelUdm.py
@@ -0,0 +1,316 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""Unified Document Model (UDM) — hierarchical document tree and ContentPart bridge."""
+from __future__ import annotations
+
+import uuid
+from typing import Any, Dict, List, Literal, Optional, Tuple, Union
+
+from pydantic import BaseModel, Field
+
+from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
+
+
+class UdmMetadata(BaseModel):
+ title: Optional[str] = None
+ author: Optional[str] = None
+ createdAt: Optional[str] = None
+ modifiedAt: Optional[str] = None
+ sourcePath: str = ""
+ tags: List[str] = Field(default_factory=list)
+ custom: Dict[str, Any] = Field(default_factory=dict)
+
+
+class UdmBoundingBox(BaseModel):
+ x: float = 0.0
+ y: float = 0.0
+ width: float = 0.0
+ height: float = 0.0
+ unit: Literal["px", "pt", "mm"] = "pt"
+
+
+class UdmPosition(BaseModel):
+ index: int = 0
+ page: Optional[int] = None
+ row: Optional[int] = None
+ col: Optional[int] = None
+ bbox: Optional[UdmBoundingBox] = None
+
+
+class UdmContentBlock(BaseModel):
+ id: str
+ contentType: Literal["text", "image", "table", "code", "media", "link", "formula"]
+ raw: str = ""
+ fileRef: Optional[str] = None
+ mimeType: Optional[str] = None
+ language: Optional[str] = None
+ attributes: Dict[str, Any] = Field(default_factory=dict)
+ position: UdmPosition = Field(default_factory=lambda: UdmPosition(index=0))
+ metadata: UdmMetadata = Field(default_factory=UdmMetadata)
+
+
+class UdmStructuralNode(BaseModel):
+ id: str
+ role: Literal["page", "section", "slide", "sheet"]
+ index: int
+ label: Optional[str] = None
+ metadata: UdmMetadata = Field(default_factory=UdmMetadata)
+ children: List[UdmContentBlock] = Field(default_factory=list)
+
+
+class UdmDocument(BaseModel):
+ id: str
+ role: Literal["document"] = "document"
+ sourceType: Literal["pdf", "docx", "pptx", "xlsx", "html", "binary", "unknown"] = "unknown"
+ sourcePath: str = ""
+ metadata: UdmMetadata = Field(default_factory=UdmMetadata)
+ children: List[UdmStructuralNode] = Field(default_factory=list)
+
+
+class UdmArchive(BaseModel):
+ id: str
+ role: Literal["archive"] = "archive"
+ sourceType: Literal["zip", "tar", "gz", "unknown"] = "unknown"
+ sourcePath: str = ""
+ metadata: UdmMetadata = Field(default_factory=UdmMetadata)
+ children: List[Union[UdmArchive, UdmDocument]] = Field(default_factory=list)
+
+
+def _newId() -> str:
+ return str(uuid.uuid4())
+
+
+def _mapTypeGroupToContentType(typeGroup: str) -> Literal["text", "image", "table", "code", "media", "link", "formula"]:
+ if typeGroup == "image":
+ return "image"
+ if typeGroup == "table":
+ return "table"
+ if typeGroup in ("code",):
+ return "code"
+ if typeGroup in ("binary", "audiostream", "videostream"):
+ return "media"
+ if typeGroup in ("structure", "text", "container"):
+ return "text"
+ return "text"
+
+
+def _contentPartToBlock(part: ContentPart, blockIndex: int) -> UdmContentBlock:
+ meta = part.metadata or {}
+ ctx = meta.get("contextRef") or {}
+ if not isinstance(ctx, dict):
+ ctx = {}
+ page = meta.get("pageIndex")
+ if page is None:
+ page = ctx.get("pageIndex")
+ slide = meta.get("slide_number")
+ if slide is None:
+ slide = ctx.get("slideIndex")
+ pos = UdmPosition(
+ index=blockIndex,
+ page=int(page) + 1 if isinstance(page, int) else None,
+ )
+ extraAttr: Dict[str, Any] = {}
+ if isinstance(slide, int):
+ extraAttr["slideIndex"] = slide
+ return UdmContentBlock(
+ id=part.id,
+ contentType=_mapTypeGroupToContentType(part.typeGroup),
+ raw=part.data or "",
+ mimeType=part.mimeType or None,
+ attributes={
+ "typeGroup": part.typeGroup,
+ "label": part.label,
+ "parentId": part.parentId,
+ **({"contextRef": ctx} if ctx else {}),
+ **extraAttr,
+ },
+ position=pos,
+ metadata=UdmMetadata(
+ sourcePath=meta.get("containerPath", "") or "",
+ custom={k: v for k, v in meta.items() if k not in ("contextRef",)},
+ ),
+ )
+
+
+def _groupKeyForPart(part: ContentPart) -> Tuple[str, int, str]:
+ """Return (role, structural_index, label) for grouping parts into structural nodes."""
+ meta = part.metadata or {}
+ ctx = meta.get("contextRef") or {}
+ if not isinstance(ctx, dict):
+ ctx = {}
+
+ if "pageIndex" in meta or "pageIndex" in ctx:
+ pi = meta.get("pageIndex", ctx.get("pageIndex", 0))
+ try:
+ idx = int(pi)
+ except (TypeError, ValueError):
+ idx = 0
+ return ("page", idx, f"page_{idx + 1}")
+
+ if meta.get("slide_number") is not None:
+ try:
+ idx = int(meta["slide_number"]) - 1
+ except (TypeError, ValueError):
+ idx = 0
+ return ("slide", max(0, idx), f"slide_{idx + 1}")
+ if ctx.get("slideIndex") is not None:
+ try:
+ idx = int(ctx.get("slideIndex", 0))
+ except (TypeError, ValueError):
+ idx = 0
+ return ("slide", max(0, idx), f"slide_{idx + 1}")
+
+ if meta.get("sheet") or ctx.get("sheetName"):
+ name = str(meta.get("sheet") or ctx.get("sheetName") or "sheet")
+ return ("sheet", abs(hash(name)) % (10**9), name)
+
+ if ctx.get("sectionId") or meta.get("sectionId"):
+ sid = str(ctx.get("sectionId") or meta.get("sectionId") or "section")
+ return ("section", abs(hash(sid)) % (10**9), sid)
+
+ if part.typeGroup == "container":
+ return ("section", 0, "root")
+
+ return ("section", 0, "body")
+
+
+_VALID_DOC_SOURCES = frozenset({"pdf", "docx", "pptx", "xlsx", "html", "binary", "unknown"})
+
+
+def _contentPartsToUdm(extracted: ContentExtracted, sourceType: str, sourcePath: str) -> UdmDocument:
+ """Convert flat ContentPart list into a UdmDocument using structural heuristics."""
+ parts = list(extracted.parts or [])
+ st: Literal["pdf", "docx", "pptx", "xlsx", "html", "binary", "unknown"] = (
+ sourceType if sourceType in _VALID_DOC_SOURCES else "unknown" # type: ignore[assignment]
+ )
+ doc = UdmDocument(
+ id=extracted.id or _newId(),
+ sourceType=st,
+ sourcePath=sourcePath,
+ metadata=UdmMetadata(sourcePath=sourcePath),
+ )
+
+ if not parts:
+ return doc
+
+ skipIds = set()
+ rootIds = set()
+ for p in parts:
+ if p.typeGroup == "container" and p.parentId is None:
+ rootIds.add(p.id)
+ skipIds.add(p.id)
+
+ contentParts = [p for p in parts if p.id not in skipIds and p.typeGroup != "container"]
+
+ if not contentParts:
+ for p in parts:
+ if p.id not in skipIds:
+ contentParts.append(p)
+
+ if not contentParts:
+ return doc
+
+ groups: Dict[Tuple[str, int, str], List[ContentPart]] = {}
+ for p in contentParts:
+ key = _groupKeyForPart(p)
+ groups.setdefault(key, []).append(p)
+
+ sortedKeys = sorted(groups.keys(), key=lambda k: (k[0], k[1], k[2]))
+ for gi, key in enumerate(sortedKeys):
+ role, structIdx, label = key
+ plist = groups[key]
+ node = UdmStructuralNode(
+ id=_newId(),
+ role=role if role in ("page", "section", "slide", "sheet") else "section",
+ index=gi if role == "section" else structIdx,
+ label=label,
+ metadata=UdmMetadata(sourcePath=sourcePath),
+ )
+ for bi, part in enumerate(plist):
+ node.children.append(_contentPartToBlock(part, bi))
+ doc.children.append(node)
+
+ return doc
+
+
+def _udmToContentParts(document: UdmDocument) -> ContentExtracted:
+ """Flatten UdmDocument back to ContentExtracted for backward compatibility."""
+ rootId = _newId()
+ parts: List[ContentPart] = [
+ ContentPart(
+ id=rootId,
+ parentId=None,
+ label=document.sourceType or "document",
+ typeGroup="container",
+ mimeType="application/octet-stream",
+ data="",
+ metadata={"udmRoot": True, "sourcePath": document.sourcePath},
+ )
+ ]
+ for sn in document.children:
+ for block in sn.children:
+ meta = dict(block.metadata.custom) if block.metadata else {}
+ meta.setdefault("structuralRole", sn.role)
+ meta.setdefault("structuralIndex", sn.index)
+ parts.append(
+ ContentPart(
+ id=block.id,
+ parentId=rootId,
+ label=block.attributes.get("label", sn.label or ""),
+ typeGroup=str(block.attributes.get("typeGroup", "text")),
+ mimeType=block.mimeType or "text/plain",
+ data=block.raw,
+ metadata=meta,
+ )
+ )
+ return ContentExtracted(id=document.id, parts=parts)
+
+
+def _stripUdmRaw(udm: UdmDocument) -> UdmDocument:
+ """Return a deep copy with all content block `raw` cleared (structure-only preview)."""
+ clone = udm.model_copy(deep=True)
+ for sn in clone.children:
+ for block in sn.children:
+ block.raw = ""
+ return clone
+
+
+def _stripUdmForReferences(udm: UdmDocument) -> UdmDocument:
+ """Clear inline payloads; keep `fileRef` when already set in attributes/metadata."""
+ clone = udm.model_copy(deep=True)
+ for sn in clone.children:
+ for block in sn.children:
+ block.raw = ""
+ if not block.fileRef:
+ ref = block.attributes.get("fileRef")
+ if block.metadata and block.metadata.custom:
+ ref = ref or block.metadata.custom.get("fileRef")
+ if isinstance(ref, str) and ref:
+ block.fileRef = ref
+ return clone
+
+
+def _applyUdmOutputDetail(udm: UdmDocument, detail: str) -> UdmDocument:
+ if detail == "structure":
+ return _stripUdmRaw(udm)
+ if detail == "references":
+ return _stripUdmForReferences(udm)
+ return udm
+
+
+def _mimeToUdmSourceType(mimeType: str, fileName: str) -> Literal["pdf", "docx", "pptx", "xlsx", "html", "binary", "unknown"]:
+ m = (mimeType or "").lower()
+ fn = (fileName or "").lower()
+ if m == "application/pdf" or fn.endswith(".pdf"):
+ return "pdf"
+ if "wordprocessingml" in m or fn.endswith(".docx"):
+ return "docx"
+ if "presentationml" in m or fn.endswith((".pptx", ".ppt")):
+ return "pptx"
+ if "spreadsheetml" in m or fn.endswith((".xlsx", ".xlsm")):
+ return "xlsx"
+ if m == "text/html" or fn.endswith((".html", ".htm")):
+ return "html"
+ if m == "application/octet-stream" or not m:
+ return "binary"
+ return "unknown"
diff --git a/modules/datamodels/datamodelUtils.py b/modules/datamodels/datamodelUtils.py
index 0c134ed2..f389d0d7 100644
--- a/modules/datamodels/datamodelUtils.py
+++ b/modules/datamodels/datamodelUtils.py
@@ -22,7 +22,13 @@ class Prompt(PowerOnModel):
mandateId: str = Field(
default="",
description="ID of the mandate this prompt belongs to",
- json_schema_extra={"label": "Mandanten-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False},
+ json_schema_extra={
+ "label": "Mandanten-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
+ },
)
isSystem: bool = Field(
default=False,
diff --git a/modules/features/chatbot/interfaceFeatureChatbot.py b/modules/features/chatbot/interfaceFeatureChatbot.py
index 151a96ce..28f6000c 100644
--- a/modules/features/chatbot/interfaceFeatureChatbot.py
+++ b/modules/features/chatbot/interfaceFeatureChatbot.py
@@ -21,8 +21,12 @@ from modules.datamodels.datamodelUam import AccessLevel
from modules.datamodels.datamodelChat import UserInputRequest
from modules.datamodels.datamodelBase import PowerOnModel
+from modules.shared.dbRegistry import registerDatabase
from modules.shared.timeUtils import getUtcTimestamp, parseTimestamp
+chatbotDatabase = "poweron_chatbot"
+registerDatabase(chatbotDatabase)
+
# =============================================================================
# Chatbot-specific Pydantic models for poweron_chatbot (per-instance isolation)
# =============================================================================
@@ -392,7 +396,7 @@ class ChatObjects:
try:
# Get configuration values with defaults
dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data")
- dbDatabase = "poweron_chatbot"
+ dbDatabase = chatbotDatabase
dbUser = APP_CONFIG.get("DB_USER")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
diff --git a/modules/features/commcoach/interfaceFeatureCommcoach.py b/modules/features/commcoach/interfaceFeatureCommcoach.py
index c9b4564e..2a3f3d12 100644
--- a/modules/features/commcoach/interfaceFeatureCommcoach.py
+++ b/modules/features/commcoach/interfaceFeatureCommcoach.py
@@ -11,6 +11,7 @@ from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelUam import User
from modules.connectors.connectorDbPostgre import DatabaseConnector
+from modules.shared.dbRegistry import registerDatabase
from modules.shared.timeUtils import getIsoTimestamp
from modules.shared.configuration import APP_CONFIG
from modules.shared.i18nRegistry import resolveText, t
@@ -26,6 +27,9 @@ from .datamodelCommcoach import (
logger = logging.getLogger(__name__)
+commcoachDatabase = "poweron_commcoach"
+registerDatabase(commcoachDatabase)
+
_interfaces = {}
@@ -51,7 +55,7 @@ class CommcoachObjects:
self.userId = str(currentUser.id) if currentUser else "system"
dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data")
- dbDatabase = "poweron_commcoach"
+ dbDatabase = commcoachDatabase
dbUser = APP_CONFIG.get("DB_USER")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
diff --git a/modules/features/graphicalEditor/datamodelFeatureGraphicalEditor.py b/modules/features/graphicalEditor/datamodelFeatureGraphicalEditor.py
index e9fa8090..b86c295a 100644
--- a/modules/features/graphicalEditor/datamodelFeatureGraphicalEditor.py
+++ b/modules/features/graphicalEditor/datamodelFeatureGraphicalEditor.py
@@ -71,6 +71,7 @@ class AutoWorkflow(PowerOnModel):
"frontend_fk_source": "/api/mandates/",
"frontend_fk_display_field": "label",
"fk_model": "Mandate",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
},
)
featureInstanceId: str = Field(
@@ -83,6 +84,7 @@ class AutoWorkflow(PowerOnModel):
"frontend_fk_source": "/api/features/instances",
"frontend_fk_display_field": "label",
"fk_model": "FeatureInstance",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
},
)
label: str = Field(
@@ -107,7 +109,13 @@ class AutoWorkflow(PowerOnModel):
templateSourceId: Optional[str] = Field(
default=None,
description="ID of the template this workflow was created from",
- json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False, "label": "Vorlagen-Quelle"},
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "label": "Vorlagen-Quelle",
+ "fk_target": {"db": "poweron_graphicaleditor", "table": "AutoWorkflow"},
+ },
)
templateScope: Optional[str] = Field(
default=None,
@@ -122,7 +130,13 @@ class AutoWorkflow(PowerOnModel):
currentVersionId: Optional[str] = Field(
default=None,
description="ID of the currently published AutoVersion",
- json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False, "label": "Aktuelle Version"},
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "label": "Aktuelle Version",
+ "fk_target": {"db": "poweron_graphicaleditor", "table": "AutoVersion"},
+ },
)
active: bool = Field(
default=True,
@@ -165,7 +179,13 @@ class AutoVersion(PowerOnModel):
)
workflowId: str = Field(
description="FK -> AutoWorkflow",
- json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True, "label": "Workflow-ID"},
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "label": "Workflow-ID",
+ "fk_target": {"db": "poweron_graphicaleditor", "table": "AutoWorkflow"},
+ },
)
versionNumber: int = Field(
default=1,
@@ -195,7 +215,13 @@ class AutoVersion(PowerOnModel):
publishedBy: Optional[str] = Field(
default=None,
description="User ID who published this version",
- json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False, "label": "Veröffentlicht von"},
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "label": "Veröffentlicht von",
+ "fk_target": {"db": "poweron_app", "table": "User"},
+ },
)
@@ -212,7 +238,13 @@ class AutoRun(PowerOnModel):
)
workflowId: str = Field(
description="Workflow ID",
- json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True, "label": "Workflow-ID"},
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "label": "Workflow-ID",
+ "fk_target": {"db": "poweron_graphicaleditor", "table": "AutoWorkflow"},
+ },
)
label: Optional[str] = Field(
default=None,
@@ -230,17 +262,30 @@ class AutoRun(PowerOnModel):
"frontend_fk_source": "/api/mandates/",
"frontend_fk_display_field": "label",
"fk_model": "Mandate",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
},
)
ownerId: Optional[str] = Field(
default=None,
description="User ID who triggered this run",
- json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False, "label": "Auslöser"},
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "label": "Auslöser",
+ "fk_target": {"db": "poweron_app", "table": "User"},
+ },
)
versionId: Optional[str] = Field(
default=None,
description="AutoVersion ID used for this run",
- json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False, "label": "Versions-ID"},
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "label": "Versions-ID",
+ "fk_target": {"db": "poweron_graphicaleditor", "table": "AutoVersion"},
+ },
)
status: str = Field(
default=AutoRunStatus.RUNNING.value,
@@ -307,7 +352,13 @@ class AutoStepLog(PowerOnModel):
)
runId: str = Field(
description="FK -> AutoRun",
- json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True, "label": "Lauf-ID"},
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "label": "Lauf-ID",
+ "fk_target": {"db": "poweron_graphicaleditor", "table": "AutoRun"},
+ },
)
nodeId: str = Field(
description="Node ID in the graph",
@@ -377,11 +428,23 @@ class AutoTask(PowerOnModel):
)
runId: str = Field(
description="FK -> AutoRun",
- json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True, "label": "Lauf-ID"},
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "label": "Lauf-ID",
+ "fk_target": {"db": "poweron_graphicaleditor", "table": "AutoRun"},
+ },
)
workflowId: str = Field(
description="Workflow ID",
- json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": True, "label": "Workflow-ID"},
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "label": "Workflow-ID",
+ "fk_target": {"db": "poweron_graphicaleditor", "table": "AutoWorkflow"},
+ },
)
nodeId: str = Field(
description="Node ID in the graph",
@@ -399,7 +462,13 @@ class AutoTask(PowerOnModel):
assigneeId: Optional[str] = Field(
default=None,
description="User ID assigned to complete the task",
- json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": False, "label": "Zugewiesen an"},
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": False,
+ "frontend_required": False,
+ "label": "Zugewiesen an",
+ "fk_target": {"db": "poweron_app", "table": "User"},
+ },
)
status: str = Field(
default=AutoTaskStatus.PENDING.value,
diff --git a/modules/features/graphicalEditor/interfaceFeatureGraphicalEditor.py b/modules/features/graphicalEditor/interfaceFeatureGraphicalEditor.py
index fabd5c42..6622391a 100644
--- a/modules/features/graphicalEditor/interfaceFeatureGraphicalEditor.py
+++ b/modules/features/graphicalEditor/interfaceFeatureGraphicalEditor.py
@@ -38,10 +38,12 @@ from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import (
from modules.features.graphicalEditor.entryPoints import normalize_invocations_list
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.configuration import APP_CONFIG
+from modules.shared.dbRegistry import registerDatabase
logger = logging.getLogger(__name__)
-_GREENFIELD_DB = "poweron_graphicaleditor"
+graphicalEditorDatabase = "poweron_graphicaleditor"
+registerDatabase(graphicalEditorDatabase)
_CALLBACK_WORKFLOW_CHANGED = "graphicalEditor.workflow.changed"
@@ -68,7 +70,7 @@ def getAllWorkflowsForScheduling() -> List[Dict[str, Any]]:
Used by the scheduler to register cron jobs. Does not filter by mandate/instance.
"""
dbHost = APP_CONFIG.get("DB_HOST", "localhost")
- dbDatabase = _GREENFIELD_DB
+ dbDatabase = graphicalEditorDatabase
dbUser = APP_CONFIG.get("DB_USER")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
@@ -155,7 +157,7 @@ class GraphicalEditorObjects:
def _init_db(self):
"""Initialize database connection to poweron_graphicaleditor (Greenfield)."""
dbHost = APP_CONFIG.get("DB_HOST", "localhost")
- dbDatabase = _GREENFIELD_DB
+ dbDatabase = graphicalEditorDatabase
dbUser = APP_CONFIG.get("DB_USER")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
diff --git a/modules/features/graphicalEditor/nodeDefinitions/__init__.py b/modules/features/graphicalEditor/nodeDefinitions/__init__.py
index ab41094b..6f97137d 100644
--- a/modules/features/graphicalEditor/nodeDefinitions/__init__.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/__init__.py
@@ -11,6 +11,7 @@ from .clickup import CLICKUP_NODES
from .file import FILE_NODES
from .trustee import TRUSTEE_NODES
from .data import DATA_NODES
+from .context import CONTEXT_NODES
STATIC_NODE_TYPES = (
TRIGGER_NODES
@@ -23,4 +24,5 @@ STATIC_NODE_TYPES = (
+ FILE_NODES
+ TRUSTEE_NODES
+ DATA_NODES
+ + CONTEXT_NODES
)
diff --git a/modules/features/graphicalEditor/nodeDefinitions/ai.py b/modules/features/graphicalEditor/nodeDefinitions/ai.py
index 38044103..dce86056 100644
--- a/modules/features/graphicalEditor/nodeDefinitions/ai.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/ai.py
@@ -26,7 +26,7 @@ AI_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "AiResult", "TextResult", "Transit"]}},
"outputPorts": {0: {"schema": "AiResult"}},
- "meta": {"icon": "mdi-robot", "color": "#9C27B0"},
+ "meta": {"icon": "mdi-robot", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "process",
},
@@ -43,7 +43,7 @@ AI_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "AiResult"}},
- "meta": {"icon": "mdi-magnify", "color": "#9C27B0"},
+ "meta": {"icon": "mdi-magnify", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "webResearch",
},
@@ -61,7 +61,7 @@ AI_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "AiResult"}},
- "meta": {"icon": "mdi-file-document-outline", "color": "#9C27B0"},
+ "meta": {"icon": "mdi-file-document-outline", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "summarizeDocument",
},
@@ -79,7 +79,7 @@ AI_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "AiResult"}},
- "meta": {"icon": "mdi-translate", "color": "#9C27B0"},
+ "meta": {"icon": "mdi-translate", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "translateDocument",
},
@@ -97,7 +97,7 @@ AI_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "DocumentList"}},
- "meta": {"icon": "mdi-file-convert", "color": "#9C27B0"},
+ "meta": {"icon": "mdi-file-convert", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "convertDocument",
},
@@ -114,7 +114,7 @@ AI_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "DocumentList"}},
- "meta": {"icon": "mdi-file-plus", "color": "#9C27B0"},
+ "meta": {"icon": "mdi-file-plus", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "generateDocument",
},
@@ -134,8 +134,28 @@ AI_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "AiResult"}},
- "meta": {"icon": "mdi-code-tags", "color": "#9C27B0"},
+ "meta": {"icon": "mdi-code-tags", "color": "#9C27B0", "usesAi": True},
"_method": "ai",
"_action": "generateCode",
},
+ {
+ "id": "ai.consolidate",
+ "category": "ai",
+ "label": t("KI-Konsolidierung"),
+ "description": t("Gesammelte Ergebnisse mit KI zusammenfassen, klassifizieren oder semantisch zusammenführen"),
+ "parameters": [
+ {"name": "mode", "type": "string", "required": False, "frontendType": "select",
+ "frontendOptions": {"options": ["summarize", "classify", "semanticMerge"]},
+ "description": t("Konsolidierungsmodus"), "default": "summarize"},
+ {"name": "prompt", "type": "string", "required": False, "frontendType": "textarea",
+ "description": t("Optionaler Prompt für die Konsolidierung"), "default": ""},
+ ],
+ "inputs": 1,
+ "outputs": 1,
+ "inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}},
+ "outputPorts": {0: {"schema": "ConsolidateResult"}},
+ "meta": {"icon": "mdi-table-merge-cells", "color": "#9C27B0", "usesAi": True},
+ "_method": "ai",
+ "_action": "consolidate",
+ },
]
diff --git a/modules/features/graphicalEditor/nodeDefinitions/clickup.py b/modules/features/graphicalEditor/nodeDefinitions/clickup.py
index 3f194e16..51ddfada 100644
--- a/modules/features/graphicalEditor/nodeDefinitions/clickup.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/clickup.py
@@ -33,7 +33,7 @@ CLICKUP_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TaskList"}},
- "meta": {"icon": "mdi-magnify", "color": "#7B68EE"},
+ "meta": {"icon": "mdi-magnify", "color": "#7B68EE", "usesAi": False},
"_method": "clickup",
"_action": "searchTasks",
},
@@ -57,7 +57,7 @@ CLICKUP_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TaskList"}},
- "meta": {"icon": "mdi-format-list-bulleted", "color": "#7B68EE"},
+ "meta": {"icon": "mdi-format-list-bulleted", "color": "#7B68EE", "usesAi": False},
"_method": "clickup",
"_action": "listTasks",
},
@@ -78,7 +78,7 @@ CLICKUP_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TaskResult"}},
- "meta": {"icon": "mdi-file-document-outline", "color": "#7B68EE"},
+ "meta": {"icon": "mdi-file-document-outline", "color": "#7B68EE", "usesAi": False},
"_method": "clickup",
"_action": "getTask",
},
@@ -123,7 +123,7 @@ CLICKUP_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TaskResult"}},
- "meta": {"icon": "mdi-plus-circle-outline", "color": "#7B68EE"},
+ "meta": {"icon": "mdi-plus-circle-outline", "color": "#7B68EE", "usesAi": False},
"_method": "clickup",
"_action": "createTask",
},
@@ -148,7 +148,7 @@ CLICKUP_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["TaskResult", "Transit"]}},
"outputPorts": {0: {"schema": "TaskResult"}},
- "meta": {"icon": "mdi-pencil-outline", "color": "#7B68EE"},
+ "meta": {"icon": "mdi-pencil-outline", "color": "#7B68EE", "usesAi": False},
"_method": "clickup",
"_action": "updateTask",
},
@@ -171,7 +171,7 @@ CLICKUP_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
- "meta": {"icon": "mdi-attachment", "color": "#7B68EE"},
+ "meta": {"icon": "mdi-attachment", "color": "#7B68EE", "usesAi": False},
"_method": "clickup",
"_action": "uploadAttachment",
},
diff --git a/modules/features/graphicalEditor/nodeDefinitions/context.py b/modules/features/graphicalEditor/nodeDefinitions/context.py
new file mode 100644
index 00000000..b677dca6
--- /dev/null
+++ b/modules/features/graphicalEditor/nodeDefinitions/context.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2025 Patrick Motsch
+# Context node definitions — structural extraction without AI.
+
+from modules.shared.i18nRegistry import t
+
+CONTEXT_NODES = [
+ {
+ "id": "context.extractContent",
+ "category": "context",
+ "label": t("Inhalt extrahieren"),
+ "description": t("Dokumentstruktur extrahieren ohne KI (Seiten, Abschnitte, Bilder, Tabellen)"),
+ "parameters": [
+ {"name": "outputDetail", "type": "string", "required": False, "frontendType": "select",
+ "frontendOptions": {"options": ["full", "structure", "references"]},
+ "description": t("Detailgrad: full = alles, structure = Skelett, references = Dateireferenzen"),
+ "default": "full"},
+ {"name": "includeImages", "type": "boolean", "required": False, "frontendType": "checkbox",
+ "description": t("Bilder extrahieren"), "default": True},
+ {"name": "includeTables", "type": "boolean", "required": False, "frontendType": "checkbox",
+ "description": t("Tabellen extrahieren"), "default": True},
+ ],
+ "inputs": 1,
+ "outputs": 1,
+ "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
+ "outputPorts": {0: {"schema": "UdmDocument"}},
+ "meta": {"icon": "mdi-file-tree-outline", "color": "#00897B", "usesAi": False},
+ "_method": "context",
+ "_action": "extractContent",
+ },
+]
diff --git a/modules/features/graphicalEditor/nodeDefinitions/data.py b/modules/features/graphicalEditor/nodeDefinitions/data.py
index f5eceb16..73552928 100644
--- a/modules/features/graphicalEditor/nodeDefinitions/data.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/data.py
@@ -19,7 +19,7 @@ DATA_NODES = [
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "AggregateResult"}},
"executor": "data",
- "meta": {"icon": "mdi-playlist-plus", "color": "#607D8B"},
+ "meta": {"icon": "mdi-playlist-plus", "color": "#607D8B", "usesAi": False},
},
{
"id": "data.transform",
@@ -35,7 +35,7 @@ DATA_NODES = [
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult", "dynamic": True, "deriveFrom": "mappings"}},
"executor": "data",
- "meta": {"icon": "mdi-swap-horizontal-bold", "color": "#607D8B"},
+ "meta": {"icon": "mdi-swap-horizontal-bold", "color": "#607D8B", "usesAi": False},
},
{
"id": "data.filter",
@@ -45,12 +45,34 @@ DATA_NODES = [
"parameters": [
{"name": "condition", "type": "string", "required": True, "frontendType": "filterExpression",
"description": t("Filterbedingung")},
+ {"name": "udmContentType", "type": "string", "required": False, "frontendType": "select",
+ "frontendOptions": {"options": ["", "text", "image", "table", "code", "media", "link", "formula"]},
+ "description": t("UDM-ContentType-Filter (optional, leer = kein UDM-Filter)"), "default": ""},
],
"inputs": 1,
"outputs": 1,
- "inputPorts": {0: {"accepts": ["AggregateResult", "FileList", "TaskList", "EmailList", "DocumentList"]}},
+ "inputPorts": {0: {"accepts": ["AggregateResult", "FileList", "TaskList", "EmailList", "DocumentList", "UdmDocument", "UdmNodeList"]}},
"outputPorts": {0: {"schema": "Transit"}},
"executor": "data",
- "meta": {"icon": "mdi-filter-outline", "color": "#607D8B"},
+ "meta": {"icon": "mdi-filter-outline", "color": "#607D8B", "usesAi": False},
+ },
+ {
+ "id": "data.consolidate",
+ "category": "data",
+ "label": t("Konsolidieren"),
+ "description": t("Gesammelte Ergebnisse deterministisch zusammenführen (Tabelle, CSV, Merge)"),
+ "parameters": [
+ {"name": "mode", "type": "string", "required": False, "frontendType": "select",
+ "frontendOptions": {"options": ["table", "concat", "merge", "csvJoin"]},
+ "description": t("Konsolidierungsmodus"), "default": "table"},
+ {"name": "separator", "type": "string", "required": False, "frontendType": "text",
+ "description": t("Trennzeichen (für concat/csvJoin)"), "default": "\n"},
+ ],
+ "inputs": 1,
+ "outputs": 1,
+ "inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}},
+ "outputPorts": {0: {"schema": "ConsolidateResult"}},
+ "executor": "data",
+ "meta": {"icon": "mdi-table-merge-cells", "color": "#607D8B", "usesAi": False},
},
]
diff --git a/modules/features/graphicalEditor/nodeDefinitions/email.py b/modules/features/graphicalEditor/nodeDefinitions/email.py
index 1978fdfe..e2e852f0 100644
--- a/modules/features/graphicalEditor/nodeDefinitions/email.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/email.py
@@ -29,7 +29,7 @@ EMAIL_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "EmailList"}},
- "meta": {"icon": "mdi-email-check", "color": "#1976D2"},
+ "meta": {"icon": "mdi-email-check", "color": "#1976D2", "usesAi": False},
"_method": "outlook",
"_action": "readEmails",
},
@@ -64,7 +64,7 @@ EMAIL_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "EmailList"}},
- "meta": {"icon": "mdi-email-search", "color": "#1976D2"},
+ "meta": {"icon": "mdi-email-search", "color": "#1976D2", "usesAi": False},
"_method": "outlook",
"_action": "searchEmails",
},
@@ -87,7 +87,7 @@ EMAIL_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["EmailDraft", "AiResult", "Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
- "meta": {"icon": "mdi-email-edit", "color": "#1976D2"},
+ "meta": {"icon": "mdi-email-edit", "color": "#1976D2", "usesAi": False},
"_method": "outlook",
"_action": "composeAndDraftEmailWithContext",
},
diff --git a/modules/features/graphicalEditor/nodeDefinitions/file.py b/modules/features/graphicalEditor/nodeDefinitions/file.py
index d9985db0..8e04f2bc 100644
--- a/modules/features/graphicalEditor/nodeDefinitions/file.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/file.py
@@ -30,7 +30,7 @@ FILE_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["AiResult", "TextResult", "Transit"]}},
"outputPorts": {0: {"schema": "DocumentList"}},
- "meta": {"icon": "mdi-file-plus-outline", "color": "#2196F3"},
+ "meta": {"icon": "mdi-file-plus-outline", "color": "#2196F3", "usesAi": False},
"_method": "file",
"_action": "create",
},
diff --git a/modules/features/graphicalEditor/nodeDefinitions/flow.py b/modules/features/graphicalEditor/nodeDefinitions/flow.py
index 91faa4e5..be5f5a43 100644
--- a/modules/features/graphicalEditor/nodeDefinitions/flow.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/flow.py
@@ -24,7 +24,7 @@ FLOW_NODES = [
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "Transit"}, 1: {"schema": "Transit"}},
"executor": "flow",
- "meta": {"icon": "mdi-source-branch", "color": "#FF9800"},
+ "meta": {"icon": "mdi-source-branch", "color": "#FF9800", "usesAi": False},
},
{
"id": "flow.switch",
@@ -52,13 +52,13 @@ FLOW_NODES = [
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "Transit"}},
"executor": "flow",
- "meta": {"icon": "mdi-swap-horizontal", "color": "#FF9800"},
+ "meta": {"icon": "mdi-swap-horizontal", "color": "#FF9800", "usesAi": False},
},
{
"id": "flow.loop",
"category": "flow",
"label": t("Schleife / Für Jedes"),
- "description": t("Über Array-Elemente iterieren"),
+ "description": t("Über Array-Elemente oder UDM-Strukturebenen iterieren"),
"parameters": [
{
"name": "items",
@@ -67,19 +67,37 @@ FLOW_NODES = [
"frontendType": "text",
"description": t("Pfad zum Array"),
},
+ {
+ "name": "level",
+ "type": "string",
+ "required": False,
+ "frontendType": "select",
+ "frontendOptions": {"options": ["auto", "documents", "structuralNodes", "contentBlocks"]},
+ "description": t("UDM-Iterationsebene"),
+ "default": "auto",
+ },
+ {
+ "name": "concurrency",
+ "type": "number",
+ "required": False,
+ "frontendType": "number",
+ "frontendOptions": {"min": 1, "max": 20},
+ "description": t("Parallele Iterationen (1 = sequentiell)"),
+ "default": 1,
+ },
],
"inputs": 1,
"outputs": 1,
- "inputPorts": {0: {"accepts": ["Transit"]}},
+ "inputPorts": {0: {"accepts": ["Transit", "UdmDocument"]}},
"outputPorts": {0: {"schema": "LoopItem"}},
"executor": "flow",
- "meta": {"icon": "mdi-repeat", "color": "#FF9800"},
+ "meta": {"icon": "mdi-repeat", "color": "#FF9800", "usesAi": False},
},
{
"id": "flow.merge",
"category": "flow",
"label": t("Zusammenführen"),
- "description": t("Mehrere Zweige zusammenführen"),
+ "description": t("Mehrere Zweige zusammenführen (2-5 Eingänge)"),
"parameters": [
{
"name": "mode",
@@ -90,12 +108,21 @@ FLOW_NODES = [
"description": t("Zusammenführungsmodus"),
"default": "first",
},
+ {
+ "name": "inputCount",
+ "type": "number",
+ "required": False,
+ "frontendType": "number",
+ "frontendOptions": {"min": 2, "max": 5},
+ "description": t("Anzahl Eingänge"),
+ "default": 2,
+ },
],
"inputs": 2,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}, 1: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "MergeResult"}},
"executor": "flow",
- "meta": {"icon": "mdi-call-merge", "color": "#FF9800"},
+ "meta": {"icon": "mdi-call-merge", "color": "#FF9800", "usesAi": False},
},
]
diff --git a/modules/features/graphicalEditor/nodeDefinitions/input.py b/modules/features/graphicalEditor/nodeDefinitions/input.py
index b90efaa2..e6d88c6b 100644
--- a/modules/features/graphicalEditor/nodeDefinitions/input.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/input.py
@@ -24,7 +24,7 @@ INPUT_NODES = [
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "FormPayload", "dynamic": True, "deriveFrom": "fields"}},
"executor": "input",
- "meta": {"icon": "mdi-form-textbox", "color": "#9C27B0"},
+ "meta": {"icon": "mdi-form-textbox", "color": "#9C27B0", "usesAi": False},
},
{
"id": "input.approval",
@@ -45,7 +45,7 @@ INPUT_NODES = [
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "BoolResult"}},
"executor": "input",
- "meta": {"icon": "mdi-check-decagram", "color": "#4CAF50"},
+ "meta": {"icon": "mdi-check-decagram", "color": "#4CAF50", "usesAi": False},
},
{
"id": "input.upload",
@@ -68,7 +68,7 @@ INPUT_NODES = [
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "DocumentList"}},
"executor": "input",
- "meta": {"icon": "mdi-upload", "color": "#2196F3"},
+ "meta": {"icon": "mdi-upload", "color": "#2196F3", "usesAi": False},
},
{
"id": "input.comment",
@@ -86,7 +86,7 @@ INPUT_NODES = [
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TextResult"}},
"executor": "input",
- "meta": {"icon": "mdi-comment-text", "color": "#FF9800"},
+ "meta": {"icon": "mdi-comment-text", "color": "#FF9800", "usesAi": False},
},
{
"id": "input.review",
@@ -105,7 +105,7 @@ INPUT_NODES = [
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "BoolResult"}},
"executor": "input",
- "meta": {"icon": "mdi-magnify-scan", "color": "#673AB7"},
+ "meta": {"icon": "mdi-magnify-scan", "color": "#673AB7", "usesAi": False},
},
{
"id": "input.selection",
@@ -123,7 +123,7 @@ INPUT_NODES = [
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "TextResult"}},
"executor": "input",
- "meta": {"icon": "mdi-format-list-checks", "color": "#009688"},
+ "meta": {"icon": "mdi-format-list-checks", "color": "#009688", "usesAi": False},
},
{
"id": "input.confirmation",
@@ -143,6 +143,6 @@ INPUT_NODES = [
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "BoolResult"}},
"executor": "input",
- "meta": {"icon": "mdi-checkbox-marked-circle", "color": "#8BC34A"},
+ "meta": {"icon": "mdi-checkbox-marked-circle", "color": "#8BC34A", "usesAi": False},
},
]
diff --git a/modules/features/graphicalEditor/nodeDefinitions/sharepoint.py b/modules/features/graphicalEditor/nodeDefinitions/sharepoint.py
index 617354d3..4bb93256 100644
--- a/modules/features/graphicalEditor/nodeDefinitions/sharepoint.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/sharepoint.py
@@ -23,7 +23,7 @@ SHAREPOINT_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "FileList"}},
- "meta": {"icon": "mdi-file-search", "color": "#0078D4"},
+ "meta": {"icon": "mdi-file-search", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint",
"_action": "findDocumentPath",
},
@@ -43,7 +43,7 @@ SHAREPOINT_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["FileList", "Transit"]}},
"outputPorts": {0: {"schema": "DocumentList"}},
- "meta": {"icon": "mdi-file-document", "color": "#0078D4"},
+ "meta": {"icon": "mdi-file-document", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint",
"_action": "readDocuments",
},
@@ -63,7 +63,7 @@ SHAREPOINT_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
- "meta": {"icon": "mdi-upload", "color": "#0078D4"},
+ "meta": {"icon": "mdi-upload", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint",
"_action": "uploadFile",
},
@@ -83,7 +83,7 @@ SHAREPOINT_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "FileList"}},
- "meta": {"icon": "mdi-folder-open", "color": "#0078D4"},
+ "meta": {"icon": "mdi-folder-open", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint",
"_action": "listDocuments",
},
@@ -103,7 +103,7 @@ SHAREPOINT_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["FileList", "Transit"]}},
"outputPorts": {0: {"schema": "DocumentList"}},
- "meta": {"icon": "mdi-download", "color": "#0078D4"},
+ "meta": {"icon": "mdi-download", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint",
"_action": "downloadFileByPath",
},
@@ -126,7 +126,7 @@ SHAREPOINT_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
- "meta": {"icon": "mdi-content-copy", "color": "#0078D4"},
+ "meta": {"icon": "mdi-content-copy", "color": "#0078D4", "usesAi": False},
"_method": "sharepoint",
"_action": "copyFile",
},
diff --git a/modules/features/graphicalEditor/nodeDefinitions/triggers.py b/modules/features/graphicalEditor/nodeDefinitions/triggers.py
index 69b1aa17..d4122527 100644
--- a/modules/features/graphicalEditor/nodeDefinitions/triggers.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/triggers.py
@@ -15,7 +15,7 @@ TRIGGER_NODES = [
"inputPorts": {},
"outputPorts": {0: {"schema": "ActionResult"}},
"executor": "trigger",
- "meta": {"icon": "mdi-play", "color": "#4CAF50"},
+ "meta": {"icon": "mdi-play", "color": "#4CAF50", "usesAi": False},
},
{
"id": "trigger.form",
@@ -36,7 +36,7 @@ TRIGGER_NODES = [
"inputPorts": {},
"outputPorts": {0: {"schema": "FormPayload", "dynamic": True, "deriveFrom": "formFields"}},
"executor": "trigger",
- "meta": {"icon": "mdi-form-select", "color": "#9C27B0"},
+ "meta": {"icon": "mdi-form-select", "color": "#9C27B0", "usesAi": False},
},
{
"id": "trigger.schedule",
@@ -57,6 +57,6 @@ TRIGGER_NODES = [
"inputPorts": {},
"outputPorts": {0: {"schema": "ActionResult"}},
"executor": "trigger",
- "meta": {"icon": "mdi-clock", "color": "#2196F3"},
+ "meta": {"icon": "mdi-clock", "color": "#2196F3", "usesAi": False},
},
]
diff --git a/modules/features/graphicalEditor/nodeDefinitions/trustee.py b/modules/features/graphicalEditor/nodeDefinitions/trustee.py
index 5d8a0f21..18874c40 100644
--- a/modules/features/graphicalEditor/nodeDefinitions/trustee.py
+++ b/modules/features/graphicalEditor/nodeDefinitions/trustee.py
@@ -23,7 +23,7 @@ TRUSTEE_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
- "meta": {"icon": "mdi-database-refresh", "color": "#4CAF50"},
+ "meta": {"icon": "mdi-database-refresh", "color": "#4CAF50", "usesAi": False},
"_method": "trustee",
"_action": "refreshAccountingData",
},
@@ -47,7 +47,7 @@ TRUSTEE_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "DocumentList"}},
- "meta": {"icon": "mdi-file-document-scan", "color": "#4CAF50"},
+ "meta": {"icon": "mdi-file-document-scan", "color": "#4CAF50", "usesAi": True},
"_method": "trustee",
"_action": "extractFromFiles",
},
@@ -66,7 +66,7 @@ TRUSTEE_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
- "meta": {"icon": "mdi-file-document-check", "color": "#4CAF50"},
+ "meta": {"icon": "mdi-file-document-check", "color": "#4CAF50", "usesAi": False},
"_method": "trustee",
"_action": "processDocuments",
},
@@ -85,7 +85,7 @@ TRUSTEE_NODES = [
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
"outputPorts": {0: {"schema": "ActionResult"}},
- "meta": {"icon": "mdi-calculator", "color": "#4CAF50"},
+ "meta": {"icon": "mdi-calculator", "color": "#4CAF50", "usesAi": False},
"_method": "trustee",
"_action": "syncToAccounting",
},
diff --git a/modules/features/graphicalEditor/nodeRegistry.py b/modules/features/graphicalEditor/nodeRegistry.py
index ea5b67bd..577b530f 100644
--- a/modules/features/graphicalEditor/nodeRegistry.py
+++ b/modules/features/graphicalEditor/nodeRegistry.py
@@ -88,6 +88,7 @@ def getNodeTypesForApi(
{"id": "input", "label": "Eingabe/Mensch"},
{"id": "flow", "label": "Ablauf"},
{"id": "data", "label": "Daten"},
+ {"id": "context", "label": "Kontext"},
{"id": "ai", "label": "KI"},
{"id": "file", "label": "Datei"},
{"id": "email", "label": "E-Mail"},
diff --git a/modules/features/graphicalEditor/portTypes.py b/modules/features/graphicalEditor/portTypes.py
index 59ae01bf..1ac90665 100644
--- a/modules/features/graphicalEditor/portTypes.py
+++ b/modules/features/graphicalEditor/portTypes.py
@@ -152,6 +152,21 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
description="Ergebnisdaten"),
]),
"Transit": PortSchema(name="Transit", fields=[]),
+ "UdmDocument": PortSchema(name="UdmDocument", fields=[
+ PortField(name="id", type="str", description="Dokument-ID"),
+ PortField(name="sourceType", type="str", description="Quellformat (pdf, docx, …)"),
+ PortField(name="sourcePath", type="str", description="Quellpfad"),
+ PortField(name="children", type="List[Any]", description="StructuralNodes"),
+ ]),
+ "UdmNodeList": PortSchema(name="UdmNodeList", fields=[
+ PortField(name="nodes", type="List[Any]", description="UDM StructuralNodes oder ContentBlocks"),
+ PortField(name="count", type="int", description="Anzahl"),
+ ]),
+ "ConsolidateResult": PortSchema(name="ConsolidateResult", fields=[
+ PortField(name="result", type="Any", description="Konsolidiertes Ergebnis"),
+ PortField(name="mode", type="str", description="Konsolidierungsmodus"),
+ PortField(name="count", type="int", description="Anzahl verarbeiteter Elemente"),
+ ]),
}
@@ -412,6 +427,36 @@ def _extractMergeResult(upstream: Dict[str, Any]) -> Dict[str, Any]:
}
+def _extractUdmDocument(upstream: Dict[str, Any]) -> Dict[str, Any]:
+ """Extract UdmDocument fields from upstream output."""
+ if upstream.get("children") is not None and upstream.get("sourceType"):
+ return upstream
+ udm = upstream.get("udm")
+ if isinstance(udm, dict) and udm.get("children") is not None:
+ return udm
+ return {}
+
+
+def _extractUdmNodeList(upstream: Dict[str, Any]) -> Dict[str, Any]:
+ """Extract UdmNodeList fields from upstream output."""
+ nodes = upstream.get("nodes")
+ if isinstance(nodes, list):
+ return {"nodes": nodes, "count": len(nodes)}
+ children = upstream.get("children")
+ if isinstance(children, list):
+ return {"nodes": children, "count": len(children)}
+ return {}
+
+
+def _extractConsolidateResult(upstream: Dict[str, Any]) -> Dict[str, Any]:
+ """Extract ConsolidateResult fields from upstream output."""
+ result = {}
+ for key in ("result", "mode", "count"):
+ if key in upstream:
+ result[key] = upstream[key]
+ return result
+
+
INPUT_EXTRACTORS: Dict[str, Callable] = {
"EmailDraft": _extractEmailDraft,
"DocumentList": _extractDocuments,
@@ -425,6 +470,9 @@ INPUT_EXTRACTORS: Dict[str, Callable] = {
"TaskResult": _extractTaskResult,
"AggregateResult": _extractAggregateResult,
"MergeResult": _extractMergeResult,
+ "UdmDocument": _extractUdmDocument,
+ "UdmNodeList": _extractUdmNodeList,
+ "ConsolidateResult": _extractConsolidateResult,
}
diff --git a/modules/features/neutralization/datamodelFeatureNeutralizer.py b/modules/features/neutralization/datamodelFeatureNeutralizer.py
index cbaae3c4..cd9b67f8 100644
--- a/modules/features/neutralization/datamodelFeatureNeutralizer.py
+++ b/modules/features/neutralization/datamodelFeatureNeutralizer.py
@@ -27,15 +27,33 @@ class DataNeutraliserConfig(PowerOnModel):
)
mandateId: str = Field(
description="ID of the mandate this configuration belongs to",
- json_schema_extra={"label": "Mandanten-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True},
+ json_schema_extra={
+ "label": "Mandanten-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
+ },
)
featureInstanceId: str = Field(
description="ID of the feature instance this configuration belongs to",
- json_schema_extra={"label": "Feature-Instanz-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True},
+ json_schema_extra={
+ "label": "Feature-Instanz-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
+ },
)
userId: str = Field(
description="ID of the user who created this configuration",
- json_schema_extra={"label": "Benutzer-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True},
+ json_schema_extra={
+ "label": "Benutzer-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "User"},
+ },
)
enabled: bool = Field(
default=True,
@@ -84,15 +102,33 @@ class DataNeutralizerAttributes(BaseModel):
)
mandateId: str = Field(
description="ID of the mandate this attribute belongs to",
- json_schema_extra={"label": "Mandanten-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True},
+ json_schema_extra={
+ "label": "Mandanten-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
+ },
)
featureInstanceId: str = Field(
description="ID of the feature instance this attribute belongs to",
- json_schema_extra={"label": "Feature-Instanz-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True},
+ json_schema_extra={
+ "label": "Feature-Instanz-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
+ },
)
userId: str = Field(
description="ID of the user who created this attribute",
- json_schema_extra={"label": "Benutzer-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True},
+ json_schema_extra={
+ "label": "Benutzer-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "User"},
+ },
)
originalText: str = Field(
description="Original text that was neutralized",
@@ -101,7 +137,13 @@ class DataNeutralizerAttributes(BaseModel):
fileId: Optional[str] = Field(
default=None,
description="ID of the file this attribute belongs to",
- json_schema_extra={"label": "Datei-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False},
+ json_schema_extra={
+ "label": "Datei-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_management", "table": "FileItem"},
+ },
)
patternType: str = Field(
description="Type of pattern that matched (email, phone, name, etc.)",
@@ -118,16 +160,16 @@ class DataNeutralizationSnapshot(BaseModel):
)
mandateId: str = Field(
description="Mandate scope",
- json_schema_extra={"label": "Mandanten-ID"},
+ json_schema_extra={"label": "Mandanten-ID", "fk_target": {"db": "poweron_app", "table": "Mandate"}},
)
featureInstanceId: str = Field(
default="",
description="Feature instance scope",
- json_schema_extra={"label": "Feature-Instanz-ID"},
+ json_schema_extra={"label": "Feature-Instanz-ID", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}},
)
userId: str = Field(
description="User who triggered neutralization",
- json_schema_extra={"label": "Benutzer-ID"},
+ json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "User"}},
)
sourceLabel: str = Field(
description="Human label, e.g. 'Prompt', 'Kontext', 'Nachricht 3'",
diff --git a/modules/features/neutralization/interfaceFeatureNeutralizer.py b/modules/features/neutralization/interfaceFeatureNeutralizer.py
index 22af9683..1575ed85 100644
--- a/modules/features/neutralization/interfaceFeatureNeutralizer.py
+++ b/modules/features/neutralization/interfaceFeatureNeutralizer.py
@@ -14,6 +14,7 @@ from modules.features.neutralization.datamodelFeatureNeutralizer import (
DataNeutralizationSnapshot,
)
from modules.connectors.connectorDbPostgre import DatabaseConnector
+from modules.shared.dbRegistry import registerDatabase
from modules.interfaces.interfaceRbac import getRecordsetWithRBAC
from modules.shared.configuration import APP_CONFIG
from modules.shared.timeUtils import getUtcTimestamp
@@ -21,6 +22,9 @@ from modules.datamodels.datamodelUam import User
logger = logging.getLogger(__name__)
+neutralizationDatabase = "poweron_neutralization"
+registerDatabase(neutralizationDatabase)
+
# Singleton cache for interface instances
_neutralizerInterfaces = {}
@@ -54,7 +58,7 @@ class InterfaceFeatureNeutralizer:
try:
# Use same database config pattern as other feature interfaces
dbHost = APP_CONFIG.get("DB_HOST", "localhost")
- dbDatabase = "poweron_neutralization"
+ dbDatabase = neutralizationDatabase
dbUser = APP_CONFIG.get("DB_USER", "postgres")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
diff --git a/modules/features/realEstate/datamodelFeatureRealEstate.py b/modules/features/realEstate/datamodelFeatureRealEstate.py
index c12090d1..4f2ebcd3 100644
--- a/modules/features/realEstate/datamodelFeatureRealEstate.py
+++ b/modules/features/realEstate/datamodelFeatureRealEstate.py
@@ -284,9 +284,12 @@ class Kanton(PowerOnModel):
id_land: Optional[str] = Field(
None,
description="Land ID (Foreign Key) - eindeutiger Link zum Land, in welchem Land der Kanton liegt",
- frontend_type="text",
- frontend_readonly=False,
- frontend_required=False,
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": False,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_realestate", "table": "Land"},
+ },
)
abk: Optional[str] = Field(
None,
@@ -341,9 +344,12 @@ class Gemeinde(BaseModel):
id_kanton: Optional[str] = Field(
None,
description="Kanton ID (Foreign Key) - eindeutiger Link zum Kanton, in welchem Kanton die Gemeinde liegt",
- frontend_type="text",
- frontend_readonly=False,
- frontend_required=False,
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": False,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_realestate", "table": "Kanton"},
+ },
)
plz: Optional[str] = Field(
None,
@@ -387,17 +393,23 @@ class Parzelle(PowerOnModel):
)
mandateId: str = Field(
description="ID of the mandate",
- frontend_type="text",
- frontend_readonly=True,
- frontend_required=False,
- label="Mandats-ID",
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "label": "Mandats-ID",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
+ },
)
featureInstanceId: str = Field(
description="ID of the feature instance",
- frontend_type="text",
- frontend_readonly=True,
- frontend_required=False,
- label="Feature-Instanz-ID",
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "label": "Feature-Instanz-ID",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
+ },
)
# Grunddaten
@@ -456,9 +468,12 @@ class Parzelle(PowerOnModel):
kontextGemeinde: Optional[str] = Field(
None,
description="Municipality ID (Foreign Key)",
- frontend_type="text",
- frontend_readonly=False,
- frontend_required=False,
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": False,
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_realestate", "table": "Gemeinde"},
+ },
)
# Bebauungsparameter
@@ -618,17 +633,23 @@ class Projekt(PowerOnModel):
)
mandateId: str = Field(
description="ID of the mandate",
- frontend_type="text",
- frontend_readonly=True,
- frontend_required=False,
- label="Mandats-ID",
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "label": "Mandats-ID",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
+ },
)
featureInstanceId: str = Field(
description="ID of the feature instance",
- frontend_type="text",
- frontend_readonly=True,
- frontend_required=False,
- label="Feature-Instanz-ID",
+ json_schema_extra={
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": False,
+ "label": "Feature-Instanz-ID",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
+ },
)
label: str = Field(
description="Project designation",
diff --git a/modules/features/realEstate/interfaceFeatureRealEstate.py b/modules/features/realEstate/interfaceFeatureRealEstate.py
index f7ed52b6..1fbaf06f 100644
--- a/modules/features/realEstate/interfaceFeatureRealEstate.py
+++ b/modules/features/realEstate/interfaceFeatureRealEstate.py
@@ -21,6 +21,7 @@ from .datamodelFeatureRealEstate import (
from modules.datamodels.datamodelUam import User
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.configuration import APP_CONFIG
+from modules.shared.dbRegistry import registerDatabase
from modules.security.rbac import RbacClass
from modules.datamodels.datamodelRbac import AccessRuleContext
from modules.datamodels.datamodelUam import AccessLevel
@@ -29,6 +30,9 @@ from modules.datamodels.datamodelPagination import PaginationParams, PaginatedRe
logger = logging.getLogger(__name__)
+realEstateDatabase = "poweron_realestate"
+registerDatabase(realEstateDatabase)
+
# Singleton factory for Real Estate interfaces
_realEstateInterfaces = {}
@@ -71,7 +75,7 @@ class RealEstateObjects:
try:
# Get database configuration from environment
dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data")
- dbDatabase = "poweron_realestate"
+ dbDatabase = realEstateDatabase
dbUser = APP_CONFIG.get("DB_USER")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
diff --git a/modules/features/teamsbot/interfaceFeatureTeamsbot.py b/modules/features/teamsbot/interfaceFeatureTeamsbot.py
index 4d6519d8..5395d922 100644
--- a/modules/features/teamsbot/interfaceFeatureTeamsbot.py
+++ b/modules/features/teamsbot/interfaceFeatureTeamsbot.py
@@ -11,6 +11,7 @@ from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelUam import User
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.configuration import APP_CONFIG
+from modules.shared.dbRegistry import registerDatabase
from .datamodelTeamsbot import (
TeamsbotSession,
@@ -24,6 +25,9 @@ from .datamodelTeamsbot import (
logger = logging.getLogger(__name__)
+teamsbotDatabase = "poweron_teamsbot"
+registerDatabase(teamsbotDatabase)
+
# Singleton factory
_interfaces = {}
@@ -50,7 +54,7 @@ class TeamsbotObjects:
self.userId = str(currentUser.id) if currentUser else "system"
dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data")
- dbDatabase = "poweron_teamsbot"
+ dbDatabase = teamsbotDatabase
dbUser = APP_CONFIG.get("DB_USER")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
diff --git a/modules/features/trustee/datamodelFeatureTrustee.py b/modules/features/trustee/datamodelFeatureTrustee.py
index d1b35718..5d1b4263 100644
--- a/modules/features/trustee/datamodelFeatureTrustee.py
+++ b/modules/features/trustee/datamodelFeatureTrustee.py
@@ -46,6 +46,7 @@ class TrusteeOrganisation(PowerOnModel):
description="Mandate ID (system-level organisation)",
json_schema_extra={
"label": "Mandat",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
"frontend_type": "text",
"frontend_readonly": True,
"frontend_required": False
@@ -56,6 +57,7 @@ class TrusteeOrganisation(PowerOnModel):
description="Feature Instance ID for instance-level isolation",
json_schema_extra={
"label": "Feature-Instanz",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
"frontend_type": "text",
"frontend_readonly": True,
"frontend_required": False
@@ -90,6 +92,7 @@ class TrusteeRole(PowerOnModel):
description="Mandate ID",
json_schema_extra={
"label": "Mandat",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
"frontend_type": "text",
"frontend_readonly": True,
"frontend_required": False
@@ -100,6 +103,7 @@ class TrusteeRole(PowerOnModel):
description="Feature Instance ID for instance-level isolation",
json_schema_extra={
"label": "Feature-Instanz",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
"frontend_type": "text",
"frontend_readonly": True,
"frontend_required": False
@@ -127,7 +131,8 @@ class TrusteeAccess(PowerOnModel):
"frontend_type": "select",
"frontend_readonly": False,
"frontend_required": True,
- "frontend_options": "/api/trustee/{instanceId}/organisations/options"
+ "frontend_options": "/api/trustee/{instanceId}/organisations/options",
+ "fk_target": {"db": "poweron_trustee", "table": "TrusteeOrganisation"},
}
)
roleId: str = Field(
@@ -137,7 +142,8 @@ class TrusteeAccess(PowerOnModel):
"frontend_type": "select",
"frontend_readonly": False,
"frontend_required": True,
- "frontend_options": "/api/trustee/{instanceId}/roles/options"
+ "frontend_options": "/api/trustee/{instanceId}/roles/options",
+ "fk_target": {"db": "poweron_trustee", "table": "TrusteeRole"},
}
)
userId: str = Field(
@@ -147,7 +153,8 @@ class TrusteeAccess(PowerOnModel):
"frontend_type": "select",
"frontend_readonly": False,
"frontend_required": True,
- "frontend_options": "/api/users/options"
+ "frontend_options": "/api/users/options",
+ "fk_target": {"db": "poweron_app", "table": "User"},
}
)
contractId: Optional[str] = Field(
@@ -159,7 +166,8 @@ class TrusteeAccess(PowerOnModel):
"frontend_readonly": False,
"frontend_required": False,
"frontend_options": "/api/trustee/{instanceId}/contracts/options",
- "frontend_depends_on": "organisationId"
+ "frontend_depends_on": "organisationId",
+ "fk_target": {"db": "poweron_trustee", "table": "TrusteeContract"},
}
)
mandateId: Optional[str] = Field(
@@ -167,6 +175,7 @@ class TrusteeAccess(PowerOnModel):
description="Mandate ID",
json_schema_extra={
"label": "Mandat",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
"frontend_type": "text",
"frontend_readonly": True,
"frontend_required": False
@@ -177,6 +186,7 @@ class TrusteeAccess(PowerOnModel):
description="Feature Instance ID for instance-level isolation",
json_schema_extra={
"label": "Feature-Instanz",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
"frontend_type": "text",
"frontend_readonly": True,
"frontend_required": False
@@ -204,7 +214,8 @@ class TrusteeContract(PowerOnModel):
"frontend_type": "select",
"frontend_readonly": False, # Editable at creation, then readonly
"frontend_required": True,
- "frontend_options": "/api/trustee/{instanceId}/organisations/options"
+ "frontend_options": "/api/trustee/{instanceId}/organisations/options",
+ "fk_target": {"db": "poweron_trustee", "table": "TrusteeOrganisation"},
}
)
label: str = Field(
@@ -231,6 +242,7 @@ class TrusteeContract(PowerOnModel):
description="Mandate ID",
json_schema_extra={
"label": "Mandat",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
"frontend_type": "text",
"frontend_readonly": True,
"frontend_required": False
@@ -241,6 +253,7 @@ class TrusteeContract(PowerOnModel):
description="Feature Instance ID for instance-level isolation",
json_schema_extra={
"label": "Feature-Instanz",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
"frontend_type": "text",
"frontend_readonly": True,
"frontend_required": False
@@ -297,7 +310,8 @@ class TrusteeDocument(PowerOnModel):
"label": "Datei-Referenz",
"frontend_type": "file_reference",
"frontend_readonly": False,
- "frontend_required": False
+ "frontend_required": False,
+ "fk_target": {"db": "poweron_management", "table": "FileItem"},
}
)
documentName: str = Field(
@@ -345,6 +359,7 @@ class TrusteeDocument(PowerOnModel):
description="Mandate ID (auto-set from context)",
json_schema_extra={
"label": "Mandat",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
"frontend_type": "text",
"frontend_readonly": True,
"frontend_required": False,
@@ -356,6 +371,7 @@ class TrusteeDocument(PowerOnModel):
description="Feature Instance ID for instance-level isolation (auto-set from context)",
json_schema_extra={
"label": "Feature-Instanz",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
"frontend_type": "text",
"frontend_readonly": True,
"frontend_required": False,
@@ -422,7 +438,8 @@ class TrusteePosition(PowerOnModel):
"frontend_type": "select",
"frontend_readonly": False,
"frontend_required": False,
- "frontend_options": "/api/trustee/{instanceId}/documents/options"
+ "frontend_options": "/api/trustee/{instanceId}/documents/options",
+ "fk_target": {"db": "poweron_trustee", "table": "TrusteeDocument"},
}
)
bankDocumentId: Optional[str] = Field(
@@ -433,7 +450,8 @@ class TrusteePosition(PowerOnModel):
"frontend_type": "select",
"frontend_readonly": False,
"frontend_required": False,
- "frontend_options": "/api/trustee/{instanceId}/documents/options"
+ "frontend_options": "/api/trustee/{instanceId}/documents/options",
+ "fk_target": {"db": "poweron_trustee", "table": "TrusteeDocument"},
}
)
valuta: Optional[str] = Field(
@@ -677,6 +695,7 @@ class TrusteePosition(PowerOnModel):
description="Mandate ID (auto-set from context)",
json_schema_extra={
"label": "Mandat",
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
"frontend_type": "text",
"frontend_readonly": True,
"frontend_required": False,
@@ -688,6 +707,7 @@ class TrusteePosition(PowerOnModel):
description="Feature Instance ID for instance-level isolation (auto-set from context)",
json_schema_extra={
"label": "Feature-Instanz",
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
"frontend_type": "text",
"frontend_readonly": True,
"frontend_required": False,
@@ -718,8 +738,8 @@ class TrusteeDataAccount(PowerOnModel):
accountGroup: Optional[str] = Field(default=None, description="Account group/category", json_schema_extra={"label": "Gruppe"})
currency: str = Field(default="CHF", description="Account currency", json_schema_extra={"label": "Währung"})
isActive: bool = Field(default=True, json_schema_extra={"label": "Aktiv"})
- mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat"})
- featureInstanceId: Optional[str] = Field(default=None, json_schema_extra={"label": "Feature-Instanz"})
+ mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat", "fk_target": {"db": "poweron_app", "table": "Mandate"}})
+ featureInstanceId: Optional[str] = Field(default=None, json_schema_extra={"label": "Feature-Instanz", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}})
@i18nModel("Buchung (Sync)")
class TrusteeDataJournalEntry(PowerOnModel):
@@ -731,14 +751,14 @@ class TrusteeDataJournalEntry(PowerOnModel):
description: str = Field(default="", description="Booking text", json_schema_extra={"label": "Beschreibung"})
currency: str = Field(default="CHF", json_schema_extra={"label": "Währung"})
totalAmount: float = Field(default=0.0, description="Total amount of entry", json_schema_extra={"label": "Betrag"})
- mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat"})
- featureInstanceId: Optional[str] = Field(default=None, json_schema_extra={"label": "Feature-Instanz"})
+ mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat", "fk_target": {"db": "poweron_app", "table": "Mandate"}})
+ featureInstanceId: Optional[str] = Field(default=None, json_schema_extra={"label": "Feature-Instanz", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}})
@i18nModel("Buchungszeile (Sync)")
class TrusteeDataJournalLine(PowerOnModel):
"""Journal entry line (debit/credit) synced from external accounting system."""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), json_schema_extra={"label": "ID"})
- journalEntryId: str = Field(description="FK → TrusteeDataJournalEntry.id", json_schema_extra={"label": "Buchung"})
+ journalEntryId: str = Field(description="FK → TrusteeDataJournalEntry.id", json_schema_extra={"label": "Buchung", "fk_target": {"db": "poweron_trustee", "table": "TrusteeDataJournalEntry"}})
accountNumber: str = Field(description="Account number", json_schema_extra={"label": "Konto"})
debitAmount: float = Field(default=0.0, json_schema_extra={"label": "Soll"})
creditAmount: float = Field(default=0.0, json_schema_extra={"label": "Haben"})
@@ -746,8 +766,8 @@ class TrusteeDataJournalLine(PowerOnModel):
taxCode: Optional[str] = Field(default=None, json_schema_extra={"label": "Steuercode"})
costCenter: Optional[str] = Field(default=None, json_schema_extra={"label": "Kostenstelle"})
description: str = Field(default="", json_schema_extra={"label": "Beschreibung"})
- mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat"})
- featureInstanceId: Optional[str] = Field(default=None, json_schema_extra={"label": "Feature-Instanz"})
+ mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat", "fk_target": {"db": "poweron_app", "table": "Mandate"}})
+ featureInstanceId: Optional[str] = Field(default=None, json_schema_extra={"label": "Feature-Instanz", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}})
@i18nModel("Kontakt (Sync)")
class TrusteeDataContact(PowerOnModel):
@@ -764,8 +784,8 @@ class TrusteeDataContact(PowerOnModel):
email: Optional[str] = Field(default=None, json_schema_extra={"label": "E-Mail"})
phone: Optional[str] = Field(default=None, json_schema_extra={"label": "Telefon"})
vatNumber: Optional[str] = Field(default=None, json_schema_extra={"label": "MWST-Nr."})
- mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat"})
- featureInstanceId: Optional[str] = Field(default=None, json_schema_extra={"label": "Feature-Instanz"})
+ mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat", "fk_target": {"db": "poweron_app", "table": "Mandate"}})
+ featureInstanceId: Optional[str] = Field(default=None, json_schema_extra={"label": "Feature-Instanz", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}})
@i18nModel("Kontosaldo (Sync)")
class TrusteeDataAccountBalance(PowerOnModel):
@@ -779,8 +799,8 @@ class TrusteeDataAccountBalance(PowerOnModel):
creditTotal: float = Field(default=0.0, json_schema_extra={"label": "Haben-Umsatz"})
closingBalance: float = Field(default=0.0, json_schema_extra={"label": "Schlusssaldo"})
currency: str = Field(default="CHF", json_schema_extra={"label": "Währung"})
- mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat"})
- featureInstanceId: Optional[str] = Field(default=None, json_schema_extra={"label": "Feature-Instanz"})
+ mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat", "fk_target": {"db": "poweron_app", "table": "Mandate"}})
+ featureInstanceId: Optional[str] = Field(default=None, json_schema_extra={"label": "Feature-Instanz", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}})
@i18nModel("Buchhaltungs-Konfiguration")
class TrusteeAccountingConfig(PowerOnModel):
@@ -790,7 +810,7 @@ class TrusteeAccountingConfig(PowerOnModel):
Credentials are stored encrypted (decrypted at runtime by the AccountingBridge).
"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), json_schema_extra={"label": "ID"})
- featureInstanceId: str = Field(description="FK -> FeatureInstance.id (1:1)", json_schema_extra={"label": "Feature-Instanz"})
+ featureInstanceId: str = Field(description="FK -> FeatureInstance.id (1:1)", json_schema_extra={"label": "Feature-Instanz", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}})
connectorType: str = Field(description="Connector type key, e.g. 'rma', 'bexio', 'abacus'", json_schema_extra={"label": "System"})
displayLabel: str = Field(default="", description="User-visible label for this integration", json_schema_extra={"label": "Bezeichnung"})
encryptedConfig: str = Field(default="", description="Encrypted JSON blob with connector credentials", json_schema_extra={"label": "Verschlüsselte Konfiguration"})
@@ -800,7 +820,7 @@ class TrusteeAccountingConfig(PowerOnModel):
lastSyncErrorMessage: Optional[str] = Field(default=None, description="Error message when lastSyncStatus is error", json_schema_extra={"label": "Fehlermeldung"})
cachedChartOfAccounts: Optional[str] = Field(default=None, description="JSON-serialised chart of accounts cache (list of {accountNumber, label, accountType})", json_schema_extra={"label": "Cached Kontoplan"})
chartCachedAt: Optional[float] = Field(default=None, description="Timestamp when cachedChartOfAccounts was last refreshed", json_schema_extra={"label": "Kontoplan-Cache-Zeitpunkt"})
- mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat"})
+ mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat", "fk_target": {"db": "poweron_app", "table": "Mandate"}})
@i18nModel("Buchhaltungs-Synchronisation")
class TrusteeAccountingSync(PowerOnModel):
@@ -809,8 +829,11 @@ class TrusteeAccountingSync(PowerOnModel):
Used for duplicate prevention, audit trail, and retry logic.
"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), json_schema_extra={"label": "ID"})
- positionId: str = Field(description="FK -> TrusteePosition.id", json_schema_extra={"label": "Position"})
- featureInstanceId: str = Field(description="FK -> FeatureInstance.id", json_schema_extra={"label": "Feature-Instanz"})
+ positionId: str = Field(
+ description="FK -> TrusteePosition.id",
+ json_schema_extra={"label": "Position", "fk_target": {"db": "poweron_trustee", "table": "TrusteePosition"}},
+ )
+ featureInstanceId: str = Field(description="FK -> FeatureInstance.id", json_schema_extra={"label": "Feature-Instanz", "fk_target": {"db": "poweron_app", "table": "FeatureInstance"}})
connectorType: str = Field(description="Connector type at time of sync", json_schema_extra={"label": "System"})
externalId: Optional[str] = Field(default=None, description="ID assigned by the external system", json_schema_extra={"label": "Externe ID"})
externalReference: Optional[str] = Field(default=None, description="Reference in the external system", json_schema_extra={"label": "Externe Referenz"})
@@ -819,5 +842,5 @@ class TrusteeAccountingSync(PowerOnModel):
syncedAt: Optional[float] = Field(default=None, description="Timestamp of successful sync", json_schema_extra={"label": "Synchronisiert am"})
errorMessage: Optional[str] = Field(default=None, json_schema_extra={"label": "Fehler"})
bookingPayload: Optional[dict] = Field(default=None, description="Payload sent to the external system (audit)", json_schema_extra={"label": "Buchungs-Payload"})
- mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat"})
+ mandateId: Optional[str] = Field(default=None, json_schema_extra={"label": "Mandat", "fk_target": {"db": "poweron_app", "table": "Mandate"}})
diff --git a/modules/features/trustee/interfaceFeatureTrustee.py b/modules/features/trustee/interfaceFeatureTrustee.py
index 7ed6fcff..b1a6aab6 100644
--- a/modules/features/trustee/interfaceFeatureTrustee.py
+++ b/modules/features/trustee/interfaceFeatureTrustee.py
@@ -14,6 +14,7 @@ from pydantic import ValidationError
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.configuration import APP_CONFIG
+from modules.shared.dbRegistry import registerDatabase
from modules.interfaces.interfaceRbac import getRecordsetWithRBAC, getRecordsetPaginatedWithRBAC, getDistinctColumnValuesWithRBAC
from modules.security.rbac import RbacClass
from modules.datamodels.datamodelUam import User, AccessLevel
@@ -30,6 +31,9 @@ from modules.datamodels.datamodelPagination import PaginationParams, PaginatedRe
logger = logging.getLogger(__name__)
+trusteeDatabase = "poweron_trustee"
+registerDatabase(trusteeDatabase)
+
# Singleton factory for TrusteeObjects instances per context
_trusteeInterfaces = {}
@@ -276,7 +280,7 @@ class TrusteeObjects:
"""Initializes the database connection directly."""
try:
dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data")
- dbDatabase = "poweron_trustee"
+ dbDatabase = trusteeDatabase
dbUser = APP_CONFIG.get("DB_USER")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
diff --git a/modules/features/workspace/datamodelFeatureWorkspace.py b/modules/features/workspace/datamodelFeatureWorkspace.py
index b01f0427..a6d3c2a4 100644
--- a/modules/features/workspace/datamodelFeatureWorkspace.py
+++ b/modules/features/workspace/datamodelFeatureWorkspace.py
@@ -19,15 +19,33 @@ class WorkspaceUserSettings(PowerOnModel):
)
userId: str = Field(
description="User ID",
- json_schema_extra={"label": "Benutzer-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True},
+ json_schema_extra={
+ "label": "Benutzer-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "User"},
+ },
)
mandateId: str = Field(
description="Mandate ID",
- json_schema_extra={"label": "Mandanten-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True},
+ json_schema_extra={
+ "label": "Mandanten-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "Mandate"},
+ },
)
featureInstanceId: str = Field(
description="Feature Instance ID",
- json_schema_extra={"label": "Feature-Instanz-ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": True},
+ json_schema_extra={
+ "label": "Feature-Instanz-ID",
+ "frontend_type": "text",
+ "frontend_readonly": True,
+ "frontend_required": True,
+ "fk_target": {"db": "poweron_app", "table": "FeatureInstance"},
+ },
)
maxAgentRounds: Optional[int] = Field(
default=None,
diff --git a/modules/features/workspace/interfaceFeatureWorkspace.py b/modules/features/workspace/interfaceFeatureWorkspace.py
index 05bda01d..984bf942 100644
--- a/modules/features/workspace/interfaceFeatureWorkspace.py
+++ b/modules/features/workspace/interfaceFeatureWorkspace.py
@@ -9,6 +9,7 @@ import logging
from typing import Dict, Any, Optional
from modules.connectors.connectorDbPostgre import DatabaseConnector
+from modules.shared.dbRegistry import registerDatabase
from modules.datamodels.datamodelUam import User
from modules.features.workspace.datamodelFeatureWorkspace import WorkspaceUserSettings
from modules.interfaces.interfaceRbac import getRecordsetWithRBAC
@@ -17,6 +18,9 @@ from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
+workspaceDatabase = "poweron_workspace"
+registerDatabase(workspaceDatabase)
+
_workspaceInterfaces: Dict[str, "WorkspaceObjects"] = {}
@@ -39,7 +43,7 @@ class WorkspaceObjects:
def _initializeDatabase(self):
dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data")
- dbDatabase = "poweron_workspace"
+ dbDatabase = workspaceDatabase
dbUser = APP_CONFIG.get("DB_USER")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
diff --git a/modules/interfaces/interfaceBootstrap.py b/modules/interfaces/interfaceBootstrap.py
index b8f65d9e..8f6e75fc 100644
--- a/modules/interfaces/interfaceBootstrap.py
+++ b/modules/interfaces/interfaceBootstrap.py
@@ -159,11 +159,12 @@ def _bootstrapSystemTemplates(db: DatabaseConnector) -> None:
"""
try:
from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import AutoWorkflow
+ from modules.features.graphicalEditor.interfaceFeatureGraphicalEditor import graphicalEditorDatabase
import uuid
greenfieldDb = DatabaseConnector(
dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
- dbDatabase="poweron_graphicaleditor",
+ dbDatabase=graphicalEditorDatabase,
dbUser=APP_CONFIG.get("DB_USER"),
dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD"),
)
diff --git a/modules/interfaces/interfaceDbApp.py b/modules/interfaces/interfaceDbApp.py
index e4384882..4f43d0ca 100644
--- a/modules/interfaces/interfaceDbApp.py
+++ b/modules/interfaces/interfaceDbApp.py
@@ -17,6 +17,7 @@ import uuid
from modules.connectors.connectorDbPostgre import DatabaseConnector, _get_cached_connector
from modules.shared.configuration import APP_CONFIG
+from modules.shared.dbRegistry import registerDatabase
from modules.shared.timeUtils import getUtcTimestamp, parseTimestamp
from modules.interfaces.interfaceRbac import getRecordsetWithRBAC
from modules.security.rbac import RbacClass
@@ -48,6 +49,9 @@ from modules.datamodels.datamodelNotification import UserNotification
logger = logging.getLogger(__name__)
+appDatabase = "poweron_app"
+registerDatabase(appDatabase)
+
# Singleton factory for AppObjects instances per context
_gatewayInterfaces = {}
@@ -133,7 +137,7 @@ class AppObjects:
try:
# Get configuration values with defaults
dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data")
- dbDatabase = "poweron_app"
+ dbDatabase = appDatabase
dbUser = APP_CONFIG.get("DB_USER")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
@@ -1894,11 +1898,12 @@ class AppObjects:
from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import (
AutoWorkflow, AutoVersion, AutoRun, AutoStepLog, AutoTask,
)
+ from modules.features.graphicalEditor.interfaceFeatureGraphicalEditor import graphicalEditorDatabase
from modules.connectors.connectorDbPostgre import DatabaseConnector
geDb = DatabaseConnector(
dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
- dbDatabase="poweron_graphicaleditor",
+ dbDatabase=graphicalEditorDatabase,
dbUser=APP_CONFIG.get("DB_USER"),
dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD"),
dbPort=int(APP_CONFIG.get("DB_PORT", 5432)),
diff --git a/modules/interfaces/interfaceDbBilling.py b/modules/interfaces/interfaceDbBilling.py
index 342c98c0..af696ab1 100644
--- a/modules/interfaces/interfaceDbBilling.py
+++ b/modules/interfaces/interfaceDbBilling.py
@@ -14,6 +14,7 @@ import uuid
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.configuration import APP_CONFIG
+from modules.shared.dbRegistry import registerDatabase
from modules.shared.timeUtils import getUtcTimestamp
from modules.datamodels.datamodelUam import User, Mandate
from modules.datamodels.datamodelMembership import UserMandate
@@ -109,6 +110,7 @@ _billingInterfaces: Dict[str, "BillingObjects"] = {}
# Database name for billing
BILLING_DATABASE = "poweron_billing"
+registerDatabase(BILLING_DATABASE)
def getInterface(currentUser: User, mandateId: str = None) -> "BillingObjects":
diff --git a/modules/interfaces/interfaceDbChat.py b/modules/interfaces/interfaceDbChat.py
index 874fa589..c8bbadf0 100644
--- a/modules/interfaces/interfaceDbChat.py
+++ b/modules/interfaces/interfaceDbChat.py
@@ -29,6 +29,7 @@ from modules.datamodels.datamodelUam import User
# DYNAMIC PART: Connectors to the Interface
from modules.connectors.connectorDbPostgre import DatabaseConnector
+from modules.shared.dbRegistry import registerDatabase
from modules.shared.timeUtils import getUtcTimestamp, parseTimestamp
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResult
from modules.interfaces.interfaceRbac import getRecordsetWithRBAC
@@ -37,6 +38,9 @@ from modules.interfaces.interfaceRbac import getRecordsetWithRBAC
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
+chatDatabase = "poweron_chat"
+registerDatabase(chatDatabase)
+
# Singleton factory for Chat instances
_chatInterfaces = {}
@@ -314,7 +318,7 @@ class ChatObjects:
try:
# Get configuration values with defaults
dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data")
- dbDatabase = "poweron_chat"
+ dbDatabase = chatDatabase
dbUser = APP_CONFIG.get("DB_USER")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
diff --git a/modules/interfaces/interfaceDbKnowledge.py b/modules/interfaces/interfaceDbKnowledge.py
index 4370cd3f..a12ac048 100644
--- a/modules/interfaces/interfaceDbKnowledge.py
+++ b/modules/interfaces/interfaceDbKnowledge.py
@@ -12,6 +12,7 @@ from datetime import datetime, timezone, timedelta
from typing import Dict, Any, List, Optional
from modules.connectors.connectorDbPostgre import _get_cached_connector
+from modules.shared.dbRegistry import registerDatabase
from modules.datamodels.datamodelKnowledge import FileContentIndex, ContentChunk, RoundMemory, WorkflowMemory
from modules.datamodels.datamodelUam import User
from modules.shared.configuration import APP_CONFIG
@@ -19,6 +20,9 @@ from modules.shared.timeUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
+knowledgeDatabase = "poweron_knowledge"
+registerDatabase(knowledgeDatabase)
+
_instances: Dict[str, "KnowledgeObjects"] = {}
@@ -34,7 +38,7 @@ class KnowledgeObjects:
def _initializeDatabase(self):
dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data")
- dbDatabase = "poweron_knowledge"
+ dbDatabase = knowledgeDatabase
dbUser = APP_CONFIG.get("DB_USER")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
diff --git a/modules/interfaces/interfaceDbManagement.py b/modules/interfaces/interfaceDbManagement.py
index 9589f7d6..96b64e95 100644
--- a/modules/interfaces/interfaceDbManagement.py
+++ b/modules/interfaces/interfaceDbManagement.py
@@ -14,6 +14,7 @@ import mimetypes
from typing import Dict, Any, List, Optional, Union
from modules.connectors.connectorDbPostgre import DatabaseConnector, _get_cached_connector
+from modules.shared.dbRegistry import registerDatabase
from modules.interfaces.interfaceRbac import getRecordsetWithRBAC, getRecordsetPaginatedWithRBAC
from modules.security.rbac import RbacClass
from modules.datamodels.datamodelRbac import AccessRuleContext
@@ -34,6 +35,9 @@ from modules.datamodels.datamodelPagination import PaginationParams, PaginatedRe
logger = logging.getLogger(__name__)
+managementDatabase = "poweron_management"
+registerDatabase(managementDatabase)
+
# Singleton factory for Management instances with AI service per context
_instancesManagement = {}
@@ -127,7 +131,7 @@ class ComponentObjects:
try:
# Get configuration values with defaults
dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data")
- dbDatabase = "poweron_management"
+ dbDatabase = managementDatabase
dbUser = APP_CONFIG.get("DB_USER")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
diff --git a/modules/interfaces/interfaceDbSubscription.py b/modules/interfaces/interfaceDbSubscription.py
index d3943d4b..05d83a58 100644
--- a/modules/interfaces/interfaceDbSubscription.py
+++ b/modules/interfaces/interfaceDbSubscription.py
@@ -13,6 +13,7 @@ from datetime import datetime, timezone
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.configuration import APP_CONFIG
+from modules.shared.dbRegistry import registerDatabase
from modules.datamodels.datamodelUam import User
from modules.datamodels.datamodelMembership import UserMandate
from modules.datamodels.datamodelSubscription import (
@@ -31,6 +32,7 @@ from modules.datamodels.datamodelSubscription import (
logger = logging.getLogger(__name__)
SUBSCRIPTION_DATABASE = "poweron_billing"
+registerDatabase(SUBSCRIPTION_DATABASE)
_subscriptionInterfaces: Dict[str, "SubscriptionObjects"] = {}
diff --git a/modules/routes/routeAdminDatabaseHealth.py b/modules/routes/routeAdminDatabaseHealth.py
new file mode 100644
index 00000000..a3e7a165
--- /dev/null
+++ b/modules/routes/routeAdminDatabaseHealth.py
@@ -0,0 +1,102 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+SysAdmin API for database table statistics and FK orphan detection/cleanup.
+"""
+
+import logging
+from typing import Any, Dict, List, Optional
+
+from fastapi import APIRouter, Depends, HTTPException, Request, status
+from pydantic import BaseModel, Field
+
+from modules.auth import limiter
+from modules.auth.authentication import requireSysAdminRole
+from modules.datamodels.datamodelUam import User
+from modules.system.databaseHealth import (
+ _cleanAllOrphans,
+ _cleanOrphans,
+ _getTableStats,
+ _scanOrphans,
+)
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(
+ prefix="/api/admin/database-health",
+ tags=["Admin Database Health"],
+)
+
+
+class OrphanCleanRequest(BaseModel):
+ """Body for deleting orphans for one FK relationship."""
+
+ db: str = Field(..., description="Source database name (e.g. poweron_app)")
+ table: str = Field(..., description="Source table (Pydantic model class name)")
+ column: str = Field(..., description="FK column on the source table")
+
+
+@router.get("/stats")
+@limiter.limit("30/minute")
+def getDatabaseTableStats(
+ request: Request,
+ db: Optional[str] = None,
+ currentUser: User = Depends(requireSysAdminRole),
+) -> Dict[str, Any]:
+ """Table statistics from pg_stat_user_tables (optional filter by database name)."""
+ rows = _getTableStats(dbFilter=db)
+ return {"stats": rows}
+
+
+@router.get("/orphans")
+@limiter.limit("10/minute")
+def getDatabaseOrphans(
+ request: Request,
+ db: Optional[str] = None,
+ currentUser: User = Depends(requireSysAdminRole),
+) -> Dict[str, Any]:
+ """FK orphan scan (optional filter by source database name)."""
+ rows = _scanOrphans(dbFilter=db)
+ return {"orphans": rows}
+
+
+@router.post("/orphans/clean")
+@limiter.limit("10/minute")
+def postDatabaseOrphansClean(
+ request: Request,
+ body: OrphanCleanRequest,
+ currentUser: User = Depends(requireSysAdminRole),
+) -> Dict[str, Any]:
+ """Delete orphaned rows for a single FK relationship."""
+ try:
+ deleted = _cleanOrphans(body.db, body.table, body.column)
+ except ValueError as e:
+ raise HTTPException(
+ status_code=status.HTTP_400_BAD_REQUEST,
+ detail=str(e),
+ ) from e
+ logger.info(
+ "SysAdmin orphan clean: user=%s db=%s table=%s column=%s deleted=%s",
+ currentUser.username,
+ body.db,
+ body.table,
+ body.column,
+ deleted,
+ )
+ return {"deleted": deleted}
+
+
+@router.post("/orphans/clean-all")
+@limiter.limit("2/minute")
+def postDatabaseOrphansCleanAll(
+ request: Request,
+ currentUser: User = Depends(requireSysAdminRole),
+) -> Dict[str, Any]:
+ """Run orphan cleanup for every relationship that currently has orphans."""
+ results: List[dict] = _cleanAllOrphans()
+ logger.info(
+ "SysAdmin orphan clean-all: user=%s batches=%s",
+ currentUser.username,
+ len(results),
+ )
+ return {"results": results}
diff --git a/modules/routes/routeDataFiles.py b/modules/routes/routeDataFiles.py
index e989fb2e..ebfd0e38 100644
--- a/modules/routes/routeDataFiles.py
+++ b/modules/routes/routeDataFiles.py
@@ -502,6 +502,114 @@ def move_folder(
raise HTTPException(status_code=500, detail=str(e))
+@router.patch("/folders/{folderId}/neutralize")
+@limiter.limit("10/minute")
+def updateFolderNeutralize(
+ request: Request,
+ background_tasks: BackgroundTasks,
+ folderId: str = Path(..., description="ID of the folder"),
+ neutralize: bool = Body(..., embed=True),
+ context: RequestContext = Depends(getRequestContext),
+) -> Dict[str, Any]:
+ """Toggle neutralization on a folder. Propagates to all files inside (recursively).
+
+ When turning ON: all files in the folder get ``neutralize=True``, their
+ knowledge indexes are purged synchronously, and background re-indexing
+ is triggered.
+ When turning OFF: files revert to ``neutralize=False`` unless they were
+ individually marked (not implemented yet -- all are reverted).
+ """
+ try:
+ mgmt = interfaceDbManagement.getInterface(
+ context.user,
+ mandateId=str(context.mandateId) if context.mandateId else None,
+ featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
+ )
+
+ folder = mgmt.getFolder(folderId)
+ if not folder:
+ raise HTTPException(status_code=404, detail=routeApiMsg("Folder not found"))
+
+ mgmt.updateFolder(folderId, {"neutralize": neutralize})
+
+ fileIds = _collectFolderFileIds(mgmt, folderId)
+ logger.info("Folder neutralize toggle %s for folder %s: %d files affected", neutralize, folderId, len(fileIds))
+
+ from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
+ knowledgeDb = getKnowledgeInterface()
+
+ for fid in fileIds:
+ try:
+ mgmt.updateFile(fid, {"neutralize": neutralize})
+ if neutralize:
+ try:
+ knowledgeDb.deleteFileContentIndex(fid)
+ except Exception as e:
+ logger.warning("Folder neutralize: failed to purge index for file %s: %s", fid, e)
+ else:
+ try:
+ from modules.datamodels.datamodelKnowledge import FileContentIndex
+ indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fid})
+ for idx in indices:
+ idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
+ if idxId:
+ knowledgeDb.db.recordModify(FileContentIndex, idxId, {
+ "neutralizationStatus": "original",
+ "isNeutralized": False,
+ })
+ except Exception as e:
+ logger.warning("Folder neutralize OFF: metadata update failed for %s: %s", fid, e)
+ except Exception as e:
+ logger.error("Folder neutralize: failed to update file %s: %s", fid, e)
+
+ for fid in fileIds:
+ fileMeta = mgmt.getFile(fid)
+ if fileMeta:
+ fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
+ mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")
+
+ async def _reindex(fileId=fid, fileName=fn, mimeType=mt):
+ try:
+ await _autoIndexFile(fileId=fileId, fileName=fileName, mimeType=mimeType, user=context.user)
+ except Exception as ex:
+ logger.error("Folder neutralize re-index failed for %s: %s", fileId, ex)
+
+ background_tasks.add_task(_reindex)
+
+ return {"folderId": folderId, "neutralize": neutralize, "filesUpdated": len(fileIds)}
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error updating folder neutralize flag: {e}")
+ raise HTTPException(status_code=500, detail=str(e))
+
+
+def _collectFolderFileIds(mgmt, folderId: str) -> List[str]:
+ """Recursively collect all file IDs in a folder and its sub-folders."""
+ fileIds = []
+ try:
+ files = mgmt.listFiles(folderId=folderId)
+ if isinstance(files, dict):
+ files = files.get("files", [])
+ for f in (files or []):
+ fid = f.get("id") if isinstance(f, dict) else getattr(f, "id", None)
+ if fid:
+ fileIds.append(fid)
+ except Exception as e:
+ logger.warning("_collectFolderFileIds: listFiles failed for folder %s: %s", folderId, e)
+
+ try:
+ subFolders = mgmt.listFolders(parentId=folderId)
+ for sf in (subFolders or []):
+ sfId = sf.get("id") if isinstance(sf, dict) else getattr(sf, "id", None)
+ if sfId:
+ fileIds.extend(_collectFolderFileIds(mgmt, sfId))
+ except Exception as e:
+ logger.warning("_collectFolderFileIds: listFolders failed for folder %s: %s", folderId, e)
+
+ return fileIds
+
+
@router.get("/folders/{folderId}/download")
@limiter.limit("10/minute")
def download_folder(
@@ -1028,6 +1136,18 @@ def move_file(
featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
)
mgmt.updateFile(fileId, {"folderId": targetFolderId})
+
+ if targetFolderId:
+ try:
+ targetFolder = mgmt.getFolder(targetFolderId)
+ folderNeut = (targetFolder.get("neutralize") if isinstance(targetFolder, dict)
+ else getattr(targetFolder, "neutralize", False)) if targetFolder else False
+ if folderNeut:
+ mgmt.updateFile(fileId, {"neutralize": True})
+ logger.info("File %s moved to neutralized folder %s — inherited neutralize=True", fileId, targetFolderId)
+ except Exception as e:
+ logger.warning("File move: folder neutralize inheritance check failed for %s: %s", fileId, e)
+
return {"success": True, "fileId": fileId, "folderId": targetFolderId}
except Exception as e:
logger.error(f"Error moving file: {e}")
diff --git a/modules/routes/routeWorkflowDashboard.py b/modules/routes/routeWorkflowDashboard.py
index 67b715b9..96075a26 100644
--- a/modules/routes/routeWorkflowDashboard.py
+++ b/modules/routes/routeWorkflowDashboard.py
@@ -26,6 +26,7 @@ from modules.datamodels.datamodelPagination import PaginationParams, normalize_p
from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import (
AutoRun, AutoStepLog, AutoWorkflow, AutoTask, AutoVersion,
)
+from modules.features.graphicalEditor.interfaceFeatureGraphicalEditor import graphicalEditorDatabase
from modules.shared.i18nRegistry import apiRouteContext
routeApiMsg = apiRouteContext("routeWorkflowDashboard")
@@ -35,13 +36,11 @@ limiter = Limiter(key_func=get_remote_address)
router = APIRouter(prefix="/api/system/workflow-runs", tags=["WorkflowDashboard"])
-_GREENFIELD_DB = "poweron_graphicaleditor"
-
def _getDb() -> DatabaseConnector:
return DatabaseConnector(
dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
- dbDatabase=_GREENFIELD_DB,
+ dbDatabase=graphicalEditorDatabase,
dbUser=APP_CONFIG.get("DB_USER"),
dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD"),
dbPort=int(APP_CONFIG.get("DB_PORT", 5432)),
diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_documentTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_documentTools.py
index 30810374..a48e53b3 100644
--- a/modules/serviceCenter/services/serviceAgent/coreTools/_documentTools.py
+++ b/modules/serviceCenter/services/serviceAgent/coreTools/_documentTools.py
@@ -2,6 +2,7 @@
# All rights reserved.
"""Document and vision tools (containers, content objects, image description)."""
+import json
import logging
from typing import Any, Dict, List, Optional
@@ -18,6 +19,76 @@ from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
logger = logging.getLogger(__name__)
+def _parseUdmJson(raw: Any) -> Optional[Dict[str, Any]]:
+ if raw is None:
+ return None
+ if isinstance(raw, dict):
+ return raw
+ if isinstance(raw, str) and raw.strip():
+ try:
+ data = json.loads(raw)
+ return data if isinstance(data, dict) else None
+ except json.JSONDecodeError:
+ return None
+ return None
+
+
+def _walkUdmBlocksImpl(udm: Dict[str, Any], out: List[Dict[str, Any]], path: str) -> None:
+ if udm.get("contentType"):
+ raw = udm.get("raw") or ""
+ preview = raw[:240] + ("…" if len(raw) > 240 else "")
+ out.append({
+ "path": path,
+ "id": udm.get("id"),
+ "contentType": udm.get("contentType"),
+ "rawPreview": preview,
+ })
+ children = udm.get("children") or []
+ for i, ch in enumerate(children):
+ if isinstance(ch, dict):
+ role = ch.get("role") or "node"
+ label = f"{path}/children[{i}]"
+ if ch.get("role") in ("page", "section", "slide", "sheet"):
+ label = f"{path}/{role}[{ch.get('index', i)}]"
+ _walkUdmBlocksImpl(ch, out, label)
+
+
+def _getUdmStructureText(udm: Dict[str, Any]) -> str:
+ lines = [
+ f"id: {udm.get('id', '?')}",
+ f"role: {udm.get('role', '?')}",
+ f"sourceType: {udm.get('sourceType', '?')}",
+ f"sourcePath: {udm.get('sourcePath', '')}",
+ ]
+ nodes = udm.get("children") or []
+ lines.append(f"structuralNodes (top-level): {len(nodes)}")
+ for i, sn in enumerate(nodes[:80]):
+ if isinstance(sn, dict):
+ role = sn.get("role", "?")
+ idx = sn.get("index", i)
+ lab = sn.get("label") or ""
+ blocks = sn.get("children") or []
+ lines.append(f" [{i}] {role} index={idx} label={lab!r} contentBlocks={len(blocks)}")
+ if len(nodes) > 80:
+ lines.append(f" … and {len(nodes) - 80} more structural nodes")
+ return "\n".join(lines)
+
+
+def _filterUdmByTypeImpl(udm: Dict[str, Any], content_type: str) -> Dict[str, Any]:
+ hits: List[Dict[str, Any]] = []
+
+ def collect(node: Any) -> None:
+ if not isinstance(node, dict):
+ return
+ if node.get("contentType") == content_type:
+ hits.append(dict(node))
+ for child in node.get("children") or []:
+ collect(child)
+
+ collect(udm)
+ return {"nodes": hits, "count": len(hits), "contentType": content_type}
+
+
def _registerDocumentTools(registry: ToolRegistry, services):
"""Auto-extracted from registerCoreTools."""
# ---- Document tools (Smart Documents / Container Handling) ----
@@ -205,6 +276,91 @@ def _registerDocumentTools(registry: ToolRegistry, services):
readOnly=True,
)
+ # ---- UDM (Unified Document Model) tools ----
+
+ async def _getUdmStructure(args: Dict[str, Any], context: Dict[str, Any]):
+ udm = _parseUdmJson(args.get("udmJson") or args.get("udm"))
+ if not udm:
+ return ToolResult(toolCallId="", toolName="getUdmStructure", success=False, error="udmJson must be a JSON object or string")
+ text = _getUdmStructureText(udm)
+ return ToolResult(toolCallId="", toolName="getUdmStructure", success=True, data=text)
+
+ async def _walkUdmBlocks(args: Dict[str, Any], context: Dict[str, Any]):
+ udm = _parseUdmJson(args.get("udmJson") or args.get("udm"))
+ if not udm:
+ return ToolResult(toolCallId="", toolName="walkUdmBlocks", success=False, error="udmJson must be a JSON object or string")
+ blocks: List[Dict[str, Any]] = []
+ _walkUdmBlocksImpl(udm, blocks, "document")
+ max_n = int(args.get("maxResults") or 200)
+ trimmed = blocks[:max_n]
+ lines = [f"Total content blocks found: {len(blocks)} (showing {len(trimmed)})"]
+ for b in trimmed:
+ lines.append(f"{b.get('path')} | {b.get('contentType')} | id={b.get('id')}")
+ if b.get("rawPreview"):
+ lines.append(f" preview: {b['rawPreview'][:120]}")
+ if len(blocks) > max_n:
+ lines.append(f"... {len(blocks) - max_n} more not shown (increase maxResults)")
+ return ToolResult(toolCallId="", toolName="walkUdmBlocks", success=True, data="\n".join(lines))
+
+ async def _filterUdmByType(args: Dict[str, Any], context: Dict[str, Any]):
+ udm = _parseUdmJson(args.get("udmJson") or args.get("udm"))
+ content_type = (args.get("contentType") or "").strip()
+ if not udm:
+ return ToolResult(toolCallId="", toolName="filterUdmByType", success=False, error="udmJson is required")
+ if not content_type:
+ return ToolResult(toolCallId="", toolName="filterUdmByType", success=False, error="contentType is required")
+ filtered = _filterUdmByTypeImpl(udm, content_type)
+ return ToolResult(
+ toolCallId="",
+ toolName="filterUdmByType",
+ success=True,
+ data=json.dumps(filtered, ensure_ascii=False, default=str)[:_MAX_TOOL_RESULT_CHARS],
+ )
+
+ registry.register(
+ "getUdmStructure",
+ _getUdmStructure,
+ description="Summarize hierarchy of a Unified Document Model (UDM) JSON: ids, sourceType, structural nodes and block counts. Pass udmJson as stringified JSON.",
+ parameters={
+ "type": "object",
+ "properties": {
+ "udmJson": {"type": "string", "description": "Stringified UDM document object (Document → StructuralNode → ContentBlock)"},
+ },
+ "required": ["udmJson"],
+ },
+ readOnly=True,
+ )
+
+ registry.register(
+ "walkUdmBlocks",
+ _walkUdmBlocks,
+ description="Depth-first walk over a UDM tree; lists each ContentBlock with path, id, type, and short text preview.",
+ parameters={
+ "type": "object",
+ "properties": {
+ "udmJson": {"type": "string", "description": "Stringified UDM document"},
+ "maxResults": {"type": "integer", "description": "Max blocks to return (default 200)"},
+ },
+ "required": ["udmJson"],
+ },
+ readOnly=True,
+ )
+
+ registry.register(
+ "filterUdmByType",
+ _filterUdmByType,
+ description="Return all ContentBlocks in a UDM tree whose contentType matches (e.g. table, image, text).",
+ parameters={
+ "type": "object",
+ "properties": {
+ "udmJson": {"type": "string", "description": "Stringified UDM document"},
+ "contentType": {"type": "string", "description": "contentType to match (text, image, table, code, media, link, formula)"},
+ },
+ "required": ["udmJson", "contentType"],
+ },
+ readOnly=True,
+ )
+
# ---- Vision tool ----
async def _describeImage(args: Dict[str, Any], context: Dict[str, Any]):
diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_featureSubAgentTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_featureSubAgentTools.py
index 91fbb81d..51c3c3d5 100644
--- a/modules/serviceCenter/services/serviceAgent/coreTools/_featureSubAgentTools.py
+++ b/modules/serviceCenter/services/serviceAgent/coreTools/_featureSubAgentTools.py
@@ -116,6 +116,17 @@ def _registerFeatureSubAgentTools(registry: ToolRegistry, services):
for ds in (featureDataSources or [])
)
+ neutralizeFieldsPerTable: Dict[str, List[str]] = {}
+ for ds in (featureDataSources or []):
+ nf = ds.get("neutralizeFields") if isinstance(ds, dict) else getattr(ds, "neutralizeFields", None)
+ tn = ds.get("tableName", "") if isinstance(ds, dict) else getattr(ds, "tableName", "")
+ if nf and isinstance(nf, list) and tn:
+ existing = neutralizeFieldsPerTable.get(tn, [])
+ for f in nf:
+ if f not in existing:
+ existing.append(f)
+ neutralizeFieldsPerTable[tn] = existing
+
from modules.security.rbacCatalog import getCatalogService
catalog = getCatalogService()
tableFilters = {}
@@ -182,6 +193,7 @@ def _registerFeatureSubAgentTools(registry: ToolRegistry, services):
instanceLabel=instanceLabel,
tableFilters=tableFilters,
requestLang=requestLang,
+ neutralizeFields=neutralizeFieldsPerTable if neutralizeFieldsPerTable else None,
)
_featureQueryCache[cacheKey] = (time.time(), answer)
diff --git a/modules/serviceCenter/services/serviceAgent/featureDataAgent.py b/modules/serviceCenter/services/serviceAgent/featureDataAgent.py
index 4b20f6a3..553848ed 100644
--- a/modules/serviceCenter/services/serviceAgent/featureDataAgent.py
+++ b/modules/serviceCenter/services/serviceAgent/featureDataAgent.py
@@ -41,6 +41,7 @@ async def runFeatureDataAgent(
instanceLabel: str = "",
tableFilters: Optional[Dict[str, Dict[str, str]]] = None,
requestLang: Optional[str] = None,
+ neutralizeFields: Optional[Dict[str, List[str]]] = None,
) -> str:
"""Run the feature data sub-agent and return the textual result.
@@ -56,12 +57,14 @@ async def runFeatureDataAgent(
instanceLabel: Human-readable instance name for context.
tableFilters: Per-table record filters from FeatureDataSource.recordFilter.
requestLang: ISO 639-1 code for resolving multilingual table labels in the schema prompt.
+ neutralizeFields: Per-table list of field names to mask with placeholders
+ before returning data to the AI.
Returns:
Plain-text answer produced by the sub-agent.
"""
- provider = FeatureDataProvider(dbConnector)
+ provider = FeatureDataProvider(dbConnector, neutralizeFields=neutralizeFields)
registry = _buildSubAgentTools(provider, featureInstanceId, mandateId, tableFilters or {})
for tbl in selectedTables:
diff --git a/modules/serviceCenter/services/serviceAgent/featureDataProvider.py b/modules/serviceCenter/services/serviceAgent/featureDataProvider.py
index 872e47eb..d7707bdf 100644
--- a/modules/serviceCenter/services/serviceAgent/featureDataProvider.py
+++ b/modules/serviceCenter/services/serviceAgent/featureDataProvider.py
@@ -8,12 +8,13 @@ feature table. All queries are automatically filtered by featureInstanceId
and mandateId so data isolation is guaranteed.
"""
+import hashlib
import logging
import json
import os
import time
from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Set
logger = logging.getLogger(__name__)
@@ -61,12 +62,18 @@ _ALLOWED_AGGREGATES = {"SUM", "COUNT", "AVG", "MIN", "MAX"}
class FeatureDataProvider:
"""Reads feature-instance data from the DB using DATA_OBJECTS metadata."""
- def __init__(self, dbConnector):
+ def __init__(self, dbConnector, neutralizeFields: Optional[Dict[str, List[str]]] = None):
"""
Args:
dbConnector: A connectorDbPostgre.DatabaseConnector with an open connection.
+ neutralizeFields: Per-table field names whose values must be replaced
+ with placeholders before returning to the AI, e.g.
+ ``{"TrusteePosition": ["firstName", "lastName", "address"]}``.
"""
self._db = dbConnector
+ self._neutralizeFields: Dict[str, Set[str]] = {
+ tbl: set(fields) for tbl, fields in (neutralizeFields or {}).items()
+ }
# ------------------------------------------------------------------
# public API (called by FeatureDataAgent tools)
@@ -102,6 +109,13 @@ class FeatureDataProvider:
logger.warning(f"getActualColumns({tableName}) failed: {e}")
return []
+ def _applyFieldNeutralization(self, tableName: str, rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+ """Neutralize sensitive field values in query results before they reach the AI."""
+ fieldsToNeut = self._neutralizeFields.get(tableName)
+ if not fieldsToNeut:
+ return rows
+ return [_neutralizeRowFields(row, fieldsToNeut) for row in rows]
+
def browseTable(
self,
tableName: str,
@@ -152,6 +166,7 @@ class FeatureDataProvider:
cur.execute(dataSql, allParams + [limit, offset])
rows = [_serializeRow(dict(r)) for r in cur.fetchall()]
+ rows = self._applyFieldNeutralization(tableName, rows)
result = {"rows": rows, "total": total, "limit": limit, "offset": offset}
_debugQueryLog("browseTable", tableName, {
"fields": fields, "limit": limit, "offset": offset,
@@ -220,6 +235,7 @@ class FeatureDataProvider:
cur.execute(sql, allParams)
rows = [_serializeRow(dict(r)) for r in cur.fetchall()]
+ rows = self._applyFieldNeutralization(tableName, rows)
result = {
"rows": rows,
"aggregate": aggregate,
@@ -298,6 +314,7 @@ class FeatureDataProvider:
cur.execute(dataSql, allParams + [limit, offset])
rows = [_serializeRow(dict(r)) for r in cur.fetchall()]
+ rows = self._applyFieldNeutralization(tableName, rows)
result = {"rows": rows, "total": total, "limit": limit, "offset": offset}
_debugQueryLog("queryTable", tableName, {
"filters": filters, "fields": fields, "orderBy": orderBy,
@@ -417,3 +434,22 @@ def _serializeRow(row: Dict[str, Any]) -> Dict[str, Any]:
elif hasattr(v, "isoformat"):
row[k] = v.isoformat()
return row
+
+
+_PLACEHOLDER_PREFIX = "NEUT"
+
+
+def _neutralizeRowFields(row: Dict[str, Any], fieldsToNeutralize: Set[str]) -> Dict[str, Any]:
+ """Replace values in sensitive fields with stable, deterministic placeholders.
+
+ The placeholder format ``[NEUT..]`` is stable for the same
+ value so that identical values in different rows produce the same token.
+ This allows the AI to reason about equality without seeing the real data.
+ """
+ for field in fieldsToNeutralize:
+ val = row.get(field)
+ if val is None or val == "":
+ continue
+ shortHash = hashlib.sha256(str(val).encode()).hexdigest()[:8]
+ row[field] = f"[{_PLACEHOLDER_PREFIX}.{field}.{shortHash}]"
+ return row
diff --git a/modules/serviceCenter/services/serviceExtraction/extractors/extractorContainer.py b/modules/serviceCenter/services/serviceExtraction/extractors/extractorContainer.py
index ae81b1a1..941168d5 100644
--- a/modules/serviceCenter/services/serviceExtraction/extractors/extractorContainer.py
+++ b/modules/serviceCenter/services/serviceExtraction/extractors/extractorContainer.py
@@ -91,6 +91,12 @@ class ContainerExtractor(Extractor):
)
]
+ if context.get("lazyContainer"):
+ lazy = _extractLazyListing(fileBytes, mimeType, fileName, rootId)
+ if lazy is not None:
+ parts.extend(lazy)
+ return parts
+
state = {"totalSize": 0, "fileCount": 0}
try:
childParts = _resolveContainerRecursive(
@@ -112,6 +118,42 @@ class ContainerExtractor(Extractor):
return parts
def _extractLazyListing(
    fileBytes: bytes,
    containerMime: str,
    containerName: str,
    parentId: str,
) -> Optional[List[ContentPart]]:
    """ZIP only: list member files with metadata (no nested extraction).

    Returns None for non-ZIP input or a corrupt archive; otherwise one
    lazy-reference ContentPart per member file (directories skipped).
    """
    zipByMime = containerMime in ("application/zip", "application/x-zip-compressed")
    zipByName = (containerName or "").lower().endswith(".zip")
    if not (zipByMime or zipByName):
        return None

    listing: List[ContentPart] = []
    try:
        with zipfile.ZipFile(io.BytesIO(fileBytes)) as archive:
            for entry in archive.infolist():
                if entry.is_dir():
                    continue
                listing.append(
                    ContentPart(
                        id=makeId(),
                        parentId=parentId,
                        label=entry.filename,
                        typeGroup="container",
                        mimeType=_detectMimeType(entry.filename),
                        data="",
                        metadata={
                            "containerPath": entry.filename,
                            "size": entry.file_size,
                            "lazyReference": True,
                        },
                    )
                )
    except zipfile.BadZipFile:
        return None
    return listing
+
+
def _resolveContainerRecursive(
containerBytes: bytes,
containerMime: str,
@@ -160,8 +202,9 @@ def _addFilePart(
entryPath = f"{containerPath}/{fileName}" if containerPath else fileName
detectedMime = _detectMimeType(fileName)
- from ..subRegistry import ExtractorRegistry
- registry = ExtractorRegistry()
+ from ..subRegistry import getExtractorRegistry
+
+ registry = getExtractorRegistry()
extractor = registry.resolve(detectedMime, fileName)
if extractor and not isinstance(extractor, ContainerExtractor):
diff --git a/modules/serviceCenter/services/serviceExtraction/extractors/extractorPdf.py b/modules/serviceCenter/services/serviceExtraction/extractors/extractorPdf.py
index 98b83188..1df4e7fc 100644
--- a/modules/serviceCenter/services/serviceExtraction/extractors/extractorPdf.py
+++ b/modules/serviceCenter/services/serviceExtraction/extractors/extractorPdf.py
@@ -75,33 +75,32 @@ class PdfExtractor(Extractor):
# Extract text per page with PyMuPDF (same lib as in-place search - ensures extraction matches PDF text layer)
try:
- with io.BytesIO(fileBytes) as buf:
- doc = fitz.open(stream=buf.getvalue(), filetype="pdf")
- for i in range(len(doc)):
- try:
- page = doc[i]
- text = page.get_text() or ""
- if text.strip():
- parts.append(ContentPart(
- id=makeId(),
- parentId=rootId,
- label=f"page_{i+1}",
- typeGroup="text",
- mimeType="text/plain",
- data=text,
- metadata={
- "pages": 1, "pageIndex": i,
- "size": len(text.encode('utf-8')),
- "contextRef": {
- "containerPath": context.get("fileName", "document.pdf"),
- "location": f"page:{i+1}",
- "pageIndex": i,
- },
- }
- ))
- except Exception:
- continue
- doc.close()
+ doc = fitz.open(stream=fileBytes, filetype="pdf")
+ for i in range(len(doc)):
+ try:
+ page = doc[i]
+ text = page.get_text() or ""
+ if text.strip():
+ parts.append(ContentPart(
+ id=makeId(),
+ parentId=rootId,
+ label=f"page_{i+1}",
+ typeGroup="text",
+ mimeType="text/plain",
+ data=text,
+ metadata={
+ "pages": 1, "pageIndex": i,
+ "size": len(text.encode('utf-8')),
+ "contextRef": {
+ "containerPath": context.get("fileName", "document.pdf"),
+ "location": f"page:{i+1}",
+ "pageIndex": i,
+ },
+ }
+ ))
+ except Exception:
+ continue
+ doc.close()
except Exception:
pass
@@ -139,38 +138,37 @@ class PdfExtractor(Extractor):
# Extract images with PyMuPDF
try:
- with io.BytesIO(fileBytes) as buf2:
- doc = fitz.open(stream=buf2.getvalue(), filetype="pdf")
- for i in range(len(doc)):
- page = doc[i]
- images = page.get_images(full=True)
- for j, img in enumerate(images):
- try:
- xref = img[0]
- baseImage = doc.extract_image(xref)
- if baseImage:
- imgBytes = baseImage.get("image", b"")
- ext = baseImage.get("ext", "png")
- if imgBytes:
- parts.append(ContentPart(
- id=makeId(),
- parentId=rootId,
- label=f"image_{i+1}_{j}",
- typeGroup="image",
- mimeType=f"image/{ext}",
- data=base64.b64encode(imgBytes).decode("utf-8"),
- metadata={
- "pageIndex": i, "size": len(imgBytes),
- "contextRef": {
- "containerPath": context.get("fileName", "document.pdf"),
- "location": f"page:{i+1}/image:{j}",
- "pageIndex": i,
- },
- }
- ))
- except Exception:
- continue
- doc.close()
+ doc = fitz.open(stream=fileBytes, filetype="pdf")
+ for i in range(len(doc)):
+ page = doc[i]
+ images = page.get_images(full=True)
+ for j, img in enumerate(images):
+ try:
+ xref = img[0]
+ baseImage = doc.extract_image(xref)
+ if baseImage:
+ imgBytes = baseImage.get("image", b"")
+ ext = baseImage.get("ext", "png")
+ if imgBytes:
+ parts.append(ContentPart(
+ id=makeId(),
+ parentId=rootId,
+ label=f"image_{i+1}_{j}",
+ typeGroup="image",
+ mimeType=f"image/{ext}",
+ data=base64.b64encode(imgBytes).decode("utf-8"),
+ metadata={
+ "pageIndex": i, "size": len(imgBytes),
+ "contextRef": {
+ "containerPath": context.get("fileName", "document.pdf"),
+ "location": f"page:{i+1}/image:{j}",
+ "pageIndex": i,
+ },
+ }
+ ))
+ except Exception:
+ continue
+ doc.close()
except Exception:
pass
diff --git a/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py b/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py
index a227e66f..13f4a1d3 100644
--- a/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py
+++ b/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py
@@ -9,7 +9,7 @@ import asyncio
import base64
import json
-from .subRegistry import ExtractorRegistry, ChunkerRegistry
+from .subRegistry import ExtractorRegistry, ChunkerRegistry, getExtractorRegistry
from .subPipeline import runExtraction
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy, ExtractionOptions, PartResult, DocumentIntent
from modules.datamodels.datamodelChat import ChatDocument
@@ -23,7 +23,6 @@ logger = logging.getLogger(__name__)
class ExtractionService:
- _sharedExtractorRegistry: Optional[ExtractorRegistry] = None
_sharedChunkerRegistry: Optional[ChunkerRegistry] = None
def __init__(self, context, get_service: Callable[[str], Any]):
@@ -35,11 +34,9 @@ class ExtractionService:
context.user,
mandateId=context.mandate_id,
)
- if ExtractionService._sharedExtractorRegistry is None:
- ExtractionService._sharedExtractorRegistry = ExtractorRegistry()
+ self._extractorRegistry = getExtractorRegistry()
if ExtractionService._sharedChunkerRegistry is None:
ExtractionService._sharedChunkerRegistry = ChunkerRegistry()
- self._extractorRegistry = ExtractionService._sharedExtractorRegistry
self._chunkerRegistry = ExtractionService._sharedChunkerRegistry
modelRegistry.ensureConnectorsRegistered()
diff --git a/modules/serviceCenter/services/serviceExtraction/subPipeline.py b/modules/serviceCenter/services/serviceExtraction/subPipeline.py
index 3af8ba87..ab14fddb 100644
--- a/modules/serviceCenter/services/serviceExtraction/subPipeline.py
+++ b/modules/serviceCenter/services/serviceExtraction/subPipeline.py
@@ -4,6 +4,7 @@ from typing import List
import logging
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, ExtractionOptions, MergeStrategy
+from modules.datamodels.datamodelUdm import _applyUdmOutputDetail
from .subUtils import makeId
from .subRegistry import ExtractorRegistry, ChunkerRegistry
@@ -29,7 +30,12 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
)
return ContentExtracted(id=makeId(), parts=[part])
- parts = extractor.extract(documentBytes, {"fileName": fileName, "mimeType": mimeType})
+ extractCtx = {
+ "fileName": fileName,
+ "mimeType": mimeType,
+ "lazyContainer": options.lazyContainer,
+ }
+ parts = extractor.extract(documentBytes, extractCtx)
# REMOVED: poolAndLimit(parts, chunkerRegistry, options)
# REMOVED: Chunking logic - now handled in AI call phase
@@ -39,8 +45,17 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
# Use module-level applyMerging function
from .mainServiceExtraction import applyMerging
parts = applyMerging(parts, options.mergeStrategy)
-
- return ContentExtracted(id=makeId(), parts=parts)
+
+ ec_id = makeId()
+ extracted = ContentExtracted(id=ec_id, parts=parts)
+ if options.outputFormat in ("udm", "both"):
+ udm = extractor.extractToUdm(
+ documentBytes,
+ {**extractCtx, "extractionId": ec_id},
+ precomputedParts=parts,
+ )
+ extracted.udm = _applyUdmOutputDetail(udm, options.outputDetail)
+ return extracted
# REMOVED: poolAndLimit function - chunking now handled in AI call phase
diff --git a/modules/serviceCenter/services/serviceExtraction/subRegistry.py b/modules/serviceCenter/services/serviceExtraction/subRegistry.py
index 826eef9d..9412ef91 100644
--- a/modules/serviceCenter/services/serviceExtraction/subRegistry.py
+++ b/modules/serviceCenter/services/serviceExtraction/subRegistry.py
@@ -1,12 +1,25 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional, TYPE_CHECKING
import logging
from modules.datamodels.datamodelExtraction import ContentPart
+if TYPE_CHECKING:
+ from modules.datamodels.datamodelUdm import UdmDocument
+
logger = logging.getLogger(__name__)
# Process-wide lazily-built registry; see NOTE in getExtractorRegistry().
_extractorRegistrySingleton: Optional["ExtractorRegistry"] = None


def getExtractorRegistry() -> "ExtractorRegistry":
    """Shared ExtractorRegistry instance (avoid repeated auto-discovery e.g. per file in ZIP).

    NOTE(review): the lazy init is not lock-protected — two threads racing the
    first call may each construct a registry and the last assignment wins.
    Confirm registry construction is idempotent / cheap enough for that.
    """
    global _extractorRegistrySingleton
    if _extractorRegistrySingleton is None:
        _extractorRegistrySingleton = ExtractorRegistry()
    return _extractorRegistrySingleton
+
class Extractor:
"""
@@ -26,6 +39,23 @@ class Extractor:
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> list[ContentPart]:
"""Extract content from the file bytes."""
raise NotImplementedError
+
+ def extractToUdm(
+ self,
+ fileBytes: bytes,
+ context: Dict[str, Any],
+ precomputedParts: Optional[List[ContentPart]] = None,
+ ) -> "UdmDocument":
+ """Build UDM from extracted parts (default: heuristic grouping). Override for format-specific trees."""
+ from modules.datamodels.datamodelUdm import _contentPartsToUdm, _mimeToUdmSourceType
+ from modules.datamodels.datamodelExtraction import ContentExtracted
+ from .subUtils import makeId
+
+ parts = precomputedParts if precomputedParts is not None else self.extract(fileBytes, context)
+ eid = context.get("extractionId") or makeId()
+ extracted = ContentExtracted(id=eid, parts=parts)
+ src = _mimeToUdmSourceType(context.get("mimeType", ""), context.get("fileName", ""))
+ return _contentPartsToUdm(extracted, src, context.get("fileName", ""))
def getSupportedExtensions(self) -> list[str]:
"""Return list of supported file extensions (including dots)."""
diff --git a/modules/shared/dbRegistry.py b/modules/shared/dbRegistry.py
new file mode 100644
index 00000000..057e27f8
--- /dev/null
+++ b/modules/shared/dbRegistry.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Dynamic database registry — each interface self-registers its DB on import.
+
+Usage in any interfaceDb*.py / interfaceFeature*.py:
+ from modules.shared.dbRegistry import registerDatabase
+ registerDatabase("poweron_xyz")
+"""
+
+import logging
+import threading
+from typing import Dict, Optional
+
+from modules.connectors.connectorDbPostgre import DatabaseConnector
+from modules.shared.configuration import APP_CONFIG
+
logger = logging.getLogger(__name__)

_lock = threading.Lock()
_registry: Dict[str, str] = {}


def registerDatabase(dbName: str, configPrefix: str = "DB") -> None:
    """Register a database for health monitoring.

    Called at module level by each interface so the registry fills itself
    as interfaces are imported. Re-registering a known database is a no-op.

    Args:
        dbName: PostgreSQL database name (e.g. "poweron_app").
        configPrefix: Config key prefix for host/port/user/password.
            Default "DB" reads DB_HOST, DB_PORT, etc.
    """
    with _lock:
        if dbName not in _registry:
            _registry[dbName] = configPrefix
            logger.debug(f"Database registered: {dbName} (configPrefix={configPrefix})")
+
+
def _getRegisteredDatabases() -> Dict[str, str]:
    """Return snapshot of all registered databases {dbName: configPrefix}."""
    with _lock:
        snapshot = dict(_registry)
    return snapshot
+
+
def _getConnectorForDb(dbName: str) -> DatabaseConnector:
    """Create a lightweight DatabaseConnector for the given registered DB.

    Intended for read-only health queries (pg_stat, orphan scans).
    Uses the same APP_CONFIG credentials as the application connectors.

    Raises:
        ValueError: If *dbName* was never passed to registerDatabase().
    """
    with _lock:
        configPrefix = _registry.get(dbName)
    if configPrefix is None:
        raise ValueError(f"Database '{dbName}' is not registered.")

    # The f-strings already yield "DB_HOST", "DB_PORT", etc. when
    # configPrefix == "DB", so the original per-key conditionals were
    # redundant dead code and have been removed.
    return DatabaseConnector(
        dbHost=APP_CONFIG.get(f"{configPrefix}_HOST", "localhost"),
        dbDatabase=dbName,
        dbUser=APP_CONFIG.get(f"{configPrefix}_USER"),
        dbPassword=APP_CONFIG.get(f"{configPrefix}_PASSWORD_SECRET"),
        dbPort=int(APP_CONFIG.get(f"{configPrefix}_PORT", 5432)),
    )
diff --git a/modules/shared/fkRegistry.py b/modules/shared/fkRegistry.py
new file mode 100644
index 00000000..ccf68666
--- /dev/null
+++ b/modules/shared/fkRegistry.py
@@ -0,0 +1,243 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+FK-Discovery — scans the Model-Registry for `fk_target` annotations and
+builds a cached list of foreign-key relationships.
+
+Each relationship describes one directed edge:
+ sourceTable.sourceColumn → targetTable.targetColumn
+ (possibly across databases)
+
+The table→db mapping is derived automatically from the `fk_target`
+annotations themselves: every `fk_target` declares `{"db": "...", "table": "..."}`
+for the *target* side. By collecting all such declarations we know which DB
+each table lives in — no extra registration step needed.
+
+Usage:
+ from modules.shared.fkRegistry import _getFkRelationships
+ rels = _getFkRelationships()
+"""
+
+import importlib
+import logging
+import os
+import threading
+from dataclasses import dataclass
+from typing import Dict, List, Optional
+
+from modules.datamodels.datamodelBase import _MODEL_REGISTRY
+
+logger = logging.getLogger(__name__)
+
+_modelsLoaded = False
+
+
+def _ensureModelsLoaded() -> None:
+ """Import all datamodel modules so that __init_subclass__ fills _MODEL_REGISTRY.
+
+ In a running server the interfaces import the datamodels automatically.
+ This function makes FK-Discovery work in standalone / test contexts too.
+ """
+ global _modelsLoaded
+ if _modelsLoaded:
+ return
+
+ gatewayRoot = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+ datamodelDir = os.path.join(gatewayRoot, "modules", "datamodels")
+ for fname in os.listdir(datamodelDir):
+ if fname.startswith("datamodel") and fname.endswith(".py") and fname != "__init__.py":
+ modName = f"modules.datamodels.{fname[:-3]}"
+ try:
+ importlib.import_module(modName)
+ except Exception as e:
+ logger.debug(f"Could not import {modName}: {e}")
+
+ featuresDir = os.path.join(gatewayRoot, "modules", "features")
+ if os.path.isdir(featuresDir):
+ for featureDir in os.listdir(featuresDir):
+ featurePath = os.path.join(featuresDir, featureDir)
+ if not os.path.isdir(featurePath):
+ continue
+ for fname in os.listdir(featurePath):
+ if fname.startswith("datamodel") and fname.endswith(".py"):
+ modName = f"modules.features.{featureDir}.{fname[:-3]}"
+ try:
+ importlib.import_module(modName)
+ except Exception as e:
+ logger.debug(f"Could not import {modName}: {e}")
+
+ _modelsLoaded = True
+
+_lock = threading.Lock()
+_cachedRelationships: Optional[List["FkRelationship"]] = None
+_cachedTableToDb: Optional[Dict[str, str]] = None
+
+
@dataclass(frozen=True)
class FkRelationship:
    """One directed FK edge: sourceDb.sourceTable.sourceColumn → targetDb.targetTable.targetColumn.

    Frozen (hashable) so instances can be cached and compared safely.
    """

    # Database / table / column holding the referencing value.
    sourceDb: str
    sourceTable: str
    sourceColumn: str
    # Database / table / column the referenced value must exist in.
    targetDb: str
    targetTable: str
    targetColumn: str
+
+
def _buildTableToDbMap() -> Dict[str, str]:
    """Derive {tableName → dbName} for every PowerOnModel subclass.

    Two-pass approach:
      1. Collect explicit mappings from fk_target annotations
         (every fk_target declares the DB for its *target* table).
      2. For models still unmapped, query each registered database's
         catalog (information_schema) to find the table there.
    """
    _ensureModelsLoaded()

    tableToDb: Dict[str, str] = {}
    for modelCls in _MODEL_REGISTRY.values():
        for fieldInfo in modelCls.model_fields.values():
            extra = fieldInfo.json_schema_extra
            fkTarget = extra.get("fk_target") if isinstance(extra, dict) else None
            if not isinstance(fkTarget, dict):
                continue
            declaredTable = fkTarget.get("table", "")
            declaredDb = fkTarget.get("db", "")
            if declaredTable and declaredDb:
                tableToDb[declaredTable] = declaredDb

    stillUnmapped = [name for name in _MODEL_REGISTRY if name not in tableToDb]
    if stillUnmapped:
        try:
            from modules.shared.dbRegistry import _getRegisteredDatabases
            _resolveUnmappedTablesFromCatalog(tableToDb, stillUnmapped, _getRegisteredDatabases())
        except Exception as e:
            logger.warning(f"Could not resolve unmapped tables from catalog: {e}")

    return tableToDb
+
+
def _resolveUnmappedTablesFromCatalog(
    mapping: Dict[str, str],
    unmapped: List[str],
    registeredDbs: Dict[str, str],
) -> None:
    """Query information_schema in each registered DB for unmapped table names."""
    import psycopg2
    import psycopg2.extras
    from modules.shared.configuration import APP_CONFIG

    pending = set(unmapped)
    for dbName, configPrefix in registeredDbs.items():
        if not pending:
            break
        try:
            isDefault = configPrefix == "DB"
            hostKey = "DB_HOST" if isDefault else f"{configPrefix}_HOST"
            portKey = "DB_PORT" if isDefault else f"{configPrefix}_PORT"
            userKey = "DB_USER" if isDefault else f"{configPrefix}_USER"
            pwKey = "DB_PASSWORD_SECRET" if isDefault else f"{configPrefix}_PASSWORD_SECRET"

            conn = psycopg2.connect(
                host=APP_CONFIG.get(hostKey, "localhost"),
                port=int(APP_CONFIG.get(portKey, 5432)),
                database=dbName,
                user=APP_CONFIG.get(userKey),
                password=APP_CONFIG.get(pwKey),
                client_encoding="utf8",
            )
            try:
                with conn.cursor() as cur:
                    cur.execute("""
                        SELECT table_name FROM information_schema.tables
                        WHERE table_schema = 'public'
                        AND table_name NOT LIKE '\\_%%'
                    """)
                    catalogTables = {row[0] for row in cur.fetchall()}

                for tableName in list(pending):
                    if tableName in catalogTables:
                        mapping[tableName] = dbName
                        pending.discard(tableName)
            finally:
                conn.close()
        except Exception as e:
            logger.debug(f"Catalog query for {dbName} failed: {e}")
+
+
def _discoverFkRelationships() -> List[FkRelationship]:
    """Scan every PowerOnModel subclass for `fk_target` in json_schema_extra.

    Returns a sorted list of FkRelationship objects, one per annotated field
    whose source table's database could be resolved.
    """
    tableToDb = _buildTableToDbMap()
    edges: List[FkRelationship] = []

    for sourceTable, modelCls in _MODEL_REGISTRY.items():
        sourceDb = tableToDb.get(sourceTable)
        if sourceDb is None:
            # Unknown source DB — cannot describe the edge; skip the model.
            continue

        for fieldName, fieldInfo in modelCls.model_fields.items():
            extra = fieldInfo.json_schema_extra
            if not isinstance(extra, dict):
                continue
            fkTarget = extra.get("fk_target")
            if not isinstance(fkTarget, dict):
                continue

            targetDb = fkTarget.get("db", "")
            targetTable = fkTarget.get("table", "")
            if not (targetDb and targetTable):
                continue

            edges.append(
                FkRelationship(
                    sourceDb=sourceDb,
                    sourceTable=sourceTable,
                    sourceColumn=fieldName,
                    targetDb=targetDb,
                    targetTable=targetTable,
                    targetColumn=fkTarget.get("column", "id"),
                )
            )

    return sorted(edges, key=lambda e: (e.sourceDb, e.sourceTable, e.sourceColumn))
+
+
def _getFkRelationships() -> List[FkRelationship]:
    """Return the cached list of FK relationships (discovered on first call)."""
    global _cachedRelationships
    with _lock:
        cached = _cachedRelationships
    if cached is not None:
        return cached
    # Discovery runs outside the lock; concurrent first callers may duplicate
    # work, but the result is deterministic so last-writer-wins is safe.
    discovered = _discoverFkRelationships()
    with _lock:
        _cachedRelationships = discovered
    return discovered
+
+
def _getTableToDbMap() -> Dict[str, str]:
    """Return the cached table→db mapping (built on first call)."""
    global _cachedTableToDb
    with _lock:
        cached = _cachedTableToDb
    if cached is not None:
        return cached
    built = _buildTableToDbMap()
    with _lock:
        _cachedTableToDb = built
    return built
+
+
def _invalidateFkCache() -> None:
    """Force re-scan on next call (useful for testing)."""
    global _cachedRelationships, _cachedTableToDb
    with _lock:
        # Drop both caches together so the relationships and the table→db
        # map stay mutually consistent on the next rebuild.
        _cachedRelationships = None
        _cachedTableToDb = None
diff --git a/modules/system/databaseHealth.py b/modules/system/databaseHealth.py
new file mode 100644
index 00000000..3a7feaf2
--- /dev/null
+++ b/modules/system/databaseHealth.py
@@ -0,0 +1,405 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Database health utilities — table statistics and orphan detection/cleanup.
+
+All functions are intended for SysAdmin use only (access control in the route layer).
+"""
+
+import logging
+import time
+import threading
+from dataclasses import dataclass, asdict
+from typing import Dict, List, Optional, Set
+
+import psycopg2
+import psycopg2.extras
+
+from modules.shared.configuration import APP_CONFIG
+from modules.shared.dbRegistry import _getRegisteredDatabases
+from modules.shared.fkRegistry import _getFkRelationships, FkRelationship
+
+logger = logging.getLogger(__name__)
+
+_ORPHAN_CACHE_TTL = 300 # 5 minutes
+_orphanCacheLock = threading.Lock()
+_orphanCache: Optional[Dict] = None # {"ts": float, "results": [...]}
+
+
+# ---------------------------------------------------------------------------
+# Dataclasses
+# ---------------------------------------------------------------------------
+
@dataclass
class TableStats:
    """Per-table statistics snapshot from pg_stat_user_tables and size functions."""

    db: str                     # registered database name
    table: str                  # relation name (public schema)
    estimatedRows: int          # n_live_tup — an estimate, not an exact count
    totalSizeBytes: int         # pg_total_relation_size (heap + indexes + toast)
    indexSizeBytes: int         # pg_indexes_size
    lastVacuum: Optional[str]   # last manual VACUUM timestamp as text, if any
    lastAnalyze: Optional[str]  # last manual ANALYZE timestamp as text, if any
+
+
@dataclass
class OrphanResult:
    """Orphan count for one FK edge (source rows whose target row is missing)."""

    sourceDb: str
    sourceTable: str
    sourceColumn: str
    targetDb: str
    targetTable: str
    targetColumn: str
    orphanCount: int  # number of referencing rows with no matching target row
+
+
+# ---------------------------------------------------------------------------
+# Low-level DB helpers (read-only, lightweight connections)
+# ---------------------------------------------------------------------------
+
def _getConnection(dbName: str):
    """Open a psycopg2 connection to the given registered database.

    The connection uses RealDictCursor, so rows come back as dicts keyed
    by column name.

    Raises:
        ValueError: If *dbName* was never registered via registerDatabase().
    """
    registeredDbs = _getRegisteredDatabases()
    configPrefix = registeredDbs.get(dbName)
    if configPrefix is None:
        raise ValueError(f"Database '{dbName}' is not registered.")

    # f"{configPrefix}_HOST" already equals "DB_HOST" when configPrefix is
    # "DB", so the original per-key conditionals were redundant dead code.
    return psycopg2.connect(
        host=APP_CONFIG.get(f"{configPrefix}_HOST", "localhost"),
        port=int(APP_CONFIG.get(f"{configPrefix}_PORT", 5432)),
        database=dbName,
        user=APP_CONFIG.get(f"{configPrefix}_USER"),
        password=APP_CONFIG.get(f"{configPrefix}_PASSWORD_SECRET"),
        client_encoding="utf8",
        cursor_factory=psycopg2.extras.RealDictCursor,
    )
+
+
+# ---------------------------------------------------------------------------
+# Table statistics
+# ---------------------------------------------------------------------------
+
def _getTableStats(dbFilter: Optional[str] = None) -> List[dict]:
    """Query pg_stat_user_tables + pg_total_relation_size for every registered DB.

    Returns a list of TableStats dicts, optionally filtered by database name.
    Databases that cannot be reached are logged and skipped.
    """
    registeredDbs = _getRegisteredDatabases()
    if dbFilter:
        registeredDbs = {k: v for k, v in registeredDbs.items() if k == dbFilter}

    statsRows: List[dict] = []
    for dbName in sorted(registeredDbs):
        try:
            conn = _getConnection(dbName)
            try:
                with conn.cursor() as cur:
                    cur.execute("""
                        SELECT
                            s.relname AS "table",
                            s.n_live_tup AS "estimatedRows",
                            pg_total_relation_size(quote_ident(s.relname)) AS "totalSizeBytes",
                            pg_indexes_size(quote_ident(s.relname)) AS "indexSizeBytes",
                            s.last_vacuum::text AS "lastVacuum",
                            s.last_analyze::text AS "lastAnalyze"
                        FROM pg_stat_user_tables s
                        WHERE s.schemaname = 'public'
                        AND s.relname NOT LIKE '\\_%%'
                        ORDER BY s.relname
                    """)
                    for record in cur.fetchall():
                        stats = TableStats(
                            db=dbName,
                            table=record["table"],
                            estimatedRows=record["estimatedRows"],
                            totalSizeBytes=record["totalSizeBytes"],
                            indexSizeBytes=record["indexSizeBytes"],
                            lastVacuum=record["lastVacuum"],
                            lastAnalyze=record["lastAnalyze"],
                        )
                        statsRows.append(asdict(stats))
            finally:
                conn.close()
        except Exception as e:
            logger.error(f"Failed to get table stats for {dbName}: {e}")

    return statsRows
+
+
+# ---------------------------------------------------------------------------
+# Orphan scanning
+# ---------------------------------------------------------------------------
+
def _loadParentIds(conn, tableName: str, columnName: str) -> Set[str]:
    """Load all distinct values of a column from a table (for cross-DB checks)."""
    parentIds: Set[str] = set()
    with conn.cursor() as cur:
        cur.execute(f'SELECT DISTINCT "{columnName}" FROM "{tableName}"')
        for record in cur.fetchall():
            value = record[columnName]
            if value is not None:
                parentIds.add(str(value))
    return parentIds
+
+
def _countOrphansSameDb(
    conn, sourceTable: str, sourceColumn: str,
    targetTable: str, targetColumn: str,
) -> int:
    """Count orphans when source and target live in the same DB."""
    countSql = f"""
            SELECT COUNT(*) AS cnt
            FROM "{sourceTable}" s
            WHERE s."{sourceColumn}" IS NOT NULL
            AND s."{sourceColumn}" != ''
            AND NOT EXISTS (
                SELECT 1 FROM "{targetTable}" t
                WHERE t."{targetColumn}" = s."{sourceColumn}"
            )
        """
    with conn.cursor() as cur:
        cur.execute(countSql)
        return cur.fetchone()["cnt"]
+
+
def _countOrphansCrossDb(
    sourceConn, sourceTable: str, sourceColumn: str,
    parentIds: Set[str],
) -> int:
    """Count orphans when parent IDs come from a different DB."""
    if not parentIds:
        # No parents exist at all: every populated reference is an orphan.
        sql = f"""
            SELECT COUNT(*) AS cnt
            FROM "{sourceTable}"
            WHERE "{sourceColumn}" IS NOT NULL
            AND "{sourceColumn}" != ''
        """
        params = None
    else:
        sql = f"""
            SELECT COUNT(*) AS cnt
            FROM "{sourceTable}"
            WHERE "{sourceColumn}" IS NOT NULL
            AND "{sourceColumn}" != ''
            AND "{sourceColumn}" NOT IN (
                SELECT unnest(%(ids)s::text[])
            )
        """
        params = {"ids": list(parentIds)}

    with sourceConn.cursor() as cur:
        cur.execute(sql, params)
        return cur.fetchone()["cnt"]
+
+
def _scanOrphans(dbFilter: Optional[str] = None) -> List[dict]:
    """Scan for orphaned records across all FK relationships.

    Results are cached for _ORPHAN_CACHE_TTL seconds; a dbFilter request is
    answered by filtering the cached full scan.

    Fix vs. original: only a FULL (unfiltered) scan is written to the cache.
    Previously a dbFilter-scoped scan stored its PARTIAL results, and a later
    unfiltered call within the TTL returned that partial list as if it
    covered every database.
    """
    global _orphanCache
    with _orphanCacheLock:
        if _orphanCache and (time.time() - _orphanCache["ts"]) < _ORPHAN_CACHE_TTL:
            cached = _orphanCache["results"]
            if dbFilter:
                return [r for r in cached if r["sourceDb"] == dbFilter]
            return list(cached)

    relationships = _getFkRelationships()
    if dbFilter:
        relationships = [r for r in relationships if r.sourceDb == dbFilter]

    connCache: Dict[str, object] = {}       # dbName -> open connection (reused across edges)
    tableCache: Dict[str, Set[str]] = {}    # dbName -> existing public tables
    parentIdCache: Dict[str, Set[str]] = {} # "db.table.column" -> parent id set
    results: List[dict] = []

    def _ensureConn(dbName: str):
        # Lazily open (and reuse) exactly one connection per database.
        if dbName not in connCache:
            connCache[dbName] = _getConnection(dbName)
        return connCache[dbName]

    def _existingTables(dbName: str) -> Set[str]:
        """Cached lookup of physically existing public tables in a DB."""
        if dbName not in tableCache:
            try:
                conn = _ensureConn(dbName)
                with conn.cursor() as cur:
                    cur.execute("""
                        SELECT table_name FROM information_schema.tables
                        WHERE table_schema = 'public'
                    """)
                    tableCache[dbName] = {row["table_name"] for row in cur.fetchall()}
            except Exception:
                tableCache[dbName] = set()
        return tableCache[dbName]

    try:
        for rel in relationships:
            try:
                # Skip edges whose source or target table does not physically exist.
                sourceTables = _existingTables(rel.sourceDb)
                if rel.sourceTable not in sourceTables:
                    continue

                if rel.sourceDb == rel.targetDb:
                    if rel.targetTable not in sourceTables:
                        continue
                else:
                    targetTables = _existingTables(rel.targetDb)
                    if rel.targetTable not in targetTables:
                        continue

                sourceConn = _ensureConn(rel.sourceDb)

                if rel.sourceDb == rel.targetDb:
                    count = _countOrphansSameDb(
                        sourceConn, rel.sourceTable, rel.sourceColumn,
                        rel.targetTable, rel.targetColumn,
                    )
                else:
                    # Cross-DB: materialize the parent id set once per target column.
                    parentKey = f"{rel.targetDb}.{rel.targetTable}.{rel.targetColumn}"
                    if parentKey not in parentIdCache:
                        targetConn = _ensureConn(rel.targetDb)
                        parentIdCache[parentKey] = _loadParentIds(
                            targetConn, rel.targetTable, rel.targetColumn,
                        )

                    count = _countOrphansCrossDb(
                        sourceConn, rel.sourceTable, rel.sourceColumn,
                        parentIdCache[parentKey],
                    )

                results.append(asdict(OrphanResult(
                    sourceDb=rel.sourceDb,
                    sourceTable=rel.sourceTable,
                    sourceColumn=rel.sourceColumn,
                    targetDb=rel.targetDb,
                    targetTable=rel.targetTable,
                    targetColumn=rel.targetColumn,
                    orphanCount=count,
                )))

            except Exception as e:
                logger.warning(
                    f"Orphan scan failed for {rel.sourceDb}.{rel.sourceTable}.{rel.sourceColumn}: {e}"
                )
                # A failed statement poisons the transaction; roll back so the
                # shared connections stay usable for the remaining edges.
                for dbKey in (rel.sourceDb, rel.targetDb):
                    if dbKey in connCache:
                        try:
                            connCache[dbKey].rollback()
                        except Exception:
                            pass
    finally:
        for conn in connCache.values():
            try:
                conn.close()
            except Exception:
                pass

    # Only cache a full scan (see docstring); filtered scans stay uncached.
    if dbFilter is None:
        with _orphanCacheLock:
            _orphanCache = {"ts": time.time(), "results": results}

    return results
+
+
+# ---------------------------------------------------------------------------
+# Orphan cleanup
+# ---------------------------------------------------------------------------
+
def _cleanOrphans(db: str, table: str, column: str) -> int:
    """Delete orphaned records for a single FK relationship. Returns count deleted.

    Args:
        db: Source database name (must match a discovered FK edge).
        table: Source table holding the dangling references.
        column: Source column whose values point at the missing parents.

    Raises:
        ValueError: If no FK relationship matches (db, table, column).

    NOTE(review): in the cross-DB branch an EMPTY parent table deletes every
    row with a non-empty reference — confirm this is intended for freshly
    provisioned databases.
    """
    relationships = _getFkRelationships()
    rel = next(
        (r for r in relationships
         if r.sourceDb == db and r.sourceTable == table and r.sourceColumn == column),
        None,
    )
    if rel is None:
        raise ValueError(f"No FK relationship found for {db}.{table}.{column}")

    conn = _getConnection(rel.sourceDb)
    try:
        if rel.sourceDb == rel.targetDb:
            # Same DB: one correlated NOT EXISTS delete, committed immediately.
            with conn.cursor() as cur:
                cur.execute(f"""
                    DELETE FROM "{rel.sourceTable}"
                    WHERE "{rel.sourceColumn}" IS NOT NULL
                    AND "{rel.sourceColumn}" != ''
                    AND NOT EXISTS (
                        SELECT 1 FROM "{rel.targetTable}" t
                        WHERE t."{rel.targetColumn}" = "{rel.sourceTable}"."{rel.sourceColumn}"
                    )
                """)
                deleted = cur.rowcount
            conn.commit()
        else:
            # Cross DB: snapshot the parent ids, then delete rows not in the set.
            targetConn = _getConnection(rel.targetDb)
            try:
                parentIds = _loadParentIds(targetConn, rel.targetTable, rel.targetColumn)
            finally:
                targetConn.close()

            if not parentIds:
                with conn.cursor() as cur:
                    cur.execute(f"""
                        DELETE FROM "{rel.sourceTable}"
                        WHERE "{rel.sourceColumn}" IS NOT NULL
                        AND "{rel.sourceColumn}" != ''
                    """)
                    deleted = cur.rowcount
            else:
                with conn.cursor() as cur:
                    cur.execute(f"""
                        DELETE FROM "{rel.sourceTable}"
                        WHERE "{rel.sourceColumn}" IS NOT NULL
                        AND "{rel.sourceColumn}" != ''
                        AND "{rel.sourceColumn}" NOT IN (
                            SELECT unnest(%(ids)s::text[])
                        )
                    """, {"ids": list(parentIds)})
                    deleted = cur.rowcount
            # Single commit covers whichever cross-DB delete ran above.
            conn.commit()
    except Exception:
        # Undo the partial delete before propagating the error.
        conn.rollback()
        raise
    finally:
        conn.close()

    # Counts are now stale — force the next scan to re-query.
    _invalidateOrphanCache()
    logger.info(f"Cleaned {deleted} orphans from {db}.{table}.{column}")
    return deleted
+
+
def _cleanAllOrphans() -> List[dict]:
    """Clean all detected orphans. Returns list of {db, table, column, deleted}.

    Improvement: relationships whose scan already reported orphanCount == 0
    are no longer sent a pointless DELETE round-trip; they are still included
    in the result with deleted=0 so every scanned edge is reported.
    """
    results: List[dict] = []
    for orphan in _scanOrphans():
        entry = {
            "db": orphan["sourceDb"],
            "table": orphan["sourceTable"],
            "column": orphan["sourceColumn"],
            "deleted": 0,
        }
        if orphan.get("orphanCount", 0) == 0:
            # Nothing to delete for this edge — skip the DB round-trip.
            results.append(entry)
            continue
        try:
            entry["deleted"] = _cleanOrphans(entry["db"], entry["table"], entry["column"])
        except Exception as e:
            logger.error(
                f"Failed to clean orphans for {entry['db']}.{entry['table']}.{entry['column']}: {e}"
            )
            entry["error"] = str(e)
        results.append(entry)
    return results
+
+
def _invalidateOrphanCache() -> None:
    """Drop the cached orphan-scan results so the next _scanOrphans() re-queries."""
    global _orphanCache
    with _orphanCacheLock:
        _orphanCache = None
diff --git a/modules/system/mainSystem.py b/modules/system/mainSystem.py
index 277e7e4b..59a42fcb 100644
--- a/modules/system/mainSystem.py
+++ b/modules/system/mainSystem.py
@@ -329,6 +329,16 @@ NAVIGATION_SECTIONS = [
"adminOnly": True,
"sysAdminOnly": True,
},
+ {
+ "id": "admin-database-health",
+ "objectKey": "ui.admin.databaseHealth",
+ "label": t("Datenbank-Gesundheit"),
+ "icon": "FaDatabase",
+ "path": "/admin/database-health",
+ "order": 98,
+ "adminOnly": True,
+ "sysAdminOnly": True,
+ },
{
"id": "admin-demo-config",
"objectKey": "ui.admin.demoConfig",
diff --git a/modules/workflows/automation2/executionEngine.py b/modules/workflows/automation2/executionEngine.py
index 45554632..d3a51800 100644
--- a/modules/workflows/automation2/executionEngine.py
+++ b/modules/workflows/automation2/executionEngine.py
@@ -159,7 +159,8 @@ def _getExecutor(
return DataExecutor()
if (nodeType.startswith("ai.") or nodeType.startswith("email.")
or nodeType.startswith("sharepoint.") or nodeType.startswith("clickup.")
- or nodeType.startswith("file.") or nodeType.startswith("trustee.")):
+ or nodeType.startswith("file.") or nodeType.startswith("trustee.")
+ or nodeType.startswith("context.")):
return ActionNodeExecutor(services)
if nodeType.startswith("input.") and automation2_interface:
return InputExecutor(automation2_interface)
@@ -424,6 +425,10 @@ async def executeGraph(
processed_in_loop: Set[str] = set()
_aggregateAccumulators: Dict[str, list] = {}
+ STEPLOG_BATCH_THRESHOLD = 100
+ AGGREGATE_FLUSH_THRESHOLD = 1000
+ _aggregateTempChunks: Dict[str, List[list]] = {}
+
# Check for loop resume: run was paused inside a loop, we're resuming for next iteration
run = automation2_interface.getRun(runId) if (runId and automation2_interface) else None
loop_resume_state = (run.get("context") or {}).get("_loopState") if run else None
@@ -551,71 +556,138 @@ async def executeGraph(
body_ordered = [n for n in ordered if n.get("id") in body_ids]
processed_in_loop.update(body_ids)
processed_in_loop.add(nodeId)
- for idx, item in enumerate(items):
- nodeOutputs[nodeId] = {"items": items, "count": len(items), "currentItem": item, "currentIndex": idx}
- context["_loopState"] = {"loopNodeId": nodeId, "currentIndex": idx, "items": items}
+ _loopConcurrency = int((node.get("parameters") or {}).get("concurrency", 1))
+ _loopConcurrency = max(1, min(_loopConcurrency, 20))
+ _batchMode = len(items) > STEPLOG_BATCH_THRESHOLD
+ _aggLock = asyncio.Lock()
+
+ async def _runLoopIteration(_idx: int, _item: Any) -> Optional[Dict]:
+ """Execute all body nodes for one iteration. Returns error dict or None."""
+ _iterOutputs = dict(nodeOutputs)
+ _iterOutputs[nodeId] = {"items": items, "count": len(items), "currentItem": _item, "currentIndex": _idx}
+ _iterCtx = dict(context)
+ _iterCtx["nodeOutputs"] = _iterOutputs if _loopConcurrency > 1 else nodeOutputs
+ _iterCtx["_loopState"] = {"loopNodeId": nodeId, "currentIndex": _idx, "items": items}
+
+ if _loopConcurrency == 1:
+ nodeOutputs[nodeId] = _iterOutputs[nodeId]
+ context["_loopState"] = _iterCtx["_loopState"]
+
+ _activeOutputs = _iterOutputs if _loopConcurrency > 1 else nodeOutputs
+ _activeCtx = _iterCtx if _loopConcurrency > 1 else context
+
for body_node in body_ordered:
bnid = body_node.get("id")
if not bnid or context.get("_stopped"):
break
- if not _is_node_on_active_path(bnid, connectionMap, nodeOutputs):
+ if not _is_node_on_active_path(bnid, connectionMap, _activeOutputs):
continue
bexec = _getExecutor(body_node.get("type", ""), services, automation2_interface)
if not bexec:
- nodeOutputs[bnid] = None
+ _activeOutputs[bnid] = None
continue
_bStepStart = time.time()
- _bInputSnap = {"_loopItem": item, "_loopIndex": idx}
- for _bSrc, _, _ in connectionMap.get(bnid, []):
- if _bSrc in nodeOutputs:
- _bInputSnap[_bSrc] = nodeOutputs[_bSrc]
- _bStepId = _createStepLog(automation2_interface, runId, bnid, body_node.get("type", ""), "running", _bInputSnap)
+ _bStepId = None
+ if not _batchMode or _idx == 0 or _idx == len(items) - 1:
+ _bInputSnap = {"_loopItem": _item, "_loopIndex": _idx}
+ _bStepId = _createStepLog(automation2_interface, runId, bnid, body_node.get("type", ""), "running", _bInputSnap)
try:
- bres, _bRetry = await _executeWithRetry(bexec, body_node, context)
- # data.aggregate: accumulate instead of overwrite
+ bres, _bRetry = await _executeWithRetry(bexec, body_node, _activeCtx)
if body_node.get("type") == "data.aggregate":
- if bnid not in _aggregateAccumulators:
- _aggregateAccumulators[bnid] = []
- accItems = bres.get("items", [bres]) if isinstance(bres, dict) else [bres]
- _aggregateAccumulators[bnid].extend(accItems)
- nodeOutputs[bnid] = bres
- _bDur = int((time.time() - _bStepStart) * 1000)
- _updateStepLog(automation2_interface, _bStepId, "completed",
- output=bres if isinstance(bres, dict) else {"value": bres},
- durationMs=_bDur, retryCount=_bRetry)
- logger.info("executeGraph loop body node %s done (iter %d, retries=%d)", bnid, idx, _bRetry)
+ async with _aggLock:
+ if bnid not in _aggregateAccumulators:
+ _aggregateAccumulators[bnid] = []
+ accItems = bres.get("items", [bres]) if isinstance(bres, dict) else [bres]
+ _aggregateAccumulators[bnid].extend(accItems)
+ if len(_aggregateAccumulators[bnid]) >= AGGREGATE_FLUSH_THRESHOLD:
+ _aggregateTempChunks.setdefault(bnid, []).append(_aggregateAccumulators[bnid])
+ _aggregateAccumulators[bnid] = []
+ _activeOutputs[bnid] = bres
+ if _bStepId:
+ _bDur = int((time.time() - _bStepStart) * 1000)
+ _updateStepLog(automation2_interface, _bStepId, "completed",
+ output=bres if isinstance(bres, dict) else {"value": bres},
+ durationMs=_bDur, retryCount=_bRetry)
+ if _loopConcurrency == 1:
+ nodeOutputs[bnid] = bres
except PauseForHumanTaskError as e:
- _updateStepLog(automation2_interface, _bStepId, "completed",
- durationMs=int((time.time() - _bStepStart) * 1000))
+ if _bStepId:
+ _updateStepLog(automation2_interface, _bStepId, "completed",
+ durationMs=int((time.time() - _bStepStart) * 1000))
if runId and automation2_interface:
- run = automation2_interface.getRun(runId) or {}
- run_ctx = dict(run.get("context") or {})
- run_ctx["_loopState"] = {"loopNodeId": nodeId, "currentIndex": idx, "items": items}
- automation2_interface.updateRun(e.runId, status="paused", nodeOutputs=_serializableOutputs(nodeOutputs), currentNodeId=e.nodeId, context=run_ctx)
- return {"success": False, "paused": True, "taskId": e.taskId, "runId": e.runId, "nodeId": e.nodeId, "nodeOutputs": _serializableOutputs(nodeOutputs)}
- except PauseForEmailWaitError as e:
- _updateStepLog(automation2_interface, _bStepId, "completed",
- durationMs=int((time.time() - _bStepStart) * 1000))
+ _run = automation2_interface.getRun(runId) or {}
+ _run_ctx = dict(_run.get("context") or {})
+ _run_ctx["_loopState"] = {"loopNodeId": nodeId, "currentIndex": _idx, "items": items}
+ automation2_interface.updateRun(e.runId, status="paused", nodeOutputs=_serializableOutputs(nodeOutputs), currentNodeId=e.nodeId, context=_run_ctx)
+ return {"_pause": True, "taskId": e.taskId, "runId": e.runId, "nodeId": e.nodeId}
+ except PauseForEmailWaitError:
+ if _bStepId:
+ _updateStepLog(automation2_interface, _bStepId, "completed",
+ durationMs=int((time.time() - _bStepStart) * 1000))
raise
except Exception as ex:
- _updateStepLog(automation2_interface, _bStepId, "failed",
- error=str(ex), durationMs=int((time.time() - _bStepStart) * 1000))
- logger.exception("executeGraph loop body node %s FAILED: %s", bnid, ex)
- nodeOutputs[bnid] = {"error": str(ex), "success": False}
+ if _bStepId:
+ _updateStepLog(automation2_interface, _bStepId, "failed",
+ error=str(ex), durationMs=int((time.time() - _bStepStart) * 1000))
+ logger.exception("executeGraph loop body node %s FAILED (iter %d): %s", bnid, _idx, ex)
+ return {"_error": str(ex), "failedNode": bnid}
+
+ if _batchMode and _idx > 0 and _idx % STEPLOG_BATCH_THRESHOLD == 0 and runId:
+ _emitStepEvent(runId, {"type": "loop_progress", "nodeId": nodeId, "iteration": _idx, "total": len(items)})
+ return None
+
+ if _loopConcurrency <= 1:
+ for idx, item in enumerate(items):
+ iterErr = await _runLoopIteration(idx, item)
+ if iterErr:
+ if iterErr.get("_pause"):
+ return {"success": False, "paused": True, "taskId": iterErr["taskId"], "runId": iterErr["runId"], "nodeId": iterErr["nodeId"], "nodeOutputs": _serializableOutputs(nodeOutputs)}
+ nodeOutputs[iterErr.get("failedNode", nodeId)] = {"error": iterErr["_error"], "success": False}
if runId and automation2_interface:
automation2_interface.updateRun(runId, status="failed", nodeOutputs=_serializableOutputs(nodeOutputs))
if runId:
_activeRunContexts.pop(runId, None)
- return {"success": False, "error": str(ex), "nodeOutputs": _serializableOutputs(nodeOutputs), "failedNode": bnid, "runId": runId}
+ return {"success": False, "error": iterErr["_error"], "nodeOutputs": _serializableOutputs(nodeOutputs), "failedNode": iterErr.get("failedNode"), "runId": runId}
+ else:
+ _sem = asyncio.Semaphore(_loopConcurrency)
+
+ async def _concurrentIter(_ci: int, _citem: Any):
+ async with _sem:
+ return await _runLoopIteration(_ci, _citem)
+
+ _tasks = [_concurrentIter(ci, citem) for ci, citem in enumerate(items)]
+ _results = await asyncio.gather(*_tasks, return_exceptions=True)
+ for _ri, _rval in enumerate(_results):
+ if isinstance(_rval, Exception):
+ logger.exception("Loop iteration %d raised: %s", _ri, _rval)
+ if runId and automation2_interface:
+ automation2_interface.updateRun(runId, status="failed", nodeOutputs=_serializableOutputs(nodeOutputs))
+ if runId:
+ _activeRunContexts.pop(runId, None)
+ return {"success": False, "error": str(_rval), "nodeOutputs": _serializableOutputs(nodeOutputs), "runId": runId}
+ if isinstance(_rval, dict):
+ if _rval.get("_pause"):
+ return {"success": False, "paused": True, "taskId": _rval["taskId"], "runId": _rval["runId"], "nodeId": _rval["nodeId"], "nodeOutputs": _serializableOutputs(nodeOutputs)}
+ if _rval.get("_error"):
+ if runId and automation2_interface:
+ automation2_interface.updateRun(runId, status="failed", nodeOutputs=_serializableOutputs(nodeOutputs))
+ if runId:
+ _activeRunContexts.pop(runId, None)
+ return {"success": False, "error": _rval["_error"], "nodeOutputs": _serializableOutputs(nodeOutputs), "failedNode": _rval.get("failedNode"), "runId": runId}
+
nodeOutputs[nodeId] = {"items": items, "count": len(items)}
- # Finalize aggregate accumulators after loop
for aggId, accItems in _aggregateAccumulators.items():
- nodeOutputs[aggId] = {"items": accItems, "count": len(accItems), "_success": True}
+ allChunks = _aggregateTempChunks.pop(aggId, [])
+ finalItems = []
+ for chunk in allChunks:
+ finalItems.extend(chunk)
+ finalItems.extend(accItems)
+ nodeOutputs[aggId] = {"items": finalItems, "count": len(finalItems), "_success": True}
_aggregateAccumulators.clear()
_updateStepLog(automation2_interface, _stepId, "completed",
- output={"iterationCount": len(items), "items": len(items)},
+ output={"iterationCount": len(items), "items": len(items), "concurrency": _loopConcurrency, "batchMode": _batchMode},
durationMs=int((time.time() - _stepStartMs) * 1000))
- logger.info("executeGraph flow.loop done: %d iterations", len(items))
+ logger.info("executeGraph flow.loop done: %d iterations (concurrency=%d, batchMode=%s)", len(items), _loopConcurrency, _batchMode)
elif _isMergeNode(nodeType):
if not _allMergePredecessorsReady(nodeId, connectionMap, nodeOutputs):
logger.info("executeGraph node %s (flow.merge): waiting — not all predecessors ready, deferring", nodeId)
diff --git a/modules/workflows/automation2/executors/actionNodeExecutor.py b/modules/workflows/automation2/executors/actionNodeExecutor.py
index 0d5134f1..e431e83f 100644
--- a/modules/workflows/automation2/executors/actionNodeExecutor.py
+++ b/modules/workflows/automation2/executors/actionNodeExecutor.py
@@ -385,4 +385,13 @@ class ActionNodeExecutor:
except (json.JSONDecodeError, TypeError, ValueError):
pass
+ if outputSchema == "ConsolidateResult" and nodeType == "ai.consolidate":
+ data_dict = result.data if isinstance(getattr(result, "data", None), dict) else {}
+ cr_out = {
+ "result": data_dict.get("result", ""),
+ "mode": data_dict.get("mode", resolvedParams.get("mode", "summarize")),
+ "count": int(data_dict.get("count", 0)),
+ }
+ return _normalizeToSchema(cr_out, outputSchema)
+
return _normalizeToSchema(out, outputSchema)
diff --git a/modules/workflows/automation2/executors/dataExecutor.py b/modules/workflows/automation2/executors/dataExecutor.py
index 8da5cd75..26334dd0 100644
--- a/modules/workflows/automation2/executors/dataExecutor.py
+++ b/modules/workflows/automation2/executors/dataExecutor.py
@@ -30,6 +30,8 @@ class DataExecutor:
return await self._transform(node, nodeOutputs, nodeId, inputSources)
if nodeType == "data.filter":
return await self._filter(node, nodeOutputs, nodeId, inputSources)
+ if nodeType == "data.consolidate":
+ return await self._consolidate(node, nodeOutputs, nodeId, inputSources)
logger.debug("DataExecutor node %s unhandled type %s", nodeId, nodeType)
return None
@@ -110,10 +112,15 @@ class DataExecutor:
nodeId: str,
inputSources: Dict,
) -> Any:
- """Filter items by condition expression. Returns Transit envelope."""
+ """Filter items by condition expression and/or UDM content type. Returns Transit envelope."""
inp = self._getInput(inputSources, nodeOutputs)
data = _unwrapTransit(inp) if isinstance(inp, dict) and inp.get("_transit") else inp
- condition = (node.get("parameters") or {}).get("condition", "")
+ params = node.get("parameters") or {}
+ condition = params.get("condition", "")
+ udmContentType = params.get("udmContentType", "")
+
+ if udmContentType and isinstance(data, dict) and data.get("children"):
+ data = self._filterUdmByContentType(data, udmContentType)
items = self._extractItems(data)
originalCount = len(items)
@@ -137,6 +144,56 @@ class DataExecutor:
"filteredCount": len(filtered),
})
+ async def _consolidate(
+ self,
+ node: Dict,
+ nodeOutputs: Dict,
+ nodeId: str,
+ inputSources: Dict,
+ ) -> Any:
+ """Deterministic consolidation: table, concat, merge, csvJoin."""
+ inp = self._getInput(inputSources, nodeOutputs)
+ data = _unwrapTransit(inp) if isinstance(inp, dict) and inp.get("_transit") else inp
+ params = node.get("parameters") or {}
+ mode = params.get("mode", "table")
+ separator = params.get("separator", "\n")
+
+ items = self._extractItems(data) if isinstance(data, (dict, list)) else []
+ count = len(items)
+
+ if mode == "concat":
+ result = separator.join(str(i) for i in items)
+ elif mode == "csvJoin":
+ lines = []
+ for item in items:
+ if isinstance(item, dict):
+ lines.append(separator.join(str(v) for v in item.values()))
+ else:
+ lines.append(str(item))
+ result = "\n".join(lines)
+ elif mode == "merge":
+ merged: Dict = {}
+ for item in items:
+ if isinstance(item, dict):
+ merged.update(item)
+ result = merged
+ else:
+ rows = []
+ headers: list = []
+ for item in items:
+ if isinstance(item, dict):
+ for k in item:
+ if k not in headers:
+ headers.append(k)
+ rows.append(item)
+ else:
+ rows.append({"value": item})
+ if "value" not in headers:
+ headers.append("value")
+ result = {"headers": headers, "rows": rows}
+
+ return {"result": result, "mode": mode, "count": count, "_success": True}
+
def _getInput(self, inputSources: Dict, nodeOutputs: Dict) -> Any:
"""Get data from the first connected input port."""
if 0 not in inputSources:
@@ -185,6 +242,21 @@ class DataExecutor:
return True
+ def _filterUdmByContentType(self, data: Dict, contentType: str) -> Dict:
+ """Filter UDM document/node, keeping only ContentBlocks matching the given contentType."""
+ result: list = []
+ children = data.get("children") or []
+ for child in children:
+ if not isinstance(child, dict):
+ continue
+ if child.get("contentType") == contentType:
+ result.append(child)
+ elif isinstance(child.get("children"), list):
+ for block in child["children"]:
+ if isinstance(block, dict) and block.get("contentType") == contentType:
+ result.append(block)
+ return {"nodes": result, "count": len(result), "_udmFiltered": True}
+
def _compareValues(self, left: Any, operator: str, right: Any) -> bool:
"""Compare two values with the given operator."""
if operator == "eq":
diff --git a/modules/workflows/automation2/executors/flowExecutor.py b/modules/workflows/automation2/executors/flowExecutor.py
index 0d50aa4e..de19d9a7 100644
--- a/modules/workflows/automation2/executors/flowExecutor.py
+++ b/modules/workflows/automation2/executors/flowExecutor.py
@@ -277,17 +277,61 @@ class FlowExecutor:
return False
async def _loop(self, node: Dict, nodeOutputs: Dict, nodeId: str, inputSources: Dict) -> Any:
- itemsPath = (node.get("parameters") or {}).get("items", "[]")
+ params = node.get("parameters") or {}
+ itemsPath = params.get("items", "[]")
+ level = params.get("level", "auto")
from modules.workflows.automation2.graphUtils import resolveParameterReferences
items = resolveParameterReferences(itemsPath, nodeOutputs)
- if isinstance(items, list):
+
+ if level != "auto" and isinstance(items, dict):
+ items = self._resolveUdmLevel(items, level)
+ elif isinstance(items, list):
pass
elif isinstance(items, dict):
- items = [{"name": k, "value": v} for k, v in items.items()]
+ children = items.get("children")
+ if isinstance(children, list) and children:
+ items = children
+ else:
+ items = [{"name": k, "value": v} for k, v in items.items()]
else:
items = [items] if items is not None else []
return {"items": items, "count": len(items)}
+ def _resolveUdmLevel(self, udm: Dict, level: str) -> list:
+ """Extract items from a UDM document/node at the requested structural level."""
+ children = udm.get("children") or []
+ if level == "documents":
+ return [c for c in children if isinstance(c, dict) and c.get("role") in ("document", "archive")]
+ if level == "structuralNodes":
+ if udm.get("role") == "document":
+ return children
+ out = []
+ for child in children:
+ if isinstance(child, dict) and isinstance(child.get("children"), list):
+ out.extend(child["children"])
+ elif isinstance(child, dict):
+ out.append(child)
+ return out if out else children
+ if level == "contentBlocks":
+ blocks = []
+ nodes = children
+ if udm.get("role") == "document":
+ for sn in nodes:
+ if isinstance(sn, dict) and isinstance(sn.get("children"), list):
+ blocks.extend(sn["children"])
+ elif udm.get("role") in ("page", "section", "slide", "sheet"):
+ blocks = nodes
+ else:
+ for child in nodes:
+ if isinstance(child, dict) and isinstance(child.get("children"), list):
+ for sn in child["children"]:
+ if isinstance(sn, dict) and isinstance(sn.get("children"), list):
+ blocks.extend(sn["children"])
+ else:
+ blocks.append(sn)
+ return blocks
+ return children
+
async def _merge(self, node: Dict, nodeOutputs: Dict, nodeId: str, inputSources: Dict, context: Dict) -> Any:
"""Merge multiple branch inputs. mode: first | all | append."""
mode = (node.get("parameters") or {}).get("mode", "first")
diff --git a/modules/workflows/methods/methodAi/actions/__init__.py b/modules/workflows/methods/methodAi/actions/__init__.py
index f0f18286..641b4eaf 100644
--- a/modules/workflows/methods/methodAi/actions/__init__.py
+++ b/modules/workflows/methods/methodAi/actions/__init__.py
@@ -11,6 +11,7 @@ from .translateDocument import translateDocument
from .convertDocument import convertDocument
from .generateDocument import generateDocument
from .generateCode import generateCode
+from .consolidate import consolidate
__all__ = [
'process',
@@ -20,5 +21,6 @@ __all__ = [
'convertDocument',
'generateDocument',
'generateCode',
+ 'consolidate',
]
diff --git a/modules/workflows/methods/methodAi/actions/consolidate.py b/modules/workflows/methods/methodAi/actions/consolidate.py
new file mode 100644
index 00000000..7a7d7982
--- /dev/null
+++ b/modules/workflows/methods/methodAi/actions/consolidate.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+import json
+import logging
+from typing import Any, Dict, List
+
+from modules.datamodels.datamodelAi import AiCallOptions, AiCallRequest, OperationTypeEnum
+from modules.datamodels.datamodelChat import ActionResult
+
+logger = logging.getLogger(__name__)
+
+
+def _normalizeItems(parameters: Dict[str, Any]) -> List[Any]:
+ items = parameters.get("items")
+ if isinstance(items, list):
+ return items
+ agg = parameters.get("aggregateResult")
+ if isinstance(agg, dict) and isinstance(agg.get("items"), list):
+ return agg["items"]
+ return []
+
+
+async def consolidate(self, parameters: Dict[str, Any]) -> ActionResult:
+ """AI-assisted consolidation of aggregated loop / workflow items."""
+ mode = (parameters.get("mode") or "summarize").strip()
+ extra = (parameters.get("prompt") or "").strip()
+ items = _normalizeItems(parameters)
+ if not items:
+ return ActionResult.isFailure(
+ error="No items to consolidate. Connect an AggregateResult or pass items.",
+ )
+
+ try:
+ payload = json.dumps(items, ensure_ascii=False, default=str)[:120000]
+ except TypeError:
+ payload = str(items)[:120000]
+
+ if mode == "summarize":
+ instr = "Summarize the following aggregated workflow results clearly and concisely."
+ elif mode == "classify":
+ instr = (
+ "Classify and group the following aggregated items. "
+ "Output a structured summary (categories, counts, key labels)."
+ )
+ elif mode == "semanticMerge":
+ instr = (
+ "Semantically merge the following items into one coherent result. "
+ "Remove duplicates where appropriate."
+ )
+ else:
+ instr = "Process the following aggregated data according to the user instructions."
+
+ if extra:
+ instr += f"\n\nAdditional instructions: {extra}"
+
+ prompt = f"{instr}\n\n--- DATA ---\n{payload}"
+
+ ai_service = getattr(self.services, "ai", None)
+ if not ai_service:
+ return ActionResult.isFailure(error="AI service unavailable")
+
+ try:
+ req = AiCallRequest(
+ prompt=prompt,
+ options=AiCallOptions(operationType=OperationTypeEnum.DATA_ANALYSE),
+ )
+ resp = await ai_service.callAi(req)
+ except Exception as e:
+ logger.exception("consolidate: AI call failed: %s", e)
+ return ActionResult.isFailure(error=str(e))
+
+ if getattr(resp, "errorCount", 0) and resp.errorCount > 0:
+ return ActionResult.isFailure(error=resp.content or "AI call failed")
+
+ text = (resp.content or "").strip()
+ return ActionResult.isSuccess(
+ data={
+ "result": text,
+ "mode": mode,
+ "count": len(items),
+ },
+ )
diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py
index c9a3cdbf..eac1babe 100644
--- a/modules/workflows/methods/methodAi/methodAi.py
+++ b/modules/workflows/methods/methodAi/methodAi.py
@@ -18,6 +18,7 @@ from .actions.translateDocument import translateDocument
from .actions.convertDocument import convertDocument
from .actions.generateDocument import generateDocument
from .actions.generateCode import generateCode
+from .actions.consolidate import consolidate
logger = logging.getLogger(__name__)
@@ -317,7 +318,38 @@ class MethodAi(MethodBase):
)
},
execute=generateCode.__get__(self, self.__class__)
- )
+ ),
+ "consolidate": WorkflowActionDefinition(
+ actionId="ai.consolidate",
+ description="AI-assisted consolidation of aggregated workflow results (summarize, classify, semantic merge)",
+ dynamicMode=True,
+ parameters={
+ "mode": WorkflowActionParameter(
+ name="mode",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["summarize", "classify", "semanticMerge"],
+ required=False,
+ default="summarize",
+ description="Consolidation strategy",
+ ),
+ "prompt": WorkflowActionParameter(
+ name="prompt",
+ type="str",
+ frontendType=FrontendType.TEXTAREA,
+ required=False,
+ description="Optional extra instructions for the LLM",
+ ),
+ "items": WorkflowActionParameter(
+ name="items",
+ type="List[Any]",
+ frontendType=FrontendType.HIDDEN,
+ required=False,
+ description="Aggregated items (from AggregateResult wire handover)",
+ ),
+ },
+ execute=consolidate.__get__(self, self.__class__)
+ ),
}
# Validate actions after definition
@@ -331,6 +363,7 @@ class MethodAi(MethodBase):
self.convertDocument = convertDocument.__get__(self, self.__class__)
self.generateDocument = generateDocument.__get__(self, self.__class__)
self.generateCode = generateCode.__get__(self, self.__class__)
+ self.consolidate = consolidate.__get__(self, self.__class__)
def _format_timestamp_for_filename(self) -> str:
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
diff --git a/tests/integration/extraction/test_extract_udm_pipeline.py b/tests/integration/extraction/test_extract_udm_pipeline.py
new file mode 100644
index 00000000..7c9b2bf8
--- /dev/null
+++ b/tests/integration/extraction/test_extract_udm_pipeline.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
+from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
+from modules.serviceCenter.services.serviceExtraction.subRegistry import ChunkerRegistry, getExtractorRegistry
+
+
+def test_run_extraction_html_with_udm():
+ reg = getExtractorRegistry()
+ chunkers = ChunkerRegistry()
+ html = b"Hello
"
+ opts = ExtractionOptions(
+ mergeStrategy=MergeStrategy(),
+ outputFormat="both",
+ outputDetail="full",
+ )
+ ec = runExtraction(reg, chunkers, html, "t.html", "text/html", opts)
+ assert ec.parts
+ assert ec.udm is not None
+ assert ec.udm.sourceType == "html"
+ assert ec.udm.children
+
+
+def test_run_extraction_parts_only_no_udm():
+ reg = getExtractorRegistry()
+ chunkers = ChunkerRegistry()
+ html = b""
+ opts = ExtractionOptions(mergeStrategy=MergeStrategy(), outputFormat="parts")
+ ec = runExtraction(reg, chunkers, html, "t.html", "text/html", opts)
+ assert ec.parts
+ assert ec.udm is None
+
+
+def test_get_extractor_registry_singleton():
+ a = getExtractorRegistry()
+ b = getExtractorRegistry()
+ assert a is b
diff --git a/tests/integration/workflows/test_execute_graph_loop_aggregate_consolidate.py b/tests/integration/workflows/test_execute_graph_loop_aggregate_consolidate.py
new file mode 100644
index 00000000..428fcd25
--- /dev/null
+++ b/tests/integration/workflows/test_execute_graph_loop_aggregate_consolidate.py
@@ -0,0 +1,123 @@
+# Copyright (c) 2025 Patrick Motsch
+# Integration: executeGraph with flow.loop + data.aggregate (no AI), then data.consolidate on same outputs.
+
+import pytest
+from unittest.mock import MagicMock
+
+from modules.workflows.automation2.executionEngine import executeGraph
+from modules.workflows.automation2.graphUtils import buildConnectionMap, getInputSources
+from modules.workflows.automation2.executors.dataExecutor import DataExecutor
+from modules.workflows.automation2.runEnvelope import default_run_envelope
+
+
+def _minimal_services():
+ return MagicMock()
+
+
+@pytest.mark.asyncio
+async def test_execute_graph_loop_and_aggregate_collects_items():
+ """
+ Trigger -> flow.loop (3 items) -> data.aggregate in loop body.
+ Final aggregate output must list 3 collected loop outputs (no AI).
+ """
+ graph = {
+ "nodes": [
+ {"id": "t1", "type": "trigger.manual", "parameters": {}},
+ {
+ "id": "loop1",
+ "type": "flow.loop",
+ "parameters": {
+ "items": {"type": "ref", "nodeId": "t1", "path": ["payload", "items"]},
+ "level": "auto",
+ "concurrency": 1,
+ },
+ },
+ {"id": "agg1", "type": "data.aggregate", "parameters": {"mode": "collect"}},
+ ],
+ "connections": [
+ {"source": "t1", "target": "loop1"},
+ {"source": "loop1", "target": "agg1"},
+ ],
+ }
+ run_envelope = default_run_envelope(
+ "manual",
+ payload={"items": [{"idx": 0}, {"idx": 1}, {"idx": 2}]},
+ )
+ res = await executeGraph(
+ graph,
+ services=_minimal_services(),
+ run_envelope=run_envelope,
+ userId="test-user",
+ )
+ assert res.get("success") is True, res
+ out = res["nodeOutputs"]
+ assert "agg1" in out
+ agg = out["agg1"]
+ assert agg.get("count") == 3
+ assert len(agg.get("items", [])) == 3
+
+
+@pytest.mark.asyncio
+async def test_data_consolidate_after_aggregate_same_context_as_post_loop():
+ """
+ After loop+aggregate, consolidate deterministically merges rows (table mode).
+ Uses the same wiring shape as DataExecutor would see after executeGraph (no second executeGraph
+ step: downstream-of-loop nodes are currently in the loop body only).
+ """
+ graph = {
+ "nodes": [
+ {"id": "t1", "type": "trigger.manual", "parameters": {}},
+ {
+ "id": "loop1",
+ "type": "flow.loop",
+ "parameters": {
+ "items": {"type": "ref", "nodeId": "t1", "path": ["payload", "items"]},
+ "level": "auto",
+ "concurrency": 1,
+ },
+ },
+ {"id": "agg1", "type": "data.aggregate", "parameters": {"mode": "collect"}},
+ ],
+ "connections": [
+ {"source": "t1", "target": "loop1"},
+ {"source": "loop1", "target": "agg1"},
+ ],
+ }
+ run_envelope = default_run_envelope(
+ "manual",
+ payload={"items": [{"a": 1, "b": "x"}, {"a": 2, "b": "y"}]},
+ )
+ res = await executeGraph(
+ graph,
+ services=_minimal_services(),
+ run_envelope=run_envelope,
+ userId="test-user",
+ )
+ assert res["success"] is True
+ node_outputs = res["nodeOutputs"]
+
+ connection_map = buildConnectionMap(graph["connections"])
+ input_sources = {"agg1": getInputSources("agg1", connection_map)}
+ context = {
+ "nodeOutputs": node_outputs,
+ "inputSources": input_sources,
+ }
+ cons_node = {
+ "id": "cons1",
+ "type": "data.consolidate",
+ "parameters": {"mode": "table", "separator": ","},
+ }
+ # Wire aggregate output into consolidate (same as an edge agg1 -> cons1 would provide).
+ context["inputSources"]["cons1"] = {0: ("agg1", 0)}
+ ex = DataExecutor()
+ c_out = await ex.execute(cons_node, context)
+ assert c_out.get("_success") is True
+ assert c_out.get("mode") == "table"
+ assert c_out.get("count") == 2
+ result = c_out.get("result")
+ assert isinstance(result, dict)
+ assert "headers" in result and "rows" in result
+ # Aggregate collected full loop outputs (LoopItem shape: currentItem, currentIndex, …).
+ assert len(result["rows"]) == 2
+ assert result["rows"][0].get("currentItem", {}).get("a") == 1
+ assert result["rows"][1].get("currentItem", {}).get("b") == "y"
diff --git a/tests/unit/datamodels/test_udm_bridge.py b/tests/unit/datamodels/test_udm_bridge.py
new file mode 100644
index 00000000..64cda51a
--- /dev/null
+++ b/tests/unit/datamodels/test_udm_bridge.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
+from modules.datamodels.datamodelUdm import _contentPartsToUdm, _udmToContentParts
+
+
+def test_bridge_pdf_like_pages():
+ root = "root-id"
+ parts = [
+ ContentPart(
+ id=root,
+ parentId=None,
+ label="pdf",
+ typeGroup="container",
+ mimeType="application/pdf",
+ data="",
+ metadata={},
+ ),
+ ContentPart(
+ id="t1",
+ parentId=root,
+ label="page_1",
+ typeGroup="text",
+ mimeType="text/plain",
+ data="A",
+ metadata={"pageIndex": 0},
+ ),
+ ContentPart(
+ id="t2",
+ parentId=root,
+ label="page_2",
+ typeGroup="text",
+ mimeType="text/plain",
+ data="B",
+ metadata={"pageIndex": 1},
+ ),
+ ]
+ extracted = ContentExtracted(id="ext1", parts=parts)
+ udm = _contentPartsToUdm(extracted, "pdf", "a.pdf")
+ assert udm.sourceType == "pdf"
+ assert len(udm.children) == 2
+ assert all(n.role == "page" for n in udm.children)
+ assert udm.children[0].children[0].raw == "A"
+ assert udm.children[1].children[0].raw == "B"
+
+
+def test_udm_to_parts_roundtrip_preserves_ids():
+ udm = _contentPartsToUdm(
+ ContentExtracted(
+ id="e1",
+ parts=[
+ ContentPart(
+ id="p1",
+ parentId=None,
+ label="x",
+ typeGroup="text",
+ mimeType="text/plain",
+ data="hi",
+ metadata={"pageIndex": 0},
+ ),
+ ],
+ ),
+ "unknown",
+ "f.txt",
+ )
+ back = _udmToContentParts(udm)
+ assert len(back.parts) >= 2
+ textParts = [p for p in back.parts if p.typeGroup == "text"]
+ assert any(p.data == "hi" for p in textParts)
diff --git a/tests/unit/datamodels/test_udm_models.py b/tests/unit/datamodels/test_udm_models.py
new file mode 100644
index 00000000..92d86a85
--- /dev/null
+++ b/tests/unit/datamodels/test_udm_models.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+from modules.datamodels.datamodelUdm import UdmContentBlock, UdmDocument, UdmStructuralNode
+
+
def test_udmDocument_roundtrip_minimal():
    """A minimal html document survives model_dump with its structure intact."""
    leaf = UdmContentBlock(id="b1", contentType="text", raw="hello")
    section = UdmStructuralNode(id="s1", role="section", index=0, label="body", children=[leaf])
    doc = UdmDocument(id="d1", sourceType="html", sourcePath="x.html", children=[section])
    dumped = doc.model_dump()
    assert dumped["sourceType"] == "html"
    assert len(doc.children[0].children) == 1
+
+
def test_udmContentBlock_fileRef_optional():
    """fileRef is optional on a content block and round-trips when supplied."""
    imageBlock = UdmContentBlock(id="x", contentType="image", raw="", fileRef="file:123")
    assert imageBlock.fileRef == "file:123"
diff --git a/tests/unit/nodeDefinitions/test_usesai_flag.py b/tests/unit/nodeDefinitions/test_usesai_flag.py
new file mode 100644
index 00000000..1c7bbf99
--- /dev/null
+++ b/tests/unit/nodeDefinitions/test_usesai_flag.py
@@ -0,0 +1,42 @@
+# T18 — AC #16/#17: meta.usesAi on every node type; AI vs non-AI distinction.
+
+import pytest
+
+from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES
+
+
def test_all_nodes_have_usesAi():
    """Every static node type must declare meta.usesAi."""
    missing = []
    for nodeType in STATIC_NODE_TYPES:
        if "usesAi" not in (nodeType.get("meta") or {}):
            missing.append(nodeType["id"])
    assert not missing, f"Nodes missing meta.usesAi: {missing}"
+
+
def test_ai_prompt_uses_ai():
    """ai.prompt is flagged as an AI node."""
    matches = (n for n in STATIC_NODE_TYPES if n["id"] == "ai.prompt")
    assert next(matches)["meta"]["usesAi"] is True
+
+
def test_data_filter_not_ai():
    """data.filter is a purely mechanical node, not AI-backed."""
    filterNode = next(n for n in STATIC_NODE_TYPES if n["id"] == "data.filter")
    assert filterNode["meta"]["usesAi"] is False
+
+
def test_data_consolidate_not_ai():
    """data.consolidate merges deterministically, so it must not be flagged as AI."""
    consolidateNode = next(n for n in STATIC_NODE_TYPES if n["id"] == "data.consolidate")
    assert consolidateNode["meta"]["usesAi"] is False
+
+
def test_ai_consolidate_is_ai():
    """ai.consolidate is the AI-backed counterpart and carries usesAi=True."""
    consolidateNode = next(n for n in STATIC_NODE_TYPES if n["id"] == "ai.consolidate")
    assert consolidateNode["meta"]["usesAi"] is True
+
+
def test_trustee_extract_uses_ai_process_not():
    """File extraction is AI-backed while plain document processing is not."""
    extractNode = next(n for n in STATIC_NODE_TYPES if n["id"] == "trustee.extractFromFiles")
    processNode = next(n for n in STATIC_NODE_TYPES if n["id"] == "trustee.processDocuments")
    assert extractNode["meta"]["usesAi"] is True
    assert processNode["meta"]["usesAi"] is False
+
+
def test_context_extract_not_ai():
    """context.extractContent performs extraction without AI involvement."""
    contextNode = next(n for n in STATIC_NODE_TYPES if n["id"] == "context.extractContent")
    assert contextNode["meta"]["usesAi"] is False
diff --git a/tests/unit/serviceAgent/test_udm_agent_tools.py b/tests/unit/serviceAgent/test_udm_agent_tools.py
new file mode 100644
index 00000000..3449dd81
--- /dev/null
+++ b/tests/unit/serviceAgent/test_udm_agent_tools.py
@@ -0,0 +1,65 @@
+# Phase 7: UDM tools (getUdmStructure, walkUdmBlocks, filterUdmByType).
+
+from modules.serviceCenter.services.serviceAgent.coreTools._documentTools import (
+ _filterUdmByTypeImpl,
+ _getUdmStructureText,
+ _parseUdmJson,
+ _walkUdmBlocksImpl,
+)
+
+
def test_parseUdmJson_dict():
    """A dict payload is returned unchanged."""
    payload = {"id": "1", "role": "document", "children": []}
    assert _parseUdmJson(payload) == payload
+
+
def test_parseUdmJson_string():
    """A JSON string payload is decoded into a dict."""
    encoded = '{"id":"x","role":"document","children":[]}'
    decoded = _parseUdmJson(encoded)
    assert decoded["id"] == "x"
+
+
def test_getUdmStructure_text():
    """The structure summary names the source type and the content-block count."""
    page = {
        "id": "p1",
        "role": "page",
        "index": 0,
        "label": "P1",
        "children": [{"id": "c1", "contentType": "text", "raw": "hi"}],
    }
    doc = {"id": "d1", "role": "document", "sourceType": "pdf", "children": [page]}
    summary = _getUdmStructureText(doc)
    assert "pdf" in summary
    assert "contentBlocks=1" in summary
+
+
def test_walkUdm_blocks():
    """Walking at document scope collects every leaf content block."""
    doc = {
        "id": "d1",
        "role": "document",
        "children": [
            {
                "id": "p1",
                "role": "page",
                "children": [
                    {"id": "t1", "contentType": "text", "raw": "a"},
                    {"id": "i1", "contentType": "image", "raw": ""},
                ],
            },
        ],
    }
    collected = []
    _walkUdmBlocksImpl(doc, collected, "document")
    assert len(collected) == 2
    assert {block["contentType"] for block in collected} == {"text", "image"}
+
+
def test_filter_udm_by_type():
    """Filtering by contentType returns only the matching blocks plus a count."""
    doc = {
        "id": "d1",
        "role": "document",
        "children": [
            {
                "id": "p1",
                "role": "page",
                "children": [
                    {"id": "t1", "contentType": "text"},
                    {"id": "x1", "contentType": "table"},
                ],
            },
        ],
    }
    filtered = _filterUdmByTypeImpl(doc, "table")
    assert filtered["count"] == 1
    assert filtered["nodes"][0]["id"] == "x1"
diff --git a/tests/unit/workflow/test_phase3_context_node.py b/tests/unit/workflow/test_phase3_context_node.py
new file mode 100644
index 00000000..300d861f
--- /dev/null
+++ b/tests/unit/workflow/test_phase3_context_node.py
@@ -0,0 +1,71 @@
+# Tests for Phase 3: context.extractContent node, port types, executor dispatch.
+
+import pytest
+from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES
+from modules.features.graphicalEditor.portTypes import (
+ PORT_TYPE_CATALOG,
+ INPUT_EXTRACTORS,
+ _extractUdmDocument,
+ _extractUdmNodeList,
+ _extractConsolidateResult,
+)
+
+
def test_context_extractContent_node_exists():
    """The context.extractContent id is present in the static catalog."""
    assert "context.extractContent" in {n["id"] for n in STATIC_NODE_TYPES}
+
+
def test_context_extractContent_node_shape():
    """Category, AI flag, dispatch keys, and port schemas match the spec."""
    spec = next(n for n in STATIC_NODE_TYPES if n["id"] == "context.extractContent")
    assert spec["category"] == "context"
    assert spec["meta"]["usesAi"] is False
    assert spec["_method"] == "context"
    assert spec["_action"] == "extractContent"
    assert spec["outputPorts"][0]["schema"] == "UdmDocument"
    assert "DocumentList" in spec["inputPorts"][0]["accepts"]
+
+
def test_udm_port_types_registered():
    """All three UDM-related schemas appear in the port-type catalog."""
    for schemaName in ("UdmDocument", "UdmNodeList", "ConsolidateResult"):
        assert schemaName in PORT_TYPE_CATALOG
+
+
def test_udm_extractors_registered():
    """Each UDM-related schema has an input extractor registered."""
    for schemaName in ("UdmDocument", "UdmNodeList", "ConsolidateResult"):
        assert schemaName in INPUT_EXTRACTORS
+
+
def test_extractUdmDocument_from_direct():
    """A bare UDM dict is accepted as-is by the document extractor."""
    payload = {"id": "d1", "sourceType": "pdf", "sourcePath": "/a.pdf", "children": []}
    assert _extractUdmDocument(payload)["sourceType"] == "pdf"
+
+
def test_extractUdmDocument_from_nested():
    """A UDM dict nested under the 'udm' key is unwrapped."""
    wrapped = {
        "udm": {"id": "d1", "sourceType": "pdf", "sourcePath": "/a.pdf", "children": []},
        "other": 1,
    }
    assert _extractUdmDocument(wrapped)["sourceType"] == "pdf"
+
+
def test_extractUdmNodeList():
    """Node-list payloads retain both the nodes and the count."""
    payload = {"nodes": [{"id": "n1"}, {"id": "n2"}], "count": 2}
    extracted = _extractUdmNodeList(payload)
    assert extracted["count"] == 2
    assert len(extracted["nodes"]) == 2
+
+
def test_extractConsolidateResult():
    """Consolidate payloads keep their mode and count metadata."""
    payload = {"result": {"headers": [], "rows": []}, "mode": "table", "count": 3}
    extracted = _extractConsolidateResult(payload)
    assert extracted["mode"] == "table"
    assert extracted["count"] == 3
+
+
def test_getExecutor_dispatches_context():
    """context.extractContent routes to the generic ActionNodeExecutor."""
    from modules.workflows.automation2.executionEngine import _getExecutor
    from modules.workflows.automation2.executors import ActionNodeExecutor

    dispatched = _getExecutor("context.extractContent", None)
    assert isinstance(dispatched, ActionNodeExecutor)
diff --git a/tests/unit/workflow/test_phase4_workflow_nodes.py b/tests/unit/workflow/test_phase4_workflow_nodes.py
new file mode 100644
index 00000000..c24a485b
--- /dev/null
+++ b/tests/unit/workflow/test_phase4_workflow_nodes.py
@@ -0,0 +1,177 @@
+# Tests for Phase 4: data.consolidate, ai.consolidate, flow.loop level/concurrency, flow.merge dynamic.
+
+import pytest
+from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES
+
+
class TestNodeDefinitions:
    """Shape checks on the static node catalog for the Phase 4 nodes."""

    def _find(self, nodeId):
        # Catalog lookup helper; raises StopIteration when the id is unknown.
        return next(n for n in STATIC_NODE_TYPES if n["id"] == nodeId)

    def test_data_consolidate_exists(self):
        spec = self._find("data.consolidate")
        assert spec["meta"]["usesAi"] is False
        assert spec["outputPorts"][0]["schema"] == "ConsolidateResult"
        modeOptions = spec["parameters"][0]["frontendOptions"]["options"]
        assert "table" in modeOptions
        assert "csvJoin" in modeOptions

    def test_ai_consolidate_exists(self):
        spec = self._find("ai.consolidate")
        assert spec["meta"]["usesAi"] is True
        assert spec["_method"] == "ai"
        assert spec["_action"] == "consolidate"
        assert spec["outputPorts"][0]["schema"] == "ConsolidateResult"

    def test_flow_loop_has_level_and_concurrency(self):
        spec = self._find("flow.loop")
        names = [p["name"] for p in spec["parameters"]]
        assert "level" in names
        assert "concurrency" in names
        byName = {p["name"]: p for p in spec["parameters"]}
        levelOptions = byName["level"]["frontendOptions"]["options"]
        assert "structuralNodes" in levelOptions
        assert "contentBlocks" in levelOptions
        assert byName["concurrency"]["default"] == 1

    def test_flow_loop_accepts_udm(self):
        spec = self._find("flow.loop")
        assert "UdmDocument" in spec["inputPorts"][0]["accepts"]

    def test_flow_merge_has_inputCount(self):
        spec = self._find("flow.merge")
        byName = {p["name"]: p for p in spec["parameters"]}
        assert "inputCount" in byName
        assert byName["inputCount"]["frontendOptions"]["min"] == 2
        assert byName["inputCount"]["frontendOptions"]["max"] == 5

    def test_data_filter_accepts_udm_types(self):
        accepted = self._find("data.filter")["inputPorts"][0]["accepts"]
        assert "UdmDocument" in accepted
        assert "UdmNodeList" in accepted

    def test_data_filter_has_udmContentType_param(self):
        spec = self._find("data.filter")
        assert "udmContentType" in [p["name"] for p in spec["parameters"]]
+
+
@pytest.mark.asyncio
class TestDataConsolidateExecutor:
    """Executor-level tests for the three data.consolidate modes."""

    @staticmethod
    def _context(items):
        # One upstream node 'src' wired into input port 0 of node 'dc1'.
        return {
            "nodeOutputs": {"src": {"items": items, "count": len(items)}},
            "inputSources": {"dc1": {0: ("src", 0)}},
        }

    async def test_consolidate_table_mode(self):
        from modules.workflows.automation2.executors.dataExecutor import DataExecutor

        node = {"type": "data.consolidate", "id": "dc1", "parameters": {"mode": "table"}}
        result = await DataExecutor().execute(node, self._context([{"a": 1, "b": 2}, {"a": 3, "b": 4}]))
        assert result["_success"]
        assert result["mode"] == "table"
        assert result["count"] == 2
        assert len(result["result"]["headers"]) == 2
        assert len(result["result"]["rows"]) == 2

    async def test_consolidate_concat_mode(self):
        from modules.workflows.automation2.executors.dataExecutor import DataExecutor

        node = {"type": "data.consolidate", "id": "dc1", "parameters": {"mode": "concat", "separator": "; "}}
        result = await DataExecutor().execute(node, self._context(["hello", "world"]))
        assert result["_success"]
        assert result["result"] == "hello; world"

    async def test_consolidate_merge_mode(self):
        from modules.workflows.automation2.executors.dataExecutor import DataExecutor

        node = {"type": "data.consolidate", "id": "dc1", "parameters": {"mode": "merge"}}
        result = await DataExecutor().execute(node, self._context([{"a": 1}, {"b": 2}, {"a": 99}]))
        assert result["_success"]
        # Later items win on key collisions.
        assert result["result"]["a"] == 99
        assert result["result"]["b"] == 2
+
+
class TestFlowLoopUdmLevel:
    """Exercises FlowExecutor._resolveUdmLevel directly, bypassing parameter resolution."""

    def test_resolveUdmLevel_structural_nodes(self):
        from modules.workflows.automation2.executors.flowExecutor import FlowExecutor

        doc = {
            "id": "d1",
            "role": "document",
            "children": [
                {"id": "p1", "role": "page", "index": 0, "children": [{"id": "c1"}]},
                {"id": "p2", "role": "page", "index": 1, "children": [{"id": "c2"}]},
            ],
        }
        pages = FlowExecutor()._resolveUdmLevel(doc, "structuralNodes")
        assert len(pages) == 2
        assert pages[0]["id"] == "p1"

    def test_resolveUdmLevel_content_blocks(self):
        from modules.workflows.automation2.executors.flowExecutor import FlowExecutor

        doc = {
            "id": "d1",
            "role": "document",
            "children": [
                {
                    "id": "p1",
                    "role": "page",
                    "children": [
                        {"id": "c1", "contentType": "text"},
                        {"id": "c2", "contentType": "image"},
                    ],
                },
                {"id": "p2", "role": "page", "children": [{"id": "c3", "contentType": "table"}]},
            ],
        }
        blocks = FlowExecutor()._resolveUdmLevel(doc, "contentBlocks")
        assert len(blocks) == 3

    def test_resolveUdmLevel_documents(self):
        from modules.workflows.automation2.executors.flowExecutor import FlowExecutor

        archive = {
            "id": "a1",
            "role": "archive",
            "children": [
                {"id": "d1", "role": "document", "children": []},
                {"id": "d2", "role": "document", "children": []},
                {"id": "x1", "role": "page", "children": []},
            ],
        }
        # Only role == "document" children count at the documents level.
        docs = FlowExecutor()._resolveUdmLevel(archive, "documents")
        assert len(docs) == 2

    @pytest.mark.asyncio
    async def test_loop_auto_dict_with_children(self):
        from unittest.mock import patch

        from modules.workflows.automation2.executors.flowExecutor import FlowExecutor

        udm = {"id": "d1", "role": "document", "children": [{"id": "p1"}, {"id": "p2"}]}
        node = {
            "type": "flow.loop",
            "id": "loop1",
            "parameters": {"items": "direct", "level": "auto"},
        }
        ctx = {
            "nodeOutputs": {"loop1": udm, "direct": udm},
            "connectionMap": {},
            "inputSources": {"loop1": {0: ("direct", 0)}},
        }
        with patch("modules.workflows.automation2.graphUtils.resolveParameterReferences", return_value=udm):
            result = await FlowExecutor().execute(node, ctx)
        assert result["count"] == 2
+
+
@pytest.mark.asyncio
class TestDataFilterUdm:
    """data.filter applied to a UDM document through the udmContentType parameter."""

    async def test_filter_by_udm_content_type(self):
        from modules.workflows.automation2.executors.dataExecutor import DataExecutor

        doc = {
            "id": "d1",
            "role": "document",
            "children": [
                {
                    "id": "p1",
                    "role": "page",
                    "children": [
                        {"id": "c1", "contentType": "text", "raw": "hello"},
                        {"id": "c2", "contentType": "image", "raw": "base64..."},
                    ],
                },
            ],
        }
        node = {
            "type": "data.filter",
            "id": "f1",
            "parameters": {"condition": "", "udmContentType": "image"},
        }
        ctx = {"nodeOutputs": {"src": doc}, "inputSources": {"f1": {0: ("src", 0)}}}
        result = await DataExecutor().execute(node, ctx)
        # Transit-wrapped results carry the payload under "data".
        if isinstance(result, dict) and result.get("_transit"):
            inner = result.get("data")
        else:
            inner = result
        assert inner is not None
diff --git a/tests/unit/workflow/test_phase5_highvol.py b/tests/unit/workflow/test_phase5_highvol.py
new file mode 100644
index 00000000..382c273b
--- /dev/null
+++ b/tests/unit/workflow/test_phase5_highvol.py
@@ -0,0 +1,45 @@
+# Tests for Phase 5: Loop concurrency, StepLog batching, streaming aggregate.
+
+import pytest
+from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES
+
+
def test_loop_concurrency_param_default_1():
    """flow.loop defaults to sequential execution (concurrency 1, range 1..20)."""
    loopSpec = next(n for n in STATIC_NODE_TYPES if n["id"] == "flow.loop")
    concurrency = next(p for p in loopSpec["parameters"] if p["name"] == "concurrency")
    assert concurrency["default"] == 1
    assert concurrency["frontendOptions"]["min"] == 1
    assert concurrency["frontendOptions"]["max"] == 20
+
+
def test_executionEngine_has_batch_threshold():
    """executeGraph's source defines the StepLog batch and aggregate flush thresholds."""
    import inspect

    from modules.workflows.automation2.executionEngine import executeGraph

    engineSource = inspect.getsource(executeGraph)
    for marker in ("STEPLOG_BATCH_THRESHOLD", "AGGREGATE_FLUSH_THRESHOLD"):
        assert marker in engineSource
+
+
def test_executionEngine_has_loop_progress_event():
    """executeGraph's source references the loop_progress SSE event for batch loops."""
    import inspect

    from modules.workflows.automation2.executionEngine import executeGraph

    assert "loop_progress" in inspect.getsource(executeGraph)
+
+
def test_executionEngine_has_concurrency_semaphore():
    """executeGraph's source uses a Semaphore to bound concurrent loop iterations."""
    import inspect

    from modules.workflows.automation2.executionEngine import executeGraph

    assert "Semaphore" in inspect.getsource(executeGraph)
+
+
def test_executionEngine_aggregate_temp_chunks():
    """executeGraph's source references _aggregateTempChunks for streaming flushes."""
    import inspect

    from modules.workflows.automation2.executionEngine import executeGraph

    assert "_aggregateTempChunks" in inspect.getsource(executeGraph)