Merge pull request #149 from valueonag/feat/demo-system-readieness

Feat/demo system readieness
This commit is contained in:
Patrick Motsch 2026-04-30 23:58:26 +02:00 committed by GitHub
commit 7942766931
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
58 changed files with 2689 additions and 1139 deletions

3
app.py
View file

@ -682,6 +682,9 @@ app.include_router(navigationRouter)
from modules.routes.routeWorkflowDashboard import router as workflowDashboardRouter
app.include_router(workflowDashboardRouter)
from modules.routes.routeAutomationWorkspace import router as automationWorkspaceRouter
app.include_router(automationWorkspaceRouter)
# ============================================================================
# PLUG&PLAY FEATURE ROUTERS
# Dynamically load routers from feature containers in modules/features/

View file

@ -162,6 +162,7 @@ class AiCallOptions(BaseModel):
# Provider filtering (from UI multiselect or automation config)
allowedProviders: Optional[List[str]] = Field(default=None, description="List of allowed AI providers to use (empty = all RBAC-permitted)")
allowedModels: Optional[List[str]] = Field(default=None, description="Whitelist of allowed model names (AND-filter with allowedProviders). None/empty = all allowed.")
class AiCallRequest(BaseModel):

View file

@ -110,11 +110,13 @@ class DocumentReferenceList(BaseModel):
# docItem:documentId
references.append(DocumentItemReference(documentId=parts[0]))
# Unknown format - skip or log warning
else:
# Try to parse as simple string (backward compatibility)
# Assume it's a label if it doesn't match known patterns
if refStr:
if not refStr:
continue
import re
if re.match(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', refStr, re.I):
references.append(DocumentItemReference(documentId=refStr))
else:
references.append(DocumentListReference(label=refStr))
return cls(references=references)

View file

@ -6,7 +6,7 @@ Unified JSON document schema and helpers used by both generation prompts and ren
This defines a single canonical template and the supported section types.
"""
from typing import List
from typing import List, Literal, TypedDict, get_args, get_type_hints
# Canonical list of supported section types across the system
supportedSectionTypes: List[str] = [
@ -18,6 +18,21 @@ supportedSectionTypes: List[str] = [
"image",
]
class InlineRun(TypedDict, total=False):
    """Single inline content run. Every paragraph/cell/list-item is a List[InlineRun]."""
    type: Literal["text", "image", "link", "bold", "italic", "code"]
    value: str  # text content (for text/bold/italic/code/link-label)
    fileId: str  # for type=image: reference to FileItem
    base64Data: str  # for type=image: resolved base64 (post-processing)
    mimeType: str  # for type=image: e.g. "image/png"
    widthPt: int  # for type=image: optional render width
    href: str  # for type=link: URL target
# Runtime list of the supported run types. Derived from InlineRun's Literal
# declaration so the list can never drift out of sync with the type.
# get_type_hints resolves lazy/string annotations; get_args preserves the
# declaration order of the Literal values.
supportedInlineRunTypes: List[str] = list(get_args(get_type_hints(InlineRun)["type"]))
# Canonical JSON template used for AI generation (documents array + sections)
# This template is used for STRUCTURE generation - sections have empty elements arrays.
# For content generation, elements arrays will be populated later.

View file

@ -72,7 +72,7 @@ class AutoWorkflow(PowerOnModel):
},
)
featureInstanceId: str = Field(
description="Feature instance ID",
description="Feature instance ID (GE owner instance / RBAC scope)",
json_schema_extra={
"frontend_type": "text",
"frontend_readonly": True,
@ -81,6 +81,17 @@ class AutoWorkflow(PowerOnModel):
"fk_target": {"db": "poweron_app", "table": "FeatureInstance", "labelField": "label"},
},
)
targetFeatureInstanceId: Optional[str] = Field(
default=None,
description="Target feature instance for execution data scope. NULL for templates, mandatory for non-templates.",
json_schema_extra={
"frontend_type": "select",
"frontend_readonly": False,
"frontend_required": False,
"label": "Ziel-Instanz",
"fk_target": {"db": "poweron_app", "table": "FeatureInstance", "labelField": "label"},
},
)
label: str = Field(
description="User-friendly workflow name",
json_schema_extra={"frontend_type": "text", "frontend_required": True, "label": "Bezeichnung"},

View file

@ -217,6 +217,8 @@ class GraphicalEditorObjects:
data["id"] = str(uuid.uuid4())
data["mandateId"] = self.mandateId
data["featureInstanceId"] = self.featureInstanceId
if not data.get("targetFeatureInstanceId") and not data.get("isTemplate"):
data["targetFeatureInstanceId"] = self.featureInstanceId
if "active" not in data or data.get("active") is None:
data["active"] = True
data["invocations"] = normalize_invocations_list(data.get("invocations"))

View file

@ -3,6 +3,15 @@
from modules.shared.i18nRegistry import t
_AI_COMMON_PARAMS = [
{"name": "requireNeutralization", "type": "boolean", "required": False,
"frontendType": "checkbox", "default": False,
"description": t("Eingaben fuer diesen Call neutralisieren")},
{"name": "allowedModels", "type": "array", "required": False,
"frontendType": "modelMultiSelect", "default": [],
"description": t("Erlaubte LLM-Modelle (leer = alle erlaubten)")},
]
AI_NODES = [
{
"id": "ai.prompt",
@ -10,16 +19,21 @@ AI_NODES = [
"label": t("Prompt"),
"description": t("Prompt eingeben und KI führt aus"),
"parameters": [
{"name": "aiPrompt", "type": "string", "required": True, "frontendType": "textarea",
{"name": "aiPrompt", "type": "string", "required": True, "frontendType": "templateTextarea",
"description": t("KI-Prompt")},
{"name": "resultType", "type": "string", "required": False, "frontendType": "select",
"frontendOptions": {"options": ["txt", "json", "md", "csv", "xml", "html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"]},
"description": t("Ausgabeformat"), "default": "txt"},
{"name": "documentList", "type": "string", "required": False, "frontendType": "hidden",
"description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""},
{"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "dataRef",
"description": t("Dokumentenliste (Upstream-Output binden)"), "default": ""},
{"name": "context", "type": "string", "required": False, "frontendType": "dataRef",
"description": t("Kontextdaten fuer den Prompt (Upstream-Output binden)"), "default": ""},
{"name": "documentTheme", "type": "string", "required": False, "frontendType": "select",
"frontendOptions": {"options": ["general", "finance", "legal", "technical", "hr"]},
"description": t("Dokument-Thema (Style-Hinweis fuer den Renderer)"), "default": "general"},
{"name": "simpleMode", "type": "boolean", "required": False, "frontendType": "checkbox",
"description": t("Einfacher Modus"), "default": True},
],
] + _AI_COMMON_PARAMS,
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": [
@ -38,7 +52,7 @@ AI_NODES = [
"parameters": [
{"name": "prompt", "type": "string", "required": True, "frontendType": "textarea",
"description": t("Recherche-Anfrage")},
],
] + _AI_COMMON_PARAMS,
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
@ -53,12 +67,12 @@ AI_NODES = [
"label": t("Dokument zusammenfassen"),
"description": t("Dokumentinhalt zusammenfassen"),
"parameters": [
{"name": "documentList", "type": "string", "required": True, "frontendType": "hidden",
"description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""},
{"name": "documentList", "type": "DocumentList", "required": True, "frontendType": "dataRef",
"description": t("Dokumentenliste (Upstream-Output binden)"), "default": ""},
{"name": "summaryLength", "type": "string", "required": False, "frontendType": "select",
"frontendOptions": {"options": ["brief", "medium", "detailed"]},
"description": t("Kurz, mittel oder ausführlich"), "default": "medium"},
],
] + _AI_COMMON_PARAMS,
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
@ -73,11 +87,11 @@ AI_NODES = [
"label": t("Dokument übersetzen"),
"description": t("Dokument in Zielsprache übersetzen"),
"parameters": [
{"name": "documentList", "type": "string", "required": True, "frontendType": "hidden",
"description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""},
{"name": "documentList", "type": "DocumentList", "required": True, "frontendType": "dataRef",
"description": t("Dokumentenliste (Upstream-Output binden)"), "default": ""},
{"name": "targetLanguage", "type": "string", "required": True, "frontendType": "text",
"description": t("Zielsprache (z.B. de, en, French)")},
],
] + _AI_COMMON_PARAMS,
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
@ -92,12 +106,12 @@ AI_NODES = [
"label": t("Dokument konvertieren"),
"description": t("Dokument in anderes Format konvertieren"),
"parameters": [
{"name": "documentList", "type": "string", "required": True, "frontendType": "hidden",
"description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""},
{"name": "documentList", "type": "DocumentList", "required": True, "frontendType": "dataRef",
"description": t("Dokumentenliste (Upstream-Output binden)"), "default": ""},
{"name": "targetFormat", "type": "string", "required": True, "frontendType": "select",
"frontendOptions": {"options": ["docx", "pdf", "xlsx", "csv", "txt", "html", "json", "md"]},
"description": t("Zielformat")},
],
] + _AI_COMMON_PARAMS,
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
@ -114,7 +128,7 @@ AI_NODES = [
"parameters": [
{"name": "prompt", "type": "string", "required": True, "frontendType": "textarea",
"description": t("Generierungs-Prompt")},
],
] + _AI_COMMON_PARAMS,
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
@ -134,7 +148,7 @@ AI_NODES = [
{"name": "resultType", "type": "string", "required": False, "frontendType": "select",
"frontendOptions": {"options": ["py", "js", "ts", "html", "java", "cpp", "txt", "json", "csv", "xml"]},
"description": t("Datei-Endung der erzeugten Code-Datei"), "default": "py"},
],
] + _AI_COMMON_PARAMS,
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}},
@ -154,7 +168,7 @@ AI_NODES = [
"description": t("Konsolidierungsmodus"), "default": "summarize"},
{"name": "prompt", "type": "string", "required": False, "frontendType": "textarea",
"description": t("Optionaler Prompt für die Konsolidierung"), "default": ""},
],
] + _AI_COMMON_PARAMS,
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}},

View file

@ -62,7 +62,7 @@ EMAIL_NODES = [
{"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
"frontendOptions": {"authority": "msft"},
"description": t("E-Mail-Konto")},
{"name": "context", "type": "string", "required": False, "frontendType": "textarea",
{"name": "context", "type": "string", "required": False, "frontendType": "templateTextarea",
"description": t("Kontext / Brief-Beschreibung für die KI-Komposition"), "default": ""},
{"name": "to", "type": "string", "required": False, "frontendType": "text",
"description": t("Empfänger (komma-separiert, optional für Entwurf)"), "default": ""},

View file

@ -111,6 +111,44 @@ def _validateInstanceAccess(instanceId: str, context: RequestContext) -> str:
return str(instance.mandateId) if instance.mandateId else ""
def _validateTargetInstance(
    workflowData: Dict[str, Any],
    ownerInstanceId: str,
    context: RequestContext,
) -> None:
    """Validate cross-instance access for a workflow's targetFeatureInstanceId.

    Rules actually enforced here:
    - Templates (isTemplate=True) are exempt from all checks.
    - A missing/empty targetFeatureInstanceId passes silently — presence is
      NOT enforced here. NOTE(review): the save path defaults a missing
      target to the owner instance for non-templates, so "mandatory for
      non-templates" holds only after save, not at validation time.
    - A target equal to the GE owner instance needs no extra check.
    - A target different from the owner instance must reference an existing
      FeatureInstance (else HTTP 400) and the calling user must hold an
      enabled FeatureAccess on that target instance (else HTTP 403).

    Raises:
        HTTPException: 400 for a dangling target reference, 403 for a
            target the user has no enabled access to.
    """
    if workflowData.get("isTemplate"):
        return
    targetId = workflowData.get("targetFeatureInstanceId")
    if not targetId:
        # Missing target is tolerated; defaulting happens at save time.
        return
    if targetId == ownerInstanceId:
        # Same instance as the GE owner: access was already validated upstream.
        return
    # Local import to avoid a module-level import cycle with the interfaces layer.
    from modules.interfaces.interfaceDbApp import getRootInterface
    rootInterface = getRootInterface()
    targetInstance = rootInterface.getFeatureInstance(targetId)
    if not targetInstance:
        raise HTTPException(
            status_code=400,
            detail=routeApiMsg("targetFeatureInstanceId refers to a non-existent feature instance"),
        )
    targetAccess = rootInterface.getFeatureAccess(str(context.user.id), targetId)
    if not targetAccess or not targetAccess.enabled:
        raise HTTPException(
            status_code=403,
            detail=routeApiMsg("Access denied to target feature instance"),
        )
@router.get("/{instanceId}/node-types")
@limiter.limit("60/minute")
def get_node_types(
@ -318,9 +356,12 @@ async def post_execute(
workflowId = body.get("workflowId")
req_nodes = graph.get("nodes") or []
workflow_for_envelope: Optional[Dict[str, Any]] = None
targetFeatureInstanceId: Optional[str] = None
if workflowId and not str(workflowId).startswith("transient-"):
iface = getGraphicalEditorInterface(context.user, mandateId, instanceId)
workflow_for_envelope = iface.getWorkflow(workflowId)
if workflow_for_envelope:
targetFeatureInstanceId = workflow_for_envelope.get("targetFeatureInstanceId")
if workflowId and len(req_nodes) == 0:
iface = getGraphicalEditorInterface(context.user, mandateId, instanceId)
wf = iface.getWorkflow(workflowId)
@ -328,10 +369,18 @@ async def post_execute(
graph = wf["graph"]
logger.info("graphicalEditor execute: loaded graph from workflow %s", workflowId)
workflow_for_envelope = wf
targetFeatureInstanceId = wf.get("targetFeatureInstanceId")
if not workflowId:
import uuid
workflowId = f"transient-{uuid.uuid4().hex[:12]}"
logger.info("graphicalEditor execute: using transient workflowId=%s", workflowId)
if targetFeatureInstanceId and targetFeatureInstanceId != instanceId:
_validateTargetInstance(
{"targetFeatureInstanceId": targetFeatureInstanceId},
instanceId,
context,
)
nodes_count = len(graph.get("nodes") or [])
connections_count = len(graph.get("connections") or [])
logger.info(
@ -363,6 +412,7 @@ async def post_execute(
automation2_interface=ge_interface,
run_envelope=run_env,
label=_wfLabel,
targetFeatureInstanceId=targetFeatureInstanceId,
)
logger.info(
"graphicalEditor execute result: success=%s error=%s nodeOutputs_keys=%s failedNode=%s paused=%s",
@ -1371,6 +1421,7 @@ def create_workflow(
) -> dict:
"""Create a new workflow."""
mandateId = _validateInstanceAccess(instanceId, context)
_validateTargetInstance(body, instanceId, context)
iface = getGraphicalEditorInterface(context.user, mandateId, instanceId)
created = iface.createWorkflow(body)
return created
@ -1388,6 +1439,11 @@ def update_workflow(
"""Update a workflow."""
mandateId = _validateInstanceAccess(instanceId, context)
iface = getGraphicalEditorInterface(context.user, mandateId, instanceId)
existing = iface.getWorkflow(workflowId)
if not existing:
raise HTTPException(status_code=404, detail=routeApiMsg("Workflow not found"))
merged = {**existing, **body}
_validateTargetInstance(merged, instanceId, context)
updated = iface.updateWorkflow(workflowId, body)
if not updated:
raise HTTPException(status_code=404, detail=routeApiMsg("Workflow not found"))

View file

@ -361,6 +361,17 @@ QUICK_ACTIONS = [
# The placeholder {{featureInstanceId}} is replaced by _copyTemplateWorkflows.
# ---------------------------------------------------------------------------
# German style instructions appended verbatim to analysis prompts (see
# _buildAnalysisWorkflowGraph: "aiPrompt": prompt + _FINANCE_STYLE_HINT) so
# AI-generated finance documents use a consistent corporate look.
_FINANCE_STYLE_HINT = (
    "\n\nWenn du ein Dokument erstellst, verwende einen professionellen Finanz-Stil:\n"
    "- Schriftart: Calibri\n"
    "- Primaerfarbe: #1F3864 (Dunkelblau)\n"
    "- Akzentfarbe: #2980B9\n"
    "- Tabellen mit dunklem Header (#1F3864, weisse Schrift)\n"
    "- Konservatives, seriöses Layout\n"
    "Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen."
)
def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]:
"""Build a standard analysis graph: trigger -> refreshAccountingData -> ai.prompt."""
return {
@ -370,8 +381,9 @@ def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]:
"parameters": {"featureInstanceId": "{{featureInstanceId}}", "forceRefresh": False}, "position": {"x": 250, "y": 0}},
{"id": "analyse", "type": "ai.prompt", "label": "Analyse", "_method": "ai", "_action": "process",
"parameters": {
"aiPrompt": prompt,
"aiPrompt": prompt + _FINANCE_STYLE_HINT,
"context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]},
"requireNeutralization": False,
"simpleMode": False,
}, "position": {"x": 500, "y": 0}},
],
@ -440,15 +452,33 @@ TEMPLATE_WORKFLOWS = [
{"id": "analyse", "type": "ai.prompt", "label": "Budget-Analyse", "_method": "ai", "_action": "process",
"parameters": {
"aiPrompt": (
"Fuehre einen Budget-Soll/Ist-Vergleich durch.\n"
"Die Budget-Datei (Excel) wurde als Dokument uebergeben. "
"Die aktuellen Buchhaltungsdaten sind im Kontext verfuegbar.\n"
"1. Lies die Soll-Werte aus dem uebergebenen Budget-Dokument\n"
"2. Vergleiche sie mit den Ist-Werten aus der Buchhaltung pro Konto\n"
"3. Berechne die Abweichung (absolut und prozentual)\n"
"4. Erstelle ein Abweichungs-Chart (Balkendiagramm: Soll vs. Ist pro Konto)\n"
"5. Markiere kritische Abweichungen (>10%) und gib eine kurze Einschaetzung"
"Fuehre einen Budget-Soll/Ist-Vergleich durch und liefere EIN Excel-Dokument "
"mit folgender Struktur:\n\n"
"1. Tabelle \"Konten-Vergleich\" -- EINE Tabelle, EINE Zeile pro Konto:\n"
" Spalten: Konto-Nr | Konto-Name | Soll | Ist | Abweichung absolut | "
"Abweichung % | Status (OK / Warnung / Kritisch).\n"
"2. EINE Visualisierung \"Soll vs. Ist gesamt\" -- ein einziges "
"Balkendiagramm UNTER der Tabelle, das ALLE Konten in einer Grafik "
"gegenueberstellt (gruppierte Balken: Soll und Ist je Konto).\n"
"3. Kurzer Management-Summary-Absatz (3-5 Saetze) UNTER dem Chart "
"mit den 3 groessten Abweichungen (>10%) und einer fachlichen "
"Einschaetzung.\n\n"
"Verwende die uebergebene Budget-Datei als Soll-Quelle und die im "
"Kontext bereitgestellten Buchhaltungsdaten als Ist-Quelle.\n"
"WICHTIG: Erstelle KEINEN separaten Chart pro Konto. Nur EIN "
"Uebersichts-Chart ueber alle Konten ist gewuenscht.\n\n"
"Hinweis: Das documentTheme ist 'finance'. Wenn du ein Dokument erstellst, "
"verwende einen professionellen Finanz-Stil:\n"
"- Schriftart: Calibri\n"
"- Primaerfarbe: #1F3864 (Dunkelblau)\n"
"- Akzentfarbe: #2980B9\n"
"- Tabellen mit dunklem Header (#1F3864, weisse Schrift)\n"
"- Konservatives, seriöses Layout\n"
"Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen."
),
"resultType": "xlsx",
"documentTheme": "finance",
"requireNeutralization": False,
"documentList": {"type": "ref", "nodeId": "trigger", "path": ["payload", "documentList"]},
"context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]},
"simpleMode": False,

View file

@ -2,8 +2,8 @@
# All rights reserved.
"""Workspace feature data models — WorkspaceUserSettings."""
from typing import Optional
from pydantic import BaseModel, Field
from typing import List, Optional
from pydantic import Field
from modules.datamodels.datamodelBase import PowerOnModel
from modules.shared.i18nRegistry import i18nModel
import uuid
@ -52,3 +52,18 @@ class WorkspaceUserSettings(PowerOnModel):
description="Max agent rounds override (None = instance default)",
json_schema_extra={"label": "Max. Agenten-Runden", "frontend_type": "number", "frontend_readonly": False, "frontend_required": False},
)
requireNeutralization: bool = Field(
default=False,
description="Default neutralization setting for this user",
json_schema_extra={"label": "Neutralisierung", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
)
allowedProviders: List[str] = Field(
default_factory=list,
description="Allowed AI providers (empty = all permitted by RBAC)",
json_schema_extra={"label": "Erlaubte Provider", "frontend_type": "multiselect", "frontend_readonly": False, "frontend_required": False},
)
allowedModels: List[str] = Field(
default_factory=list,
description="Allowed AI models (empty = all permitted)",
json_schema_extra={"label": "Erlaubte Modelle", "frontend_type": "modelMultiSelect", "frontend_readonly": False, "frontend_required": False},
)

View file

@ -110,6 +110,7 @@ class WorkspaceInputRequest(BaseModel):
workflowId: Optional[str] = Field(default=None, description="Continue existing workflow")
userLanguage: str = Field(default="en", description="User language code")
allowedProviders: List[str] = Field(default_factory=list, description="Restrict AI to these providers")
allowedModels: List[str] = Field(default_factory=list, description="Restrict AI to these models")
requireNeutralization: Optional[bool] = Field(default=None, description="Per-request neutralization override")
@ -635,6 +636,7 @@ async def streamWorkspaceStart(
userLanguage=userInput.userLanguage,
instanceConfig=instanceConfig,
allowedProviders=userInput.allowedProviders,
allowedModels=userInput.allowedModels,
requireNeutralization=userInput.requireNeutralization,
billingFeatureCode=wsBillingFeatureCode,
)
@ -692,6 +694,7 @@ async def _runWorkspaceAgent(
userLanguage: str = "en",
instanceConfig: Dict[str, Any] = None,
allowedProviders: List[str] = None,
allowedModels: List[str] = None,
requireNeutralization: Optional[bool] = None,
billingFeatureCode: Optional[str] = None,
):
@ -715,6 +718,9 @@ async def _runWorkspaceAgent(
logger.info(f"Workspace agent: allowedProviders={allowedProviders}")
else:
logger.debug("Workspace agent: no allowedProviders in request")
if allowedModels:
aiService.services.allowedModels = allowedModels
logger.info(f"Workspace agent: allowedModels={allowedModels}")
if requireNeutralization is not None:
ctx.requireNeutralization = requireNeutralization
@ -2114,6 +2120,76 @@ async def updateGeneralSettings(
return await getGeneralSettings(request, instanceId, context)
# =========================================================================
# User-level AI settings (neutralisation, providers, models)
# =========================================================================
@router.get("/{instanceId}/user-settings")
@limiter.limit("120/minute")
async def getWorkspaceUserSettings(
    request: Request,
    instanceId: str = Path(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Return the calling user's workspace AI settings.

    On first access for a user/instance pair, a settings row is persisted
    with model defaults and that freshly created row is returned.
    """
    _mandateId, _ = _validateInstanceAccess(instanceId, context)
    wsInterface = _getWorkspaceInterface(context, instanceId)
    userId = str(context.user.id)
    settings = wsInterface.getWorkspaceUserSettings(userId)
    if not settings:
        # No row yet: create one with defaults (auto-provisioning).
        settings = wsInterface.saveWorkspaceUserSettings({
            "userId": userId,
            "mandateId": str(context.mandateId) if context.mandateId else "",
            "featureInstanceId": instanceId,
        })
    return JSONResponse({
        "requireNeutralization": settings.requireNeutralization,
        "allowedProviders": settings.allowedProviders,
        "allowedModels": settings.allowedModels,
    })
@router.put("/{instanceId}/user-settings")
@limiter.limit("120/minute")
async def putWorkspaceUserSettings(
    request: Request,
    instanceId: str = Path(...),
    body: dict = Body(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Persist the calling user's workspace AI settings.

    Partial update: only keys present in the request body are written;
    identity fields (user, mandate, instance) are always set server-side.
    """
    _mandateId, _ = _validateInstanceAccess(instanceId, context)
    wsInterface = _getWorkspaceInterface(context, instanceId)
    payload = {
        "userId": str(context.user.id),
        "mandateId": str(context.mandateId) if context.mandateId else "",
        "featureInstanceId": instanceId,
    }
    if "requireNeutralization" in body:
        payload["requireNeutralization"] = bool(body["requireNeutralization"])
    for listKey in ("allowedProviders", "allowedModels"):
        if listKey in body:
            payload[listKey] = body[listKey]
    saved = wsInterface.saveWorkspaceUserSettings(payload)
    return JSONResponse({
        "requireNeutralization": saved.requireNeutralization,
        "allowedProviders": saved.allowedProviders,
        "allowedModels": saved.allowedModels,
    })
# =========================================================================
# RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
# =========================================================================

View file

@ -0,0 +1,198 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Lightweight Bootstrap-Telemetrie fuer entfernte Migrationsroutinen.
Wenn eine idempotente Bootstrap-Migration (z.B. ``_migrateAndDropSysAdminRole``)
aus dem Boot-Pfad entfernt wird, koennte ein theoretischer Edge-Case (alte
DB-Restore, manueller INSERT) wieder Legacy-Daten ins System bringen. Damit das
nicht still bleibt, ruft ``initBootstrap`` nach Abschluss aller Init-Schritte
einmalig ``runLegacyDataChecks`` auf -- das logged WARN bei Restbestand.
Designprinzipien:
- KEINE Schreibzugriffe (rein lesend) -- mit einer Ausnahme: ``_backfillTargetFeatureInstanceId`` schreibt idempotent fehlende targetFeatureInstanceId-Werte zurueck (recordModify).
- Process-lokal gecached (``_cache``), damit identische Boots/Reloads den Check
nur einmal laufen lassen.
- Pro Check eine Recordset-Abfrage; Ausnahmen werden als WARN geloggt, nicht
re-raised, damit Telemetrie den Boot nie crasht.
"""
from __future__ import annotations
import logging
from typing import Any
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.datamodels.datamodelRbac import Role
from modules.datamodels.datamodelUam import Mandate
from modules.shared.mandateNameUtils import isValidMandateName
logger = logging.getLogger(__name__)
_alreadyRan: bool = False
def runLegacyDataChecks(db: DatabaseConnector) -> None:
    """Run all legacy-data telemetry checks exactly once per process.

    Intended to be called at the end of ``initBootstrap``. Repeated calls
    (e.g. dev-server reloads in the same process) are no-ops thanks to the
    module-level guard flag.
    """
    global _alreadyRan
    if _alreadyRan:
        return
    _alreadyRan = True
    # Read-only warning checks against the app database.
    for check in (
        _checkMandateDescription,
        _checkMandateSlugRules,
        _checkLegacyRootMandate,
        _checkSysadminRole,
    ):
        check(db)
    # The single writing step: backfill missing targetFeatureInstanceId values.
    _backfillTargetFeatureInstanceId()
def _safe(checkName: str, fn) -> Any:
try:
return fn()
except Exception as exc:
logger.warning(
"Legacy-data telemetry check '%s' failed: %s: %s",
checkName, type(exc).__name__, exc,
)
return None
def _checkMandateDescription(db: DatabaseConnector) -> None:
    """WARN if any Mandate row still carries a description without a label.

    Such rows were handled by the removed ``_migrateMandateDescriptionToLabel``
    migration; their reappearance indicates a restore of an old database.
    """
    def _do() -> None:
        offenders = [
            row.get("id")
            for row in db.getRecordset(Mandate)
            if row.get("description") and not row.get("label")
        ]
        if offenders:
            logger.warning(
                "Legacy-data check: %d Mandate row(s) still have description "
                "but empty label (removed migration: _migrateMandateDescriptionToLabel). "
                "Run scripts/script_db_audit_legacy_state.py for details. IDs: %s",
                len(offenders), offenders[:5],
            )
    _safe("mandate-description", _do)
def _checkMandateSlugRules(db: DatabaseConnector) -> None:
    """WARN about Mandate rows violating the slug/label rules that the removed
    ``_migrateMandateNameLabelSlugRules`` migration used to enforce: empty
    label, invalid slug name, or a name colliding with an earlier row.
    """
    def _do() -> None:
        offenders: list[str] = []
        takenNames: set[str] = set()
        # Deterministic ordering: the row with the lexically smallest id "owns" a name.
        for row in sorted(db.getRecordset(Mandate), key=lambda x: str(x.get("id", ""))):
            rowId = row.get("id")
            if not rowId:
                continue
            slug = (row.get("name") or "").strip()
            labelRaw = row.get("label")
            hasEmptyLabel = not (labelRaw or "").strip() if labelRaw is not None else True
            badSlug = not isValidMandateName(slug)
            duplicate = slug in takenNames
            # Only a valid, non-colliding name claims its slug for later rows.
            if not badSlug and not duplicate:
                takenNames.add(slug)
            if hasEmptyLabel or badSlug or duplicate:
                offenders.append(str(rowId))
        if offenders:
            logger.warning(
                "Legacy-data check: %d Mandate row(s) violate slug/label rules "
                "(removed migration: _migrateMandateNameLabelSlugRules). "
                "Run scripts/script_db_audit_legacy_state.py for details. IDs: %s",
                len(offenders), offenders[:5],
            )
    _safe("mandate-slug-rules", _do)
def _checkLegacyRootMandate(db: DatabaseConnector) -> None:
    """WARN if a root mandate still exists in one of its legacy forms:
    named "Root" (capitalised), or named "root" without the isSystem flag.
    """
    def _do() -> None:
        # Capitalised legacy name first, then "root" rows missing the system flag.
        offenders = list(db.getRecordset(Mandate, recordFilter={"name": "Root"}))
        for row in db.getRecordset(Mandate, recordFilter={"name": "root"}):
            if not row.get("isSystem"):
                offenders.append(row)
        if offenders:
            logger.warning(
                "Legacy-data check: %d Root-Mandate row(s) still in legacy form "
                "(removed migration: initRootMandate-legacy-branch). IDs: %s",
                len(offenders), [r.get("id") for r in offenders][:5],
            )
    _safe("root-mandate-legacy", _do)
def _checkSysadminRole(db: DatabaseConnector) -> None:
    """WARN if the legacy mandate-wide 'sysadmin' role still exists in the
    system root mandate (its authority moved to ``User.isPlatformAdmin``).
    """
    def _do() -> None:
        systemRoots = db.getRecordset(
            Mandate, recordFilter={"name": "root", "isSystem": True}
        )
        if not systemRoots:
            # No system root mandate: nothing to check.
            return
        legacyRoles = db.getRecordset(
            Role,
            recordFilter={
                "roleLabel": "sysadmin",
                "mandateId": str(systemRoots[0].get("id")),
                "featureInstanceId": None,
            },
        )
        if legacyRoles:
            logger.warning(
                "Legacy-data check: %d 'sysadmin' role(s) still present in root mandate "
                "(removed migration: _migrateAndDropSysAdminRole). "
                "Authority is now User.isPlatformAdmin -- migrate manually. IDs: %s",
                len(legacyRoles), [r.get("id") for r in legacyRoles],
            )
    _safe("sysadmin-role", _do)
def _backfillTargetFeatureInstanceId() -> None:
    """Idempotent backfill: set targetFeatureInstanceId = featureInstanceId
    for all non-template AutoWorkflow rows where it is still NULL.
    Connects to ``poweron_graphicaleditor`` independently.

    NOTE: this is the single WRITING step of this telemetry module (recordModify
    below) — every other check here is read-only.
    """
    def _do() -> None:
        # Local imports keep module import time free of config/feature dependencies.
        from modules.shared.configuration import APP_CONFIG
        from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import AutoWorkflow
        dbHost = APP_CONFIG.get("DB_HOST", "localhost")
        dbUser = APP_CONFIG.get("DB_USER")
        # Prefer the secret-store password; fall back to the plain config value.
        dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD")
        dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
        # Dedicated connection to the graphical-editor database.
        # NOTE(review): uses the private _ensureTableExists API and never closes
        # geDb explicitly — presumably the connector cleans up on GC; confirm.
        geDb = DatabaseConnector(
            dbHost=dbHost,
            dbDatabase="poweron_graphicaleditor",
            dbUser=dbUser,
            dbPassword=dbPassword,
            dbPort=dbPort,
            userId=None,
        )
        if not geDb._ensureTableExists(AutoWorkflow):
            return
        rows = geDb.getRecordset(AutoWorkflow) or []
        backfilled = 0
        for r in rows:
            # Templates legitimately have no target instance — skip.
            if r.get("isTemplate"):
                continue
            # Already backfilled (or set explicitly) — idempotency guard.
            if r.get("targetFeatureInstanceId"):
                continue
            srcId = r.get("featureInstanceId")
            if not srcId:
                continue
            geDb.recordModify(AutoWorkflow, r["id"], {"targetFeatureInstanceId": srcId})
            backfilled += 1
        if backfilled:
            logger.info(
                "targetFeatureInstanceId backfill: set %d non-template AutoWorkflow row(s) "
                "to their featureInstanceId",
                backfilled,
            )
    # Exceptions are logged as WARN and swallowed — boot must never fail here.
    _safe("backfill-targetFeatureInstanceId", _do)

View file

@ -111,6 +111,19 @@ class AiObjects:
processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1,
)
allowedModels = getattr(options, 'allowedModels', None) if options else None
if allowedModels:
filteredModels = [m for m in availableModels if m.name in allowedModels]
if filteredModels:
availableModels = filteredModels
else:
errorMsg = f"No models match allowedModels {allowedModels} (providers={allowedProviders}) for operation {options.operationType}"
logger.error(errorMsg)
return AiCallResponse(
content=errorMsg, modelName="error", priceCHF=0.0,
processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1,
)
failoverModelList = modelSelector.getFailoverModelList(prompt, context, options, availableModels)
if not failoverModelList:
@ -364,6 +377,19 @@ class AiObjects:
)
return
allowedModels = getattr(options, 'allowedModels', None) if options else None
if allowedModels:
filtered = [m for m in availableModels if m.name in allowedModels]
if filtered:
availableModels = filtered
else:
yield AiCallResponse(
content=f"No models match allowedModels {allowedModels} (providers={allowedProviders}) for operation {options.operationType}",
modelName="error", priceCHF=0.0, processingTime=0.0,
bytesSent=0, bytesReceived=0, errorCount=1,
)
return
failoverModelList = modelSelector.getFailoverModelList(
request.prompt, request.context or "", options, availableModels
)
@ -516,6 +542,14 @@ class AiObjects:
else:
logger.warning(f"No embedding models match allowedProviders {allowedProviders}")
allowedModels = getattr(options, 'allowedModels', None) if options else None
if allowedModels:
filtered = [m for m in availableModels if m.name in allowedModels]
if filtered:
availableModels = filtered
else:
logger.warning(f"No embedding models match allowedModels {allowedModels}")
failoverModelList = modelSelector.getFailoverModelList(
combinedText, "", options, availableModels
)

View file

@ -56,14 +56,8 @@ def initBootstrap(db: DatabaseConnector) -> None:
logger.info("Starting system bootstrap")
# Initialize root mandate
mandateId = initRootMandate(db)
# Migrate existing mandate records: description -> label
_migrateMandateDescriptionToLabel(db)
_migrateMandateNameLabelSlugRules(db)
# Clean up duplicate roles and fix corrupted templates FIRST
_deduplicateRoles(db)
# Initialize system role TEMPLATES (mandateId=None, isSystemRole=True)
@ -76,14 +70,6 @@ def initBootstrap(db: DatabaseConnector) -> None:
# This also serves as migration for existing mandates that don't have instance roles yet
_ensureAllMandatesHaveSystemRoles(db)
# Migration: eliminate the legacy ``sysadmin`` role in root mandate
# (replaced by ``User.isPlatformAdmin`` flag — see
# wiki/c-work/4-done/2026-04-sysadmin-authority-split.md).
# Idempotent: noop after first successful run.
if mandateId:
_migrateAndDropSysAdminRole(db, mandateId)
# Ensure UI rules for navigation items (admin/user/viewer roles)
_ensureUiContextRules(db)
# Initialize admin user
@ -129,9 +115,22 @@ def initBootstrap(db: DatabaseConnector) -> None:
# Bootstrap system workflow templates for graphical editor
_bootstrapSystemTemplates(db)
# Sync feature template workflows (update graph of existing instance workflows
# whose templateSourceId matches a current code-defined template)
_syncFeatureTemplateWorkflows()
# Ensure billing settings and accounts exist for all mandates
_bootstrapBilling()
# Telemetry: warn if leftovers of the removed idempotent migrations
# reappear (edge case: an old DB restore or similar).
# Does not write anything and does not fail the boot.
try:
from modules.interfaces._legacyMigrationTelemetry import runLegacyDataChecks
runLegacyDataChecks(db)
except Exception as e:
logger.warning(f"Legacy-data telemetry skipped: {e}")
def _bootstrapBilling() -> None:
"""
@ -195,6 +194,97 @@ def _bootstrapSystemTemplates(db: DatabaseConnector) -> None:
logger.warning(f"System workflow template bootstrap failed: {e}")
def _syncFeatureTemplateWorkflows() -> None:
    """Sync existing instance-scoped workflows with current code-defined templates.

    For each feature that exposes getTemplateWorkflows(), find all AutoWorkflow
    rows whose templateSourceId matches a template ID and update their graph
    if the code-defined version has changed. Preserves instance-specific
    fields (label, tags, targetFeatureInstanceId, invocations, active).
    Idempotent, runs on every boot.

    Never raises: all failures are logged as warnings so boot continues.
    """
    import json
    try:
        from modules.system.registry import loadFeatureMainModules
        from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import AutoWorkflow
        from modules.features.graphicalEditor.interfaceFeatureGraphicalEditor import graphicalEditorDatabase
        mainModules = loadFeatureMainModules()
        # Collect code-defined templates keyed by their stable template id.
        templatesBySourceId: dict = {}
        for featureCode, mod in mainModules.items():
            getTemplateWorkflows = getattr(mod, "getTemplateWorkflows", None)
            if not getTemplateWorkflows:
                continue
            try:
                templates = getTemplateWorkflows() or []
            except Exception:
                # One broken feature module must not abort the whole sync.
                continue
            for tpl in templates:
                tplId = tpl.get("id")
                if tplId:
                    templatesBySourceId[tplId] = tpl
        if not templatesBySourceId:
            logger.info("_syncFeatureTemplateWorkflows: no templates found, skipping")
            return
        logger.info(f"_syncFeatureTemplateWorkflows: found {len(templatesBySourceId)} template(s): {list(templatesBySourceId.keys())}")
        greenfieldDb = DatabaseConnector(
            dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
            dbDatabase=graphicalEditorDatabase,
            dbUser=APP_CONFIG.get("DB_USER"),
            dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD"),
        )
        try:
            updated = 0
            for sourceId, tpl in templatesBySourceId.items():
                # Only instance workflows (not the templates themselves) are synced.
                instances = greenfieldDb.getRecordset(AutoWorkflow, recordFilter={
                    "templateSourceId": sourceId,
                    "isTemplate": False,
                })
                if not instances:
                    continue
                canonicalGraph = tpl.get("graph", {})
                for inst in instances:
                    instId = inst.get("id") if isinstance(inst, dict) else getattr(inst, "id", None)
                    targetInstanceId = (
                        inst.get("targetFeatureInstanceId") if isinstance(inst, dict)
                        else getattr(inst, "targetFeatureInstanceId", None)
                    ) or ""
                    # Substitute the per-instance placeholder via a JSON round-trip,
                    # which also deep-copies the canonical graph per instance.
                    graphJson = json.dumps(canonicalGraph)
                    graphJson = graphJson.replace("{{featureInstanceId}}", targetInstanceId)
                    newGraph = json.loads(graphJson)
                    existingGraph = inst.get("graph") if isinstance(inst, dict) else getattr(inst, "graph", None)
                    if isinstance(existingGraph, str):
                        # Stored graph may be serialized; normalize for comparison.
                        try:
                            existingGraph = json.loads(existingGraph)
                        except Exception:
                            existingGraph = None
                    if existingGraph == newGraph:
                        logger.debug(f"_syncFeatureTemplateWorkflows: graph unchanged for workflow {instId} (template={sourceId})")
                        continue
                    logger.debug(f"_syncFeatureTemplateWorkflows: graph DIFFERS for workflow {instId} (template={sourceId}), updating")
                    greenfieldDb.recordModify(AutoWorkflow, instId, {"graph": newGraph})
                    updated += 1
                    logger.info(f"_syncFeatureTemplateWorkflows: updated graph for workflow {instId} (template={sourceId})")
            if updated:
                logger.info(f"_syncFeatureTemplateWorkflows: synced {updated} workflow(s) with current templates")
            else:
                logger.info("_syncFeatureTemplateWorkflows: all instance graphs already match current templates")
        finally:
            # FIX: close the connection even when a sync step raises; previously
            # close() was only reached on the fully successful path, leaking the
            # connection whenever recordModify/getRecordset failed mid-loop.
            greenfieldDb.close()
    except Exception as e:
        logger.warning(f"Feature template workflow sync failed: {e}")
def _buildSystemTemplates():
"""Build the graph definitions for platform system templates."""
return [
@ -396,21 +486,12 @@ def initRootMandate(db: DatabaseConnector) -> Optional[str]:
Returns:
Mandate ID if created or found, None otherwise
"""
# Find existing root mandate by name AND isSystem flag
existingMandates = db.getRecordset(Mandate, recordFilter={"name": "root", "isSystem": True})
if existingMandates:
mandateId = existingMandates[0].get("id")
logger.info(f"Root mandate already exists with ID {mandateId}")
return mandateId
# Check for legacy root mandates (name="Root" without isSystem flag) and migrate
legacyMandates = db.getRecordset(Mandate, recordFilter={"name": "Root"})
if legacyMandates:
mandateId = legacyMandates[0].get("id")
logger.info(f"Migrating legacy Root mandate {mandateId}: setting name='root', isSystem=True")
db.recordModify(Mandate, mandateId, {"name": "root", "isSystem": True})
return mandateId
logger.info("Creating Root mandate")
rootMandate = Mandate(name="root", label="Root", isSystem=True, enabled=True)
createdMandate = db.recordCreate(Mandate, rootMandate)
@ -419,98 +500,6 @@ def initRootMandate(db: DatabaseConnector) -> Optional[str]:
return mandateId
def _migrateMandateDescriptionToLabel(db: DatabaseConnector) -> None:
    """
    Migration: Rename 'description' field to 'label' in all Mandate records.

    Copies existing 'description' values to 'label' and removes the old field.
    Safe to run multiple times (idempotent).
    """
    migrated = 0
    for record in db.getRecordset(Mandate):
        recordId = record.get("id")
        # Only migrate when a real description exists and no label is set yet.
        descriptionPresent = "description" in record and record.get("description") is not None
        labelPresent = "label" in record and record.get("label") is not None
        if not descriptionPresent or labelPresent:
            continue
        db.recordModify(Mandate, recordId, {"label": record["description"]})
        migrated += 1
        logger.info(f"Migrated mandate {recordId}: description -> label")
    if migrated > 0:
        logger.info(f"Migrated {migrated} mandate(s) from description to label")
    else:
        logger.debug("No mandate description->label migration needed")
def _migrateMandateNameLabelSlugRules(db: DatabaseConnector) -> None:
    """
    Migration: normalize Mandate.name to the slug rules ([a-z0-9-], length 2..32, single
    hyphen segments) and ensure Mandate.label is non-empty.

    Rules (see wiki/c-work/1-plan/2026-04-mandate-name-label-logic.md):
    1. If ``label`` is empty/None set ``label := name`` (or "Mandate" when both empty).
    2. If ``name`` is not a valid slug, or collides with an earlier mandate in stable id
       order, allocate a unique slug from the (now non-empty) ``label`` using
       ``slugifyMandateName`` + ``allocateUniqueMandateSlug``.

    Idempotent: a second run is a no-op because all valid names stay valid and stay unique.
    Each rename and label fill-in is logged for audit.

    Args:
        db: Database connector used to read and modify Mandate records.
    """
    from modules.shared.mandateNameUtils import (
        allocateUniqueMandateSlug,
        isValidMandateName,
        slugifyMandateName,
    )
    allRows = db.getRecordset(Mandate)
    if not allRows:
        return
    # Processing in stable id order makes slug allocation deterministic
    # across runs, which is what makes the rename step idempotent.
    sortedRows = sorted(allRows, key=lambda r: str(r.get("id", "")))
    used: set[str] = set()  # slugs already claimed by earlier mandates
    labelFills = 0
    nameRenames: list[tuple[str, str, str]] = []  # (mandateId, oldName, newName)
    for rec in sortedRows:
        mid = rec.get("id")
        if not mid:
            continue
        name = (rec.get("name") or "").strip()
        labelRaw = rec.get("label")
        label = (labelRaw or "").strip() if labelRaw is not None else ""
        if not label:
            # Rule 1: fill an empty label from name, or use a generic fallback.
            label = name if name else "Mandate"
            db.recordModify(Mandate, mid, {"label": label})
            labelFills += 1
            logger.info(f"Mandate {mid}: filled empty label with '{label}'")
        nameFits = isValidMandateName(name)
        nameCollides = name in used
        if nameFits and not nameCollides:
            # Name is already a valid, unique slug — claim it and move on.
            used.add(name)
            continue
        # Rule 2: derive a fresh unique slug from the (non-empty) label.
        base = slugifyMandateName(label) or "mn"
        newName = allocateUniqueMandateSlug(base, used)
        used.add(newName)
        if newName != name:
            db.recordModify(Mandate, mid, {"name": newName})
            nameRenames.append((str(mid), name, newName))
            logger.info(f"Mandate {mid}: renamed name '{name}' -> '{newName}'")
    if labelFills or nameRenames:
        logger.info(
            "Mandate name/label slug migration: %d label fill-in(s), %d name rename(s)",
            labelFills, len(nameRenames),
        )
    else:
        logger.debug("No mandate name/label slug migration needed")
def initAdminUser(db: DatabaseConnector, mandateId: Optional[str]) -> Optional[str]:
"""
Creates the Admin user if it doesn't exist.
@ -837,101 +826,6 @@ def copySystemRolesToMandate(db: DatabaseConnector, mandateId: str) -> int:
return copiedCount
def _migrateAndDropSysAdminRole(db: DatabaseConnector, mandateId: str) -> None:
    """
    One-shot migration: eliminate the legacy ``sysadmin`` role in the root mandate.

    Authority semantics moved to two orthogonal flags on User:
    - ``isSysAdmin``       Infrastructure-Operator (RBAC bypass)
    - ``isPlatformAdmin``  Cross-Mandate-Governance (no bypass)

    Migration steps (idempotent, per legacy role):
    1. Find sysadmin role(s) in root mandate. If none exist - done.
    2. For every UserMandateRole row referencing such a role: set
       ``user.isPlatformAdmin = True`` (preserves cross-mandate authority).
    3. Delete those UserMandateRole rows.
    4. Delete AccessRules attached to the sysadmin role.
    5. Delete the sysadmin Role record.

    Args:
        db: Database connector instance
        mandateId: Root mandate ID
    """
    sysadminRoles = db.getRecordset(
        Role,
        recordFilter={"roleLabel": "sysadmin", "mandateId": mandateId, "featureInstanceId": None},
    )
    if not sysadminRoles:
        logger.debug("Sysadmin role migration: no legacy sysadmin role present, nothing to do")
        return
    sysadminRoleIds = [str(r.get("id")) for r in sysadminRoles if r.get("id")]
    logger.warning(
        f"Sysadmin role migration: found {len(sysadminRoleIds)} legacy sysadmin role(s) "
        f"in root mandate, migrating to isPlatformAdmin flag"
    )
    promoted = 0
    for sysadminRoleId in sysadminRoleIds:
        umRoleRows = db.getRecordset(
            UserMandateRole, recordFilter={"roleId": sysadminRoleId}
        )
        userMandateIds = [str(r.get("userMandateId")) for r in umRoleRows if r.get("userMandateId")]
        # 1) Promote every holder to isPlatformAdmin=True.
        #    FIX: previously a holder-less role hit `continue` here, which
        #    skipped the AccessRule/Role deletion below, so the legacy role
        #    survived forever and the migration re-ran on every boot.
        #    Cleanup now always proceeds even when there are no holders.
        userIds = set()
        for umId in userMandateIds:
            # Resolve userIds via UserMandate.
            ums = db.getRecordset(UserMandate, recordFilter={"id": umId})
            for um in ums:
                uid = um.get("userId") if isinstance(um, dict) else getattr(um, "userId", None)
                if uid:
                    userIds.add(str(uid))
        for userId in userIds:
            users = db.getRecordset(UserInDB, recordFilter={"id": userId})
            if not users:
                continue
            current = users[0].get("isPlatformAdmin", False)
            if not current:
                db.recordModify(UserInDB, userId, {"isPlatformAdmin": True})
                promoted += 1
                logger.warning(
                    f"Sysadmin role migration: granted isPlatformAdmin=True to user {userId}"
                )
        # 2) Delete UserMandateRole rows (best-effort; log individual failures).
        for umRow in umRoleRows:
            rowId = umRow.get("id") if isinstance(umRow, dict) else getattr(umRow, "id", None)
            if rowId:
                try:
                    db.recordDelete(UserMandateRole, str(rowId))
                except Exception as e:
                    logger.error(f"Sysadmin role migration: failed to drop UserMandateRole {rowId}: {e}")
        # 3) Delete AccessRules attached to this role.
        accessRules = db.getRecordset(AccessRule, recordFilter={"roleId": sysadminRoleId})
        for ar in accessRules:
            arId = ar.get("id") if isinstance(ar, dict) else getattr(ar, "id", None)
            if arId:
                try:
                    db.recordDelete(AccessRule, str(arId))
                except Exception as e:
                    logger.error(f"Sysadmin role migration: failed to drop AccessRule {arId}: {e}")
        # 4) Delete the Role itself.
        try:
            db.recordDelete(Role, sysadminRoleId)
        except Exception as e:
            logger.error(f"Sysadmin role migration: failed to drop Role {sysadminRoleId}: {e}")
    logger.warning(
        f"Sysadmin role migration: completed; promoted {promoted} user(s) to isPlatformAdmin"
    )
def _getRoleId(db: DatabaseConnector, roleLabel: str) -> Optional[str]:
"""
Get role ID by label, using cache or database lookup.

View file

@ -643,41 +643,10 @@ def aggregateMandateRagTotalBytes(mandateId: str) -> int:
if rid and str(rid) not in byId:
byId[str(rid)] = row
# DEPRECATED: file-ID-correlation fallback from poweron_management.
# Only needed for pre-migration data where mandateId/featureInstanceId on the
# FileContentIndex are empty. Safe to remove once all environments are migrated.
_fallbackCount = 0
try:
from modules.datamodels.datamodelFiles import FileItem
from modules.interfaces.interfaceDbManagement import ComponentObjects
mgmtDb = ComponentObjects().db
knowledgeIf = getInterface(None)
fileIds: set = set()
for f in mgmtDb.getRecordset(FileItem, recordFilter={"mandateId": mandateId}):
fid = f.get("id") if isinstance(f, dict) else getattr(f, "id", None)
if fid:
fileIds.add(str(fid))
for instId in instIds:
for f in mgmtDb.getRecordset(FileItem, recordFilter={"featureInstanceId": instId}):
fid = f.get("id") if isinstance(f, dict) else getattr(f, "id", None)
if fid:
fileIds.add(str(fid))
for fid in fileIds:
if fid in byId:
continue
row = knowledgeIf.getFileContentIndex(fid)
if row:
byId[fid] = row
_fallbackCount += 1
except Exception as e:
logger.warning("aggregateMandateRagTotalBytes fallback failed: %s", e)
total = sum(int(r.get("totalSize") or 0) for r in byId.values())
logger.info(
"aggregateMandateRagTotalBytes(%s): %d indexes, %d bytes (fallback: %d)",
mandateId, len(byId), total, _fallbackCount,
"aggregateMandateRagTotalBytes(%s): %d indexes, %d bytes",
mandateId, len(byId), total,
)
return total

View file

@ -347,6 +347,7 @@ class FeatureInterface:
"templateSourceId": templateId,
"templateScope": "instance",
"active": True,
"targetFeatureInstanceId": instanceId,
})
copied += 1
except Exception as e:

View file

@ -0,0 +1,305 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
User-facing Automation Workspace API.
Lists workflow runs the user can access (via FeatureAccess on
targetFeatureInstanceId) and provides detail views with step logs
and linked files. Designed for the "Workspace" tab under
Nutzung > Automation.
"""
import logging
import math
from typing import Optional
from fastapi import APIRouter, Depends, Request, Query, Path, HTTPException
from slowapi import Limiter
from slowapi.util import get_remote_address
from modules.auth.authentication import getRequestContext, RequestContext
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.configuration import APP_CONFIG
from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import (
AutoRun,
AutoStepLog,
AutoWorkflow,
)
from modules.features.graphicalEditor.interfaceFeatureGraphicalEditor import graphicalEditorDatabase
from modules.shared.i18nRegistry import apiRouteContext
routeApiMsg = apiRouteContext("routeAutomationWorkspace")
logger = logging.getLogger(__name__)
limiter = Limiter(key_func=get_remote_address)
router = APIRouter(prefix="/api/automations/runs", tags=["AutomationWorkspace"])
def _getDb() -> DatabaseConnector:
    """Open a connector to the graphical-editor database using app config."""
    password = APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD")
    connector = DatabaseConnector(
        dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
        dbDatabase=graphicalEditorDatabase,
        dbUser=APP_CONFIG.get("DB_USER"),
        dbPassword=password,
        dbPort=int(APP_CONFIG.get("DB_PORT", 5432)),
        userId=None,
    )
    return connector
def _getUserAccessibleInstanceIds(userId: str) -> list[str]:
    """Return all featureInstanceIds the user has enabled FeatureAccess for."""
    from modules.interfaces.interfaceDbApp import getRootInterface
    accessRows = getRootInterface().getFeatureAccessesForUser(userId) or []
    instanceIds: list[str] = []
    for access in accessRows:
        # Keep only enabled grants that actually point at an instance.
        if access.featureInstanceId and access.enabled:
            instanceIds.append(access.featureInstanceId)
    return instanceIds
_FILE_REF_KEYS = ("fileId", "documentId", "fileIds", "documents")
def _extractFileIdsFromValue(value, accumulator: set[str]) -> None:
"""Recursively scan a value (dict/list/str) for file id references."""
if isinstance(value, dict):
for key, sub in value.items():
if key in _FILE_REF_KEYS:
_collectFileIdsFromRef(sub, accumulator)
else:
_extractFileIdsFromValue(sub, accumulator)
elif isinstance(value, list):
for item in value:
_extractFileIdsFromValue(item, accumulator)
def _collectFileIdsFromRef(val, accumulator: set[str]) -> None:
"""Add file ids from a value located under a known file-reference key."""
if isinstance(val, str) and val:
accumulator.add(val)
elif isinstance(val, list):
for v in val:
if isinstance(v, str) and v:
accumulator.add(v)
elif isinstance(v, dict) and v.get("id"):
accumulator.add(v["id"])
elif isinstance(val, dict) and val.get("id"):
accumulator.add(val["id"])
@router.get("")
@limiter.limit("60/minute")
def listWorkspaceRuns(
    request: Request,
    scope: str = Query("mine", description="mine = own runs, mandate = all accessible"),
    status: Optional[str] = Query(None, description="Filter by run status"),
    targetInstanceId: Optional[str] = Query(None, description="Filter by targetFeatureInstanceId"),
    workflowId: Optional[str] = Query(None, description="Filter by workflow"),
    limit: int = Query(50, ge=1, le=200),
    offset: int = Query(0, ge=0),
    context: RequestContext = Depends(getRequestContext),
) -> dict:
    """List workflow runs visible to the user.

    scope=mine: only runs owned by the user.
    scope=mandate: all runs where the user has FeatureAccess on the
    workflow's targetFeatureInstanceId.

    Returns a paginated dict: {"runs": [...], "total", "limit", "offset"}.
    Raises 401 when unauthenticated, 403 when targetInstanceId is not
    accessible to the user.
    """
    db = _getDb()
    # Missing tables (fresh install) are treated as "no runs", not an error.
    if not db._ensureTableExists(AutoRun):
        return {"runs": [], "total": 0, "limit": limit, "offset": offset}
    userId = str(context.user.id) if context.user else None
    if not userId:
        raise HTTPException(status_code=401, detail=routeApiMsg("Authentication required"))
    accessibleInstanceIds = _getUserAccessibleInstanceIds(userId)
    if not accessibleInstanceIds:
        # No FeatureAccess grants means nothing is visible, regardless of scope.
        return {"runs": [], "total": 0, "limit": limit, "offset": offset}
    if not db._ensureTableExists(AutoWorkflow):
        return {"runs": [], "total": 0, "limit": limit, "offset": offset}
    wfFilter: dict = {}
    if targetInstanceId:
        if targetInstanceId not in accessibleInstanceIds:
            raise HTTPException(status_code=403, detail=routeApiMsg("Access denied to target instance"))
        wfFilter["targetFeatureInstanceId"] = targetInstanceId
    workflows = db.getRecordset(AutoWorkflow, recordFilter=wfFilter or None) or []
    # Restrict to workflows whose target (or owning) instance the user can access.
    visibleWfIds: set[str] = set()
    wfMap: dict = {}
    for wf in workflows:
        wfDict = dict(wf)
        # Fall back to featureInstanceId when no explicit target is set.
        tid = wfDict.get("targetFeatureInstanceId") or wfDict.get("featureInstanceId")
        if tid and tid in accessibleInstanceIds:
            wfId = wfDict.get("id")
            if wfId:
                visibleWfIds.add(wfId)
                wfMap[wfId] = wfDict
    if workflowId:
        # An explicit workflowId outside the visible set yields an empty page,
        # not a 403 (does not leak whether the workflow exists).
        if workflowId not in visibleWfIds:
            return {"runs": [], "total": 0, "limit": limit, "offset": offset}
        visibleWfIds = {workflowId}
    if not visibleWfIds:
        return {"runs": [], "total": 0, "limit": limit, "offset": offset}
    # NOTE(review): fetches ALL runs and filters/paginates in Python —
    # presumably acceptable at current data volumes; consider pushing the
    # workflowId/status filters into the recordFilter if this grows.
    allRuns = db.getRecordset(AutoRun, recordFilter={}) or []
    filtered = []
    for r in allRuns:
        row = dict(r)
        if row.get("workflowId") not in visibleWfIds:
            continue
        if scope == "mine" and row.get("ownerId") != userId:
            continue
        if status and row.get("status") != status:
            continue
        filtered.append(row)
    # Newest first; fall back to creation timestamp, then 0 for missing values.
    filtered.sort(
        key=lambda x: x.get("startedAt") or x.get("sysCreatedAt") or 0,
        reverse=True,
    )
    total = len(filtered)
    page = filtered[offset: offset + limit]
    from modules.routes.routeHelpers import enrichRowsWithFkLabels, resolveMandateLabels, resolveInstanceLabels
    # Decorate each page row with display labels before FK-label enrichment.
    for row in page:
        wf = wfMap.get(row.get("workflowId"), {})
        row["workflowLabel"] = row.get("label") or wf.get("label") or row.get("workflowId", "")
        row["targetFeatureInstanceId"] = wf.get("targetFeatureInstanceId") or wf.get("featureInstanceId")
    enrichRowsWithFkLabels(
        page,
        labelResolvers={
            "mandateId": resolveMandateLabels,
            "targetFeatureInstanceId": resolveInstanceLabels,
        },
    )
    # Rename the auto-generated *Label keys to the API's response field names.
    for row in page:
        row["targetInstanceLabel"] = row.pop("targetFeatureInstanceIdLabel", None)
        row["mandateLabel"] = row.pop("mandateIdLabel", None)
    return {"runs": page, "total": total, "limit": limit, "offset": offset}
@router.get("/{runId}/detail")
@limiter.limit("60/minute")
def getWorkspaceRunDetail(
    request: Request,
    runId: str = Path(..., description="Run ID"),
    context: RequestContext = Depends(getRequestContext),
) -> dict:
    """Get full detail for a single run: metadata, step logs, linked files.

    Access is granted when the caller owns the run, has FeatureAccess on the
    workflow's target instance, or is a platform admin. Raises 401 when
    unauthenticated, 404 when the run does not exist, 403 otherwise.
    """
    db = _getDb()
    userId = str(context.user.id) if context.user else None
    if not userId:
        raise HTTPException(status_code=401, detail=routeApiMsg("Authentication required"))
    # A missing table is indistinguishable from a missing run for the caller.
    if not db._ensureTableExists(AutoRun):
        raise HTTPException(status_code=404, detail=routeApiMsg("Run not found"))
    runs = db.getRecordset(AutoRun, recordFilter={"id": runId})
    if not runs:
        raise HTTPException(status_code=404, detail=routeApiMsg("Run not found"))
    run = dict(runs[0])
    wfId = run.get("workflowId")
    workflow: dict = {}
    if wfId and db._ensureTableExists(AutoWorkflow):
        wfs = db.getRecordset(AutoWorkflow, recordFilter={"id": wfId})
        if wfs:
            workflow = dict(wfs[0])
    # Target instance determines mandate-scoped visibility (fallback: owning instance).
    tid = workflow.get("targetFeatureInstanceId") or workflow.get("featureInstanceId")
    accessibleIds = _getUserAccessibleInstanceIds(userId)
    isOwner = run.get("ownerId") == userId
    # Deny unless: owner, OR accessible target instance, OR platform admin.
    if not isOwner and (not tid or tid not in accessibleIds) and not context.isPlatformAdmin:
        raise HTTPException(status_code=403, detail=routeApiMsg("Access denied"))
    steps: list = []
    if db._ensureTableExists(AutoStepLog):
        stepRecords = db.getRecordset(AutoStepLog, recordFilter={"runId": runId}) or []
        steps = [dict(s) for s in stepRecords]
        # Chronological order; missing timestamps sort first.
        steps.sort(key=lambda s: s.get("startedAt") or 0)
    # Collect every file id referenced by step inputs/outputs and run outputs.
    allFileIds: set[str] = set()
    perStepFileIds: list[tuple[set[str], set[str]]] = []  # (inputIds, outputIds) per step
    for step in steps:
        inputIds: set[str] = set()
        outputIds: set[str] = set()
        _extractFileIdsFromValue(step.get("inputSnapshot") or {}, inputIds)
        _extractFileIdsFromValue(step.get("output") or {}, outputIds)
        perStepFileIds.append((inputIds, outputIds))
        allFileIds.update(inputIds)
        allFileIds.update(outputIds)
    nodeOutputs = run.get("nodeOutputs") or {}
    runLevelIds: set[str] = set()
    _extractFileIdsFromValue(nodeOutputs, runLevelIds)
    allFileIds.update(runLevelIds)
    # Best-effort metadata lookup; unresolvable files are simply omitted.
    fileMetaById: dict[str, dict] = {}
    try:
        from modules.datamodels.datamodelFiles import FileItem
        from modules.interfaces.interfaceDbManagement import ComponentObjects
        mgmtDb = ComponentObjects().db
        if mgmtDb._ensureTableExists(FileItem):
            for fid in allFileIds:
                try:
                    rec = mgmtDb.getRecord(FileItem, fid)
                    if rec:
                        recDict = dict(rec)
                        fileMetaById[fid] = {
                            "id": fid,
                            "fileName": recDict.get("fileName") or recDict.get("name"),
                        }
                except Exception:
                    # Skip individual files that fail to resolve.
                    pass
    except Exception as e:
        logger.warning("getWorkspaceRunDetail: file lookup failed: %s", e)
    def _resolveFileList(ids: set[str]) -> list[dict]:
        # Map ids to resolved metadata, silently dropping unknown ids.
        return [fileMetaById[fid] for fid in ids if fid in fileMetaById]
    assignedFileIds: set[str] = set()
    for step, (inputIds, outputIds) in zip(steps, perStepFileIds):
        step["inputFiles"] = _resolveFileList(inputIds)
        step["outputFiles"] = _resolveFileList(outputIds)
        assignedFileIds.update(inputIds)
        assignedFileIds.update(outputIds)
    # Files referenced only at run level (not by any step).
    unassignedFiles = _resolveFileList(allFileIds - assignedFileIds)
    allFiles = _resolveFileList(allFileIds)
    run["workflowLabel"] = run.get("label") or workflow.get("label") or wfId
    run["targetFeatureInstanceId"] = tid
    targetInstanceLabel = None
    if tid:
        try:
            from modules.routes.routeHelpers import resolveInstanceLabels
            labelMap = resolveInstanceLabels([tid])
            targetInstanceLabel = labelMap.get(tid)
        except Exception:
            # Label resolution is cosmetic; ignore failures.
            pass
    run["targetInstanceLabel"] = targetInstanceLabel
    return {
        "run": run,
        "workflow": {
            "id": workflow.get("id"),
            "label": workflow.get("label"),
            "targetFeatureInstanceId": tid,
            "featureInstanceId": workflow.get("featureInstanceId"),
            "tags": workflow.get("tags", []),
        } if workflow else None,
        "steps": steps,
        "files": allFiles,
        "unassignedFiles": unassignedFiles,
    }

View file

@ -25,142 +25,11 @@ def _registerMediaTools(registry: ToolRegistry, services):
# ---- Document rendering tool ----
def _markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
"""Convert markdown content to the standard document JSON format expected by renderers."""
import re as _re
sections = []
order = 0
lines = markdown.split("\n")
i = 0
def _nextId():
nonlocal order
order += 1
return f"s_{order}"
while i < len(lines):
line = lines[i]
# --- Headings ---
headingMatch = _re.match(r'^(#{1,6})\s+(.+)', line)
if headingMatch:
level = len(headingMatch.group(1))
text = headingMatch.group(2).strip()
sections.append({
"id": _nextId(), "content_type": "heading", "order": order,
"elements": [{"content": {"text": text, "level": level}}],
})
i += 1
continue
# --- Fenced code blocks ---
codeMatch = _re.match(r'^```(\w*)', line)
if codeMatch:
lang = codeMatch.group(1) or "text"
codeLines = []
i += 1
while i < len(lines) and not lines[i].startswith("```"):
codeLines.append(lines[i])
i += 1
i += 1
sections.append({
"id": _nextId(), "content_type": "code_block", "order": order,
"elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}],
})
continue
# --- Tables ---
tableMatch = _re.match(r'^\|(.+)\|$', line)
if tableMatch and (i + 1) < len(lines) and _re.match(r'^\|[\s\-:|]+\|$', lines[i + 1]):
headerCells = [c.strip() for c in tableMatch.group(1).split("|")]
i += 2
rows = []
while i < len(lines) and _re.match(r'^\|(.+)\|$', lines[i]):
rowCells = [c.strip() for c in lines[i][1:-1].split("|")]
rows.append(rowCells)
i += 1
sections.append({
"id": _nextId(), "content_type": "table", "order": order,
"elements": [{"content": {"headers": headerCells, "rows": rows}}],
})
continue
# --- Bullet / numbered lists ---
listMatch = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', line)
if listMatch:
isNumbered = bool(_re.match(r'\d+[.)]', listMatch.group(2)))
items = []
while i < len(lines) and _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]):
m = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i])
items.append({"text": m.group(3).strip()})
i += 1
sections.append({
"id": _nextId(), "content_type": "bullet_list", "order": order,
"elements": [{"content": {"items": items, "list_type": "numbered" if isNumbered else "bullet"}}],
})
continue
# --- Empty lines (skip) ---
if not line.strip():
i += 1
continue
# --- Images: ![alt](file:fileId) or ![alt](url) ---
imgMatch = _re.match(r'^!\[([^\]]*)\]\(([^)]+)\)', line)
if imgMatch:
altText = imgMatch.group(1).strip() or "Image"
src = imgMatch.group(2).strip()
fileId = ""
if src.startswith("file:"):
fileId = src[5:]
sections.append({
"id": _nextId(), "content_type": "image", "order": order,
"elements": [{
"content": {
"altText": altText,
"base64Data": "",
"_fileRef": fileId,
"_srcUrl": src if not fileId else "",
}
}],
})
i += 1
continue
# --- Paragraph (collect consecutive non-empty lines) ---
paraLines = []
while i < len(lines) and lines[i].strip() and not _re.match(r'^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )', lines[i]):
paraLines.append(lines[i])
i += 1
if paraLines:
sections.append({
"id": _nextId(), "content_type": "paragraph", "order": order,
"elements": [{"content": {"text": " ".join(paraLines)}}],
})
continue
i += 1
if not sections:
sections.append({
"id": _nextId(), "content_type": "paragraph", "order": order,
"elements": [{"content": {"text": markdown.strip() or "(empty)"}}],
})
return {
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "agent_rendering",
"title": title,
"language": language,
},
"documents": [{
"id": "doc_1",
"title": title,
"sections": sections,
}],
}
"""Delegate to the consolidated parser in subDocumentUtility."""
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
result = markdownToDocumentJson(markdown, title, language)
result["metadata"]["extraction_method"] = "agent_rendering"
return result
async def _renderDocument(args: Dict[str, Any], context: Dict[str, Any]):
"""Render agent-produced markdown content into any document format via the RendererRegistry."""
@ -245,35 +114,75 @@ def _registerMediaTools(registry: ToolRegistry, services):
except Exception as e:
logger.warning(f"renderDocument: knowledge service unavailable: {e}")
resolvedImages = 0
for doc in structuredContent.get("documents", []):
for section in doc.get("sections", []):
if section.get("content_type") != "image":
continue
for element in section.get("elements", []):
contentObj = element.get("content", {})
fileRef = contentObj.get("_fileRef", "")
if not fileRef or contentObj.get("base64Data"):
continue
def _resolveImageRef(targetObj, fileRefKey="_fileRef", fileIdKey="fileId"):
"""Resolve a single image reference dict to base64Data in-place."""
nonlocal resolvedImages
fileRef = targetObj.get(fileRefKey, "") or targetObj.get(fileIdKey, "")
if not fileRef or targetObj.get("base64Data"):
return
if knowledgeService:
chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef)
imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
if imageChunks:
contentObj["base64Data"] = imageChunks[0].get("data", "")
targetObj["base64Data"] = imageChunks[0].get("data", "")
chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png")
contentObj["mimeType"] = chunkMime
targetObj["mimeType"] = chunkMime
resolvedImages += 1
if not contentObj.get("base64Data"):
if not targetObj.get("base64Data"):
try:
rawBytes = services.chat.getFileData(fileRef)
if rawBytes:
import base64 as _b64
contentObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii")
contentObj["mimeType"] = "image/png"
targetObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii")
targetObj["mimeType"] = "image/png"
resolvedImages += 1
except Exception as e:
logger.warning(f"renderDocument: image resolve failed for fileRef={fileRef}: {e}")
contentObj.pop("_fileRef", None)
contentObj.pop("_srcUrl", None)
targetObj.pop("_fileRef", None)
targetObj.pop("_srcUrl", None)
def _resolveInlineRuns(runsList):
"""Scan a list of inline runs and resolve any image runs with fileId."""
for run in runsList:
if run.get("type") == "image" and run.get("fileId") and not run.get("base64Data"):
_resolveImageRef(run, fileRefKey="fileId", fileIdKey="fileId")
for doc in structuredContent.get("documents", []):
for section in doc.get("sections", []):
cType = section.get("content_type")
# Block-level image sections
if cType == "image":
for element in section.get("elements", []):
contentObj = element.get("content", {})
_resolveImageRef(contentObj)
continue
# Paragraphs with inlineRuns
if cType == "paragraph":
for element in section.get("elements", []):
runs = element.get("content", {}).get("inlineRuns")
if runs:
_resolveInlineRuns(runs)
continue
# Bullet lists - items are List[List[InlineRun]]
if cType == "bullet_list":
for element in section.get("elements", []):
items = element.get("content", {}).get("items", [])
for item in items:
if isinstance(item, list):
_resolveInlineRuns(item)
continue
# Tables - headers and row cells are List[InlineRun]
if cType == "table":
for element in section.get("elements", []):
contentObj = element.get("content", {})
for cell in contentObj.get("headers", []):
if isinstance(cell, list):
_resolveInlineRuns(cell)
for row in contentObj.get("rows", []):
for cell in row:
if isinstance(cell, list):
_resolveInlineRuns(cell)
sectionCount = len(structuredContent.get("documents", [{}])[0].get("sections", []))
logger.info(f"renderDocument: parsed {sectionCount} sections from markdown ({len(content)} chars), resolved {resolvedImages} image(s), format={outputFormat}")
@ -285,6 +194,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
language=language,
title=title,
userPrompt=content,
style=args.get("style"),
)
if not documents:
@ -367,6 +277,20 @@ def _registerMediaTools(registry: ToolRegistry, services):
"outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
"title": {"type": "string", "description": "Document title", "default": "Document"},
"language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"},
"style": {
"type": "object",
"description": (
"Optional style overrides for the rendered document. Supports nested keys: "
"fonts (primary, monospace), colors (primary, secondary, accent, background), "
"headings (h1-h4 with sizePt, weight, color, spaceBeforePt, spaceAfterPt), "
"paragraph (sizePt, lineSpacing, color), table (headerBg, headerFg, headerSizePt, "
"bodySizePt, rowBandingEven, rowBandingOdd, borderColor, borderWidthPt), "
"list (bulletChar, indentPt, sizePt), image (defaultWidthPt, maxWidthPt, alignment), "
"codeBlock (fontSizePt, background, borderColor), "
"page (format, marginsPt, showPageNumbers, headerHeight, footerHeight, headerLogo, headerText, footerText). "
"Only provided keys override defaults; omitted keys keep their default values."
),
},
},
},
readOnly=False,

View file

@ -51,6 +51,10 @@ class _ServicesAdapter:
def workflow(self):
return self._context.workflow
@workflow.setter
def workflow(self, value):
    # Forward workflow assignment to the wrapped context so callers can
    # swap the active workflow through the adapter.
    self._context.workflow = value
@property
def chat(self):
return self._get_service("chat")
@ -86,7 +90,7 @@ class _ServicesAdapter:
return getattr(w, "featureCode", None) if w else None
def __getattr__(self, name: str):
if name in ("allowedProviders", "preferredProviders", "currentUserLanguage"):
if name in ("allowedProviders", "allowedModels", "preferredProviders", "currentUserLanguage"):
return getattr(self.workflow, name, None) if self.workflow else None
raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
@ -194,6 +198,11 @@ class AiService:
request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders})
logger.debug(f"Effective allowedProviders for AI request: {effectiveProviders}")
# Calculate effective allowedModels: Workflow ∩ Request (node-level)
effectiveModels = self._calculateEffectiveModels(request)
if effectiveModels and request.options:
request.options = request.options.model_copy(update={'allowedModels': effectiveModels})
# Neutralize prompt if enabled (before AI call)
_wasNeutralized = False
_excludedDocs: List[str] = []
@ -249,6 +258,11 @@ class AiService:
if effectiveProviders and request.options:
request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders})
# Calculate effective allowedModels: Workflow ∩ Request (node-level)
effectiveModels = self._calculateEffectiveModels(request)
if effectiveModels and request.options:
request.options = request.options.model_copy(update={'allowedModels': effectiveModels})
# Neutralize prompt if enabled (before streaming)
_wasNeutralized = False
_excludedDocs: List[str] = []
@ -1264,6 +1278,43 @@ detectedIntent-Werte:
logger.warning(f"Error calculating effective providers: {e}")
return None
def _calculateEffectiveModels(self, request: AiCallRequest = None) -> Optional[List[str]]:
    """Compute the effective model whitelist (Workflow ∩ Request).

    AND-logic intersection:
    - The workflow-level allowedModels (from automation config) is the base.
    - Request-level (node-level) allowedModels narrows it further; the
      ordering of the workflow list is preserved.
    - Returns None when no model filtering applies or on any error.
    """
    try:
        workflowModels = getattr(self.services, 'allowedModels', None)

        requestModels = None
        if request and request.options and request.options.allowedModels:
            requestModels = request.options.allowedModels

        combined = list(workflowModels) if workflowModels else None
        if requestModels:
            if combined:
                # Intersect, keeping the workflow list's order.
                combined = [model for model in combined if model in requestModels]
            else:
                combined = list(requestModels)

        if not combined:
            return None
        logger.debug(f"Model filter: Workflow={workflowModels}, Request={requestModels}, Effective={combined}")
        return combined
    except Exception as e:
        # Filtering is best-effort: never let it break the AI call itself.
        logger.warning(f"Error calculating effective models: {e}")
        return None
async def ensureAiObjectsInitialized(self):
"""Ensure aiObjects is initialized and submodules are ready."""
if self.aiObjects is None:

View file

@ -199,13 +199,8 @@ class ChatService:
label = parts[1]
messageFound = None
for message in workflow.messages:
# Validate message belongs to this workflow
msgWorkflowId = getattr(message, 'workflowId', None)
if not msgWorkflowId or msgWorkflowId != workflowId:
if msgWorkflowId:
logger.warning(f"Message {message.id} has workflowId {msgWorkflowId} but belongs to workflow {workflowId}. Skipping.")
else:
logger.warning(f"Message {message.id} has no workflowId. Skipping.")
continue
msgLabel = getattr(message, 'documentsLabel', None)
@ -213,7 +208,6 @@ class ChatService:
messageFound = message
break
# If found, add documents
if messageFound and messageFound.documents:
allDocuments.extend(messageFound.documents)
else:

View file

@ -14,6 +14,7 @@ from .subDocumentUtility import (
detectMimeTypeFromData,
convertDocumentDataToString
)
from .styleDefaults import resolveStyle
logger = logging.getLogger(__name__)
@ -382,7 +383,7 @@ class GenerationService:
'workflowId': 'unknown'
}
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None) -> List[RenderedDocument]:
"""
Render extracted JSON content to the specified output format.
Processes EACH document separately and calls renderer for each.
@ -399,12 +400,14 @@ class GenerationService:
userPrompt: User's original prompt for report generation
aiService: AI service instance for generation prompt creation
parentOperationId: Optional parent operation ID for hierarchical logging
style: Optional style overrides (deep-merged with DEFAULT_STYLE)
Returns:
List of RenderedDocument objects.
Each RenderedDocument represents one rendered file (main document or supporting file)
"""
try:
resolvedStyle = resolveStyle(style)
# Validate JSON input
if not isinstance(extractedContent, dict):
raise ValueError("extractedContent must be a JSON dictionary")
@ -469,7 +472,7 @@ class GenerationService:
docTitle = doc.get("title", title)
# Render this document (can return multiple files, e.g., HTML + images)
renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService)
renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService, style=resolvedStyle)
allRenderedDocuments.extend(renderedDocs)
logger.info(f"Rendered {len(documents)} document(s) into {len(allRenderedDocuments)} file(s)")

View file

@ -84,7 +84,7 @@ class BaseRenderer(ABC):
return list(supportedSectionTypes)
@abstractmethod
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
"""
Render extracted JSON content to multiple documents.
Each renderer must implement this method.
@ -95,6 +95,9 @@ class BaseRenderer(ABC):
title: Report title
userPrompt: Original user prompt for context
aiService: AI service instance for additional processing
style: Fully-resolved unified style dict from styleDefaults.resolveStyle().
When provided, renderers use these values instead of their
own defaults / AI-generated styles.
Returns:
List of RenderedDocument objects.
@ -103,6 +106,112 @@ class BaseRenderer(ABC):
"""
pass
def _convertUnifiedStyleToInternal(self, style: Dict[str, Any]) -> Dict[str, Any]:
"""Convert the unified resolvedStyle dict (from styleDefaults) into
the renderer-internal style-set format that all rendering methods already
consume. Override in subclasses for format-specific tweaks."""
h1 = style["headings"]["h1"]
h2 = style["headings"]["h2"]
h3 = style["headings"].get("h3", h2)
h4 = style["headings"].get("h4", h3)
tbl = style["table"]
para = style["paragraph"]
lst = style["list"]
cb = style["codeBlock"]
return {
"title": {
"font_size": h1["sizePt"], "color": h1["color"],
"bold": h1.get("weight") == "bold", "align": "left",
},
"heading1": {
"font_size": h1["sizePt"], "color": h1["color"],
"bold": h1.get("weight") == "bold", "align": "left",
},
"heading2": {
"font_size": h2["sizePt"], "color": h2["color"],
"bold": h2.get("weight") == "bold", "align": "left",
},
"heading3": {
"font_size": h3["sizePt"], "color": h3["color"],
"bold": h3.get("weight") == "bold", "align": "left",
},
"heading4": {
"font_size": h4["sizePt"], "color": h4["color"],
"bold": h4.get("weight") == "bold", "align": "left",
},
"paragraph": {
"font_size": para["sizePt"], "color": para["color"],
"bold": False, "align": "left",
},
"table_header": {
"background": tbl["headerBg"], "text_color": tbl["headerFg"],
"bold": True, "align": "center",
},
"table_cell": {
"background": tbl["rowBandingOdd"], "text_color": para["color"],
"bold": False, "align": "left",
},
"table_border": {
"style": "grid", "color": tbl["borderColor"],
},
"bullet_list": {
"font_size": lst["sizePt"], "color": para["color"],
"indent": lst["indentPt"],
},
"code_block": {
"font": style["fonts"]["monospace"],
"font_size": cb["fontSizePt"], "color": para["color"],
"background": cb["background"],
},
}
@staticmethod
def _inlineRunsFromContent(content: Dict[str, Any], *, itemsKey: str = None) -> Any:
"""Extract inline runs from new-format content, falling back to old format.
For paragraphs (itemsKey=None):
new: content["inlineRuns"] -> List[InlineRun]
old: content["text"] -> wrapped in [{"type":"text","value":text}]
For list items (itemsKey="items"):
new: content["items"] is List[List[InlineRun]]
old: content["items"] is List[str] or List[{"text":}]
Returns the items list (caller decides per-item conversion).
For table headers/cells:
new: each header/cell is List[InlineRun]
old: each header/cell is a plain str
Caller handles per-cell.
"""
if itemsKey:
return content.get(itemsKey, [])
inlineRuns = content.get("inlineRuns")
if inlineRuns:
return inlineRuns
text = content.get("text", "")
if text:
return [{"type": "text", "value": text}]
return []
@staticmethod
def _inlineRunsForCell(cell) -> list:
"""Normalize a single table header or cell value to List[InlineRun].
Accepts either a plain string or an already-correct list of run dicts."""
if isinstance(cell, list):
return cell
return [{"type": "text", "value": str(cell) if cell is not None else ""}]
@staticmethod
def _inlineRunsForListItem(item) -> list:
"""Normalize a single list item to List[InlineRun].
Accepts a plain string, a dict with 'text', or an already-correct list of run dicts."""
if isinstance(item, list):
return item
if isinstance(item, dict):
text = item.get("text", "")
return [{"type": "text", "value": text}]
return [{"type": "text", "value": str(item)}]
def _determineFilename(self, title: str, mimeType: str) -> str:
"""Determine filename from title and mimeType."""
import re

View file

@ -53,18 +53,17 @@ class RendererDocx(BaseRenderer):
from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
try:
if not DOCX_AVAILABLE:
# Fallback to HTML if python-docx not available
from .rendererHtml import RendererHtml
htmlRenderer = RendererHtml()
return await htmlRenderer.render(extractedContent, title, userPrompt, aiService)
return await htmlRenderer.render(extractedContent, title, userPrompt, aiService, style=style)
# Generate DOCX using AI-analyzed styling
docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)
docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService, unifiedStyle=style)
# Extract metadata for document type and other info
metadata = extractedContent.get("metadata", {}) if extractedContent else {}
@ -114,23 +113,27 @@ class RendererDocx(BaseRenderer):
)
]
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, unifiedStyle: Dict[str, Any] = None) -> str:
"""Generate DOCX content from structured JSON document."""
import time
start_time = time.time()
try:
self.logger.debug("_generateDocxFromJson: Starting document generation")
# Create new document
doc = Document()
self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s")
# Get style set: use styles from metadata if available, otherwise enhance with AI
# Phase 3: prefer unified style when provided
style_start = time.time()
self.logger.debug("_generateDocxFromJson: About to get style set")
if unifiedStyle:
styleSet = self._convertUnifiedStyleToInternal(unifiedStyle)
self._unifiedStyle = unifiedStyle
else:
template_from_metadata = None
if json_content and isinstance(json_content.get("metadata"), dict):
template_from_metadata = json_content["metadata"].get("templateName")
style_start = time.time()
self.logger.debug("_generateDocxFromJson: About to get style set")
styleSet = await self._getStyleSet(json_content, userPrompt, aiService, templateName=template_from_metadata)
self._unifiedStyle = None
self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s")
# Setup basic document styles and create all styles from style set
@ -298,11 +301,11 @@ class RendererDocx(BaseRenderer):
def _setupBasicDocumentStyles(self, doc: Document) -> None:
"""Set up basic document styles."""
try:
# Set default font
style = doc.styles['Normal']
font = style.font
font.name = 'Calibri'
font.size = Pt(11)
us = getattr(self, '_unifiedStyle', None)
font.name = us["fonts"]["primary"] if us else 'Calibri'
font.size = Pt(us["paragraph"]["sizePt"] if us else 11)
except Exception as e:
self.logger.warning(f"Could not set up basic document styles: {str(e)}")
@ -421,6 +424,8 @@ class RendererDocx(BaseRenderer):
def _addMarkdownInlineRuns(self, paragraph, text: str) -> None:
"""Parse markdown inline formatting and add corresponding Runs to a python-docx paragraph."""
pos = 0
us = getattr(self, '_unifiedStyle', None)
monoFont = us["fonts"]["monospace"] if us else "Courier New"
for m in self._MD_INLINE_RE.finditer(text):
if m.start() > pos:
paragraph.add_run(text[pos:m.start()])
@ -434,12 +439,45 @@ class RendererDocx(BaseRenderer):
paragraph.add_run(m.group(6)).italic = True
elif m.group(7):
run = paragraph.add_run(m.group(7))
run.font.name = "Courier New"
run.font.name = monoFont
run.font.size = Pt(9)
pos = m.end()
if pos < len(text):
paragraph.add_run(text[pos:])
def _renderInlineRuns(self, runs: list, paragraph, styleSet: Dict[str, Any]) -> None:
    """Append python-docx Runs to *paragraph* for each InlineRun dict in *runs*.

    Supported run types: text, bold, italic, code, link, image. Unknown
    types degrade to plain text. Code runs use the unified monospace font
    when a unified style is active, otherwise Courier New.
    """
    unified = getattr(self, '_unifiedStyle', None)
    codeFont = unified["fonts"]["monospace"] if unified else "Courier New"
    for runDict in runs:
        kind = runDict.get("type", "text")
        text = runDict.get("value", "")
        if kind == "bold":
            paragraph.add_run(text).bold = True
        elif kind == "italic":
            paragraph.add_run(text).italic = True
        elif kind == "code":
            codeRun = paragraph.add_run(text)
            codeRun.font.name = codeFont
            codeRun.font.size = Pt(9)
        elif kind == "link":
            # NOTE(review): styled as a link (underline + blue) but no actual
            # hyperlink target is attached to the run.
            linkRun = paragraph.add_run(text)
            linkRun.font.underline = True
            linkRun.font.color.rgb = RGBColor(0x29, 0x80, 0xB9)
        elif kind == "image":
            encoded = runDict.get("base64Data", "")
            if not encoded:
                # Unresolved image: fall back to the run's text value.
                paragraph.add_run(text)
                continue
            try:
                stream = io.BytesIO(base64.b64decode(encoded))
                paragraph.add_run().add_picture(stream, width=Inches(2))
            except Exception:
                # Decoding/embedding failed: emit a textual placeholder.
                paragraph.add_run(f"[Image: {runDict.get('altText', '')}]")
        else:
            # "text" and any unknown run type render as plain text.
            paragraph.add_run(text)
def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""
Render a JSON table to DOCX using AI-generated styles.
@ -485,7 +523,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.error(f"Error rendering table: {str(e)}", exc_info=True)
def _renderTableFastXml(self, doc: Document, headers: List[str], rows: List[List[Any]], styles: Dict[str, Any]) -> None:
def _renderTableFastXml(self, doc: Document, headers: list, rows: list, styles: Dict[str, Any]) -> None:
"""
High-performance table rendering using direct XML manipulation.
@ -546,22 +584,32 @@ class RendererDocx(BaseRenderer):
# Build all rows using fast XML
rows_start = time.time()
# Header row
headerRow = self._createTableRowXml(headers, isHeader=True)
# Resolve header style colors
tableStyle = styles.get("table_header", {})
headerBg = tableStyle.get("background", "")
headerFg = tableStyle.get("text_color", "")
# Flatten inline-run headers to plain strings for fast XML path
flatHeaders = []
for h in headers:
runs = self._inlineRunsForCell(h)
flatHeaders.append("".join(r.get("value", "") for r in runs))
headerRow = self._createTableRowXml(flatHeaders, isHeader=True, headerBgHex=headerBg or None, headerFgHex=headerFg or None)
tbl.append(headerRow)
header_time = time.time() - rows_start
self.logger.debug(f"_renderTableFastXml: Header row created in {header_time:.3f}s")
# Data rows - batch process for performance
data_start = time.time()
rowCount = len(rows)
for idx, rowData in enumerate(rows):
# Convert all cells to strings
cellTexts = [str(cell) if cell is not None else '' for cell in rowData]
# Pad if needed
while len(cellTexts) < len(headers):
cellTexts = []
for cell in rowData:
runs = self._inlineRunsForCell(cell)
cellTexts.append("".join(r.get("value", "") for r in runs))
while len(cellTexts) < len(flatHeaders):
cellTexts.append('')
row = self._createTableRowXml(cellTexts, isHeader=False)
@ -641,70 +689,60 @@ class RendererDocx(BaseRenderer):
return tblBorders
def _createTableRowXml(self, cells: List[str], isHeader: bool = False) -> Any:
"""
Create a table row XML element with cells.
This is the core fast-path: builds the row XML directly without
going through python-docx's slow cell.text assignment.
"""
def _createTableRowXml(self, cells: list, isHeader: bool = False, headerBgHex: str = None, headerFgHex: str = None) -> Any:
"""Create a table row XML element with cells.
Fast-path: builds row XML directly via lxml."""
from docx.oxml.shared import OxmlElement, qn
tr = OxmlElement('w:tr')
if headerBgHex is None:
us = getattr(self, '_unifiedStyle', None)
headerBgHex = us["table"]["headerBg"].lstrip('#') if us else '1F3864'
else:
headerBgHex = headerBgHex.lstrip('#')
if headerFgHex is None:
us = getattr(self, '_unifiedStyle', None)
headerFgHex = us["table"]["headerFg"].lstrip('#') if us else 'FFFFFF'
else:
headerFgHex = headerFgHex.lstrip('#')
# Row properties for header
tr = OxmlElement('w:tr')
if isHeader:
trPr = OxmlElement('w:trPr')
tblHeader = OxmlElement('w:tblHeader')
trPr.append(tblHeader)
trPr.append(OxmlElement('w:tblHeader'))
tr.append(trPr)
for cellText in cells:
# Create cell
tc = OxmlElement('w:tc')
# Cell properties
tcPr = OxmlElement('w:tcPr')
tcW = OxmlElement('w:tcW')
tcW.set(qn('w:type'), 'auto')
tcW.set(qn('w:w'), '0')
tcPr.append(tcW)
# Header cell styling - light blue background
if isHeader:
shd = OxmlElement('w:shd')
shd.set(qn('w:val'), 'clear')
shd.set(qn('w:color'), 'auto')
shd.set(qn('w:fill'), '4472C4') # Professional blue
shd.set(qn('w:fill'), headerBgHex)
tcPr.append(shd)
tc.append(tcPr)
# Paragraph with text
p = OxmlElement('w:p')
# Add run with text
r = OxmlElement('w:r')
# Header text styling - bold and white
if isHeader:
rPr = OxmlElement('w:rPr')
b = OxmlElement('w:b')
rPr.append(b)
# White text color
rPr.append(OxmlElement('w:b'))
color = OxmlElement('w:color')
color.set(qn('w:val'), 'FFFFFF')
color.set(qn('w:val'), headerFgHex)
rPr.append(color)
r.append(rPr)
# Text element
t = OxmlElement('w:t')
# Preserve spaces if text starts/ends with whitespace
if cellText and (cellText[0] == ' ' or cellText[-1] == ' '):
t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
t.text = cellText
r.append(t)
p.append(r)
tc.append(p)
tr.append(tc)
@ -836,46 +874,36 @@ class RendererDocx(BaseRenderer):
def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON bullet list to DOCX using AI-generated styles - OPTIMIZED for performance."""
try:
# Extract from nested content structure
content = list_data.get("content", {})
if not isinstance(content, dict):
return
items = content.get("items", [])
bullet_style = styles.get("bullet_list", {})
# Pre-calculate and cache style objects to avoid repeated parsing
font_size_pt = None
font_size_pt = Pt(bullet_style["font_size"]) if bullet_style.get("font_size") else None
text_color_rgb = None
if bullet_style:
if "font_size" in bullet_style:
font_size_pt = Pt(bullet_style["font_size"])
if "color" in bullet_style:
if bullet_style.get("color"):
color_hex = bullet_style["color"].lstrip('#')
text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
for item in items:
itemText = item if isinstance(item, str) else (item.get("text", "") if isinstance(item, dict) else "")
if not itemText:
itemRuns = self._inlineRunsForListItem(item)
if not itemRuns or not any(r.get("value") for r in itemRuns):
continue
para = doc.add_paragraph(style='List Bullet')
isNewRunFormat = isinstance(item, list)
if isNewRunFormat:
self._renderInlineRuns(itemRuns, para, styles)
else:
itemText = "".join(r.get("value", "") for r in itemRuns)
self._addMarkdownInlineRuns(para, itemText)
# Apply bullet list styling from style set - use cached objects
if bullet_style and para.runs:
# Use direct access instead of iterating
if len(para.runs) > 0:
if bullet_style and para.runs and len(para.runs) > 0:
run = para.runs[0]
if font_size_pt:
run.font.size = font_size_pt
if text_color_rgb:
run.font.color.rgb = text_color_rgb
else:
# Create run if none exists
run = para.add_run()
if font_size_pt:
run.font.size = font_size_pt
if text_color_rgb:
run.font.color.rgb = text_color_rgb
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
@ -905,42 +933,41 @@ class RendererDocx(BaseRenderer):
def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON paragraph to DOCX using AI-generated styles."""
try:
# Extract from nested content structure
content = paragraph_data.get("content", {})
if isinstance(content, dict):
text = content.get("text", "")
inlineRuns = self._inlineRunsFromContent(content)
elif isinstance(content, str):
text = content
inlineRuns = [{"type": "text", "value": content}]
else:
text = ""
inlineRuns = []
# CRITICAL: Prevent rendering base64 image data as text
# Base64 image data typically starts with /9j/ (JPEG) or iVBORw0KGgo (PNG)
if text and (text.startswith("/9j/") or text.startswith("iVBORw0KGgo") or
(len(text) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in text[:100]))):
# This looks like base64 data - don't render as text
self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(text)})")
para = doc.add_paragraph("[Error: Image data found in text content - image embedding may have failed]")
if para.runs:
para.runs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color for error
if not inlineRuns:
return
plainText = "".join(r.get("value", "") for r in inlineRuns)
if plainText and (plainText.startswith("/9j/") or plainText.startswith("iVBORw0KGgo") or
(len(plainText) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in plainText[:100]))):
self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(plainText)})")
para = doc.add_paragraph("[Error: Image data found in text content - image embedding may have failed]")
if para.runs:
para.runs[0].font.color.rgb = RGBColor(255, 0, 0)
return
if text:
para = doc.add_paragraph()
self._addMarkdownInlineRuns(para, text)
hasNewRuns = content.get("inlineRuns") if isinstance(content, dict) else None
if hasNewRuns:
self._renderInlineRuns(inlineRuns, para, styles)
else:
self._addMarkdownInlineRuns(para, plainText)
paragraph_style = styles.get("paragraph", {})
if paragraph_style:
# Pre-calculate and cache style objects
font_size_pt = None
font_size_pt = Pt(paragraph_style["font_size"]) if "font_size" in paragraph_style else None
text_color_rgb = None
if "font_size" in paragraph_style:
font_size_pt = Pt(paragraph_style["font_size"])
if "color" in paragraph_style:
color_hex = paragraph_style["color"].lstrip('#')
text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
bold = paragraph_style.get("bold", False)
# Use direct access instead of iterating
if len(para.runs) > 0:
run = para.runs[0]
if font_size_pt:
@ -948,15 +975,6 @@ class RendererDocx(BaseRenderer):
run.font.bold = bold
if text_color_rgb:
run.font.color.rgb = text_color_rgb
else:
# Create run if none exists
run = para.add_run()
if font_size_pt:
run.font.size = font_size_pt
run.font.bold = bold
if text_color_rgb:
run.font.color.rgb = text_color_rgb
if "align" in paragraph_style:
align = paragraph_style["align"]
if align == "center":
@ -972,13 +990,13 @@ class RendererDocx(BaseRenderer):
def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON code block to DOCX using AI-generated styles."""
try:
# Extract from nested content structure
content = code_data.get("content", {})
if not isinstance(content, dict):
return
code = content.get("code", "")
language = content.get("language", "")
code_style = styles.get("code_block", {})
us = getattr(self, '_unifiedStyle', None)
if code:
if language:
@ -986,9 +1004,8 @@ class RendererDocx(BaseRenderer):
if len(lang_para.runs) > 0:
lang_para.runs[0].bold = True
# Pre-calculate and cache style objects
code_font_name = code_style.get("font", "Courier New")
code_font_size_pt = Pt(code_style.get("font_size", 9))
code_font_name = code_style.get("font", us["fonts"]["monospace"] if us else "Courier New")
code_font_size_pt = Pt(code_style.get("font_size", us["codeBlock"]["fontSizePt"] if us else 9))
code_text_color_rgb = None
if "color" in code_style:
color_hex = code_style["color"].lstrip('#')

View file

@ -40,7 +40,7 @@ class RendererHtml(BaseRenderer):
from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
"""
Render HTML document with images as separate files.
Returns list of documents: [HTML document, image1, image2, ...]
@ -54,7 +54,7 @@ class RendererHtml(BaseRenderer):
self._renderedImages = images
# Generate HTML using AI-analyzed styling
htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService, style=style)
# Replace base64 data URIs with relative file paths if images exist
if images:
@ -107,11 +107,16 @@ class RendererHtml(BaseRenderer):
return resultDocuments
async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str:
"""Generate HTML content from structured JSON document using AI-generated styling."""
try:
# Get style set: use styles from metadata if available, otherwise enhance with AI
# Use unified style when provided, otherwise fall back to existing flow
if style:
styles = self._convertUnifiedStyleToInternal(style)
self._unifiedStyle = style
else:
styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
self._unifiedStyle = None
# Validate JSON structure
if not self._validateJsonStructure(jsonContent):
@ -272,6 +277,10 @@ class RendererHtml(BaseRenderer):
def _generateCssStyles(self, styles: Dict[str, Any]) -> str:
"""Generate CSS from style definitions."""
# When unified style is available, generate CSS directly from it
if getattr(self, "_unifiedStyle", None):
return self._generateCssFromUnifiedStyle(self._unifiedStyle)
css_parts = []
# Body styles
@ -368,6 +377,164 @@ class RendererHtml(BaseRenderer):
return '\n'.join(css_parts)
def _generateCssFromUnifiedStyle(self, style: Dict[str, Any]) -> str:
    """Generate CSS directly from unified style dict.

    Every sub-dict is read defensively with .get() fallbacks, so a
    partially-populated style dict still yields a complete stylesheet.

    Args:
        style: Unified style dict (fonts, colors, headings, paragraph,
            table, list, codeBlock, page sub-dicts).

    Returns:
        The CSS rules joined with newlines (no surrounding <style> tag).
    """
    fonts = style.get("fonts", {})
    colors = style.get("colors", {})
    headings = style.get("headings", {})
    para = style.get("paragraph", {})
    tbl = style.get("table", {})
    lst = style.get("list", {})
    cb = style.get("codeBlock", {})
    page = style.get("page", {})
    primaryFont = fonts.get("primary", "Arial, sans-serif")
    monoFont = fonts.get("monospace", "Courier New, monospace")
    bgColor = colors.get("background", "#FFFFFF")
    primaryColor = colors.get("primary", "#1F3864")
    paraColor = para.get("color", "#333333")
    paraSizePt = para.get("sizePt", 11)
    lineSpacing = para.get("lineSpacing", 1.15)
    css_parts = []
    # Body
    css_parts.append("body {")
    css_parts.append(f" font-family: {primaryFont};")
    css_parts.append(f" background: {bgColor};")
    css_parts.append(f" color: {paraColor};")
    css_parts.append(f" font-size: {paraSizePt}pt;")
    css_parts.append(f" line-height: {lineSpacing};")
    # Page margins (pt) when configured; otherwise a plain 20px padding.
    margins = page.get("marginsPt", {})
    if margins:
        css_parts.append(f" margin: {margins.get('top', 60)}pt {margins.get('right', 60)}pt {margins.get('bottom', 60)}pt {margins.get('left', 60)}pt;")
    else:
        css_parts.append(" margin: 0; padding: 20px;")
    css_parts.append("}")
    # Document title (uses h1 style)
    h1 = headings.get("h1", {})
    css_parts.append(".document-title {")
    css_parts.append(f" font-size: {h1.get('sizePt', 24)}pt;")
    css_parts.append(f" color: {h1.get('color', primaryColor)};")
    css_parts.append(f" font-weight: {h1.get('weight', 'bold')};")
    css_parts.append(" margin: 0 0 1em 0;")
    css_parts.append("}")
    # Headings h1-h4
    # NOTE(review): a missing heading level falls back to the previous
    # level's raw dict (h1 for level 1), then the size default scales down
    # 4pt per level with a 12pt floor.
    for level in range(1, 5):
        key = f"h{level}"
        h = headings.get(key, h1 if level == 1 else headings.get(f"h{level-1}", {}))
        css_parts.append(f"h{level} {{")
        css_parts.append(f" font-size: {h.get('sizePt', max(24 - (level-1)*4, 12))}pt;")
        css_parts.append(f" color: {h.get('color', primaryColor)};")
        css_parts.append(f" font-weight: {h.get('weight', 'bold')};")
        css_parts.append(f" margin: 1.2em 0 0.4em 0;")
        css_parts.append("}")
    # Paragraphs
    css_parts.append("p {")
    css_parts.append(f" font-size: {paraSizePt}pt;")
    css_parts.append(f" color: {paraColor};")
    css_parts.append(f" line-height: {lineSpacing};")
    css_parts.append(" margin: 0 0 1em 0;")
    css_parts.append("}")
    # Tables
    borderColor = tbl.get("borderColor", "#DEE2E6")
    css_parts.append("table {")
    css_parts.append(f" border-collapse: collapse;")
    css_parts.append(f" width: 100%;")
    css_parts.append(f" margin: 1em 0;")
    css_parts.append(f" border: 1px solid {borderColor};")
    css_parts.append("}")
    # Table headers
    css_parts.append("th {")
    css_parts.append(f" background: {tbl.get('headerBg', '#1F3864')};")
    css_parts.append(f" color: {tbl.get('headerFg', '#FFFFFF')};")
    css_parts.append(" font-weight: bold;")
    css_parts.append(" text-align: center;")
    css_parts.append(f" padding: 10px;")
    css_parts.append(f" border: 1px solid {borderColor};")
    css_parts.append("}")
    # Table cells
    css_parts.append("td {")
    css_parts.append(f" color: {paraColor};")
    css_parts.append(" padding: 8px;")
    css_parts.append(f" border: 1px solid {borderColor};")
    css_parts.append("}")
    # Lists
    css_parts.append("ul {")
    css_parts.append(f" font-size: {lst.get('sizePt', paraSizePt)}pt;")
    css_parts.append(f" color: {paraColor};")
    css_parts.append(f" padding-left: {lst.get('indentPt', 18)}pt;")
    css_parts.append(" margin: 0 0 1em 0;")
    css_parts.append("}")
    # Code blocks
    css_parts.append("pre {")
    css_parts.append(f" font-family: {monoFont};")
    css_parts.append(f" font-size: {cb.get('fontSizePt', 9)}pt;")
    css_parts.append(f" color: {paraColor};")
    css_parts.append(f" background: {cb.get('background', '#F8F9FA')};")
    css_parts.append(f" border: 1px solid {cb.get('borderColor', '#E2E8F0')};")
    css_parts.append(" border-radius: 4px;")
    css_parts.append(" padding: 1em;")
    css_parts.append(" margin: 1em 0;")
    css_parts.append(" overflow-x: auto;")
    css_parts.append("}")
    # Images
    css_parts.append("img {")
    css_parts.append(" max-width: 100%;")
    css_parts.append(" height: auto;")
    css_parts.append(" margin: 1em 0;")
    css_parts.append(" border-radius: 4px;")
    css_parts.append("}")
    # Generated info (footer note) uses fixed colors, not the unified style.
    css_parts.append(".generated-info {")
    css_parts.append(" font-size: 0.9em;")
    css_parts.append(" color: #666;")
    css_parts.append(" text-align: center;")
    css_parts.append(" margin-top: 2em;")
    css_parts.append(" padding-top: 1em;")
    css_parts.append(" border-top: 1px solid #ddd;")
    css_parts.append("}")
    return '\n'.join(css_parts)
def _renderInlineRuns(self, runs: list) -> str:
"""Convert inline runs to HTML markup."""
import html as htmlLib
parts = []
for run in runs:
runType = run.get("type", "text")
value = htmlLib.escape(run.get("value", ""))
if runType == "text":
parts.append(value)
elif runType == "bold":
parts.append(f"<strong>{value}</strong>")
elif runType == "italic":
parts.append(f"<em>{value}</em>")
elif runType == "code":
parts.append(f"<code>{value}</code>")
elif runType == "link":
href = htmlLib.escape(run.get("href", ""))
parts.append(f'<a href="{href}">{value}</a>')
elif runType == "image":
b64 = run.get("base64Data", "")
mime = run.get("mimeType", "image/png")
alt = value
if b64:
parts.append(f'<img src="data:{mime};base64,{b64}" alt="{alt}" style="max-width:100%;height:auto;">')
else:
parts.append(value)
return "".join(parts)
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a single JSON section to HTML using AI-generated styles.
Supports three content formats: reference, object (base64), extracted_text.
@ -419,6 +586,11 @@ class RendererHtml(BaseRenderer):
# Regular paragraph element - extract from nested content structure (standard JSON format)
content = element.get("content", {})
if isinstance(content, dict):
# New format: inlineRuns
inlineRuns = content.get("inlineRuns")
if inlineRuns and isinstance(inlineRuns, list):
htmlParts.append(f'<p>{self._renderInlineRuns(inlineRuns)}</p>')
continue
text = content.get("text", "")
elif isinstance(content, str):
text = content
@ -495,7 +667,8 @@ class RendererHtml(BaseRenderer):
# Table header
htmlParts.append('<thead><tr>')
for header in headers:
htmlParts.append(f'<th>{header}</th>')
runs = self._inlineRunsForCell(header)
htmlParts.append(f'<th>{self._renderInlineRuns(runs)}</th>')
htmlParts.append('</tr></thead>')
# Table body
@ -503,7 +676,8 @@ class RendererHtml(BaseRenderer):
for row in rows:
htmlParts.append('<tr>')
for cellData in row:
htmlParts.append(f'<td>{cellData}</td>')
runs = self._inlineRunsForCell(cellData)
htmlParts.append(f'<td>{self._renderInlineRuns(runs)}</td>')
htmlParts.append('</tr>')
htmlParts.append('</tbody>')
@ -528,10 +702,8 @@ class RendererHtml(BaseRenderer):
htmlParts = ['<ul>']
for item in items:
if isinstance(item, str):
htmlParts.append(f'<li>{item}</li>')
elif isinstance(item, dict) and "text" in item:
htmlParts.append(f'<li>{item["text"]}</li>')
runs = self._inlineRunsForListItem(item)
htmlParts.append(f'<li>{self._renderInlineRuns(runs)}</li>')
htmlParts.append('</ul>')
return '\n'.join(htmlParts)
@ -571,6 +743,11 @@ class RendererHtml(BaseRenderer):
if isinstance(el, dict):
content = el.get("content", {})
if isinstance(content, dict):
# New format: inlineRuns
inlineRuns = content.get("inlineRuns")
if inlineRuns and isinstance(inlineRuns, list):
texts.append(self._renderInlineRuns(inlineRuns))
continue
text = content.get("text", "")
elif isinstance(content, str):
text = content
@ -581,16 +758,18 @@ class RendererHtml(BaseRenderer):
elif isinstance(el, str):
texts.append(el)
if texts:
# Join multiple paragraphs with <p> tags
return '\n'.join(f'<p>{text}</p>' for text in texts)
return ""
elif isinstance(paragraphData, str):
return f'<p>{paragraphData}</p>'
elif isinstance(paragraphData, dict):
# Handle nested content structure: element.content vs element.text
# Extract from nested content structure
content = paragraphData.get("content", {})
if isinstance(content, dict):
# New format: inlineRuns
inlineRuns = content.get("inlineRuns")
if inlineRuns and isinstance(inlineRuns, list):
return f'<p>{self._renderInlineRuns(inlineRuns)}</p>'
text = content.get("text", "")
elif isinstance(content, str):
text = content

View file

@ -106,17 +106,17 @@ class RendererPdf(BaseRenderer):
from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
try:
if not REPORTLAB_AVAILABLE:
# Fallback to HTML if reportlab not available
from .rendererHtml import RendererHtml
html_renderer = RendererHtml()
return await html_renderer.render(extractedContent, title, userPrompt, aiService)
return await html_renderer.render(extractedContent, title, userPrompt, aiService, style=style)
# Generate PDF using AI-analyzed styling
pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)
pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService, unifiedStyle=style)
# Extract metadata for document type and other info
metadata = extractedContent.get("metadata", {}) if extractedContent else {}
@ -163,11 +163,28 @@ class RendererPdf(BaseRenderer):
)
]
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, unifiedStyle: Dict[str, Any] = None) -> str:
"""Generate PDF content from structured JSON document using AI-generated styling."""
try:
# Get style set: use styles from metadata if available, otherwise enhance with AI
# Get style set from unified style or legacy approach
if unifiedStyle:
styles = self._convertUnifiedStyleToInternal(unifiedStyle)
self._unifiedStyle = unifiedStyle
for level in range(1, 7):
hKey = f"heading{level}"
if hKey not in styles:
styles[hKey] = self._defaultHeadingStyleDef(level)
else:
styles[hKey].setdefault("space_after", 12)
styles[hKey].setdefault("space_before", 12)
styles["paragraph"].setdefault("space_after", 6)
styles["paragraph"].setdefault("line_height", unifiedStyle["paragraph"].get("lineSpacing", 1.2))
styles["bullet_list"].setdefault("space_after", 3)
styles["code_block"].setdefault("space_after", 6)
styles["code_block"].setdefault("align", "left")
else:
styles = await self._getStyleSet(json_content, userPrompt, aiService)
self._unifiedStyle = None
# Validate JSON structure
if not self._validateJsonStructure(json_content):
@ -179,15 +196,13 @@ class RendererPdf(BaseRenderer):
# Create a buffer to hold the PDF
buffer = io.BytesIO()
# Create PDF document
doc = SimpleDocTemplate(
buffer,
pagesize=A4,
rightMargin=72,
leftMargin=72,
topMargin=72,
bottomMargin=18
)
# Create PDF document with unified page margins or defaults
pageCfg = unifiedStyle["page"] if unifiedStyle else None
if pageCfg:
m = pageCfg["marginsPt"]
doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=m["right"], leftMargin=m["left"], topMargin=m["top"], bottomMargin=m["bottom"])
else:
doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18)
# Build PDF content (no cover page — body starts on page 1; filename still uses `title`)
story = []
@ -232,13 +247,28 @@ class RendererPdf(BaseRenderer):
removed = False
for idx, flowable in enumerate(story):
fRepr = repr(flowable)
if "Image" in fRepr and hasattr(flowable, 'drawWidth') and hasattr(flowable, 'drawHeight'):
from reportlab.platypus import Image as ReportLabImage
if isinstance(flowable, ReportLabImage):
frameH = 650.0
frameW = 450.0
if flowable.drawHeight > frameH or flowable.drawWidth > frameW:
scaleW = frameW / flowable.drawWidth if flowable.drawWidth > frameW else 1.0
scaleH = frameH / flowable.drawHeight if flowable.drawHeight > frameH else 1.0
s = min(scaleW, scaleH) * 0.9
flowable.drawWidth = flowable.drawWidth * s
flowable.drawHeight = flowable.drawHeight * s
flowable._width = flowable.drawWidth
flowable._height = flowable.drawHeight
removed = True
break
if "Table" in fRepr and hasattr(flowable, '_cellvalues'):
try:
nRows = len(flowable._cellvalues)
nCols = len(flowable._cellvalues[0]) if flowable._cellvalues else 0
if nRows == 1 and nCols == 1:
errPara = Paragraph(
"[Code block omitted — content too large for PDF page]",
"[Code block omitted - content too large for PDF page]",
self._createNormalStyle({}),
)
story[idx] = errPara
@ -609,6 +639,31 @@ class RendererPdf(BaseRenderer):
.replace(">", "&gt;")
)
def _renderInlineRunsToPdfXml(self, runs: list) -> str:
    """Translate inline-run dicts into ReportLab Paragraph markup.

    Bold/italic map to <b>/<i>, code spans switch to the monospace font from
    the unified style (falling back to "Courier"), links become <a> tags, and
    images are reduced to a textual "[Image: ...]" placeholder. Unknown run
    types fall back to their escaped value.
    """
    unified = getattr(self, '_unifiedStyle', None)
    codeFont = unified["fonts"]["monospace"] if unified else "Courier"
    rendered = []
    for run in runs:
        kind = run.get("type", "text")
        text = self._escapeReportlabXml(run.get("value", ""))
        if kind == "bold":
            rendered.append(f"<b>{text}</b>")
        elif kind == "italic":
            rendered.append(f"<i>{text}</i>")
        elif kind == "code":
            rendered.append(f'<font name="{codeFont}">{text}</font>')
        elif kind == "link":
            target = self._escapeReportlabXml(run.get("href", ""))
            rendered.append(f'<a href="{target}">{text}</a>')
        elif kind == "image":
            rendered.append(f"[Image: {text}]")
        else:
            # "text" runs and any unrecognized type: emit the escaped value.
            rendered.append(text)
    return "".join(rendered)
def _applyInlineMarkdownToEscapedPlain(self, text: str) -> str:
"""Escape XML then apply bold/italic to a segment with no `code` spans (code is handled separately)."""
if not text:
@ -755,8 +810,12 @@ class RendererPdf(BaseRenderer):
hdrPs = self._createTableCellParagraphStyle(styles, header=True, tableStyleKey="table_header")
cellPs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell")
def _cellPara(val, ps):
return self._paragraphFromInlineMarkdown(str(val) if val is not None else "", ps)
def _cellPara(cell, ps):
runs = self._inlineRunsForCell(cell)
if isinstance(cell, list):
xml = self._renderInlineRunsToPdfXml(runs)
return Paragraph(_wrapEmojiSpansInXml(xml), ps)
return self._paragraphFromInlineMarkdown(str(cell) if cell is not None else "", ps)
headerRow = [_cellPara(h, hdrPs) for h in headers]
bodyRows = []
@ -794,29 +853,26 @@ class RendererPdf(BaseRenderer):
def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON bullet list to PDF elements using AI-generated styles."""
try:
# Extract from nested content structure
content = list_data.get("content", {})
if not isinstance(content, dict):
return []
items = content.get("items", [])
bullet_style_def = styles.get("bullet_list", {})
bulletStyleDef = styles.get("bullet_list", {})
normalStyle = self._createNormalStyle(styles)
elements = []
for item in items:
if isinstance(item, str):
elements.append(
Paragraph(f"{self._markdownInlineToReportlabXml(item)}", self._createNormalStyle(styles))
)
runs = self._inlineRunsForListItem(item)
if isinstance(item, list):
xml = self._renderInlineRunsToPdfXml(runs)
elements.append(Paragraph(f"\u2022 {_wrapEmojiSpansInXml(xml)}", normalStyle))
elif isinstance(item, str):
elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item)}", normalStyle))
elif isinstance(item, dict) and "text" in item:
elements.append(
Paragraph(
f"{self._markdownInlineToReportlabXml(item['text'])}",
self._createNormalStyle(styles),
)
)
elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item['text'])}", normalStyle))
if elements:
elements.append(Spacer(1, bullet_style_def.get("space_after", 3)))
elements.append(Spacer(1, bulletStyleDef.get("space_after", 3)))
return elements
@ -848,17 +904,24 @@ class RendererPdf(BaseRenderer):
def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON paragraph to PDF elements using AI-generated styles."""
try:
# Extract from nested content structure
content = paragraph_data.get("content", {})
if isinstance(content, dict):
text = content.get("text", "")
elif isinstance(content, str):
text = content
else:
text = ""
if isinstance(content, str):
content = {"text": content}
if not isinstance(content, dict):
return []
normalStyle = self._createNormalStyle(styles)
if "inlineRuns" in content:
runs = self._inlineRunsFromContent(content)
xml = self._renderInlineRunsToPdfXml(runs)
if xml:
return [Paragraph(_wrapEmojiSpansInXml(xml), normalStyle)]
return []
text = content.get("text", "")
if text:
return [self._paragraphFromInlineMarkdown(text, self._createNormalStyle(styles))]
return [self._paragraphFromInlineMarkdown(text, normalStyle)]
return []
@ -1030,20 +1093,18 @@ class RendererPdf(BaseRenderer):
pilImage = PILImage.open(imageStream)
originalWidth, originalHeight = pilImage.size
# Calculate available page dimensions (A4 with margins: 72pt left/right, 72pt top, 18pt bottom)
pageWidth = A4[0] # 595.27 points
pageHeight = A4[1] # 841.89 points
leftMargin = 72
rightMargin = 72
topMargin = 72
bottomMargin = 18
# Use actual frame dimensions from SimpleDocTemplate
# Frame is smaller than page minus margins due to internal spacing
# From error message: frame is 439.27559055118115 x 739.8897637795277
# Use conservative values with safety margin
availableWidth = 430.0 # Slightly smaller than frame width for safety
availableHeight = 730.0 # Slightly smaller than frame height for safety
# Use page dimensions minus margins with generous safety buffer
# A4 = 595.27 x 841.89 pt; frame = page - margins - internal padding
_us = getattr(self, '_unifiedStyle', None) or {}
_pageMgn = (_us.get('page') or {}).get('marginsPt') or {}
marginTop = _pageMgn.get('top', 60)
marginBottom = _pageMgn.get('bottom', 60)
marginLeft = _pageMgn.get('left', 60)
marginRight = _pageMgn.get('right', 60)
availableWidth = pageWidth - marginLeft - marginRight - 20 # 20pt safety
availableHeight = pageHeight - marginTop - marginBottom - 80 # 80pt safety for header/footer
# Convert original image size from pixels to points
# PIL provides size in pixels, need to convert to points

View file

@ -59,7 +59,7 @@ class RendererPptx(BaseRenderer):
from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
"""
Render content as PowerPoint presentation from JSON data.
@ -68,7 +68,7 @@ class RendererPptx(BaseRenderer):
title: Title for the presentation
userPrompt: User prompt for AI styling
aiService: AI service for styling
**kwargs: Additional rendering options
style: Unified style dict from pipeline (preferred over AI-generated styles)
Returns:
Base64-encoded PowerPoint presentation as string
@ -81,8 +81,19 @@ class RendererPptx(BaseRenderer):
from pptx.dml.color import RGBColor
import re
# Get style set: use styles from metadata if available, otherwise enhance with AI
# Get style set: prefer unified style, then metadata, then AI-enhanced
if style:
internalStyle = self._convertUnifiedStyleToInternal(style)
defaultPptx = self._getDefaultStyleSet()
for key in ("slide_size", "content_per_slide", "design_theme", "color_scheme", "background_style", "accent_colors", "professional_grade", "executive_ready"):
internalStyle[key] = defaultPptx.get(key)
internalStyle["heading"] = internalStyle["heading1"]
internalStyle["subheading"] = internalStyle["heading2"]
styles = internalStyle
self._unifiedStyle = style
else:
styles = await self._getStyleSet(extractedContent, userPrompt, aiService)
self._unifiedStyle = None
# Create new presentation
prs = Presentation()
@ -910,6 +921,9 @@ JSON ONLY. NO OTHER TEXT."""
# Extract from nested content structure
content = paragraph_data.get("content", {})
if isinstance(content, dict):
if content.get("inlineRuns"):
text = "".join(r.get("value", "") for r in content["inlineRuns"])
else:
text = content.get("text", "")
elif isinstance(content, str):
text = content
@ -917,8 +931,7 @@ JSON ONLY. NO OTHER TEXT."""
text = ""
if text:
# Limit paragraph length based on content density
max_length = 200 # Default limit
max_length = 200
if len(text) > max_length:
text = text[:max_length] + "..."
@ -1303,6 +1316,32 @@ JSON ONLY. NO OTHER TEXT."""
r.text = text[pos:]
_applyBase(r)
def _renderInlineRunsPptx(self, runs, paragraph, fontSize=None, fontColor=None):
    """Write InlineRun dicts into a python-pptx paragraph as styled runs.

    Clears the paragraph text, then appends one pptx run per inline run.
    Bold/italic toggle the matching font flags, code runs switch to the
    unified monospace font at a reduced size, and link runs are rendered
    underlined (actual hyperlink wiring is not done here).
    """
    from pptx.util import Pt
    paragraph.text = ""
    unified = getattr(self, '_unifiedStyle', None)
    codeFont = unified["fonts"]["monospace"] if unified else "Courier New"
    for inlineRun in runs:
        kind = inlineRun.get("type", "text")
        pptxRun = paragraph.add_run()
        pptxRun.text = inlineRun.get("value", "")
        if fontSize:
            pptxRun.font.size = fontSize
        if fontColor:
            pptxRun.font.color.rgb = fontColor
        if kind == "bold":
            pptxRun.font.bold = True
        elif kind == "italic":
            pptxRun.font.italic = True
        elif kind == "code":
            pptxRun.font.name = codeFont
            # Shrink code spans ~15% (floor 8pt) so monospace fits body text.
            if fontSize and hasattr(fontSize, 'pt'):
                pptxRun.font.size = Pt(max(8, int(fontSize.pt * 0.85)))
        elif kind == "link":
            pptxRun.font.underline = True
def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float = None, max_width: float = None) -> None:
"""Add a PowerPoint table to slide."""
try:
@ -1374,7 +1413,8 @@ JSON ONLY. NO OTHER TEXT."""
cell = table.cell(0, col_idx)
# Clear existing text and set new text
cell.text_frame.clear()
header_text = str(header) if header else ""
cellRuns = self._inlineRunsForCell(header)
header_text = "".join(r.get("value", "") for r in cellRuns)
cell.text = header_text
# Ensure paragraph exists
@ -1420,7 +1460,8 @@ JSON ONLY. NO OTHER TEXT."""
cell = table.cell(row_idx, col_idx)
# Clear existing text and set new text
cell.text_frame.clear()
cell_text = str(cell_data) if cell_data is not None else ""
cellRuns = self._inlineRunsForCell(cell_data)
cell_text = "".join(r.get("value", "") for r in cellRuns)
cell.text = cell_text
# Ensure paragraph exists
@ -1462,9 +1503,8 @@ JSON ONLY. NO OTHER TEXT."""
fontColor = RGBColor(*self._getSafeColor(listStyle.get("color", (47, 47, 47))))
for item in items:
itemText = item.get("text", "") if isinstance(item, dict) else str(item)
if not itemText or not itemText.strip():
continue
runs = self._inlineRunsForListItem(item)
isNewFormat = isinstance(item, list)
p = text_frame.add_paragraph()
p.level = 0
@ -1472,21 +1512,33 @@ JSON ONLY. NO OTHER TEXT."""
p.space_before = Pt(2)
p.space_after = Pt(2)
# Consistent bullet prefix
self._addMarkdownInlineRuns(p, f"{itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
if isNewFormat:
bulletRuns = [{"type": "text", "value": " \u2022 "}] + runs
self._renderInlineRunsPptx(bulletRuns, p, fontSize=fontSize, fontColor=fontColor)
else:
itemText = item.get("text", "") if isinstance(item, dict) else str(item)
if not itemText or not itemText.strip():
continue
self._addMarkdownInlineRuns(p, f" \u2022 {itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
# Subitems
# Subitems (only for dict-style items)
if isinstance(item, dict):
for sub in item.get("subitems", []):
subText = sub.get("text", "") if isinstance(sub, dict) else str(sub)
if not subText:
continue
subRuns = self._inlineRunsForListItem(sub)
isSubNew = isinstance(sub, list)
sp = text_frame.add_paragraph()
sp.level = 0
sp.alignment = PP_ALIGN.LEFT
sp.space_before = Pt(1)
sp.space_after = Pt(1)
self._addMarkdownInlineRuns(sp, f" {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
if isSubNew:
subBulletRuns = [{"type": "text", "value": " \u2013 "}] + subRuns
self._renderInlineRunsPptx(subBulletRuns, sp, fontSize=fontSize, fontColor=fontColor)
else:
subText = sub.get("text", "") if isinstance(sub, dict) else str(sub)
if not subText:
continue
self._addMarkdownInlineRuns(sp, f" \u2013 {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
except Exception as e:
logger.warning(f"Error adding bullet list to slide: {str(e)}")
@ -1540,13 +1592,21 @@ JSON ONLY. NO OTHER TEXT."""
# Extract from nested content structure
content = element.get("content", {})
if isinstance(content, dict):
inlineRuns = self._inlineRunsFromContent(content)
hasInlineRuns = content.get("inlineRuns") is not None
text = content.get("text", "")
elif isinstance(content, str):
text = content
inlineRuns = [{"type": "text", "value": text}] if text else []
hasInlineRuns = False
else:
text = ""
inlineRuns = []
hasInlineRuns = False
if not inlineRuns and not text:
return
if text:
p = text_frame.add_paragraph()
p.level = 0
@ -1562,12 +1622,15 @@ JSON ONLY. NO OTHER TEXT."""
fSize = Pt(calculated_size)
fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
fBold = paragraph_style.get("bold", False)
if hasInlineRuns:
self._renderInlineRunsPptx(inlineRuns, p, fontSize=fSize, fontColor=fColor)
else:
self._addMarkdownInlineRuns(p, text, fontSize=fSize, fontColor=fColor, fontBold=fBold)
# Add proper spacing
p.space_before = Pt(6) # Space before paragraph
p.space_after = Pt(6) # Space after paragraph
p.line_spacing = 1.2 # Line spacing for readability
p.space_before = Pt(6)
p.space_after = Pt(6)
p.line_spacing = 1.2
align = paragraph_style.get("align", "left")
if align == "center":

View file

@ -68,17 +68,17 @@ class RendererXlsx(BaseRenderer):
from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
try:
if not OPENPYXL_AVAILABLE:
# Fallback to CSV if openpyxl not available
from .rendererCsv import RendererCsv
csvRenderer = RendererCsv()
return await csvRenderer.render(extractedContent, title, userPrompt, aiService)
return await csvRenderer.render(extractedContent, title, userPrompt, aiService, style=style)
# Generate Excel using AI-analyzed styling
excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)
excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService, style=style)
# Extract metadata for document type and other info
metadata = extractedContent.get("metadata", {}) if extractedContent else {}
@ -298,14 +298,21 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str:
"""Generate Excel content from structured JSON document using AI-generated styling."""
try:
# Debug output
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER")
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER")
# Get style set: use styles from metadata if available, otherwise enhance with AI
# Store unified style for use by inline-run helpers
self._unifiedStyle = style
# Get style set: prefer unified style, fall back to legacy approach
if style:
styles = self._convertUnifiedStyleToInternal(style)
styles = self._convertColorsFormat(styles)
else:
styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
# Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
@ -511,6 +518,10 @@ class RendererXlsx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "FF2F2F2F", "background": "FFF5F5F5"}
}
def _renderInlineRuns(self, runs: list) -> str:
"""Flatten inline runs to plain text for Excel cells."""
return "".join(r.get("value", "") for r in runs)
async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper Excel color conversion."""
if not aiService:
@ -1206,7 +1217,9 @@ class RendererXlsx(BaseRenderer):
# Add headers with formatting - OPTIMIZED: use cached style objects
for col, header in enumerate(headers, 1):
sanitized_header = self._sanitizeCellValue(header)
runs = self._inlineRunsForCell(header)
headerText = self._renderInlineRuns(runs)
sanitized_header = self._sanitizeCellValue(headerText)
cell = sheet.cell(row=headerRow, column=col, value=sanitized_header)
# Apply styling with fallbacks - use pre-calculated objects
@ -1272,7 +1285,9 @@ class RendererXlsx(BaseRenderer):
cell_values = cell_values[:header_count]
for col, cell_value in enumerate(cell_values, 1):
sanitized_value = self._sanitizeCellValue(cell_value)
runs = self._inlineRunsForCell(cell_value)
cellText = self._renderInlineRuns(runs)
sanitized_value = self._sanitizeCellValue(cellText)
cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
# Apply styling with fallbacks - use pre-calculated objects
@ -1311,20 +1326,20 @@ class RendererXlsx(BaseRenderer):
def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a list element to Excel sheet. Expects nested content structure."""
try:
# Extract from nested content structure
content = element.get("content", {})
if not isinstance(content, dict):
return startRow
list_items = content.get("items") or []
# Ensure list_items is a list
if not isinstance(list_items, list):
list_items = []
listItems = content.get("items") or []
if not isinstance(listItems, list):
listItems = []
list_style = styles.get("bullet_list", {})
for item in list_items:
sheet.cell(row=startRow, column=1, value=f"{item}")
if list_style.get("color"):
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"]))
listStyle = styles.get("bullet_list", {})
for item in listItems:
runs = self._inlineRunsForListItem(item)
text = self._renderInlineRuns(runs)
sheet.cell(row=startRow, column=1, value=f"\u2022 {text}")
if listStyle.get("color"):
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(listStyle["color"]))
startRow += 1
return startRow
@ -1336,10 +1351,10 @@ class RendererXlsx(BaseRenderer):
def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a paragraph element to Excel sheet. Expects nested content structure."""
try:
# Extract from nested content structure
content = element.get("content", {})
if isinstance(content, dict):
text = content.get("text", "")
runs = self._inlineRunsFromContent(content)
text = self._renderInlineRuns(runs)
elif isinstance(content, str):
text = content
else:

View file

@ -0,0 +1,75 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Default style definitions and style resolution for document rendering."""
from typing import Any, Dict
DEFAULT_STYLE: Dict[str, Any] = {
"fonts": {
"primary": "Calibri",
"monospace": "Consolas",
},
"colors": {
"primary": "#1F3864",
"secondary": "#2C3E50",
"accent": "#2980B9",
"background": "#FFFFFF",
},
"headings": {
"h1": {"sizePt": 24, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 12, "spaceAfterPt": 6},
"h2": {"sizePt": 18, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 10, "spaceAfterPt": 4},
"h3": {"sizePt": 14, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 8, "spaceAfterPt": 3},
"h4": {"sizePt": 12, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 6, "spaceAfterPt": 2},
},
"paragraph": {"sizePt": 11, "lineSpacing": 1.15, "color": "#333333"},
"table": {
"headerBg": "#1F3864",
"headerFg": "#FFFFFF",
"headerSizePt": 10,
"bodySizePt": 10,
"rowBandingEven": "#F2F6FC",
"rowBandingOdd": "#FFFFFF",
"borderColor": "#CBD5E1",
"borderWidthPt": 0.5,
},
"list": {"bulletChar": "\u2022", "indentPt": 18, "sizePt": 11},
"image": {"defaultWidthPt": 480, "maxWidthPt": 800, "alignment": "center"},
"codeBlock": {"fontSizePt": 9, "background": "#F8F9FA", "borderColor": "#E2E8F0"},
"page": {
"format": "A4",
"marginsPt": {"top": 60, "bottom": 60, "left": 60, "right": 60},
"showPageNumbers": True,
"headerHeight": 30,
"footerHeight": 30,
"headerLogo": None,
"headerText": "",
"footerText": "",
},
}
def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
"""Recursively merge override into base. Both dicts left unchanged; returns new dict."""
result = {}
for key in base:
if key in override:
baseVal = base[key]
overVal = override[key]
if isinstance(baseVal, dict) and isinstance(overVal, dict):
result[key] = _deepMerge(baseVal, overVal)
else:
result[key] = overVal
else:
result[key] = base[key]
for key in override:
if key not in base:
result[key] = override[key]
return result
def resolveStyle(agentStyle: dict | None) -> Dict[str, Any]:
"""Deep-merge DEFAULT_STYLE <- agentStyle. Returns fully resolved style dict."""
if not agentStyle:
return dict(DEFAULT_STYLE)
return _deepMerge(DEFAULT_STYLE, agentStyle)

View file

@ -9,11 +9,70 @@ from typing import Any, Dict
logger = logging.getLogger(__name__)
def _parseInlineRuns(text: str) -> list:
"""
Parse inline markdown formatting into a list of InlineRun dicts.
Handles: images, links, bold, italic, inline code, plain text.
Uses a regex-based tokenizer that processes tokens left-to-right.
"""
if not text:
return [{"type": "text", "value": ""}]
# Pattern order matters: images before links, bold before italic
_TOKEN_RE = re.compile(
r'!\[(?P<imgAlt>[^\]]*)\]\((?P<imgSrc>[^)"]+)(?:\s+"(?P<imgWidth>\d+)pt")?\)' # image
r'|\[(?P<linkText>[^\]]+)\]\((?P<linkHref>[^)]+)\)' # link
r'|`(?P<code>[^`]+)`' # inline code
r'|\*\*(?P<bold>.+?)\*\*' # bold
r'|(?<!\w)\*(?P<italic1>.+?)\*(?!\w)' # italic *x*
r'|(?<!\w)_(?P<italic2>.+?)_(?!\w)' # italic _x_
)
runs = []
lastEnd = 0
for m in _TOKEN_RE.finditer(text):
# Plain text before this match
if m.start() > lastEnd:
runs.append({"type": "text", "value": text[lastEnd:m.start()]})
if m.group("imgAlt") is not None or m.group("imgSrc") is not None:
alt = (m.group("imgAlt") or "").strip() or "Image"
src = (m.group("imgSrc") or "").strip()
widthStr = m.group("imgWidth")
run = {"type": "image", "value": alt}
if src.startswith("file:"):
run["fileId"] = src[5:]
else:
run["href"] = src
if widthStr:
run["widthPt"] = int(widthStr)
runs.append(run)
elif m.group("linkText") is not None:
runs.append({"type": "link", "value": m.group("linkText"), "href": m.group("linkHref")})
elif m.group("code") is not None:
runs.append({"type": "code", "value": m.group("code")})
elif m.group("bold") is not None:
runs.append({"type": "bold", "value": m.group("bold")})
elif m.group("italic1") is not None:
runs.append({"type": "italic", "value": m.group("italic1")})
elif m.group("italic2") is not None:
runs.append({"type": "italic", "value": m.group("italic2")})
lastEnd = m.end()
# Trailing plain text
if lastEnd < len(text):
runs.append({"type": "text", "value": text[lastEnd:]})
return runs if runs else [{"type": "text", "value": text}]
def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
"""
Convert markdown content to the standard document JSON format expected by renderReport.
Supports headings, code blocks, tables, lists, images (file: refs), paragraphs.
For plain text: wraps entire content in a single paragraph section.
Convert markdown content to the standard document JSON format with Inline-Run model.
Sections use inlineRuns (list of run dicts) instead of plain text strings.
Supports headings, code blocks, tables, lists, images, paragraphs.
"""
if not isinstance(markdown, str):
markdown = str(markdown) if markdown else ""
@ -31,7 +90,7 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
while i < len(lines):
line = lines[i]
# Headings
# Headings (plain text, no inline formatting)
headingMatch = re.match(r"^(#{1,6})\s+(.+)", line)
if headingMatch:
level = len(headingMatch.group(1))
@ -43,7 +102,7 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
i += 1
continue
# Fenced code blocks
# Fenced code blocks (no inline formatting)
codeMatch = re.match(r"^```(\w*)", line)
if codeMatch:
lang = codeMatch.group(1) or "text"
@ -59,14 +118,14 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
})
continue
# Tables
# Tables - cells are List[InlineRun]
tableMatch = re.match(r"^\|(.+)\|$", line)
if tableMatch and (i + 1) < len(lines) and re.match(r"^\|[\s\-:|]+\|$", lines[i + 1]):
headerCells = [c.strip() for c in tableMatch.group(1).split("|")]
headerCells = [_parseInlineRuns(c.strip()) for c in tableMatch.group(1).split("|")]
i += 2
rows = []
while i < len(lines) and re.match(r"^\|(.+)\|$", lines[i]):
rowCells = [c.strip() for c in lines[i][1:-1].split("|")]
rowCells = [_parseInlineRuns(c.strip()) for c in lines[i][1:-1].split("|")]
rows.append(rowCells)
i += 1
sections.append({
@ -75,14 +134,14 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
})
continue
# Bullet / numbered lists
# Bullet / numbered lists - items are List[List[InlineRun]]
listMatch = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", line)
if listMatch:
isNumbered = bool(re.match(r"\d+[.)]", listMatch.group(2)))
items = []
while i < len(lines) and re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i]):
m = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i])
items.append({"text": m.group(3).strip()})
items.append(_parseInlineRuns(m.group(3).strip()))
i += 1
sections.append({
"id": _nextId(), "content_type": "bullet_list", "order": order,
@ -95,46 +154,50 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
i += 1
continue
# Images (simplified: store as paragraph with ref for now - full resolution needs Knowledge Store)
imgMatch = re.match(r"^!\[([^\]]*)\]\(([^)]+)\)", line)
# Standalone image on its own line -> block-level image section
imgMatch = re.match(r"^!\[([^\]]*)\]\(([^)\"]+)(?:\s+\"(\d+)pt\")?\)\s*$", line)
if imgMatch:
altText = imgMatch.group(1).strip() or "Image"
src = imgMatch.group(2).strip()
widthStr = imgMatch.group(3)
fileId = src[5:] if src.startswith("file:") else ""
sections.append({
"id": _nextId(), "content_type": "image", "order": order,
"elements": [{
"content": {
content = {
"altText": altText,
"base64Data": "",
"_fileRef": fileId,
"_srcUrl": src if not fileId else "",
}
}],
if widthStr:
content["widthPt"] = int(widthStr)
sections.append({
"id": _nextId(), "content_type": "image", "order": order,
"elements": [{"content": content}],
})
i += 1
continue
# Paragraph
# Paragraph - produces inlineRuns
paraLines = []
while i < len(lines) and lines[i].strip() and not re.match(
r"^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )", lines[i]
r"^(#{1,6}\s|```|\|.+\||!\[[^\]]*\]\([^)]+\)\s*$|(\s*)([-*+]|\d+[.)]) )", lines[i]
):
paraLines.append(lines[i])
i += 1
if paraLines:
combinedText = " ".join(paraLines)
sections.append({
"id": _nextId(), "content_type": "paragraph", "order": order,
"elements": [{"content": {"text": " ".join(paraLines)}}],
"elements": [{"content": {"inlineRuns": _parseInlineRuns(combinedText)}}],
})
continue
i += 1
if not sections:
fallbackText = markdown.strip() or "(empty)"
sections.append({
"id": _nextId(), "content_type": "paragraph", "order": order,
"elements": [{"content": {"text": markdown.strip() or "(empty)"}}],
"elements": [{"content": {"inlineRuns": _parseInlineRuns(fallbackText)}}],
})
return {

View file

@ -302,6 +302,30 @@ async def _executeWithRetry(executor, node, context, maxRetries: int = 0, retryD
raise lastError
def _substituteFeatureInstancePlaceholders(
graph: Dict[str, Any],
targetFeatureInstanceId: str,
) -> Dict[str, Any]:
"""Replace ``{{featureInstanceId}}`` placeholders in the serialised graph.
Works on the full JSON representation so that placeholders inside nested
parameter dicts, prompt strings, etc. are all caught. Already-resolved
concrete UUIDs (pre-baked by ``_copyTemplateWorkflows``) are left untouched
because the placeholder literal ``{{featureInstanceId}}`` will not match.
"""
import json as _json
raw = _json.dumps(graph)
if "{{featureInstanceId}}" not in raw:
return graph
replaced = raw.replace("{{featureInstanceId}}", targetFeatureInstanceId)
logger.debug(
"_substituteFeatureInstancePlaceholders: resolved %d occurrence(s) -> %s",
raw.count("{{featureInstanceId}}"),
targetFeatureInstanceId,
)
return _json.loads(replaced)
async def executeGraph(
graph: Dict[str, Any],
services: Any,
@ -315,6 +339,7 @@ async def executeGraph(
runId: Optional[str] = None,
run_envelope: Optional[Dict[str, Any]] = None,
label: Optional[str] = None,
targetFeatureInstanceId: Optional[str] = None,
) -> Dict[str, Any]:
"""
Execute automation2 graph. Returns { success, nodeOutputs, error?, stopped? }.
@ -322,14 +347,16 @@ async def executeGraph(
pauses the run, and returns { success: False, paused: True, taskId, runId }.
For resume: pass initialNodeOutputs (with result for the human node) and startAfterNodeId.
For fresh runs: pass run_envelope (unified start payload for the start node); normalized with userId into context.runEnvelope.
targetFeatureInstanceId: resolves {{featureInstanceId}} placeholders in the graph JSON before execution.
"""
logger.info(
"executeGraph start: instanceId=%s workflowId=%s userId=%s mandateId=%s resume=%s",
"executeGraph start: instanceId=%s workflowId=%s userId=%s mandateId=%s resume=%s targetInstance=%s",
instanceId,
workflowId,
userId,
mandateId,
startAfterNodeId is not None,
targetFeatureInstanceId,
)
from modules.workflows.processing.shared.methodDiscovery import discoverMethods
discoverMethods(services)
@ -338,6 +365,9 @@ async def executeGraph(
materializeFeatureInstanceRefs,
)
if targetFeatureInstanceId:
graph = _substituteFeatureInstancePlaceholders(graph, targetFeatureInstanceId)
# Phase-5 Schicht-4: typed-ref envelopes are materialized FIRST so the
# subsequent connection-ref pass and validation see the canonical shape.
graph = materializeFeatureInstanceRefs(graph)

View file

@ -0,0 +1,18 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Shared helpers for AI workflow actions."""
def applyCommonAiParams(parameters: dict, request) -> None:
    """Copy common node-level AI parameters onto *request*.

    Transfers ``requireNeutralization`` (coerced to bool) and ``allowedModels``
    (only when it is a non-empty list) from the node's parameter dict to the
    AI call request, lazily creating ``request.options`` when absent.
    """
    neutralize = parameters.get("requireNeutralization")
    if neutralize is not None:
        request.requireNeutralization = bool(neutralize)

    models = parameters.get("allowedModels")
    if isinstance(models, list) and models:
        if not request.options:
            # Create an options container on demand; imported lazily to avoid
            # a module-level dependency cycle.
            from modules.datamodels.datamodelAi import AiCallOptions
            request.options = AiCallOptions()
        request.options.allowedModels = models

View file

@ -67,6 +67,8 @@ async def consolidate(self, parameters: Dict[str, Any]) -> ActionResult:
prompt=prompt,
options=AiCallOptions(operationType=OperationTypeEnum.DATA_ANALYSE),
)
from modules.workflows.methods.methodAi._common import applyCommonAiParams
applyCommonAiParams(parameters, req)
resp = await ai_service.callAi(req)
except (SubscriptionInactiveException, BillingContextError):
raise

View file

@ -36,6 +36,10 @@ async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
}
if parentOperationId:
processParams["parentOperationId"] = parentOperationId
if parameters.get("allowedModels"):
processParams["allowedModels"] = parameters["allowedModels"]
if parameters.get("requireNeutralization") is not None:
processParams["requireNeutralization"] = parameters["requireNeutralization"]
return await self.process(processParams)

View file

@ -55,6 +55,16 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
processingMode=ProcessingModeEnum.DETAILED
)
# Apply node-level AI params
allowedModels = parameters.get("allowedModels")
if allowedModels and isinstance(allowedModels, list):
options.allowedModels = allowedModels
requireNeutralization = parameters.get("requireNeutralization")
if requireNeutralization is not None:
_ctx = getattr(self.services, '_context', None)
if _ctx:
_ctx.requireNeutralization = bool(requireNeutralization)
# outputFormat: Optional - if None, formats determined from prompt by AI
aiResponse: AiResponse = await self.services.ai.callAiContent(
prompt=prompt,

View file

@ -59,6 +59,16 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
compressContext=False
)
# Apply node-level AI params
allowedModels = parameters.get("allowedModels")
if allowedModels and isinstance(allowedModels, list):
options.allowedModels = allowedModels
requireNeutralization = parameters.get("requireNeutralization")
if requireNeutralization is not None:
_ctx = getattr(self.services, '_context', None)
if _ctx:
_ctx.requireNeutralization = bool(requireNeutralization)
# outputFormat: Optional - if None, formats determined from prompt by AI
aiResponse: AiResponse = await self.services.ai.callAiContent(
prompt=prompt,

View file

@ -73,6 +73,47 @@ def _action_docs_to_content_parts(services, docs: List[Any]) -> List[ContentPart
logger.info(f"ai.process: Extracted {len(ec.parts)} parts from {name} (no persistence)")
return all_parts
def _resolve_file_refs_to_content_parts(services, fileIdRefs) -> List[ContentPart]:
    """Fetch files by ID from the file store and extract content.

    Used for automation2 workflows where documents are file-store references,
    not chat message attachments. Missing files or empty payloads are logged
    and skipped; only parts with data (or image parts) are kept.
    """
    from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy

    store = getattr(services, 'interfaceDbComponent', None)
    extractor = getattr(services, 'extraction', None)
    if not store or not extractor:
        logger.warning("_resolve_file_refs_to_content_parts: missing interfaceDbComponent or extraction service")
        return []

    collected: List[ContentPart] = []
    extractionOpts = ExtractionOptions(prompt="", mergeStrategy=MergeStrategy())
    for ref in fileIdRefs:
        fileId = ref.documentId
        meta = store.getFile(fileId)
        if not meta:
            logger.warning(f"_resolve_file_refs_to_content_parts: file {fileId} not found")
            continue
        payload = store.getFileData(fileId)
        if not payload:
            logger.warning(f"_resolve_file_refs_to_content_parts: no data for file {fileId}")
            continue
        fileName = getattr(meta, 'fileName', fileId)
        mimeType = getattr(meta, 'mimeType', 'application/octet-stream')
        extracted = extractor.extractContentFromBytes(
            documentBytes=payload,
            fileName=fileName,
            mimeType=mimeType,
            documentId=fileId,
            options=extractionOpts,
        )
        for part in extracted.parts:
            # Keep only parts that carry data, plus image parts (which may
            # store their payload elsewhere).
            if part.data or getattr(part, "typeGroup", "") == "image":
                part.metadata.setdefault("originalFileName", fileName)
                collected.append(part)
        logger.info(f"_resolve_file_refs_to_content_parts: extracted {len(extracted.parts)} parts from {fileName}")
    return collected
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
operationId = None
try:
@ -130,6 +171,17 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
f"to DocumentReferenceList with {len(documentList.references)} references"
)
# Resolve DocumentItemReferences (file-ID refs from automation2) directly
# from the file store. These cannot be resolved via chat messages.
from modules.datamodels.datamodelDocref import DocumentItemReference
fileIdRefs = [r for r in documentList.references if isinstance(r, DocumentItemReference)]
if fileIdRefs:
extractedParts = _resolve_file_refs_to_content_parts(self.services, fileIdRefs)
if extractedParts:
inline_content_parts = (inline_content_parts or []) + extractedParts
remaining = [r for r in documentList.references if not isinstance(r, DocumentItemReference)]
documentList = DocumentReferenceList(references=remaining)
# Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only.
resultType = parameters.get("resultType")
simpleMode = parameters.get("simpleMode", False)
@ -158,6 +210,18 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
mimeMap = {"txt": "text/plain", "json": "application/json", "html": "text/html", "md": "text/markdown", "csv": "text/csv", "xml": "application/xml"}
output_mime_type = mimeMap.get(normalized_result_type, "text/plain") if normalized_result_type else "text/plain"
# Normalize context: workflow refs may resolve to dict/list instead of str
paramContext = parameters.get("context")
if paramContext is not None and not isinstance(paramContext, str):
try:
paramContext = json.dumps(paramContext, ensure_ascii=False, default=str)
parameters["context"] = paramContext
logger.info(f"ai.process: Serialized non-string context ({type(parameters.get('context')).__name__}) to JSON ({len(paramContext)} chars)")
except Exception as e:
logger.warning(f"ai.process: Failed to serialize context: {e}")
paramContext = str(paramContext)
parameters["context"] = paramContext
# Phase 7.3: Pass documentList and/or contentParts to AI service
contentParts: Optional[List[ContentPart]] = inline_content_parts
if "contentParts" in parameters and not inline_content_parts:
@ -212,6 +276,9 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
)
)
from modules.workflows.methods.methodAi._common import applyCommonAiParams
applyCommonAiParams(parameters, request)
aiResponse_obj = await self.services.ai.callAi(request)
# Convert AiCallResponse to AiResponse format
@ -243,6 +310,16 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE
)
# Apply node-level AI params (allowedModels, requireNeutralization)
allowedModels = parameters.get("allowedModels")
if allowedModels and isinstance(allowedModels, list):
options.allowedModels = allowedModels
requireNeutralization = parameters.get("requireNeutralization")
if requireNeutralization is not None:
_ctx = getattr(self.services, '_context', None)
if _ctx:
_ctx.requireNeutralization = bool(requireNeutralization)
# Get generationIntent from parameters (required for DATA_GENERATE)
# Default to "document" if not provided (most common use case)
# For code generation, use ai.generateCode action or explicitly pass generationIntent="code"

View file

@ -39,6 +39,10 @@ async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
}
if parentOperationId:
processParams["parentOperationId"] = parentOperationId
if parameters.get("allowedModels"):
processParams["allowedModels"] = parameters["allowedModels"]
if parameters.get("requireNeutralization") is not None:
processParams["requireNeutralization"] = parameters["requireNeutralization"]
return await self.process(processParams)

View file

@ -41,6 +41,10 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
processParams["resultType"] = resultType
if parentOperationId:
processParams["parentOperationId"] = parentOperationId
if parameters.get("allowedModels"):
processParams["allowedModels"] = parameters["allowedModels"]
if parameters.get("requireNeutralization") is not None:
processParams["requireNeutralization"] = parameters["requireNeutralization"]
return await self.process(processParams)

View file

@ -56,6 +56,23 @@ class MethodAi(MethodBase):
required=False,
description="Document reference(s) in any format to use as input/context"
),
"context": WorkflowActionParameter(
name="context",
type="str",
frontendType=FrontendType.TEXTAREA,
required=False,
default="",
description="Additional context data (string or upstream-bound dict/list, e.g. accounting data) appended to the prompt. Non-string values are JSON-serialized."
),
"documentTheme": WorkflowActionParameter(
name="documentTheme",
type="str",
frontendType=FrontendType.SELECT,
frontendOptions=["general", "finance", "legal", "technical", "hr"],
required=False,
default="general",
description="Style hint for the document renderer (e.g. finance, legal). Used by the AI agent to choose colors and layout."
),
"resultType": WorkflowActionParameter(
name="resultType",
type="str",

View file

@ -243,6 +243,7 @@ class WorkflowScheduler:
runEnv = normalize_run_envelope(runEnv, user_id=str(eventUser.id) if eventUser else None)
_wfLabel = wf.get("label") if isinstance(wf, dict) else getattr(wf, "label", None)
_targetInstanceId = wf.get("targetFeatureInstanceId") if isinstance(wf, dict) else getattr(wf, "targetFeatureInstanceId", None)
result = await executeGraph(
graph=wf["graph"],
@ -254,6 +255,7 @@ class WorkflowScheduler:
automation2_interface=iface,
run_envelope=runEnv,
label=_wfLabel,
targetFeatureInstanceId=_targetInstanceId,
)
logger.info(
"WorkflowScheduler: executed workflow %s success=%s paused=%s",

View file

@ -0,0 +1,19 @@
# Archived one-shot scripts
Diese Scripts haben einmal eine konkrete Daten- oder Code-Migration ausgefuehrt
und werden nicht mehr aktiv aufgerufen. Sie bleiben hier liegen, falls jemand
spaeter auf einem alten DB-Dump oder einem alten Branch nochmal denselben Stand
herstellen muss.
KEIN aktives Tool. Nicht aus CI, nicht aus Docs verlinken. Bei Aufraeumarbeiten
(z.B. nach 6 Monaten ohne Anwendung) loeschen.
## Inhalt
| Datei | Migrationsthema | Archiviert am | Begruendung |
|-------|-----------------|---------------|-------------|
| `check_orphan_featureinstance.py` | Vor-Ort-Check mit hardcoded FeatureInstance-/Mandate-UUIDs | 2026-04-29 | Ad-hoc fuer einen konkreten Vorfall |
| `script_db_cleanup_duplicate_roles.py` | Cleanup doppelter Roles wegen `IS NULL`-Bug in `connectorDbPostgre` | 2026-04-29 | Bug ist laengst gefixt, Cleanup ueberall durchgelaufen |
| `migrate_async_to_sync.py` | One-shot Codemod `async def` -> `def` fuer FastAPI-Routes | 2026-04-29 | Refactor abgeschlossen |
| `i18n_rekey_plaintext_keys.py` | Frontend `t('dot.notation')` -> `t('Klartext')` Rekey | 2026-04-29 | Frontend-Migration abgeschlossen (siehe `wiki/c-work/4-done/2026-04-ui-i18n-dynamic-language-sets.md`) |
| `script_db_migrate_accessrules_objectkeys.py` | AccessRule-Items: kurz -> vollqualifiziert (Navigation-API) | 2026-04-29 | Navigation-API live, MIGRATION_MAP nur fuer trustee+realestate hardcoded |

View file

@ -1,25 +0,0 @@
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
import psycopg2, psycopg2.extras
from modules.shared.configuration import APP_CONFIG
c = psycopg2.connect(
host=APP_CONFIG.get('DB_HOST','localhost'),
user=APP_CONFIG.get('DB_USER'),
password=APP_CONFIG.get('DB_PASSWORD_SECRET'),
port=int(APP_CONFIG.get('DB_PORT',5432)),
dbname='poweron_app',
)
cur = c.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
cur.execute('SELECT id, name, label, enabled, "deletedAt", "sysCreatedAt" FROM "Mandate" ORDER BY "sysCreatedAt"')
print("All Mandates in poweron_app:")
for r in cur.fetchall():
print(f" id={r['id']} name={r['name']} label={r['label']} enabled={r['enabled']} deletedAt={r['deletedAt']}")
cur.execute('SELECT COUNT(*) AS n FROM "FeatureInstance" WHERE "featureCode" = %s', ("redmine",))
print(f"\nTotal redmine FeatureInstances in poweron_app: {cur.fetchone()['n']}")
cur.execute('SELECT id, "mandateId", label, enabled FROM "FeatureInstance" WHERE "featureCode" = %s ORDER BY "sysCreatedAt"', ("redmine",))
for r in cur.fetchall():
print(f" fi={r['id']} mandate={r['mandateId']} label={r['label']} enabled={r['enabled']}")

View file

@ -0,0 +1,382 @@
#!/usr/bin/env python3
"""Audit-Skript fuer Legacy-Bestaende vor Bootstrap-Cleanup (Plan C).
Prueft fuer jede der 5 Bootstrap-Migrationsroutinen, ob noch Restbestand
existiert. Wenn alle Checks 0 / GREEN liefern, kann die jeweilige Routine
sicher aus ``interfaceBootstrap.py`` / ``interfaceDbKnowledge.py`` entfernt
werden.
Checks:
1. Mandate.description != NULL und Mandate.label leer
-> _migrateMandateDescriptionToLabel
2. Mandate.label leer ODER Mandate.name verstoesst gegen Slug-Regeln
-> _migrateMandateNameLabelSlugRules
3. Mandate mit name='Root' und isSystem=False
-> initRootMandate Legacy-Zweig
4. Role mit roleLabel='sysadmin' im Root-Mandat
-> _migrateAndDropSysAdminRole
5. FileContentIndex mit leerem mandateId UND leerem featureInstanceId
-> aggregateMandateRagTotalBytes Fallback-Block
Verwendung:
python -m scripts.script_db_audit_legacy_state # text-output
python -m scripts.script_db_audit_legacy_state --json # JSON-output
python -m scripts.script_db_audit_legacy_state --purge-rag-orphans
# loescht FileContentIndex-Rows ohne mandateId UND ohne featureInstanceId
# (Voraussetzung fuer Removal des aggregateMandateRagTotalBytes-Fallback)
Exit-Code:
0 alle Checks GREEN (Removal sicher)
1 mind. ein Check RED (erst Daten bereinigen)
2 Skript-Fehler (DB nicht erreichbar etc.)
Lese-Zugriffe sind die Default. Schreibzugriffe NUR mit explizitem
``--purge-*``-Flag.
"""
from __future__ import annotations
import argparse
import json
import logging
import os
import sys
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
_gatewayDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _gatewayDir not in sys.path:
sys.path.insert(0, _gatewayDir)
from dotenv import load_dotenv
_envPath = os.path.join(_gatewayDir, "env_dev.env")
if os.path.exists(_envPath):
load_dotenv(_envPath)
from modules.datamodels.datamodelUam import Mandate
from modules.datamodels.datamodelRbac import Role
from modules.datamodels.datamodelKnowledge import FileContentIndex
from modules.security.rootAccess import getRootDbAppConnector
from modules.interfaces.interfaceDbKnowledge import KnowledgeObjects
from modules.shared.mandateNameUtils import isValidMandateName
logging.basicConfig(level=logging.WARNING, format="%(message)s")
logger = logging.getLogger(__name__)
@dataclass
class _CheckResult:
    """Result of a single audit check (one legacy-migration routine)."""

    name: str                    # short slug identifying the check
    routine: str                 # bootstrap routine this check guards
    location: str                # file:line range of the routine
    count: int                   # number of offending rows (-1 on ERROR)
    status: str                  # "GREEN" | "RED" | "ERROR"
    samples: List[Dict[str, Any]] = field(default_factory=list)  # up to 5 example rows
    error: Optional[str] = None  # exception text or skip note, if any

    def toDict(self) -> Dict[str, Any]:
        """Return a JSON-serialisable dict view of this result."""
        return {
            "name": self.name,
            "routine": self.routine,
            "location": self.location,
            "count": self.count,
            "status": self.status,
            "samples": self.samples,
            "error": self.error,
        }
def _getAppDb():
    """Return the application-DB connector via ``getRootDbAppConnector()``."""
    return getRootDbAppConnector()
def _getKnowledgeDb():
    """Return the DB handle of a freshly constructed ``KnowledgeObjects``."""
    return KnowledgeObjects().db
def _checkMandateDescription(db) -> _CheckResult:
    """Find mandates that still carry a description but have no label.

    Guards removal of ``_migrateMandateDescriptionToLabel``.
    """
    offenders: List[Dict[str, Any]] = []
    for row in db.getRecordset(Mandate):
        description = row.get("description")
        if not description or row.get("label"):
            continue
        offenders.append({
            "id": row.get("id"),
            "name": row.get("name"),
            # Trim long descriptions so samples stay readable.
            "description": str(description)[:60],
            "label": row.get("label"),
        })
    return _CheckResult(
        name="mandate-description-to-label",
        routine="_migrateMandateDescriptionToLabel",
        location="interfaces/interfaceBootstrap.py:422-445",
        count=len(offenders),
        status="RED" if offenders else "GREEN",
        samples=offenders[:5],
    )
def _checkMandateSlugRules(db) -> _CheckResult:
    """Find mandates whose name breaks slug rules, collides, or whose label is empty.

    Guards removal of ``_migrateMandateNameLabelSlugRules``. Rows are scanned
    in stable id order so name-collision attribution is deterministic.
    """
    offenders: List[Dict[str, Any]] = []
    usedNames: set[str] = set()
    orderedRows = sorted(db.getRecordset(Mandate), key=lambda row: str(row.get("id", "")))
    for row in orderedRows:
        name = (row.get("name") or "").strip()
        label = row.get("label")
        labelEmpty = True if label is None else not (label or "").strip()
        nameInvalid = not isValidMandateName(name)
        nameCollides = name in usedNames
        # Only names that are valid and first-seen reserve the slug.
        if not nameInvalid and not nameCollides:
            usedNames.add(name)
        if labelEmpty or nameInvalid or nameCollides:
            offenders.append({
                "id": row.get("id"),
                "name": name,
                "label": label,
                "labelEmpty": labelEmpty,
                "nameInvalid": nameInvalid,
                "nameCollides": nameCollides,
            })
    return _CheckResult(
        name="mandate-name-slug-rules",
        routine="_migrateMandateNameLabelSlugRules",
        location="interfaces/interfaceBootstrap.py:448-511",
        count=len(offenders),
        status="RED" if offenders else "GREEN",
        samples=offenders[:5],
    )
def _checkRootMandateLegacy(db) -> _CheckResult:
    """Find legacy root mandates: name 'Root' (capitalised) or isSystem not set.

    Guards removal of the ``initRootMandate`` legacy branch.
    """
    # Capitalised 'Root' rows are always legacy; lowercase 'root' rows are
    # legacy only when the isSystem flag is missing/false.
    upperCased = db.getRecordset(Mandate, recordFilter={"name": "Root"})
    lowerCased = db.getRecordset(Mandate, recordFilter={"name": "root"})
    suspects = list(upperCased)
    for row in lowerCased:
        if not row.get("isSystem"):
            suspects.append(row)
    samples = []
    for row in suspects:
        samples.append({
            "id": row.get("id"),
            "name": row.get("name"),
            "isSystem": row.get("isSystem"),
        })
    return _CheckResult(
        name="root-mandate-legacy",
        routine="initRootMandate-legacy-branch",
        location="interfaces/interfaceBootstrap.py:406-412",
        count=len(samples),
        status="RED" if samples else "GREEN",
        samples=samples[:5],
    )
def _checkSysadminRole(db) -> _CheckResult:
    """Find a leftover legacy 'sysadmin' role in the system root mandate.

    Guards removal of ``_migrateAndDropSysAdminRole``.
    """
    rootMandates = db.getRecordset(Mandate, recordFilter={"name": "root", "isSystem": True})
    if not rootMandates:
        # No root mandate -> the migration target cannot exist. Report GREEN
        # but leave a note that the check was skipped.
        return _CheckResult(
            name="sysadmin-role",
            routine="_migrateAndDropSysAdminRole",
            location="interfaces/interfaceBootstrap.py:840-932",
            count=0,
            status="GREEN",
            samples=[],
            error="kein Root-Mandat gefunden -- Check uebersprungen (kann nicht relevant sein)",
        )
    rootMandateId = str(rootMandates[0].get("id"))
    legacyRoles = db.getRecordset(
        Role,
        recordFilter={"roleLabel": "sysadmin", "mandateId": rootMandateId, "featureInstanceId": None},
    )
    samples = [{"id": role.get("id"), "roleLabel": role.get("roleLabel")} for role in legacyRoles]
    return _CheckResult(
        name="sysadmin-role",
        routine="_migrateAndDropSysAdminRole",
        location="interfaces/interfaceBootstrap.py:840-932",
        count=len(samples),
        status="RED" if samples else "GREEN",
        samples=samples[:5],
    )
def _checkRagFallback(knowDb) -> _CheckResult:
    """Find FileContentIndex rows with neither mandateId nor featureInstanceId.

    Guards removal of the ``aggregateMandateRagTotalBytes`` fallback block.
    """
    orphanRows = []
    for row in knowDb.getRecordset(FileContentIndex):
        hasMandate = bool((row.get("mandateId") or "").strip())
        hasInstance = bool((row.get("featureInstanceId") or "").strip())
        if hasMandate or hasInstance:
            continue
        orphanRows.append({
            "id": row.get("id"),
            "fileName": row.get("fileName"),
            "totalSize": row.get("totalSize"),
        })
    return _CheckResult(
        name="rag-fallback-orphan-index",
        routine="aggregateMandateRagTotalBytes-fallback",
        location="interfaces/interfaceDbKnowledge.py:609-635",
        count=len(orphanRows),
        status="RED" if orphanRows else "GREEN",
        samples=orphanRows[:5],
    )
def _runChecks() -> List[_CheckResult]:
    """Run all audit checks, converting unexpected exceptions into ERROR results.

    A single failing check never aborts the run; it is recorded with
    status ERROR and count -1 instead.
    """
    appDb = _getAppDb()
    knowDb = _getKnowledgeDb()

    results: List[_CheckResult] = []
    appSideChecks = (
        _checkMandateDescription,
        _checkMandateSlugRules,
        _checkRootMandateLegacy,
        _checkSysadminRole,
    )
    for checkFn in appSideChecks:
        try:
            results.append(checkFn(appDb))
        except Exception as exc:
            results.append(
                _CheckResult(
                    name=checkFn.__name__,
                    routine="?",
                    location="?",
                    count=-1,
                    status="ERROR",
                    error=f"{type(exc).__name__}: {exc}",
                )
            )
    # The RAG check runs against the knowledge DB and gets its own guard.
    try:
        results.append(_checkRagFallback(knowDb))
    except Exception as exc:
        results.append(
            _CheckResult(
                name="rag-fallback-orphan-index",
                routine="aggregateMandateRagTotalBytes-fallback",
                location="interfaces/interfaceDbKnowledge.py:609-635",
                count=-1,
                status="ERROR",
                error=f"{type(exc).__name__}: {exc}",
            )
        )
    return results
def _printText(results: List[_CheckResult]) -> None:
    """Render audit results as a human-readable console report plus a verdict."""
    statusMarker = {"GREEN": "[OK]", "RED": "[!!]", "ERROR": "[ERR]"}
    bar = "=" * 78
    print(bar)
    print("BOOTSTRAP-MIGRATIONS LEGACY-STATE-AUDIT")
    print(bar)
    for result in results:
        print(f"\n{statusMarker.get(result.status, '[?]')} {result.name}")
        print(f" Routine : {result.routine}")
        print(f" Location: {result.location}")
        print(f" Count : {result.count}")
        print(f" Status : {result.status}")
        if result.error:
            print(f" Note : {result.error}")
        if result.samples:
            print(" Samples : (max 5)")
            for sample in result.samples:
                print(f" {sample}")
    print("\n" + bar)
    greens = sum(1 for r in results if r.status == "GREEN")
    reds = sum(1 for r in results if r.status == "RED")
    errs = sum(1 for r in results if r.status == "ERROR")
    print(f"SUMMARY: {greens} GREEN {reds} RED {errs} ERROR ({len(results)} total)")
    # Verdict: ERROR dominates (audit incomplete), then RED (data remains).
    if reds == 0 and errs == 0:
        print("VERDICT: alle Migrationsroutinen koennen entfernt werden.")
    elif errs > 0:
        print("VERDICT: Audit unvollstaendig (Fehler) -- bitte Skript fixen.")
    else:
        print("VERDICT: erst Daten bereinigen, dann Routinen entfernen.")
    print(bar)
def _purgeRagOrphans() -> int:
    """Delete all FileContentIndex rows lacking both mandateId and featureInstanceId.

    Returns:
        Number of rows actually deleted (failures are reported on stderr
        and do not abort the purge).
    """
    knowDb = _getKnowledgeDb()
    orphans = []
    for row in knowDb.getRecordset(FileContentIndex):
        mandateRef = (row.get("mandateId") or "").strip()
        instanceRef = (row.get("featureInstanceId") or "").strip()
        if not mandateRef and not instanceRef:
            orphans.append(row)
    if not orphans:
        print("Keine RAG-Orphans gefunden -- nichts zu purgen.")
        return 0
    print(f"Purge {len(orphans)} RAG-Orphan(s):")
    deleted = 0
    for row in orphans:
        rowId = row.get("id")
        try:
            knowDb.recordDelete(FileContentIndex, str(rowId))
        except Exception as exc:
            print(f" FEHLER {rowId}: {type(exc).__name__}: {exc}", file=sys.stderr)
        else:
            deleted += 1
            print(f" geloescht: {rowId} {row.get('fileName')}")
    print(f"Purge abgeschlossen: {deleted}/{len(orphans)} geloescht.")
    return deleted
def main() -> int:
    """CLI entry point for the legacy-state audit.

    Exit codes: 0 = all checks GREEN, 1 = at least one RED, 2 = script error.
    """
    parser = argparse.ArgumentParser(
        description="Audit-Skript fuer Legacy-Bestaende (Bootstrap-Cleanup Plan C)"
    )
    parser.add_argument("--json", action="store_true", help="JSON-Output statt Text")
    parser.add_argument(
        "--purge-rag-orphans",
        action="store_true",
        help="WRITE: loescht FileContentIndex-Rows ohne mandateId UND featureInstanceId",
    )
    args = parser.parse_args()
    # Destructive purge runs first so the audit below reflects the cleaned state.
    if args.purge_rag_orphans:
        try:
            _purgeRagOrphans()
        except Exception as exc:
            print(f"FATAL: Purge fehlgeschlagen -- {type(exc).__name__}: {exc}", file=sys.stderr)
            return 2
        print()
    try:
        results = _runChecks()
    except Exception as exc:
        print(f"FATAL: konnte Audit nicht starten -- {type(exc).__name__}: {exc}", file=sys.stderr)
        return 2
    if args.json:
        print(json.dumps([r.toDict() for r in results], indent=2, default=str))
    else:
        _printText(results)
    # ERROR dominates RED: an incomplete audit must not look like a mere RED.
    if any(r.status == "ERROR" for r in results):
        return 2
    if any(r.status == "RED" for r in results):
        return 1
    return 0
if __name__ == "__main__":
sys.exit(main())

View file

View file

@ -0,0 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import pytest
from modules.datamodels.datamodelAi import AiCallOptions
def test_allowed_models_field_exists():
    # AiCallOptions must accept and store an explicit model whitelist.
    opts = AiCallOptions(allowedModels=["gpt-5-mini", "claude-4-7-opus"])
    assert opts.allowedModels == ["gpt-5-mini", "claude-4-7-opus"]
def test_allowed_models_default_none():
    # Default is None, i.e. no model restriction.
    opts = AiCallOptions()
    assert opts.allowedModels is None

View file

View file

@ -0,0 +1,23 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import pytest
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
def test_inline_image_in_paragraph():
    # A mid-sentence image (with a "200pt" width hint) must yield both text
    # runs and an inline image run whose fileId comes from the file: prefix.
    md = "Results show ![chart](file:abc \"200pt\") clearly."
    result = markdownToDocumentJson(md, "Test")
    runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
    types = [r["type"] for r in runs]
    assert "text" in types
    assert "image" in types
    imgRun = next(r for r in runs if r["type"] == "image")
    assert imgRun.get("fileId") == "abc"
def test_multiple_inline_images():
    # Two images in one paragraph must produce two separate image runs.
    md = "A ![x](file:1) B ![y](file:2) C"
    result = markdownToDocumentJson(md, "Test")
    runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
    images = [r for r in runs if r["type"] == "image"]
    assert len(images) == 2

View file

@ -0,0 +1,71 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import pytest
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
def test_basic_paragraph():
result = markdownToDocumentJson("Hello world", "Test")
doc = result["documents"][0]
section = doc["sections"][0]
assert section["content_type"] == "paragraph"
assert section["elements"][0]["content"]["inlineRuns"][0] == {"type": "text", "value": "Hello world"}
def test_inline_bold():
result = markdownToDocumentJson("This is **bold** text", "Test")
runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
assert any(r["type"] == "bold" and r["value"] == "bold" for r in runs)
def test_inline_image():
result = markdownToDocumentJson("Text ![logo](file:abc123) more", "Test")
runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
assert any(r["type"] == "image" and r.get("fileId") == "abc123" for r in runs)
def test_inline_link():
result = markdownToDocumentJson("Click [here](https://example.com)", "Test")
runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
assert any(r["type"] == "link" and r.get("href") == "https://example.com" for r in runs)
def test_table_cells_are_inline_runs():
    """Table cells hold lists of inline runs, not plain strings."""
    markdown = "| A | B |\n| --- | --- |\n| **x** | y |"
    parsed = markdownToDocumentJson(markdown, "Test")
    table_section = parsed["documents"][0]["sections"][0]
    assert table_section["content_type"] == "table"
    first_cell = table_section["elements"][0]["content"]["rows"][0][0]
    assert isinstance(first_cell, list)
def test_bullet_list_inline_runs():
    """Bullet items hold lists of inline runs, not plain strings."""
    markdown = "- Item **one**\n- Item two"
    parsed = markdownToDocumentJson(markdown, "Test")
    list_section = parsed["documents"][0]["sections"][0]
    assert list_section["content_type"] == "bullet_list"
    first_item = list_section["elements"][0]["content"]["items"][0]
    assert isinstance(first_item, list)
def test_standalone_image_block():
    """An image alone on a line becomes a block-level image section."""
    parsed = markdownToDocumentJson("![Big chart](file:chart123)", "Test")
    assert parsed["documents"][0]["sections"][0]["content_type"] == "image"
def test_heading_unchanged():
    """Heading parsing keeps its existing text/level shape."""
    parsed = markdownToDocumentJson("# Title", "Test")
    heading_section = parsed["documents"][0]["sections"][0]
    assert heading_section["content_type"] == "heading"
    heading_content = heading_section["elements"][0]["content"]
    assert (heading_content["text"], heading_content["level"]) == ("Title", 1)
def test_code_block_unchanged():
    """Fenced code blocks keep their existing code-string shape."""
    markdown = "```python\nprint('hi')\n```"
    parsed = markdownToDocumentJson(markdown, "Test")
    code_section = parsed["documents"][0]["sections"][0]
    assert code_section["content_type"] == "code_block"
    assert code_section["elements"][0]["content"]["code"] == "print('hi')"

View file

@ -0,0 +1,39 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import pytest
from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle, DEFAULT_STYLE
def test_resolve_none_returns_defaults():
    """Passing ``None`` yields the full default style unchanged."""
    assert resolveStyle(None) == DEFAULT_STYLE
def test_resolve_empty_returns_defaults():
    """Passing an empty override dict yields the full default style unchanged."""
    assert resolveStyle({}) == DEFAULT_STYLE
def test_override_single_color():
    """Overriding one color leaves the sibling default colors in place."""
    resolved = resolveStyle({"colors": {"primary": "#FF0000"}})
    assert resolved["colors"]["primary"] == "#FF0000"
    assert resolved["colors"]["secondary"] == DEFAULT_STYLE["colors"]["secondary"]
def test_override_nested_heading():
    """A nested heading override merges with, not replaces, the h1 defaults."""
    resolved = resolveStyle({"headings": {"h1": {"sizePt": 30}}})
    h1_style = resolved["headings"]["h1"]
    assert h1_style["sizePt"] == 30
    assert h1_style["weight"] == "bold"
def test_override_font():
    """Overriding the primary font keeps the default monospace font."""
    resolved = resolveStyle({"fonts": {"primary": "Arial"}})
    assert resolved["fonts"]["primary"] == "Arial"
    assert resolved["fonts"]["monospace"] == "Consolas"
def test_full_style_passthrough():
    """A fully specified font section passes through verbatim."""
    overrides = {"fonts": {"primary": "Helvetica", "monospace": "Monaco"}}
    resolved = resolveStyle(overrides)
    assert resolved["fonts"]["primary"] == "Helvetica"
    assert resolved["fonts"]["monospace"] == "Monaco"

View file

@ -1,133 +0,0 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Unit tests for ``_migrateMandateNameLabelSlugRules`` in interfaceBootstrap.
Covers:
- legacy ``name``/``label`` rows get fixed (label fill, slug rename),
- collisions across legacy rows resolve via -2/-3 suffixes in stable id order,
- valid rows are left untouched (idempotency),
- second invocation is a no-op.
"""
from typing import Any, Dict, List, Optional
import pytest
from modules.datamodels.datamodelUam import Mandate
from modules.interfaces.interfaceBootstrap import _migrateMandateNameLabelSlugRules
from modules.shared.mandateNameUtils import isValidMandateName
class _FakeDb:
"""Minimal connector simulating getRecordset(Mandate)+recordModify(Mandate, id, data)."""
def __init__(self, rows: List[Dict[str, Any]]):
self.rows: List[Dict[str, Any]] = [dict(r) for r in rows]
self.modifyCalls: List[Dict[str, Any]] = []
def getRecordset(self, model, recordFilter: Optional[Dict[str, Any]] = None):
if model is not Mandate:
return []
if not recordFilter:
return [dict(r) for r in self.rows]
out = []
for r in self.rows:
if all(r.get(k) == v for k, v in recordFilter.items()):
out.append(dict(r))
return out
def recordModify(self, model, recordId: str, data: Dict[str, Any]):
self.modifyCalls.append({"id": str(recordId), "data": dict(data)})
for r in self.rows:
if str(r.get("id")) == str(recordId):
r.update(data)
return r
return None
def _row(mid: str, name: Any, label: Any = None) -> Dict[str, Any]:
return {"id": mid, "name": name, "label": label}
class TestMigrationFillsLabel:
    """Migration backfills an empty label from the name (or a generic fallback)."""

    def test_emptyLabelGetsNameAsLabel(self):
        fakeDb = _FakeDb([_row("a1", "good-name", None)])
        _migrateMandateNameLabelSlugRules(fakeDb)
        migrated = fakeDb.rows[0]
        assert migrated["label"] == "good-name"
        assert migrated["name"] == "good-name"

    def test_emptyLabelAndEmptyNameFallsBackToMandate(self):
        fakeDb = _FakeDb([_row("a1", "", "")])
        _migrateMandateNameLabelSlugRules(fakeDb)
        migrated = fakeDb.rows[0]
        assert migrated["label"] == "Mandate"
        assert isValidMandateName(migrated["name"])
class TestMigrationRenamesInvalidNames:
    """Names violating the slug rules are rebuilt from the label."""

    def test_invalidNameGetsSlugFromLabel(self):
        fakeDb = _FakeDb([_row("a1", "Home patrick", "Home Patrick")])
        _migrateMandateNameLabelSlugRules(fakeDb)
        migrated = fakeDb.rows[0]
        assert migrated["name"] == "home-patrick"
        assert migrated["label"] == "Home Patrick"

    def test_umlautsTransliterated(self):
        fakeDb = _FakeDb([_row("a1", "Müller AG", "Müller AG")])
        _migrateMandateNameLabelSlugRules(fakeDb)
        assert fakeDb.rows[0]["name"] == "mueller-ag"
class TestMigrationCollisions:
    """Slug collisions are resolved with -2/-3 suffixes in stable id order."""

    def test_collisionsResolveByStableIdOrder(self):
        fakeDb = _FakeDb([
            _row("z1", "Home patrick", "Home Patrick"),
            _row("a1", "home-patrick", "Home Patrick Two"),
        ])
        _migrateMandateNameLabelSlugRules(fakeDb)
        rowsById = {row["id"]: row for row in fakeDb.rows}
        # The already-valid row (lower id) keeps the base slug; the other gets -2.
        assert rowsById["a1"]["name"] == "home-patrick"
        assert rowsById["z1"]["name"] == "home-patrick-2"

    def test_threeWayCollisionGetsThirdSuffix(self):
        fakeDb = _FakeDb([
            _row("id-aaa", "home-patrick", "Home Patrick"),
            _row("id-bbb", "Home patrick", "Home Patrick"),
            _row("id-ccc", "home patrick", "Home Patrick"),
        ])
        _migrateMandateNameLabelSlugRules(fakeDb)
        assert sorted(row["name"] for row in fakeDb.rows) == [
            "home-patrick",
            "home-patrick-2",
            "home-patrick-3",
        ]
class TestMigrationIdempotency:
    """A second migration run must observe only valid rows and write nothing."""

    def test_secondRunIsNoop(self):
        fakeDb = _FakeDb([
            _row("a1", "home-patrick", "Home Patrick"),
            _row("b1", "Home Müller", ""),
        ])
        _migrateMandateNameLabelSlugRules(fakeDb)
        assert all(isValidMandateName(row["name"]) for row in fakeDb.rows)
        initialChanges = list(fakeDb.modifyCalls)
        fakeDb.modifyCalls.clear()
        _migrateMandateNameLabelSlugRules(fakeDb)
        assert fakeDb.modifyCalls == [], (
            f"expected no further changes after first migration, got {fakeDb.modifyCalls}; "
            f"firstRun changes: {initialChanges}"
        )

    def test_validRowsLeftUntouched(self):
        fakeDb = _FakeDb([
            _row("a1", "root", "Root"),
            _row("b1", "alpina-treuhand", "Alpina Treuhand AG"),
        ])
        _migrateMandateNameLabelSlugRules(fakeDb)
        assert fakeDb.modifyCalls == []
class TestMigrationEmpty:
    """An empty recordset triggers no writes at all."""

    def test_emptyDbDoesNothing(self):
        fakeDb = _FakeDb([])
        _migrateMandateNameLabelSlugRules(fakeDb)
        assert fakeDb.modifyCalls == []

View file

@ -1,209 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Unit tests for the one-shot sysadmin role -> isPlatformAdmin migration.
Covers acceptance criteria from
``wiki/c-work/4-done/2026-04-sysadmin-authority-split.md``:
- AC#4 -> Existing sysadmin role-holders are promoted to ``isPlatformAdmin=True``
and the legacy role is removed (Role + UserMandateRole + AccessRules)
when the gateway boots.
- AC#10 -> The migration is idempotent and removes ALL artefacts (Role,
AccessRules, UserMandateRole) of the legacy ``sysadmin`` role.
Strategy: use an in-memory fake ``DatabaseConnector`` that records calls
and returns deterministic recordsets for ``Role``/``UserMandateRole``/
``UserMandate``/``UserInDB``/``AccessRule`` lookups.
"""
from __future__ import annotations
from typing import Any, Dict, List
from unittest.mock import Mock
from modules.interfaces.interfaceBootstrap import _migrateAndDropSysAdminRole
from modules.datamodels.datamodelMembership import UserMandate, UserMandateRole
from modules.datamodels.datamodelRbac import AccessRule, Role
from modules.datamodels.datamodelUam import UserInDB
# Fixture identifiers shared by all tests below; values are arbitrary but stable
# so call-recording assertions can compare against them directly.
_ROOT_MANDATE_ID = "root-mandate-id"    # mandate hosting the legacy sysadmin role
_SYSADMIN_ROLE_ID = "sysadmin-role-id"  # id of the legacy Role record
_USER_MANDATE_ID = "user-mandate-id"    # UserMandate membership row id
_USER_ID = "legacy-user-id"             # user holding the legacy role
_UMR_ROW_ID = "umr-row-id"              # UserMandateRole assignment row id
_ACCESS_RULE_ID = "access-rule-id"      # AccessRule row attached to the role
def _buildFakeDb(
    *,
    sysadminRoles: List[Dict[str, Any]],
    umRoleRows: List[Dict[str, Any]],
    userMandateRows: List[Dict[str, Any]],
    users: List[Dict[str, Any]],
    accessRules: List[Dict[str, Any]],
) -> Mock:
    """Build a fake ``DatabaseConnector`` that maps model -> recordset.

    The seeded lists are captured by reference: ``recordDelete``/``recordModify``
    mutate them in place, so callers can assert on their lists after the
    migration ran, and a second run observes the post-migration state
    (idempotency). Recorded write calls are exposed on the returned mock as
    ``db._modifies`` / ``db._deletes``.
    """
    deletes: List[tuple] = []   # one (model, recordId) per recordDelete call
    modifies: List[tuple] = []  # one (model, recordId, payload) per recordModify call

    def _getRecordset(model, recordFilter=None, **_):  # noqa: ANN001
        # Dispatch on the model class, applying only the filter semantics the
        # migration relies on; unknown models yield an empty recordset.
        recordFilter = recordFilter or {}
        if model is Role:
            label = recordFilter.get("roleLabel")
            mandateId = recordFilter.get("mandateId")
            # Only the sysadmin role lookup on the root mandate is answered.
            if label == "sysadmin" and mandateId == _ROOT_MANDATE_ID:
                return list(sysadminRoles)
            return []
        if model is UserMandateRole:
            wanted = recordFilter.get("roleId")
            return [r for r in umRoleRows if r.get("roleId") == wanted]
        if model is UserMandate:
            wanted = recordFilter.get("id")
            return [r for r in userMandateRows if r.get("id") == wanted]
        if model is UserInDB:
            wanted = recordFilter.get("id")
            return [r for r in users if r.get("id") == wanted]
        if model is AccessRule:
            wanted = recordFilter.get("roleId")
            return [r for r in accessRules if r.get("roleId") == wanted]
        return []

    def _recordModify(model, recordId, payload):  # noqa: ANN001
        modifies.append((model, recordId, payload))
        # Reflect the change so a subsequent migration call is idempotent.
        if model is UserInDB:
            for u in users:
                if u.get("id") == recordId:
                    u.update(payload)
        return True

    def _recordDelete(model, recordId):  # noqa: ANN001
        deletes.append((model, recordId))
        # Slice-assignment shrinks the caller-held list objects in place.
        if model is UserMandateRole:
            umRoleRows[:] = [r for r in umRoleRows if r.get("id") != recordId]
        elif model is AccessRule:
            accessRules[:] = [r for r in accessRules if r.get("id") != recordId]
        elif model is Role:
            sysadminRoles[:] = [r for r in sysadminRoles if r.get("id") != recordId]
        return True

    db = Mock()
    db.getRecordset = Mock(side_effect=_getRecordset)
    db.recordModify = Mock(side_effect=_recordModify)
    db.recordDelete = Mock(side_effect=_recordDelete)
    db._modifies = modifies  # exposed for assertions
    db._deletes = deletes
    return db
def _seed():
    """Return a fresh, fully-linked legacy-sysadmin fixture (one role, one holder)."""
    roleRow = {
        "id": _SYSADMIN_ROLE_ID,
        "roleLabel": "sysadmin",
        "mandateId": _ROOT_MANDATE_ID,
    }
    assignmentRow = {
        "id": _UMR_ROW_ID,
        "roleId": _SYSADMIN_ROLE_ID,
        "userMandateId": _USER_MANDATE_ID,
    }
    membershipRow = {
        "id": _USER_MANDATE_ID,
        "userId": _USER_ID,
        "mandateId": _ROOT_MANDATE_ID,
    }
    userRow = {
        "id": _USER_ID,
        "username": "legacy",
        "isSysAdmin": False,
        "isPlatformAdmin": False,
    }
    ruleRow = {"id": _ACCESS_RULE_ID, "roleId": _SYSADMIN_ROLE_ID}
    return {
        "sysadminRoles": [roleRow],
        "umRoleRows": [assignmentRow],
        "userMandateRows": [membershipRow],
        "users": [userRow],
        "accessRules": [ruleRow],
    }
# ---------------------------------------------------------------------------
# AC #4 — promote + drop on first run
# ---------------------------------------------------------------------------
def testMigrationPromotesUserAndDropsArtefacts():
    """AC#4: legacy holder is promoted; Role+AccessRule+UMR are deleted."""
    fixture = _seed()
    db = _buildFakeDb(**fixture)
    _migrateAndDropSysAdminRole(db, _ROOT_MANDATE_ID)
    # Promotion landed on the in-memory user row and as an explicit write call.
    assert fixture["users"][0]["isPlatformAdmin"] is True
    promotionCalls = [
        call for call in db._modifies
        if call[0] is UserInDB and call[2] == {"isPlatformAdmin": True}
    ]
    assert promotionCalls, "Expected UserInDB.isPlatformAdmin promotion call"
    # Every artefact table saw a delete ...
    removedModels = {call[0] for call in db._deletes}
    assert UserMandateRole in removedModels, "UserMandateRole row not deleted"
    assert AccessRule in removedModels, "AccessRule row not deleted"
    assert Role in removedModels, "Sysadmin Role record not deleted"
    # ... and the seeded lists were emptied in place.
    assert fixture["umRoleRows"] == []
    assert fixture["accessRules"] == []
    assert fixture["sysadminRoles"] == []
# ---------------------------------------------------------------------------
# AC #10 — idempotent: a second run is a no-op
# ---------------------------------------------------------------------------
def testMigrationIsIdempotent():
    """AC#10: a second invocation finds no sysadmin role and exits silently."""
    fixture = _seed()
    db = _buildFakeDb(**fixture)
    _migrateAndDropSysAdminRole(db, _ROOT_MANDATE_ID)
    writesAfterFirstRun = list(db._modifies)
    deletesAfterFirstRun = list(db._deletes)
    _migrateAndDropSysAdminRole(db, _ROOT_MANDATE_ID)
    # The second call must leave both call logs untouched.
    assert db._modifies == writesAfterFirstRun, (
        "Second migration call must not perform additional writes"
    )
    assert db._deletes == deletesAfterFirstRun, (
        "Second migration call must not perform additional deletes"
    )
def testMigrationSkipsAlreadyPromotedUsers():
    """If a user already has ``isPlatformAdmin=True``, no redundant write."""
    fixture = _seed()
    fixture["users"][0]["isPlatformAdmin"] = True  # already promoted
    db = _buildFakeDb(**fixture)
    _migrateAndDropSysAdminRole(db, _ROOT_MANDATE_ID)
    # No promotion write for an already-promoted user.
    redundantWrites = [
        call for call in db._modifies
        if call[0] is UserInDB and call[2].get("isPlatformAdmin") is True
    ]
    assert redundantWrites == [], (
        "Should not re-write isPlatformAdmin if user already has it"
    )
    # But role + access-rule cleanup still happens.
    removedModels = {call[0] for call in db._deletes}
    assert Role in removedModels
    assert AccessRule in removedModels
    assert UserMandateRole in removedModels
def testMigrationOnEmptyDbIsNoop():
    """No legacy sysadmin role at all -> no calls, no errors."""
    db = _buildFakeDb(
        sysadminRoles=[],
        umRoleRows=[],
        userMandateRows=[],
        users=[],
        accessRules=[],
    )
    _migrateAndDropSysAdminRole(db, _ROOT_MANDATE_ID)
    assert db._modifies == []
    assert db._deletes == []

View file

@ -66,6 +66,17 @@ class TestResolveParameterReferences:
value = "Land: {{n1.country}}"
assert resolveParameterReferences(value, node_outputs) == "Land: CH"
def test_legacy_string_template_loop_current_item_nested(self):
"""Same shape as executionEngine sets on loop node id during body iteration."""
node_outputs = {
"loop93": {
"currentItem": {"subject": "Hello", "body": {"content": "World"}},
"currentIndex": 0,
},
}
value = "Subj: {{loop93.currentItem.subject}} Body: {{loop93.currentItem.body.content}}"
assert resolveParameterReferences(value, node_outputs) == "Subj: Hello Body: World"
class TestWildcardIteration:
"""Phase-4 typed Bindings-Resolver: ``*`` segment iterates over a list.