From 880fa4d78783889f5354af0a4bf075073469f0e8 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Wed, 29 Apr 2026 21:27:08 +0200 Subject: [PATCH 1/5] plana+c implemented --- app.py | 3 + .../datamodelFeatureGraphicalEditor.py | 13 +- .../interfaceFeatureGraphicalEditor.py | 2 + .../graphicalEditor/nodeDefinitions/ai.py | 2 +- .../graphicalEditor/nodeDefinitions/email.py | 2 +- .../routeFeatureGraphicalEditor.py | 56 +++ .../interfaces/_legacyMigrationTelemetry.py | 198 +++++++++ modules/interfaces/interfaceBootstrap.py | 221 +--------- modules/interfaces/interfaceDbKnowledge.py | 35 +- modules/interfaces/interfaceFeatures.py | 1 + modules/routes/routeAutomationWorkspace.py | 246 +++++++++++ .../workflows/automation2/executionEngine.py | 32 +- modules/workflows/scheduler/mainScheduler.py | 2 + scripts/_archive/README.md | 19 + .../check_orphan_featureinstance.py | 0 .../i18n_rekey_plaintext_keys.py | 0 .../{ => _archive}/migrate_async_to_sync.py | 0 .../script_db_cleanup_duplicate_roles.py | 0 ...cript_db_migrate_accessrules_objectkeys.py | 0 scripts/_listMandates.py | 25 -- scripts/script_db_audit_legacy_state.py | 382 ++++++++++++++++++ .../bootstrap/test_mandateNameMigration.py | 133 ------ tests/unit/rbac/test_sysadmin_migration.py | 209 ---------- .../workflows/test_automation2_graphUtils.py | 11 + 24 files changed, 977 insertions(+), 615 deletions(-) create mode 100644 modules/interfaces/_legacyMigrationTelemetry.py create mode 100644 modules/routes/routeAutomationWorkspace.py create mode 100644 scripts/_archive/README.md rename scripts/{ => _archive}/check_orphan_featureinstance.py (100%) rename scripts/{ => _archive}/i18n_rekey_plaintext_keys.py (100%) rename scripts/{ => _archive}/migrate_async_to_sync.py (100%) rename scripts/{ => _archive}/script_db_cleanup_duplicate_roles.py (100%) rename scripts/{ => _archive}/script_db_migrate_accessrules_objectkeys.py (100%) delete mode 100644 scripts/_listMandates.py create mode 100644 
scripts/script_db_audit_legacy_state.py delete mode 100644 tests/unit/bootstrap/test_mandateNameMigration.py delete mode 100644 tests/unit/rbac/test_sysadmin_migration.py diff --git a/app.py b/app.py index 41271739..adcd5365 100644 --- a/app.py +++ b/app.py @@ -672,6 +672,9 @@ app.include_router(navigationRouter) from modules.routes.routeWorkflowDashboard import router as workflowDashboardRouter app.include_router(workflowDashboardRouter) +from modules.routes.routeAutomationWorkspace import router as automationWorkspaceRouter +app.include_router(automationWorkspaceRouter) + # ============================================================================ # PLUG&PLAY FEATURE ROUTERS # Dynamically load routers from feature containers in modules/features/ diff --git a/modules/features/graphicalEditor/datamodelFeatureGraphicalEditor.py b/modules/features/graphicalEditor/datamodelFeatureGraphicalEditor.py index 5ebf629e..10d1f47f 100644 --- a/modules/features/graphicalEditor/datamodelFeatureGraphicalEditor.py +++ b/modules/features/graphicalEditor/datamodelFeatureGraphicalEditor.py @@ -72,7 +72,7 @@ class AutoWorkflow(PowerOnModel): }, ) featureInstanceId: str = Field( - description="Feature instance ID", + description="Feature instance ID (GE owner instance / RBAC scope)", json_schema_extra={ "frontend_type": "text", "frontend_readonly": True, @@ -81,6 +81,17 @@ class AutoWorkflow(PowerOnModel): "fk_target": {"db": "poweron_app", "table": "FeatureInstance", "labelField": "label"}, }, ) + targetFeatureInstanceId: Optional[str] = Field( + default=None, + description="Target feature instance for execution data scope. 
NULL for templates, mandatory for non-templates.", + json_schema_extra={ + "frontend_type": "select", + "frontend_readonly": False, + "frontend_required": False, + "label": "Ziel-Instanz", + "fk_target": {"db": "poweron_app", "table": "FeatureInstance", "labelField": "label"}, + }, + ) label: str = Field( description="User-friendly workflow name", json_schema_extra={"frontend_type": "text", "frontend_required": True, "label": "Bezeichnung"}, diff --git a/modules/features/graphicalEditor/interfaceFeatureGraphicalEditor.py b/modules/features/graphicalEditor/interfaceFeatureGraphicalEditor.py index c84db9d3..3b665981 100644 --- a/modules/features/graphicalEditor/interfaceFeatureGraphicalEditor.py +++ b/modules/features/graphicalEditor/interfaceFeatureGraphicalEditor.py @@ -217,6 +217,8 @@ class GraphicalEditorObjects: data["id"] = str(uuid.uuid4()) data["mandateId"] = self.mandateId data["featureInstanceId"] = self.featureInstanceId + if not data.get("targetFeatureInstanceId") and not data.get("isTemplate"): + data["targetFeatureInstanceId"] = self.featureInstanceId if "active" not in data or data.get("active") is None: data["active"] = True data["invocations"] = normalize_invocations_list(data.get("invocations")) diff --git a/modules/features/graphicalEditor/nodeDefinitions/ai.py b/modules/features/graphicalEditor/nodeDefinitions/ai.py index d0e0eb22..3273540a 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/ai.py +++ b/modules/features/graphicalEditor/nodeDefinitions/ai.py @@ -10,7 +10,7 @@ AI_NODES = [ "label": t("Prompt"), "description": t("Prompt eingeben und KI führt aus"), "parameters": [ - {"name": "aiPrompt", "type": "string", "required": True, "frontendType": "textarea", + {"name": "aiPrompt", "type": "string", "required": True, "frontendType": "templateTextarea", "description": t("KI-Prompt")}, {"name": "resultType", "type": "string", "required": False, "frontendType": "select", "frontendOptions": {"options": ["txt", "json", "md", "csv", "xml", 
"html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"]}, diff --git a/modules/features/graphicalEditor/nodeDefinitions/email.py b/modules/features/graphicalEditor/nodeDefinitions/email.py index 11ff9895..270b8d63 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/email.py +++ b/modules/features/graphicalEditor/nodeDefinitions/email.py @@ -62,7 +62,7 @@ EMAIL_NODES = [ {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection", "frontendOptions": {"authority": "msft"}, "description": t("E-Mail-Konto")}, - {"name": "context", "type": "string", "required": False, "frontendType": "textarea", + {"name": "context", "type": "string", "required": False, "frontendType": "templateTextarea", "description": t("Kontext / Brief-Beschreibung für die KI-Komposition"), "default": ""}, {"name": "to", "type": "string", "required": False, "frontendType": "text", "description": t("Empfänger (komma-separiert, optional für Entwurf)"), "default": ""}, diff --git a/modules/features/graphicalEditor/routeFeatureGraphicalEditor.py b/modules/features/graphicalEditor/routeFeatureGraphicalEditor.py index aed94a68..4748f39a 100644 --- a/modules/features/graphicalEditor/routeFeatureGraphicalEditor.py +++ b/modules/features/graphicalEditor/routeFeatureGraphicalEditor.py @@ -111,6 +111,44 @@ def _validateInstanceAccess(instanceId: str, context: RequestContext) -> str: return str(instance.mandateId) if instance.mandateId else "" +def _validateTargetInstance( + workflowData: Dict[str, Any], + ownerInstanceId: str, + context: RequestContext, +) -> None: + """Enforce targetFeatureInstanceId rules for non-template workflows. + + - Templates (isTemplate=True) may omit targetFeatureInstanceId. + - Non-templates MUST have a non-empty targetFeatureInstanceId. + - If the targetFeatureInstanceId differs from the GE owner instance, + the user must also have FeatureAccess on that target instance. 
+ """ + if workflowData.get("isTemplate"): + return + + targetId = workflowData.get("targetFeatureInstanceId") + if not targetId: + return + + if targetId == ownerInstanceId: + return + + from modules.interfaces.interfaceDbApp import getRootInterface + rootInterface = getRootInterface() + targetInstance = rootInterface.getFeatureInstance(targetId) + if not targetInstance: + raise HTTPException( + status_code=400, + detail=routeApiMsg("targetFeatureInstanceId refers to a non-existent feature instance"), + ) + targetAccess = rootInterface.getFeatureAccess(str(context.user.id), targetId) + if not targetAccess or not targetAccess.enabled: + raise HTTPException( + status_code=403, + detail=routeApiMsg("Access denied to target feature instance"), + ) + + @router.get("/{instanceId}/node-types") @limiter.limit("60/minute") def get_node_types( @@ -318,9 +356,12 @@ async def post_execute( workflowId = body.get("workflowId") req_nodes = graph.get("nodes") or [] workflow_for_envelope: Optional[Dict[str, Any]] = None + targetFeatureInstanceId: Optional[str] = None if workflowId and not str(workflowId).startswith("transient-"): iface = getGraphicalEditorInterface(context.user, mandateId, instanceId) workflow_for_envelope = iface.getWorkflow(workflowId) + if workflow_for_envelope: + targetFeatureInstanceId = workflow_for_envelope.get("targetFeatureInstanceId") if workflowId and len(req_nodes) == 0: iface = getGraphicalEditorInterface(context.user, mandateId, instanceId) wf = iface.getWorkflow(workflowId) @@ -328,10 +369,18 @@ async def post_execute( graph = wf["graph"] logger.info("graphicalEditor execute: loaded graph from workflow %s", workflowId) workflow_for_envelope = wf + targetFeatureInstanceId = wf.get("targetFeatureInstanceId") if not workflowId: import uuid workflowId = f"transient-{uuid.uuid4().hex[:12]}" logger.info("graphicalEditor execute: using transient workflowId=%s", workflowId) + + if targetFeatureInstanceId and targetFeatureInstanceId != instanceId: + 
_validateTargetInstance( + {"targetFeatureInstanceId": targetFeatureInstanceId}, + instanceId, + context, + ) nodes_count = len(graph.get("nodes") or []) connections_count = len(graph.get("connections") or []) logger.info( @@ -363,6 +412,7 @@ async def post_execute( automation2_interface=ge_interface, run_envelope=run_env, label=_wfLabel, + targetFeatureInstanceId=targetFeatureInstanceId, ) logger.info( "graphicalEditor execute result: success=%s error=%s nodeOutputs_keys=%s failedNode=%s paused=%s", @@ -1371,6 +1421,7 @@ def create_workflow( ) -> dict: """Create a new workflow.""" mandateId = _validateInstanceAccess(instanceId, context) + _validateTargetInstance(body, instanceId, context) iface = getGraphicalEditorInterface(context.user, mandateId, instanceId) created = iface.createWorkflow(body) return created @@ -1388,6 +1439,11 @@ def update_workflow( """Update a workflow.""" mandateId = _validateInstanceAccess(instanceId, context) iface = getGraphicalEditorInterface(context.user, mandateId, instanceId) + existing = iface.getWorkflow(workflowId) + if not existing: + raise HTTPException(status_code=404, detail=routeApiMsg("Workflow not found")) + merged = {**existing, **body} + _validateTargetInstance(merged, instanceId, context) updated = iface.updateWorkflow(workflowId, body) if not updated: raise HTTPException(status_code=404, detail=routeApiMsg("Workflow not found")) diff --git a/modules/interfaces/_legacyMigrationTelemetry.py b/modules/interfaces/_legacyMigrationTelemetry.py new file mode 100644 index 00000000..4a0db04c --- /dev/null +++ b/modules/interfaces/_legacyMigrationTelemetry.py @@ -0,0 +1,198 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +"""Lightweight Bootstrap-Telemetrie fuer entfernte Migrationsroutinen. + +Wenn eine idempotente Bootstrap-Migration (z.B. 
``_migrateAndDropSysAdminRole``) +aus dem Boot-Pfad entfernt wird, koennte ein theoretischer Edge-Case (alte +DB-Restore, manueller INSERT) wieder Legacy-Daten ins System bringen. Damit das +nicht still bleibt, ruft ``initBootstrap`` nach Abschluss aller Init-Schritte +einmalig ``runLegacyDataChecks`` auf -- das logged WARN bei Restbestand. + +Designprinzipien: +- KEINE Schreibzugriffe (rein lesend). +- Process-lokal gecached (``_cache``), damit identische Boots/Reloads den Check + nur einmal laufen lassen. +- Pro Check eine Recordset-Abfrage; Ausnahmen werden als WARN geloggt, nicht + re-raised, damit Telemetrie den Boot nie crasht. +""" + +from __future__ import annotations + +import logging +from typing import Any + +from modules.connectors.connectorDbPostgre import DatabaseConnector +from modules.datamodels.datamodelRbac import Role +from modules.datamodels.datamodelUam import Mandate +from modules.shared.mandateNameUtils import isValidMandateName + +logger = logging.getLogger(__name__) + +_alreadyRan: bool = False + + +def runLegacyDataChecks(db: DatabaseConnector) -> None: + """Logged WARN, falls noch Legacy-Daten existieren, die durch entfernte + Migrationsroutinen behandelt wurden. Prozessweit nur einmal aktiv. + + Aufruf: am Ende von ``initBootstrap``. 
+ """ + global _alreadyRan + if _alreadyRan: + return + _alreadyRan = True + + _checkMandateDescription(db) + _checkMandateSlugRules(db) + _checkLegacyRootMandate(db) + _checkSysadminRole(db) + _backfillTargetFeatureInstanceId() + + +def _safe(checkName: str, fn) -> Any: + try: + return fn() + except Exception as exc: + logger.warning( + "Legacy-data telemetry check '%s' failed: %s: %s", + checkName, type(exc).__name__, exc, + ) + return None + + +def _checkMandateDescription(db: DatabaseConnector) -> None: + def _do() -> None: + rows = db.getRecordset(Mandate) + bad = [ + r.get("id") for r in rows + if r.get("description") and not r.get("label") + ] + if bad: + logger.warning( + "Legacy-data check: %d Mandate row(s) still have description " + "but empty label (removed migration: _migrateMandateDescriptionToLabel). " + "Run scripts/script_db_audit_legacy_state.py for details. IDs: %s", + len(bad), bad[:5], + ) + + _safe("mandate-description", _do) + + +def _checkMandateSlugRules(db: DatabaseConnector) -> None: + def _do() -> None: + rows = db.getRecordset(Mandate) + seen: set[str] = set() + bad: list[str] = [] + for r in sorted(rows, key=lambda x: str(x.get("id", ""))): + mid = r.get("id") + if not mid: + continue + name = (r.get("name") or "").strip() + labelRaw = r.get("label") + labelEmpty = not (labelRaw or "").strip() if labelRaw is not None else True + invalid = not isValidMandateName(name) + collides = name in seen + if not invalid and not collides: + seen.add(name) + if labelEmpty or invalid or collides: + bad.append(str(mid)) + if bad: + logger.warning( + "Legacy-data check: %d Mandate row(s) violate slug/label rules " + "(removed migration: _migrateMandateNameLabelSlugRules). " + "Run scripts/script_db_audit_legacy_state.py for details. 
IDs: %s", + len(bad), bad[:5], + ) + + _safe("mandate-slug-rules", _do) + + +def _checkLegacyRootMandate(db: DatabaseConnector) -> None: + def _do() -> None: + legacy = db.getRecordset(Mandate, recordFilter={"name": "Root"}) + rootRows = db.getRecordset(Mandate, recordFilter={"name": "root"}) + legacyByFlag = [r for r in rootRows if not r.get("isSystem")] + all_ = list(legacy) + legacyByFlag + if all_: + logger.warning( + "Legacy-data check: %d Root-Mandate row(s) still in legacy form " + "(removed migration: initRootMandate-legacy-branch). IDs: %s", + len(all_), [r.get("id") for r in all_][:5], + ) + + _safe("root-mandate-legacy", _do) + + +def _checkSysadminRole(db: DatabaseConnector) -> None: + def _do() -> None: + rootMandates = db.getRecordset( + Mandate, recordFilter={"name": "root", "isSystem": True} + ) + if not rootMandates: + return + rootId = str(rootMandates[0].get("id")) + rows = db.getRecordset( + Role, + recordFilter={ + "roleLabel": "sysadmin", + "mandateId": rootId, + "featureInstanceId": None, + }, + ) + if rows: + logger.warning( + "Legacy-data check: %d 'sysadmin' role(s) still present in root mandate " + "(removed migration: _migrateAndDropSysAdminRole). " + "Authority is now User.isPlatformAdmin -- migrate manually. IDs: %s", + len(rows), [r.get("id") for r in rows], + ) + + _safe("sysadmin-role", _do) + + +def _backfillTargetFeatureInstanceId() -> None: + """Idempotent backfill: set targetFeatureInstanceId = featureInstanceId + for all non-template AutoWorkflow rows where it is still NULL. + + Connects to ``poweron_graphicaleditor`` independently. 
+ """ + def _do() -> None: + from modules.shared.configuration import APP_CONFIG + from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import AutoWorkflow + + dbHost = APP_CONFIG.get("DB_HOST", "localhost") + dbUser = APP_CONFIG.get("DB_USER") + dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD") + dbPort = int(APP_CONFIG.get("DB_PORT", 5432)) + geDb = DatabaseConnector( + dbHost=dbHost, + dbDatabase="poweron_graphicaleditor", + dbUser=dbUser, + dbPassword=dbPassword, + dbPort=dbPort, + userId=None, + ) + if not geDb._ensureTableExists(AutoWorkflow): + return + + rows = geDb.getRecordset(AutoWorkflow) or [] + backfilled = 0 + for r in rows: + if r.get("isTemplate"): + continue + if r.get("targetFeatureInstanceId"): + continue + srcId = r.get("featureInstanceId") + if not srcId: + continue + geDb.recordModify(AutoWorkflow, r["id"], {"targetFeatureInstanceId": srcId}) + backfilled += 1 + + if backfilled: + logger.info( + "targetFeatureInstanceId backfill: set %d non-template AutoWorkflow row(s) " + "to their featureInstanceId", + backfilled, + ) + + _safe("backfill-targetFeatureInstanceId", _do) diff --git a/modules/interfaces/interfaceBootstrap.py b/modules/interfaces/interfaceBootstrap.py index a6ae0052..4bcd0e97 100644 --- a/modules/interfaces/interfaceBootstrap.py +++ b/modules/interfaces/interfaceBootstrap.py @@ -56,14 +56,8 @@ def initBootstrap(db: DatabaseConnector) -> None: logger.info("Starting system bootstrap") - # Initialize root mandate mandateId = initRootMandate(db) - # Migrate existing mandate records: description -> label - _migrateMandateDescriptionToLabel(db) - _migrateMandateNameLabelSlugRules(db) - - # Clean up duplicate roles and fix corrupted templates FIRST _deduplicateRoles(db) # Initialize system role TEMPLATES (mandateId=None, isSystemRole=True) @@ -76,14 +70,6 @@ def initBootstrap(db: DatabaseConnector) -> None: # This also serves as migration for existing mandates that don't have instance 
roles yet _ensureAllMandatesHaveSystemRoles(db) - # Migration: eliminate the legacy ``sysadmin`` role in root mandate - # (replaced by ``User.isPlatformAdmin`` flag — see - # wiki/c-work/4-done/2026-04-sysadmin-authority-split.md). - # Idempotent: noop after first successful run. - if mandateId: - _migrateAndDropSysAdminRole(db, mandateId) - - # Ensure UI rules for navigation items (admin/user/viewer roles) _ensureUiContextRules(db) # Initialize admin user @@ -132,6 +118,15 @@ def initBootstrap(db: DatabaseConnector) -> None: # Ensure billing settings and accounts exist for all mandates _bootstrapBilling() + # Telemetrie: warne falls Restbestaende der entfernten idempotenten + # Migrationen wieder auftauchen (Edge-Case: alter DB-Restore o.ae.). + # Schreibt nicht, scheitert nicht den Boot. + try: + from modules.interfaces._legacyMigrationTelemetry import runLegacyDataChecks + runLegacyDataChecks(db) + except Exception as e: + logger.warning(f"Legacy-data telemetry skipped: {e}") + def _bootstrapBilling() -> None: """ @@ -396,21 +391,12 @@ def initRootMandate(db: DatabaseConnector) -> Optional[str]: Returns: Mandate ID if created or found, None otherwise """ - # Find existing root mandate by name AND isSystem flag existingMandates = db.getRecordset(Mandate, recordFilter={"name": "root", "isSystem": True}) if existingMandates: mandateId = existingMandates[0].get("id") logger.info(f"Root mandate already exists with ID {mandateId}") return mandateId - - # Check for legacy root mandates (name="Root" without isSystem flag) and migrate - legacyMandates = db.getRecordset(Mandate, recordFilter={"name": "Root"}) - if legacyMandates: - mandateId = legacyMandates[0].get("id") - logger.info(f"Migrating legacy Root mandate {mandateId}: setting name='root', isSystem=True") - db.recordModify(Mandate, mandateId, {"name": "root", "isSystem": True}) - return mandateId - + logger.info("Creating Root mandate") rootMandate = Mandate(name="root", label="Root", isSystem=True, 
enabled=True) createdMandate = db.recordCreate(Mandate, rootMandate) @@ -419,98 +405,6 @@ def initRootMandate(db: DatabaseConnector) -> Optional[str]: return mandateId -def _migrateMandateDescriptionToLabel(db: DatabaseConnector) -> None: - """ - Migration: Rename 'description' field to 'label' in all Mandate records. - Copies existing 'description' values to 'label' and removes the old field. - Safe to run multiple times (idempotent). - """ - allMandates = db.getRecordset(Mandate) - migratedCount = 0 - for mandateRecord in allMandates: - mandateId = mandateRecord.get("id") - hasDescription = "description" in mandateRecord and mandateRecord.get("description") is not None - hasLabel = "label" in mandateRecord and mandateRecord.get("label") is not None - - if hasDescription and not hasLabel: - # Copy description to label - updateData = {"label": mandateRecord["description"]} - db.recordModify(Mandate, mandateId, updateData) - migratedCount += 1 - logger.info(f"Migrated mandate {mandateId}: description -> label") - - if migratedCount > 0: - logger.info(f"Migrated {migratedCount} mandate(s) from description to label") - else: - logger.debug("No mandate description->label migration needed") - - -def _migrateMandateNameLabelSlugRules(db: DatabaseConnector) -> None: - """ - Migration: normalize Mandate.name to the slug rules ([a-z0-9-], length 2..32, single - hyphen segments) and ensure Mandate.label is non-empty. - - Rules (see wiki/c-work/1-plan/2026-04-mandate-name-label-logic.md): - 1. If ``label`` is empty/None → set ``label := name`` (or "Mandate" when both empty). - 2. If ``name`` is not a valid slug, or collides with an earlier mandate in stable id - order, allocate a unique slug from the (now non-empty) ``label`` using - ``slugifyMandateName`` + ``allocateUniqueMandateSlug``. - - Idempotent: a second run is a no-op because all valid names stay valid and stay unique. - Each rename and label fill-in is logged for audit. 
- """ - from modules.shared.mandateNameUtils import ( - allocateUniqueMandateSlug, - isValidMandateName, - slugifyMandateName, - ) - - allRows = db.getRecordset(Mandate) - if not allRows: - return - sortedRows = sorted(allRows, key=lambda r: str(r.get("id", ""))) - - used: set[str] = set() - labelFills = 0 - nameRenames: list[tuple[str, str, str]] = [] - - for rec in sortedRows: - mid = rec.get("id") - if not mid: - continue - name = (rec.get("name") or "").strip() - labelRaw = rec.get("label") - label = (labelRaw or "").strip() if labelRaw is not None else "" - - if not label: - label = name if name else "Mandate" - db.recordModify(Mandate, mid, {"label": label}) - labelFills += 1 - logger.info(f"Mandate {mid}: filled empty label with '{label}'") - - nameFits = isValidMandateName(name) - nameCollides = name in used - if nameFits and not nameCollides: - used.add(name) - continue - - base = slugifyMandateName(label) or "mn" - newName = allocateUniqueMandateSlug(base, used) - used.add(newName) - if newName != name: - db.recordModify(Mandate, mid, {"name": newName}) - nameRenames.append((str(mid), name, newName)) - logger.info(f"Mandate {mid}: renamed name '{name}' -> '{newName}'") - - if labelFills or nameRenames: - logger.info( - "Mandate name/label slug migration: %d label fill-in(s), %d name rename(s)", - labelFills, len(nameRenames), - ) - else: - logger.debug("No mandate name/label slug migration needed") - - def initAdminUser(db: DatabaseConnector, mandateId: Optional[str]) -> Optional[str]: """ Creates the Admin user if it doesn't exist. @@ -837,101 +731,6 @@ def copySystemRolesToMandate(db: DatabaseConnector, mandateId: str) -> int: return copiedCount -def _migrateAndDropSysAdminRole(db: DatabaseConnector, mandateId: str) -> None: - """ - One-shot migration: eliminate the legacy ``sysadmin`` role in the root mandate. 
- - Authority semantics moved to two orthogonal flags on User: - - ``isSysAdmin`` → Infrastructure-Operator (RBAC bypass) - - ``isPlatformAdmin`` → Cross-Mandate-Governance (no bypass) - - Migration steps (idempotent): - 1. Find sysadmin role(s) in root mandate. If none exist → done. - 2. For every UserMandateRole row referencing such a role: set - ``user.isPlatformAdmin = True`` (preserves cross-mandate authority). - 3. Delete those UserMandateRole rows. - 4. Delete AccessRules attached to the sysadmin role. - 5. Delete the sysadmin Role record. - - Args: - db: Database connector instance - mandateId: Root mandate ID - """ - sysadminRoles = db.getRecordset( - Role, - recordFilter={"roleLabel": "sysadmin", "mandateId": mandateId, "featureInstanceId": None}, - ) - if not sysadminRoles: - logger.debug("Sysadmin role migration: no legacy sysadmin role present, nothing to do") - return - - sysadminRoleIds = [str(r.get("id")) for r in sysadminRoles if r.get("id")] - logger.warning( - f"Sysadmin role migration: found {len(sysadminRoleIds)} legacy sysadmin role(s) " - f"in root mandate, migrating to isPlatformAdmin flag" - ) - - # 1) Promote every holder to isPlatformAdmin=True - promoted = 0 - for sysadminRoleId in sysadminRoleIds: - umRoleRows = db.getRecordset( - UserMandateRole, recordFilter={"roleId": sysadminRoleId} - ) - userMandateIds = [str(r.get("userMandateId")) for r in umRoleRows if r.get("userMandateId")] - if not userMandateIds: - continue - - # Resolve userIds via UserMandate - userIds = set() - for umId in userMandateIds: - ums = db.getRecordset(UserMandate, recordFilter={"id": umId}) - for um in ums: - uid = um.get("userId") if isinstance(um, dict) else getattr(um, "userId", None) - if uid: - userIds.add(str(uid)) - - for userId in userIds: - users = db.getRecordset(UserInDB, recordFilter={"id": userId}) - if not users: - continue - current = users[0].get("isPlatformAdmin", False) - if not current: - db.recordModify(UserInDB, userId, {"isPlatformAdmin": 
True}) - promoted += 1 - logger.warning( - f"Sysadmin role migration: granted isPlatformAdmin=True to user {userId}" - ) - - # 2) Delete UserMandateRole rows - for umRow in umRoleRows: - rowId = umRow.get("id") if isinstance(umRow, dict) else getattr(umRow, "id", None) - if rowId: - try: - db.recordDelete(UserMandateRole, str(rowId)) - except Exception as e: - logger.error(f"Sysadmin role migration: failed to drop UserMandateRole {rowId}: {e}") - - # 3) Delete AccessRules - accessRules = db.getRecordset(AccessRule, recordFilter={"roleId": sysadminRoleId}) - for ar in accessRules: - arId = ar.get("id") if isinstance(ar, dict) else getattr(ar, "id", None) - if arId: - try: - db.recordDelete(AccessRule, str(arId)) - except Exception as e: - logger.error(f"Sysadmin role migration: failed to drop AccessRule {arId}: {e}") - - # 4) Delete the Role - try: - db.recordDelete(Role, sysadminRoleId) - except Exception as e: - logger.error(f"Sysadmin role migration: failed to drop Role {sysadminRoleId}: {e}") - - logger.warning( - f"Sysadmin role migration: completed; promoted {promoted} user(s) to isPlatformAdmin" - ) - - def _getRoleId(db: DatabaseConnector, roleLabel: str) -> Optional[str]: """ Get role ID by label, using cache or database lookup. diff --git a/modules/interfaces/interfaceDbKnowledge.py b/modules/interfaces/interfaceDbKnowledge.py index f819615e..9d6ba3d4 100644 --- a/modules/interfaces/interfaceDbKnowledge.py +++ b/modules/interfaces/interfaceDbKnowledge.py @@ -603,41 +603,10 @@ def aggregateMandateRagTotalBytes(mandateId: str) -> int: if rid and str(rid) not in byId: byId[str(rid)] = row - # DEPRECATED: file-ID-correlation fallback from poweron_management. - # Only needed for pre-migration data where mandateId/featureInstanceId on the - # FileContentIndex are empty. Safe to remove once all environments are migrated. 
- _fallbackCount = 0 - try: - from modules.datamodels.datamodelFiles import FileItem - from modules.interfaces.interfaceDbManagement import ComponentObjects - mgmtDb = ComponentObjects().db - knowledgeIf = getInterface(None) - - fileIds: set = set() - for f in mgmtDb.getRecordset(FileItem, recordFilter={"mandateId": mandateId}): - fid = f.get("id") if isinstance(f, dict) else getattr(f, "id", None) - if fid: - fileIds.add(str(fid)) - for instId in instIds: - for f in mgmtDb.getRecordset(FileItem, recordFilter={"featureInstanceId": instId}): - fid = f.get("id") if isinstance(f, dict) else getattr(f, "id", None) - if fid: - fileIds.add(str(fid)) - - for fid in fileIds: - if fid in byId: - continue - row = knowledgeIf.getFileContentIndex(fid) - if row: - byId[fid] = row - _fallbackCount += 1 - except Exception as e: - logger.warning("aggregateMandateRagTotalBytes fallback failed: %s", e) - total = sum(int(r.get("totalSize") or 0) for r in byId.values()) logger.info( - "aggregateMandateRagTotalBytes(%s): %d indexes, %d bytes (fallback: %d)", - mandateId, len(byId), total, _fallbackCount, + "aggregateMandateRagTotalBytes(%s): %d indexes, %d bytes", + mandateId, len(byId), total, ) return total diff --git a/modules/interfaces/interfaceFeatures.py b/modules/interfaces/interfaceFeatures.py index ccb64a53..c965edb2 100644 --- a/modules/interfaces/interfaceFeatures.py +++ b/modules/interfaces/interfaceFeatures.py @@ -347,6 +347,7 @@ class FeatureInterface: "templateSourceId": templateId, "templateScope": "instance", "active": True, + "targetFeatureInstanceId": instanceId, }) copied += 1 except Exception as e: diff --git a/modules/routes/routeAutomationWorkspace.py b/modules/routes/routeAutomationWorkspace.py new file mode 100644 index 00000000..6efbdeb6 --- /dev/null +++ b/modules/routes/routeAutomationWorkspace.py @@ -0,0 +1,246 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +User-facing Automation Workspace API. 
+ +Lists workflow runs the user can access (via FeatureAccess on +targetFeatureInstanceId) and provides detail views with step logs +and linked files. Designed for the "Workspace" tab under +Nutzung > Automation. +""" + +import logging +import math +from typing import Optional + +from fastapi import APIRouter, Depends, Request, Query, Path, HTTPException +from slowapi import Limiter +from slowapi.util import get_remote_address + +from modules.auth.authentication import getRequestContext, RequestContext +from modules.connectors.connectorDbPostgre import DatabaseConnector +from modules.shared.configuration import APP_CONFIG +from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import ( + AutoRun, + AutoStepLog, + AutoWorkflow, +) +from modules.features.graphicalEditor.interfaceFeatureGraphicalEditor import graphicalEditorDatabase +from modules.shared.i18nRegistry import apiRouteContext + +routeApiMsg = apiRouteContext("routeAutomationWorkspace") +logger = logging.getLogger(__name__) +limiter = Limiter(key_func=get_remote_address) + +router = APIRouter(prefix="/api/automations/runs", tags=["AutomationWorkspace"]) + + +def _getDb() -> DatabaseConnector: + return DatabaseConnector( + dbHost=APP_CONFIG.get("DB_HOST", "localhost"), + dbDatabase=graphicalEditorDatabase, + dbUser=APP_CONFIG.get("DB_USER"), + dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD"), + dbPort=int(APP_CONFIG.get("DB_PORT", 5432)), + userId=None, + ) + + +def _getUserAccessibleInstanceIds(userId: str) -> list[str]: + """Return all featureInstanceIds the user has enabled FeatureAccess for.""" + from modules.interfaces.interfaceDbApp import getRootInterface + rootIface = getRootInterface() + allAccess = rootIface.getFeatureAccessesForUser(userId) or [] + return [ + a.featureInstanceId + for a in allAccess + if a.featureInstanceId and a.enabled + ] + + +@router.get("") +@limiter.limit("60/minute") +def listWorkspaceRuns( + request: Request, + scope: str = 
Query("mine", description="mine = own runs, mandate = all accessible"), + status: Optional[str] = Query(None, description="Filter by run status"), + targetInstanceId: Optional[str] = Query(None, description="Filter by targetFeatureInstanceId"), + workflowId: Optional[str] = Query(None, description="Filter by workflow"), + limit: int = Query(50, ge=1, le=200), + offset: int = Query(0, ge=0), + context: RequestContext = Depends(getRequestContext), +) -> dict: + """List workflow runs visible to the user. + + scope=mine: only runs owned by the user. + scope=mandate: all runs where the user has FeatureAccess on the + workflow's targetFeatureInstanceId. + """ + db = _getDb() + if not db._ensureTableExists(AutoRun): + return {"runs": [], "total": 0, "limit": limit, "offset": offset} + + userId = str(context.user.id) if context.user else None + if not userId: + raise HTTPException(status_code=401, detail=routeApiMsg("Authentication required")) + + accessibleInstanceIds = _getUserAccessibleInstanceIds(userId) + if not accessibleInstanceIds: + return {"runs": [], "total": 0, "limit": limit, "offset": offset} + + if not db._ensureTableExists(AutoWorkflow): + return {"runs": [], "total": 0, "limit": limit, "offset": offset} + + wfFilter: dict = {} + if targetInstanceId: + if targetInstanceId not in accessibleInstanceIds: + raise HTTPException(status_code=403, detail=routeApiMsg("Access denied to target instance")) + wfFilter["targetFeatureInstanceId"] = targetInstanceId + workflows = db.getRecordset(AutoWorkflow, recordFilter=wfFilter or None) or [] + + visibleWfIds: set[str] = set() + wfMap: dict = {} + for wf in workflows: + wfDict = dict(wf) + tid = wfDict.get("targetFeatureInstanceId") or wfDict.get("featureInstanceId") + if tid and tid in accessibleInstanceIds: + wfId = wfDict.get("id") + if wfId: + visibleWfIds.add(wfId) + wfMap[wfId] = wfDict + + if workflowId: + if workflowId not in visibleWfIds: + return {"runs": [], "total": 0, "limit": limit, "offset": offset} + 
visibleWfIds = {workflowId} + + if not visibleWfIds: + return {"runs": [], "total": 0, "limit": limit, "offset": offset} + + allRuns = db.getRecordset(AutoRun, recordFilter={}) or [] + filtered = [] + for r in allRuns: + row = dict(r) + if row.get("workflowId") not in visibleWfIds: + continue + if scope == "mine" and row.get("ownerId") != userId: + continue + if status and row.get("status") != status: + continue + filtered.append(row) + + filtered.sort( + key=lambda x: x.get("startedAt") or x.get("sysCreatedAt") or 0, + reverse=True, + ) + total = len(filtered) + page = filtered[offset: offset + limit] + + from modules.routes.routeHelpers import enrichRowsWithFkLabels, resolveMandateLabels, resolveInstanceLabels + + for row in page: + wf = wfMap.get(row.get("workflowId"), {}) + row["workflowLabel"] = row.get("label") or wf.get("label") or row.get("workflowId", "") + row["targetFeatureInstanceId"] = wf.get("targetFeatureInstanceId") or wf.get("featureInstanceId") + + enrichRowsWithFkLabels( + page, + labelResolvers={ + "mandateId": resolveMandateLabels, + "targetFeatureInstanceId": resolveInstanceLabels, + }, + ) + for row in page: + row["targetInstanceLabel"] = row.pop("targetFeatureInstanceIdLabel", None) + row["mandateLabel"] = row.pop("mandateIdLabel", None) + + return {"runs": page, "total": total, "limit": limit, "offset": offset} + + +@router.get("/{runId}/detail") +@limiter.limit("60/minute") +def getWorkspaceRunDetail( + request: Request, + runId: str = Path(..., description="Run ID"), + context: RequestContext = Depends(getRequestContext), +) -> dict: + """Get full detail for a single run: metadata, step logs, linked files.""" + db = _getDb() + userId = str(context.user.id) if context.user else None + if not userId: + raise HTTPException(status_code=401, detail=routeApiMsg("Authentication required")) + + if not db._ensureTableExists(AutoRun): + raise HTTPException(status_code=404, detail=routeApiMsg("Run not found")) + + runs = db.getRecordset(AutoRun, 
recordFilter={"id": runId}) + if not runs: + raise HTTPException(status_code=404, detail=routeApiMsg("Run not found")) + run = dict(runs[0]) + + wfId = run.get("workflowId") + workflow: dict = {} + if wfId and db._ensureTableExists(AutoWorkflow): + wfs = db.getRecordset(AutoWorkflow, recordFilter={"id": wfId}) + if wfs: + workflow = dict(wfs[0]) + + tid = workflow.get("targetFeatureInstanceId") or workflow.get("featureInstanceId") + accessibleIds = _getUserAccessibleInstanceIds(userId) + isOwner = run.get("ownerId") == userId + + if not isOwner and (not tid or tid not in accessibleIds) and not context.isPlatformAdmin: + raise HTTPException(status_code=403, detail=routeApiMsg("Access denied")) + + steps: list = [] + if db._ensureTableExists(AutoStepLog): + stepRecords = db.getRecordset(AutoStepLog, recordFilter={"runId": runId}) or [] + steps = [dict(s) for s in stepRecords] + steps.sort(key=lambda s: s.get("startedAt") or 0) + + fileItems: list = [] + try: + from modules.datamodels.datamodelFiles import FileItem + from modules.interfaces.interfaceDbManagement import ComponentObjects + mgmtDb = ComponentObjects().db + if mgmtDb._ensureTableExists(FileItem): + nodeOutputs = run.get("nodeOutputs") or {} + fileIds: set[str] = set() + for nodeId, output in nodeOutputs.items(): + if not isinstance(output, dict): + continue + for key in ("fileId", "documentId", "fileIds", "documents"): + val = output.get(key) + if isinstance(val, str) and val: + fileIds.add(val) + elif isinstance(val, list): + for v in val: + if isinstance(v, str) and v: + fileIds.add(v) + elif isinstance(v, dict) and v.get("id"): + fileIds.add(v["id"]) + for fid in fileIds: + try: + rec = mgmtDb.getRecord(FileItem, fid) + if rec: + fileItems.append(dict(rec)) + except Exception: + pass + except Exception as e: + logger.warning("getWorkspaceRunDetail: file lookup failed: %s", e) + + run["workflowLabel"] = run.get("label") or workflow.get("label") or wfId + run["targetFeatureInstanceId"] = tid + + return { 
+ "run": run, + "workflow": { + "id": workflow.get("id"), + "label": workflow.get("label"), + "targetFeatureInstanceId": tid, + "featureInstanceId": workflow.get("featureInstanceId"), + "tags": workflow.get("tags", []), + } if workflow else None, + "steps": steps, + "files": fileItems, + } diff --git a/modules/workflows/automation2/executionEngine.py b/modules/workflows/automation2/executionEngine.py index 1d0ca5c8..55a63281 100644 --- a/modules/workflows/automation2/executionEngine.py +++ b/modules/workflows/automation2/executionEngine.py @@ -302,6 +302,30 @@ async def _executeWithRetry(executor, node, context, maxRetries: int = 0, retryD raise lastError +def _substituteFeatureInstancePlaceholders( + graph: Dict[str, Any], + targetFeatureInstanceId: str, +) -> Dict[str, Any]: + """Replace ``{{featureInstanceId}}`` placeholders in the serialised graph. + + Works on the full JSON representation so that placeholders inside nested + parameter dicts, prompt strings, etc. are all caught. Already-resolved + concrete UUIDs (pre-baked by ``_copyTemplateWorkflows``) are left untouched + because the placeholder literal ``{{featureInstanceId}}`` will not match. + """ + import json as _json + raw = _json.dumps(graph) + if "{{featureInstanceId}}" not in raw: + return graph + replaced = raw.replace("{{featureInstanceId}}", targetFeatureInstanceId) + logger.debug( + "_substituteFeatureInstancePlaceholders: resolved %d occurrence(s) -> %s", + raw.count("{{featureInstanceId}}"), + targetFeatureInstanceId, + ) + return _json.loads(replaced) + + async def executeGraph( graph: Dict[str, Any], services: Any, @@ -315,6 +339,7 @@ async def executeGraph( runId: Optional[str] = None, run_envelope: Optional[Dict[str, Any]] = None, label: Optional[str] = None, + targetFeatureInstanceId: Optional[str] = None, ) -> Dict[str, Any]: """ Execute automation2 graph. Returns { success, nodeOutputs, error?, stopped? }. 
@@ -322,14 +347,16 @@ async def executeGraph( pauses the run, and returns { success: False, paused: True, taskId, runId }. For resume: pass initialNodeOutputs (with result for the human node) and startAfterNodeId. For fresh runs: pass run_envelope (unified start payload for the start node); normalized with userId into context.runEnvelope. + targetFeatureInstanceId: resolves {{featureInstanceId}} placeholders in the graph JSON before execution. """ logger.info( - "executeGraph start: instanceId=%s workflowId=%s userId=%s mandateId=%s resume=%s", + "executeGraph start: instanceId=%s workflowId=%s userId=%s mandateId=%s resume=%s targetInstance=%s", instanceId, workflowId, userId, mandateId, startAfterNodeId is not None, + targetFeatureInstanceId, ) from modules.workflows.processing.shared.methodDiscovery import discoverMethods discoverMethods(services) @@ -338,6 +365,9 @@ async def executeGraph( materializeFeatureInstanceRefs, ) + if targetFeatureInstanceId: + graph = _substituteFeatureInstancePlaceholders(graph, targetFeatureInstanceId) + # Phase-5 Schicht-4: typed-ref envelopes are materialized FIRST so the # subsequent connection-ref pass and validation see the canonical shape. 
graph = materializeFeatureInstanceRefs(graph) diff --git a/modules/workflows/scheduler/mainScheduler.py b/modules/workflows/scheduler/mainScheduler.py index bf2cd0fd..0dce2ec5 100644 --- a/modules/workflows/scheduler/mainScheduler.py +++ b/modules/workflows/scheduler/mainScheduler.py @@ -243,6 +243,7 @@ class WorkflowScheduler: runEnv = normalize_run_envelope(runEnv, user_id=str(eventUser.id) if eventUser else None) _wfLabel = wf.get("label") if isinstance(wf, dict) else getattr(wf, "label", None) + _targetInstanceId = wf.get("targetFeatureInstanceId") if isinstance(wf, dict) else getattr(wf, "targetFeatureInstanceId", None) result = await executeGraph( graph=wf["graph"], @@ -254,6 +255,7 @@ class WorkflowScheduler: automation2_interface=iface, run_envelope=runEnv, label=_wfLabel, + targetFeatureInstanceId=_targetInstanceId, ) logger.info( "WorkflowScheduler: executed workflow %s success=%s paused=%s", diff --git a/scripts/_archive/README.md b/scripts/_archive/README.md new file mode 100644 index 00000000..dba3deef --- /dev/null +++ b/scripts/_archive/README.md @@ -0,0 +1,19 @@ +# Archived one-shot scripts + +Diese Scripts haben einmal eine konkrete Daten- oder Code-Migration ausgefuehrt +und werden nicht mehr aktiv aufgerufen. Sie bleiben hier liegen, falls jemand +spaeter auf einem alten DB-Dump oder einem alten Branch nochmal denselben Stand +herstellen muss. + +KEIN aktives Tool. Nicht aus CI, nicht aus Docs verlinken. Bei Aufraeumarbeiten +(z.B. nach 6 Monaten ohne Anwendung) loeschen. 
+ +## Inhalt + +| Datei | Migrationsthema | Archiviert am | Begruendung | +|-------|-----------------|---------------|-------------| +| `check_orphan_featureinstance.py` | Vor-Ort-Check mit hardcoded FeatureInstance-/Mandate-UUIDs | 2026-04-29 | Ad-hoc fuer einen konkreten Vorfall | +| `script_db_cleanup_duplicate_roles.py` | Cleanup doppelter Roles wegen `IS NULL`-Bug in `connectorDbPostgre` | 2026-04-29 | Bug ist laengst gefixt, Cleanup ueberall durchgelaufen | +| `migrate_async_to_sync.py` | One-shot Codemod `async def` -> `def` fuer FastAPI-Routes | 2026-04-29 | Refactor abgeschlossen | +| `i18n_rekey_plaintext_keys.py` | Frontend `t('dot.notation')` -> `t('Klartext')` Rekey | 2026-04-29 | Frontend-Migration abgeschlossen (siehe `wiki/c-work/4-done/2026-04-ui-i18n-dynamic-language-sets.md`) | +| `script_db_migrate_accessrules_objectkeys.py` | AccessRule-Items: kurz -> vollqualifiziert (Navigation-API) | 2026-04-29 | Navigation-API live, MIGRATION_MAP nur fuer trustee+realestate hardcoded | diff --git a/scripts/check_orphan_featureinstance.py b/scripts/_archive/check_orphan_featureinstance.py similarity index 100% rename from scripts/check_orphan_featureinstance.py rename to scripts/_archive/check_orphan_featureinstance.py diff --git a/scripts/i18n_rekey_plaintext_keys.py b/scripts/_archive/i18n_rekey_plaintext_keys.py similarity index 100% rename from scripts/i18n_rekey_plaintext_keys.py rename to scripts/_archive/i18n_rekey_plaintext_keys.py diff --git a/scripts/migrate_async_to_sync.py b/scripts/_archive/migrate_async_to_sync.py similarity index 100% rename from scripts/migrate_async_to_sync.py rename to scripts/_archive/migrate_async_to_sync.py diff --git a/scripts/script_db_cleanup_duplicate_roles.py b/scripts/_archive/script_db_cleanup_duplicate_roles.py similarity index 100% rename from scripts/script_db_cleanup_duplicate_roles.py rename to scripts/_archive/script_db_cleanup_duplicate_roles.py diff --git 
a/scripts/script_db_migrate_accessrules_objectkeys.py b/scripts/_archive/script_db_migrate_accessrules_objectkeys.py similarity index 100% rename from scripts/script_db_migrate_accessrules_objectkeys.py rename to scripts/_archive/script_db_migrate_accessrules_objectkeys.py diff --git a/scripts/_listMandates.py b/scripts/_listMandates.py deleted file mode 100644 index cf3e9bd2..00000000 --- a/scripts/_listMandates.py +++ /dev/null @@ -1,25 +0,0 @@ -import sys -from pathlib import Path -sys.path.insert(0, str(Path(__file__).resolve().parents[1])) -import psycopg2, psycopg2.extras -from modules.shared.configuration import APP_CONFIG - -c = psycopg2.connect( - host=APP_CONFIG.get('DB_HOST','localhost'), - user=APP_CONFIG.get('DB_USER'), - password=APP_CONFIG.get('DB_PASSWORD_SECRET'), - port=int(APP_CONFIG.get('DB_PORT',5432)), - dbname='poweron_app', -) -cur = c.cursor(cursor_factory=psycopg2.extras.RealDictCursor) -cur.execute('SELECT id, name, label, enabled, "deletedAt", "sysCreatedAt" FROM "Mandate" ORDER BY "sysCreatedAt"') -print("All Mandates in poweron_app:") -for r in cur.fetchall(): - print(f" id={r['id']} name={r['name']} label={r['label']} enabled={r['enabled']} deletedAt={r['deletedAt']}") - -cur.execute('SELECT COUNT(*) AS n FROM "FeatureInstance" WHERE "featureCode" = %s', ("redmine",)) -print(f"\nTotal redmine FeatureInstances in poweron_app: {cur.fetchone()['n']}") - -cur.execute('SELECT id, "mandateId", label, enabled FROM "FeatureInstance" WHERE "featureCode" = %s ORDER BY "sysCreatedAt"', ("redmine",)) -for r in cur.fetchall(): - print(f" fi={r['id']} mandate={r['mandateId']} label={r['label']} enabled={r['enabled']}") diff --git a/scripts/script_db_audit_legacy_state.py b/scripts/script_db_audit_legacy_state.py new file mode 100644 index 00000000..f51a132a --- /dev/null +++ b/scripts/script_db_audit_legacy_state.py @@ -0,0 +1,382 @@ +#!/usr/bin/env python3 +"""Audit-Skript fuer Legacy-Bestaende vor Bootstrap-Cleanup (Plan C). 
+ +Prueft fuer jede der 5 Bootstrap-Migrationsroutinen, ob noch Restbestand +existiert. Wenn alle Checks 0 / GREEN liefern, kann die jeweilige Routine +sicher aus ``interfaceBootstrap.py`` / ``interfaceDbKnowledge.py`` entfernt +werden. + +Checks: + 1. Mandate.description IS NOT NULL und Mandate.label leer + -> _migrateMandateDescriptionToLabel + 2. Mandate.label leer ODER Mandate.name verstoesst gegen Slug-Regeln + -> _migrateMandateNameLabelSlugRules + 3. Mandate mit name='Root' und isSystem=False + -> initRootMandate Legacy-Zweig + 4. Role mit roleLabel='sysadmin' im Root-Mandat + -> _migrateAndDropSysAdminRole + 5. FileContentIndex mit leerem mandateId UND leerem featureInstanceId + -> aggregateMandateRagTotalBytes Fallback-Block + +Verwendung: + python -m scripts.script_db_audit_legacy_state # text-output + python -m scripts.script_db_audit_legacy_state --json # JSON-output + python -m scripts.script_db_audit_legacy_state --purge-rag-orphans + # loescht FileContentIndex-Rows ohne mandateId UND ohne featureInstanceId + # (Voraussetzung fuer Removal des aggregateMandateRagTotalBytes-Fallback) + +Exit-Code: + 0 alle Checks GREEN (Removal sicher) + 1 mind. ein Check RED (erst Daten bereinigen) + 2 Skript-Fehler (DB nicht erreichbar etc.) + +Lese-Zugriffe sind der Default. Schreibzugriffe NUR mit explizitem +``--purge-*``-Flag. 
+""" + +from __future__ import annotations + +import argparse +import json +import logging +import os +import sys +from dataclasses import dataclass, field +from typing import Any, Callable, Dict, List, Optional + + +_gatewayDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if _gatewayDir not in sys.path: + sys.path.insert(0, _gatewayDir) + +from dotenv import load_dotenv + +_envPath = os.path.join(_gatewayDir, "env_dev.env") +if os.path.exists(_envPath): + load_dotenv(_envPath) + +from modules.datamodels.datamodelUam import Mandate +from modules.datamodels.datamodelRbac import Role +from modules.datamodels.datamodelKnowledge import FileContentIndex +from modules.security.rootAccess import getRootDbAppConnector +from modules.interfaces.interfaceDbKnowledge import KnowledgeObjects +from modules.shared.mandateNameUtils import isValidMandateName + +logging.basicConfig(level=logging.WARNING, format="%(message)s") +logger = logging.getLogger(__name__) + + +@dataclass +class _CheckResult: + """Ergebnis eines einzelnen Audit-Checks.""" + + name: str + routine: str + location: str + count: int + status: str + samples: List[Dict[str, Any]] = field(default_factory=list) + error: Optional[str] = None + + def toDict(self) -> Dict[str, Any]: + return { + "name": self.name, + "routine": self.routine, + "location": self.location, + "count": self.count, + "status": self.status, + "samples": self.samples, + "error": self.error, + } + + +def _getAppDb(): + return getRootDbAppConnector() + + +def _getKnowledgeDb(): + return KnowledgeObjects().db + + +def _checkMandateDescription(db) -> _CheckResult: + """Mandate.description noch vorhanden und label leer?""" + rows = db.getRecordset(Mandate) + legacy = [ + { + "id": r.get("id"), + "name": r.get("name"), + "description": str(r.get("description"))[:60] if r.get("description") else None, + "label": r.get("label"), + } + for r in rows + if r.get("description") and not r.get("label") + ] + return _CheckResult( + 
name="mandate-description-to-label", + routine="_migrateMandateDescriptionToLabel", + location="interfaces/interfaceBootstrap.py:422-445", + count=len(legacy), + status="GREEN" if not legacy else "RED", + samples=legacy[:5], + ) + + +def _checkMandateSlugRules(db) -> _CheckResult: + """Mandate.name verletzt Slug-Regeln ODER Mandate.label leer?""" + rows = db.getRecordset(Mandate) + legacy = [] + seen: set[str] = set() + for r in sorted(rows, key=lambda x: str(x.get("id", ""))): + name = (r.get("name") or "").strip() + labelRaw = r.get("label") + labelEmpty = not (labelRaw or "").strip() if labelRaw is not None else True + nameInvalid = not isValidMandateName(name) + nameCollides = name in seen + if not nameInvalid and not nameCollides: + seen.add(name) + if labelEmpty or nameInvalid or nameCollides: + legacy.append( + { + "id": r.get("id"), + "name": name, + "label": r.get("label"), + "labelEmpty": labelEmpty, + "nameInvalid": nameInvalid, + "nameCollides": nameCollides, + } + ) + return _CheckResult( + name="mandate-name-slug-rules", + routine="_migrateMandateNameLabelSlugRules", + location="interfaces/interfaceBootstrap.py:448-511", + count=len(legacy), + status="GREEN" if not legacy else "RED", + samples=legacy[:5], + ) + + +def _checkRootMandateLegacy(db) -> _CheckResult: + """Mandate mit name='Root' (case-sensitive) ODER isSystem=False fuer root?""" + legacyByName = db.getRecordset(Mandate, recordFilter={"name": "Root"}) + rows = db.getRecordset(Mandate, recordFilter={"name": "root"}) + legacyByFlag = [r for r in rows if not r.get("isSystem")] + combined = list(legacyByName) + legacyByFlag + samples = [ + { + "id": r.get("id"), + "name": r.get("name"), + "isSystem": r.get("isSystem"), + } + for r in combined + ] + return _CheckResult( + name="root-mandate-legacy", + routine="initRootMandate-legacy-branch", + location="interfaces/interfaceBootstrap.py:406-412", + count=len(samples), + status="GREEN" if not samples else "RED", + samples=samples[:5], + ) + + +def 
_checkSysadminRole(db) -> _CheckResult: + """Legacy 'sysadmin'-Rolle im Root-Mandat?""" + rootMandates = db.getRecordset(Mandate, recordFilter={"name": "root", "isSystem": True}) + if not rootMandates: + return _CheckResult( + name="sysadmin-role", + routine="_migrateAndDropSysAdminRole", + location="interfaces/interfaceBootstrap.py:840-932", + count=0, + status="GREEN", + samples=[], + error="kein Root-Mandat gefunden -- Check uebersprungen (kann nicht relevant sein)", + ) + rootId = str(rootMandates[0].get("id")) + rows = db.getRecordset( + Role, + recordFilter={"roleLabel": "sysadmin", "mandateId": rootId, "featureInstanceId": None}, + ) + samples = [{"id": r.get("id"), "roleLabel": r.get("roleLabel")} for r in rows] + return _CheckResult( + name="sysadmin-role", + routine="_migrateAndDropSysAdminRole", + location="interfaces/interfaceBootstrap.py:840-932", + count=len(samples), + status="GREEN" if not samples else "RED", + samples=samples[:5], + ) + + +def _checkRagFallback(knowDb) -> _CheckResult: + """FileContentIndex-Rows ohne mandateId UND ohne featureInstanceId?""" + rows = knowDb.getRecordset(FileContentIndex) + legacy = [ + { + "id": r.get("id"), + "fileName": r.get("fileName"), + "totalSize": r.get("totalSize"), + } + for r in rows + if not (r.get("mandateId") or "").strip() and not (r.get("featureInstanceId") or "").strip() + ] + return _CheckResult( + name="rag-fallback-orphan-index", + routine="aggregateMandateRagTotalBytes-fallback", + location="interfaces/interfaceDbKnowledge.py:609-635", + count=len(legacy), + status="GREEN" if not legacy else "RED", + samples=legacy[:5], + ) + + +def _runChecks() -> List[_CheckResult]: + appDb = _getAppDb() + knowDb = _getKnowledgeDb() + + appChecks: List[Callable[[Any], _CheckResult]] = [ + _checkMandateDescription, + _checkMandateSlugRules, + _checkRootMandateLegacy, + _checkSysadminRole, + ] + + results: List[_CheckResult] = [] + for fn in appChecks: + try: + results.append(fn(appDb)) + except Exception as 
exc: + results.append( + _CheckResult( + name=fn.__name__, + routine="?", + location="?", + count=-1, + status="ERROR", + error=f"{type(exc).__name__}: {exc}", + ) + ) + + try: + results.append(_checkRagFallback(knowDb)) + except Exception as exc: + results.append( + _CheckResult( + name="rag-fallback-orphan-index", + routine="aggregateMandateRagTotalBytes-fallback", + location="interfaces/interfaceDbKnowledge.py:609-635", + count=-1, + status="ERROR", + error=f"{type(exc).__name__}: {exc}", + ) + ) + + return results + + +def _printText(results: List[_CheckResult]) -> None: + print("=" * 78) + print("BOOTSTRAP-MIGRATIONS LEGACY-STATE-AUDIT") + print("=" * 78) + for r in results: + marker = { + "GREEN": "[OK]", + "RED": "[!!]", + "ERROR": "[ERR]", + }.get(r.status, "[?]") + print(f"\n{marker} {r.name}") + print(f" Routine : {r.routine}") + print(f" Location: {r.location}") + print(f" Count : {r.count}") + print(f" Status : {r.status}") + if r.error: + print(f" Note : {r.error}") + if r.samples: + print(f" Samples : (max 5)") + for s in r.samples: + print(f" {s}") + + print("\n" + "=" * 78) + greens = sum(1 for r in results if r.status == "GREEN") + reds = sum(1 for r in results if r.status == "RED") + errs = sum(1 for r in results if r.status == "ERROR") + print(f"SUMMARY: {greens} GREEN {reds} RED {errs} ERROR ({len(results)} total)") + if reds == 0 and errs == 0: + print("VERDICT: alle Migrationsroutinen koennen entfernt werden.") + elif errs > 0: + print("VERDICT: Audit unvollstaendig (Fehler) -- bitte Skript fixen.") + else: + print("VERDICT: erst Daten bereinigen, dann Routinen entfernen.") + print("=" * 78) + + +def _purgeRagOrphans() -> int: + """Loescht alle FileContentIndex-Rows ohne mandateId UND ohne featureInstanceId. + + Returns: Anzahl geloeschter Rows. 
+ """ + knowDb = _getKnowledgeDb() + rows = knowDb.getRecordset(FileContentIndex) + orphans = [ + r for r in rows + if not (r.get("mandateId") or "").strip() + and not (r.get("featureInstanceId") or "").strip() + ] + if not orphans: + print("Keine RAG-Orphans gefunden -- nichts zu purgen.") + return 0 + + print(f"Purge {len(orphans)} RAG-Orphan(s):") + deleted = 0 + for r in orphans: + rid = r.get("id") + try: + knowDb.recordDelete(FileContentIndex, str(rid)) + deleted += 1 + print(f" geloescht: {rid} {r.get('fileName')}") + except Exception as exc: + print(f" FEHLER {rid}: {type(exc).__name__}: {exc}", file=sys.stderr) + print(f"Purge abgeschlossen: {deleted}/{len(orphans)} geloescht.") + return deleted + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Audit-Skript fuer Legacy-Bestaende (Bootstrap-Cleanup Plan C)" + ) + parser.add_argument("--json", action="store_true", help="JSON-Output statt Text") + parser.add_argument( + "--purge-rag-orphans", + action="store_true", + help="WRITE: loescht FileContentIndex-Rows ohne mandateId UND featureInstanceId", + ) + args = parser.parse_args() + + if args.purge_rag_orphans: + try: + _purgeRagOrphans() + except Exception as exc: + print(f"FATAL: Purge fehlgeschlagen -- {type(exc).__name__}: {exc}", file=sys.stderr) + return 2 + print() + + try: + results = _runChecks() + except Exception as exc: + print(f"FATAL: konnte Audit nicht starten -- {type(exc).__name__}: {exc}", file=sys.stderr) + return 2 + + if args.json: + print(json.dumps([r.toDict() for r in results], indent=2, default=str)) + else: + _printText(results) + + if any(r.status == "ERROR" for r in results): + return 2 + if any(r.status == "RED" for r in results): + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/unit/bootstrap/test_mandateNameMigration.py b/tests/unit/bootstrap/test_mandateNameMigration.py deleted file mode 100644 index d09a6846..00000000 --- 
a/tests/unit/bootstrap/test_mandateNameMigration.py +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -Unit tests for ``_migrateMandateNameLabelSlugRules`` in interfaceBootstrap. - -Covers: -- legacy ``name``/``label`` rows get fixed (label fill, slug rename), -- collisions across legacy rows resolve via -2/-3 suffixes in stable id order, -- valid rows are left untouched (idempotency), -- second invocation is a no-op. -""" - -from typing import Any, Dict, List, Optional - -import pytest - -from modules.datamodels.datamodelUam import Mandate -from modules.interfaces.interfaceBootstrap import _migrateMandateNameLabelSlugRules -from modules.shared.mandateNameUtils import isValidMandateName - - -class _FakeDb: - """Minimal connector simulating getRecordset(Mandate)+recordModify(Mandate, id, data).""" - - def __init__(self, rows: List[Dict[str, Any]]): - self.rows: List[Dict[str, Any]] = [dict(r) for r in rows] - self.modifyCalls: List[Dict[str, Any]] = [] - - def getRecordset(self, model, recordFilter: Optional[Dict[str, Any]] = None): - if model is not Mandate: - return [] - if not recordFilter: - return [dict(r) for r in self.rows] - out = [] - for r in self.rows: - if all(r.get(k) == v for k, v in recordFilter.items()): - out.append(dict(r)) - return out - - def recordModify(self, model, recordId: str, data: Dict[str, Any]): - self.modifyCalls.append({"id": str(recordId), "data": dict(data)}) - for r in self.rows: - if str(r.get("id")) == str(recordId): - r.update(data) - return r - return None - - -def _row(mid: str, name: Any, label: Any = None) -> Dict[str, Any]: - return {"id": mid, "name": name, "label": label} - - -class TestMigrationFillsLabel: - def test_emptyLabelGetsNameAsLabel(self): - db = _FakeDb([_row("a1", "good-name", None)]) - _migrateMandateNameLabelSlugRules(db) - assert db.rows[0]["label"] == "good-name" - assert db.rows[0]["name"] == "good-name" - - def 
test_emptyLabelAndEmptyNameFallsBackToMandate(self): - db = _FakeDb([_row("a1", "", "")]) - _migrateMandateNameLabelSlugRules(db) - assert db.rows[0]["label"] == "Mandate" - assert isValidMandateName(db.rows[0]["name"]) - - -class TestMigrationRenamesInvalidNames: - def test_invalidNameGetsSlugFromLabel(self): - db = _FakeDb([_row("a1", "Home patrick", "Home Patrick")]) - _migrateMandateNameLabelSlugRules(db) - assert db.rows[0]["name"] == "home-patrick" - assert db.rows[0]["label"] == "Home Patrick" - - def test_umlautsTransliterated(self): - db = _FakeDb([_row("a1", "Müller AG", "Müller AG")]) - _migrateMandateNameLabelSlugRules(db) - assert db.rows[0]["name"] == "mueller-ag" - - -class TestMigrationCollisions: - def test_collisionsResolveByStableIdOrder(self): - rows = [ - _row("z1", "Home patrick", "Home Patrick"), - _row("a1", "home-patrick", "Home Patrick Two"), - ] - db = _FakeDb(rows) - _migrateMandateNameLabelSlugRules(db) - byId = {r["id"]: r for r in db.rows} - assert byId["a1"]["name"] == "home-patrick" - assert byId["z1"]["name"] == "home-patrick-2" - - def test_threeWayCollisionGetsThirdSuffix(self): - rows = [ - _row("id-aaa", "home-patrick", "Home Patrick"), - _row("id-bbb", "Home patrick", "Home Patrick"), - _row("id-ccc", "home patrick", "Home Patrick"), - ] - db = _FakeDb(rows) - _migrateMandateNameLabelSlugRules(db) - names = sorted(r["name"] for r in db.rows) - assert names == ["home-patrick", "home-patrick-2", "home-patrick-3"] - - -class TestMigrationIdempotency: - def test_secondRunIsNoop(self): - rows = [ - _row("a1", "home-patrick", "Home Patrick"), - _row("b1", "Home Müller", ""), - ] - db = _FakeDb(rows) - _migrateMandateNameLabelSlugRules(db) - assert all(isValidMandateName(r["name"]) for r in db.rows) - firstChanges = list(db.modifyCalls) - db.modifyCalls.clear() - _migrateMandateNameLabelSlugRules(db) - assert db.modifyCalls == [], ( - f"expected no further changes after first migration, got {db.modifyCalls}; " - f"firstRun changes: 
{firstChanges}" - ) - - def test_validRowsLeftUntouched(self): - rows = [_row("a1", "root", "Root"), _row("b1", "alpina-treuhand", "Alpina Treuhand AG")] - db = _FakeDb(rows) - _migrateMandateNameLabelSlugRules(db) - assert db.modifyCalls == [] - - -class TestMigrationEmpty: - def test_emptyDbDoesNothing(self): - db = _FakeDb([]) - _migrateMandateNameLabelSlugRules(db) - assert db.modifyCalls == [] diff --git a/tests/unit/rbac/test_sysadmin_migration.py b/tests/unit/rbac/test_sysadmin_migration.py deleted file mode 100644 index 8ca077bf..00000000 --- a/tests/unit/rbac/test_sysadmin_migration.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -Unit tests for the one-shot sysadmin role -> isPlatformAdmin migration. - -Covers acceptance criteria from -``wiki/c-work/4-done/2026-04-sysadmin-authority-split.md``: - -- AC#4 -> Existing sysadmin role-holders are promoted to ``isPlatformAdmin=True`` - and the legacy role is removed (Role + UserMandateRole + AccessRules) - when the gateway boots. -- AC#10 -> The migration is idempotent and removes ALL artefacts (Role, - AccessRules, UserMandateRole) of the legacy ``sysadmin`` role. - -Strategy: use an in-memory fake ``DatabaseConnector`` that records calls -and returns deterministic recordsets for ``Role``/``UserMandateRole``/ -``UserMandate``/``UserInDB``/``AccessRule`` lookups. 
-""" - -from __future__ import annotations - -from typing import Any, Dict, List -from unittest.mock import Mock - -from modules.interfaces.interfaceBootstrap import _migrateAndDropSysAdminRole -from modules.datamodels.datamodelMembership import UserMandate, UserMandateRole -from modules.datamodels.datamodelRbac import AccessRule, Role -from modules.datamodels.datamodelUam import UserInDB - - -_ROOT_MANDATE_ID = "root-mandate-id" -_SYSADMIN_ROLE_ID = "sysadmin-role-id" -_USER_MANDATE_ID = "user-mandate-id" -_USER_ID = "legacy-user-id" -_UMR_ROW_ID = "umr-row-id" -_ACCESS_RULE_ID = "access-rule-id" - - -def _buildFakeDb( - *, - sysadminRoles: List[Dict[str, Any]], - umRoleRows: List[Dict[str, Any]], - userMandateRows: List[Dict[str, Any]], - users: List[Dict[str, Any]], - accessRules: List[Dict[str, Any]], -) -> Mock: - """Build a fake ``DatabaseConnector`` that maps model -> recordset.""" - - deletes: List[tuple] = [] - modifies: List[tuple] = [] - - def _getRecordset(model, recordFilter=None, **_): # noqa: ANN001 - recordFilter = recordFilter or {} - if model is Role: - label = recordFilter.get("roleLabel") - mandateId = recordFilter.get("mandateId") - if label == "sysadmin" and mandateId == _ROOT_MANDATE_ID: - return list(sysadminRoles) - return [] - if model is UserMandateRole: - wanted = recordFilter.get("roleId") - return [r for r in umRoleRows if r.get("roleId") == wanted] - if model is UserMandate: - wanted = recordFilter.get("id") - return [r for r in userMandateRows if r.get("id") == wanted] - if model is UserInDB: - wanted = recordFilter.get("id") - return [r for r in users if r.get("id") == wanted] - if model is AccessRule: - wanted = recordFilter.get("roleId") - return [r for r in accessRules if r.get("roleId") == wanted] - return [] - - def _recordModify(model, recordId, payload): # noqa: ANN001 - modifies.append((model, recordId, payload)) - # Reflect the change so a subsequent migration call is idempotent. 
- if model is UserInDB: - for u in users: - if u.get("id") == recordId: - u.update(payload) - return True - - def _recordDelete(model, recordId): # noqa: ANN001 - deletes.append((model, recordId)) - if model is UserMandateRole: - umRoleRows[:] = [r for r in umRoleRows if r.get("id") != recordId] - elif model is AccessRule: - accessRules[:] = [r for r in accessRules if r.get("id") != recordId] - elif model is Role: - sysadminRoles[:] = [r for r in sysadminRoles if r.get("id") != recordId] - return True - - db = Mock() - db.getRecordset = Mock(side_effect=_getRecordset) - db.recordModify = Mock(side_effect=_recordModify) - db.recordDelete = Mock(side_effect=_recordDelete) - db._modifies = modifies # exposed for assertions - db._deletes = deletes - return db - - -def _seed(): - return { - "sysadminRoles": [{"id": _SYSADMIN_ROLE_ID, "roleLabel": "sysadmin", - "mandateId": _ROOT_MANDATE_ID}], - "umRoleRows": [{"id": _UMR_ROW_ID, "roleId": _SYSADMIN_ROLE_ID, - "userMandateId": _USER_MANDATE_ID}], - "userMandateRows": [{"id": _USER_MANDATE_ID, "userId": _USER_ID, - "mandateId": _ROOT_MANDATE_ID}], - "users": [{"id": _USER_ID, "username": "legacy", - "isSysAdmin": False, "isPlatformAdmin": False}], - "accessRules": [{"id": _ACCESS_RULE_ID, "roleId": _SYSADMIN_ROLE_ID}], - } - - -# --------------------------------------------------------------------------- -# AC #4 — promote + drop on first run -# --------------------------------------------------------------------------- - - -def testMigrationPromotesUserAndDropsArtefacts(): - """AC#4: legacy holder is promoted; Role+AccessRule+UMR are deleted.""" - seed = _seed() - db = _buildFakeDb(**seed) - - _migrateAndDropSysAdminRole(db, _ROOT_MANDATE_ID) - - # User got isPlatformAdmin=True - assert seed["users"][0]["isPlatformAdmin"] is True - assert any( - m[0] is UserInDB and m[2] == {"isPlatformAdmin": True} - for m in db._modifies - ), "Expected UserInDB.isPlatformAdmin promotion call" - - # All three artefact tables had their 
rows deleted. - deletedModels = {m[0] for m in db._deletes} - assert UserMandateRole in deletedModels, "UserMandateRole row not deleted" - assert AccessRule in deletedModels, "AccessRule row not deleted" - assert Role in deletedModels, "Sysadmin Role record not deleted" - - # And the seeded lists are empty after the migration. - assert seed["umRoleRows"] == [] - assert seed["accessRules"] == [] - assert seed["sysadminRoles"] == [] - - -# --------------------------------------------------------------------------- -# AC #10 — idempotent: a second run is a no-op -# --------------------------------------------------------------------------- - - -def testMigrationIsIdempotent(): - """AC#10: a second invocation finds no sysadmin role and exits silently.""" - seed = _seed() - db = _buildFakeDb(**seed) - - _migrateAndDropSysAdminRole(db, _ROOT_MANDATE_ID) - firstModifies = list(db._modifies) - firstDeletes = list(db._deletes) - - _migrateAndDropSysAdminRole(db, _ROOT_MANDATE_ID) - - # No additional writes on the second call. - assert db._modifies == firstModifies, ( - "Second migration call must not perform additional writes" - ) - assert db._deletes == firstDeletes, ( - "Second migration call must not perform additional deletes" - ) - - -def testMigrationSkipsAlreadyPromotedUsers(): - """If a user already has ``isPlatformAdmin=True``, no redundant write.""" - seed = _seed() - seed["users"][0]["isPlatformAdmin"] = True # already promoted - db = _buildFakeDb(**seed) - - _migrateAndDropSysAdminRole(db, _ROOT_MANDATE_ID) - - # No promotion write for an already-promoted user. - promotionWrites = [ - m for m in db._modifies - if m[0] is UserInDB and m[2].get("isPlatformAdmin") is True - ] - assert promotionWrites == [], ( - "Should not re-write isPlatformAdmin if user already has it" - ) - - # But role + access-rule cleanup still happens. 
- deletedModels = {m[0] for m in db._deletes} - assert Role in deletedModels - assert AccessRule in deletedModels - assert UserMandateRole in deletedModels - - -def testMigrationOnEmptyDbIsNoop(): - """No legacy sysadmin role at all -> no calls, no errors.""" - db = _buildFakeDb( - sysadminRoles=[], - umRoleRows=[], - userMandateRows=[], - users=[], - accessRules=[], - ) - - _migrateAndDropSysAdminRole(db, _ROOT_MANDATE_ID) - - assert db._modifies == [] - assert db._deletes == [] diff --git a/tests/unit/workflows/test_automation2_graphUtils.py b/tests/unit/workflows/test_automation2_graphUtils.py index ff5df2cc..5ea7126a 100644 --- a/tests/unit/workflows/test_automation2_graphUtils.py +++ b/tests/unit/workflows/test_automation2_graphUtils.py @@ -66,6 +66,17 @@ class TestResolveParameterReferences: value = "Land: {{n1.country}}" assert resolveParameterReferences(value, node_outputs) == "Land: CH" + def test_legacy_string_template_loop_current_item_nested(self): + """Same shape as executionEngine sets on loop node id during body iteration.""" + node_outputs = { + "loop93": { + "currentItem": {"subject": "Hello", "body": {"content": "World"}}, + "currentIndex": 0, + }, + } + value = "Subj: {{loop93.currentItem.subject}} Body: {{loop93.currentItem.body.content}}" + assert resolveParameterReferences(value, node_outputs) == "Subj: Hello Body: World" + class TestWildcardIteration: """Phase-4 typed Bindings-Resolver: ``*`` segment iterates over a list. 
From b12671bbb531b11cfaa986ddee861cddf1693069 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Wed, 29 Apr 2026 22:54:17 +0200 Subject: [PATCH 2/5] fixes before document generation refactory styles --- modules/features/trustee/mainTrustee.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/modules/features/trustee/mainTrustee.py b/modules/features/trustee/mainTrustee.py index fba4346a..05e01e8a 100644 --- a/modules/features/trustee/mainTrustee.py +++ b/modules/features/trustee/mainTrustee.py @@ -440,15 +440,24 @@ TEMPLATE_WORKFLOWS = [ {"id": "analyse", "type": "ai.prompt", "label": "Budget-Analyse", "_method": "ai", "_action": "process", "parameters": { "aiPrompt": ( - "Fuehre einen Budget-Soll/Ist-Vergleich durch.\n" - "Die Budget-Datei (Excel) wurde als Dokument uebergeben. " - "Die aktuellen Buchhaltungsdaten sind im Kontext verfuegbar.\n" - "1. Lies die Soll-Werte aus dem uebergebenen Budget-Dokument\n" - "2. Vergleiche sie mit den Ist-Werten aus der Buchhaltung pro Konto\n" - "3. Berechne die Abweichung (absolut und prozentual)\n" - "4. Erstelle ein Abweichungs-Chart (Balkendiagramm: Soll vs. Ist pro Konto)\n" - "5. Markiere kritische Abweichungen (>10%) und gib eine kurze Einschaetzung" + "Fuehre einen Budget-Soll/Ist-Vergleich durch und liefere EIN Excel-Dokument " + "mit folgender Struktur:\n\n" + "1. Tabelle \"Konten-Vergleich\" -- EINE Tabelle, EINE Zeile pro Konto:\n" + " Spalten: Konto-Nr | Konto-Name | Soll | Ist | Abweichung absolut | " + "Abweichung % | Status (OK / Warnung / Kritisch).\n" + "2. EINE Visualisierung \"Soll vs. Ist gesamt\" -- ein einziges " + "Balkendiagramm UNTER der Tabelle, das ALLE Konten in einer Grafik " + "gegenueberstellt (gruppierte Balken: Soll und Ist je Konto).\n" + "3. 
Kurzer Management-Summary-Absatz (3-5 Saetze) UNTER dem Chart " + "mit den 3 groessten Abweichungen (>10%) und einer fachlichen " + "Einschaetzung.\n\n" + "Verwende die uebergebene Budget-Datei als Soll-Quelle und die im " + "Kontext bereitgestellten Buchhaltungsdaten als Ist-Quelle.\n" + "WICHTIG: Erstelle KEINEN separaten Chart pro Konto. Nur EIN " + "Uebersichts-Chart ueber alle Konten ist gewuenscht." ), + "resultType": "xlsx", + "documentTheme": "finance", "documentList": {"type": "ref", "nodeId": "trigger", "path": ["payload", "documentList"]}, "context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]}, "simpleMode": False, From afd7e9d941042534c314dee74c7d6cd965c37d59 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Wed, 29 Apr 2026 23:12:46 +0200 Subject: [PATCH 3/5] plan d implemented - generationn styles --- modules/datamodels/datamodelAi.py | 1 + modules/datamodels/datamodelJson.py | 17 +- .../graphicalEditor/nodeDefinitions/ai.py | 25 +- modules/features/trustee/mainTrustee.py | 25 +- .../workspace/datamodelFeatureWorkspace.py | 19 +- .../workspace/routeFeatureWorkspace.py | 76 +++++ modules/interfaces/interfaceAiObjects.py | 34 ++ .../serviceAgent/coreTools/_mediaTools.py | 248 +++++--------- .../services/serviceAi/mainServiceAi.py | 49 ++- .../mainServiceGeneration.py | 7 +- .../renderers/documentRendererBaseTemplate.py | 111 +++++- .../renderers/rendererDocx.py | 323 +++++++++--------- .../renderers/rendererHtml.py | 205 ++++++++++- .../renderers/rendererPdf.py | 144 +++++--- .../renderers/rendererPptx.py | 159 ++++++--- .../renderers/rendererXlsx.py | 55 +-- .../serviceGeneration/styleDefaults.py | 75 ++++ .../serviceGeneration/subDocumentUtility.py | 111 ++++-- modules/workflows/methods/methodAi/_common.py | 18 + .../methods/methodAi/actions/consolidate.py | 2 + .../methodAi/actions/convertDocument.py | 4 + .../methods/methodAi/actions/generateCode.py | 10 + .../methodAi/actions/generateDocument.py | 10 + 
.../methods/methodAi/actions/process.py | 13 + .../methodAi/actions/summarizeDocument.py | 4 + .../methodAi/actions/translateDocument.py | 4 + tests/serviceAi/__init__.py | 0 .../test_allowed_models_whitelist.py | 14 + tests/serviceGeneration/__init__.py | 0 .../test_inline_image_paragraph.py | 23 ++ .../test_md_to_json_consolidation.py | 71 ++++ .../serviceGeneration/test_style_resolver.py | 39 +++ 32 files changed, 1411 insertions(+), 485 deletions(-) create mode 100644 modules/serviceCenter/services/serviceGeneration/styleDefaults.py create mode 100644 modules/workflows/methods/methodAi/_common.py create mode 100644 tests/serviceAi/__init__.py create mode 100644 tests/serviceAi/test_allowed_models_whitelist.py create mode 100644 tests/serviceGeneration/__init__.py create mode 100644 tests/serviceGeneration/test_inline_image_paragraph.py create mode 100644 tests/serviceGeneration/test_md_to_json_consolidation.py create mode 100644 tests/serviceGeneration/test_style_resolver.py diff --git a/modules/datamodels/datamodelAi.py b/modules/datamodels/datamodelAi.py index cfc10db2..786eea7d 100644 --- a/modules/datamodels/datamodelAi.py +++ b/modules/datamodels/datamodelAi.py @@ -162,6 +162,7 @@ class AiCallOptions(BaseModel): # Provider filtering (from UI multiselect or automation config) allowedProviders: Optional[List[str]] = Field(default=None, description="List of allowed AI providers to use (empty = all RBAC-permitted)") + allowedModels: Optional[List[str]] = Field(default=None, description="Whitelist of allowed model names (AND-filter with allowedProviders). 
None/empty = all allowed.") class AiCallRequest(BaseModel): diff --git a/modules/datamodels/datamodelJson.py b/modules/datamodels/datamodelJson.py index 784cc042..0228fbad 100644 --- a/modules/datamodels/datamodelJson.py +++ b/modules/datamodels/datamodelJson.py @@ -6,7 +6,7 @@ Unified JSON document schema and helpers used by both generation prompts and ren This defines a single canonical template and the supported section types. """ -from typing import List +from typing import List, Literal, TypedDict # Canonical list of supported section types across the system supportedSectionTypes: List[str] = [ @@ -18,6 +18,21 @@ supportedSectionTypes: List[str] = [ "image", ] +class InlineRun(TypedDict, total=False): + """Single inline content run. Every paragraph/cell/list-item is a List[InlineRun].""" + type: Literal["text", "image", "link", "bold", "italic", "code"] + value: str # text content (for text/bold/italic/code/link-label) + fileId: str # for type=image: reference to FileItem + base64Data: str # for type=image: resolved base64 (post-processing) + mimeType: str # for type=image: e.g. "image/png" + widthPt: int # for type=image: optional render width + href: str # for type=link: URL target + +supportedInlineRunTypes: List[str] = [ + "text", "image", "link", "bold", "italic", "code", +] + + # Canonical JSON template used for AI generation (documents array + sections) # This template is used for STRUCTURE generation - sections have empty elements arrays. # For content generation, elements arrays will be populated later. 
diff --git a/modules/features/graphicalEditor/nodeDefinitions/ai.py b/modules/features/graphicalEditor/nodeDefinitions/ai.py index 3273540a..0336e382 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/ai.py +++ b/modules/features/graphicalEditor/nodeDefinitions/ai.py @@ -3,6 +3,15 @@ from modules.shared.i18nRegistry import t +_AI_COMMON_PARAMS = [ + {"name": "requireNeutralization", "type": "boolean", "required": False, + "frontendType": "checkbox", "default": False, + "description": t("Eingaben fuer diesen Call neutralisieren")}, + {"name": "allowedModels", "type": "array", "required": False, + "frontendType": "modelMultiSelect", "default": [], + "description": t("Erlaubte LLM-Modelle (leer = alle erlaubten)")}, +] + AI_NODES = [ { "id": "ai.prompt", @@ -19,7 +28,7 @@ AI_NODES = [ "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""}, {"name": "simpleMode", "type": "boolean", "required": False, "frontendType": "checkbox", "description": t("Einfacher Modus"), "default": True}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": [ @@ -38,7 +47,7 @@ AI_NODES = [ "parameters": [ {"name": "prompt", "type": "string", "required": True, "frontendType": "textarea", "description": t("Recherche-Anfrage")}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, @@ -58,7 +67,7 @@ AI_NODES = [ {"name": "summaryLength", "type": "string", "required": False, "frontendType": "select", "frontendOptions": {"options": ["brief", "medium", "detailed"]}, "description": t("Kurz, mittel oder ausführlich"), "default": "medium"}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, @@ -77,7 +86,7 @@ AI_NODES = [ "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""}, {"name": "targetLanguage", "type": "string", "required": True, "frontendType": "text", "description": t("Zielsprache (z.B. 
de, en, French)")}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, @@ -97,7 +106,7 @@ AI_NODES = [ {"name": "targetFormat", "type": "string", "required": True, "frontendType": "select", "frontendOptions": {"options": ["docx", "pdf", "xlsx", "csv", "txt", "html", "json", "md"]}, "description": t("Zielformat")}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, @@ -114,7 +123,7 @@ AI_NODES = [ "parameters": [ {"name": "prompt", "type": "string", "required": True, "frontendType": "textarea", "description": t("Generierungs-Prompt")}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, @@ -134,7 +143,7 @@ AI_NODES = [ {"name": "resultType", "type": "string", "required": False, "frontendType": "select", "frontendOptions": {"options": ["py", "js", "ts", "html", "java", "cpp", "txt", "json", "csv", "xml"]}, "description": t("Datei-Endung der erzeugten Code-Datei"), "default": "py"}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, @@ -154,7 +163,7 @@ AI_NODES = [ "description": t("Konsolidierungsmodus"), "default": "summarize"}, {"name": "prompt", "type": "string", "required": False, "frontendType": "textarea", "description": t("Optionaler Prompt für die Konsolidierung"), "default": ""}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}}, diff --git a/modules/features/trustee/mainTrustee.py b/modules/features/trustee/mainTrustee.py index 05e01e8a..d8f7a804 100644 --- a/modules/features/trustee/mainTrustee.py +++ b/modules/features/trustee/mainTrustee.py @@ -361,6 +361,17 @@ QUICK_ACTIONS = [ # The placeholder {{featureInstanceId}} is replaced by _copyTemplateWorkflows. 
# --------------------------------------------------------------------------- +_FINANCE_STYLE_HINT = ( + "\n\nWenn du ein Dokument erstellst, verwende einen professionellen Finanz-Stil:\n" + "- Schriftart: Calibri\n" + "- Primaerfarbe: #1F3864 (Dunkelblau)\n" + "- Akzentfarbe: #2980B9\n" + "- Tabellen mit dunklem Header (#1F3864, weisse Schrift)\n" + "- Konservatives, seriöses Layout\n" + "Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen." +) + + def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]: """Build a standard analysis graph: trigger -> refreshAccountingData -> ai.prompt.""" return { @@ -370,8 +381,9 @@ def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]: "parameters": {"featureInstanceId": "{{featureInstanceId}}", "forceRefresh": False}, "position": {"x": 250, "y": 0}}, {"id": "analyse", "type": "ai.prompt", "label": "Analyse", "_method": "ai", "_action": "process", "parameters": { - "aiPrompt": prompt, + "aiPrompt": prompt + _FINANCE_STYLE_HINT, "context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]}, + "requireNeutralization": True, "simpleMode": False, }, "position": {"x": 500, "y": 0}}, ], @@ -454,10 +466,19 @@ TEMPLATE_WORKFLOWS = [ "Verwende die uebergebene Budget-Datei als Soll-Quelle und die im " "Kontext bereitgestellten Buchhaltungsdaten als Ist-Quelle.\n" "WICHTIG: Erstelle KEINEN separaten Chart pro Konto. Nur EIN " - "Uebersichts-Chart ueber alle Konten ist gewuenscht." + "Uebersichts-Chart ueber alle Konten ist gewuenscht.\n\n" + "Hinweis: Das documentTheme ist 'finance'. Wenn du ein Dokument erstellst, " + "verwende einen professionellen Finanz-Stil:\n" + "- Schriftart: Calibri\n" + "- Primaerfarbe: #1F3864 (Dunkelblau)\n" + "- Akzentfarbe: #2980B9\n" + "- Tabellen mit dunklem Header (#1F3864, weisse Schrift)\n" + "- Konservatives, seriöses Layout\n" + "Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen." 
), "resultType": "xlsx", "documentTheme": "finance", + "requireNeutralization": True, "documentList": {"type": "ref", "nodeId": "trigger", "path": ["payload", "documentList"]}, "context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]}, "simpleMode": False, diff --git a/modules/features/workspace/datamodelFeatureWorkspace.py b/modules/features/workspace/datamodelFeatureWorkspace.py index b12d4b84..4e32702c 100644 --- a/modules/features/workspace/datamodelFeatureWorkspace.py +++ b/modules/features/workspace/datamodelFeatureWorkspace.py @@ -2,8 +2,8 @@ # All rights reserved. """Workspace feature data models — WorkspaceUserSettings.""" -from typing import Optional -from pydantic import BaseModel, Field +from typing import List, Optional +from pydantic import Field from modules.datamodels.datamodelBase import PowerOnModel from modules.shared.i18nRegistry import i18nModel import uuid @@ -52,3 +52,18 @@ class WorkspaceUserSettings(PowerOnModel): description="Max agent rounds override (None = instance default)", json_schema_extra={"label": "Max. 
Agenten-Runden", "frontend_type": "number", "frontend_readonly": False, "frontend_required": False}, ) + requireNeutralization: bool = Field( + default=False, + description="Default neutralization setting for this user", + json_schema_extra={"label": "Neutralisierung", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False}, + ) + allowedProviders: List[str] = Field( + default_factory=list, + description="Allowed AI providers (empty = all permitted by RBAC)", + json_schema_extra={"label": "Erlaubte Provider", "frontend_type": "multiselect", "frontend_readonly": False, "frontend_required": False}, + ) + allowedModels: List[str] = Field( + default_factory=list, + description="Allowed AI models (empty = all permitted)", + json_schema_extra={"label": "Erlaubte Modelle", "frontend_type": "modelMultiSelect", "frontend_readonly": False, "frontend_required": False}, + ) diff --git a/modules/features/workspace/routeFeatureWorkspace.py b/modules/features/workspace/routeFeatureWorkspace.py index 3e1a54b7..5b0d4d7a 100644 --- a/modules/features/workspace/routeFeatureWorkspace.py +++ b/modules/features/workspace/routeFeatureWorkspace.py @@ -110,6 +110,7 @@ class WorkspaceInputRequest(BaseModel): workflowId: Optional[str] = Field(default=None, description="Continue existing workflow") userLanguage: str = Field(default="en", description="User language code") allowedProviders: List[str] = Field(default_factory=list, description="Restrict AI to these providers") + allowedModels: List[str] = Field(default_factory=list, description="Restrict AI to these models") requireNeutralization: Optional[bool] = Field(default=None, description="Per-request neutralization override") @@ -635,6 +636,7 @@ async def streamWorkspaceStart( userLanguage=userInput.userLanguage, instanceConfig=instanceConfig, allowedProviders=userInput.allowedProviders, + allowedModels=userInput.allowedModels, requireNeutralization=userInput.requireNeutralization, 
billingFeatureCode=wsBillingFeatureCode, ) @@ -692,6 +694,7 @@ async def _runWorkspaceAgent( userLanguage: str = "en", instanceConfig: Dict[str, Any] = None, allowedProviders: List[str] = None, + allowedModels: List[str] = None, requireNeutralization: Optional[bool] = None, billingFeatureCode: Optional[str] = None, ): @@ -715,6 +718,9 @@ async def _runWorkspaceAgent( logger.info(f"Workspace agent: allowedProviders={allowedProviders}") else: logger.debug("Workspace agent: no allowedProviders in request") + if allowedModels: + aiService.services.allowedModels = allowedModels + logger.info(f"Workspace agent: allowedModels={allowedModels}") if requireNeutralization is not None: ctx.requireNeutralization = requireNeutralization @@ -2139,6 +2145,76 @@ async def updateGeneralSettings( return await getGeneralSettings(request, instanceId, context) +# ========================================================================= +# User-level AI settings (neutralisation, providers, models) +# ========================================================================= + +@router.get("/{instanceId}/user-settings") +@limiter.limit("120/minute") +async def getWorkspaceUserSettings( + request: Request, + instanceId: str = Path(...), + context: RequestContext = Depends(getRequestContext), +): + """Get the current user's workspace AI settings (auto-creates with defaults if not exists).""" + _mandateId, _ = _validateInstanceAccess(instanceId, context) + wsInterface = _getWorkspaceInterface(context, instanceId) + userId = str(context.user.id) + + settings = wsInterface.getWorkspaceUserSettings(userId) + if settings: + return JSONResponse({ + "requireNeutralization": settings.requireNeutralization, + "allowedProviders": settings.allowedProviders, + "allowedModels": settings.allowedModels, + }) + + data = { + "userId": userId, + "mandateId": str(context.mandateId) if context.mandateId else "", + "featureInstanceId": instanceId, + } + created = wsInterface.saveWorkspaceUserSettings(data) + 
return JSONResponse({ + "requireNeutralization": created.requireNeutralization, + "allowedProviders": created.allowedProviders, + "allowedModels": created.allowedModels, + }) + + +@router.put("/{instanceId}/user-settings") +@limiter.limit("120/minute") +async def putWorkspaceUserSettings( + request: Request, + instanceId: str = Path(...), + body: dict = Body(...), + context: RequestContext = Depends(getRequestContext), +): + """Save the current user's workspace AI settings.""" + _mandateId, _ = _validateInstanceAccess(instanceId, context) + wsInterface = _getWorkspaceInterface(context, instanceId) + userId = str(context.user.id) + + data = { + "userId": userId, + "mandateId": str(context.mandateId) if context.mandateId else "", + "featureInstanceId": instanceId, + } + if "requireNeutralization" in body: + data["requireNeutralization"] = bool(body["requireNeutralization"]) + if "allowedProviders" in body: + data["allowedProviders"] = body["allowedProviders"] + if "allowedModels" in body: + data["allowedModels"] = body["allowedModels"] + + saved = wsInterface.saveWorkspaceUserSettings(data) + return JSONResponse({ + "requireNeutralization": saved.requireNeutralization, + "allowedProviders": saved.allowedProviders, + "allowedModels": saved.allowedModels, + }) + + # ========================================================================= # RAG / Knowledge — anonymised instance statistics (presentation / KPIs) # ========================================================================= diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py index a859ffa7..dcf819cc 100644 --- a/modules/interfaces/interfaceAiObjects.py +++ b/modules/interfaces/interfaceAiObjects.py @@ -111,6 +111,19 @@ class AiObjects: processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1, ) + allowedModels = getattr(options, 'allowedModels', None) if options else None + if allowedModels: + filteredModels = [m for m in availableModels if m.name in 
allowedModels] + if filteredModels: + availableModels = filteredModels + else: + errorMsg = f"No models match allowedModels {allowedModels} (providers={allowedProviders}) for operation {options.operationType}" + logger.error(errorMsg) + return AiCallResponse( + content=errorMsg, modelName="error", priceCHF=0.0, + processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1, + ) + failoverModelList = modelSelector.getFailoverModelList(prompt, context, options, availableModels) if not failoverModelList: @@ -364,6 +377,19 @@ class AiObjects: ) return + allowedModels = getattr(options, 'allowedModels', None) if options else None + if allowedModels: + filtered = [m for m in availableModels if m.name in allowedModels] + if filtered: + availableModels = filtered + else: + yield AiCallResponse( + content=f"No models match allowedModels {allowedModels} (providers={allowedProviders}) for operation {options.operationType}", + modelName="error", priceCHF=0.0, processingTime=0.0, + bytesSent=0, bytesReceived=0, errorCount=1, + ) + return + failoverModelList = modelSelector.getFailoverModelList( request.prompt, request.context or "", options, availableModels ) @@ -516,6 +542,14 @@ class AiObjects: else: logger.warning(f"No embedding models match allowedProviders {allowedProviders}") + allowedModels = getattr(options, 'allowedModels', None) if options else None + if allowedModels: + filtered = [m for m in availableModels if m.name in allowedModels] + if filtered: + availableModels = filtered + else: + logger.warning(f"No embedding models match allowedModels {allowedModels}") + failoverModelList = modelSelector.getFailoverModelList( combinedText, "", options, availableModels ) diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py index c2a4842b..7b071996 100644 --- a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py +++ 
b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py @@ -25,142 +25,11 @@ def _registerMediaTools(registry: ToolRegistry, services): # ---- Document rendering tool ---- def _markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]: - """Convert markdown content to the standard document JSON format expected by renderers.""" - import re as _re - - sections = [] - order = 0 - lines = markdown.split("\n") - i = 0 - - def _nextId(): - nonlocal order - order += 1 - return f"s_{order}" - - while i < len(lines): - line = lines[i] - - # --- Headings --- - headingMatch = _re.match(r'^(#{1,6})\s+(.+)', line) - if headingMatch: - level = len(headingMatch.group(1)) - text = headingMatch.group(2).strip() - sections.append({ - "id": _nextId(), "content_type": "heading", "order": order, - "elements": [{"content": {"text": text, "level": level}}], - }) - i += 1 - continue - - # --- Fenced code blocks --- - codeMatch = _re.match(r'^```(\w*)', line) - if codeMatch: - lang = codeMatch.group(1) or "text" - codeLines = [] - i += 1 - while i < len(lines) and not lines[i].startswith("```"): - codeLines.append(lines[i]) - i += 1 - i += 1 - sections.append({ - "id": _nextId(), "content_type": "code_block", "order": order, - "elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}], - }) - continue - - # --- Tables --- - tableMatch = _re.match(r'^\|(.+)\|$', line) - if tableMatch and (i + 1) < len(lines) and _re.match(r'^\|[\s\-:|]+\|$', lines[i + 1]): - headerCells = [c.strip() for c in tableMatch.group(1).split("|")] - i += 2 - rows = [] - while i < len(lines) and _re.match(r'^\|(.+)\|$', lines[i]): - rowCells = [c.strip() for c in lines[i][1:-1].split("|")] - rows.append(rowCells) - i += 1 - sections.append({ - "id": _nextId(), "content_type": "table", "order": order, - "elements": [{"content": {"headers": headerCells, "rows": rows}}], - }) - continue - - # --- Bullet / numbered lists --- - listMatch = 
_re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', line) - if listMatch: - isNumbered = bool(_re.match(r'\d+[.)]', listMatch.group(2))) - items = [] - while i < len(lines) and _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]): - m = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]) - items.append({"text": m.group(3).strip()}) - i += 1 - sections.append({ - "id": _nextId(), "content_type": "bullet_list", "order": order, - "elements": [{"content": {"items": items, "list_type": "numbered" if isNumbered else "bullet"}}], - }) - continue - - # --- Empty lines (skip) --- - if not line.strip(): - i += 1 - continue - - # --- Images: ![alt](file:fileId) or ![alt](url) --- - imgMatch = _re.match(r'^!\[([^\]]*)\]\(([^)]+)\)', line) - if imgMatch: - altText = imgMatch.group(1).strip() or "Image" - src = imgMatch.group(2).strip() - fileId = "" - if src.startswith("file:"): - fileId = src[5:] - sections.append({ - "id": _nextId(), "content_type": "image", "order": order, - "elements": [{ - "content": { - "altText": altText, - "base64Data": "", - "_fileRef": fileId, - "_srcUrl": src if not fileId else "", - } - }], - }) - i += 1 - continue - - # --- Paragraph (collect consecutive non-empty lines) --- - paraLines = [] - while i < len(lines) and lines[i].strip() and not _re.match(r'^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )', lines[i]): - paraLines.append(lines[i]) - i += 1 - if paraLines: - sections.append({ - "id": _nextId(), "content_type": "paragraph", "order": order, - "elements": [{"content": {"text": " ".join(paraLines)}}], - }) - continue - - i += 1 - - if not sections: - sections.append({ - "id": _nextId(), "content_type": "paragraph", "order": order, - "elements": [{"content": {"text": markdown.strip() or "(empty)"}}], - }) - - return { - "metadata": { - "split_strategy": "single_document", - "source_documents": [], - "extraction_method": "agent_rendering", - "title": title, - "language": language, - }, - "documents": [{ - "id": "doc_1", - "title": title, - "sections": 
sections, - }], - } + """Delegate to the consolidated parser in subDocumentUtility.""" + from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson + result = markdownToDocumentJson(markdown, title, language) + result["metadata"]["extraction_method"] = "agent_rendering" + return result async def _renderDocument(args: Dict[str, Any], context: Dict[str, Any]): """Render agent-produced markdown content into any document format via the RendererRegistry.""" @@ -245,35 +114,75 @@ def _registerMediaTools(registry: ToolRegistry, services): except Exception as e: logger.warning(f"renderDocument: knowledge service unavailable: {e}") resolvedImages = 0 + + def _resolveImageRef(targetObj, fileRefKey="_fileRef", fileIdKey="fileId"): + """Resolve a single image reference dict to base64Data in-place.""" + nonlocal resolvedImages + fileRef = targetObj.get(fileRefKey, "") or targetObj.get(fileIdKey, "") + if not fileRef or targetObj.get("base64Data"): + return + if knowledgeService: + chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef) + imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"] + if imageChunks: + targetObj["base64Data"] = imageChunks[0].get("data", "") + chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png") + targetObj["mimeType"] = chunkMime + resolvedImages += 1 + if not targetObj.get("base64Data"): + try: + rawBytes = services.chat.getFileData(fileRef) + if rawBytes: + import base64 as _b64 + targetObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii") + targetObj["mimeType"] = "image/png" + resolvedImages += 1 + except Exception as e: + logger.warning(f"renderDocument: image resolve failed for fileRef={fileRef}: {e}") + targetObj.pop("_fileRef", None) + targetObj.pop("_srcUrl", None) + + def _resolveInlineRuns(runsList): + """Scan a list of inline runs and resolve any image runs with fileId.""" + for run in runsList: + if run.get("type") == "image" and 
run.get("fileId") and not run.get("base64Data"): + _resolveImageRef(run, fileRefKey="fileId", fileIdKey="fileId") + for doc in structuredContent.get("documents", []): for section in doc.get("sections", []): - if section.get("content_type") != "image": + cType = section.get("content_type") + # Block-level image sections + if cType == "image": + for element in section.get("elements", []): + contentObj = element.get("content", {}) + _resolveImageRef(contentObj) continue - for element in section.get("elements", []): - contentObj = element.get("content", {}) - fileRef = contentObj.get("_fileRef", "") - if not fileRef or contentObj.get("base64Data"): - continue - if knowledgeService: - chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef) - imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"] - if imageChunks: - contentObj["base64Data"] = imageChunks[0].get("data", "") - chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png") - contentObj["mimeType"] = chunkMime - resolvedImages += 1 - if not contentObj.get("base64Data"): - try: - rawBytes = services.chat.getFileData(fileRef) - if rawBytes: - import base64 as _b64 - contentObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii") - contentObj["mimeType"] = "image/png" - resolvedImages += 1 - except Exception as e: - logger.warning(f"renderDocument: image resolve failed for fileRef={fileRef}: {e}") - contentObj.pop("_fileRef", None) - contentObj.pop("_srcUrl", None) + # Paragraphs with inlineRuns + if cType == "paragraph": + for element in section.get("elements", []): + runs = element.get("content", {}).get("inlineRuns") + if runs: + _resolveInlineRuns(runs) + continue + # Bullet lists - items are List[List[InlineRun]] + if cType == "bullet_list": + for element in section.get("elements", []): + items = element.get("content", {}).get("items", []) + for item in items: + if isinstance(item, list): + _resolveInlineRuns(item) + continue + # Tables - headers and row 
cells are List[InlineRun] + if cType == "table": + for element in section.get("elements", []): + contentObj = element.get("content", {}) + for cell in contentObj.get("headers", []): + if isinstance(cell, list): + _resolveInlineRuns(cell) + for row in contentObj.get("rows", []): + for cell in row: + if isinstance(cell, list): + _resolveInlineRuns(cell) sectionCount = len(structuredContent.get("documents", [{}])[0].get("sections", [])) logger.info(f"renderDocument: parsed {sectionCount} sections from markdown ({len(content)} chars), resolved {resolvedImages} image(s), format={outputFormat}") @@ -285,6 +194,7 @@ def _registerMediaTools(registry: ToolRegistry, services): language=language, title=title, userPrompt=content, + style=args.get("style"), ) if not documents: @@ -367,6 +277,20 @@ def _registerMediaTools(registry: ToolRegistry, services): "outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"}, "title": {"type": "string", "description": "Document title", "default": "Document"}, "language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"}, + "style": { + "type": "object", + "description": ( + "Optional style overrides for the rendered document. Supports nested keys: " + "fonts (primary, monospace), colors (primary, secondary, accent, background), " + "headings (h1-h4 with sizePt, weight, color, spaceBeforePt, spaceAfterPt), " + "paragraph (sizePt, lineSpacing, color), table (headerBg, headerFg, headerSizePt, " + "bodySizePt, rowBandingEven, rowBandingOdd, borderColor, borderWidthPt), " + "list (bulletChar, indentPt, sizePt), image (defaultWidthPt, maxWidthPt, alignment), " + "codeBlock (fontSizePt, background, borderColor), " + "page (format, marginsPt, showPageNumbers, headerHeight, footerHeight, headerLogo, headerText, footerText). " + "Only provided keys override defaults; omitted keys keep their default values." 
+ ), + }, }, }, readOnly=False, diff --git a/modules/serviceCenter/services/serviceAi/mainServiceAi.py b/modules/serviceCenter/services/serviceAi/mainServiceAi.py index 6428bed3..18ac46bc 100644 --- a/modules/serviceCenter/services/serviceAi/mainServiceAi.py +++ b/modules/serviceCenter/services/serviceAi/mainServiceAi.py @@ -86,7 +86,7 @@ class _ServicesAdapter: return getattr(w, "featureCode", None) if w else None def __getattr__(self, name: str): - if name in ("allowedProviders", "preferredProviders", "currentUserLanguage"): + if name in ("allowedProviders", "allowedModels", "preferredProviders", "currentUserLanguage"): return getattr(self.workflow, name, None) if self.workflow else None raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") @@ -177,6 +177,11 @@ class AiService: request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders}) logger.debug(f"Effective allowedProviders for AI request: {effectiveProviders}") + # Calculate effective allowedModels: Workflow ∩ Request (node-level) + effectiveModels = self._calculateEffectiveModels(request) + if effectiveModels and request.options: + request.options = request.options.model_copy(update={'allowedModels': effectiveModels}) + # Neutralize prompt if enabled (before AI call) _wasNeutralized = False _excludedDocs: List[str] = [] @@ -225,6 +230,11 @@ class AiService: if effectiveProviders and request.options: request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders}) + # Calculate effective allowedModels: Workflow ∩ Request (node-level) + effectiveModels = self._calculateEffectiveModels(request) + if effectiveModels and request.options: + request.options = request.options.model_copy(update={'allowedModels': effectiveModels}) + # Neutralize prompt if enabled (before streaming) _wasNeutralized = False _excludedDocs: List[str] = [] @@ -1240,6 +1250,43 @@ detectedIntent-Werte: logger.warning(f"Error calculating effective 
providers: {e}") return None + def _calculateEffectiveModels(self, request: AiCallRequest = None) -> Optional[List[str]]: + """ + Calculate effective allowed models: Workflow.allowedModels ∩ request.options.allowedModels. + + AND-logic intersection: + - If workflow specifies allowedModels, start with those. + - If request (node-level) also specifies allowedModels, intersect. + - Returns None if no model filtering is needed. + """ + try: + effectiveModels = None + + # Workflow-level allowedModels (from automation config) + workflowModels = getattr(self.services, 'allowedModels', None) + if workflowModels: + effectiveModels = list(workflowModels) + + # Request-level (node-level) allowedModels + requestModels = None + if request and request.options and request.options.allowedModels: + requestModels = request.options.allowedModels + + if requestModels: + if effectiveModels: + effectiveModels = [m for m in effectiveModels if m in requestModels] + else: + effectiveModels = list(requestModels) + + if effectiveModels: + logger.debug(f"Model filter: Workflow={workflowModels}, Request={requestModels}, Effective={effectiveModels}") + + return effectiveModels if effectiveModels else None + + except Exception as e: + logger.warning(f"Error calculating effective models: {e}") + return None + async def ensureAiObjectsInitialized(self): """Ensure aiObjects is initialized and submodules are ready.""" if self.aiObjects is None: diff --git a/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py b/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py index b9377404..6afcc0a8 100644 --- a/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py @@ -14,6 +14,7 @@ from .subDocumentUtility import ( detectMimeTypeFromData, convertDocumentDataToString ) +from .styleDefaults import resolveStyle logger = logging.getLogger(__name__) @@ -382,7 +383,7 @@ class 
GenerationService: 'workflowId': 'unknown' } - async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]: + async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None) -> List[RenderedDocument]: """ Render extracted JSON content to the specified output format. Processes EACH document separately and calls renderer for each. @@ -399,12 +400,14 @@ class GenerationService: userPrompt: User's original prompt for report generation aiService: AI service instance for generation prompt creation parentOperationId: Optional parent operation ID for hierarchical logging + style: Optional style overrides (deep-merged with DEFAULT_STYLE) Returns: List of RenderedDocument objects. 
Each RenderedDocument represents one rendered file (main document or supporting file) """ try: + resolvedStyle = resolveStyle(style) # Validate JSON input if not isinstance(extractedContent, dict): raise ValueError("extractedContent must be a JSON dictionary") @@ -469,7 +472,7 @@ class GenerationService: docTitle = doc.get("title", title) # Render this document (can return multiple files, e.g., HTML + images) - renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService) + renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService, style=resolvedStyle) allRenderedDocuments.extend(renderedDocs) logger.info(f"Rendered {len(documents)} document(s) into {len(allRenderedDocuments)} file(s)") diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py b/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py index b080ce88..583c423c 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py @@ -84,7 +84,7 @@ class BaseRenderer(ABC): return list(supportedSectionTypes) @abstractmethod - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]: """ Render extracted JSON content to multiple documents. Each renderer must implement this method. @@ -95,6 +95,9 @@ class BaseRenderer(ABC): title: Report title userPrompt: Original user prompt for context aiService: AI service instance for additional processing + style: Fully-resolved unified style dict from styleDefaults.resolveStyle(). 
+ When provided, renderers use these values instead of their + own defaults / AI-generated styles. Returns: List of RenderedDocument objects. @@ -102,6 +105,112 @@ class BaseRenderer(ABC): Even if only one document is returned, it must be wrapped in a list. """ pass + + def _convertUnifiedStyleToInternal(self, style: Dict[str, Any]) -> Dict[str, Any]: + """Convert the unified resolvedStyle dict (from styleDefaults) into + the renderer-internal style-set format that all rendering methods already + consume. Override in subclasses for format-specific tweaks.""" + h1 = style["headings"]["h1"] + h2 = style["headings"]["h2"] + h3 = style["headings"].get("h3", h2) + h4 = style["headings"].get("h4", h3) + tbl = style["table"] + para = style["paragraph"] + lst = style["list"] + cb = style["codeBlock"] + return { + "title": { + "font_size": h1["sizePt"], "color": h1["color"], + "bold": h1.get("weight") == "bold", "align": "left", + }, + "heading1": { + "font_size": h1["sizePt"], "color": h1["color"], + "bold": h1.get("weight") == "bold", "align": "left", + }, + "heading2": { + "font_size": h2["sizePt"], "color": h2["color"], + "bold": h2.get("weight") == "bold", "align": "left", + }, + "heading3": { + "font_size": h3["sizePt"], "color": h3["color"], + "bold": h3.get("weight") == "bold", "align": "left", + }, + "heading4": { + "font_size": h4["sizePt"], "color": h4["color"], + "bold": h4.get("weight") == "bold", "align": "left", + }, + "paragraph": { + "font_size": para["sizePt"], "color": para["color"], + "bold": False, "align": "left", + }, + "table_header": { + "background": tbl["headerBg"], "text_color": tbl["headerFg"], + "bold": True, "align": "center", + }, + "table_cell": { + "background": tbl["rowBandingOdd"], "text_color": para["color"], + "bold": False, "align": "left", + }, + "table_border": { + "style": "grid", "color": tbl["borderColor"], + }, + "bullet_list": { + "font_size": lst["sizePt"], "color": para["color"], + "indent": lst["indentPt"], + }, + 
"code_block": { + "font": style["fonts"]["monospace"], + "font_size": cb["fontSizePt"], "color": para["color"], + "background": cb["background"], + }, + } + + @staticmethod + def _inlineRunsFromContent(content: Dict[str, Any], *, itemsKey: str = None) -> Any: + """Extract inline runs from new-format content, falling back to old format. + + For paragraphs (itemsKey=None): + new: content["inlineRuns"] -> List[InlineRun] + old: content["text"] -> wrapped in [{"type":"text","value":text}] + + For list items (itemsKey="items"): + new: content["items"] is List[List[InlineRun]] + old: content["items"] is List[str] or List[{"text":…}] + Returns the items list (caller decides per-item conversion). + + For table headers/cells: + new: each header/cell is List[InlineRun] + old: each header/cell is a plain str + Caller handles per-cell. + """ + if itemsKey: + return content.get(itemsKey, []) + inlineRuns = content.get("inlineRuns") + if inlineRuns: + return inlineRuns + text = content.get("text", "") + if text: + return [{"type": "text", "value": text}] + return [] + + @staticmethod + def _inlineRunsForCell(cell) -> list: + """Normalize a single table header or cell value to List[InlineRun]. + Accepts either a plain string or an already-correct list of run dicts.""" + if isinstance(cell, list): + return cell + return [{"type": "text", "value": str(cell) if cell is not None else ""}] + + @staticmethod + def _inlineRunsForListItem(item) -> list: + """Normalize a single list item to List[InlineRun]. 
+ Accepts a plain string, a dict with 'text', or an already-correct list of run dicts.""" + if isinstance(item, list): + return item + if isinstance(item, dict): + text = item.get("text", "") + return [{"type": "text", "value": text}] + return [{"type": "text", "value": str(item)}] def _determineFilename(self, title: str, mimeType: str) -> str: """Determine filename from title and mimeType.""" diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py index 7a1277ca..ab37f756 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py @@ -53,18 +53,17 @@ class RendererDocx(BaseRenderer): from modules.datamodels.datamodelJson import supportedSectionTypes return list(supportedSectionTypes) - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]: """Render extracted JSON content to DOCX format using AI-analyzed styling.""" self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER") try: if not DOCX_AVAILABLE: - # Fallback to HTML if python-docx not available from .rendererHtml import RendererHtml htmlRenderer = RendererHtml() - return await htmlRenderer.render(extractedContent, title, userPrompt, aiService) + return await htmlRenderer.render(extractedContent, title, userPrompt, aiService, style=style) # Generate DOCX using AI-analyzed styling - docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService) + docx_content = await self._generateDocxFromJson(extractedContent, title, 
userPrompt, aiService, unifiedStyle=style) # Extract metadata for document type and other info metadata = extractedContent.get("metadata", {}) if extractedContent else {} @@ -114,23 +113,27 @@ class RendererDocx(BaseRenderer): ) ] - async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: + async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, unifiedStyle: Dict[str, Any] = None) -> str: """Generate DOCX content from structured JSON document.""" import time start_time = time.time() try: self.logger.debug("_generateDocxFromJson: Starting document generation") - # Create new document doc = Document() self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s") - # Get style set: use styles from metadata if available, otherwise enhance with AI - template_from_metadata = None - if json_content and isinstance(json_content.get("metadata"), dict): - template_from_metadata = json_content["metadata"].get("templateName") + # Phase 3: prefer unified style when provided style_start = time.time() self.logger.debug("_generateDocxFromJson: About to get style set") - styleSet = await self._getStyleSet(json_content, userPrompt, aiService, templateName=template_from_metadata) + if unifiedStyle: + styleSet = self._convertUnifiedStyleToInternal(unifiedStyle) + self._unifiedStyle = unifiedStyle + else: + template_from_metadata = None + if json_content and isinstance(json_content.get("metadata"), dict): + template_from_metadata = json_content["metadata"].get("templateName") + styleSet = await self._getStyleSet(json_content, userPrompt, aiService, templateName=template_from_metadata) + self._unifiedStyle = None self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s") # Setup basic document styles and create all styles from style set @@ -298,11 +301,11 @@ class 
RendererDocx(BaseRenderer): def _setupBasicDocumentStyles(self, doc: Document) -> None: """Set up basic document styles.""" try: - # Set default font style = doc.styles['Normal'] font = style.font - font.name = 'Calibri' - font.size = Pt(11) + us = getattr(self, '_unifiedStyle', None) + font.name = us["fonts"]["primary"] if us else 'Calibri' + font.size = Pt(us["paragraph"]["sizePt"] if us else 11) except Exception as e: self.logger.warning(f"Could not set up basic document styles: {str(e)}") @@ -421,6 +424,8 @@ class RendererDocx(BaseRenderer): def _addMarkdownInlineRuns(self, paragraph, text: str) -> None: """Parse markdown inline formatting and add corresponding Runs to a python-docx paragraph.""" pos = 0 + us = getattr(self, '_unifiedStyle', None) + monoFont = us["fonts"]["monospace"] if us else "Courier New" for m in self._MD_INLINE_RE.finditer(text): if m.start() > pos: paragraph.add_run(text[pos:m.start()]) @@ -434,12 +439,45 @@ class RendererDocx(BaseRenderer): paragraph.add_run(m.group(6)).italic = True elif m.group(7): run = paragraph.add_run(m.group(7)) - run.font.name = "Courier New" + run.font.name = monoFont run.font.size = Pt(9) pos = m.end() if pos < len(text): paragraph.add_run(text[pos:]) + def _renderInlineRuns(self, runs: list, paragraph, styleSet: Dict[str, Any]) -> None: + """Process a list of InlineRun dicts into python-docx Runs on a paragraph.""" + us = getattr(self, '_unifiedStyle', None) + monoFont = us["fonts"]["monospace"] if us else "Courier New" + for run in runs: + runType = run.get("type", "text") + value = run.get("value", "") + if runType == "text": + paragraph.add_run(value) + elif runType == "bold": + paragraph.add_run(value).bold = True + elif runType == "italic": + paragraph.add_run(value).italic = True + elif runType == "code": + r = paragraph.add_run(value) + r.font.name = monoFont + r.font.size = Pt(9) + elif runType == "link": + r = paragraph.add_run(value) + r.font.underline = True + r.font.color.rgb = RGBColor(0x29, 
0x80, 0xB9) + elif runType == "image": + b64 = run.get("base64Data", "") + if b64: + try: + imgBytes = base64.b64decode(b64) + imgStream = io.BytesIO(imgBytes) + paragraph.add_run().add_picture(imgStream, width=Inches(2)) + except Exception: + paragraph.add_run(f"[Image: {run.get('altText', '')}]") + else: + paragraph.add_run(value) + def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """ Render a JSON table to DOCX using AI-generated styles. @@ -485,7 +523,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.error(f"Error rendering table: {str(e)}", exc_info=True) - def _renderTableFastXml(self, doc: Document, headers: List[str], rows: List[List[Any]], styles: Dict[str, Any]) -> None: + def _renderTableFastXml(self, doc: Document, headers: list, rows: list, styles: Dict[str, Any]) -> None: """ High-performance table rendering using direct XML manipulation. @@ -546,24 +584,34 @@ class RendererDocx(BaseRenderer): # Build all rows using fast XML rows_start = time.time() - # Header row - headerRow = self._createTableRowXml(headers, isHeader=True) + # Resolve header style colors + tableStyle = styles.get("table_header", {}) + headerBg = tableStyle.get("background", "") + headerFg = tableStyle.get("text_color", "") + + # Flatten inline-run headers to plain strings for fast XML path + flatHeaders = [] + for h in headers: + runs = self._inlineRunsForCell(h) + flatHeaders.append("".join(r.get("value", "") for r in runs)) + + headerRow = self._createTableRowXml(flatHeaders, isHeader=True, headerBgHex=headerBg or None, headerFgHex=headerFg or None) tbl.append(headerRow) - + header_time = time.time() - rows_start self.logger.debug(f"_renderTableFastXml: Header row created in {header_time:.3f}s") - - # Data rows - batch process for performance + data_start = time.time() rowCount = len(rows) - + for idx, rowData in enumerate(rows): - # Convert all cells to strings - cellTexts = [str(cell) if cell is not 
None else '' for cell in rowData] - # Pad if needed - while len(cellTexts) < len(headers): + cellTexts = [] + for cell in rowData: + runs = self._inlineRunsForCell(cell) + cellTexts.append("".join(r.get("value", "") for r in runs)) + while len(cellTexts) < len(flatHeaders): cellTexts.append('') - + row = self._createTableRowXml(cellTexts, isHeader=False) tbl.append(row) @@ -641,74 +689,64 @@ class RendererDocx(BaseRenderer): return tblBorders - def _createTableRowXml(self, cells: List[str], isHeader: bool = False) -> Any: - """ - Create a table row XML element with cells. - - This is the core fast-path: builds the row XML directly without - going through python-docx's slow cell.text assignment. - """ + def _createTableRowXml(self, cells: list, isHeader: bool = False, headerBgHex: str = None, headerFgHex: str = None) -> Any: + """Create a table row XML element with cells. + Fast-path: builds row XML directly via lxml.""" from docx.oxml.shared import OxmlElement, qn - + + if headerBgHex is None: + us = getattr(self, '_unifiedStyle', None) + headerBgHex = us["table"]["headerBg"].lstrip('#') if us else '1F3864' + else: + headerBgHex = headerBgHex.lstrip('#') + if headerFgHex is None: + us = getattr(self, '_unifiedStyle', None) + headerFgHex = us["table"]["headerFg"].lstrip('#') if us else 'FFFFFF' + else: + headerFgHex = headerFgHex.lstrip('#') + tr = OxmlElement('w:tr') - - # Row properties for header if isHeader: trPr = OxmlElement('w:trPr') - tblHeader = OxmlElement('w:tblHeader') - trPr.append(tblHeader) + trPr.append(OxmlElement('w:tblHeader')) tr.append(trPr) - + for cellText in cells: - # Create cell tc = OxmlElement('w:tc') - - # Cell properties tcPr = OxmlElement('w:tcPr') tcW = OxmlElement('w:tcW') tcW.set(qn('w:type'), 'auto') tcW.set(qn('w:w'), '0') tcPr.append(tcW) - - # Header cell styling - light blue background + if isHeader: shd = OxmlElement('w:shd') shd.set(qn('w:val'), 'clear') shd.set(qn('w:color'), 'auto') - shd.set(qn('w:fill'), '4472C4') # 
Professional blue + shd.set(qn('w:fill'), headerBgHex) tcPr.append(shd) - + tc.append(tcPr) - - # Paragraph with text p = OxmlElement('w:p') - - # Add run with text r = OxmlElement('w:r') - - # Header text styling - bold and white + if isHeader: rPr = OxmlElement('w:rPr') - b = OxmlElement('w:b') - rPr.append(b) - # White text color + rPr.append(OxmlElement('w:b')) color = OxmlElement('w:color') - color.set(qn('w:val'), 'FFFFFF') + color.set(qn('w:val'), headerFgHex) rPr.append(color) r.append(rPr) - - # Text element + t = OxmlElement('w:t') - # Preserve spaces if text starts/ends with whitespace if cellText and (cellText[0] == ' ' or cellText[-1] == ' '): t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve') t.text = cellText r.append(t) - p.append(r) tc.append(p) tr.append(tc) - + return tr def _applyHorizontalBordersOnly(self, table) -> None: @@ -836,47 +874,37 @@ class RendererDocx(BaseRenderer): def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON bullet list to DOCX using AI-generated styles - OPTIMIZED for performance.""" try: - # Extract from nested content structure content = list_data.get("content", {}) if not isinstance(content, dict): return items = content.get("items", []) bullet_style = styles.get("bullet_list", {}) - - # Pre-calculate and cache style objects to avoid repeated parsing - font_size_pt = None + + font_size_pt = Pt(bullet_style["font_size"]) if bullet_style.get("font_size") else None text_color_rgb = None - if bullet_style: - if "font_size" in bullet_style: - font_size_pt = Pt(bullet_style["font_size"]) - if "color" in bullet_style: - color_hex = bullet_style["color"].lstrip('#') - text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) - + if bullet_style.get("color"): + color_hex = bullet_style["color"].lstrip('#') + text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 
16)) + for item in items: - itemText = item if isinstance(item, str) else (item.get("text", "") if isinstance(item, dict) else "") - if not itemText: + itemRuns = self._inlineRunsForListItem(item) + if not itemRuns or not any(r.get("value") for r in itemRuns): continue para = doc.add_paragraph(style='List Bullet') - self._addMarkdownInlineRuns(para, itemText) - - # Apply bullet list styling from style set - use cached objects - if bullet_style and para.runs: - # Use direct access instead of iterating - if len(para.runs) > 0: - run = para.runs[0] - if font_size_pt: - run.font.size = font_size_pt - if text_color_rgb: - run.font.color.rgb = text_color_rgb - else: - # Create run if none exists - run = para.add_run() - if font_size_pt: - run.font.size = font_size_pt - if text_color_rgb: - run.font.color.rgb = text_color_rgb - + isNewRunFormat = isinstance(item, list) + if isNewRunFormat: + self._renderInlineRuns(itemRuns, para, styles) + else: + itemText = "".join(r.get("value", "") for r in itemRuns) + self._addMarkdownInlineRuns(para, itemText) + + if bullet_style and para.runs and len(para.runs) > 0: + run = para.runs[0] + if font_size_pt: + run.font.size = font_size_pt + if text_color_rgb: + run.font.color.rgb = text_color_rgb + except Exception as e: self.logger.warning(f"Error rendering bullet list: {str(e)}") @@ -905,90 +933,79 @@ class RendererDocx(BaseRenderer): def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON paragraph to DOCX using AI-generated styles.""" try: - # Extract from nested content structure content = paragraph_data.get("content", {}) if isinstance(content, dict): - text = content.get("text", "") + inlineRuns = self._inlineRunsFromContent(content) elif isinstance(content, str): - text = content + inlineRuns = [{"type": "text", "value": content}] else: - text = "" - - # CRITICAL: Prevent rendering base64 image data as text - # Base64 image data typically starts with /9j/ 
(JPEG) or iVBORw0KGgo (PNG) - if text and (text.startswith("/9j/") or text.startswith("iVBORw0KGgo") or - (len(text) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in text[:100]))): - # This looks like base64 data - don't render as text - self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(text)})") + inlineRuns = [] + + if not inlineRuns: + return + + plainText = "".join(r.get("value", "") for r in inlineRuns) + if plainText and (plainText.startswith("/9j/") or plainText.startswith("iVBORw0KGgo") or + (len(plainText) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in plainText[:100]))): + self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(plainText)})") para = doc.add_paragraph("[Error: Image data found in text content - image embedding may have failed]") if para.runs: - para.runs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color for error + para.runs[0].font.color.rgb = RGBColor(255, 0, 0) return - - if text: - para = doc.add_paragraph() - self._addMarkdownInlineRuns(para, text) - paragraph_style = styles.get("paragraph", {}) - if paragraph_style: - # Pre-calculate and cache style objects - font_size_pt = None - text_color_rgb = None - if "font_size" in paragraph_style: - font_size_pt = Pt(paragraph_style["font_size"]) - if "color" in paragraph_style: - color_hex = paragraph_style["color"].lstrip('#') - text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) - bold = paragraph_style.get("bold", False) - - # Use direct access instead of iterating - if len(para.runs) > 0: - run = para.runs[0] - if font_size_pt: - run.font.size = font_size_pt - run.font.bold = bold - if text_color_rgb: - run.font.color.rgb = text_color_rgb + + para = doc.add_paragraph() + hasNewRuns = content.get("inlineRuns") if isinstance(content, dict) else None 
+ if hasNewRuns: + self._renderInlineRuns(inlineRuns, para, styles) + else: + self._addMarkdownInlineRuns(para, plainText) + + paragraph_style = styles.get("paragraph", {}) + if paragraph_style: + font_size_pt = Pt(paragraph_style["font_size"]) if "font_size" in paragraph_style else None + text_color_rgb = None + if "color" in paragraph_style: + color_hex = paragraph_style["color"].lstrip('#') + text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) + bold = paragraph_style.get("bold", False) + if len(para.runs) > 0: + run = para.runs[0] + if font_size_pt: + run.font.size = font_size_pt + run.font.bold = bold + if text_color_rgb: + run.font.color.rgb = text_color_rgb + if "align" in paragraph_style: + align = paragraph_style["align"] + if align == "center": + para.alignment = WD_ALIGN_PARAGRAPH.CENTER + elif align == "right": + para.alignment = WD_ALIGN_PARAGRAPH.RIGHT else: - # Create run if none exists - run = para.add_run() - if font_size_pt: - run.font.size = font_size_pt - run.font.bold = bold - if text_color_rgb: - run.font.color.rgb = text_color_rgb - - if "align" in paragraph_style: - align = paragraph_style["align"] - if align == "center": - para.alignment = WD_ALIGN_PARAGRAPH.CENTER - elif align == "right": - para.alignment = WD_ALIGN_PARAGRAPH.RIGHT - else: - para.alignment = WD_ALIGN_PARAGRAPH.LEFT - + para.alignment = WD_ALIGN_PARAGRAPH.LEFT + except Exception as e: self.logger.warning(f"Error rendering paragraph: {str(e)}") def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON code block to DOCX using AI-generated styles.""" try: - # Extract from nested content structure content = code_data.get("content", {}) if not isinstance(content, dict): return code = content.get("code", "") language = content.get("language", "") code_style = styles.get("code_block", {}) - + us = getattr(self, '_unifiedStyle', None) + if code: if language: lang_para 
= doc.add_paragraph(f"Code ({language}):") if len(lang_para.runs) > 0: lang_para.runs[0].bold = True - - # Pre-calculate and cache style objects - code_font_name = code_style.get("font", "Courier New") - code_font_size_pt = Pt(code_style.get("font_size", 9)) + + code_font_name = code_style.get("font", us["fonts"]["monospace"] if us else "Courier New") + code_font_size_pt = Pt(code_style.get("font_size", us["codeBlock"]["fontSizePt"] if us else 9)) code_text_color_rgb = None if "color" in code_style: color_hex = code_style["color"].lstrip('#') diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py index 58143ac2..b39efd50 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py @@ -40,7 +40,7 @@ class RendererHtml(BaseRenderer): from modules.datamodels.datamodelJson import supportedSectionTypes return list(supportedSectionTypes) - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]: """ Render HTML document with images as separate files. Returns list of documents: [HTML document, image1, image2, ...] 
@@ -54,7 +54,7 @@ class RendererHtml(BaseRenderer): self._renderedImages = images # Generate HTML using AI-analyzed styling - htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService) + htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService, style=style) # Replace base64 data URIs with relative file paths if images exist if images: @@ -107,11 +107,16 @@ class RendererHtml(BaseRenderer): return resultDocuments - async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: + async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str: """Generate HTML content from structured JSON document using AI-generated styling.""" try: - # Get style set: use styles from metadata if available, otherwise enhance with AI - styles = await self._getStyleSet(jsonContent, userPrompt, aiService) + # Use unified style when provided, otherwise fall back to existing flow + if style: + styles = self._convertUnifiedStyleToInternal(style) + self._unifiedStyle = style + else: + styles = await self._getStyleSet(jsonContent, userPrompt, aiService) + self._unifiedStyle = None # Validate JSON structure if not self._validateJsonStructure(jsonContent): @@ -272,6 +277,10 @@ class RendererHtml(BaseRenderer): def _generateCssStyles(self, styles: Dict[str, Any]) -> str: """Generate CSS from style definitions.""" + # When unified style is available, generate CSS directly from it + if getattr(self, "_unifiedStyle", None): + return self._generateCssFromUnifiedStyle(self._unifiedStyle) + css_parts = [] # Body styles @@ -368,6 +377,164 @@ class RendererHtml(BaseRenderer): return '\n'.join(css_parts) + def _generateCssFromUnifiedStyle(self, style: Dict[str, Any]) -> str: + """Generate CSS directly from unified style dict.""" + fonts = style.get("fonts", {}) + colors = 
style.get("colors", {}) + headings = style.get("headings", {}) + para = style.get("paragraph", {}) + tbl = style.get("table", {}) + lst = style.get("list", {}) + cb = style.get("codeBlock", {}) + page = style.get("page", {}) + + primaryFont = fonts.get("primary", "Arial, sans-serif") + monoFont = fonts.get("monospace", "Courier New, monospace") + bgColor = colors.get("background", "#FFFFFF") + primaryColor = colors.get("primary", "#1F3864") + paraColor = para.get("color", "#333333") + paraSizePt = para.get("sizePt", 11) + lineSpacing = para.get("lineSpacing", 1.15) + + css_parts = [] + + # Body + css_parts.append("body {") + css_parts.append(f" font-family: {primaryFont};") + css_parts.append(f" background: {bgColor};") + css_parts.append(f" color: {paraColor};") + css_parts.append(f" font-size: {paraSizePt}pt;") + css_parts.append(f" line-height: {lineSpacing};") + margins = page.get("marginsPt", {}) + if margins: + css_parts.append(f" margin: {margins.get('top', 60)}pt {margins.get('right', 60)}pt {margins.get('bottom', 60)}pt {margins.get('left', 60)}pt;") + else: + css_parts.append(" margin: 0; padding: 20px;") + css_parts.append("}") + + # Document title (uses h1 style) + h1 = headings.get("h1", {}) + css_parts.append(".document-title {") + css_parts.append(f" font-size: {h1.get('sizePt', 24)}pt;") + css_parts.append(f" color: {h1.get('color', primaryColor)};") + css_parts.append(f" font-weight: {h1.get('weight', 'bold')};") + css_parts.append(" margin: 0 0 1em 0;") + css_parts.append("}") + + # Headings h1-h4 + for level in range(1, 5): + key = f"h{level}" + h = headings.get(key, h1 if level == 1 else headings.get(f"h{level-1}", {})) + css_parts.append(f"h{level} {{") + css_parts.append(f" font-size: {h.get('sizePt', max(24 - (level-1)*4, 12))}pt;") + css_parts.append(f" color: {h.get('color', primaryColor)};") + css_parts.append(f" font-weight: {h.get('weight', 'bold')};") + css_parts.append(f" margin: 1.2em 0 0.4em 0;") + css_parts.append("}") + + # 
Paragraphs + css_parts.append("p {") + css_parts.append(f" font-size: {paraSizePt}pt;") + css_parts.append(f" color: {paraColor};") + css_parts.append(f" line-height: {lineSpacing};") + css_parts.append(" margin: 0 0 1em 0;") + css_parts.append("}") + + # Tables + borderColor = tbl.get("borderColor", "#DEE2E6") + css_parts.append("table {") + css_parts.append(f" border-collapse: collapse;") + css_parts.append(f" width: 100%;") + css_parts.append(f" margin: 1em 0;") + css_parts.append(f" border: 1px solid {borderColor};") + css_parts.append("}") + + # Table headers + css_parts.append("th {") + css_parts.append(f" background: {tbl.get('headerBg', '#1F3864')};") + css_parts.append(f" color: {tbl.get('headerFg', '#FFFFFF')};") + css_parts.append(" font-weight: bold;") + css_parts.append(" text-align: center;") + css_parts.append(f" padding: 10px;") + css_parts.append(f" border: 1px solid {borderColor};") + css_parts.append("}") + + # Table cells + css_parts.append("td {") + css_parts.append(f" color: {paraColor};") + css_parts.append(" padding: 8px;") + css_parts.append(f" border: 1px solid {borderColor};") + css_parts.append("}") + + # Lists + css_parts.append("ul {") + css_parts.append(f" font-size: {lst.get('sizePt', paraSizePt)}pt;") + css_parts.append(f" color: {paraColor};") + css_parts.append(f" padding-left: {lst.get('indentPt', 18)}pt;") + css_parts.append(" margin: 0 0 1em 0;") + css_parts.append("}") + + # Code blocks + css_parts.append("pre {") + css_parts.append(f" font-family: {monoFont};") + css_parts.append(f" font-size: {cb.get('fontSizePt', 9)}pt;") + css_parts.append(f" color: {paraColor};") + css_parts.append(f" background: {cb.get('background', '#F8F9FA')};") + css_parts.append(f" border: 1px solid {cb.get('borderColor', '#E2E8F0')};") + css_parts.append(" border-radius: 4px;") + css_parts.append(" padding: 1em;") + css_parts.append(" margin: 1em 0;") + css_parts.append(" overflow-x: auto;") + css_parts.append("}") + + # Images + 
css_parts.append("img {") + css_parts.append(" max-width: 100%;") + css_parts.append(" height: auto;") + css_parts.append(" margin: 1em 0;") + css_parts.append(" border-radius: 4px;") + css_parts.append("}") + + # Generated info + css_parts.append(".generated-info {") + css_parts.append(" font-size: 0.9em;") + css_parts.append(" color: #666;") + css_parts.append(" text-align: center;") + css_parts.append(" margin-top: 2em;") + css_parts.append(" padding-top: 1em;") + css_parts.append(" border-top: 1px solid #ddd;") + css_parts.append("}") + + return '\n'.join(css_parts) + + def _renderInlineRuns(self, runs: list) -> str: + """Convert inline runs to HTML markup.""" + import html as htmlLib + parts = [] + for run in runs: + runType = run.get("type", "text") + value = htmlLib.escape(run.get("value", "")) + if runType == "text": + parts.append(value) + elif runType == "bold": + parts.append(f"{value}") + elif runType == "italic": + parts.append(f"{value}") + elif runType == "code": + parts.append(f"{value}") + elif runType == "link": + href = htmlLib.escape(run.get("href", "")) + parts.append(f'{value}') + elif runType == "image": + b64 = run.get("base64Data", "") + mime = run.get("mimeType", "image/png") + alt = value + if b64: + parts.append(f'{alt}') + else: + parts.append(value) + return "".join(parts) + def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a single JSON section to HTML using AI-generated styles. Supports three content formats: reference, object (base64), extracted_text. @@ -419,6 +586,11 @@ class RendererHtml(BaseRenderer): # Regular paragraph element - extract from nested content structure (standard JSON format) content = element.get("content", {}) if isinstance(content, dict): + # New format: inlineRuns + inlineRuns = content.get("inlineRuns") + if inlineRuns and isinstance(inlineRuns, list): + htmlParts.append(f'

{self._renderInlineRuns(inlineRuns)}

') + continue text = content.get("text", "") elif isinstance(content, str): text = content @@ -495,7 +667,8 @@ class RendererHtml(BaseRenderer): # Table header htmlParts.append('') for header in headers: - htmlParts.append(f'{header}') + runs = self._inlineRunsForCell(header) + htmlParts.append(f'{self._renderInlineRuns(runs)}') htmlParts.append('') # Table body @@ -503,7 +676,8 @@ class RendererHtml(BaseRenderer): for row in rows: htmlParts.append('') for cellData in row: - htmlParts.append(f'{cellData}') + runs = self._inlineRunsForCell(cellData) + htmlParts.append(f'{self._renderInlineRuns(runs)}') htmlParts.append('') htmlParts.append('') @@ -528,10 +702,8 @@ class RendererHtml(BaseRenderer): htmlParts = ['
    '] for item in items: - if isinstance(item, str): - htmlParts.append(f'
  • {item}
  • ') - elif isinstance(item, dict) and "text" in item: - htmlParts.append(f'
  • {item["text"]}
  • ') + runs = self._inlineRunsForListItem(item) + htmlParts.append(f'
  • {self._renderInlineRuns(runs)}
  • ') htmlParts.append('
') return '\n'.join(htmlParts) @@ -571,6 +743,11 @@ class RendererHtml(BaseRenderer): if isinstance(el, dict): content = el.get("content", {}) if isinstance(content, dict): + # New format: inlineRuns + inlineRuns = content.get("inlineRuns") + if inlineRuns and isinstance(inlineRuns, list): + texts.append(self._renderInlineRuns(inlineRuns)) + continue text = content.get("text", "") elif isinstance(content, str): text = content @@ -581,16 +758,18 @@ class RendererHtml(BaseRenderer): elif isinstance(el, str): texts.append(el) if texts: - # Join multiple paragraphs with

tags return '\n'.join(f'

{text}

' for text in texts) return "" elif isinstance(paragraphData, str): return f'

{paragraphData}

' elif isinstance(paragraphData, dict): - # Handle nested content structure: element.content vs element.text # Extract from nested content structure content = paragraphData.get("content", {}) if isinstance(content, dict): + # New format: inlineRuns + inlineRuns = content.get("inlineRuns") + if inlineRuns and isinstance(inlineRuns, list): + return f'

{self._renderInlineRuns(inlineRuns)}

' text = content.get("text", "") elif isinstance(content, str): text = content diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py index df2aff10..31537980 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py @@ -106,17 +106,17 @@ class RendererPdf(BaseRenderer): from modules.datamodels.datamodelJson import supportedSectionTypes return list(supportedSectionTypes) - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]: """Render extracted JSON content to PDF format using AI-analyzed styling.""" try: if not REPORTLAB_AVAILABLE: # Fallback to HTML if reportlab not available from .rendererHtml import RendererHtml html_renderer = RendererHtml() - return await html_renderer.render(extractedContent, title, userPrompt, aiService) + return await html_renderer.render(extractedContent, title, userPrompt, aiService, style=style) # Generate PDF using AI-analyzed styling - pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService) + pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService, unifiedStyle=style) # Extract metadata for document type and other info metadata = extractedContent.get("metadata", {}) if extractedContent else {} @@ -163,11 +163,28 @@ class RendererPdf(BaseRenderer): ) ] - async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: + async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, 
unifiedStyle: Dict[str, Any] = None) -> str: """Generate PDF content from structured JSON document using AI-generated styling.""" try: - # Get style set: use styles from metadata if available, otherwise enhance with AI - styles = await self._getStyleSet(json_content, userPrompt, aiService) + # Get style set from unified style or legacy approach + if unifiedStyle: + styles = self._convertUnifiedStyleToInternal(unifiedStyle) + self._unifiedStyle = unifiedStyle + for level in range(1, 7): + hKey = f"heading{level}" + if hKey not in styles: + styles[hKey] = self._defaultHeadingStyleDef(level) + else: + styles[hKey].setdefault("space_after", 12) + styles[hKey].setdefault("space_before", 12) + styles["paragraph"].setdefault("space_after", 6) + styles["paragraph"].setdefault("line_height", unifiedStyle["paragraph"].get("lineSpacing", 1.2)) + styles["bullet_list"].setdefault("space_after", 3) + styles["code_block"].setdefault("space_after", 6) + styles["code_block"].setdefault("align", "left") + else: + styles = await self._getStyleSet(json_content, userPrompt, aiService) + self._unifiedStyle = None # Validate JSON structure if not self._validateJsonStructure(json_content): @@ -179,15 +196,13 @@ class RendererPdf(BaseRenderer): # Create a buffer to hold the PDF buffer = io.BytesIO() - # Create PDF document - doc = SimpleDocTemplate( - buffer, - pagesize=A4, - rightMargin=72, - leftMargin=72, - topMargin=72, - bottomMargin=18 - ) + # Create PDF document with unified page margins or defaults + pageCfg = unifiedStyle["page"] if unifiedStyle else None + if pageCfg: + m = pageCfg["marginsPt"] + doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=m["right"], leftMargin=m["left"], topMargin=m["top"], bottomMargin=m["bottom"]) + else: + doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18) # Build PDF content (no cover page — body starts on page 1; filename still uses `title`) story = [] @@ -609,6 +624,31 @@ class 
RendererPdf(BaseRenderer): .replace(">", ">") ) + def _renderInlineRunsToPdfXml(self, runs: list) -> str: + """Convert inline runs to ReportLab Paragraph XML.""" + parts = [] + us = getattr(self, '_unifiedStyle', None) + monoFont = us["fonts"]["monospace"] if us else "Courier" + for run in runs: + runType = run.get("type", "text") + value = self._escapeReportlabXml(run.get("value", "")) + if runType == "text": + parts.append(value) + elif runType == "bold": + parts.append(f"{value}") + elif runType == "italic": + parts.append(f"{value}") + elif runType == "code": + parts.append(f'{value}') + elif runType == "link": + href = self._escapeReportlabXml(run.get("href", "")) + parts.append(f'{value}') + elif runType == "image": + parts.append(f"[Image: {value}]") + else: + parts.append(value) + return "".join(parts) + def _applyInlineMarkdownToEscapedPlain(self, text: str) -> str: """Escape XML then apply bold/italic to a segment with no `code` spans (code is handled separately).""" if not text: @@ -744,10 +784,10 @@ class RendererPdf(BaseRenderer): return [] headers = content.get("headers", []) rows = content.get("rows", []) - + if not headers or not rows: return [] - + numCols = len(headers) colWidth = _PDF_CONTENT_WIDTH_PT / max(numCols, 1) colWidths = [colWidth] * numCols @@ -755,8 +795,12 @@ class RendererPdf(BaseRenderer): hdrPs = self._createTableCellParagraphStyle(styles, header=True, tableStyleKey="table_header") cellPs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell") - def _cellPara(val, ps): - return self._paragraphFromInlineMarkdown(str(val) if val is not None else "", ps) + def _cellPara(cell, ps): + runs = self._inlineRunsForCell(cell) + if isinstance(cell, list): + xml = self._renderInlineRunsToPdfXml(runs) + return Paragraph(_wrapEmojiSpansInXml(xml), ps) + return self._paragraphFromInlineMarkdown(str(cell) if cell is not None else "", ps) headerRow = [_cellPara(h, hdrPs) for h in headers] bodyRows = [] @@ -786,7 
+830,7 @@ class RendererPdf(BaseRenderer): ] table.setStyle(TableStyle(table_style)) return [table, Spacer(1, 12)] - + except Exception as e: self.logger.warning(f"Error rendering table: {str(e)}") return [] @@ -794,32 +838,29 @@ class RendererPdf(BaseRenderer): def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON bullet list to PDF elements using AI-generated styles.""" try: - # Extract from nested content structure content = list_data.get("content", {}) if not isinstance(content, dict): return [] items = content.get("items", []) - bullet_style_def = styles.get("bullet_list", {}) - + bulletStyleDef = styles.get("bullet_list", {}) + normalStyle = self._createNormalStyle(styles) + elements = [] for item in items: - if isinstance(item, str): - elements.append( - Paragraph(f"• {self._markdownInlineToReportlabXml(item)}", self._createNormalStyle(styles)) - ) + runs = self._inlineRunsForListItem(item) + if isinstance(item, list): + xml = self._renderInlineRunsToPdfXml(runs) + elements.append(Paragraph(f"\u2022 {_wrapEmojiSpansInXml(xml)}", normalStyle)) + elif isinstance(item, str): + elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item)}", normalStyle)) elif isinstance(item, dict) and "text" in item: - elements.append( - Paragraph( - f"• {self._markdownInlineToReportlabXml(item['text'])}", - self._createNormalStyle(styles), - ) - ) - + elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item['text'])}", normalStyle)) + if elements: - elements.append(Spacer(1, bullet_style_def.get("space_after", 3))) - + elements.append(Spacer(1, bulletStyleDef.get("space_after", 3))) + return elements - + except Exception as e: self.logger.warning(f"Error rendering bullet list: {str(e)}") return [] @@ -848,20 +889,27 @@ class RendererPdf(BaseRenderer): def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON paragraph to PDF 
elements using AI-generated styles.""" try: - # Extract from nested content structure content = paragraph_data.get("content", {}) - if isinstance(content, dict): - text = content.get("text", "") - elif isinstance(content, str): - text = content - else: - text = "" - + if isinstance(content, str): + content = {"text": content} + if not isinstance(content, dict): + return [] + + normalStyle = self._createNormalStyle(styles) + + if "inlineRuns" in content: + runs = self._inlineRunsFromContent(content) + xml = self._renderInlineRunsToPdfXml(runs) + if xml: + return [Paragraph(_wrapEmojiSpansInXml(xml), normalStyle)] + return [] + + text = content.get("text", "") if text: - return [self._paragraphFromInlineMarkdown(text, self._createNormalStyle(styles))] - + return [self._paragraphFromInlineMarkdown(text, normalStyle)] + return [] - + except Exception as e: self.logger.warning(f"Error rendering paragraph: {str(e)}") return [] diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py index 3bdff7f1..49ee8048 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py @@ -59,7 +59,7 @@ class RendererPptx(BaseRenderer): from modules.datamodels.datamodelJson import supportedSectionTypes return list(supportedSectionTypes) - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]: """ Render content as PowerPoint presentation from JSON data. 
@@ -68,7 +68,7 @@ class RendererPptx(BaseRenderer): title: Title for the presentation userPrompt: User prompt for AI styling aiService: AI service for styling - **kwargs: Additional rendering options + style: Unified style dict from pipeline (preferred over AI-generated styles) Returns: Base64-encoded PowerPoint presentation as string @@ -81,8 +81,19 @@ class RendererPptx(BaseRenderer): from pptx.dml.color import RGBColor import re - # Get style set: use styles from metadata if available, otherwise enhance with AI - styles = await self._getStyleSet(extractedContent, userPrompt, aiService) + # Get style set: prefer unified style, then metadata, then AI-enhanced + if style: + internalStyle = self._convertUnifiedStyleToInternal(style) + defaultPptx = self._getDefaultStyleSet() + for key in ("slide_size", "content_per_slide", "design_theme", "color_scheme", "background_style", "accent_colors", "professional_grade", "executive_ready"): + internalStyle[key] = defaultPptx.get(key) + internalStyle["heading"] = internalStyle["heading1"] + internalStyle["subheading"] = internalStyle["heading2"] + styles = internalStyle + self._unifiedStyle = style + else: + styles = await self._getStyleSet(extractedContent, userPrompt, aiService) + self._unifiedStyle = None # Create new presentation prs = Presentation() @@ -910,15 +921,17 @@ JSON ONLY. NO OTHER TEXT.""" # Extract from nested content structure content = paragraph_data.get("content", {}) if isinstance(content, dict): - text = content.get("text", "") + if content.get("inlineRuns"): + text = "".join(r.get("value", "") for r in content["inlineRuns"]) + else: + text = content.get("text", "") elif isinstance(content, str): text = content else: text = "" if text: - # Limit paragraph length based on content density - max_length = 200 # Default limit + max_length = 200 if len(text) > max_length: text = text[:max_length] + "..." @@ -1303,6 +1316,32 @@ JSON ONLY. 
NO OTHER TEXT.""" r.text = text[pos:] _applyBase(r) + def _renderInlineRunsPptx(self, runs, paragraph, fontSize=None, fontColor=None): + """Process InlineRun dicts into pptx text runs.""" + from pptx.util import Pt + paragraph.text = "" + us = getattr(self, '_unifiedStyle', None) + monoFont = us["fonts"]["monospace"] if us else "Courier New" + for run in runs: + runType = run.get("type", "text") + value = run.get("value", "") + r = paragraph.add_run() + r.text = value + if fontSize: + r.font.size = fontSize + if fontColor: + r.font.color.rgb = fontColor + if runType == "bold": + r.font.bold = True + elif runType == "italic": + r.font.italic = True + elif runType == "code": + r.font.name = monoFont + if fontSize and hasattr(fontSize, 'pt'): + r.font.size = Pt(max(8, int(fontSize.pt * 0.85))) + elif runType == "link": + r.font.underline = True + def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float = None, max_width: float = None) -> None: """Add a PowerPoint table to slide.""" try: @@ -1374,7 +1413,8 @@ JSON ONLY. NO OTHER TEXT.""" cell = table.cell(0, col_idx) # Clear existing text and set new text cell.text_frame.clear() - header_text = str(header) if header else "" + cellRuns = self._inlineRunsForCell(header) + header_text = "".join(r.get("value", "") for r in cellRuns) cell.text = header_text # Ensure paragraph exists @@ -1420,7 +1460,8 @@ JSON ONLY. NO OTHER TEXT.""" cell = table.cell(row_idx, col_idx) # Clear existing text and set new text cell.text_frame.clear() - cell_text = str(cell_data) if cell_data is not None else "" + cellRuns = self._inlineRunsForCell(cell_data) + cell_text = "".join(r.get("value", "") for r in cellRuns) cell.text = cell_text # Ensure paragraph exists @@ -1462,9 +1503,8 @@ JSON ONLY. 
NO OTHER TEXT.""" fontColor = RGBColor(*self._getSafeColor(listStyle.get("color", (47, 47, 47)))) for item in items: - itemText = item.get("text", "") if isinstance(item, dict) else str(item) - if not itemText or not itemText.strip(): - continue + runs = self._inlineRunsForListItem(item) + isNewFormat = isinstance(item, list) p = text_frame.add_paragraph() p.level = 0 @@ -1472,21 +1512,33 @@ JSON ONLY. NO OTHER TEXT.""" p.space_before = Pt(2) p.space_after = Pt(2) - # Consistent bullet prefix - self._addMarkdownInlineRuns(p, f" • {itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) + if isNewFormat: + bulletRuns = [{"type": "text", "value": " \u2022 "}] + runs + self._renderInlineRunsPptx(bulletRuns, p, fontSize=fontSize, fontColor=fontColor) + else: + itemText = item.get("text", "") if isinstance(item, dict) else str(item) + if not itemText or not itemText.strip(): + continue + self._addMarkdownInlineRuns(p, f" \u2022 {itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) - # Subitems + # Subitems (only for dict-style items) if isinstance(item, dict): for sub in item.get("subitems", []): - subText = sub.get("text", "") if isinstance(sub, dict) else str(sub) - if not subText: - continue + subRuns = self._inlineRunsForListItem(sub) + isSubNew = isinstance(sub, list) sp = text_frame.add_paragraph() sp.level = 0 sp.alignment = PP_ALIGN.LEFT sp.space_before = Pt(1) sp.space_after = Pt(1) - self._addMarkdownInlineRuns(sp, f" – {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) + if isSubNew: + subBulletRuns = [{"type": "text", "value": " \u2013 "}] + subRuns + self._renderInlineRunsPptx(subBulletRuns, sp, fontSize=fontSize, fontColor=fontColor) + else: + subText = sub.get("text", "") if isinstance(sub, dict) else str(sub) + if not subText: + continue + self._addMarkdownInlineRuns(sp, f" \u2013 {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) except Exception as e: logger.warning(f"Error adding bullet list 
to slide: {str(e)}") @@ -1540,42 +1592,53 @@ JSON ONLY. NO OTHER TEXT.""" # Extract from nested content structure content = element.get("content", {}) if isinstance(content, dict): + inlineRuns = self._inlineRunsFromContent(content) + hasInlineRuns = content.get("inlineRuns") is not None text = content.get("text", "") elif isinstance(content, str): text = content + inlineRuns = [{"type": "text", "value": text}] if text else [] + hasInlineRuns = False else: text = "" + inlineRuns = [] + hasInlineRuns = False - if text: - p = text_frame.add_paragraph() - p.level = 0 - - try: - if hasattr(p, 'paragraph_format'): - p.paragraph_format.bullet.type = None - except (AttributeError, TypeError): - pass - - paragraph_style = styles.get("paragraph", {}) - base_font_size = paragraph_style.get("font_size", 14) - calculated_size = max(10, int(base_font_size * font_size_multiplier)) - fSize = Pt(calculated_size) - fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) - fBold = paragraph_style.get("bold", False) + if not inlineRuns and not text: + return + + p = text_frame.add_paragraph() + p.level = 0 + + try: + if hasattr(p, 'paragraph_format'): + p.paragraph_format.bullet.type = None + except (AttributeError, TypeError): + pass + + paragraph_style = styles.get("paragraph", {}) + base_font_size = paragraph_style.get("font_size", 14) + calculated_size = max(10, int(base_font_size * font_size_multiplier)) + fSize = Pt(calculated_size) + fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) + fBold = paragraph_style.get("bold", False) + + if hasInlineRuns: + self._renderInlineRunsPptx(inlineRuns, p, fontSize=fSize, fontColor=fColor) + else: self._addMarkdownInlineRuns(p, text, fontSize=fSize, fontColor=fColor, fontBold=fBold) - - # Add proper spacing - p.space_before = Pt(6) # Space before paragraph - p.space_after = Pt(6) # Space after paragraph - p.line_spacing = 1.2 # Line spacing for readability - - align = 
paragraph_style.get("align", "left") - if align == "center": - p.alignment = PP_ALIGN.CENTER - elif align == "right": - p.alignment = PP_ALIGN.RIGHT - else: - p.alignment = PP_ALIGN.LEFT + + p.space_before = Pt(6) + p.space_after = Pt(6) + p.line_spacing = 1.2 + + align = paragraph_style.get("align", "left") + if align == "center": + p.alignment = PP_ALIGN.CENTER + elif align == "right": + p.alignment = PP_ALIGN.RIGHT + else: + p.alignment = PP_ALIGN.LEFT except Exception as e: logger.warning(f"Error adding paragraph to slide: {str(e)}") diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py index 79f5688c..3c6fdd5e 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py @@ -68,17 +68,17 @@ class RendererXlsx(BaseRenderer): from modules.datamodels.datamodelJson import supportedSectionTypes return list(supportedSectionTypes) - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]: """Render extracted JSON content to Excel format using AI-analyzed styling.""" try: if not OPENPYXL_AVAILABLE: # Fallback to CSV if openpyxl not available from .rendererCsv import RendererCsv csvRenderer = RendererCsv() - return await csvRenderer.render(extractedContent, title, userPrompt, aiService) + return await csvRenderer.render(extractedContent, title, userPrompt, aiService, style=style) # Generate Excel using AI-analyzed styling - excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService) + excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, 
aiService, style=style) # Extract metadata for document type and other info metadata = extractedContent.get("metadata", {}) if extractedContent else {} @@ -298,15 +298,22 @@ class RendererXlsx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not populate analysis sheet: {str(e)}") - async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: + async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str: """Generate Excel content from structured JSON document using AI-generated styling.""" try: # Debug output self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER") - # Get style set: use styles from metadata if available, otherwise enhance with AI - styles = await self._getStyleSet(jsonContent, userPrompt, aiService) + # Store unified style for use by inline-run helpers + self._unifiedStyle = style + + # Get style set: prefer unified style, fall back to legacy approach + if style: + styles = self._convertUnifiedStyleToInternal(style) + styles = self._convertColorsFormat(styles) + else: + styles = await self._getStyleSet(jsonContent, userPrompt, aiService) # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) if not self._validateJsonStructure(jsonContent): @@ -511,6 +518,10 @@ class RendererXlsx(BaseRenderer): "code_block": {"font": "Courier New", "font_size": 10, "color": "FF2F2F2F", "background": "FFF5F5F5"} } + def _renderInlineRuns(self, runs: list) -> str: + """Flatten inline runs to plain text for Excel cells.""" + return "".join(r.get("value", "") for r in runs) + async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: 
str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]: """Get AI styles with proper Excel color conversion.""" if not aiService: @@ -1206,7 +1217,9 @@ class RendererXlsx(BaseRenderer): # Add headers with formatting - OPTIMIZED: use cached style objects for col, header in enumerate(headers, 1): - sanitized_header = self._sanitizeCellValue(header) + runs = self._inlineRunsForCell(header) + headerText = self._renderInlineRuns(runs) + sanitized_header = self._sanitizeCellValue(headerText) cell = sheet.cell(row=headerRow, column=col, value=sanitized_header) # Apply styling with fallbacks - use pre-calculated objects @@ -1272,7 +1285,9 @@ class RendererXlsx(BaseRenderer): cell_values = cell_values[:header_count] for col, cell_value in enumerate(cell_values, 1): - sanitized_value = self._sanitizeCellValue(cell_value) + runs = self._inlineRunsForCell(cell_value) + cellText = self._renderInlineRuns(runs) + sanitized_value = self._sanitizeCellValue(cellText) cell = sheet.cell(row=startRow, column=col, value=sanitized_value) # Apply styling with fallbacks - use pre-calculated objects @@ -1311,20 +1326,20 @@ class RendererXlsx(BaseRenderer): def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: """Add a list element to Excel sheet. 
Expects nested content structure.""" try: - # Extract from nested content structure content = element.get("content", {}) if not isinstance(content, dict): return startRow - list_items = content.get("items") or [] - # Ensure list_items is a list - if not isinstance(list_items, list): - list_items = [] + listItems = content.get("items") or [] + if not isinstance(listItems, list): + listItems = [] - list_style = styles.get("bullet_list", {}) - for item in list_items: - sheet.cell(row=startRow, column=1, value=f"• {item}") - if list_style.get("color"): - sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"])) + listStyle = styles.get("bullet_list", {}) + for item in listItems: + runs = self._inlineRunsForListItem(item) + text = self._renderInlineRuns(runs) + sheet.cell(row=startRow, column=1, value=f"\u2022 {text}") + if listStyle.get("color"): + sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(listStyle["color"])) startRow += 1 return startRow @@ -1336,10 +1351,10 @@ class RendererXlsx(BaseRenderer): def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: """Add a paragraph element to Excel sheet. Expects nested content structure.""" try: - # Extract from nested content structure content = element.get("content", {}) if isinstance(content, dict): - text = content.get("text", "") + runs = self._inlineRunsFromContent(content) + text = self._renderInlineRuns(runs) elif isinstance(content, str): text = content else: diff --git a/modules/serviceCenter/services/serviceGeneration/styleDefaults.py b/modules/serviceCenter/services/serviceGeneration/styleDefaults.py new file mode 100644 index 00000000..b5a92641 --- /dev/null +++ b/modules/serviceCenter/services/serviceGeneration/styleDefaults.py @@ -0,0 +1,75 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. 
+"""Default style definitions and style resolution for document rendering.""" + +from typing import Any, Dict + + +DEFAULT_STYLE: Dict[str, Any] = { + "fonts": { + "primary": "Calibri", + "monospace": "Consolas", + }, + "colors": { + "primary": "#1F3864", + "secondary": "#2C3E50", + "accent": "#2980B9", + "background": "#FFFFFF", + }, + "headings": { + "h1": {"sizePt": 24, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 12, "spaceAfterPt": 6}, + "h2": {"sizePt": 18, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 10, "spaceAfterPt": 4}, + "h3": {"sizePt": 14, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 8, "spaceAfterPt": 3}, + "h4": {"sizePt": 12, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 6, "spaceAfterPt": 2}, + }, + "paragraph": {"sizePt": 11, "lineSpacing": 1.15, "color": "#333333"}, + "table": { + "headerBg": "#1F3864", + "headerFg": "#FFFFFF", + "headerSizePt": 10, + "bodySizePt": 10, + "rowBandingEven": "#F2F6FC", + "rowBandingOdd": "#FFFFFF", + "borderColor": "#CBD5E1", + "borderWidthPt": 0.5, + }, + "list": {"bulletChar": "\u2022", "indentPt": 18, "sizePt": 11}, + "image": {"defaultWidthPt": 480, "maxWidthPt": 800, "alignment": "center"}, + "codeBlock": {"fontSizePt": 9, "background": "#F8F9FA", "borderColor": "#E2E8F0"}, + "page": { + "format": "A4", + "marginsPt": {"top": 60, "bottom": 60, "left": 60, "right": 60}, + "showPageNumbers": True, + "headerHeight": 30, + "footerHeight": 30, + "headerLogo": None, + "headerText": "", + "footerText": "", + }, +} + + +def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]: + """Recursively merge override into base. 
Both dicts left unchanged; returns new dict.""" + result = {} + for key in base: + if key in override: + baseVal = base[key] + overVal = override[key] + if isinstance(baseVal, dict) and isinstance(overVal, dict): + result[key] = _deepMerge(baseVal, overVal) + else: + result[key] = overVal + else: + result[key] = base[key] + for key in override: + if key not in base: + result[key] = override[key] + return result + + +def resolveStyle(agentStyle: dict | None) -> Dict[str, Any]: + """Deep-merge DEFAULT_STYLE <- agentStyle. Returns fully resolved style dict.""" + if not agentStyle: + return dict(DEFAULT_STYLE) + return _deepMerge(DEFAULT_STYLE, agentStyle) diff --git a/modules/serviceCenter/services/serviceGeneration/subDocumentUtility.py b/modules/serviceCenter/services/serviceGeneration/subDocumentUtility.py index 8a3e7cea..594fbe02 100644 --- a/modules/serviceCenter/services/serviceGeneration/subDocumentUtility.py +++ b/modules/serviceCenter/services/serviceGeneration/subDocumentUtility.py @@ -9,11 +9,70 @@ from typing import Any, Dict logger = logging.getLogger(__name__) +def _parseInlineRuns(text: str) -> list: + """ + Parse inline markdown formatting into a list of InlineRun dicts. + Handles: images, links, bold, italic, inline code, plain text. + Uses a regex-based tokenizer that processes tokens left-to-right. 
+ """ + if not text: + return [{"type": "text", "value": ""}] + + # Pattern order matters: images before links, bold before italic + _TOKEN_RE = re.compile( + r'!\[(?P[^\]]*)\]\((?P[^)"]+)(?:\s+"(?P\d+)pt")?\)' # image + r'|\[(?P[^\]]+)\]\((?P[^)]+)\)' # link + r'|`(?P[^`]+)`' # inline code + r'|\*\*(?P.+?)\*\*' # bold + r'|(?.+?)\*(?!\w)' # italic *x* + r'|(?.+?)_(?!\w)' # italic _x_ + ) + + runs = [] + lastEnd = 0 + + for m in _TOKEN_RE.finditer(text): + # Plain text before this match + if m.start() > lastEnd: + runs.append({"type": "text", "value": text[lastEnd:m.start()]}) + + if m.group("imgAlt") is not None or m.group("imgSrc") is not None: + alt = (m.group("imgAlt") or "").strip() or "Image" + src = (m.group("imgSrc") or "").strip() + widthStr = m.group("imgWidth") + run = {"type": "image", "value": alt} + if src.startswith("file:"): + run["fileId"] = src[5:] + else: + run["href"] = src + if widthStr: + run["widthPt"] = int(widthStr) + runs.append(run) + elif m.group("linkText") is not None: + runs.append({"type": "link", "value": m.group("linkText"), "href": m.group("linkHref")}) + elif m.group("code") is not None: + runs.append({"type": "code", "value": m.group("code")}) + elif m.group("bold") is not None: + runs.append({"type": "bold", "value": m.group("bold")}) + elif m.group("italic1") is not None: + runs.append({"type": "italic", "value": m.group("italic1")}) + elif m.group("italic2") is not None: + runs.append({"type": "italic", "value": m.group("italic2")}) + + lastEnd = m.end() + + # Trailing plain text + if lastEnd < len(text): + runs.append({"type": "text", "value": text[lastEnd:]}) + + return runs if runs else [{"type": "text", "value": text}] + + def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]: """ - Convert markdown content to the standard document JSON format expected by renderReport. - Supports headings, code blocks, tables, lists, images (file: refs), paragraphs. 
- For plain text: wraps entire content in a single paragraph section. + Convert markdown content to the standard document JSON format with Inline-Run model. + Sections use inlineRuns (list of run dicts) instead of plain text strings. + Supports headings, code blocks, tables, lists, images, paragraphs. """ if not isinstance(markdown, str): markdown = str(markdown) if markdown else "" @@ -31,7 +90,7 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D while i < len(lines): line = lines[i] - # Headings + # Headings (plain text, no inline formatting) headingMatch = re.match(r"^(#{1,6})\s+(.+)", line) if headingMatch: level = len(headingMatch.group(1)) @@ -43,7 +102,7 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D i += 1 continue - # Fenced code blocks + # Fenced code blocks (no inline formatting) codeMatch = re.match(r"^```(\w*)", line) if codeMatch: lang = codeMatch.group(1) or "text" @@ -59,14 +118,14 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D }) continue - # Tables + # Tables - cells are List[InlineRun] tableMatch = re.match(r"^\|(.+)\|$", line) if tableMatch and (i + 1) < len(lines) and re.match(r"^\|[\s\-:|]+\|$", lines[i + 1]): - headerCells = [c.strip() for c in tableMatch.group(1).split("|")] + headerCells = [_parseInlineRuns(c.strip()) for c in tableMatch.group(1).split("|")] i += 2 rows = [] while i < len(lines) and re.match(r"^\|(.+)\|$", lines[i]): - rowCells = [c.strip() for c in lines[i][1:-1].split("|")] + rowCells = [_parseInlineRuns(c.strip()) for c in lines[i][1:-1].split("|")] rows.append(rowCells) i += 1 sections.append({ @@ -75,14 +134,14 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D }) continue - # Bullet / numbered lists + # Bullet / numbered lists - items are List[List[InlineRun]] listMatch = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", line) if listMatch: isNumbered = bool(re.match(r"\d+[.)]", 
listMatch.group(2))) items = [] while i < len(lines) and re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i]): m = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i]) - items.append({"text": m.group(3).strip()}) + items.append(_parseInlineRuns(m.group(3).strip())) i += 1 sections.append({ "id": _nextId(), "content_type": "bullet_list", "order": order, @@ -95,46 +154,50 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D i += 1 continue - # Images (simplified: store as paragraph with ref for now - full resolution needs Knowledge Store) - imgMatch = re.match(r"^!\[([^\]]*)\]\(([^)]+)\)", line) + # Standalone image on its own line -> block-level image section + imgMatch = re.match(r"^!\[([^\]]*)\]\(([^)\"]+)(?:\s+\"(\d+)pt\")?\)\s*$", line) if imgMatch: altText = imgMatch.group(1).strip() or "Image" src = imgMatch.group(2).strip() + widthStr = imgMatch.group(3) fileId = src[5:] if src.startswith("file:") else "" + content = { + "altText": altText, + "base64Data": "", + "_fileRef": fileId, + "_srcUrl": src if not fileId else "", + } + if widthStr: + content["widthPt"] = int(widthStr) sections.append({ "id": _nextId(), "content_type": "image", "order": order, - "elements": [{ - "content": { - "altText": altText, - "base64Data": "", - "_fileRef": fileId, - "_srcUrl": src if not fileId else "", - } - }], + "elements": [{"content": content}], }) i += 1 continue - # Paragraph + # Paragraph - produces inlineRuns paraLines = [] while i < len(lines) and lines[i].strip() and not re.match( - r"^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )", lines[i] + r"^(#{1,6}\s|```|\|.+\||!\[[^\]]*\]\([^)]+\)\s*$|(\s*)([-*+]|\d+[.)]) )", lines[i] ): paraLines.append(lines[i]) i += 1 if paraLines: + combinedText = " ".join(paraLines) sections.append({ "id": _nextId(), "content_type": "paragraph", "order": order, - "elements": [{"content": {"text": " ".join(paraLines)}}], + "elements": [{"content": {"inlineRuns": _parseInlineRuns(combinedText)}}], }) continue i += 
1 if not sections: + fallbackText = markdown.strip() or "(empty)" sections.append({ "id": _nextId(), "content_type": "paragraph", "order": order, - "elements": [{"content": {"text": markdown.strip() or "(empty)"}}], + "elements": [{"content": {"inlineRuns": _parseInlineRuns(fallbackText)}}], }) return { diff --git a/modules/workflows/methods/methodAi/_common.py b/modules/workflows/methods/methodAi/_common.py new file mode 100644 index 00000000..9e77d431 --- /dev/null +++ b/modules/workflows/methods/methodAi/_common.py @@ -0,0 +1,18 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +"""Shared helpers for AI workflow actions.""" + + +def applyCommonAiParams(parameters: dict, request) -> None: + """Apply common AI parameters (requireNeutralization, allowedModels) from node to request.""" + requireNeutralization = parameters.get("requireNeutralization") + if requireNeutralization is not None: + request.requireNeutralization = bool(requireNeutralization) + + allowedModels = parameters.get("allowedModels") + if allowedModels and isinstance(allowedModels, list): + if not request.options: + from modules.datamodels.datamodelAi import AiCallOptions + request.options = AiCallOptions() + request.options.allowedModels = allowedModels diff --git a/modules/workflows/methods/methodAi/actions/consolidate.py b/modules/workflows/methods/methodAi/actions/consolidate.py index fa622507..7483507e 100644 --- a/modules/workflows/methods/methodAi/actions/consolidate.py +++ b/modules/workflows/methods/methodAi/actions/consolidate.py @@ -67,6 +67,8 @@ async def consolidate(self, parameters: Dict[str, Any]) -> ActionResult: prompt=prompt, options=AiCallOptions(operationType=OperationTypeEnum.DATA_ANALYSE), ) + from modules.workflows.methods.methodAi._common import applyCommonAiParams + applyCommonAiParams(parameters, req) resp = await ai_service.callAi(req) except (SubscriptionInactiveException, BillingContextError): raise diff --git 
a/modules/workflows/methods/methodAi/actions/convertDocument.py b/modules/workflows/methods/methodAi/actions/convertDocument.py index 39d6e16f..b2ed908b 100644 --- a/modules/workflows/methods/methodAi/actions/convertDocument.py +++ b/modules/workflows/methods/methodAi/actions/convertDocument.py @@ -36,6 +36,10 @@ async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult: } if parentOperationId: processParams["parentOperationId"] = parentOperationId + if parameters.get("allowedModels"): + processParams["allowedModels"] = parameters["allowedModels"] + if parameters.get("requireNeutralization") is not None: + processParams["requireNeutralization"] = parameters["requireNeutralization"] return await self.process(processParams) diff --git a/modules/workflows/methods/methodAi/actions/generateCode.py b/modules/workflows/methods/methodAi/actions/generateCode.py index 313057a0..5ec6b51d 100644 --- a/modules/workflows/methods/methodAi/actions/generateCode.py +++ b/modules/workflows/methods/methodAi/actions/generateCode.py @@ -55,6 +55,16 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult: processingMode=ProcessingModeEnum.DETAILED ) + # Apply node-level AI params + allowedModels = parameters.get("allowedModels") + if allowedModels and isinstance(allowedModels, list): + options.allowedModels = allowedModels + requireNeutralization = parameters.get("requireNeutralization") + if requireNeutralization is not None: + _ctx = getattr(self.services, '_context', None) + if _ctx: + _ctx.requireNeutralization = bool(requireNeutralization) + # outputFormat: Optional - if None, formats determined from prompt by AI aiResponse: AiResponse = await self.services.ai.callAiContent( prompt=prompt, diff --git a/modules/workflows/methods/methodAi/actions/generateDocument.py b/modules/workflows/methods/methodAi/actions/generateDocument.py index 0709b924..18c158c1 100644 --- a/modules/workflows/methods/methodAi/actions/generateDocument.py +++ 
b/modules/workflows/methods/methodAi/actions/generateDocument.py @@ -59,6 +59,16 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: compressContext=False ) + # Apply node-level AI params + allowedModels = parameters.get("allowedModels") + if allowedModels and isinstance(allowedModels, list): + options.allowedModels = allowedModels + requireNeutralization = parameters.get("requireNeutralization") + if requireNeutralization is not None: + _ctx = getattr(self.services, '_context', None) + if _ctx: + _ctx.requireNeutralization = bool(requireNeutralization) + # outputFormat: Optional - if None, formats determined from prompt by AI aiResponse: AiResponse = await self.services.ai.callAiContent( prompt=prompt, diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py index 63e0f33e..d82ac4f7 100644 --- a/modules/workflows/methods/methodAi/actions/process.py +++ b/modules/workflows/methods/methodAi/actions/process.py @@ -212,6 +212,9 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: ) ) + from modules.workflows.methods.methodAi._common import applyCommonAiParams + applyCommonAiParams(parameters, request) + aiResponse_obj = await self.services.ai.callAi(request) # Convert AiCallResponse to AiResponse format @@ -243,6 +246,16 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE ) + # Apply node-level AI params (allowedModels, requireNeutralization) + allowedModels = parameters.get("allowedModels") + if allowedModels and isinstance(allowedModels, list): + options.allowedModels = allowedModels + requireNeutralization = parameters.get("requireNeutralization") + if requireNeutralization is not None: + _ctx = getattr(self.services, '_context', None) + if _ctx: + _ctx.requireNeutralization = bool(requireNeutralization) + # Get generationIntent 
from parameters (required for DATA_GENERATE) # Default to "document" if not provided (most common use case) # For code generation, use ai.generateCode action or explicitly pass generationIntent="code" diff --git a/modules/workflows/methods/methodAi/actions/summarizeDocument.py b/modules/workflows/methods/methodAi/actions/summarizeDocument.py index e32c1965..4c2bb2bc 100644 --- a/modules/workflows/methods/methodAi/actions/summarizeDocument.py +++ b/modules/workflows/methods/methodAi/actions/summarizeDocument.py @@ -39,6 +39,10 @@ async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult: } if parentOperationId: processParams["parentOperationId"] = parentOperationId + if parameters.get("allowedModels"): + processParams["allowedModels"] = parameters["allowedModels"] + if parameters.get("requireNeutralization") is not None: + processParams["requireNeutralization"] = parameters["requireNeutralization"] return await self.process(processParams) diff --git a/modules/workflows/methods/methodAi/actions/translateDocument.py b/modules/workflows/methods/methodAi/actions/translateDocument.py index bb6f8437..dc0533a9 100644 --- a/modules/workflows/methods/methodAi/actions/translateDocument.py +++ b/modules/workflows/methods/methodAi/actions/translateDocument.py @@ -41,6 +41,10 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult: processParams["resultType"] = resultType if parentOperationId: processParams["parentOperationId"] = parentOperationId + if parameters.get("allowedModels"): + processParams["allowedModels"] = parameters["allowedModels"] + if parameters.get("requireNeutralization") is not None: + processParams["requireNeutralization"] = parameters["requireNeutralization"] return await self.process(processParams) diff --git a/tests/serviceAi/__init__.py b/tests/serviceAi/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/serviceAi/test_allowed_models_whitelist.py 
b/tests/serviceAi/test_allowed_models_whitelist.py new file mode 100644 index 00000000..4593afd9 --- /dev/null +++ b/tests/serviceAi/test_allowed_models_whitelist.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +import pytest +from modules.datamodels.datamodelAi import AiCallOptions + + +def test_allowed_models_field_exists(): + opts = AiCallOptions(allowedModels=["gpt-5-mini", "claude-4-7-opus"]) + assert opts.allowedModels == ["gpt-5-mini", "claude-4-7-opus"] + + +def test_allowed_models_default_none(): + opts = AiCallOptions() + assert opts.allowedModels is None diff --git a/tests/serviceGeneration/__init__.py b/tests/serviceGeneration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/serviceGeneration/test_inline_image_paragraph.py b/tests/serviceGeneration/test_inline_image_paragraph.py new file mode 100644 index 00000000..be0c5d19 --- /dev/null +++ b/tests/serviceGeneration/test_inline_image_paragraph.py @@ -0,0 +1,23 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +import pytest +from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson + + +def test_inline_image_in_paragraph(): + md = "Results show ![chart](file:abc \"200pt\") clearly." 
+ result = markdownToDocumentJson(md, "Test") + runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"] + types = [r["type"] for r in runs] + assert "text" in types + assert "image" in types + imgRun = next(r for r in runs if r["type"] == "image") + assert imgRun.get("fileId") == "abc" + + +def test_multiple_inline_images(): + md = "A ![x](file:1) B ![y](file:2) C" + result = markdownToDocumentJson(md, "Test") + runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"] + images = [r for r in runs if r["type"] == "image"] + assert len(images) == 2 diff --git a/tests/serviceGeneration/test_md_to_json_consolidation.py b/tests/serviceGeneration/test_md_to_json_consolidation.py new file mode 100644 index 00000000..83118374 --- /dev/null +++ b/tests/serviceGeneration/test_md_to_json_consolidation.py @@ -0,0 +1,71 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +import pytest +from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson + + +def test_basic_paragraph(): + result = markdownToDocumentJson("Hello world", "Test") + doc = result["documents"][0] + section = doc["sections"][0] + assert section["content_type"] == "paragraph" + assert section["elements"][0]["content"]["inlineRuns"][0] == {"type": "text", "value": "Hello world"} + + +def test_inline_bold(): + result = markdownToDocumentJson("This is **bold** text", "Test") + runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"] + assert any(r["type"] == "bold" and r["value"] == "bold" for r in runs) + + +def test_inline_image(): + result = markdownToDocumentJson("Text ![logo](file:abc123) more", "Test") + runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"] + assert any(r["type"] == "image" and r.get("fileId") == "abc123" for r in runs) + + +def test_inline_link(): + result = markdownToDocumentJson("Click [here](https://example.com)", "Test") 
+ runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"] + assert any(r["type"] == "link" and r.get("href") == "https://example.com" for r in runs) + + +def test_table_cells_are_inline_runs(): + md = "| A | B |\n| --- | --- |\n| **x** | y |" + result = markdownToDocumentJson(md, "Test") + section = result["documents"][0]["sections"][0] + assert section["content_type"] == "table" + rows = section["elements"][0]["content"]["rows"] + assert isinstance(rows[0][0], list) + + +def test_bullet_list_inline_runs(): + md = "- Item **one**\n- Item two" + result = markdownToDocumentJson(md, "Test") + section = result["documents"][0]["sections"][0] + assert section["content_type"] == "bullet_list" + items = section["elements"][0]["content"]["items"] + assert isinstance(items[0], list) + + +def test_standalone_image_block(): + md = "![Big chart](file:chart123)" + result = markdownToDocumentJson(md, "Test") + section = result["documents"][0]["sections"][0] + assert section["content_type"] == "image" + + +def test_heading_unchanged(): + result = markdownToDocumentJson("# Title", "Test") + section = result["documents"][0]["sections"][0] + assert section["content_type"] == "heading" + assert section["elements"][0]["content"]["text"] == "Title" + assert section["elements"][0]["content"]["level"] == 1 + + +def test_code_block_unchanged(): + md = "```python\nprint('hi')\n```" + result = markdownToDocumentJson(md, "Test") + section = result["documents"][0]["sections"][0] + assert section["content_type"] == "code_block" + assert section["elements"][0]["content"]["code"] == "print('hi')" diff --git a/tests/serviceGeneration/test_style_resolver.py b/tests/serviceGeneration/test_style_resolver.py new file mode 100644 index 00000000..6b2b649a --- /dev/null +++ b/tests/serviceGeneration/test_style_resolver.py @@ -0,0 +1,39 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. 
+import pytest +from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle, DEFAULT_STYLE + + +def test_resolve_none_returns_defaults(): + result = resolveStyle(None) + assert result == DEFAULT_STYLE + + +def test_resolve_empty_returns_defaults(): + result = resolveStyle({}) + assert result == DEFAULT_STYLE + + +def test_override_single_color(): + result = resolveStyle({"colors": {"primary": "#FF0000"}}) + assert result["colors"]["primary"] == "#FF0000" + assert result["colors"]["secondary"] == DEFAULT_STYLE["colors"]["secondary"] + + +def test_override_nested_heading(): + result = resolveStyle({"headings": {"h1": {"sizePt": 30}}}) + assert result["headings"]["h1"]["sizePt"] == 30 + assert result["headings"]["h1"]["weight"] == "bold" + + +def test_override_font(): + result = resolveStyle({"fonts": {"primary": "Arial"}}) + assert result["fonts"]["primary"] == "Arial" + assert result["fonts"]["monospace"] == "Consolas" + + +def test_full_style_passthrough(): + custom = {"fonts": {"primary": "Helvetica", "monospace": "Monaco"}} + result = resolveStyle(custom) + assert result["fonts"]["primary"] == "Helvetica" + assert result["fonts"]["monospace"] == "Monaco" From b500bfa6c1320f05b43a5c63c7934a69d0bf8a4c Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Wed, 29 Apr 2026 23:27:52 +0200 Subject: [PATCH 4/5] plan D fixed --- .../services/serviceAi/mainServiceAi.py | 4 ++ .../renderers/rendererPdf.py | 39 ++++++++++++------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/modules/serviceCenter/services/serviceAi/mainServiceAi.py b/modules/serviceCenter/services/serviceAi/mainServiceAi.py index 18ac46bc..3b800fb5 100644 --- a/modules/serviceCenter/services/serviceAi/mainServiceAi.py +++ b/modules/serviceCenter/services/serviceAi/mainServiceAi.py @@ -51,6 +51,10 @@ class _ServicesAdapter: def workflow(self): return self._context.workflow + @workflow.setter + def workflow(self, value): + self._context.workflow = value + @property def 
chat(self): return self._get_service("chat") diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py index 31537980..7913a246 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py @@ -247,13 +247,28 @@ class RendererPdf(BaseRenderer): removed = False for idx, flowable in enumerate(story): fRepr = repr(flowable) + if "Image" in fRepr and hasattr(flowable, 'drawWidth') and hasattr(flowable, 'drawHeight'): + from reportlab.platypus import Image as ReportLabImage + if isinstance(flowable, ReportLabImage): + frameH = 650.0 + frameW = 450.0 + if flowable.drawHeight > frameH or flowable.drawWidth > frameW: + scaleW = frameW / flowable.drawWidth if flowable.drawWidth > frameW else 1.0 + scaleH = frameH / flowable.drawHeight if flowable.drawHeight > frameH else 1.0 + s = min(scaleW, scaleH) * 0.9 + flowable.drawWidth = flowable.drawWidth * s + flowable.drawHeight = flowable.drawHeight * s + flowable._width = flowable.drawWidth + flowable._height = flowable.drawHeight + removed = True + break if "Table" in fRepr and hasattr(flowable, '_cellvalues'): try: nRows = len(flowable._cellvalues) nCols = len(flowable._cellvalues[0]) if flowable._cellvalues else 0 if nRows == 1 and nCols == 1: errPara = Paragraph( - "[Code block omitted — content too large for PDF page]", + "[Code block omitted - content too large for PDF page]", self._createNormalStyle({}), ) story[idx] = errPara @@ -1078,20 +1093,18 @@ class RendererPdf(BaseRenderer): pilImage = PILImage.open(imageStream) originalWidth, originalHeight = pilImage.size - # Calculate available page dimensions (A4 with margins: 72pt left/right, 72pt top, 18pt bottom) pageWidth = A4[0] # 595.27 points pageHeight = A4[1] # 841.89 points - leftMargin = 72 - rightMargin = 72 - topMargin = 72 - bottomMargin = 18 - - # Use 
actual frame dimensions from SimpleDocTemplate - # Frame is smaller than page minus margins due to internal spacing - # From error message: frame is 439.27559055118115 x 739.8897637795277 - # Use conservative values with safety margin - availableWidth = 430.0 # Slightly smaller than frame width for safety - availableHeight = 730.0 # Slightly smaller than frame height for safety + # Use page dimensions minus margins with generous safety buffer + # A4 = 595.27 x 841.89 pt; frame = page - margins - internal padding + _us = getattr(self, '_unifiedStyle', None) or {} + _pageMgn = (_us.get('page') or {}).get('marginsPt') or {} + marginTop = _pageMgn.get('top', 60) + marginBottom = _pageMgn.get('bottom', 60) + marginLeft = _pageMgn.get('left', 60) + marginRight = _pageMgn.get('right', 60) + availableWidth = pageWidth - marginLeft - marginRight - 20 # 20pt safety + availableHeight = pageHeight - marginTop - marginBottom - 80 # 80pt safety for header/footer # Convert original image size from pixels to points # PIL provides size in pixels, need to convert to points From c140bd14d45d89665b4d9412dee8e7d747cbc9ad Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Thu, 30 Apr 2026 23:54:45 +0200 Subject: [PATCH 5/5] fixed nodes handovers --- modules/datamodels/datamodelDocref.py | 10 +- .../graphicalEditor/nodeDefinitions/ai.py | 21 ++-- modules/features/trustee/mainTrustee.py | 4 +- modules/interfaces/interfaceBootstrap.py | 95 ++++++++++++++++++ modules/routes/routeAutomationWorkspace.py | 97 +++++++++++++++---- .../services/serviceChat/mainServiceChat.py | 6 -- .../methods/methodAi/actions/process.py | 66 ++++++++++++- .../workflows/methods/methodAi/methodAi.py | 17 ++++ 8 files changed, 276 insertions(+), 40 deletions(-) diff --git a/modules/datamodels/datamodelDocref.py b/modules/datamodels/datamodelDocref.py index 27ba5e2b..e20fb072 100644 --- a/modules/datamodels/datamodelDocref.py +++ b/modules/datamodels/datamodelDocref.py @@ -110,11 +110,13 @@ class 
DocumentReferenceList(BaseModel): # docItem:documentId references.append(DocumentItemReference(documentId=parts[0])) - # Unknown format - skip or log warning else: - # Try to parse as simple string (backward compatibility) - # Assume it's a label if it doesn't match known patterns - if refStr: + if not refStr: + continue + import re + if re.match(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', refStr, re.I): + references.append(DocumentItemReference(documentId=refStr)) + else: references.append(DocumentListReference(label=refStr)) return cls(references=references) diff --git a/modules/features/graphicalEditor/nodeDefinitions/ai.py b/modules/features/graphicalEditor/nodeDefinitions/ai.py index 0336e382..65e97654 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/ai.py +++ b/modules/features/graphicalEditor/nodeDefinitions/ai.py @@ -24,8 +24,13 @@ AI_NODES = [ {"name": "resultType", "type": "string", "required": False, "frontendType": "select", "frontendOptions": {"options": ["txt", "json", "md", "csv", "xml", "html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"]}, "description": t("Ausgabeformat"), "default": "txt"}, - {"name": "documentList", "type": "string", "required": False, "frontendType": "hidden", - "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""}, + {"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "dataRef", + "description": t("Dokumentenliste (Upstream-Output binden)"), "default": ""}, + {"name": "context", "type": "string", "required": False, "frontendType": "dataRef", + "description": t("Kontextdaten fuer den Prompt (Upstream-Output binden)"), "default": ""}, + {"name": "documentTheme", "type": "string", "required": False, "frontendType": "select", + "frontendOptions": {"options": ["general", "finance", "legal", "technical", "hr"]}, + "description": t("Dokument-Thema (Style-Hinweis fuer den Renderer)"), "default": "general"}, {"name": "simpleMode", "type": 
"boolean", "required": False, "frontendType": "checkbox", "description": t("Einfacher Modus"), "default": True}, ] + _AI_COMMON_PARAMS, @@ -62,8 +67,8 @@ AI_NODES = [ "label": t("Dokument zusammenfassen"), "description": t("Dokumentinhalt zusammenfassen"), "parameters": [ - {"name": "documentList", "type": "string", "required": True, "frontendType": "hidden", - "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""}, + {"name": "documentList", "type": "DocumentList", "required": True, "frontendType": "dataRef", + "description": t("Dokumentenliste (Upstream-Output binden)"), "default": ""}, {"name": "summaryLength", "type": "string", "required": False, "frontendType": "select", "frontendOptions": {"options": ["brief", "medium", "detailed"]}, "description": t("Kurz, mittel oder ausführlich"), "default": "medium"}, @@ -82,8 +87,8 @@ AI_NODES = [ "label": t("Dokument übersetzen"), "description": t("Dokument in Zielsprache übersetzen"), "parameters": [ - {"name": "documentList", "type": "string", "required": True, "frontendType": "hidden", - "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""}, + {"name": "documentList", "type": "DocumentList", "required": True, "frontendType": "dataRef", + "description": t("Dokumentenliste (Upstream-Output binden)"), "default": ""}, {"name": "targetLanguage", "type": "string", "required": True, "frontendType": "text", "description": t("Zielsprache (z.B. 
de, en, French)")}, ] + _AI_COMMON_PARAMS, @@ -101,8 +106,8 @@ AI_NODES = [ "label": t("Dokument konvertieren"), "description": t("Dokument in anderes Format konvertieren"), "parameters": [ - {"name": "documentList", "type": "string", "required": True, "frontendType": "hidden", - "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""}, + {"name": "documentList", "type": "DocumentList", "required": True, "frontendType": "dataRef", + "description": t("Dokumentenliste (Upstream-Output binden)"), "default": ""}, {"name": "targetFormat", "type": "string", "required": True, "frontendType": "select", "frontendOptions": {"options": ["docx", "pdf", "xlsx", "csv", "txt", "html", "json", "md"]}, "description": t("Zielformat")}, diff --git a/modules/features/trustee/mainTrustee.py b/modules/features/trustee/mainTrustee.py index d8f7a804..b8ab853d 100644 --- a/modules/features/trustee/mainTrustee.py +++ b/modules/features/trustee/mainTrustee.py @@ -383,7 +383,7 @@ def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]: "parameters": { "aiPrompt": prompt + _FINANCE_STYLE_HINT, "context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]}, - "requireNeutralization": True, + "requireNeutralization": False, "simpleMode": False, }, "position": {"x": 500, "y": 0}}, ], @@ -478,7 +478,7 @@ TEMPLATE_WORKFLOWS = [ ), "resultType": "xlsx", "documentTheme": "finance", - "requireNeutralization": True, + "requireNeutralization": False, "documentList": {"type": "ref", "nodeId": "trigger", "path": ["payload", "documentList"]}, "context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]}, "simpleMode": False, diff --git a/modules/interfaces/interfaceBootstrap.py b/modules/interfaces/interfaceBootstrap.py index 4bcd0e97..b7a56a02 100644 --- a/modules/interfaces/interfaceBootstrap.py +++ b/modules/interfaces/interfaceBootstrap.py @@ -115,6 +115,10 @@ def initBootstrap(db: DatabaseConnector) -> None: # Bootstrap system 
workflow templates for graphical editor _bootstrapSystemTemplates(db) + # Sync feature template workflows (update graph of existing instance workflows + # whose templateSourceId matches a current code-defined template) + _syncFeatureTemplateWorkflows() + # Ensure billing settings and accounts exist for all mandates _bootstrapBilling() @@ -190,6 +194,97 @@ def _bootstrapSystemTemplates(db: DatabaseConnector) -> None: logger.warning(f"System workflow template bootstrap failed: {e}") +def _syncFeatureTemplateWorkflows() -> None: + """Sync existing instance-scoped workflows with current code-defined templates. + + For each feature that exposes getTemplateWorkflows(), find all AutoWorkflow + rows whose templateSourceId matches a template ID and update their graph + if the code-defined version has changed. Preserves instance-specific + fields (label, tags, targetFeatureInstanceId, invocations, active). + Idempotent, runs on every boot. + """ + import json + + try: + from modules.system.registry import loadFeatureMainModules + from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import AutoWorkflow + from modules.features.graphicalEditor.interfaceFeatureGraphicalEditor import graphicalEditorDatabase + + mainModules = loadFeatureMainModules() + + templatesBySourceId: dict = {} + for featureCode, mod in mainModules.items(): + getTemplateWorkflows = getattr(mod, "getTemplateWorkflows", None) + if not getTemplateWorkflows: + continue + try: + templates = getTemplateWorkflows() or [] + except Exception: + continue + for tpl in templates: + tplId = tpl.get("id") + if tplId: + templatesBySourceId[tplId] = tpl + + if not templatesBySourceId: + logger.info("_syncFeatureTemplateWorkflows: no templates found, skipping") + return + logger.info(f"_syncFeatureTemplateWorkflows: found {len(templatesBySourceId)} template(s): {list(templatesBySourceId.keys())}") + + greenfieldDb = DatabaseConnector( + dbHost=APP_CONFIG.get("DB_HOST", "localhost"), + 
dbDatabase=graphicalEditorDatabase, + dbUser=APP_CONFIG.get("DB_USER"), + dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD"), + ) + + updated = 0 + for sourceId, tpl in templatesBySourceId.items(): + instances = greenfieldDb.getRecordset(AutoWorkflow, recordFilter={ + "templateSourceId": sourceId, + "isTemplate": False, + }) + if not instances: + continue + + canonicalGraph = tpl.get("graph", {}) + + for inst in instances: + instId = inst.get("id") if isinstance(inst, dict) else getattr(inst, "id", None) + targetInstanceId = ( + inst.get("targetFeatureInstanceId") if isinstance(inst, dict) + else getattr(inst, "targetFeatureInstanceId", None) + ) or "" + + graphJson = json.dumps(canonicalGraph) + graphJson = graphJson.replace("{{featureInstanceId}}", targetInstanceId) + newGraph = json.loads(graphJson) + + existingGraph = inst.get("graph") if isinstance(inst, dict) else getattr(inst, "graph", None) + if isinstance(existingGraph, str): + try: + existingGraph = json.loads(existingGraph) + except Exception: + existingGraph = None + + if existingGraph == newGraph: + logger.debug(f"_syncFeatureTemplateWorkflows: graph unchanged for workflow {instId} (template={sourceId})") + continue + logger.debug(f"_syncFeatureTemplateWorkflows: graph DIFFERS for workflow {instId} (template={sourceId}), updating") + + greenfieldDb.recordModify(AutoWorkflow, instId, {"graph": newGraph}) + updated += 1 + logger.info(f"_syncFeatureTemplateWorkflows: updated graph for workflow {instId} (template={sourceId})") + + if updated: + logger.info(f"_syncFeatureTemplateWorkflows: synced {updated} workflow(s) with current templates") + else: + logger.info("_syncFeatureTemplateWorkflows: all instance graphs already match current templates") + greenfieldDb.close() + except Exception as e: + logger.warning(f"Feature template workflow sync failed: {e}") + + def _buildSystemTemplates(): """Build the graph definitions for platform system templates.""" return [ diff --git 
a/modules/routes/routeAutomationWorkspace.py b/modules/routes/routeAutomationWorkspace.py index 6efbdeb6..b742d7ea 100644 --- a/modules/routes/routeAutomationWorkspace.py +++ b/modules/routes/routeAutomationWorkspace.py @@ -58,6 +58,36 @@ def _getUserAccessibleInstanceIds(userId: str) -> list[str]: ] +_FILE_REF_KEYS = ("fileId", "documentId", "fileIds", "documents") + + +def _extractFileIdsFromValue(value, accumulator: set[str]) -> None: + """Recursively scan a value (dict/list/str) for file id references.""" + if isinstance(value, dict): + for key, sub in value.items(): + if key in _FILE_REF_KEYS: + _collectFileIdsFromRef(sub, accumulator) + else: + _extractFileIdsFromValue(sub, accumulator) + elif isinstance(value, list): + for item in value: + _extractFileIdsFromValue(item, accumulator) + + +def _collectFileIdsFromRef(val, accumulator: set[str]) -> None: + """Add file ids from a value located under a known file-reference key.""" + if isinstance(val, str) and val: + accumulator.add(val) + elif isinstance(val, list): + for v in val: + if isinstance(v, str) and v: + accumulator.add(v) + elif isinstance(v, dict) and v.get("id"): + accumulator.add(v["id"]) + elif isinstance(val, dict) and val.get("id"): + accumulator.add(val["id"]) + + @router.get("") @limiter.limit("60/minute") def listWorkspaceRuns( @@ -198,40 +228,68 @@ def getWorkspaceRunDetail( steps = [dict(s) for s in stepRecords] steps.sort(key=lambda s: s.get("startedAt") or 0) - fileItems: list = [] + allFileIds: set[str] = set() + perStepFileIds: list[tuple[set[str], set[str]]] = [] + for step in steps: + inputIds: set[str] = set() + outputIds: set[str] = set() + _extractFileIdsFromValue(step.get("inputSnapshot") or {}, inputIds) + _extractFileIdsFromValue(step.get("output") or {}, outputIds) + perStepFileIds.append((inputIds, outputIds)) + allFileIds.update(inputIds) + allFileIds.update(outputIds) + + nodeOutputs = run.get("nodeOutputs") or {} + runLevelIds: set[str] = set() + 
_extractFileIdsFromValue(nodeOutputs, runLevelIds) + allFileIds.update(runLevelIds) + + fileMetaById: dict[str, dict] = {} try: from modules.datamodels.datamodelFiles import FileItem from modules.interfaces.interfaceDbManagement import ComponentObjects mgmtDb = ComponentObjects().db if mgmtDb._ensureTableExists(FileItem): - nodeOutputs = run.get("nodeOutputs") or {} - fileIds: set[str] = set() - for nodeId, output in nodeOutputs.items(): - if not isinstance(output, dict): - continue - for key in ("fileId", "documentId", "fileIds", "documents"): - val = output.get(key) - if isinstance(val, str) and val: - fileIds.add(val) - elif isinstance(val, list): - for v in val: - if isinstance(v, str) and v: - fileIds.add(v) - elif isinstance(v, dict) and v.get("id"): - fileIds.add(v["id"]) - for fid in fileIds: + for fid in allFileIds: try: rec = mgmtDb.getRecord(FileItem, fid) if rec: - fileItems.append(dict(rec)) + recDict = dict(rec) + fileMetaById[fid] = { + "id": fid, + "fileName": recDict.get("fileName") or recDict.get("name"), + } except Exception: pass except Exception as e: logger.warning("getWorkspaceRunDetail: file lookup failed: %s", e) + def _resolveFileList(ids: set[str]) -> list[dict]: + return [fileMetaById[fid] for fid in ids if fid in fileMetaById] + + assignedFileIds: set[str] = set() + for step, (inputIds, outputIds) in zip(steps, perStepFileIds): + step["inputFiles"] = _resolveFileList(inputIds) + step["outputFiles"] = _resolveFileList(outputIds) + assignedFileIds.update(inputIds) + assignedFileIds.update(outputIds) + + unassignedFiles = _resolveFileList(allFileIds - assignedFileIds) + allFiles = _resolveFileList(allFileIds) + run["workflowLabel"] = run.get("label") or workflow.get("label") or wfId run["targetFeatureInstanceId"] = tid + targetInstanceLabel = None + if tid: + try: + from modules.routes.routeHelpers import resolveInstanceLabels + labelMap = resolveInstanceLabels([tid]) + targetInstanceLabel = labelMap.get(tid) + except Exception: + pass + 
run["targetInstanceLabel"] = targetInstanceLabel + return { "run": run, "workflow": { @@ -242,5 +300,6 @@ def getWorkspaceRunDetail( "tags": workflow.get("tags", []), } if workflow else None, "steps": steps, - "files": fileItems, + "files": allFiles, + "unassignedFiles": unassignedFiles, } diff --git a/modules/serviceCenter/services/serviceChat/mainServiceChat.py b/modules/serviceCenter/services/serviceChat/mainServiceChat.py index 0630c83b..077596b8 100644 --- a/modules/serviceCenter/services/serviceChat/mainServiceChat.py +++ b/modules/serviceCenter/services/serviceChat/mainServiceChat.py @@ -199,13 +199,8 @@ class ChatService: label = parts[1] messageFound = None for message in workflow.messages: - # Validate message belongs to this workflow msgWorkflowId = getattr(message, 'workflowId', None) if not msgWorkflowId or msgWorkflowId != workflowId: - if msgWorkflowId: - logger.warning(f"Message {message.id} has workflowId {msgWorkflowId} but belongs to workflow {workflowId}. Skipping.") - else: - logger.warning(f"Message {message.id} has no workflowId. Skipping.") continue msgLabel = getattr(message, 'documentsLabel', None) @@ -213,7 +208,6 @@ class ChatService: messageFound = message break - # If found, add documents if messageFound and messageFound.documents: allDocuments.extend(messageFound.documents) else: diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py index d82ac4f7..50500929 100644 --- a/modules/workflows/methods/methodAi/actions/process.py +++ b/modules/workflows/methods/methodAi/actions/process.py @@ -73,6 +73,47 @@ def _action_docs_to_content_parts(services, docs: List[Any]) -> List[ContentPart logger.info(f"ai.process: Extracted {len(ec.parts)} parts from {name} (no persistence)") return all_parts +def _resolve_file_refs_to_content_parts(services, fileIdRefs) -> List[ContentPart]: + """Fetch files by ID from the file store and extract content. 
+ Used for automation2 workflows where documents are file-store references, + not chat message attachments.""" + from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy + + mgmt = getattr(services, 'interfaceDbComponent', None) + extraction = getattr(services, 'extraction', None) + if not mgmt or not extraction: + logger.warning("_resolve_file_refs_to_content_parts: missing interfaceDbComponent or extraction service") + return [] + + allParts: List[ContentPart] = [] + opts = ExtractionOptions(prompt="", mergeStrategy=MergeStrategy()) + for ref in fileIdRefs: + fileId = ref.documentId + fileMeta = mgmt.getFile(fileId) + if not fileMeta: + logger.warning(f"_resolve_file_refs_to_content_parts: file {fileId} not found") + continue + fileData = mgmt.getFileData(fileId) + if not fileData: + logger.warning(f"_resolve_file_refs_to_content_parts: no data for file {fileId}") + continue + fileName = getattr(fileMeta, 'fileName', fileId) + mimeType = getattr(fileMeta, 'mimeType', 'application/octet-stream') + ec = extraction.extractContentFromBytes( + documentBytes=fileData, + fileName=fileName, + mimeType=mimeType, + documentId=fileId, + options=opts, + ) + for p in ec.parts: + if p.data or getattr(p, "typeGroup", "") == "image": + p.metadata.setdefault("originalFileName", fileName) + allParts.append(p) + logger.info(f"_resolve_file_refs_to_content_parts: extracted {len(ec.parts)} parts from {fileName}") + return allParts + + async def process(self, parameters: Dict[str, Any]) -> ActionResult: operationId = None try: @@ -129,6 +170,17 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: f"ai.process: Coerced documentList ({type(documentListParam).__name__}) " f"to DocumentReferenceList with {len(documentList.references)} references" ) + + # Resolve DocumentItemReferences (file-ID refs from automation2) directly + # from the file store. These cannot be resolved via chat messages. 
+ from modules.datamodels.datamodelDocref import DocumentItemReference + fileIdRefs = [r for r in documentList.references if isinstance(r, DocumentItemReference)] + if fileIdRefs: + extractedParts = _resolve_file_refs_to_content_parts(self.services, fileIdRefs) + if extractedParts: + inline_content_parts = (inline_content_parts or []) + extractedParts + remaining = [r for r in documentList.references if not isinstance(r, DocumentItemReference)] + documentList = DocumentReferenceList(references=remaining) # Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only. resultType = parameters.get("resultType") @@ -157,7 +209,19 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: mimeMap = {"txt": "text/plain", "json": "application/json", "html": "text/html", "md": "text/markdown", "csv": "text/csv", "xml": "application/xml"} output_mime_type = mimeMap.get(normalized_result_type, "text/plain") if normalized_result_type else "text/plain" - + + # Normalize context: workflow refs may resolve to dict/list instead of str + paramContext = parameters.get("context") + if paramContext is not None and not isinstance(paramContext, str): + try: + paramContext = json.dumps(paramContext, ensure_ascii=False, default=str) + parameters["context"] = paramContext + logger.info(f"ai.process: Serialized non-string context ({type(parameters.get('context')).__name__}) to JSON ({len(paramContext)} chars)") + except Exception as e: + logger.warning(f"ai.process: Failed to serialize context: {e}") + paramContext = str(paramContext) + parameters["context"] = paramContext + # Phase 7.3: Pass documentList and/or contentParts to AI service contentParts: Optional[List[ContentPart]] = inline_content_parts if "contentParts" in parameters and not inline_content_parts: diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py index 5265f5c9..ecd60b12 100644 --- 
a/modules/workflows/methods/methodAi/methodAi.py +++ b/modules/workflows/methods/methodAi/methodAi.py @@ -56,6 +56,23 @@ class MethodAi(MethodBase): required=False, description="Document reference(s) in any format to use as input/context" ), + "context": WorkflowActionParameter( + name="context", + type="str", + frontendType=FrontendType.TEXTAREA, + required=False, + default="", + description="Additional context data (string or upstream-bound dict/list, e.g. accounting data) appended to the prompt. Non-string values are JSON-serialized." + ), + "documentTheme": WorkflowActionParameter( + name="documentTheme", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["general", "finance", "legal", "technical", "hr"], + required=False, + default="general", + description="Style hint for the document renderer (e.g. finance, legal). Used by the AI agent to choose colors and layout." + ), "resultType": WorkflowActionParameter( name="resultType", type="str",