From e800bc0b71f9b897f232bc569d2b85998c2f3d1b Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Sat, 23 May 2026 23:54:29 +0200 Subject: [PATCH] Sync: full codebase from GitHub gateway main Co-authored-by: Cursor --- .../swift_ios_app_nachbau_3dc75f35.plan.md | 1036 --------- .../swift_ios_app_nachbau_80bb1212.plan.md | 741 ------ ...g-mietzinsbestaetigung-pilot.workflow.json | 1 - env-gateway-dev.20260515_122326.backup | 97 - env-gateway-int.20260515_122326.backup | 92 - env-gateway-prod.20260515_122326.backup | 92 - modules/auth/oauthConnectTicket.py | 101 + .../graphicalEditor/conditionOperators.py | 612 +++++ .../features/graphicalEditor/entryPoints.py | 55 +- .../interfaceFeatureGraphicalEditor.py | 22 +- .../graphicalEditor/mainGraphicalEditor.py | 5 - .../graphicalEditor/nodeDefinitions/ai.py | 199 +- .../nodeDefinitions/clickup.py | 70 +- .../nodeDefinitions/context.py | 438 +++- .../nodeDefinitions/contextPickerHelp.py | 22 + .../graphicalEditor/nodeDefinitions/data.py | 23 +- .../graphicalEditor/nodeDefinitions/email.py | 44 +- .../graphicalEditor/nodeDefinitions/file.py | 20 +- .../graphicalEditor/nodeDefinitions/input.py | 41 +- .../nodeDefinitions/redmine.py | 14 +- .../nodeDefinitions/sharepoint.py | 41 +- .../nodeDefinitions/triggers.py | 27 +- .../nodeDefinitions/trustee.py | 18 +- .../features/graphicalEditor/nodeRegistry.py | 36 +- modules/features/graphicalEditor/portTypes.py | 225 +- .../routeFeatureGraphicalEditor.py | 98 +- .../features/graphicalEditor/switchOutput.py | 308 +++ .../graphicalEditor/upstreamPathsService.py | 160 +- .../trustee/accounting/accountingBridge.py | 64 +- .../accounting/accountingConnectorBase.py | 19 +- .../connectors/accountingConnectorAbacus.py | 319 ++- modules/interfaces/interfaceBootstrap.py | 2 - modules/interfaces/interfaceDbManagement.py | 27 +- .../migration/seedData/ui_language_seed.json | 330 +++ modules/routes/routeAdminDatabaseHealth.py | 321 ++- modules/routes/routeAutomationWorkspace.py | 4 +- 
modules/routes/routeDataConnections.py | 25 +- modules/routes/routeDataSources.py | 2 +- modules/routes/routeSecurityClickup.py | 29 +- modules/routes/routeSecurityGoogle.py | 28 +- modules/routes/routeSecurityMsft.py | 28 +- modules/routes/routeWorkflowDashboard.py | 47 +- .../extractors/extractorPdf.py | 217 +- .../renderers/rendererMarkdown.py | 606 ++--- .../renderers/rendererPdf.py | 3 +- .../serviceGeneration/subDocumentUtility.py | 68 +- .../serviceKnowledge/_costEstimate.py | 20 +- modules/shared/debugLogger.py | 6 + modules/shared/frontendTypes.py | 9 + modules/system/databaseMigration.py | 611 +++++ .../workflows/automation2/executionEngine.py | 770 ++++++- .../executors/actionNodeExecutor.py | 544 +++-- .../automation2/executors/flowExecutor.py | 280 ++- .../automation2/executors/inputExecutor.py | 17 +- .../automation2/executors/ioExecutor.py | 6 +- .../automation2/executors/triggerExecutor.py | 8 + modules/workflows/automation2/graphUtils.py | 307 ++- .../graphicalEditorRunFileLogger.py | 215 ++ .../automation2/pickNotPushMigration.py | 222 +- .../automation2/workflowArtifactVisibility.py | 32 + modules/workflows/methods/methodAi/_common.py | 84 +- .../methods/methodAi/actions/process.py | 43 +- .../workflows/methods/methodAi/methodAi.py | 41 +- modules/workflows/methods/methodBase.py | 45 +- .../methodContext/actions/extractContent.py | 1991 +++++++++++++++-- .../methodContext/actions/filterContext.py | 141 ++ .../methodContext/actions/mergeContext.py | 254 +++ .../methodContext/actions/neutralizeData.py | 460 ++-- .../methodContext/actions/setContext.py | 459 ++++ .../methodContext/actions/transformContext.py | 223 ++ .../methods/methodContext/contextEnvelope.py | 42 + .../methods/methodContext/methodContext.py | 256 ++- .../methods/methodFile/actions/create.py | 338 ++- .../methods/methodFile/methodFile.py | 14 +- .../processing/core/actionExecutor.py | 26 +- .../processing/core/messageCreator.py | 11 + ...xecute_graph_loop_aggregate_consolidate.py 
| 50 +- ...test_accountingConnectorAbacus_balances.py | 27 +- .../test_condition_operator_catalog.py | 49 + .../test_resolve_value_kind.py | 60 + .../test_upstream_paths_and_graph_schema.py | 19 + tests/unit/services/test_costEstimate.py | 10 +- .../workflow/test_extract_content_handover.py | 681 ++++++ .../workflow/test_flow_executor_conditions.py | 94 + .../workflow/test_merge_context_handover.py | 206 ++ tests/unit/workflow/test_node_combinations.py | 702 ++++++ .../unit/workflow/test_phase3_context_node.py | 47 +- .../workflow/test_phase4_workflow_nodes.py | 21 +- .../workflow/test_switch_filtered_output.py | 359 +++ .../workflows/test_automation2_graphUtils.py | 93 +- tests/unit/workflows/test_trigger_executor.py | 31 + 91 files changed, 13133 insertions(+), 3568 deletions(-) delete mode 100644 .cursor/plans/swift_ios_app_nachbau_3dc75f35.plan.md delete mode 100644 .cursor/plans/swift_ios_app_nachbau_80bb1212.plan.md delete mode 100644 env-gateway-dev.20260515_122326.backup delete mode 100644 env-gateway-int.20260515_122326.backup delete mode 100644 env-gateway-prod.20260515_122326.backup create mode 100644 modules/auth/oauthConnectTicket.py create mode 100644 modules/features/graphicalEditor/conditionOperators.py create mode 100644 modules/features/graphicalEditor/nodeDefinitions/contextPickerHelp.py create mode 100644 modules/features/graphicalEditor/switchOutput.py create mode 100644 modules/system/databaseMigration.py create mode 100644 modules/workflows/automation2/graphicalEditorRunFileLogger.py create mode 100644 modules/workflows/automation2/workflowArtifactVisibility.py create mode 100644 modules/workflows/methods/methodContext/actions/filterContext.py create mode 100644 modules/workflows/methods/methodContext/actions/mergeContext.py create mode 100644 modules/workflows/methods/methodContext/actions/setContext.py create mode 100644 modules/workflows/methods/methodContext/actions/transformContext.py create mode 100644 
modules/workflows/methods/methodContext/contextEnvelope.py create mode 100644 tests/unit/graphicalEditor/test_condition_operator_catalog.py create mode 100644 tests/unit/graphicalEditor/test_resolve_value_kind.py create mode 100644 tests/unit/workflow/test_extract_content_handover.py create mode 100644 tests/unit/workflow/test_flow_executor_conditions.py create mode 100644 tests/unit/workflow/test_merge_context_handover.py create mode 100644 tests/unit/workflow/test_node_combinations.py create mode 100644 tests/unit/workflow/test_switch_filtered_output.py create mode 100644 tests/unit/workflows/test_trigger_executor.py diff --git a/.cursor/plans/swift_ios_app_nachbau_3dc75f35.plan.md b/.cursor/plans/swift_ios_app_nachbau_3dc75f35.plan.md deleted file mode 100644 index 8fa5384c..00000000 --- a/.cursor/plans/swift_ios_app_nachbau_3dc75f35.plan.md +++ /dev/null @@ -1,1036 +0,0 @@ ---- -name: Swift iOS App Nachbau -overview: Vollstaendiger Implementierungsplan fuer den Nachbau des React-Web-Frontends (frontend_nyla) als native Swift/SwiftUI iOS 18+ App fuer iPhone und iPad. Baut auf dem bestehenden Plan auf, korrigiert Fehler und integriert alle geklaerten Entscheidungen (kein Backend-Aenderungen, kein Voice, kein Push, ASWebAuthenticationSession fuer OAuth, read-only Billing, nativer Code-Editor). 
-todos: - - id: phase-0 - content: "Phase 0: Xcode-Projekt erstellen, Ordnerstruktur, SPM-Dependencies (Runestone, MarkdownUI), Build-Configs (Dev/Int/Prod), TestFlight Setup" - status: pending - - id: phase-1 - content: "Phase 1: Core Networking Layer -- APIClient (URLSession + Cookies + Headers), SSEClient (POST-basiert, async bytes), CSRFManager" - status: pending - - id: phase-2 - content: "Phase 2: Authentication -- LocalAuth, ASWebAuthenticationSession fuer MS/Google, Keychain, Biometrie, 401-Handler" - status: pending - - id: phase-3 - content: "Phase 3: Domain Models (Mandate, Feature, Instance, Permissions, Pagination) + FeatureStore (@Observable)" - status: pending - - id: phase-4 - content: "Phase 4: App Shell -- NavigationSplitView (iPad/iPhone adaptiv), Backend-driven Sidebar, Dashboard, SF Symbol Mapping" - status: pending - - id: phase-5 - content: "Phase 5: i18n String Catalogs (de/en/fr) + Theme System (System/Light/Dark) + DesignTokens" - status: pending - - id: phase-6 - content: "Phase 6: Shared UI -- FormGeneratorForm + FormGeneratorTable + Report, ContentPreview, ChatMessage, Toast, SearchBar" - status: pending - - id: phase-7 - content: "Phase 7: Core Pages -- Store, GDPR, Basedata (Prompts/Files/Connections), Billing (read-only), Settings" - status: pending - - id: phase-8 - content: "Phase 8: Admin Module -- 16 Admin-Seiten (Mandates, Users, RBAC, Invitations, Wizards, Billing Admin, Logs)" - status: pending - - id: phase-9 - content: "Phase 9: Feature Trustee -- Dashboard, Documents, Positions, Roles, Expense-Import, VisionKit Scan, Accounting" - status: pending - - id: phase-10 - content: "Phase 10: Feature Workspace -- SSE Chat-Streaming, Workflows, Files, Datasources, Pending Edits, KeepAlive" - status: pending - - id: phase-11 - content: "Phase 11: Feature Chatbot -- SSE-Streaming Chat, Threads, Conversations" - status: pending - - id: phase-12 - content: "Phase 12: Feature Teamsbot -- Sessions, WebSocket Bot, Config, MFA, Screenshots" 
- status: pending - - id: phase-13 - content: "Phase 13: Feature CommCoach -- Coaching Sessions, Text-Streaming, Tasks, Badges, Scores, Export" - status: pending - - id: phase-14 - content: "Phase 14: Feature Automation -- Definitions CRUD, Templates, Execute, Workflow-Management" - status: pending - - id: phase-15 - content: "Phase 15: Feature Automation2 -- Workflows CRUD, Tasks, Execute (ohne visuellen Node-Editor)" - status: pending - - id: phase-16 - content: "Phase 16: Feature CodeEditor -- Runestone Editor, SSE-Stream, Pending Edits, Syntax-Highlighting" - status: pending - - id: phase-17 - content: "Phase 17: Feature RealEstate/PEK -- MapKit Integration, Parcels, Address-Search, BZO, Koordinaten-Transformation" - status: pending - - id: phase-18 - content: "Phase 18: Feature Neutralization -- Config, Neutralize Text/File, Stats" - status: pending -isProject: false ---- - -# Nyla iOS/iPadOS App -- Aktualisierter Implementierungsplan - -## Aenderungen gegenueber dem bestehenden Plan - -Basierend auf der vollstaendigen Code-Analyse und den Klaerungen: - -- **Korrektur Auth**: Backend-Popup-Flow (window.open) wird durch `ASWebAuthenticationSession` ersetzt -- KEIN MSAL iOS SDK noetig, da das Backend die OAuth-Flows selbst handled -- **Entfernt**: Voice-Features (Phase ueberall vereinfacht), Push Notifications (Phase 8 entfaellt), Automation2 visueller Node-Editor, Stripe Checkout -- **Hinzugefuegt**: Detailliertes FormGenerator-Mapping, Workspace-Layout nach Apple HIG, nativer Code-Editor mit Runestone -- **Korrektur Billing**: Nur read-only (Balance, Transaktionen, Statistiken) -- kein Checkout -- **iOS 18+**: Erlaubt Nutzung aller modernen SwiftUI APIs -- **Team**: 2-3 Entwickler, parallelisierbare Feature-Module - ---- - -## Architektur-Ueberblick - -```mermaid -graph TD - subgraph presentation [SwiftUI Views] - Views[Screens und Components] - FormGen[FormGenerator SwiftUI] - ChatUI[Chat und Streaming UI] - end - - subgraph viewmodels [ViewModels] - 
VM["@Observable ViewModels"] - end - - subgraph repositories [Repositories] - Repo[Repository Protokolle] - end - - subgraph networking [Networking Layer] - APIClient[APIClient URLSession] - SSEClient[SSEClient async bytes] - CSRFMgr[CSRFManager] - CookieStore[HTTPCookieStorage] - end - - subgraph backend [Gateway FastAPI] - API["/api/* Endpoints"] - end - - Views --> VM - FormGen --> VM - ChatUI --> VM - VM --> Repo - Repo --> APIClient - Repo --> SSEClient - APIClient --> CSRFMgr - APIClient --> CookieStore - APIClient --> API - SSEClient --> API -``` - - - -### Technische Entscheidungen (aktualisiert) - -- **Plattform**: iOS 18+ / iPadOS 18+ (iPhone + iPad mit adaptivem Layout) -- **UI-Framework**: SwiftUI (rein, kein UIKit ausser wo zwingend noetig) -- **Architektur**: MVVM + Repository Pattern -- **Networking**: URLSession + async/await + Codable -- **SSE**: Custom Client auf `URLSession.bytes(for:)` Basis (POST-basiert, nicht EventSource) -- **Auth**: ASWebAuthenticationSession fuer MS/Google OAuth, Local Login via API, Keychain fuer Token-Storage -- **State**: `@Observable` (Observation Framework) -- **Navigation**: `NavigationSplitView` (iPad 2-3 Spalten) + `NavigationStack` (iPhone auto-collapse) -- **DI**: SwiftUI `@Environment` -- **Packages**: SPM -- Runestone (Code-Editor), ggf. 
MarkdownUI -- **Karten**: MapKit (SwiftUI) -- **Charts**: Swift Charts Framework -- **i18n**: String Catalogs (`.xcstrings`) fuer de/en/fr -- **Persistenz**: Keychain (Secrets), UserDefaults (Preferences) -- **Distribution**: TestFlight - -### Projektstruktur - -``` -NylaApp/ - NylaApp.swift - Config/ - AppConfig.swift # API URLs per Build Config - Environment.swift # Dev/Int/Prod - Core/ - Networking/ - APIClient.swift # Zentraler HTTP-Client (= api.ts) - APIError.swift - APIEndpoints.swift - SSEClient.swift # Server-Sent Events (= sseClient.ts) - CSRFManager.swift # CSRF Token (= csrfUtils.ts) - RequestContext.swift # X-Mandate-Id, X-Instance-Id - Auth/ - AuthManager.swift # Zentrale Auth-Logik - LocalAuthService.swift # Username/Password - OAuthService.swift # ASWebAuthenticationSession (MS + Google) - KeychainService.swift - Navigation/ - NavigationStore.swift # Backend-driven Nav State - AppRouter.swift # Root Navigation Coordinator - Localization/ - Localizable.xcstrings - LanguageManager.swift - Theme/ - ThemeManager.swift - DesignTokens.swift - Domain/ - Models/ # Codable Structs - Repositories/ # Protokolle - Data/ - API/ # 21 API-Module (= src/api/*.ts) - Repositories/ # Implementierungen - Features/ # Feature-Module (je Ordner) - Dashboard/ - Store/ - Settings/ - GDPR/ - Basedata/ - Billing/ - Admin/ - Trustee/ - Workspace/ - Chatbot/ - Teamsbot/ - CommCoach/ - Automation/ - Automation2/ - RealEstate/ - Neutralization/ - Shared/ - Components/ - FormGenerator/ # Dynamische Formulare - ContentPreview/ - ChatMessage/ - AccessRules/ - SearchBar/ - LoadingView/ - ErrorView/ - EmptyStateView/ - Extensions/ - Utilities/ - Resources/ - Assets.xcassets -``` - ---- - -## Phase 0: Projekt-Setup (1-2 Tage) - -- Xcode-Projekt erstellen (iOS 18+, SwiftUI App Lifecycle, iPhone + iPad) -- Ordnerstruktur gemaess obigem Schema -- SPM Dependencies: - - **Runestone** (Code-Editor mit TreeSitter) -- fuer Phase 17 - - **MarkdownUI** (oder Apple's native AttributedString) -- 
fuer Chat-Rendering - - Alle anderen Features nutzen System-Frameworks (MapKit, Charts, PDFKit) -- Build-Konfigurationen (`.xcconfig`): **Dev** / **Int** / **Prod** - - `API_BASE_URL`: `http://localhost:8000` / INT-URL / PROD-URL - - Analog zu [frontend_nyla/config/config.ts](frontend_nyla/config/config.ts) (`VITE_API_BASE_URL`) -- TestFlight: App ID, Provisioning Profile, Code Signing -- SwiftLint Konfiguration -- Imlementiere unter zuhilfenahme der rules unter /.cursor - ---- - -## Phase 1: Core Networking Layer (3-5 Tage) - -### APIClient.swift -- Equivalent zu [frontend_nyla/src/api.ts](frontend_nyla/src/api.ts) - -Zentrale Anforderungen aus der Web-Analyse: - -- **Cookie-basierte Auth**: `URLSessionConfiguration.default` mit `httpCookieStorage = .shared` -- httpOnly Cookies werden automatisch gesendet (`withCredentials: true` Equivalent) -- **Bearer Token**: Aus Keychain lesen, als `Authorization: Bearer {token}` Header setzen (nur wenn vorhanden, analog Zeile ~100-110 in `api.ts`) -- **Kontext-Headers**: `X-Mandate-Id` und `X-Instance-Id` aus dem aktuellen NavigationStore (analog `getContextFromUrl()` in `api.ts` das die URL parst -- in Swift kommt der Context aus dem Navigation State, nicht aus einer URL) -- **CSRF**: Fuer POST/PUT/PATCH/DELETE automatisch `X-CSRF-Token` Header setzen -- **401 Handler**: Auth-State clearen und zur Login-Ansicht navigieren (analog `api.ts` Response Interceptor) -- **429 Handler**: Rate-Limit Warning anzeigen -- **Error Parsing**: FastAPI `detail` kann String oder Array sein -- beide Faelle behandeln - -```swift -// Konzept APIClient -@Observable class APIClient { - func get(_ path: String, query: [String: String]?) async throws -> T - func post(_ path: String, body: Encodable?) async throws -> T - func put(_ path: String, body: Encodable?) async throws -> T - func delete(_ path: String) async throws - func upload(_ path: String, fileData: Data, fileName: String, fields: [String: String]?) 
async throws -> T -} -``` - -### SSEClient.swift -- Equivalent zu [frontend_nyla/src/utils/sseClient.ts](frontend_nyla/src/utils/sseClient.ts) - -**Kritisch**: Das Web nutzt `fetch(POST)` mit `ReadableStream` -- NICHT standard EventSource (GET). In Swift: - -```swift -func startStream(url: URL, body: Encodable, onEvent: @escaping (SSEEvent) -> Void) async throws { - var request = URLRequest(url: url) - request.httpMethod = "POST" - request.httpBody = try JSONEncoder().encode(body) - // + Auth headers, CSRF, Content-Type - - let (bytes, response) = try await URLSession.shared.bytes(for: request) - for try await line in bytes.lines { - if line.hasPrefix("data: ") { - let json = String(line.dropFirst(6)) - let event = try JSONDecoder().decode(SSEEvent.self, from: json.data(using: .utf8)!) - onEvent(event) - } - } -} -``` - -Wird benoetigt fuer: Workspace Chat, Chatbot, CommCoach, Trustee Streaming. - -### CSRFManager.swift -- Equivalent zu [frontend_nyla/src/utils/csrfUtils.ts](frontend_nyla/src/utils/csrfUtils.ts) - -- Token: 16 Hex-Zeichen via `SecRandomCopyBytes` (analog `crypto.getRandomValues`) -- Gespeichert im Keychain (nicht UserDefaults -- Sicherheit) -- Automatisch bei jedem mutierenden Request angehaengt - ---- - -## Phase 2: Authentication (3-5 Tage) - -### Kritische Erkenntnis: Zwei Auth-Patterns koexistieren - -Die Web-App nutzt **zwei verschiedene** Auth-Mechanismen gleichzeitig: - -1. **Local Login**: `POST /api/local/login` (form-encoded) setzt httpOnly Cookies -- danach Cookie-basierte Auth -2. **Microsoft/Google**: Backend-Popup-Flow setzt sowohl Cookies ALS AUCH `localStorage.authToken` + Bearer Header - -**Fuer die native App (ohne Backend-Aenderungen):** - -- **Local Login**: Funktioniert direkt -- `POST /api/local/login` setzt Cookies, `URLSession` speichert sie automatisch -- **Microsoft OAuth**: `ASWebAuthenticationSession` oeffnet `/api/msft/auth/login` im Safari-Sheet. Das Backend macht den OAuth-Dance und redirected am Ende zurueck. 
Der Callback-URL-Handler muss die Cookies aus dem Safari-Sheet in die URLSession uebernehmen. **Herausforderung**: ASWebAuthenticationSession teilt Cookies mit Safari, NICHT mit URLSession. Loesung: Nach dem OAuth-Flow `GET /api/msft/me` aufrufen -- wenn Cookies korrekt gesetzt sind, kommt der User zurueck. Falls nicht: `prefersEphemeralWebBrowserSession = false` verwenden, damit Cookies persistent sind. -- **Google OAuth**: Gleicher Flow wie Microsoft via `ASWebAuthenticationSession` mit `/api/google/auth/login` - -### AuthManager.swift - -```swift -@Observable class AuthManager { - var isAuthenticated = false - var currentUser: User? - var authAuthority: String? // "local", "msft", "google" - - func loginLocal(username: String, password: String) async throws - func loginMicrosoft(presentingWindow: ASPresentationAnchor) async throws - func loginGoogle(presentingWindow: ASPresentationAnchor) async throws - func fetchCurrentUser() async throws -> User - func logout() async throws -} -``` - -Analog zu: - -- [frontend_nyla/src/hooks/useAuthentication.ts](frontend_nyla/src/hooks/useAuthentication.ts) -- `useAuth`, `useMsalAuth`, `useGoogleAuth` -- [frontend_nyla/src/api/authApi.ts](frontend_nyla/src/api/authApi.ts) -- `loginApi`, `fetchCurrentUserApi`, `logoutApi` - -### Login Screen - -- Username/Password Felder -- "Mit Microsoft anmelden" Button -> ASWebAuthenticationSession -- "Mit Google anmelden" Button -> ASWebAuthenticationSession -- Face ID / Touch ID (wenn zuvor erfolgreich eingeloggt) -- Links: Registrierung, Passwort vergessen -- Analog zu [frontend_nyla/src/pages/Login.tsx](frontend_nyla/src/pages/Login.tsx) - -### Biometrische Auth (Bonus, nicht im Web) - -- Nach erstem Login: Frage ob Face ID/Touch ID aktiviert werden soll -- Credentials verschluesselt im Keychain speichern -- Bei App-Start: Biometrie -> Auto-Login - ---- - -## Phase 3: Domain Models + Feature Store (2-3 Tage) - -### Zentrale Models - -Mapping der TypeScript-Types aus 
[frontend_nyla/src/types/mandate.ts](frontend_nyla/src/types/mandate.ts): - -```swift -struct I18nLabel: Codable { - var de: String; var en: String; var fr: String? - func localized(_ lang: String) -> String { - switch lang { - case "en": return en - case "fr": return fr ?? de - default: return de - } - } -} - -enum AccessLevel: String, Codable { case none = "n", my = "m", group = "g", all = "a" } - -struct TablePermission: Codable { - var view: Bool - var read, create, update, delete: AccessLevel -} - -struct FieldPermission: Codable { var read: Bool; var write: Bool } - -struct InstancePermissions: Codable { - var tables: [String: TablePermission] - var fields: [String: [String: FieldPermission]]? - var views: [String: Bool] - var isAdmin: Bool? -} - -struct FeatureInstance: Codable, Identifiable { - var id: String - var featureCode, mandateId, mandateName, instanceLabel: String - var userRoles: [String] - var permissions: InstancePermissions -} - -struct MandateFeature: Codable { - var code: String; var label: I18nLabel; var icon: String - var instances: [FeatureInstance] -} - -struct Mandate: Codable, Identifiable { - var id, name: String - var label, code: String? - var features: [MandateFeature] -} -``` - -### FeatureStore -- Analog zu [frontend_nyla/src/stores/featureStore.tsx](frontend_nyla/src/stores/featureStore.tsx) - -```swift -@Observable class FeatureStore { - var mandates: [Mandate] = [] - var isLoading = false - var isInitialized = false - - func loadFeatures() async throws // GET /api/features/my - func getMandateById(_ id: String) -> Mandate? - func getInstanceById(_ id: String) -> FeatureInstance? -} -``` - -### Pagination Model -- Analog zum Backend `PaginatedResponse` - -```swift -struct PaginatedResponse: Codable { - var items: [T] - var total: Int - var page: Int - var pageSize: Int - var totalPages: Int -} - -struct PaginationParams: Encodable { - var page: Int = 1 - var pageSize: Int = 25 - var search: String? - var sort: [SortConfig]? 
- var filters: [String: String]? -} -``` - ---- - -## Phase 4: App Shell + Navigation (4-6 Tage) - -### Adaptive Layout nach Apple HIG - -**iPad** (Regular Width): - -```mermaid -graph LR - subgraph splitView [NavigationSplitView] - Sidebar[Sidebar: Navigation] - Content[Content Area] - end - Sidebar --> Content -``` - - - -- `NavigationSplitView` mit Sidebar + Detail -- Sidebar: Backend-driven Navigation (Mandate > Feature > Instance > Views) -- Detail: NavigationStack fuer den Content-Bereich - -**iPhone** (Compact Width): - -- Automatisches Collapse durch `NavigationSplitView` in `NavigationStack` -- Tab-basierte Hauptnavigation fuer schnellen Zugriff auf Dashboard, Settings -- Sidebar oeffnet sich als Sheet oder wird zum NavigationStack - -### Backend-driven Navigation - -`GET /api/navigation?language={lang}` liefert den kompletten Navigationsbaum. - -Analog zu [frontend_nyla/src/hooks/useNavigation.ts](frontend_nyla/src/hooks/useNavigation.ts) und [frontend_nyla/src/components/Navigation/MandateNavigation.tsx](frontend_nyla/src/components/Navigation/MandateNavigation.tsx): - -- **Static Blocks**: "Meine Sicht" (Dashboard, Store, Settings, etc.), "Administration" (Admin-Seiten mit Subgroups) -- **Dynamic Block**: Mandate > Features > Instances > Views (hierarchischer Baum) -- Icon-Mapping: Web `react-icons` -> SF Symbols (Mapping-Tabelle erstellen) - -### Screen-Routing - -```swift -enum AppDestination: Hashable { - case dashboard - case store - case settings - case gdpr - case basedata(BasedataSection) - case billing - case admin(AdminSection) - case feature(mandateId: String, featureCode: String, instanceId: String, view: String) -} -``` - -Analog zum Route-Setup in [frontend_nyla/src/App.tsx](frontend_nyla/src/App.tsx) -- der `VIEW_COMPONENTS` Map in [frontend_nyla/src/pages/FeatureView.tsx](frontend_nyla/src/pages/FeatureView.tsx). 
- -### Feature-View-Dispatcher - -```swift -@ViewBuilder -func featureView(code: String, view: String, instance: FeatureInstance) -> some View { - switch (code, view) { - case ("trustee", "dashboard"): TrusteeDashboardView(instance: instance) - case ("trustee", "documents"): TrusteeDocumentsView(instance: instance) - case ("workspace", "dashboard"): WorkspaceView(instance: instance) - case ("chatbot", "conversations"): ChatbotConversationsView(instance: instance) - // ... alle Mappings - default: NotFoundView() - } -} -``` - -### MainLayout-Equivalent - -Analog zu [frontend_nyla/src/layouts/MainLayout.tsx](frontend_nyla/src/layouts/MainLayout.tsx): - -- Logo oben in der Sidebar -- MandateNavigation als Hauptinhalt der Sidebar -- UserSection am unteren Rand der Sidebar (Profilbild, Name, Logout) -- FeatureLayout: Breadcrumb Header (Mandate > Feature > Instance) + Role Badge, analog [frontend_nyla/src/layouts/FeatureLayout.tsx](frontend_nyla/src/layouts/FeatureLayout.tsx) - -### Dashboard - -Analog zu [frontend_nyla/src/pages/Dashboard.tsx](frontend_nyla/src/pages/Dashboard.tsx): - -- Titel "Uebersicht" mit Anzahl Instanzen/Mandate -- Sektionen pro Mandate mit Instance-Karten -- Jede Karte linkt zur ersten View der Instanz (`instance.views[0].uiPath`) -- Bei 0 Instanzen: Navigation zum Store - ---- - -## Phase 5: i18n + Theme (2-3 Tage) - -### Internationalisierung - -- **String Catalogs** (`.xcstrings`) fuer de/en/fr -- Alle statischen Strings aus [frontend_nyla/src/locales/de.ts](frontend_nyla/src/locales/de.ts), `en.ts`, `fr.ts` uebernehmen -- Dynamische Labels: `I18nLabel.localized(lang)` Helper -- `LanguageManager` speichert Praeferenz in UserDefaults, default `de` -- Sprachauswahl in Settings analog Web - -### Theme - -- `@AppStorage("theme") var theme: String = "system"` -- `.preferredColorScheme()` fuer System-Integration (auto/light/dark) -- `DesignTokens`: Farben als `Color` Extensions, Spacing als CGFloat Constants -- CSS-Variable-Equivalent: SwiftUI 
`@Environment(\.colorScheme)` + Custom EnvironmentValues -- Mapping der CSS-Variablen aus dem Web (`--primary-color`, `--border-color`, etc.) zu Swift `Color` Assets - ---- - -## Phase 6: Shared UI Components (5-8 Tage) - -### FormGenerator (KRITISCH -- wird von fast allen Features genutzt) - -Analog zu [frontend_nyla/src/components/FormGenerator/](frontend_nyla/src/components/FormGenerator/): - -**Konzept**: Dynamische Formulare und Tabellen basierend auf `AttributeDefinition` Arrays vom Backend (`GET /api/attributes/{entityType}`). - -#### Attribut-Typ-Mapping (Web -> Swift) - -Basierend auf [frontend_nyla/src/utils/attributeTypeMapper.ts](frontend_nyla/src/utils/attributeTypeMapper.ts): - -- `text` / `string` -> `TextField` -- `textarea` -> `TextEditor` (mit `minRows`/`maxRows` -> `.frame(minHeight:maxHeight:)`) -- `email` -> `TextField` mit `.keyboardType(.emailAddress)` -- `url` -> `TextField` mit `.keyboardType(.URL)` -- `password` -> `SecureField` -- `number` / `integer` / `float` -> `TextField` mit `.keyboardType(.decimalPad)` + NumberFormatter -- `select` / `enum` -> `Picker` (Wheel/Menu/Inline je nach Kontext) -- `multiselect` -> Multi-Selection List oder Chip-basierte Auswahl -- `checkbox` / `boolean` -> `Toggle` -- `date` -> `DatePicker(.date)` -- `time` -> `DatePicker(.hourAndMinute)` -- `timestamp` -> `DatePicker` (date + time) -- `multilingual` -> Custom View mit 3 TextFields (de/en/fr) und Sprach-Tabs -- `file` -> File-Picker Button + Vorschau -- `readonly` -> `Text` (nicht editierbar) - -#### FormGeneratorForm (Formular-Ansicht) - -```swift -struct FormGeneratorForm: View { - let entityType: String - let mode: FormMode // .create, .edit, .view - @Binding var data: [String: AnyCodable] - var attributes: [AttributeDefinition]? // optional, sonst von API laden - var onSubmit: (([String: AnyCodable]) async throws -> Void)? - var filterFields: [String]? - var customValidator: (([String: AnyCodable], [AttributeDefinition]) -> [String: String])? 
-} -``` - -- Laedt Attribute von `GET /api/attributes/{entityType}` wenn nicht uebergeben -- Sortiert nach `order` (default 999) -- Filtert nach `visible`, `editableOnCreate`/`editableOnUpdate` -- Options-Loading: Wenn `options` ein String ist (API-Pfad), parallele GETs mit `{instanceId}` Replacement - -#### FormGeneratorTable (Tabellen-Ansicht) - -Analog zu [frontend_nyla/src/components/FormGenerator/FormGeneratorTable/FormGeneratorTable.tsx](frontend_nyla/src/components/FormGenerator/FormGeneratorTable/FormGeneratorTable.tsx): - -- **Server-seitige** Pagination, Sortierung, Filterung (kein Client-Side!) -- Debounced Suche (300ms) -> `paginationParams.search` -- Multi-Column Sort (asc -> desc -> entfernt) -- Filter-Dropdowns pro Spalte (Werte von `{apiEndpoint}/filter-values?column=...`) -- Pagination Controls: First/Prev/Next/Last + Seitenzahlen -- Row Actions: Edit, Delete, Download, Custom Actions -- Inline Editing fuer unterstuetzte Typen -- Group-By Support mit Custom Group Renderer - -**Auf iOS/iPad**: `List` oder `LazyVStack` statt HTML-Table. Auf iPad breites Layout, auf iPhone Card-basiert oder horizontales Scrollen. - -#### FormGeneratorReport (Statistik-Ansicht) - -- Sektionen: KPI Grid, Bar, Line, Area, Pie Charts, Tabellen -- Toolbar: Periodenfilter, Datumsbereich -- Swift Charts fuer alle Chart-Typen (Recharts-Equivalent) - -### ContentPreview - -- PDF: `PDFKitView` (UIViewRepresentable mit PDFKit) -- Bilder: AsyncImage -- JSON: Syntax-Highlighting -- HTML: WKWebView (Mini) - -### ChatMessage Components - -Analog zu [frontend_nyla/src/components/UiComponents/Messages/](frontend_nyla/src/components/UiComponents/Messages/): - -- User vs. 
Assistant Bubbles -- Markdown-Rendering (MarkdownUI oder AttributedString) -- Code-Blocks mit Syntax-Highlighting -- File-Attachments (Karten mit Icon + Name) -- Streaming-Indicator (Typing Animation) -- Tool-Activity Anzeige -- Auto-Scroll zum neuesten Eintrag - -### Weitere Shared Components - -- **SearchBar**: Debounced Suchfeld -- **LoadingView**: Spinner/Skeleton -- **ErrorView**: Fehlermeldung mit Retry-Button -- **EmptyStateView**: Illustration + Text + Action -- **Toast**: Analog [frontend_nyla/src/contexts/ToastContext.tsx](frontend_nyla/src/contexts/ToastContext.tsx) -- Types: success (5s), error (8s), warning (6s), info (5s) - ---- - -## Phase 7: Core Pages (5-7 Tage) - -### Store (Feature Marketplace) - -- `GET /api/store/features` -> Feature-Liste als Karten -- `POST /api/store/activate` / `deactivate` -- Analog [frontend_nyla/src/pages/Store.tsx](frontend_nyla/src/pages/Store.tsx) - -### GDPR - -- `GET /api/user/me/data-export`, `/data-portability` -- `DELETE /api/user/me/` (mit Bestaetigung) -- Analog [frontend_nyla/src/pages/GDPR.tsx](frontend_nyla/src/pages/GDPR.tsx) - -### Basedata -- Prompts - -- FormGeneratorTable + FormGeneratorForm fuer CRUD auf `/api/prompts` -- Analog [frontend_nyla/src/pages/PromptsPage.tsx](frontend_nyla/src/pages/PromptsPage.tsx) - -### Basedata -- Files - -- `GET /api/files/list`, Upload, Download, Preview -- Ordnerstruktur: `GET /api/files/folders` -- Upload: `UIDocumentPickerViewController` (via UIViewControllerRepresentable) -- Vorschau: QuickLook Framework -- Batch-Delete, Move, Folder-Management -- Analog [frontend_nyla/src/pages/FilesPage.tsx](frontend_nyla/src/pages/FilesPage.tsx) - -### Basedata -- Connections - -- FormGeneratorTable + FormGeneratorForm fuer CRUD auf `/api/connections/` -- Connect/Disconnect Aktionen -- Analog [frontend_nyla/src/pages/ConnectionsPage.tsx](frontend_nyla/src/pages/ConnectionsPage.tsx) - -### Billing (Read-Only) - -- `GET /api/billing/balance` -> Saldo-Anzeige -- `GET 
/api/billing/transactions` -> Transaktionsliste -- `GET /api/billing/statistics/{period}` -> Charts mit Swift Charts -- KEIN Checkout/Stripe in der App -- Analog [frontend_nyla/src/pages/billing/BillingDataView.tsx](frontend_nyla/src/pages/billing/BillingDataView.tsx) - -### Settings - -- Theme-Toggle (System/Light/Dark) -- Sprachauswahl (de/en/fr) -- Profil-Bearbeitung (via `/api/users/{userId}`) -- Passwort aendern (`POST /api/users/change-password`) -- Analog [frontend_nyla/src/pages/Settings.tsx](frontend_nyla/src/pages/Settings.tsx) - ---- - -## Phase 8: Admin Module (5-7 Tage) - -Alle Admin-Seiten nutzen den FormGenerator intensiv. Analog zu [frontend_nyla/src/pages/admin/](frontend_nyla/src/pages/admin/): - -- **Mandates**: CRUD `/api/mandates/` + User-Zuweisung -- **Users**: CRUD `/api/users/` + Password-Link senden -- **User-Mandates**: `/api/mandates/{id}/users` -- Zuweisungen verwalten -- **Access Hub**: `/api/rbac/permissions`, `/api/rbac/rules` -- Regel-Editor -- **Feature Instances**: `/api/features/instances` -- CRUD + Sync-Roles -- **Feature Roles**: `/api/features/templates/roles` -- Template-Rollen -- **Feature Users**: `/api/features/instances/{id}/users` -- Benutzer pro Instanz -- **Invitations**: CRUD `/api/invitations/` + Token-Validierung -- **Mandate Roles**: `/api/rbac/roles` -- Rollen-Verwaltung -- **Role Permissions**: `/api/rbac/rules/by-role/{roleId}` -- Matrix-Ansicht -- **User Access Overview**: `/api/admin/user-access-overview/` -- Read-Only Uebersicht -- **Billing Admin**: `/api/billing/admin/` -- Read-Only (Accounts, Transactions, Settings) -- **Subscriptions Admin**: `/api/subscription/admin/all` -- Uebersicht -- **Automation Events**: `/api/admin/automation-events` -- Liste + Sync -- **Logs**: `/api/admin/logs` -- Systemlogs mit Download -- **Mandate Wizard**: Kombination von Mandate erstellen + Features zuweisen + Benutzer einladen -- **Invitation Wizard**: Gesteuerter Einladungs-Flow - ---- - -## Phase 9: Feature Trustee (5-7 
Tage) - -API-Basis: `/api/trustee/{instanceId}/` - -Views (aus [frontend_nyla/src/pages/views/trustee/](frontend_nyla/src/pages/views/trustee/)): - -- **Dashboard**: Uebersicht ueber Organisationen, Vertraege, Positionen -- **Documents**: CRUD + Upload (`POST .../documents/upload`) + Download -- **Positions**: CRUD mit verschachtelten Position-Documents -- **Instance-Roles**: Rollenverwaltung pro Instanz -- **Expense-Import**: CSV/Excel Import von Belegen (Automation-basiert) -- **Scan-Upload**: Dokument-Scan -> auf iOS: `VNDocumentCameraViewController` (VisionKit) fuer native Scan-Funktion (besser als Web!) -- **Accounting Settings**: Connector-Konfiguration, Sync, Chart of Accounts - -Besonderheiten: - -- Viele Options-Endpoints fuer Dropdowns (`.../organisations/options`, `.../roles/options`, etc.) -- Hierarchische Daten: Organisation > Contract > Document > Position -- Scan-Upload ist auf iOS BESSER als im Web dank VisionKit - ---- - -## Phase 10: Feature Workspace (5-7 Tage) - -API-Basis: `/api/workspace/{instanceId}/` - -**Haupt-View**: AI Chat mit SSE-Streaming - -### Workspace Layout (Apple HIG Best Practice) - -**iPad** (Regular Width) -- `NavigationSplitView` mit 3 Spalten: - -- **Sidebar**: Conversations/Workflows Liste (analog linke Spalte im Web) -- **Content**: Chat-Bereich mit Nachrichten + Input-Feld -- **Detail (Inspector)**: Activity Panel, File Preview, Pending Edits (als `.inspector` Modifier oder dritte Spalte) - -**iPhone** (Compact Width) -- Automatischer Collapse: - -- Chat ist Primary View -- Conversations-Liste als Back-Navigation -- Activity/Files als Toolbar-Button -> Sheet/Overlay - -### SSE Chat-Streaming - -Analog zu [frontend_nyla/src/pages/views/workspace/useWorkspace.ts](frontend_nyla/src/pages/views/workspace/useWorkspace.ts): - -- `POST /api/workspace/{instanceId}/start/stream` mit JSON Body: - - `prompt`, `fileIds`, `dataSourceIds`, `featureDataSourceIds`, `userLanguage`, `workflowId`, `allowedProviders` -- Event-Types aus dem 
Stream: `message`, `chunk`, `status`, `tool_activity`, `agent_progress`, `agent_summary`, `fileCreated`, `dataSourceAccess`, `workflowUpdated`, `complete`, `stopped`, `error` -- Stop: `POST .../stop/{workflowId}` - -### Weitere Workspace-Features (ohne Voice) - -- Workflows CRUD: List, Create, Patch, Delete -- Messages: `GET .../workflows/{id}/messages` -- Files + Folders: Browse, Upload, Preview -- Datasources: CRUD + Feature-Datasources -- Pending Edits: Accept/Reject (einzeln und alle) -- Settings: General + RAG-Statistiken - -### WorkspaceKeepAlive-Equivalent - -Im Web bleibt die Workspace-Komponente gemounted (analog [frontend_nyla/src/pages/views/workspace/WorkspaceKeepAlive.tsx](frontend_nyla/src/pages/views/workspace/WorkspaceKeepAlive.tsx)). - -In Swift: `@Observable WorkspaceViewModel` wird im Environment gehalten und ueberlebt Navigation-Wechsel. Der SSE-Stream bleibt aktiv solange die Instanz ausgewaehlt ist. - ---- - -## Phase 11: Feature Chatbot (3-5 Tage) - -API-Basis: `/api/chatbot/{instanceId}/` - -Views: - -- **Conversations**: Thread-Liste + Chat-Stream -- **Settings**: Placeholder im Web, aber Endpoints existieren - -Technisch identisch zum Workspace-Chat-Pattern: - -- `POST .../start/stream` -> SSE -- `POST .../stop/{workflowId}` -- Threads: List, Delete -- Verwendet dieselben ChatMessage-Components aus Phase 6 - ---- - -## Phase 12: Feature Teamsbot (3-5 Tage, OHNE Voice) - -API-Basis: `/api/teamsbot/{instanceId}/` - -Views: - -- **Dashboard**: Uebersicht Sessions + Config -- **Sessions**: Session-Liste, Session-Details mit Stream -- **Settings**: Config, System Bots, User Account - -Technisch: - -- SSE fuer Session-Stream (`GET .../sessions/{sessionId}/stream`) -- Config/Settings CRUD -- Screenshots anzeigen (`GET .../sessions/{sessionId}/screenshots`) -- **WebSocket** (`/bot/ws/{sessionId}`) fuer Live-Bot-Interaktion -- `URLSessionWebSocketTask` -- MFA-Support fuer Session-Authentifizierung - ---- - -## Phase 13: Feature CommCoach (4-6 Tage, 
OHNE Audio-Streaming) - -API-Basis: `/api/commcoach/{instanceId}/` - -Views: - -- **Dashboard**: Kontext-Uebersicht, aktive Sessions, Fortschritt -- **Coaching**: Session starten, Nachrichten-Stream (SSE), Tasks -- **Dossier**: Export, Score-History, Badges -- **Settings**: Personas, Documents - -Technisch: - -- Contexts CRUD (Create, Archive, Activate) -- Sessions: Start, Complete, Cancel -- Message-Stream: SSE fuer Coaching-Nachrichten -- Tasks: CRUD + Status-Updates -- Badges + Scores: Visualisierung mit Swift Charts -- Export als PDF/Download - -**Hinweis**: Audio-Streaming (Mikrofon -> Backend) wird in dieser Version uebersprungen. Text-basiertes Coaching ist voll funktional. - ---- - -## Phase 14: Feature Automation (3-5 Tage) - -API-Basis: `/api/automations/` - -Views: - -- **Definitions**: Automation-Definitionen CRUD + Execute + Duplicate -- **Templates**: Template-Verwaltung - -Technisch: - -- FormGeneratorTable fuer Definitionen/Templates -- Execute mit Status-Tracking -- Workflow-Management: List, Get, Status, Logs, Messages -- Actions-Endpoint fuer verfuegbare Aktionen - ---- - -## Phase 15: Feature Automation2 (2-3 Tage, OHNE visuellen Editor) - -API-Basis: `/api/automation2/{instanceId}/` - -Views: - -- **Workflows**: Liste aller Workflows + CRUD -- **Workflow-Tasks**: Offene Tasks mit Complete-Aktion - -**Kein visueller Node-Editor** -- nur Listen-basierte Verwaltung: - -- Workflows: List, Create, Update, Delete -- Workflow Runs: `GET .../workflows/{id}/runs` -- Execute: `POST .../execute` -- Tasks: List + Complete -- Node-Types und Connections als Reference-Info - ---- - -## Phase 16: Feature CodeEditor (3-5 Tage) - -API-Basis: Nutzt Workspace/Automation2 API-Patterns - -Views: - -- **Editor**: Code-Anzeige mit Syntax-Highlighting (Runestone) -- **Workflows**: Workflow-Liste - -Technisch: - -- **Runestone** (SPM Package) fuer nativen Code-Editor mit: - - TreeSitter-basiertes Syntax-Highlighting - - Zeilennummern - - Theme-Integration (Light/Dark) 
-- SSE-Stream fuer Code-Generierung -- Pending Edits: Accept/Reject wie im Workspace -- File-Content anzeigen: `GET .../files/{fileId}/content` - ---- - -## Phase 17: Feature RealEstate/PEK (5-7 Tage) - -API-Basis: `/api/realestate/{instanceId}/` - -Views: - -- **Dashboard (Map)**: Karten-Visualisierung mit Parzellen -- **Instance-Roles**: Rollen-Verwaltung - -Technisch: - -- **MapKit** (SwiftUI `Map` View): - - Parzellen als Polygone auf der Karte - - Parcel-Selection durch Tap - - Adjacent Parcels Highlight - - Cluster-Ansicht fuer viele Parzellen -- Address-Autocomplete: `GET /api/realestate/address/autocomplete` + optionales MKLocalSearchCompleter -- Projects + Parcels CRUD -- BZO Information: Bauvorschriften anzeigen -- WFS (Web Feature Service): Parcel-Geometrie laden -- Selection Summary - -**Hinweis**: Leaflet (Web) -> MapKit (iOS) erfordert Koordinaten-Transformation. `proj4` im Web wird durch MapKit's native Projektionen ersetzt. Falls Swiss-spezifische Koordinatensysteme (LV95/LV03) benoetigt werden, braucht es einen Converter. 
- ---- - -## Phase 18: Feature Neutralization (2-3 Tage) - -API-Basis: `/api/neutralization/` - -Views: - -- **Dashboard/Playground** (gleiche View): Text eingeben -> neutralisieren/aufloesen - -Technisch: - -- Config: `GET/POST /api/neutralization/config` -- Neutralize Text: `POST .../neutralize-text` -- Resolve Text: `POST .../resolve-text` -- Neutralize File: `POST .../neutralize-file` (File-Upload) -- Stats: `GET .../stats` -- Attributes: `GET .../attributes` - ---- - -## API-Header-Konvention (fuer alle Requests) - -Jeder Request muss folgende Header senden (analog [frontend_nyla/src/api.ts](frontend_nyla/src/api.ts)): - -- `Authorization: Bearer {token}` -- aus Keychain, wenn JWT vorhanden -- `X-Mandate-Id: {mandateId}` -- aus NavigationStore, bei Feature-Seiten -- `X-Instance-Id: {instanceId}` -- aus NavigationStore, bei Feature-Seiten -- `X-CSRF-Token: {token}` -- aus CSRFManager, bei POST/PUT/PATCH/DELETE -- `Content-Type: application/json` -- Standard fuer JSON Bodies -- Cookies (httpOnly) -- automatisch via URLSession HTTPCookieStorage - ---- - -## Parallelisierungsstrategie (2-3 Entwickler) - -```mermaid -gantt - title Entwicklungsplan 2-3 Entwickler - dateFormat YYYY-MM-DD - axisFormat %d.%m - - section Gemeinsam - Phase0_Setup :p0, 2026-04-01, 2d - Phase1_Networking :p1, after p0, 5d - Phase2_Auth :p2, after p1, 5d - Phase3_Models :p3, after p2, 3d - - section Dev1_Core - Phase4_AppShell :p4, after p3, 6d - Phase5_i18n_Theme :p5, after p4, 3d - Phase7_CorePages :p7, after p5, 7d - Phase8_Admin :p8, after p7, 7d - - section Dev2_Components - Phase6_SharedUI :p6, after p3, 8d - Phase10_Workspace :p10, after p6, 7d - Phase11_Chatbot :p11, after p10, 5d - Phase13_CommCoach :p13, after p11, 6d - - section Dev3_Features - Phase9_Trustee :p9, after p6, 7d - Phase12_Teamsbot :p12, after p9, 5d - Phase14_Automation :p14, after p12, 5d - Phase15_Automation2 :p15, after p14, 3d - Phase16_CodeEditor :p16, after p15, 5d - Phase17_RealEstate :p17, after p16, 7d - 
Phase18_Neutralization :p18, after p17, 3d -``` - - - -- **Phase 0-3**: Gemeinsam (Basis fuer alle) -- **Ab Phase 4**: Parallel -- Core/Admin, Shared UI/Chat-Features, Trustee/Weitere Features -- Features sind nach Phase 6 (Shared Components, besonders FormGenerator) unabhaengig voneinander - ---- - -## Gesamtaufwand-Schaetzung (aktualisiert) - -- Phase 0: Setup -- 1-2 Tage -- Phase 1: Networking -- 3-5 Tage -- Phase 2: Authentication -- 3-5 Tage -- Phase 3: Domain Models -- 2-3 Tage -- Phase 4: App Shell + Navigation -- 4-6 Tage -- Phase 5: i18n + Theme -- 2-3 Tage -- Phase 6: Shared UI Components -- 5-8 Tage -- Phase 7: Core Pages -- 5-7 Tage -- Phase 8: Admin -- 5-7 Tage -- Phase 9: Trustee -- 5-7 Tage -- Phase 10: Workspace -- 5-7 Tage -- Phase 11: Chatbot -- 3-5 Tage -- Phase 12: Teamsbot -- 3-5 Tage -- Phase 13: CommCoach -- 4-6 Tage -- Phase 14: Automation -- 3-5 Tage -- Phase 15: Automation2 -- 2-3 Tage -- Phase 16: CodeEditor -- 3-5 Tage -- Phase 17: RealEstate -- 5-7 Tage -- Phase 18: Neutralization -- 2-3 Tage -- **Gesamt sequentiell**: ~65-100 Tage -- **Gesamt mit 3 Devs parallel**: ~35-50 Tage (nach Phase 3) - ---- - -## Risiken und offene Punkte - -1. **Auth ohne Backend-Aenderungen**: `ASWebAuthenticationSession` teilt Cookies mit Safari, nicht direkt mit `URLSession`. Falls das Backend nach dem OAuth-Redirect nur httpOnly Cookies setzt (ohne Token im Redirect-URL), muss getestet werden ob die Cookie-Uebernahme funktioniert. Worst case: Backend muss doch einen Token-Parameter im Callback-URL zurueckgeben. -2. **CSRF Validierung**: Das Backend validiert CSRF-Tokens mit einem Hex-Format-Check. Die client-seitige Generierung muss identisch sein (16 Hex-Zeichen). Testen ob das Backend Session-gebundene CSRF-Validierung macht oder nur Format-Check. -3. **SSE ueber POST**: Standard `EventSource` ist GET-only. Das Web nutzt `fetch(POST)` + `ReadableStream`. 
In Swift: `URLSession.bytes(for:)` mit POST Request -- funktioniert, aber muss getestet werden mit dem spezifischen Backend-Setup. -4. **FormGenerator Komplexitaet**: Der FormGenerator ist das komplexeste Shared Component. Die Tabellen-Ansicht auf iPhone (schmaler Screen) braucht ein alternatives Layout (Cards statt Tabelle). Dies erfordert sorgfaeltiges Responsive Design. -5. **MapKit vs Leaflet**: Falls Swiss-spezifische Koordinatensysteme (LV95/LV03) benoetigt werden, muss ein nativer Koordinaten-Transformer implementiert werden (im Web: `proj4`). -6. **Keine Offline-Faehigkeit**: Bei schlechter Netzwerkverbindung (z.B. auf dem Bau) koennen Ladezustaende frustrierend sein. Empfehlung: Zumindest ein Request-Cache fuer die letzte erfolgreiche Response pro Endpoint. -7. **TestFlight Limitierung**: Max 10'000 Tester, 90-Tage Build-Ablauf. Fuer laengerfristigen Einsatz Enterprise Distribution oder App Store evaluieren. - diff --git a/.cursor/plans/swift_ios_app_nachbau_80bb1212.plan.md b/.cursor/plans/swift_ios_app_nachbau_80bb1212.plan.md deleted file mode 100644 index c8ae939c..00000000 --- a/.cursor/plans/swift_ios_app_nachbau_80bb1212.plan.md +++ /dev/null @@ -1,741 +0,0 @@ ---- -name: Swift iOS App Nachbau -overview: Vollständiger Implementierungsplan für den Nachbau des React-Web-Frontends (frontend_nyla) als native Swift/SwiftUI iOS/iPadOS-App. Die App kommuniziert mit dem bestehenden FastAPI-Gateway-Backend und bildet alle UI-Screens, Navigation und API-Schnittstellen nach. 
-todos: - - id: phase-0 - content: "Phase 0: Xcode-Projekt erstellen, Ordnerstruktur, SPM-Dependencies, Build-Configs (Dev/Int/Prod)" - status: pending - - id: phase-1 - content: "Phase 1: Core Networking Layer -- APIClient, SSEClient, WebSocketClient, CSRFManager (analog api.ts + sseClient.ts)" - status: pending - - id: phase-2 - content: "Phase 2: Authentication -- LocalAuth, MSAL, Google, Biometrie, Keychain (analog authApi.ts + AuthProvider.tsx)" - status: pending - - id: phase-3 - content: "Phase 3: Domain Models + FeatureStore (analog mandate.ts + featureStore.tsx)" - status: pending - - id: phase-4 - content: "Phase 4: App Shell -- NavigationSplitView (iPad) / TabView (iPhone), Dashboard, Settings, backend-driven Sidebar" - status: pending - - id: phase-5 - content: "Phase 5: i18n String Catalogs (de/en/fr) + Theme System (Light/Dark)" - status: pending - - id: phase-6 - content: "Phase 6: Core Pages -- Store, GDPR, Basedata (Prompts/Files/Connections), Billing Transactions" - status: pending - - id: phase-7 - content: "Phase 7: Shared UI Components -- FormGenerator, ContentPreview, ChatMessage, AccessRules, NotificationBell" - status: pending - - id: phase-8 - content: "Phase 8: Push Notifications (APNs Registration, Deep-Link Handling)" - status: pending - - id: phase-9 - content: "Phase 9: Admin Module -- alle 16 Admin-Seiten (Mandates, Users, RBAC, Invitations, Wizards, etc.)" - status: pending - - id: phase-10 - content: "Phase 10: Feature Trustee -- Dashboard, Documents, Positions, Roles, Expense-Import, Scan, Accounting" - status: pending - - id: phase-11 - content: "Phase 11: Feature Workspace -- Chat-Streaming (SSE), Files, Datasources, Voice" - status: pending - - id: phase-12 - content: "Phase 12: Feature Chatbot -- SSE-Streaming Chat, Threads, Conversations" - status: pending - - id: phase-13 - content: "Phase 13: Feature Teamsbot -- Sessions, WebSocket Bot-Kommunikation, Voice, MFA" - status: pending - - id: phase-14 - content: "Phase 14: 
Feature CommCoach -- Coaching Sessions, Audio-Streaming, Personas, Dossier" - status: pending - - id: phase-15 - content: "Phase 15: Feature ChatPlayground -- Workflows, Playground mit SSE-Stream" - status: pending - - id: phase-16 - content: "Phase 16: Feature Automation -- Definitions, Templates, Logs, Execute" - status: pending - - id: phase-17 - content: "Phase 17: Feature CodeEditor -- Editor mit SSE-Stream, Code-Anzeige, Apply" - status: pending - - id: phase-18 - content: "Phase 18: Feature RealEstate/PEK -- MapKit-Integration, Parcels, Address-Search, BZO" - status: pending - - id: phase-19 - content: "Phase 19: Feature Neutralization -- Config, Neutralize Text/File" - status: pending - - id: phase-20 - content: "Phase 20: Billing-Erweiterung -- Admin-Views, Stripe Checkout" - status: pending -isProject: false ---- - -# Nyla iOS/iPadOS App -- Vollständiger Implementierungsplan - -## Ausgangslage - -Das bestehende Web-Frontend (`frontend_nyla`) ist eine **React 19 + Vite + TypeScript** Anwendung mit: - -- **12+ Feature-Module** (Trustee, Workspace, Chatbot, Teamsbot, CommCoach, CodeEditor, Automation, RealEstate, Neutralization, ChatPlayground, Billing, Admin) -- **21 API-Module** unter `src/api/*.ts` mit insgesamt **200+ API-Endpunkten** -- **120+ UI-Komponenten** inkl. dynamischem FormGenerator, ContentPreview, Chat-Streaming, Maps, Charts -- **Multi-Tenant-Architektur**: Mandate > Features > Instanzen > Views/Permissions -- **3 Auth-Provider**: Local, Microsoft MSAL, Google OAuth -- **Echtzeit**: SSE-Streaming (Chat, Workspace, CodeEditor) + WebSockets (Voice) -- **Backend**: FastAPI (Python) auf PostgreSQL, erreichbar unter konfigurierbarer `VITE_API_BASE_URL` - ---- - -## Technische Entscheidungen - - -| Aspekt | Entscheidung | -| -------------------- | --------------------------------------------------- | -| Plattform | iOS 18+ / iPadOS 18+ | -| UI-Framework | SwiftUI | -| Architektur | **MVVM + Repository Pattern** (s. 
unten) | -| Networking | URLSession + async/await | -| SSE | Custom SSE-Client auf URLSession-Basis | -| WebSocket | URLSessionWebSocketTask | -| Auth | MSAL SDK, Google Sign-In SDK, Keychain + Local Auth | -| Biometrie | LocalAuthentication (Face ID / Touch ID) | -| State | `@Observable` (Observation Framework, iOS 17+) | -| Navigation | `NavigationStack` + `NavigationSplitView` (iPad) | -| Dependency Injection | Environment-basiert (SwiftUI `@Environment`) | -| Package Manager | Swift Package Manager (SPM) | -| Karten | MapKit (SwiftUI) | -| Charts | Swift Charts | -| i18n | String Catalogs (`.xcstrings`) fuer de/en/fr | -| Push | APNs + UserNotifications Framework | -| PDF-Anzeige | PDFKit | -| Markdown | Native AttributedString (iOS 15+) | -| Persistenz | Keychain (Secrets), UserDefaults (Preferences) | -| Distribution | TestFlight | - - -### Architektur: MVVM + Repository Pattern - -``` -Presentation Layer (SwiftUI Views) - | - v - ViewModels (@Observable) - | - v - Repositories (Protokolle) - | - v - API Services (URLSession) - | - v - Gateway Backend (FastAPI) -``` - -Begründung: SwiftUI ist nativ MVVM-orientiert. Das Repository Pattern kapselt die Datenzugriffe und macht den Code testbar. `@Observable` (iOS 17+) ist leichter als `ObservableObject` und performanter. 
- -### Projektstruktur - -``` -NylaApp/ - NylaApp.swift // App Entry Point - Config/ - AppConfig.swift // API URLs, Build Configs - Environment.swift // Dev/Int/Prod Environments - Core/ - Networking/ - APIClient.swift // Zentraler HTTP-Client (= api.ts) - APIError.swift // Error Types - APIEndpoints.swift // Endpoint Definitionen - SSEClient.swift // Server-Sent Events Client - WebSocketClient.swift // WebSocket Client - CSRFManager.swift // CSRF Token Handling - RequestInterceptor.swift // Auth/Mandate Headers - Auth/ - AuthManager.swift // Zentrale Auth-Logik - LocalAuthService.swift // Username/Password - MSALAuthService.swift // Microsoft MSAL - GoogleAuthService.swift // Google Sign-In - BiometricAuthService.swift // Face ID / Touch ID - KeychainService.swift // Secure Storage - Navigation/ - AppRouter.swift // Root Navigation - NavigationStore.swift // Backend-driven Nav State - DeepLinkHandler.swift // URL Scheme Handling - Localization/ - Localizable.xcstrings // String Catalog - LanguageManager.swift // Sprachauswahl - Theme/ - ThemeManager.swift // Light/Dark Mode - DesignTokens.swift // Farben, Spacing, Fonts - Permissions/ - PermissionChecker.swift // RBAC Client-Checks - Domain/ - Models/ // Shared Domain Models - Mandate.swift // Mandate, Feature, Instance - User.swift // User Model - Permissions.swift // AccessLevel, TablePermission - Pagination.swift // PaginatedResponse - I18nLabel.swift // Mehrsprachige Labels - Repositories/ // Repository Protokolle - AuthRepository.swift - MandateRepository.swift - FeatureRepository.swift - ... - Data/ - API/ // API-Implementierungen (= src/api/*.ts) - AuthAPI.swift - UserAPI.swift - MandateAPI.swift - FeaturesAPI.swift - BillingAPI.swift - TrusteeAPI.swift - ... (21 Module) - Repositories/ // Repository Implementierungen - DefaultAuthRepository.swift - DefaultMandateRepository.swift - ... 
- Features/ // Feature-Module (je Ordner) - Dashboard/ - Store/ - Settings/ - GDPR/ - Basedata/ - Prompts/ - Files/ - Connections/ - Billing/ - Admin/ - Mandates/ - Users/ - Access/ - Invitations/ - ... - Trustee/ - Workspace/ - Chatbot/ - Teamsbot/ - CommCoach/ - CodeEditor/ - ChatPlayground/ - Automation/ - RealEstate/ - Neutralization/ - Shared/ - Components/ // Wiederverwendbare UI (= src/components/) - FormGenerator/ // Dynamische Formulare - ContentPreview/ // PDF, Bild, JSON Vorschau - ChatMessage/ // Chat-Nachrichten-Rendering - AccessRules/ // Zugriffsregeln-Editor - NotificationBell/ // Notification Badge + Overlay - SearchBar/ - LoadingView/ - ErrorView/ - EmptyStateView/ - Extensions/ - Utilities/ - Resources/ - Assets.xcassets -``` - ---- - -## Phasen-Plan - -### Phase 0: Projekt-Setup (1-2 Tage) - -- Xcode-Projekt erstellen (iOS 18+, SwiftUI App Lifecycle) -- Ordnerstruktur nach obigem Schema anlegen -- SPM Dependencies einrichten: - - `MSAL` (Microsoft Authentication Library for iOS) - - `GoogleSignIn` (Google Sign-In SDK) - - Keine weiteren externen Deps noetig (MapKit, Charts, PDFKit sind System-Frameworks) -- Build-Konfigurationen: **Dev** / **Int** / **Prod** mit je eigenem `API_BASE_URL` - - Analog zu den `.env.dev` / `.env.int` / `.env.prod` Dateien im Web-Frontend - - Werte: `http://localhost:8000` (Dev), INT-URL, PROD-URL -- TestFlight-Vorbereitung: App ID, Provisioning Profile, Signing - -### Phase 1: Core Networking Layer (3-5 Tage) - -**Ziel**: Equivalent zu `[src/api.ts](frontend_nyla/src/api.ts)` + `[src/hooks/useApi.ts](frontend_nyla/src/hooks/useApi.ts)` - -**APIClient.swift** -- Zentraler HTTP-Client: - -- `URLSession.shared` mit Custom-Configuration -- Cookie-basierte Auth (`httpCookieStorage`) -- Request-Interceptor fuer: - - `Authorization: Bearer` Header (aus Keychain) - - `X-Mandate-Id` / `X-Instance-Id` Header (aus aktuellem Navigation-Context) - - CSRF-Token fuer POST/PUT/PATCH/DELETE -- Response-Handler: - - 401 -> Redirect zu 
Login (analog Web `api.ts` Zeile 127-151) - - 429 -> Rate-Limit Warning - - Generische Fehlerextraktion (FastAPI `detail` Array/String) -- Generische Request-Methoden: `get()`, `post()`, `put()`, `delete()`, `upload()` -- `Codable`-basierte JSON Serialisierung - -**SSEClient.swift** -- Server-Sent Events: - -- Analog zu `[src/utils/sseClient.ts](frontend_nyla/src/utils/sseClient.ts)` -- URLSession mit `bytes(for:)` async stream -- Parsing von `data:` Lines -- Callbacks: `onMessage`, `onError`, `onComplete` -- Wird benoetigt fuer: Workspace, Chatbot, CodeEditor, CommCoach Streaming - -**WebSocketClient.swift** -- WebSockets: - -- `URLSessionWebSocketTask` -- Fuer Voice-Features (Teamsbot: `/api/teamsbot/{instanceId}/bot/ws/{sessionId}`) -- Ping/Pong, Reconnect-Logik - -**CSRFManager.swift**: - -- Token-Generierung und -Speicherung -- Analog zu `[src/utils/csrfUtils.ts](frontend_nyla/src/utils/csrfUtils.ts)` - -### Phase 2: Authentication (3-5 Tage) - -**Ziel**: Alle 3 Auth-Provider + Biometrie - -**Mapping Web -> Swift:** - - -| Web (authApi.ts) | Swift | -| ---------------------------------------- | -------------------------------------------- | -| `POST /api/local/login` (form-data) | `LocalAuthService.login(username:password:)` | -| `POST /api/local/register` | `LocalAuthService.register(...)` | -| `POST /api/local/password-reset-request` | `LocalAuthService.requestPasswordReset(...)` | -| `POST /api/local/password-reset` | `LocalAuthService.resetPassword(...)` | -| `GET /api/local/available?username=` | `LocalAuthService.checkAvailability(...)` | -| `GET /api/local/me` | `AuthManager.fetchCurrentUser()` | -| `POST /api/local/logout` | `AuthManager.logout()` | -| MSAL Login/Callback | `MSALAuthService` via MSAL SDK | -| `GET /api/msft/me` | `MSALAuthService.fetchUser()` | -| Google Login/Callback | `GoogleAuthService` via Google Sign-In SDK | -| `GET /api/google/me` | `GoogleAuthService.fetchUser()` | - - -**AuthManager.swift** (zentral): - -- Verwaltet aktiven 
Auth-Provider (`local` / `msft` / `google`) -- Speichert Auth-State in Keychain (nicht UserDefaults!) -- Published `isAuthenticated`, `currentUser`, `authAuthority` -- Analog zu `[src/providers/auth/AuthProvider.tsx](frontend_nyla/src/providers/auth/AuthProvider.tsx)` - -**BiometricAuthService.swift**: - -- `LAContext.evaluatePolicy(.deviceOwnerAuthenticationWithBiometrics)` -- Nach erstem erfolgreichen Login: Credentials in Keychain speichern -- Bei App-Start: Face ID/Touch ID -> Keychain Credentials -> Auto-Login - -**Login Screen (SwiftUI)**: - -- Username/Password Felder -- "Anmelden mit Microsoft" Button (MSAL) -- "Anmelden mit Google" Button (Google Sign-In) -- "Face ID / Touch ID" Option (wenn verfuegbar) -- Registrierung / Passwort vergessen Links -- Analog zu `[src/pages/Login.tsx](frontend_nyla/src/pages/Login.tsx)` - -### Phase 3: Domain Models + Feature Store (2-3 Tage) - -**Ziel**: Alle geteilten Datenmodelle + Feature-State - -Zentrale Models (analog zu `[src/types/mandate.ts](frontend_nyla/src/types/mandate.ts)`): - -```swift -// Mandate.swift -struct I18nLabel: Codable { var de: String; var en: String; var fr: String? } -enum AccessLevel: String, Codable { case none = "n", my = "m", group = "g", all = "a" } -struct TablePermission: Codable { var view: Bool; var read, create, update, delete: AccessLevel } -struct FieldPermission: Codable { var read: Bool; var write: Bool } -struct InstancePermissions: Codable { var tables: [String: TablePermission]; var fields: [String: [String: FieldPermission]]?; var views: [String: Bool]; var isAdmin: Bool? 
} -struct FeatureInstance: Codable, Identifiable { var id: String; var featureCode, mandateId, mandateName, instanceLabel: String; var userRoles: [String]; var permissions: InstancePermissions } -struct MandateFeature: Codable { var code: String; var label: I18nLabel; var icon: String; var instances: [FeatureInstance] } -struct Mandate: Codable, Identifiable { var id, name: String; var label, code: String?; var features: [MandateFeature] } -struct FeaturesMyResponse: Codable { var mandates: [Mandate] } -``` - -**FeatureStore.swift** (analog zu `[src/stores/featureStore.tsx](frontend_nyla/src/stores/featureStore.tsx)`): - -- `@Observable class FeatureStore` -- `loadFeatures()` -> `GET /api/features/my` -- Cache: `[String: FeatureInstance]` fuer schnellen Zugriff -- Methoden: `getMandateById()`, `getInstanceById()`, `getAllInstances()`, etc. -- Injected via SwiftUI `@Environment` - -### Phase 4: App Shell + Navigation (4-6 Tage) - -**Ziel**: MainLayout + FeatureLayout + backend-driven Navigation - -**Adaptive Layout:** - -- **iPad**: `NavigationSplitView` (Sidebar + Detail) -- analog Web-Sidebar -- **iPhone**: `TabView` mit Hauptbereichen + Navigation Stack pro Tab - -**Sidebar / Navigation:** - -- Backend-driven: `GET /api/navigation?language={lang}` liefert Navigationsbaum -- Analog zu `[src/components/Navigation/MandateNavigation.tsx](frontend_nyla/src/components/Navigation/MandateNavigation.tsx)` -- Hierarchie: Mandate > Feature > Instance > Views -- Icon-Mapping: SF Symbols statt React Icons (Mapping-Tabelle erstellen) - -**Screen-Routing:** - -- `NavigationStack` mit `NavigationPath` fuer programmatische Navigation -- Deep-Link-Schema: `nyla://mandates/{mandateId}/{featureCode}/{instanceId}/{view}` -- Feature-View-Dispatcher: analog zu `[src/pages/FeatureView.tsx](frontend_nyla/src/pages/FeatureView.tsx)` `VIEW_COMPONENTS` - -**Screens in Phase 4:** - -- Dashboard (`/`) -- Mandate/Instance-Karten, analog 
`[src/pages/Dashboard.tsx](frontend_nyla/src/pages/Dashboard.tsx)` -- Settings (`/settings`) -- Theme-Toggle, Sprache (de/en/fr), Profil -- UserSection im Sidebar-Footer - -### Phase 5: i18n + Theme (2-3 Tage) - -**Internationalisierung:** - -- Xcode String Catalog (`.xcstrings`) fuer de/en/fr -- Alle statischen Strings aus den Web-Locales uebernehmen: `[src/locales/de.ts](frontend_nyla/src/locales/de.ts)`, `en.ts`, `fr.ts` -- Dynamische Labels (I18nLabel vom Backend): Helper `label.localized(lang:)` analog `getLabel()` im Web -- `LanguageManager` speichert Praeferenz in UserDefaults - -**Theme:** - -- SwiftUI `.preferredColorScheme()` fuer System-Integration -- Custom `DesignTokens` fuer konsistente Farben/Spacing -- Analog zu `[src/styles/themes/light.css](frontend_nyla/src/styles/themes/light.css)` + `.dark-theme` - -### Phase 6: Core Pages (5-7 Tage) - -**Store** (Feature Marketplace): - -- `GET /api/store/features` -> Feature-Liste -- `POST /api/store/activate` / `POST /api/store/deactivate` -- Analog `[src/pages/Store.tsx](frontend_nyla/src/pages/Store.tsx)` - -**GDPR**: - -- `GET /api/user/me/data-export` + `/data-portability` -- `DELETE /api/user/me/` -- Analog `[src/pages/GDPR.tsx](frontend_nyla/src/pages/GDPR.tsx)` - -**Basedata - Prompts** (`/basedata/prompts`): - -- CRUD auf `/api/prompts` mit FormGenerator -- Analog `[src/pages/PromptsPage.tsx](frontend_nyla/src/pages/PromptsPage.tsx)` - -**Basedata - Files** (`/basedata/files`): - -- `GET /api/files/list`, Upload, Download, Preview -- Analog `[src/pages/FilesPage.tsx](frontend_nyla/src/pages/FilesPage.tsx)` -- Nutzung von `UIDocumentPickerViewController` (via UIKit-Bridge) fuer File-Upload -- `QuickLook` fuer Dateivorschau - -**Basedata - Connections** (`/basedata/connections`): - -- CRUD auf `/api/connections/` -- Connect/Disconnect Aktionen -- Analog `[src/pages/ConnectionsPage.tsx](frontend_nyla/src/pages/ConnectionsPage.tsx)` - -**Billing** (`/billing/transactions`): - -- `GET 
/api/billing/balance`, `/transactions`, `/statistics/{period}` -- Swift Charts fuer Statistik-Visualisierung -- Analog `[src/pages/billing/BillingDataView.tsx](frontend_nyla/src/pages/billing/BillingDataView.tsx)` - -### Phase 7: Shared UI Components (5-8 Tage) - -**FormGenerator** (zentral, wird von fast allen Features genutzt): - -- Analog zu `[src/components/FormGenerator/](frontend_nyla/src/components/FormGenerator/)` -- Dynamische Formulare basierend auf `AttributeDefinition[]` vom Backend (`GET /api/attributes/{entityType}`) -- Feldtypen: String, Email, Select, Multiselect, Textarea, Checkbox, File, Number, DateTime, Multilingual -- Tabellen-Ansicht (`FormGeneratorTable`) + Listen-Ansicht (`FormGeneratorList`) -- Action Buttons (Edit, Delete, Download, Custom) -- Pagination-Support - -**ContentPreview**: - -- PDF: `PDFKitView` (UIKit PDFView in UIViewRepresentable) -- Bilder: AsyncImage -- JSON: Syntax-Highlighting -- HTML: WKWebView -- Analog `[src/components/ContentPreview/](frontend_nyla/src/components/ContentPreview/)` - -**NotificationBell**: - -- `GET /api/notifications/unread-count` (Polling) -- Push Notifications via APNs -- In-App Notification Sheet -- Analog `[src/components/NotificationBell/](frontend_nyla/src/components/NotificationBell/)` - -**Chat Message Components**: - -- Message-Bubbles mit Markdown-Rendering -- File-Attachments -- Streaming-Indicator (typing animation) -- Auto-Scroll -- Analog `[src/components/UiComponents/Messages/](frontend_nyla/src/components/UiComponents/Messages/)` - -**AccessRules Components**: - -- Tabelle + Editor fuer RBAC-Regeln -- Analog `[src/components/AccessRules/](frontend_nyla/src/components/AccessRules/)` - -### Phase 8: Push Notifications (2-3 Tage) - -- APNs-Registrierung in `AppDelegate` -- Device Token an Backend senden (neuer Endpoint oder bestehender `/api/messaging/subscriptions`) -- `UNUserNotificationCenter` fuer lokale + remote Notifications -- Deep-Link Handling aus Notification-Tap - -### Phase 
9: Admin Module (5-7 Tage) - -Alle Admin-Seiten analog zu `[src/pages/admin/](frontend_nyla/src/pages/admin/)`: - - -| Admin-Seite | API-Endpunkte | -| -------------------- | ------------------------------------------ | -| Mandates | CRUD `/api/mandates/` | -| Users | CRUD `/api/users/` | -| User-Mandates | `/api/mandates/{id}/users` | -| Access Hub | `/api/rbac/permissions`, `/api/rbac/rules` | -| Feature Instances | `/api/features/instances` | -| Feature Roles | `/api/features/templates/roles` | -| Feature Users | `/api/features/instances/{id}/users` | -| Invitations | CRUD `/api/invitations/` | -| Mandate Roles | `/api/rbac/roles` | -| Role Permissions | `/api/rbac/rules/by-role/{roleId}` | -| User Access Overview | `/api/admin/user-access-overview/`* | -| Billing Admin | `/api/billing/admin/`* | -| Automation Events | `/api/admin/automation-events` | -| Logs | `/api/admin/logs` | -| Mandate Wizard | Kombination mehrerer Endpoints | -| Invitation Wizard | Kombination mehrerer Endpoints | - - -### Phase 10-20: Feature-Module (je 3-7 Tage pro Feature) - -Jedes Feature folgt demselben Pattern: - -1. **API-Modul** erstellen (alle Endpunkte des Features) -2. **ViewModels** fuer jede View -3. **SwiftUI Views** fuer jede registrierte View -4. 
**Feature-spezifische Komponenten** wo noetig - ---- - -#### Phase 10: Trustee (5-7 Tage) - -Views: Dashboard, Documents, Positions, Instance-Roles, Expense-Import, Scan-Upload, Accounting Settings - -API-Basis: `/api/trustee/{instanceId}/` - -- Organisations, Roles, Access, Contracts, Documents, Positions CRUD -- Accounting: Connectors, Config, Sync -- Document Upload mit base64-Konvertierung -- Options-Endpoints fuer Dropdowns - -Besonderheiten: - -- Viele verschachtelte CRUD-Entitaeten (Organisation > Contract > Document > Position) -- Scan-Upload: iOS-Kamera-Integration + VisionKit (OCR) - -#### Phase 11: Workspace (5-7 Tage) - -Views: Dashboard (Chat-Stream), Settings - -API-Basis: `/api/workspace/{instanceId}/` - -- SSE-Streaming fuer Chat (`POST .../start/stream`) -- Workflows, Messages, Files, Datasources CRUD -- Voice: Transcribe, Synthesize, Settings -- File Browser mit Ordnerstruktur - -Besonderheiten: - -- **Zentrales SSE-Streaming** -- das Keep-Alive-Pattern aus dem Web (`WorkspaceKeepAlive`) muss in Swift via Task/Actor geloest werden -- Voice: AVFoundation fuer Audio-Aufnahme, URLSession fuer Upload - -#### Phase 12: Chatbot (3-5 Tage) - -Views: Conversations, Settings - -API-Basis: `/api/chatbot/{instanceId}/` - -- `POST .../start/stream` -- SSE-Streaming via fetch (nicht Axios!) 
-- Threads: List, Get, Delete -- Stop Workflow - -Besonderheiten: - -- Streaming-Chat mit File-Attachments -- Analog zu `chatbotApi.startChatbotStreamApi` -- Custom SSE via POST - -#### Phase 13: Teamsbot (4-6 Tage) - -Views: Dashboard, Sessions, Settings - -API-Basis: `/api/teamsbot/{instanceId}/` - -- Sessions CRUD + Stream (EventSource/SSE) -- Config, System Bots, User Account -- Voice Test -- MFA fuer Sessions -- WebSocket fuer Bot-Kommunikation (`/bot/ws/{sessionId}`) - -Besonderheiten: - -- **WebSocket** fuer Live-Bot-Interaction -- SSE via EventSource fuer Session-Stream -- Screenshot-Anzeige - -#### Phase 14: CommCoach (4-6 Tage) - -Views: Dashboard, Coaching, Dossier, Settings - -API-Basis: `/api/commcoach/{instanceId}/` - -- Contexts CRUD + Archive/Activate -- Sessions: Start, Message-Stream, Audio-Stream, Complete, Cancel -- Tasks CRUD + Status -- Personas CRUD, Documents, Badges, Score History -- Voice: Languages, Voices, TTS -- Export (Dossier, Session) - -Besonderheiten: - -- **Audio-Streaming**: Mikrofon-Aufnahme -> POST Audio-Stream -- SSE fuer Session-Nachrichten -- Score/Badge-Visualisierung - -#### Phase 15: ChatPlayground (3-5 Tage) - -Views: Playground, Workflows - -API-Basis: `/api/chatplayground/{instanceId}/` - -- Start/Stop Workflow (mit SSE-Stream) -- Workflows CRUD + Status/Logs/Messages -- Attributes, Actions - -#### Phase 16: Automation (3-5 Tage) - -Views: Definitions, Templates, Logs - -API-Basis: `/api/automations/` - -- Automations CRUD + Execute + Duplicate -- Templates CRUD -- Workflow-Management (gleiche API wie ChatPlayground, anderer Base-Path) - -#### Phase 17: CodeEditor (3-5 Tage) - -Views: Editor, Workflows - -API-Basis: `/api/codeeditor/{instanceId}/` - -- Start/Stop/Apply (mit SSE-Stream) -- ChatData, Workflows, Files, File Content - -Besonderheiten: - -- Code-Darstellung: Syntax-Highlighting (z.B. 
via `Highlightr` SPM Package oder custom) -- Diff-Ansicht fuer Code-Apply - -#### Phase 18: RealEstate / PEK (5-7 Tage) - -Views: Dashboard (Map), Instance-Roles - -API-Basis: `/api/realestate/{instanceId}/` - -- Projects + Parcels CRUD -- Parcel Search, WFS, Selection Summary, Adjacent Parcels -- Address Autocomplete -- BZO Information, Parcel Documents -- Gemeinden - -Besonderheiten: - -- **MapKit** Integration: Parcel-Visualisierung auf Karte -- Address-Autocomplete: MKLocalSearchCompleter oder Backend-API -- Komplexe Karteninteraktion (Parcel-Selektion, Adjacent Parcels) - -#### Phase 19: Neutralization (2-3 Tage) - -Views: Dashboard/Playground (gleiche View) - -API-Basis: `/api/neutralization/` - -- Config GET/POST -- Neutralize File/Text, Resolve Text -- Process SharePoint, Batch Process -- Stats, Attributes - -#### Phase 20: Billing View-Erweiterung (1-2 Tage) - -Admin-Billing-Views falls in Phase 9 nicht vollstaendig abgedeckt: - -- Checkout (Stripe -- SFSafariViewController fuer Redirect) -- Mandate/User Balances und Transaktionen - ---- - -## API-Header-Konvention (fuer alle Requests) - -Jeder API-Request muss folgende Header senden (analog `[src/api.ts](frontend_nyla/src/api.ts)`): - - -| Header | Quelle | Wann | -| -------------------------------- | ------------------ | --------------------- | -| `Authorization: Bearer {token}` | Keychain | Wenn JWT vorhanden | -| `X-Mandate-Id: {mandateId}` | Navigation Context | Bei Feature-Seiten | -| `X-Instance-Id: {instanceId}` | Navigation Context | Bei Feature-Seiten | -| `X-CSRF-Token: {token}` | CSRFManager | POST/PUT/PATCH/DELETE | -| `Content-Type: application/json` | Standard | JSON Bodies | -| Cookie (httpOnly) | URLSession | Automatisch | - - ---- - -## Gesamtaufwand-Schaetzung - - -| Phase | Tage (geschaetzt) | -| ------------------------------- | ----------------- | -| Phase 0: Setup | 1-2 | -| Phase 1: Networking | 3-5 | -| Phase 2: Authentication | 3-5 | -| Phase 3: Domain Models + Store | 2-3 | -| 
Phase 4: App Shell + Navigation | 4-6 | -| Phase 5: i18n + Theme | 2-3 | -| Phase 6: Core Pages | 5-7 | -| Phase 7: Shared UI Components | 5-8 | -| Phase 8: Push Notifications | 2-3 | -| Phase 9: Admin | 5-7 | -| Phase 10: Trustee | 5-7 | -| Phase 11: Workspace | 5-7 | -| Phase 12: Chatbot | 3-5 | -| Phase 13: Teamsbot | 4-6 | -| Phase 14: CommCoach | 4-6 | -| Phase 15: ChatPlayground | 3-5 | -| Phase 16: Automation | 3-5 | -| Phase 17: CodeEditor | 3-5 | -| Phase 18: RealEstate | 5-7 | -| Phase 19: Neutralization | 2-3 | -| Phase 20: Billing Erweit. | 1-2 | -| **Gesamt** | **~70-105 Tage** | - - -Hinweis: Dies ist eine Einzelperson-Schaetzung. Mit Team (z.B. 2-3 Devs) kann parallelisiert werden, besonders ab Phase 10+ (Features sind unabhaengig voneinander). - ---- - -## Offene Punkte / Risiken - -1. **Backend-Anpassungen**: Das Backend setzt teilweise httpOnly Cookies nach Browser-Redirect (MSAL, Google). Fuer eine native App muss das Backend ggf. alternative Token-Flows unterstuetzen (z.B. Device Code Flow oder Token-Exchange). -2. **Push Notifications**: Das Backend hat aktuell kein APNs-Token-Management. Ein neuer Endpoint `/api/notifications/register-device` muss im Gateway implementiert werden. -3. **SSE ueber POST**: Die Web-App nutzt `fetch` POST + ReadableStream fuer SSE (nicht standard EventSource GET). In Swift muss dies mit `URLSession.bytes(for:)` nachgebaut werden. -4. **Stripe Checkout**: Im Web oeffnet sich ein Stripe-Redirect. In iOS: SFSafariViewController oder Stripe iOS SDK. -5. **SharePoint Integration**: Einige Features nutzen SharePoint-Folder-Picker. In iOS muss eine alternative UI gebaut werden (Liste statt Filepicker). -6. **WebSocket Auth**: Der Web-Client nutzt Cookies fuer WebSocket-Auth. iOS `URLSessionWebSocketTask` unterstuetzt Cookies via URLSession Configuration. 
- diff --git a/demoData/workflows/pwg-mietzinsbestaetigung-pilot.workflow.json b/demoData/workflows/pwg-mietzinsbestaetigung-pilot.workflow.json index 78f50751..eaf1a941 100644 --- a/demoData/workflows/pwg-mietzinsbestaetigung-pilot.workflow.json +++ b/demoData/workflows/pwg-mietzinsbestaetigung-pilot.workflow.json @@ -38,7 +38,6 @@ "title": "Pro Scan-Dokument", "parameters": { "items": {"type": "ref", "nodeId": "n2", "path": ["files"]}, - "level": "auto", "concurrency": 1 } }, diff --git a/env-gateway-dev.20260515_122326.backup b/env-gateway-dev.20260515_122326.backup deleted file mode 100644 index 0517f627..00000000 --- a/env-gateway-dev.20260515_122326.backup +++ /dev/null @@ -1,97 +0,0 @@ -# Development Environment Configuration - -# System Configuration -APP_ENV_TYPE = dev -APP_ENV_LABEL = Development Instance Patrick -APP_API_URL = http://localhost:8000 -APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/notes/key.txt -APP_INIT_PASS_ADMIN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEeFFtRGtQeVUtcjlrU3dab1ZxUm9WSks0MlJVYUtERFlqUElHemZrOGNENk1tcmJNX3Vxc01UMDhlNU40VzZZRVBpUGNmT3podzZrOGhOeEJIUEt4eVlSWG5UYXA3d09DVXlLT21Kb1JYSUU9 -APP_INIT_PASS_EVENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERzZjNm56WGVBdjJTeG5Udjd6OGQwUVotYXUzQjJ1YVNyVXVBa3NZVml3ODU0MVNkZjhWWmJwNUFkc19BcHlHMTU1Q3BRcHU0cDBoZkFlR2l6UEZQU3d2U3MtMDh5UDZteGFoQ0EyMUE1ckE9 - -# PostgreSQL DB Host -DB_HOST=localhost -DB_USER=poweron_dev -DB_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEcUIxNEFfQ2xnS0RrSC1KNnUxTlVvTGZoMHgzaEI4Z3NlVzVROTVLak5Ubi1vaEZubFZaMTFKMGd6MXAxekN2d2NvMy1hRjg2UVhybktlcFA5anZ1WjFlQmZhcXdwaGhWdzRDc3ExeUhzWTg9 -DB_PORT=5432 - -# Security Configuration -APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2ZlUFRlcFdOZ001RnlzM2FhLWhRV2tjWWFhaWQwQ3hkcUFvbThMcndxSjFpYTdfRV9OZGhTcksxbXFTZWg5MDZvOHpCVXBHcDJYaHlJM0tyNWRZckZsVHpQcmxTZHJoZUs1M3lfU2ljRnJaTmNSQ0w0X085OXI0QW80M2xfQnJqZmZ6VEh3TUltX0xzeE42SGtZPQ== -APP_TOKEN_EXPIRY=300 - -# CORS Configuration 
-APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net - -# Logging configuration -APP_LOGGING_LOG_LEVEL = DEBUG -APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs -APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s -APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S -APP_LOGGING_CONSOLE_ENABLED = True -APP_LOGGING_FILE_ENABLED = True -APP_LOGGING_ROTATION_SIZE = 10485760 -APP_LOGGING_BACKUP_COUNT = 5 - -# OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP. -Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 -Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kxaG9WY1FJaWdCbVFVaTllUlJfU3Y3MmJkRmkzMDVDWUNtZEhlNVhISzJPcy00ZUVZcklYLXFMV0dIODV3NXNSSFBKQ0ZsZllES3diTEgySDF0T1ZCbFZHREZtcXFGSWNZN1NJbzJzczRRQWxoeVNsNzlsa0VzMHJPWHUydjBBclo= -Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback -Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 -Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyUW96aXFVOVJlLUdyRlVvT1hVU09ILWtMZnV2M19mVUxGMnFPV3FzNTdQa3dTbHVGTDBHTk01ZThLcjh6QUR5VldVZUpfcDlZNTh5YldtLWtjTll6VzJNQ3JCQ3ZubHdmd2JvaExDOXdvQ1pjWDVQTUtFWVAtUHhwS1lFQnJXWk4= -Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback - -Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyd1hPd09vcVFtbVg0Sm5Nd1VYVEEtWjZMZkFndmFVS0ZlcTU0dzJnYVYzRkZWbjh0QldyZkhseDV2cUgxYkNHTzF6MXhqQlZ2N0UtbmhPeWRKUHBVdzV0Q1ROaWNuN2xjMmVzMjNZQ2ZYZ3dOTHgxaU5sTGRjVHpfakhYeWF0ZGU= -Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback -Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com 
-Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kySXoyd1BmTnhOd1owTUJOWm53WlZMMjFHNGJhSUwyd2NDUW9BanlRWVJPLU5jYzRlcm5QeW96d0JYUkVWVWd2dGNBVEpJbElZY2lWb0o5S0gyNnhoV1pnNXhpSFEyaklZZjcwX2lVU0ktMEJGN01DMDhXQ3k4R1BXc1Q3ejFjOEg= -Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback - -# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. -Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4 -Service_CLICKUP_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd4ZWVBeHVtRnpIT0VBN0tSZDhLRmFmN05DOVBOelJtLWhkVnJDRVBqUkh3bDFTZFRWaWQ1cWowdGNLUk5IQzlGN1J6RFVCaW8zRnBwLVBnclJfdWgxV3pVRzFEV2lwcW5Rc19Xa1ROWXNJcUF0ajZaYUxOUXk0WHRsRmJLM25FaHV5T2IxdV92ZW1nRjhzaGpwU0l2Wm9FTkRnY2lJVjhuNHUwT29salAxYV8wPQ== -Service_CLICKUP_OAUTH_REDIRECT_URI = http://localhost:8000/api/clickup/auth/connect/callback - -# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI. 
- -# Stripe Billing (both end with _SECRET for encryption script) -STRIPE_SECRET_KEY_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5aHNGejgzQmpTdmprdzQxR19KZkh3MlhYUTNseFN3WnlaWjh2SDZyalN6aU9xSktkbUQwUnZrVnlvbGVRQm4yZFdiRU5aSEk5WVJuUnR4VUwtTm9OVk1WWmJQeU5QaDdib0hfVWV5U1BfYTFXRmdoOWdnOWxkb3JFQmF3bm45UjFUVUxmWGtGRkFKUGd6bmhpQlFnaVI3Q2lLdDlsY1VESk1vOEM0ZFBJNW1qcVZ0N2tPYmRLNmVKajZ2M3o3S05lWnRRVG5LdkRseW4wQ3VjNHNQZTZUdz09 -STRIPE_WEBHOOK_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5dDJMSHBrVk8wTzJhU2xzTTZCZWdvWmU2NGI2WklfRXRJZVUzaVYyOU9GLUZsalUwa2lPdEgtUHo0dVVvRDU1cy1saHJyU0Rxa2xQZjBuakExQzk3bmxBcU9WbEIxUEtpR1JoUFMxZG9ISGRZUXFhdFpSMGxvQUV3a0VLQllfUUtCOHZwTGdteV9rYTFOazBfSlN3ekNWblFpakJlZVlCTmNkWWQ4Sm01a1RCWTlnTlFHWVA0MkZYMlprUExrWFN2V0NVU1BTd1NKczFJbVo3VHpLdlc4UT09 -STRIPE_API_VERSION = 2026-01-28.clover -STRIPE_AUTOMATIC_TAX_ENABLED = false -STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0 - -# AI configuration -Connector_AiOpenai_API_SECRET = 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 -Connector_AiAnthropic_API_SECRET = D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 -Connector_AiPerplexity_API_SECRET = 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 -Connector_AiTavily_API_SECRET = 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 -Connector_AiPrivateLlm_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGRHM5eFdUVmVZU1R1cHBwN1RlMUx4T0NlLTJLUFFVX3J2OElDWFpuZmJHVmp4Z3BNNWMwZUVVZUd2TFhRSjVmVkVlcFlVRWtybXh0ZHloZ01ZcnVvX195YjdlWVdEcjZSWFFTTlNBWUlaTlNoLWhqVFBIb0thVlBiaWhjYjFQOFY= -Connector_AiMistral_API_SECRET = ogaEVD2fFmiIWHDhKn8oGM0FShFxnAtT - -Service_MSFT_TENANT_ID = common - -# Google Cloud Speech Services configuration -Connector_GoogleSpeech_API_KEY_SECRET = 
DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQm5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFFxZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2
RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGRFgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzRXbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEEtanBCRHlZRUNIdGdpUFhsYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pOMmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJ
MGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0= - -# Feature SyncDelta JIRA configuration -Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0= - -# Teamsbot Browser Bot Service -# For local testing: run the bot locally with `npm run dev` in service-teams-browser-bot -# The bot will connect back to localhost:8000 via WebSocket -TEAMSBOT_BROWSER_BOT_URL = http://localhost:4100 - -# Debug Configuration -APP_DEBUG_CHAT_WORKFLOW_ENABLED = True -APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug -APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True -APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync - -# Azure Communication Services Email Configuration -MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt -MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss - -# Zurich WFS Parcels (dynamic map layer). Default: Stadt Zürich OGD. Override for full canton if wfs.zh.ch resolves. 
-# Connector_ZhWfsParcels_WFS_URL = https://wfs.zh.ch/av -# Connector_ZhWfsParcels_TYPENAMES = av_li_liegenschaften_a - diff --git a/env-gateway-int.20260515_122326.backup b/env-gateway-int.20260515_122326.backup deleted file mode 100644 index a3033e5a..00000000 --- a/env-gateway-int.20260515_122326.backup +++ /dev/null @@ -1,92 +0,0 @@ -# Integration Environment Configuration - -# System Configuration -APP_ENV_TYPE = int -APP_ENV_LABEL = Integration Instance -APP_API_URL = https://gateway-int.poweron.swiss -# Force SameSite=None+Secure for auth cookies (cross-site UI on poweron-center.net). Optional if APP_API_URL is https:// -APP_COOKIE_SECURE = true -APP_KEY_SYSVAR = CONFIG_KEY -APP_INIT_PASS_ADMIN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjWm41MWZ4TUZGaVlrX3pWZWNwakJsY3Facm0wLVZDd1VKeTFoZEVZQnItcEdUUnVJS1NXeDBpM2xKbGRsYmxOSmRhc29PZjJSU2txQjdLbUVrTTE1NEJjUXBHbV9NOVJWZUR3QlJkQnJvTEU9 -APP_INIT_PASS_EVENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjdmtrakgxa0djekZVNGtTZV8wM2I5UUpCZllveVBMWXROYk5yS3BiV3JEelJSM09VYTRONHpnY3VtMGxDRk5JTEZSRFhtcDZ0RVRmZ1RicTFhb3c5dVZRQ1o4SmlkLVpPTW5MMTU2eTQ0Vkk9 - -# PostgreSQL DB Host -DB_HOST=gateway-int-server.postgres.database.azure.com -DB_USER=heeshkdlby -DB_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjczYzOUtTa21MMGJVTUQ5UmFfdWc3YlhCbWZOeXFaNEE1QzdJV3BLVjhnalBkLVVCMm5BZzdxdlFXQXc2RHYzLWtPSFZkZE1iWG9rQ1NkVWlpRnF5TURVbnl1cm9iYXlSMGYxd1BGYVc0VDA9 -DB_PORT=5432 - -# Security Configuration -APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZzTEp4aDR0MktWRjNoeVBrY1Npd1R0VE9YVHp3M2w1cXRzbUxNaU82QUJvaDNFeVQyN05KblRWblBvbWtoT0VXbkNBbDQ5OHhwSUFnaDZGRG10Vmgtdm1YUkRsYUhFMzRVZURmSFlDTFIzVWg4MXNueDZyMGc5aVpFdWRxY3dkTExGM093ZTVUZVl5LUhGWnlRPQ== -APP_TOKEN_EXPIRY=300 - -# CORS Configuration -APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net - -# Logging configuration -APP_LOGGING_LOG_LEVEL = DEBUG 
-APP_LOGGING_LOG_DIR = /home/site/wwwroot/ -APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s -APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S -APP_LOGGING_CONSOLE_ENABLED = True -APP_LOGGING_FILE_ENABLED = True -APP_LOGGING_ROTATION_SIZE = 10485760 -APP_LOGGING_BACKUP_COUNT = 5 - -# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs) -Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 -Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kydlVubld1d1h6SUNSWW1aZ3p4X3Zod1NDTjhZVnVYS2lqOERGTFp2OXJ4TGRiNlRLVFpzLUVDTUhkZGhGUWdxa1djdEV5UWkyblN1UHZoaFBjaExNTEpGMG1PRGJEbDdHVll0Ungwcl9JemZ4ZXFzZUNFQmFlZi1DZFlCekU1S3E= -Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/login/callback -Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 -Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyS1hWZXEzUzZTTE5MUlJncVowMU95Y0hmV1hveDBZOWdLU1RIUWt3SGlXNGxVTXVKc2QyQmtmWTlJRU43ZnRDdnlDTGxQY0hTU25CWWFFdDhUem9HU0VYcTFJTVFEbVk0dUhmVzJNVlEzNTNWdjdmaW9WeUVDVW5PRmNFZEQzNTY= -Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback - -Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyUTUwNXNGaHRNaGxxbF9sdWJ3Q0xLYU5yOHB4Yk8zMDZvQ29yaEhWOE5JMENXRk5jb2ZBdzRKQ2ZTTld6ZlIxemhOYzN1VE10TjBDRWZEMXlLVWRNYjZ0VG5RZ3I3NWt0SEJzMzdsUmRzcVNmbktRNHZqTUF6a2EyUkVUSFJnZFE= -Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback -Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com -Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyV1FRVjF0c0d3d0dyWU1TdW9HdXVkdHdsVWZKYTJjbGZPRDhMRjA2M0FkaUZIVmhIUmFKNjg2ekFodHd6NG80VTI3TC1icW1LZ01jWVZuQ1pKRm5nMW5UREJEaGp2Wl9oRDRCSmZVT0JpTnkwXzgwY0pkV29yczQ5akF2d1ZGcVY= -Service_GOOGLE_DATA_REDIRECT_URI = 
https://gateway-int.poweron.swiss/api/google/auth/connect/callback - -# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. -Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4 -Service_CLICKUP_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5SE1uVURMNVE3NkM4cHBKa2R2TjBnLWdpSXI5dHpKWGExZVFiUF95TFNnZ1NwLWFLdmh6eWFZTHVHYTBzU2FGRUpLYkVyM1NvZjZkWDZHN21qUER5ZVNOaGpCc3NrUGd3VnFTclF3OW1nUlVuWXQ1UVhDLVpyb1BwRExOeFpDeVhtbEhDVnd4TVdpbzNBNk5QQWFPdjdza0xBWGxFY1E3WFpCSUlNa1l4RDlBPQ== -Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/clickup/auth/connect/callback - -# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI. - -# Stripe Billing (both end with _SECRET for encryption script) -STRIPE_SECRET_KEY_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5ekdBaGNGVUlOQUpncTlzLWlTV0V5OWZzQkpDczhCUGw4U1JpTHZ0d3pfYlFNWElLRlNiNlNsaDRYTGZUTkg2OUFrTW1GZXpOUjBVbmRQWjN6ekhHd2ZSQ195OHlaeWh1TmxrUm10V2R3YmdncmFLbFMzVjdqcWJMSUJPR2xuSEozclNoZG1rZVBTaWg3OFQ1Qzdxb0wyQ2RKazc2dG1aZXBUTXlvbDZqLS1KOVI5M3BGc3NQZkZRbnFpRjIwWmh2ZHlVNlpxZVo2dWNmMjQ5eW02QmtzUT09 -STRIPE_WEBHOOK_SECRET = INT_ENC:Z0FBQUFBQnA4UXZiUUVqTl9lREVRWTh1aHFDcFpwcXRkOUx4MS1ham9Ddkl6T0xzMnJuM1hhUHdGNG5CenY1MUg4RlJBOGFQTWl5cVd5MjJ2REItcHYyRmdLX3ZlT2p5Z3BRVkMtQnRoTVkteXlfaU92MVBtOEI0Ni1kbGlfa0NiRmFRRXNHLVE2NHI= -STRIPE_API_VERSION = 2026-01-28.clover -STRIPE_AUTOMATIC_TAX_ENABLED = false -STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0 - -# AI configuration -Connector_AiOpenai_API_SECRET = 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 -Connector_AiAnthropic_API_SECRET = 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 -Connector_AiPerplexity_API_SECRET = 
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 -Connector_AiTavily_API_SECRET = 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 -Connector_AiPrivateLlm_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGSjZ1NWh0aWc1R3Z4MHNaeS1HamtUbndhcUZFZDlqUDhjSmg5eHFfdlVkU0RsVkJ2UVRaMWs3aWhraG5jSlc0YkxNWHVmR2JoSW5ENFFCdkJBM0VienlKSnhzNnBKbTJOUTFKczRfWlQ3bWpmUkRTT1I1OGNUSTlQdExacGRpeXg= -Connector_AiMistral_API_SECRET = ogaEVD2fFmiIWHDhKn8oGM0FShFxnAtT - -Service_MSFT_TENANT_ID = common - -# Google Cloud Speech Services configuration -Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgx
RXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBHWUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkzZ0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFw
WW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sms5aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhhMVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZQWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0= - -# Feature SyncDelta JIRA configuration -Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0= - -# Teamsbot Browser Bot Service -TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io - -# Debug Configuration -APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE -APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat -APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE -APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync - -# Azure Communication Services Email Configuration -MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt -MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss diff --git a/env-gateway-prod.20260515_122326.backup b/env-gateway-prod.20260515_122326.backup deleted file mode 100644 index 
8ed1c612..00000000 --- a/env-gateway-prod.20260515_122326.backup +++ /dev/null @@ -1,92 +0,0 @@ -# Production Environment Configuration - -# System Configuration -APP_ENV_TYPE = prod -APP_ENV_LABEL = Production Instance -APP_KEY_SYSVAR = CONFIG_KEY -APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3UnJRV0sySFlDblpXUlREclREaW1WbUt6bGtQYkdrNkZDOXNOLXFua1hqeFF2RHJnRXJ5VlVGV3hOZm41QjZOMlNTb0duYXNxZi05dXVTc2xDVkx0SVBFLUhncVo5T0VUZHE0UTZLWWw3ck09 -APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3QVpIY19DQVZSSzJmc2F0VEZvQlU1cHBhTEgxdHdnR3g4eW01aTEzYTUxc1gxTDR1RVVpSHRXYjV6N1BLZUdCUGlfOW1qdy0xSHFVRkNBcGZvaGlSSkZycXRuUllaWnpyVGRoeFg1dGEyNUk9 -APP_API_URL = https://gateway-prod.poweron.swiss -APP_COOKIE_SECURE = true - -# PostgreSQL DB Host -DB_HOST=gateway-prod-server.postgres.database.azure.com -DB_USER=gzxxmcrdhn -DB_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3Y1JScGxjZG9TdUkwaHRzSHZhRHpNcDV3N1U2TnIwZ21PRG5TWFFfR1k0N3BiRk5WelVadjlnXzVSTDZ6NXFQNFpqbnJ1R3dNVkJocm1zVEgtSk0xaDRiR19zNDBEbVIzSk51ekNlQ0Z3b0U9 -DB_PORT=5432 - -# Security Configuration -APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUlV5SVpaWXBNX1hpa2xPZGdMSWpnN2ZINHQxeGZnNHJweU5pZjlyYlY5Qm9zOUZEbl9wUEgtZHZXd1NhR19JSG9kbFU4MnFGQnllbFhRQVphRGQyNHlFVWR5VHQyUUpqN0stUmRuY2QyTi1oalczRHpLTEJqWURjZWs4YjZvT2U5YnFqcXEwdEpxV05fX05QMmtrPQ== -APP_TOKEN_EXPIRY=300 - -# CORS Configuration -APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net - -# Logging configuration -APP_LOGGING_LOG_LEVEL = DEBUG -APP_LOGGING_LOG_DIR = /home/site/wwwroot/ -APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s -APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S -APP_LOGGING_CONSOLE_ENABLED = True -APP_LOGGING_FILE_ENABLED = True -APP_LOGGING_ROTATION_SIZE = 10485760 -APP_LOGGING_BACKUP_COUNT = 5 - -# OAuth: Auth app (login/JWT) vs Data app (Graph / Google 
APIs) -Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 -Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySFR2NjBKM084QTNpeUlyUmM4R0N0SU1BZ2x4MmVTZTVHQkVzRE9GdmFkV041MzhudFhobjU0RWNnd3lqeXpKUXA5aGtNZkhtYU12QjBtX0NjemVmdEZBdC1TbXVBSXJTcF9vMlJXd0ZNRTRKRFBMUXNjTF85eTBxakR4RVNfYmU= -Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/login/callback -Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 -Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyNVU4cVRIZFdjS3l2S1RJVTVlc1ozQ1liZXZDX1VwdFZQUzFtS0N6UWYyeGxkNGNmY1hoaWxEUDBXVU5QR2t3Vi1ZV1A2QkxqbnpobzJwOXdzYTBZaFZYdnNkeDE1VVl0bm4weHFiLXdON2gtZzAwMTkxNWRoZldFM2djSkNHVS0= -Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback - -Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyUmJleVpTOF9OaFV3NGVfcWVBX2oxSjUwMWRGOFZRWFRIN1FZRzZ6U3VQMlg5a21RY1drTHh3U254LW4zM1A1cXQ1TTFWYlNoek9hSHJIeE4tbm1wU1lKRXlKNU5HVWI4VGZwTVE0VnJGaV8wZmNvdkVrMjJGeXdmZ3UyNmVXN1E= -Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback -Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com -Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyY2pxMDh0U0RqWERianBMTTNtSUZPSzhKUzh4S0RTenR2MmxnRDlvQzJjbDVTczRWLUJtVnhxWTE2MmUxQjJia2xJcVUzVlFlUnpma040NFdHRzVNRUt0OXR0c2JkTkRmQ1RIYllXbXFFaExIQWNycFVHbUxHbmtYOVhOVUV2MFY= -Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback - -# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. 
-Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4 -Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ== -Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/clickup/auth/connect/callback - -# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI. - -# Stripe Billing (both end with _SECRET for encryption script) -STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09 -STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08= -STRIPE_API_VERSION = 2026-01-28.clover -STRIPE_AUTOMATIC_TAX_ENABLED = false -STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah - - -# AI configuration -Connector_AiOpenai_API_SECRET = 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 -Connector_AiAnthropic_API_SECRET = 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 -Connector_AiPerplexity_API_SECRET = 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 -Connector_AiTavily_API_SECRET = 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 -Connector_AiPrivateLlm_API_SECRET = 
PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo= -Connector_AiMistral_API_SECRET = H55rGkR3ojIhcp4YMMlgUStgvz7Wym5c - -Service_MSFT_TENANT_ID = common - -# Google Cloud Speech Services configuration -Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4NFQxaF9uN3h1cVB6dnZid1c1R1VfNDlSQ1NHMEVDZWtKanpMQ29CLXc1MXBqRm1hQ0YtWVhaejBMY1ZTOEFEVlpWQ3hrYkFza1E2RDNsYkdMMndNR0VGNTMwVDRGdURJY3hyaVFxVjEtSEYwNHJzeWM3WmlpZW9jU2E3NTgycEV2allqQ3dJRTNyRFAzaDJ6dklKeXpNRkJhYjFzUkptN2dpbkNpMklrcGxuZl9vTkt3T0JvNm1YTXd5UlkwZWptUXdWVFpnV2J4X3J2WUhIUlFkSElFVnlqMnlJRnNHTnlpMWs2R1dZc2ROWjNYZG85cndmd1E5cUZnVmZRYnVjTG43dXFmSWd2bGFfVWFWSmtpWkpndWNlSUNwcnFNU2NqZXFaV0xsY3l3SElLRkVHcHZGZERKV1ltcGhTS0dhTko1VTJLYzNoZjRkSGVEX3dTMWVVTmdDczV5cE1JQUdSbUJGUm11eFhTVjJHbkt0SzB4UG1Dc2xmbnp1Y041Y2RTeWRuWGdmQy1sTGx0MGtnM2VJQ3EyLXViRlNhTU9ybzZkR1N1bXE5SXhlZENWRFpWSGlYOWx4SUQ3UlR0ZEVxQkxNakRUVFRiUmFnbklOalphLUZkRFVVaXBRUk5NZW5PaUZydTFmQkNPSTdTVTNZd0plWXllNVFJdmN4MVcyTGlwMGFtVjBzOGRxR1FjbzhfYW5zdTB0ZEZBTTJhakltazh1dktNMUZsOUItdFdTb1pIaUxySllXNkdlY20zUS0wTnpFNTB2SU5acG1VcXhyaHBmME8takw3RDh5T043T2VGOV92TzNya2pWSlpYVjZDdXlZcjM3a0hPTlhkaW9oQmxqQlpGRFYyTTY4WmZmT3k4Tk1tdXRuSGdTUVpNT2NKenhXb05PdXBfSEdhMTNxNjdpNXlKUUI2YUgydFFPX1VvXzVJb0UxWTU2YVNiNDQ0QndZanhMMHR1cGdHWGhvcEg1QXEtSXZJdTdZUE12ZEVVWkF4QmtsQS1GYnY3SFIxSHlsOGVfcEpGS1A4QUVEQWNEOFZYYlljQ3ByTU03YU16Y0UzUnJQZEprSWNjT1ZXVEtDWi03Y3ZzRVdYUTlabXJISEo5THRHVXVuM0xqbzA4bGVlZVpOMk1QMmptb21tV0pTMlVoOXdWVU95UW1iQmttc2w1RG9mMWwxXzg1T2IxYUVmTUJEZkpUdTFDTzZ3RlBFeUFiX01iRTZNWkNaSG45TkFOM2pzbUJRZ2N0VFpoejJUTG1RODY3TzZpSzVkYUQzaEpfY2pSTkRzU0VpanlkdXVQQmJ2WU5peno4QWNLTDVxZTlhSHI3NnNiM0k0Y3JkQ0xaOU05bGtsQl8zQklvaktWSDZ4aVp2MHlYelJuUDJyTU9CZC1OZjJxNFc1dDcwSUlxaVh1LTMyWWFwU0IwUU9kOUFpMWpnOERtLTh1VmJiNGVwcXBMbU5fMjVZc0hFbmxQT2puSFd1ZGpyTkphLU5sVlBZWWxrWEZrWGJQWmVkN19tZFZfZ1l1V3pSWlA0V0ZxM2lrWnl2NU9WeTdCbDROSmhfeENKTFhMVXk1d195S2
JMUFJoRXZjcVo4V2g0MTNKRnZhUE1wRkNPM3FZOGdVazJPeW5PSGpuZnFGTTdJMkRnam5rUlV6NFlqODlIelRYaEN5VjdJNnVwbllNODNCTFRHMWlXbmM1VlRxbXB3Wm9LRjVrQUpjYzRNMThUMWwwSVhBMUlyamtPZnE4R0o4bEdHay1zMjR5RDJkZ1lYRHZaNHVHU2otR3ZpN25LZlEySEU0UmdTNzJGVHNWQXMyb0dVMV9WUE13ODhZWUFaakxGOWZieGNXZkNYRnV5djEyWTZLcmdrajRBLU1rS1Z0VVRkOWlDMU9fMGVmYXFhZXJGMUhpNkdmb2hkbzZ1OWV6VlNmVzNISjVYTFh6SjJNdWR5MWZidE8yVEo2dnRrZXhMRXBPczUwTG13OGhNUVpIQm0zQmRKRnJ0Nl8wNW1Ob0dHRDVpU0NWREV3TkY2SjktdVBkMFU1ZXBmSFpHQ3FHNTRZdTJvaExpZVEtLTU4YTVyeFBpNDdEajZtWUc4c1dBeUJqQ3NIY1NLS0FIMUxGZzZxNFNkOG9ORGNHWWJCVnZuNnJVTEtoQi1mRTZyUl81ZWJJMi1KOGdERzBhNVRZeHRYUUlqY2JvMFlaNHhWMU9pWFFiZjdaLUhkaG15TTBPZVlkS2R5UVdENTI4QVFiY1RJV0ZNZnlpVWxfZmlnN1BXbGdrbjFGUkhzYl9qeHBxVVJacUE4bjZETENHVFpSamh0NVpOM2hMYTZjYzBuS3J0a3hhZGxSM1V5UHd2OTU3ZHY0Yy1xWDBkWUk0Ymp0MWVrS3YzSktKODhQZnY3QTZ1Wm1VZkZJbS1jamdreks1ZlhpQjFOUDFiOHJ2Nm9NcmdTdU5LQXV2RkZWZEFNZnVKUjVwcVY3dDdhQnpmRVJ6SmlvVXpDM0ZiYXh5bGE2X04tTE9qZ3BiTnN3TF9ZaFRxSUpjNjB1dXZBcy1TZHRHTjFjSUR3WUl4cE9VNzB5Rkk4U3Z1SVZYTl9sYXlZVk83UnFrMlVmcnBpam9lRUlCY19DdVJwOXl2TVVDV1pMRFZTZk9MY3Z1eXA0MnhGazc5YllQaWtOeTc4NjlOa2lGY05RRzY1cG9nbGpYelc4c3FicWxWRkg0YzRSamFlQ19zOU14YWJreU9pNDREZVJ3a0REMUxGTzF1XzI1bEF3VXVZRjlBeWFiLXJsOXgza3VZem1WckhWSnVNbDBNcldadU8xQ3RwOTl5NGgtVlR0QklCLWl5WkE4V1FlQTBCOVU1RE9sQlRrYUNZOGdfUmEwbEZvUTFGUEFWVmQ4V1FhOU9VNjZqemRpZm1sUDhZQTJ0YVBRbWZldkF5THV4QXpfdUtNZ0tlcGdSRFM3c0lDOTNQbnBxdmxYYWNpTmI3MW9BMlZIdTQ5RldudHpNQWQ5NDNPLVVTLXVVNzdHZXh4UXpZa3dVa2J4dTFDV1RkYjRnWXU2M3lJekRYWGNMcWU5OVh6U2xZWDh6MmpqcnpiOHlnMjA5S3RFQm1NZjNSM21adkVnTUpSYVhkTzNkNnJCTmljY0x1cl9kMkx3UHhySjZEdHREanZERzNEUTFlTkR0NWlBczAtdmFGTjdZNVpTMlkxV2czYW5RN2lqemg4eUViZDV6RjdKNXdFcUlvcVhoNkJ6eVJkR1pua1hnNzQwOEs2TXJYSlpGcW9qRDU2QjBOWFFtdXBJRkRKbmdZUF9ZSmRPVEtvUjVhLTV1NjdXQjRhS0duaEtJb2FrQnNjUTRvdFMxdkdTNk1NYlFHUFhhYTJ1eUN3WHN4UlJ4UjdrZjY0SzFGYWVFN1k0cGJnc1RjNmFUenR4NHljbVhablZSWHZmUVN3cXRHNjhsX1BSZWEzdTJUZFA0S2pTaU9YMnZIQ1ZPcGhWMFJqZkVEMWRMR1h3SnU0Z2FzZ3VGM3puNzdhVjhaQXNIWHFsbjB0TDVYSFdSNV9rdWhUUUhSZHBGYkJIVDB5SDdlMC13QTVnS0g5Qkg5RGNxSG
JlelVndUhPcEQ0QkRKMTJTZUM1OXJhVm0zYjU0OVY2dk9MQVBheklIQXpVNW9Yc0ROVjEzaFZTWmVxYlBWMlNlSzladzJ6TmNuMG5FVVZkN1VZN1pfS2ZHa0lQcE80S24wSnQtVlJVV09OVWJ3M09YMkZpV2ktVF9ENHhKU2dfYUQ2aUVyamk0VHJHQmVfVHU4clpUTFoteW5aSWRPV1M0RDRMTms4NGRoYmJfVE82aUl2X3VieVJOdDhBQmRwdzdnRTVBNzZwaW93dUlZb3ZRYUtOeG9ULWxvNVp5a0haSjdkcUhRb3d6UGIxRUpCVkVYX2d6TkRqQVozUWxkNGFoc1FXYVd2YWNkME9Qclo0bjYxMFRWTy1nbnI5NTBJNzRMMDluUXRKYTFqQUN4d0d5aHVlamN3Tkk3NWJXeXR0TW9BeUg5Vnp4Q2RnZUY3b3AtMDlrNmlrSGR0eGRtbUdUd2lFRWg4MklEeWJHN2wwZEpVSXMxNDNOWjRFS0tPdWxhMmFCckhfRENIY184aEFDZXNrRDl2dHQtQW12UnRuQXJjaDJoTUpiYkNWQUtfRG9GMUZoNWM4UnBYZ29RWWs2NHcyUm5kdTF3Vk1GeFpiRUJLaVZ2UGFjbi1jV3lMV0N2ZDl4VERPN295X01NNG56ZjZkRzZoYUtmY1E5NlVXemx2SnVfb19iSXg0R2M3Mjd1a2JRPT0= - -# Feature SyncDelta JIRA configuration -Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4d3Z4d2x6N1FhUktMU0RKbkxfY2pTQkRzXzJ6UXVEbDNCaFM3UHMtQVFGYzNmYWs4N0lMM1R2SFJuZTVFVmx6MGVEbXc5U3NOTnY1TWN0ZDNaamlHQWloalM3VldmREJNSHQ1TlVkSVFJMTVhQWVGSVRMTGw4UTBqNGlQZFVuaHp4WUlKemR5UnBXZlh0REJFLXJ4ejR3PT0= - -# Teamsbot Browser Bot Service -TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io - -# Debug Configuration -APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE -APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat -APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE -APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync - -# Azure Communication Services Email Configuration -MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt -MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss diff --git a/modules/auth/oauthConnectTicket.py b/modules/auth/oauthConnectTicket.py new file mode 100644 index 00000000..f54187cb --- /dev/null +++ b/modules/auth/oauthConnectTicket.py @@ -0,0 +1,101 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. 
+""" +Short-lived signed tickets for OAuth data-connection popups. + +The UI authenticates API calls with a Bearer token in localStorage, but +``window.open(authUrl)`` cannot send that header. Cross-origin httpOnly cookies +are unreliable in int/prod (UI on poweron-center.net, API on poweron.swiss). +Login popups work without a session because ``/auth/login`` is public; connect +popups hit ``/auth/connect``, which used to require ``getCurrentUser``. + +Flow: POST ``/api/connections/{id}/connect`` (Bearer-authenticated) issues a +ticket; the popup opens ``/auth/connect?connectTicket=...`` which validates the +ticket instead of cookies. +""" + +import time +from typing import Any, Dict, Tuple + +from fastapi import HTTPException, status +from jose import JWTError, jwt as jose_jwt + +from modules.auth.jwtService import ALGORITHM, SECRET_KEY +from modules.datamodels.datamodelUam import AuthAuthority, User, UserConnection +from modules.interfaces.interfaceDbApp import getInterface, getRootInterface +from modules.shared.i18nRegistry import apiRouteContext + +_msg = apiRouteContext("oauthConnectTicket") + +_CONNECT_TICKET_TTL_SEC = 600 + + +def issue_connect_ticket(flow: str, connection_id: str, user_id: str) -> str: + """Issue a short-lived JWT for starting a data-connection OAuth popup.""" + body = { + "flow": flow, + "connectionId": connection_id, + "userId": str(user_id), + "exp": int(time.time()) + _CONNECT_TICKET_TTL_SEC, + } + return jose_jwt.encode(body, SECRET_KEY, algorithm=ALGORITHM) + + +def parse_connect_ticket(ticket: str, expected_flow: str) -> Dict[str, Any]: + """Validate connect ticket signature, expiry, and flow.""" + try: + data = jose_jwt.decode(ticket, SECRET_KEY, algorithms=[ALGORITHM]) + except JWTError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=_msg("Invalid or expired connect ticket"), + ) from e + if data.get("flow") != expected_flow: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + 
detail=_msg("Invalid connect ticket flow"), + ) + connection_id = data.get("connectionId") + user_id = data.get("userId") + if not connection_id or not user_id: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=_msg("Incomplete connect ticket"), + ) + return data + + +def resolve_connect_context( + connect_ticket: str, + connection_id: str, + expected_flow: str, + authority: AuthAuthority, +) -> Tuple[User, UserConnection]: + """Validate ticket and return the user + connection for OAuth redirect.""" + state = parse_connect_ticket(connect_ticket, expected_flow) + if state.get("connectionId") != connection_id: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=_msg("Connection ID does not match connect ticket"), + ) + + root = getRootInterface() + user = root.getUser(state["userId"]) + if not user: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=_msg("User not found"), + ) + + interface = getInterface(user) + connection = None + for conn in interface.getUserConnections(user.id): + if conn.id == connection_id and conn.authority == authority: + connection = conn + break + if not connection: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=_msg("Connection not found"), + ) + return user, connection diff --git a/modules/features/graphicalEditor/conditionOperators.py b/modules/features/graphicalEditor/conditionOperators.py new file mode 100644 index 00000000..b375e407 --- /dev/null +++ b/modules/features/graphicalEditor/conditionOperators.py @@ -0,0 +1,612 @@ +# Copyright (c) 2025 Patrick Motsch +"""Backend-driven condition operator catalog and value-kind resolution for flow.ifElse.""" + +from __future__ import annotations + +import logging +import re +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES +from modules.shared.i18nRegistry import resolveText, t + 
+logger = logging.getLogger(__name__) + +VALUE_KINDS = ( + "string", + "number", + "boolean", + "datetime", + "array", + "object", + "file", + "context", + "unknown", +) + +CONTENT_TYPE_OPTIONS = ("text", "image", "table", "code", "media") +OUTPUT_MODE_OPTIONS = ("blob", "lines", "pages", "chunks", "structured") +LANGUAGE_OPTIONS = ("de", "en", "fr", "it") +MIME_EXAMPLE_OPTIONS = ( + "application/pdf", + "image/png", + "image/jpeg", + "text/plain", + "text/csv", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", +) + +_NODE_BY_TYPE = {n["id"]: n for n in STATIC_NODE_TYPES} + + +def _op( + op_id: str, + label_key: str, + *, + needs_value: bool = True, + value_input: Optional[Dict[str, Any]] = None, +) -> Dict[str, Any]: + out: Dict[str, Any] = {"id": op_id, "labelKey": label_key, "needsValue": needs_value} + if value_input is not None: + out["valueInput"] = value_input + return out + + +def _build_catalog() -> Dict[str, List[Dict[str, Any]]]: + text_in = {"kind": "text"} + num_in = {"kind": "number"} + date_in = {"kind": "date"} + regex_in = {"kind": "regex"} + select = lambda opts, kind: {"kind": kind, "options": list(opts)} + + return { + "string": [ + _op("eq", "condition.op.eq", value_input=text_in), + _op("neq", "condition.op.neq", value_input=text_in), + _op("contains", "condition.op.contains", value_input=text_in), + _op("not_contains", "condition.op.not_contains", value_input=text_in), + _op("starts_with", "condition.op.starts_with", value_input=text_in), + _op("ends_with", "condition.op.ends_with", value_input=text_in), + _op("regex", "condition.op.regex", value_input=regex_in), + _op("empty", "condition.op.empty", needs_value=False), + _op("not_empty", "condition.op.not_empty", needs_value=False), + ], + "number": [ + _op("eq", "condition.op.eq", value_input=num_in), + _op("neq", "condition.op.neq", value_input=num_in), + _op("lt", "condition.op.lt", value_input=num_in), + _op("lte", "condition.op.lte", value_input=num_in), + 
_op("gt", "condition.op.gt", value_input=num_in), + _op("gte", "condition.op.gte", value_input=num_in), + _op("empty", "condition.op.empty", needs_value=False), + _op("not_empty", "condition.op.not_empty", needs_value=False), + ], + "boolean": [ + _op("is_true", "condition.op.is_true", needs_value=False), + _op("is_false", "condition.op.is_false", needs_value=False), + ], + "datetime": [ + _op("eq", "condition.op.eq", value_input=date_in), + _op("neq", "condition.op.neq", value_input=date_in), + _op("before", "condition.op.before", value_input=date_in), + _op("after", "condition.op.after", value_input=date_in), + _op("empty", "condition.op.empty", needs_value=False), + _op("not_empty", "condition.op.not_empty", needs_value=False), + ], + "array": [ + _op("contains", "condition.op.contains", value_input=text_in), + _op("not_contains", "condition.op.not_contains", value_input=text_in), + _op("empty", "condition.op.empty", needs_value=False), + _op("not_empty", "condition.op.not_empty", needs_value=False), + _op("length_eq", "condition.op.length_eq", value_input=num_in), + _op("length_gt", "condition.op.length_gt", value_input=num_in), + _op("length_lt", "condition.op.length_lt", value_input=num_in), + ], + "object": [ + _op("empty", "condition.op.empty", needs_value=False), + _op("not_empty", "condition.op.not_empty", needs_value=False), + ], + "file": [ + _op("exists", "condition.op.exists", needs_value=False), + _op("not_exists", "condition.op.not_exists", needs_value=False), + _op("mime_is", "condition.op.mime_is", value_input=select(MIME_EXAMPLE_OPTIONS, "mime")), + _op("mime_contains", "condition.op.mime_contains", value_input=text_in), + _op("empty", "condition.op.empty", needs_value=False), + _op("not_empty", "condition.op.not_empty", needs_value=False), + ], + "context": [ + _op( + "contains_content", + "condition.op.contains_content", + value_input=select(CONTENT_TYPE_OPTIONS, "contentType"), + ), + _op("language_is", "condition.op.language_is", 
value_input=select(LANGUAGE_OPTIONS, "language")), + _op( + "output_mode_is", + "condition.op.output_mode_is", + value_input=select(OUTPUT_MODE_OPTIONS, "outputMode"), + ), + _op("file_count_eq", "condition.op.file_count_eq", value_input=num_in), + _op("file_count_gt", "condition.op.file_count_gt", value_input=num_in), + _op("file_count_lt", "condition.op.file_count_lt", value_input=num_in), + _op("slot_count_eq", "condition.op.slot_count_eq", value_input=num_in), + _op("slot_count_gt", "condition.op.slot_count_gt", value_input=num_in), + _op("slot_count_lt", "condition.op.slot_count_lt", value_input=num_in), + _op("regex_on_text", "condition.op.regex_on_text", value_input=regex_in), + _op("empty", "condition.op.empty", needs_value=False), + _op("not_empty", "condition.op.not_empty", needs_value=False), + ], + "unknown": [ + _op("eq", "condition.op.eq", value_input=text_in), + _op("empty", "condition.op.empty", needs_value=False), + _op("not_empty", "condition.op.not_empty", needs_value=False), + ], + } + + +CONDITION_OPERATOR_CATALOG: Dict[str, List[Dict[str, Any]]] = _build_catalog() + +_LABEL_KEYS = { + "condition.op.eq": t("ist gleich"), + "condition.op.neq": t("ist ungleich"), + "condition.op.contains": t("enthält"), + "condition.op.not_contains": t("enthält nicht"), + "condition.op.starts_with": t("beginnt mit"), + "condition.op.ends_with": t("endet mit"), + "condition.op.regex": t("Regex-Match"), + "condition.op.empty": t("ist leer"), + "condition.op.not_empty": t("ist nicht leer"), + "condition.op.lt": t("kleiner als"), + "condition.op.lte": t("≤"), + "condition.op.gt": t("größer als"), + "condition.op.gte": t("≥"), + "condition.op.is_true": t("ist wahr"), + "condition.op.is_false": t("ist falsch"), + "condition.op.before": t("vor"), + "condition.op.after": t("nach"), + "condition.op.exists": t("vorhanden"), + "condition.op.not_exists": t("nicht vorhanden"), + "condition.op.mime_is": t("MIME-Typ ist"), + "condition.op.mime_contains": t("MIME-Typ enthält"), 
+ "condition.op.contains_content": t("enthält Inhaltstyp"), + "condition.op.language_is": t("Sprache ist"), + "condition.op.output_mode_is": t("Ausgabemodus ist"), + "condition.op.file_count_eq": t("Dateianzahl gleich"), + "condition.op.file_count_gt": t("Dateianzahl größer als"), + "condition.op.file_count_lt": t("Dateianzahl kleiner als"), + "condition.op.slot_count_eq": t("Slot-Anzahl gleich"), + "condition.op.slot_count_gt": t("Slot-Anzahl größer als"), + "condition.op.slot_count_lt": t("Slot-Anzahl kleiner als"), + "condition.op.regex_on_text": t("Regex auf extrahiertem Text"), + "condition.op.length_eq": t("Länge gleich"), + "condition.op.length_gt": t("Länge größer als"), + "condition.op.length_lt": t("Länge kleiner als"), +} + + +def localize_operator_catalog(lang: str = "de") -> Dict[str, List[Dict[str, Any]]]: + """Serialize catalog with resolved labels for API consumers.""" + out: Dict[str, List[Dict[str, Any]]] = {} + for kind, ops in CONDITION_OPERATOR_CATALOG.items(): + loc_ops: List[Dict[str, Any]] = [] + for op in ops: + entry = dict(op) + label_key = op.get("labelKey", "") + label_src = _LABEL_KEYS.get(str(label_key), label_key) + entry["label"] = resolveText(label_src, lang) + loc_ops.append(entry) + out[kind] = loc_ops + return out + + +def catalog_type_to_value_kind(catalog_type: str) -> str: + """Map port-catalog / dataPickOptions type strings to condition valueKind.""" + ct = (catalog_type or "").strip() + if not ct or ct == "Any": + return "unknown" + low = ct.lower() + if low in ("str", "string", "email", "url"): + return "string" + if low in ("int", "float", "number"): + return "number" + if low == "bool": + return "boolean" + if low in ("date", "datetime", "timestamp"): + return "datetime" + if low.startswith("list[") or low == "list": + return "array" + if low.startswith("dict") or low == "dict": + return "object" + if low in ("file", "actiondocument", "fileref"): + return "file" + return "unknown" + + +def _paths_equal(a: List[Any], b: 
List[Any]) -> bool: + if len(a) != len(b): + return False + return all(str(x) == str(y) for x, y in zip(a, b)) + + +def _is_context_producer(node_type: str) -> bool: + return node_type in ("context.extractContent", "context.mergeContext", "context.setContext") + + +def _path_suggests_context(path: List[Any], producer_type: str) -> bool: + if not path: + return _is_context_producer(producer_type) + last = str(path[-1]) + if last in ("data", "files", "merged", "presentation"): + return True + if "files" in [str(p) for p in path]: + return True + if _is_context_producer(producer_type) and path[0] in ("data", "response", "merged"): + return True + return False + + +def _path_suggests_file(path: List[Any], producer_type: str) -> bool: + path_str = [str(p) for p in path] + if producer_type == "input.upload": + return True + if "file" in path_str or "documents" in path_str or "mimeType" in path_str or "fileName" in path_str: + return True + if producer_type.startswith("sharepoint.") and "file" in path_str: + return True + return False + + +def resolve_value_kind(graph: Dict[str, Any], ref: Dict[str, Any], *, _skip_upstream: bool = False) -> str: + """Resolve condition valueKind for a DataRef against the workflow graph.""" + if not isinstance(ref, dict): + return "unknown" + producer_id = ref.get("nodeId") + path = ref.get("path") or [] + if not isinstance(path, list): + path = [] + if not producer_id: + return "unknown" + + nodes = graph.get("nodes") or [] + node_by_id = {n.get("id"): n for n in nodes if n.get("id")} + producer = node_by_id.get(producer_id) or {} + producer_type = str(producer.get("type") or "") + + if _path_suggests_context(path, producer_type): + return "context" + if _path_suggests_file(path, producer_type): + tail = str(path[-1]) if path else "" + if tail in ("mimeType", "fileName"): + return "string" + return "file" + + if not _skip_upstream: + from modules.features.graphicalEditor.upstreamPathsService import compute_upstream_paths + + target_id = 
graph.get("targetNodeId") or producer_id + matched_type: Optional[str] = None + for entry in compute_upstream_paths(graph, target_id): + if entry.get("producerNodeId") != producer_id: + continue + entry_path = entry.get("path") or [] + if _paths_equal(list(entry_path), list(path)): + matched_type = str(entry.get("type") or "Any") + break + + if matched_type is None and path: + parent_path = list(path[:-1]) + for entry in compute_upstream_paths(graph, target_id): + if entry.get("producerNodeId") != producer_id: + continue + if _paths_equal(list(entry.get("path") or []), parent_path): + matched_type = str(entry.get("type") or "Any") + break + + if matched_type: + vk = catalog_type_to_value_kind(matched_type) + if vk != "unknown": + return vk + + if producer_type in ("trigger.form", "input.form") and path and str(path[0]) == "payload": + return "string" + + return "unknown" + + +def resolve_condition_meta( + graph: Dict[str, Any], + ref: Dict[str, Any], + *, + lang: str = "de", +) -> Dict[str, Any]: + """Return valueKind and localized operators for a DataRef.""" + value_kind = resolve_value_kind(graph, ref) + catalog = localize_operator_catalog(lang) + operators = catalog.get(value_kind) or catalog.get("unknown", []) + return {"valueKind": value_kind, "operators": operators} + + +def _is_empty_value(val: Any) -> bool: + if val is None: + return True + if val == "": + return True + if isinstance(val, (list, dict, tuple)) and len(val) == 0: + return True + return False + + +def _parse_datetime(val: Any) -> Optional[datetime]: + if val is None: + return None + if hasattr(val, "timestamp"): + return val # type: ignore[return-value] + s = str(val).strip() + if not s: + return None + for fmt in ("%Y-%m-%d", "%d.%m.%Y", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S"): + try: + return datetime.strptime(s, fmt) + except ValueError: + continue + try: + return datetime.fromisoformat(s.replace("Z", "+00:00")) + except ValueError: + return None + + +def _compare_dates(left: Any, right: 
Any, op) -> bool: + try: + a, b = _parse_datetime(left), _parse_datetime(right) + if a is None or b is None: + return False + return op(a, b) + except Exception as e: + logger.warning("_compare_dates failed: left=%s right=%s: %s", left, right, e) + return False + + +def _file_exists(val: Any) -> bool: + if val is None: + return False + if isinstance(val, dict): + return bool(val.get("url") or val.get("name") or val.get("fileId")) + if isinstance(val, str): + return len(val.strip()) > 0 + return bool(val) + + +def _extract_mime(val: Any) -> str: + if isinstance(val, dict): + return str(val.get("mimeType") or val.get("contentType") or "") + return "" + + +def _presentation_envelopes_from_value(val: Any) -> List[Dict[str, Any]]: + try: + from modules.workflows.methods.methodContext.actions.extractContent import ( + normalize_presentation_envelopes, + ) + + return normalize_presentation_envelopes(val) + except Exception as e: + logger.debug("_presentation_envelopes_from_value: %s", e) + return [] + + +def _joined_text_from_context(val: Any) -> str: + try: + from modules.workflows.methods.methodContext.actions.extractContent import ( + joined_text_from_extract_node_data, + ) + + return joined_text_from_extract_node_data(val) or "" + except Exception: + return "" + + +def _iter_presentation_parts(envelope: Dict[str, Any]) -> List[Dict[str, Any]]: + parts: List[Dict[str, Any]] = [] + files = envelope.get("files") or {} + if not isinstance(files, dict): + return parts + for bucket in files.values(): + if not isinstance(bucket, dict): + continue + data = bucket.get("data") + mode = str(bucket.get("outputMode") or "").strip().lower() + if mode == "blob" and isinstance(data, str): + from modules.workflows.methods.methodContext.actions.extractContent import parse_blob_data_segments + + parts.extend(parse_blob_data_segments(data)) + continue + if isinstance(data, list): + for slot in data: + if isinstance(slot, dict): + parts.append(slot) + elif isinstance(data, dict): + 
parts.append(data) + return parts + + +def _context_has_content_type(val: Any, content_type: str) -> bool: + target = (content_type or "").strip().lower() + if not target: + return False + for env in _presentation_envelopes_from_value(val): + for part in _iter_presentation_parts(env): + tg = (part.get("typeGroup") or part.get("contentType") or "").strip().lower() + if target == "media": + if tg in ("image", "media", "video", "audio"): + return True + elif tg == target: + return True + return False + + +def _guess_language_code(text: str) -> str: + sample = (text or "").strip()[:2000] + if not sample: + return "" + de_hits = len(re.findall(r"\b(der|die|das|und|ist|nicht|mit)\b", sample, re.I)) + en_hits = len(re.findall(r"\b(the|and|is|not|with|for)\b", sample, re.I)) + fr_hits = len(re.findall(r"\b(le|la|les|et|est|pas|avec)\b", sample, re.I)) + it_hits = len(re.findall(r"\b(il|la|lo|gli|e|non|con)\b", sample, re.I)) + scores = {"de": de_hits, "en": en_hits, "fr": fr_hits, "it": it_hits} + best = max(scores, key=scores.get) + return best if scores[best] > 0 else "" + + +def _context_language(val: Any) -> str: + if isinstance(val, dict): + meta = val.get("_meta") + if isinstance(meta, dict): + lang = meta.get("language") or meta.get("detectedLanguage") + if lang: + return str(lang).strip().lower()[:2] + text = _joined_text_from_context(val) + return _guess_language_code(text) + + +def _context_output_mode(val: Any) -> str: + for env in _presentation_envelopes_from_value(val): + om = env.get("outputMode") + if om: + return str(om) + files = env.get("files") or {} + if isinstance(files, dict): + for bucket in files.values(): + if isinstance(bucket, dict) and bucket.get("outputMode"): + return str(bucket.get("outputMode")) + if isinstance(val, dict) and val.get("outputMode"): + return str(val.get("outputMode")) + return "" + + +def _context_file_count(val: Any) -> int: + for env in _presentation_envelopes_from_value(val): + fo = env.get("fileOrder") + if isinstance(fo, 
list): + return len(fo) + return 0 + + +def _context_slot_count(val: Any) -> int: + total = 0 + for env in _presentation_envelopes_from_value(val): + files = env.get("files") or {} + if not isinstance(files, dict): + continue + for bucket in files.values(): + if not isinstance(bucket, dict): + continue + data = bucket.get("data") + if isinstance(data, list): + total += len(data) + elif data is not None: + total += 1 + return total + + +def apply_condition_operator(left: Any, operator: str, right: Any, value_kind: Optional[str] = None) -> bool: + """Evaluate a single condition operator against a resolved left-hand value.""" + op = (operator or "eq").strip() + vk = (value_kind or "unknown").strip() + + if op == "eq": + if vk == "datetime": + return _compare_dates(left, right, lambda a, b: a == b) + return left == right + if op == "neq": + if vk == "datetime": + return _compare_dates(left, right, lambda a, b: a != b) + return left != right + if op in ("lt", "lte", "gt", "gte"): + try: + l = float(left) if left is not None else 0 + r = float(right) if right is not None else 0 + if op == "lt": + return l < r + if op == "lte": + return l <= r + if op == "gt": + return l > r + return l >= r + except (TypeError, ValueError): + return False + if op == "contains": + if isinstance(left, (list, tuple, set)): + return right in left or any(str(right) == str(x) for x in left) + return right is not None and str(right) in str(left or "") + if op == "not_contains": + if isinstance(left, (list, tuple, set)): + return right not in left and not any(str(right) == str(x) for x in left) + return right is None or str(right) not in str(left or "") + if op == "starts_with": + return right is not None and str(left or "").startswith(str(right)) + if op == "ends_with": + return right is not None and str(left or "").endswith(str(right)) + if op == "regex": + try: + return bool(re.search(str(right or ""), str(left or ""))) + except re.error as e: + logger.warning("regex operator failed: %s", e) + 
return False + if op == "empty": + return _is_empty_value(left) + if op == "not_empty": + return not _is_empty_value(left) + if op == "is_true": + return bool(left) + if op == "is_false": + return not bool(left) + if op == "before": + return _compare_dates(left, right, lambda a, b: a < b) + if op == "after": + return _compare_dates(left, right, lambda a, b: a > b) + if op == "exists": + return _file_exists(left) + if op == "not_exists": + return not _file_exists(left) + if op == "mime_is": + return _extract_mime(left).lower() == str(right or "").lower() + if op == "mime_contains": + return str(right or "").lower() in _extract_mime(left).lower() + if op in ("length_eq", "length_gt", "length_lt"): + try: + length = len(left) if left is not None else 0 + r = int(float(right)) + if op == "length_eq": + return length == r + if op == "length_gt": + return length > r + return length < r + except (TypeError, ValueError): + return False + if op == "contains_content": + return _context_has_content_type(left, str(right or "")) + if op == "language_is": + return _context_language(left) == str(right or "").strip().lower()[:2] + if op == "output_mode_is": + return _context_output_mode(left) == str(right or "") + if op == "file_count_eq": + return _context_file_count(left) == int(float(right)) + if op == "file_count_gt": + return _context_file_count(left) > int(float(right)) + if op == "file_count_lt": + return _context_file_count(left) < int(float(right)) + if op == "slot_count_eq": + return _context_slot_count(left) == int(float(right)) + if op == "slot_count_gt": + return _context_slot_count(left) > int(float(right)) + if op == "slot_count_lt": + return _context_slot_count(left) < int(float(right)) + if op == "regex_on_text": + try: + text = _joined_text_from_context(left) + return bool(re.search(str(right or ""), text)) + except re.error as e: + logger.warning("regex_on_text failed: %s", e) + return False + return False diff --git 
a/modules/features/graphicalEditor/entryPoints.py b/modules/features/graphicalEditor/entryPoints.py index 9ade2e96..e70cfebb 100644 --- a/modules/features/graphicalEditor/entryPoints.py +++ b/modules/features/graphicalEditor/entryPoints.py @@ -83,7 +83,60 @@ def normalize_invocations_list(items: Optional[List[Any]]) -> List[Dict[str, Any return out -# Schedule / cron: wire an external job runner (APScheduler, Celery, system cron) to call +_NODE_TYPE_TO_KIND = { + "trigger.manual": "manual", + "trigger.form": "form", + "trigger.schedule": "schedule", +} + + +def invocations_synced_with_graph( + graph: Optional[Dict[str, Any]], + stored_invocations: Optional[List[Any]], +) -> List[Dict[str, Any]]: + """Derive primary invocation (index 0) from the first start node in ``graph``. + + If the graph has no start node, only non-primary stored invocations are kept + (no injected default). Document order in ``nodes`` defines which start wins. + """ + from modules.workflows.automation2.graphUtils import getTriggerNodes + + g = graph if isinstance(graph, dict) else {} + nodes = g.get("nodes") or [] + stored = list(stored_invocations or []) + rest: List[Dict[str, Any]] = [] + for raw in stored[1:]: + if isinstance(raw, dict): + rest.append(normalize_invocation_entry(raw)) + + triggers = getTriggerNodes(nodes) + if not triggers: + return rest + + node = triggers[0] + nt = str(node.get("type", "")).strip() + kind = _NODE_TYPE_TO_KIND.get(nt, "manual") + nid = node.get("id") + if not nid: + nid = str(uuid.uuid4()) + raw_title = node.get("title") or node.get("label") or "Start" + + old_primary = stored[0] if stored and isinstance(stored[0], dict) else {} + config: Dict[str, Any] = {} + if isinstance(old_primary.get("config"), dict) and old_primary.get("kind") == kind: + config = dict(old_primary["config"]) + desc = old_primary.get("description") if isinstance(old_primary.get("description"), dict) else {} + + primary_raw: Dict[str, Any] = { + "id": str(nid), + "kind": kind, + 
"enabled": True, + "title": raw_title, + "description": desc, + "config": config, + } + primary = normalize_invocation_entry(primary_raw) + return [primary] + rest # POST .../execute with entryPointId set to a schedule entry — no separate in-process scheduler here yet. diff --git a/modules/features/graphicalEditor/interfaceFeatureGraphicalEditor.py b/modules/features/graphicalEditor/interfaceFeatureGraphicalEditor.py index b0291600..09192d2e 100644 --- a/modules/features/graphicalEditor/interfaceFeatureGraphicalEditor.py +++ b/modules/features/graphicalEditor/interfaceFeatureGraphicalEditor.py @@ -49,7 +49,7 @@ from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import ( AutoRun as Automation2WorkflowRun, AutoTask as Automation2HumanTask, ) -from modules.features.graphicalEditor.entryPoints import normalize_invocations_list +from modules.features.graphicalEditor.entryPoints import invocations_synced_with_graph from modules.connectors.connectorDbPostgre import DatabaseConnector from modules.shared.configuration import APP_CONFIG from modules.shared.dbRegistry import registerDatabase @@ -109,7 +109,7 @@ def getAllWorkflowsForScheduling() -> List[Dict[str, Any]]: if r.get("active") is False: continue wf = dict(r) - wf["invocations"] = normalize_invocations_list(wf.get("invocations")) + wf["invocations"] = invocations_synced_with_graph(wf.get("graph") or {}, wf.get("invocations")) invocations = wf.get("invocations") or [] primary = invocations[0] if invocations else {} if not isinstance(primary, dict): @@ -204,7 +204,7 @@ class GraphicalEditorObjects: ) rows = [dict(r) for r in records] if records else [] for wf in rows: - wf["invocations"] = normalize_invocations_list(wf.get("invocations")) + wf["invocations"] = invocations_synced_with_graph(wf.get("graph") or {}, wf.get("invocations")) return rows def getWorkflow(self, workflowId: str) -> Optional[Dict[str, Any]]: @@ -221,7 +221,7 @@ class GraphicalEditorObjects: if not records: return None wf = 
dict(records[0]) - wf["invocations"] = normalize_invocations_list(wf.get("invocations")) + wf["invocations"] = invocations_synced_with_graph(wf.get("graph") or {}, wf.get("invocations")) return wf def createWorkflow(self, data: Dict[str, Any]) -> Dict[str, Any]: @@ -234,10 +234,10 @@ class GraphicalEditorObjects: data["targetFeatureInstanceId"] = self.featureInstanceId if "active" not in data or data.get("active") is None: data["active"] = True - data["invocations"] = normalize_invocations_list(data.get("invocations")) + data["invocations"] = invocations_synced_with_graph(data.get("graph") or {}, data.get("invocations")) created = self.db.recordCreate(Automation2Workflow, data) out = dict(created) - out["invocations"] = normalize_invocations_list(out.get("invocations")) + out["invocations"] = invocations_synced_with_graph(out.get("graph") or {}, out.get("invocations")) try: from modules.shared.callbackRegistry import callbackRegistry callbackRegistry.trigger(_CALLBACK_WORKFLOW_CHANGED) @@ -252,11 +252,15 @@ class GraphicalEditorObjects: return None data.pop("mandateId", None) data.pop("featureInstanceId", None) - if "invocations" in data: - data["invocations"] = normalize_invocations_list(data.get("invocations")) + if "graph" in data or "invocations" in data: + g = data["graph"] if "graph" in data else existing.get("graph") + if not isinstance(g, dict): + g = {} + inv = data["invocations"] if "invocations" in data else existing.get("invocations") + data["invocations"] = invocations_synced_with_graph(g, inv) updated = self.db.recordModify(Automation2Workflow, workflowId, data) out = dict(updated) - out["invocations"] = normalize_invocations_list(out.get("invocations")) + out["invocations"] = invocations_synced_with_graph(out.get("graph") or {}, out.get("invocations")) try: from modules.shared.callbackRegistry import callbackRegistry callbackRegistry.trigger(_CALLBACK_WORKFLOW_CHANGED) diff --git a/modules/features/graphicalEditor/mainGraphicalEditor.py 
b/modules/features/graphicalEditor/mainGraphicalEditor.py index 86530123..d3d70381 100644 --- a/modules/features/graphicalEditor/mainGraphicalEditor.py +++ b/modules/features/graphicalEditor/mainGraphicalEditor.py @@ -32,11 +32,6 @@ UI_OBJECTS = [ "label": t("Editor", context="UI"), "meta": {"area": "editor"} }, - { - "objectKey": "ui.feature.graphicalEditor.workflows", - "label": t("Workflows", context="UI"), - "meta": {"area": "workflows"} - }, { "objectKey": "ui.feature.graphicalEditor.templates", "label": t("Vorlagen", context="UI"), diff --git a/modules/features/graphicalEditor/nodeDefinitions/ai.py b/modules/features/graphicalEditor/nodeDefinitions/ai.py index 43136394..a709f0be 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/ai.py +++ b/modules/features/graphicalEditor/nodeDefinitions/ai.py @@ -3,6 +3,131 @@ from modules.shared.i18nRegistry import t +from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import ( + CONTEXT_BUILDER_PARAM_DESCRIPTION, +) +from modules.features.graphicalEditor.nodeDefinitions.flow import ( + CONTEXT_ENVELOPE_DATA_PICK_OPTIONS, +) + +# Shared authoritative DataPicker paths (same handover idea as ``context.extractContent`` outputPorts). +ACTION_RESULT_DATA_PICK_OPTIONS = [ + { + "path": ["documents", 0, "documentData"], + "pickerLabel": t("Gesamter Inhalt"), + "detail": t( + "Strukturiertes Handover als JSON inklusive aller Textteile " + "und Verweisen auf ausgelagerte Bilder." 
+ ), + "recommended": True, + "type": "Any", + }, + { + "path": ["response"], + "pickerLabel": t("Nur Text"), + "detail": t("Verketteter Klartext aus allen erkannten Textteilen."), + "recommended": True, + "type": "str", + }, + { + "path": ["imageDocumentsOnly"], + "pickerLabel": t("Nur Bilder"), + "detail": t("Nur die extrahierten Bilddokumente als Liste, ohne JSON-Handover."), + "recommended": False, + "type": "List[ActionDocument]", + }, + { + "path": ["documents"], + "pickerLabel": t("Alle Dateitypen"), + "detail": t("Alle Ausgabedokumente nacheinander: JSON-Handover und Bilder."), + "recommended": False, + "type": "List[ActionDocument]", + }, +] + +AI_RESULT_DATA_PICK_OPTIONS = [ + *CONTEXT_ENVELOPE_DATA_PICK_OPTIONS, + { + "path": ["documents", 0, "documentData"], + "pickerLabel": t("Gesamter Inhalt"), + "detail": t( + "Hauptausgabedatei oder strukturierter Inhalt von ``documents[0]`` " + "(z. B. erzeugtes Dokument, JSON-Handover)." + ), + "recommended": False, + "type": "Any", + }, + { + "path": ["response"], + "pickerLabel": t("Nur Text"), + "detail": t("Modell-Antwort als reiner Fließtext (ohne eingebettete Bildbytes)."), + "recommended": False, + "type": "str", + }, + { + "path": ["imageDocumentsOnly"], + "pickerLabel": t("Nur Bilder"), + "detail": t("Nur Bild-Dokumente aus ``documents`` (ohne erstes Nicht-Bild-Artefakt, falls gesetzt)."), + "recommended": False, + "type": "List[ActionDocument]", + }, + { + "path": ["documents"], + "pickerLabel": t("Alle Ausgabedateien"), + "detail": t("Alle Dokumente der KI-Antwort: erzeugte Dateien, Bilder, Anhänge."), + "recommended": False, + "type": "List[Document]", + }, +] + +DOCUMENT_LIST_DATA_PICK_OPTIONS = [ + { + "path": ["documents"], + "pickerLabel": t("Alle Dokumente"), + "detail": t("Die vollständige Dokumentenliste."), + "recommended": True, + "type": "List[Document]", + }, + { + "path": ["documents", 0], + "pickerLabel": t("Erstes Dokument"), + "detail": t("Metadaten und Pfade des ersten 
Listeneintrags."), + "recommended": False, + "type": "Document", + }, + { + "path": ["count"], + "pickerLabel": t("Anzahl"), + "detail": t("Anzahl der Dokumente."), + "recommended": False, + "type": "int", + }, +] + +CONSOLIDATE_RESULT_DATA_PICK_OPTIONS = [ + { + "path": ["result"], + "pickerLabel": t("Konsolidiertes Ergebnis"), + "detail": t("Text oder Struktur nach Konsolidierung."), + "recommended": True, + "type": "Any", + }, + { + "path": ["mode"], + "pickerLabel": t("Modus"), + "detail": t("Verwendeter Konsolidierungsmodus."), + "recommended": False, + "type": "str", + }, + { + "path": ["count"], + "pickerLabel": t("Anzahl"), + "detail": t("Anzahl zusammengeführter Elemente."), + "recommended": False, + "type": "int", + }, +] + _AI_COMMON_PARAMS = [ {"name": "requireNeutralization", "type": "bool", "required": False, "frontendType": "checkbox", "default": False, @@ -25,12 +150,11 @@ AI_NODES = [ "frontendOptions": {"options": ["txt", "json", "md", "csv", "xml", "html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"]}, "description": t("Ausgabeformat"), "default": "txt"}, {"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden", - "description": t("Dokumente aus vorherigen Schritten"), "default": ""}, + "description": t("Dokumente aus vorherigen Schritten"), "default": "", + "graphInherit": {"port": 0, "kind": "documentListWire"}}, {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder", - "description": t("Daten aus vorherigen Schritten"), "default": ""}, - {"name": "documentTheme", "type": "str", "required": False, "frontendType": "select", - "frontendOptions": {"options": ["general", "finance", "legal", "technical", "hr"]}, - "description": t("Dokument-Thema (Style-Hinweis fuer den Renderer)"), "default": "general"}, + "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "", + "graphInherit": {"port": 0, "kind": "primaryTextRef"}}, {"name": "simpleMode", "type": "bool", "required": 
False, "frontendType": "checkbox", "description": t("Einfacher Modus"), "default": True}, ] + _AI_COMMON_PARAMS, @@ -39,7 +163,8 @@ AI_NODES = [ "inputPorts": {0: {"accepts": [ "FormPayload", "DocumentList", "AiResult", "TextResult", "Transit", "LoopItem", "ActionResult", ]}}, - "outputPorts": {0: {"schema": "AiResult"}}, + "outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}}, + "paramMappers": ["aiPromptLegacyAlias"], "meta": {"icon": "mdi-robot", "color": "#9C27B0", "usesAi": True}, "_method": "ai", "_action": "process", @@ -53,16 +178,18 @@ AI_NODES = [ {"name": "prompt", "type": "str", "required": True, "frontendType": "textarea", "description": t("Recherche-Anfrage")}, {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder", - "description": t("Daten aus vorherigen Schritten"), "default": ""}, + "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "", + "graphInherit": {"port": 0, "kind": "primaryTextRef"}}, {"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden", - "description": t("Dokumente aus vorherigen Schritten"), "default": ""}, + "description": t("Dokumente aus vorherigen Schritten"), "default": "", + "graphInherit": {"port": 0, "kind": "documentListWire"}}, ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": [ "FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult", "LoopItem", "TextResult", ]}}, - "outputPorts": {0: {"schema": "AiResult"}}, + "outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-magnify", "color": "#9C27B0", "usesAi": True}, "_method": "ai", "_action": "webResearch", @@ -74,15 +201,22 @@ AI_NODES = [ "description": t("Dokumentinhalt zusammenfassen"), "parameters": [ {"name": "documentList", "type": "DocumentList", "required": True, "frontendType": "dataRef", - "description": t("Dokumente aus vorherigen Schritten")}, + 
"description": t("Dokumente aus vorherigen Schritten"), + "graphInherit": {"port": 0, "kind": "documentListWire"}}, + {"name": "resultType", "type": "str", "required": False, "frontendType": "select", + "frontendOptions": {"options": ["txt", "json", "md", "csv", "xml", "html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"]}, + "description": t("Ausgabeformat"), "default": "txt"}, {"name": "summaryLength", "type": "str", "required": False, "frontendType": "select", "frontendOptions": {"options": ["brief", "medium", "detailed"]}, "description": t("Kurz, mittel oder ausführlich"), "default": "medium"}, + {"name": "folderId", "type": "str", "required": False, "frontendType": "userFileFolder", + "description": t("Zielordner in Meine Dateien"), + "default": ""}, ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}}, - "outputPorts": {0: {"schema": "AiResult"}}, + "outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-file-document-outline", "color": "#9C27B0", "usesAi": True}, "_method": "ai", "_action": "summarizeDocument", @@ -94,14 +228,21 @@ AI_NODES = [ "description": t("Dokument in Zielsprache übersetzen"), "parameters": [ {"name": "documentList", "type": "DocumentList", "required": True, "frontendType": "dataRef", - "description": t("Dokumente aus vorherigen Schritten")}, + "description": t("Dokumente aus vorherigen Schritten"), + "graphInherit": {"port": 0, "kind": "documentListWire"}}, + {"name": "resultType", "type": "str", "required": False, "frontendType": "select", + "frontendOptions": {"options": ["txt", "json", "md", "csv", "xml", "html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"]}, + "description": t("Ausgabeformat"), "default": "txt"}, {"name": "targetLanguage", "type": "str", "required": True, "frontendType": "text", "description": t("Zielsprache (z.B. 
de, en, French)")}, + {"name": "folderId", "type": "str", "required": False, "frontendType": "userFileFolder", + "description": t("Zielordner in Meine Dateien"), + "default": ""}, ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}}, - "outputPorts": {0: {"schema": "AiResult"}}, + "outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-translate", "color": "#9C27B0", "usesAi": True}, "_method": "ai", "_action": "translateDocument", @@ -113,15 +254,19 @@ AI_NODES = [ "description": t("Dokument in anderes Format konvertieren"), "parameters": [ {"name": "documentList", "type": "DocumentList", "required": True, "frontendType": "dataRef", - "description": t("Dokumente aus vorherigen Schritten")}, + "description": t("Dokumente aus vorherigen Schritten"), + "graphInherit": {"port": 0, "kind": "documentListWire"}}, {"name": "targetFormat", "type": "str", "required": True, "frontendType": "select", "frontendOptions": {"options": ["docx", "pdf", "xlsx", "csv", "txt", "html", "json", "md"]}, "description": t("Zielformat")}, + {"name": "folderId", "type": "str", "required": False, "frontendType": "userFileFolder", + "description": t("Zielordner in Meine Dateien"), + "default": ""}, ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}}, - "outputPorts": {0: {"schema": "DocumentList"}}, + "outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-file-convert", "color": "#9C27B0", "usesAi": True}, "_method": "ai", "_action": "convertDocument", @@ -142,17 +287,22 @@ AI_NODES = [ {"name": "documentType", "type": "str", "required": False, "frontendType": "select", "frontendOptions": {"options": ["letter", "memo", "proposal", "contract", "report", "email"]}, "description": t("Dokumentart (Inhaltshinweis fuer die KI)"), "default": 
"proposal"}, + {"name": "folderId", "type": "str", "required": False, "frontendType": "userFileFolder", + "description": t("Zielordner in Meine Dateien"), + "default": ""}, {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder", - "description": t("Daten aus vorherigen Schritten"), "default": ""}, + "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "", + "graphInherit": {"port": 0, "kind": "primaryTextRef"}}, {"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden", - "description": t("Dokumente aus vorherigen Schritten"), "default": ""}, + "description": t("Dokumente aus vorherigen Schritten"), "default": "", + "graphInherit": {"port": 0, "kind": "documentListWire"}}, ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": [ "FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult", "LoopItem", "TextResult", ]}}, - "outputPorts": {0: {"schema": "DocumentList"}}, + "outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-file-plus", "color": "#9C27B0", "usesAi": True}, "_method": "ai", "_action": "generateDocument", @@ -168,17 +318,22 @@ AI_NODES = [ {"name": "resultType", "type": "str", "required": False, "frontendType": "select", "frontendOptions": {"options": ["py", "js", "ts", "html", "java", "cpp", "txt", "json", "csv", "xml"]}, "description": t("Datei-Endung der erzeugten Code-Datei"), "default": "py"}, + {"name": "folderId", "type": "str", "required": False, "frontendType": "userFileFolder", + "description": t("Zielordner in Meine Dateien"), + "default": ""}, {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder", - "description": t("Daten aus vorherigen Schritten"), "default": ""}, + "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "", + "graphInherit": {"port": 0, "kind": "primaryTextRef"}}, {"name": "documentList", "type": 
"DocumentList", "required": False, "frontendType": "hidden", - "description": t("Dokumente aus vorherigen Schritten"), "default": ""}, + "description": t("Dokumente aus vorherigen Schritten"), "default": "", + "graphInherit": {"port": 0, "kind": "documentListWire"}}, ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": [ "FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult", "LoopItem", "TextResult", ]}}, - "outputPorts": {0: {"schema": "AiResult"}}, + "outputPorts": {0: {"schema": "AiResult", "dataPickOptions": AI_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-code-tags", "color": "#9C27B0", "usesAi": True}, "_method": "ai", "_action": "generateCode", @@ -198,7 +353,7 @@ AI_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}}, - "outputPorts": {0: {"schema": "ConsolidateResult"}}, + "outputPorts": {0: {"schema": "ConsolidateResult", "dataPickOptions": CONSOLIDATE_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-table-merge-cells", "color": "#9C27B0", "usesAi": True}, "_method": "ai", "_action": "consolidate", diff --git a/modules/features/graphicalEditor/nodeDefinitions/clickup.py b/modules/features/graphicalEditor/nodeDefinitions/clickup.py index 53b75d4b..c1981097 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/clickup.py +++ b/modules/features/graphicalEditor/nodeDefinitions/clickup.py @@ -4,6 +4,63 @@ from modules.shared.i18nRegistry import t +from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS + +TASK_LIST_DATA_PICK_OPTIONS = [ + { + "path": ["tasks"], + "pickerLabel": t("Alle Aufgaben"), + "detail": t("Vollständige Aufgabenliste."), + "recommended": True, + "type": "List[TaskItem]", + }, + { + "path": ["tasks", 0], + "pickerLabel": t("Erste Aufgabe"), + "detail": t("Erstes Listenelement."), + "recommended": False, + "type": "TaskItem", + }, + { + "path": ["count"], + "pickerLabel": t("Anzahl"), + "detail": 
t("Anzahl der Aufgaben."), + "recommended": False, + "type": "int", + }, + { + "path": ["listId"], + "pickerLabel": t("Listen-ID"), + "detail": t("ClickUp-Listen-Kontext, falls gesetzt."), + "recommended": False, + "type": "str", + }, +] + +TASK_RESULT_DATA_PICK_OPTIONS = [ + { + "path": ["success"], + "pickerLabel": t("Erfolg"), + "detail": t("Ob der API-Aufruf erfolgreich war."), + "recommended": True, + "type": "bool", + }, + { + "path": ["taskId"], + "pickerLabel": t("Aufgaben-ID"), + "detail": t("ID der betroffenen Aufgabe."), + "recommended": True, + "type": "str", + }, + { + "path": ["task"], + "pickerLabel": t("Aufgabendaten"), + "detail": t("Vollständiges Task-Objekt (Dict)."), + "recommended": True, + "type": "Dict", + }, +] + CLICKUP_NODES = [ { "id": "clickup.searchTasks", @@ -33,7 +90,7 @@ CLICKUP_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "TaskList"}}, + "outputPorts": {0: {"schema": "TaskList", "dataPickOptions": TASK_LIST_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-magnify", "color": "#7B68EE", "usesAi": False}, "_method": "clickup", "_action": "searchTasks", @@ -58,7 +115,7 @@ CLICKUP_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "TaskList"}}, + "outputPorts": {0: {"schema": "TaskList", "dataPickOptions": TASK_LIST_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-format-list-bulleted", "color": "#7B68EE", "usesAi": False}, "_method": "clickup", "_action": "listTasks", @@ -80,7 +137,7 @@ CLICKUP_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "TaskResult"}}, + "outputPorts": {0: {"schema": "TaskResult", "dataPickOptions": TASK_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-file-document-outline", "color": "#7B68EE", "usesAi": False}, "_method": "clickup", "_action": "getTask", @@ -124,7 +181,7 @@ CLICKUP_NODES = [ "inputs": 1, "outputs": 1, 
"inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "TaskResult"}}, + "outputPorts": {0: {"schema": "TaskResult", "dataPickOptions": TASK_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-plus-circle-outline", "color": "#7B68EE", "usesAi": False}, "_method": "clickup", "_action": "createTask", @@ -148,7 +205,8 @@ CLICKUP_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["TaskResult", "Transit"]}}, - "outputPorts": {0: {"schema": "TaskResult"}}, + "outputPorts": {0: {"schema": "TaskResult", "dataPickOptions": TASK_RESULT_DATA_PICK_OPTIONS}}, + "paramMappers": ["clickupTaskUpdateMerge"], "meta": {"icon": "mdi-pencil-outline", "color": "#7B68EE", "usesAi": False}, "_method": "clickup", "_action": "updateTask", @@ -174,7 +232,7 @@ CLICKUP_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-attachment", "color": "#7B68EE", "usesAi": False}, "_method": "clickup", "_action": "uploadAttachment", diff --git a/modules/features/graphicalEditor/nodeDefinitions/context.py b/modules/features/graphicalEditor/nodeDefinitions/context.py index f6757cc8..743d92e8 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/context.py +++ b/modules/features/graphicalEditor/nodeDefinitions/context.py @@ -1,29 +1,447 @@ # Copyright (c) 2025 Patrick Motsch -# Context node definitions — structural extraction without AI. +# Context node definitions — structural extraction without AI plus +# generic key/value, merge, filter and transform helpers. 
from modules.shared.i18nRegistry import t +from modules.features.graphicalEditor.nodeDefinitions.flow import ( + CONTEXT_ENVELOPE_DATA_PICK_OPTIONS, + CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS, +) + +_CONTEXT_INPUT_SCHEMAS = [ + "Transit", + "ActionResult", + "AiResult", + "MergeResult", + "FormPayload", + "DocumentList", + "EmailList", + "TaskList", + "FileList", + "LoopItem", + "UdmDocument", +] + + CONTEXT_NODES = [ { "id": "context.extractContent", "category": "context", "label": t("Inhalt extrahieren"), - "description": t("Dokumentstruktur extrahieren ohne KI (Seiten, Abschnitte, Bilder, Tabellen)"), + "description": t( + "Extrahiert Inhalt ohne KI. ``data`` ist die gewählte **Presentation** (`fileOrder`, `files` je " + "Quelldatei, kanonisches `data` pro Bucket) plus ``_meta`` (Quellnamen, Operation, Persist). " + "``response`` für diesen Knoten bleibt leer — kein zusätzlicher Fließtext. " + "``imageDocumentsOnly`` enthält Bilder über persistierte Artefakte." + ), + "injectRunContext": True, "parameters": [ {"name": "documentList", "type": "str", "required": True, "frontendType": "hidden", - "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""}, - {"name": "extractionOptions", "type": "object", "required": False, "frontendType": "json", - "description": t( - "Extraktions-Optionen (JSON), z.B. 
{\"includeImages\": true, \"includeTables\": true, " - "\"outputDetail\": \"full\"}"), - "default": {}}, + "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": "", + "graphInherit": {"port": 0, "kind": "documentListWire"}}, + { + "name": "contentFilter", + "type": "str", + "required": False, + "frontendType": "select", + "frontendOptions": { + "options": [ + {"value": "all", "label": t("Alles (Text, Tabellen, Bilder)")}, + {"value": "textOnly", "label": t("Nur Text und Tabellen")}, + {"value": "imagesOnly", "label": t("Nur Bilder")}, + {"value": "noImages", "label": t("Alles ausser Bilder")}, + ] + }, + "default": "all", + "description": t( + "Welche extrahierten Parts weiterverwendet werden. " + "all = alle Typgruppen inkl. Bilder; " + "textOnly = ausschliesslich Text-, Tabellen- und Struktur-Parts; " + "imagesOnly = ausschliesslich Bild-Parts; " + "noImages = alle Parts ausser Bildern (weiter als textOnly: " + "auch kuenftige Nicht-Bild-Typen bleiben erhalten)." + ), + }, + { + "name": "outputMode", + "type": "str", + "required": False, + "frontendType": "select", + "frontendOptions": { + "options": [ + {"value": "blob", "label": t("Ausgabe: ein Textblock (blob)")}, + {"value": "lines", "label": t("Ausgabe: Zeilen / Segmente")}, + {"value": "pages", "label": t("Ausgabe: nach Seite (z. B. PDF)")}, + {"value": "chunks", "label": t("Ausgabe: Chunks (fixe Groesse)")}, + {"value": "structured", "label": t("Ausgabe: Parts als Liste")}, + ] + }, + "default": "lines", + "description": t( + "Wie das Ergebnis unter ``files`` strukturiert wird (``outputMode``: blob, lines, …)." 
+ ), + }, + { + "name": "splitBy", + "type": "str", + "required": False, + "frontendType": "select", + "frontendOptions": { + "options": [ + {"value": "newline", "label": t("Trennen: Zeilenumbruch")}, + {"value": "paragraph", "label": t("Trennen: Absatz (Leerzeilen)")}, + {"value": "sentence", "label": t("Trennen: Saetze (heuristisch)")}, + ] + }, + "default": "newline", + "description": t( + "Gueltig fuer ``outputMode`` lines und chunks: welches Trennzeichen der " + "zusammenhaengende Klartext zuerst erhaelt." + ), + }, + { + "name": "chunkSizeUnit", + "type": "str", + "required": False, + "frontendType": "select", + "frontendOptions": { + "dependsOn": "outputMode", + "showWhen": ["chunks"], + "options": [ + {"value": "tokens", "label": t("Chunk-Groesse: Tokens (approx. ~4 Zeichen)")}, + {"value": "characters", "label": t("Chunk-Groesse: Zeichen")}, + {"value": "words", "label": t("Chunk-Groesse: Woerter")}, + ] + }, + "default": "tokens", + "description": t("Einheit fuer ``chunkSize`` / ``chunkOverlap`` wenn outputMode chunks."), + }, + { + "name": "chunkSize", + "type": "str", + "required": False, + "frontendType": "select", + "frontendOptions": { + "dependsOn": "outputMode", + "showWhen": ["chunks"], + "options": [ + {"value": "256", "label": "256"}, + {"value": "500", "label": "500"}, + {"value": "1000", "label": "1000"}, + {"value": "2000", "label": "2000"}, + {"value": "4000", "label": "4000"}, + ] + }, + "default": "500", + "description": t("Zielgroesse pro Chunk (siehe chunkSizeUnit). 
Nur bei outputMode chunks."), + }, + { + "name": "chunkOverlap", + "type": "str", + "required": False, + "frontendType": "select", + "frontendOptions": { + "dependsOn": "outputMode", + "showWhen": ["chunks"], + "options": [ + {"value": "0", "label": "0"}, + {"value": "25", "label": "25"}, + {"value": "50", "label": "50"}, + {"value": "100", "label": "100"}, + {"value": "200", "label": "200"}, + ] + }, + "default": "0", + "description": t("Ueberlappung zwischen aufeinanderfolgenden Chunks (gleiche Einheit wie chunkSize)."), + }, + { + "name": "filterEmptyLines", + "type": "str", + "required": False, + "frontendType": "select", + "frontendOptions": { + "options": [ + {"value": "true", "label": t("Ja")}, + {"value": "false", "label": t("Nein")}, + ] + }, + "default": "true", + "description": t("Leere bzw. nur-Whitespace-Segmente bei lines/chunks entfernen."), + }, + { + "name": "trimWhitespace", + "type": "str", + "required": False, + "frontendType": "select", + "frontendOptions": { + "options": [ + {"value": "true", "label": t("Ja")}, + {"value": "false", "label": t("Nein")}, + ] + }, + "default": "true", + "description": t("Fuehrende und nachfolgende Leerzeichen pro Segment trimmen."), + }, + { + "name": "includeLineNumbers", + "type": "str", + "required": False, + "frontendType": "select", + "frontendOptions": { + "options": [ + {"value": "true", "label": t("Ja")}, + {"value": "false", "label": t("Nein")}, + ] + }, + "default": "false", + "description": t("Bei lines: jedem Eintrag eine Zeilennummer (1-based) zuweisen."), + }, + { + "name": "includeMetadata", + "type": "str", + "required": False, + "frontendType": "select", + "frontendOptions": { + "options": [ + {"value": "true", "label": t("Ja")}, + {"value": "false", "label": t("Nein")}, + ] + }, + "default": "false", + "description": t("Dateiname und einfache Offsets bei lines/chunks/pages an Eintraege haengen."), + }, + { + "name": "csvHeaderRow", + "type": "str", + "required": False, + "frontendType": 
"select", + "frontendOptions": { + "options": [ + {"value": "true", "label": t("Ja")}, + {"value": "false", "label": t("Nein")}, + ] + }, + "default": "true", + "description": t( + "Bei CSV-Dateien: erste Zeile als Spaltenkoepfe interpretieren " + "und ``csvRows`` als Liste von Objekten in ``presentation`` schreiben." + ), + }, + { + "name": "pdfExtractMode", + "type": "str", + "required": False, + "frontendType": "select", + "frontendOptions": { + "options": [ + {"value": "text", "label": t("PDF/Parts: Text & Tabellen (keine Bild-Parts)")}, + {"value": "tables", "label": t("PDF/Parts: nur Tabellen-Parts")}, + {"value": "images", "label": t("PDF/Parts: nur Bild-Parts")}, + {"value": "all", "label": t("PDF/Parts: alle Typgruppen")}, + ] + }, + "default": "all", + "description": t( + "Filtert fuer die Presentation-Schicht nach typeGroup/MIME " + "(gilt fuer alle Dokumenttypen analog, nicht nur PDF). " + "Passt zum Inhaltsfilter „Alles“; „Text & Tabellen“ blendet Bild-Parts in der Presentation aus." + ), + }, + { + "name": "markdownPreserveFormatting", + "type": "str", + "required": False, + "frontendType": "select", + "frontendOptions": { + "options": [ + {"value": "true", "label": t("Markdown beibehalten")}, + {"value": "false", "label": t("zu vereinfachtem Klartext reduzieren")}, + ] + }, + "default": "false", + "description": t( + "Bei text/markdown-Parts: leichte Entfernung von Markup-Zeichen wenn false." + ), + }, ], "inputs": 1, "outputs": 1, - "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, - "outputPorts": {0: {"schema": "UdmDocument"}}, + "inputPorts": {0: {"accepts": ["DocumentList", "Transit", "LoopItem"]}}, + "outputPorts": { + 0: { + "schema": "ActionResult", + # Override the schema-level primaryTextRef path: ``response`` is intentionally + # empty for this node; downstream nodes with ``primaryTextRef`` should resolve to + # the full presentation object under ``data``. 
+ "primaryTextRefPath": ["data"], + # Authoritative DataPicker paths (same idea as ``parameters`` for configuration). + # Frontend uses only this list — no schema expansion merge for this port. + "dataPickOptions": [ + { + "path": ["data"], + "pickerLabel": t("Vollständiges data-Objekt"), + "detail": t( + "Presentation-Envelope (``schemaVersion``, ``kind``, ``fileOrder``, ``files``) " + "plus ``_meta`` (``operationRef``, ``sourceFileNames``, Persist)." + ), + "recommended": True, + "type": "Any", + }, + { + "path": ["data", "files"], + "pickerLabel": t("Alle Dateibuckets"), + "detail": t("Map Dateischlüssel → Bucket (Zeilenliste, Blob, CSV-Tabelle bei structured, …)."), + "recommended": False, + "type": "Any", + }, + { + "path": ["imageDocumentsOnly"], + "pickerLabel": t("Nur Bilder"), + "detail": t( + "Nur die Bilder aus der Extraktion (persistierte Artefakte bzw. inline), " + "als Liste fuer nachgelagerte Schritte." + ), + "recommended": False, + "type": "List[ActionDocument]", + }, + { + "path": ["data", "_meta"], + "pickerLabel": t("Metadaten (_meta)"), + "detail": t( + "``operationRef``, ``sourceFileNames``, Presentation-Parameter, Liste persistierter Bilder." + ), + "recommended": False, + "type": "Any", + }, + ], + } + }, "meta": {"icon": "mdi-file-tree-outline", "color": "#00897B", "usesAi": False}, "_method": "context", "_action": "extractContent", + # Executor behaviour flags — drives actionNodeExecutor without hardcoded type checks. + "skipUnifiedPresentation": True, + "clearResponse": True, + "imageDocumentsFromExtractData": True, + "popDocumentsFromOutput": True, + }, + { + "id": "context.mergeContext", + "category": "context", + "label": t("Kontext zusammenführen"), + "description": t( + "Führt eine Liste von Ergebnissen zu einem einzigen Kontext zusammen. " + "Ausgabe ``data``: versionierter Umschlag (``schemaVersion``, ``kind``), Felder wie " + "``merged`` / ``first`` / ``response`` sowie ``_meta``. 
" + "Wähle als Datenquelle die Option Alle Schleifen-Ergebnisse einer Schleife, " + "um alle Iterationsergebnisse in einem Datensatz zu vereinen." + ), + "parameters": [ + { + "name": "dataSource", + "type": "Any", + "required": True, + "frontendType": "dataRef", + "description": t( + "Datenquelle: Liste von Einträgen zum Zusammenführen " + "(z. B. Schleife → Alle Schleifen-Ergebnisse)" + ), + }, + ], + "inputs": 1, + "outputs": 1, + "inputPorts": {0: {"accepts": _CONTEXT_INPUT_SCHEMAS}}, + "outputPorts": { + 0: {"schema": "ActionResult", "dataPickOptions": CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS} + }, + "injectUpstreamPayload": True, + # Same contract as transformContext: picker paths like ``merged`` / ``first`` must match + # ``nodeOutputs`` (see actionNodeExecutor ``surfaceDataAsTopLevel``); merge payloads live in ``data``. + "surfaceDataAsTopLevel": True, + "meta": {"icon": "mdi-call-merge", "color": "#7B1FA2", "usesAi": False}, + "_method": "context", + "_action": "mergeContext", + # Image documents live on ``data.merged.imageDocumentsOnly`` (accumulated across + # iterations) rather than the top-level ``documents`` list which is always empty. + "imageDocumentsFromMerged": True, + }, + { + "id": "context.transformContext", + "category": "context", + "label": t("Kontext transformieren"), + "description": t( + "Verändert die Struktur des eingehenden Datenstroms. " + "Ausgabe ``data``: versionierter Umschlag (``schemaVersion``, ``kind``: transform), " + "konfigurierte Ausgabe-Felder und ``_meta``. " + "Operationen pro Mapping: 'rename' (Key umbenennen), 'cast' (Typ konvertieren), " + "'nest' (mehrere Felder unter neuem Objekt zusammenfassen), " + "'flatten' (verschachteltes Objekt auf oberste Ebene heben), " + "'compute' (neues Feld aus Template-/{{...}}-Ausdruck berechnen). " + "Jedes Mapping definiert: 'sourceField' (Eingangspfad / Ausdruck), " + "'outputField' (Ausgabe-Key), 'operation' und 'type' (Zieltyp). 
" + "Das Ergebnis ist ein neues Objekt — der ursprüngliche Datenstrom " + "wird nicht automatisch weitergegeben (ausser 'passthroughUnmapped: true')." + ), + "parameters": [ + { + "name": "mappings", + "type": "list", + "required": True, + "frontendType": "mappingTable", + "default": [], + "description": t( + "Liste von Mapping-Einträgen. Jeder Eintrag: " + "sourceField (DataRef-Pfad oder Ausdruck), " + "outputField (Ziel-Key im Output), " + "operation (rename | cast | nest | flatten | compute), " + "type (str | int | bool | float | object | list — für cast), " + "expression (für compute: Template oder Ausdruck, z.B. '{{firstName}} {{lastName}}')." + ), + }, + { + "name": "passthroughUnmapped", + "type": "bool", + "required": False, + "frontendType": "checkbox", + "default": False, + "description": t( + "Alle nicht gemappten Felder des Eingangs zusätzlich in den Output übernehmen." + ), + }, + { + "name": "flattenDepth", + "type": "int", + "required": False, + "frontendType": "number", + "default": 1, + "description": t("Tiefe für flatten-Operation (1 = eine Ebene, -1 = vollständig)"), + }, + ], + "inputs": 1, + "outputs": 1, + "inputPorts": {0: {"accepts": _CONTEXT_INPUT_SCHEMAS}}, + "outputPorts": { + 0: { + "schema": { + "kind": "fromGraph", + "parameter": "mappings", + "nameField": "outputField", + "schemaName": "Transform_dynamic", + }, + "dynamic": True, + "deriveFrom": "mappings", + "deriveNameField": "outputField", + "dataPickOptions": CONTEXT_ENVELOPE_DATA_PICK_OPTIONS, + # ActionResult is the correct normalization schema — NOT FormPayload. + # The output is a versionned ActionResult envelope built by contextEnvelope. 
+ "fromGraphResultSchema": "ActionResult", + } + }, + "injectUpstreamPayload": True, + "surfaceDataAsTopLevel": True, + "meta": {"icon": "mdi-swap-horizontal", "color": "#EF6C00", "usesAi": False}, + "_method": "context", + "_action": "transformContext", }, ] diff --git a/modules/features/graphicalEditor/nodeDefinitions/contextPickerHelp.py b/modules/features/graphicalEditor/nodeDefinitions/contextPickerHelp.py new file mode 100644 index 00000000..116164c1 --- /dev/null +++ b/modules/features/graphicalEditor/nodeDefinitions/contextPickerHelp.py @@ -0,0 +1,22 @@ +# Copyright (c) 2025 Patrick Motsch +# Shared parameter copy for ``contextBuilder`` fields (upstream data pick). + +from modules.shared.i18nRegistry import t + +CONTEXT_BUILDER_PARAM_DESCRIPTION = t( + "Inhalt aus vorherigen Schritten wählen (DataRef / Daten-Picker): z. B. „response“ für Klartext, " + "Handover-Pfade für strukturiertes JSON oder Medienlisten. " + "Die Auflösung erfolgt vollständig serverseitig (`resolveParameterReferences`). " + "Formular-Schritte speichern Antworten unter „payload“ — fehlt ein gewählter Pfad am Root, " + "wird derselbe Pfad automatisch unter „payload“ nachgeschlagen (Kompatibilität mit älteren " + "und neuen Picker-Pfaden). " + "In Freitext-/Template-Feldern werden weiterhin Platzhalter `{{KnotenId.feld.b.z.}}` ersetzt " + "(gleiche Semantik inkl. optionalem Nachschlagen unter „payload“)." +) + +# Kurzreferenz für Node-Beschreibungen (optional einbinden): dieselbe Auflösungslogik +# wie bei DataRefs — kein separates Variablen-Subsystem. +REF_AND_TEMPLATE_COMPATIBILITY_SUMMARY = t( + "Verweise: typisierte DataRefs im Parameter; Zeichenketten-Templates mit {{…}}; " + "Formular-Felder unter output.payload." 
+) diff --git a/modules/features/graphicalEditor/nodeDefinitions/data.py b/modules/features/graphicalEditor/nodeDefinitions/data.py index ca1f9035..118de127 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/data.py +++ b/modules/features/graphicalEditor/nodeDefinitions/data.py @@ -3,6 +3,25 @@ from modules.shared.i18nRegistry import t +from modules.features.graphicalEditor.nodeDefinitions.ai import CONSOLIDATE_RESULT_DATA_PICK_OPTIONS + +AGGREGATE_RESULT_DATA_PICK_OPTIONS = [ + { + "path": ["items"], + "pickerLabel": t("Gesammelte Elemente"), + "detail": t("Alle aus der Schleife gesammelten Werte."), + "recommended": True, + "type": "List[Any]", + }, + { + "path": ["count"], + "pickerLabel": t("Anzahl"), + "detail": t("Anzahl gesammelter Elemente."), + "recommended": False, + "type": "int", + }, +] + DATA_NODES = [ { "id": "data.aggregate", @@ -17,7 +36,7 @@ DATA_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit", "AiResult", "LoopItem"]}}, - "outputPorts": {0: {"schema": "AggregateResult"}}, + "outputPorts": {0: {"schema": "AggregateResult", "dataPickOptions": AGGREGATE_RESULT_DATA_PICK_OPTIONS}}, "executor": "data", "meta": {"icon": "mdi-playlist-plus", "color": "#607D8B", "usesAi": False}, }, @@ -55,7 +74,7 @@ DATA_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}}, - "outputPorts": {0: {"schema": "ConsolidateResult"}}, + "outputPorts": {0: {"schema": "ConsolidateResult", "dataPickOptions": CONSOLIDATE_RESULT_DATA_PICK_OPTIONS}}, "executor": "data", "meta": {"icon": "mdi-table-merge-cells", "color": "#607D8B", "usesAi": False}, }, diff --git a/modules/features/graphicalEditor/nodeDefinitions/email.py b/modules/features/graphicalEditor/nodeDefinitions/email.py index 8f316605..cc4f1474 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/email.py +++ b/modules/features/graphicalEditor/nodeDefinitions/email.py @@ -3,6 +3,35 @@ from modules.shared.i18nRegistry import t 
+from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import ( + CONTEXT_BUILDER_PARAM_DESCRIPTION, +) +from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS + +EMAIL_LIST_DATA_PICK_OPTIONS = [ + { + "path": ["emails"], + "pickerLabel": t("Alle E-Mails"), + "detail": t("Die vollständige E-Mail-Liste des Schritts."), + "recommended": True, + "type": "List[EmailItem]", + }, + { + "path": ["emails", 0], + "pickerLabel": t("Erste E-Mail"), + "detail": t("Das erste Element der Liste."), + "recommended": False, + "type": "EmailItem", + }, + { + "path": ["count"], + "pickerLabel": t("Anzahl"), + "detail": t("Anzahl gefundener E-Mails."), + "recommended": False, + "type": "int", + }, +] + EMAIL_NODES = [ { "id": "email.checkEmail", @@ -23,7 +52,8 @@ EMAIL_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "EmailList"}}, + "outputPorts": {0: {"schema": "EmailList", "dataPickOptions": EMAIL_LIST_DATA_PICK_OPTIONS}}, + "paramMappers": ["emailCheckFilter"], "meta": {"icon": "mdi-email-check", "color": "#1976D2", "usesAi": False}, "_method": "outlook", "_action": "readEmails", @@ -47,7 +77,8 @@ EMAIL_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "EmailList"}}, + "outputPorts": {0: {"schema": "EmailList", "dataPickOptions": EMAIL_LIST_DATA_PICK_OPTIONS}}, + "paramMappers": ["emailSearchQuery"], "meta": {"icon": "mdi-email-search", "color": "#1976D2", "usesAi": False}, "_method": "outlook", "_action": "searchEmails", @@ -63,11 +94,13 @@ EMAIL_NODES = [ "frontendOptions": {"authority": "msft"}, "description": t("E-Mail-Konto")}, {"name": "context", "type": "Any", "required": False, "frontendType": "templateTextarea", - "description": t("Daten aus vorherigen Schritten (oder direkte Beschreibung)"), "default": ""}, + "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "", + 
"graphInherit": {"port": 0, "kind": "primaryTextRef"}}, {"name": "to", "type": "str", "required": False, "frontendType": "text", "description": t("Empfänger (komma-separiert, optional für Entwurf)"), "default": ""}, {"name": "documentList", "type": "str", "required": False, "frontendType": "hidden", - "description": t("Anhang-Dokumente (via Wire oder DataRef)"), "default": ""}, + "description": t("Anhang-Dokumente (via Wire oder DataRef)"), "default": "", + "graphInherit": {"port": 0, "kind": "documentListWire"}}, {"name": "emailContent", "type": "str", "required": False, "frontendType": "hidden", "description": t("Direkt vorbereiteter Inhalt {subject, body, to} (via Wire — überspringt KI)"), "default": ""}, @@ -78,7 +111,8 @@ EMAIL_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["EmailDraft", "AiResult", "Transit", "ConsolidateResult", "DocumentList"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, + "paramMappers": ["emailDraftContextFromSubjectBody"], "meta": {"icon": "mdi-email-edit", "color": "#1976D2", "usesAi": False}, "_method": "outlook", "_action": "composeAndDraftEmailWithContext", diff --git a/modules/features/graphicalEditor/nodeDefinitions/file.py b/modules/features/graphicalEditor/nodeDefinitions/file.py index ffa4d722..a10999a2 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/file.py +++ b/modules/features/graphicalEditor/nodeDefinitions/file.py @@ -3,27 +3,41 @@ from modules.shared.i18nRegistry import t +from modules.features.graphicalEditor.nodeDefinitions.contextPickerHelp import ( + CONTEXT_BUILDER_PARAM_DESCRIPTION, +) +from modules.features.graphicalEditor.nodeDefinitions.ai import DOCUMENT_LIST_DATA_PICK_OPTIONS + FILE_NODES = [ { "id": "file.create", "category": "file", "label": t("Datei erstellen"), - "description": t("Erstellt eine Datei aus Kontext (Text/Markdown von KI)."), + "description": t( + 
"Erstellt eine Datei aus der Presentation von „Inhalt extrahieren“ " + "(``data`` oder Schleifen-``bodyResults``). Ausgabe über den Generation-Service." + ), "parameters": [ {"name": "outputFormat", "type": "str", "required": True, "frontendType": "select", "frontendOptions": {"options": ["docx", "pdf", "txt", "html", "md"]}, "description": t("Ausgabeformat"), "default": "docx"}, {"name": "title", "type": "str", "required": False, "frontendType": "text", "description": t("Dokumenttitel")}, + {"name": "folderId", "type": "str", "required": False, "frontendType": "userFileFolder", + "description": t("Zielordner in Meine Dateien"), + "default": ""}, {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder", - "description": t("Daten aus vorherigen Schritten"), "default": ""}, + "description": CONTEXT_BUILDER_PARAM_DESCRIPTION, "default": "", + "graphInherit": {"port": 0, "kind": "recommendedDataPickRef"}}, ], "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["AiResult", "TextResult", "Transit", "FormPayload", "LoopItem", "ActionResult"]}}, - "outputPorts": {0: {"schema": "DocumentList"}}, + "outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-file-plus-outline", "color": "#2196F3", "usesAi": False}, "_method": "file", "_action": "create", + # Emit a debug log tracing how the ``context`` parameter was resolved. 
+ "logContextResolution": True, }, ] diff --git a/modules/features/graphicalEditor/nodeDefinitions/input.py b/modules/features/graphicalEditor/nodeDefinitions/input.py index e2d0271a..5bf84e74 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/input.py +++ b/modules/features/graphicalEditor/nodeDefinitions/input.py @@ -3,6 +3,35 @@ from modules.shared.i18nRegistry import t +from modules.features.graphicalEditor.nodeDefinitions.ai import DOCUMENT_LIST_DATA_PICK_OPTIONS + +BOOL_RESULT_DATA_PICK_OPTIONS = [ + { + "path": ["result"], + "pickerLabel": t("Ergebnis"), + "detail": t("Boolesches Ergebnis (z. B. Genehmigung ja/nein)."), + "recommended": True, + "type": "bool", + }, + { + "path": ["reason"], + "pickerLabel": t("Begründung"), + "detail": t("Optionale textuelle Begründung."), + "recommended": False, + "type": "str", + }, +] + +TEXT_RESULT_DATA_PICK_OPTIONS = [ + { + "path": ["text"], + "pickerLabel": t("Text"), + "detail": t("Vom Benutzer eingegebener oder gewählter Text."), + "recommended": True, + "type": "str", + }, +] + # Canonical form field types — single source of truth. # portType maps to the PORT_TYPE_CATALOG primitive used by DataPicker / validateGraph. 
FORM_FIELD_TYPES = [ @@ -55,7 +84,7 @@ INPUT_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "BoolResult"}}, + "outputPorts": {0: {"schema": "BoolResult", "dataPickOptions": BOOL_RESULT_DATA_PICK_OPTIONS}}, "executor": "input", "meta": {"icon": "mdi-check-decagram", "color": "#4CAF50", "usesAi": False}, }, @@ -78,7 +107,7 @@ INPUT_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "DocumentList"}}, + "outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}}, "executor": "input", "meta": {"icon": "mdi-upload", "color": "#2196F3", "usesAi": False}, }, @@ -96,7 +125,7 @@ INPUT_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "TextResult"}}, + "outputPorts": {0: {"schema": "TextResult", "dataPickOptions": TEXT_RESULT_DATA_PICK_OPTIONS}}, "executor": "input", "meta": {"icon": "mdi-comment-text", "color": "#FF9800", "usesAi": False}, }, @@ -115,7 +144,7 @@ INPUT_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "BoolResult"}}, + "outputPorts": {0: {"schema": "BoolResult", "dataPickOptions": BOOL_RESULT_DATA_PICK_OPTIONS}}, "executor": "input", "meta": {"icon": "mdi-magnify-scan", "color": "#673AB7", "usesAi": False}, }, @@ -133,7 +162,7 @@ INPUT_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "TextResult"}}, + "outputPorts": {0: {"schema": "TextResult", "dataPickOptions": TEXT_RESULT_DATA_PICK_OPTIONS}}, "executor": "input", "meta": {"icon": "mdi-format-list-checks", "color": "#009688", "usesAi": False}, }, @@ -153,7 +182,7 @@ INPUT_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "BoolResult"}}, + "outputPorts": {0: {"schema": "BoolResult", 
"dataPickOptions": BOOL_RESULT_DATA_PICK_OPTIONS}}, "executor": "input", "meta": {"icon": "mdi-checkbox-marked-circle", "color": "#8BC34A", "usesAi": False}, }, diff --git a/modules/features/graphicalEditor/nodeDefinitions/redmine.py b/modules/features/graphicalEditor/nodeDefinitions/redmine.py index 2d8ebb59..675fe957 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/redmine.py +++ b/modules/features/graphicalEditor/nodeDefinitions/redmine.py @@ -4,6 +4,8 @@ from modules.shared.i18nRegistry import t +from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS + # Typed FeatureInstance binding (replaces legacy `string, hidden`). # - type FeatureInstanceRef[redmine] is filtered by the DataPicker. # - frontendType "featureInstance" is rendered by FeatureInstancePicker which @@ -31,7 +33,7 @@ REDMINE_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-ticket-outline", "color": "#4A6FA5", "usesAi": False}, "_method": "redmine", "_action": "readTicket", @@ -59,7 +61,7 @@ REDMINE_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-format-list-bulleted", "color": "#4A6FA5", "usesAi": False}, "_method": "redmine", "_action": "listTickets", @@ -91,7 +93,7 @@ REDMINE_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-ticket-plus-outline", "color": "#4A6FA5", "usesAi": False}, "_method": "redmine", "_action": "createTicket", @@ 
-127,7 +129,7 @@ REDMINE_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-ticket-confirmation-outline", "color": "#4A6FA5", "usesAi": False}, "_method": "redmine", "_action": "updateTicket", @@ -151,7 +153,7 @@ REDMINE_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-chart-bar", "color": "#4A6FA5", "usesAi": False}, "_method": "redmine", "_action": "getStats", @@ -169,7 +171,7 @@ REDMINE_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-database-sync", "color": "#4A6FA5", "usesAi": False}, "_method": "redmine", "_action": "runSync", diff --git a/modules/features/graphicalEditor/nodeDefinitions/sharepoint.py b/modules/features/graphicalEditor/nodeDefinitions/sharepoint.py index b47a6b54..2a1a1a32 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/sharepoint.py +++ b/modules/features/graphicalEditor/nodeDefinitions/sharepoint.py @@ -3,6 +3,35 @@ from modules.shared.i18nRegistry import t +from modules.features.graphicalEditor.nodeDefinitions.ai import ( + ACTION_RESULT_DATA_PICK_OPTIONS, + DOCUMENT_LIST_DATA_PICK_OPTIONS, +) + +FILE_LIST_DATA_PICK_OPTIONS = [ + { + "path": ["files"], + "pickerLabel": t("Alle Dateien"), + "detail": t("Die vollständige Dateiliste."), + "recommended": True, + "type": "List[FileItem]", + }, + { + "path": ["files", 0], + "pickerLabel": t("Erste Datei"), + "detail": t("Das erste Listenelement."), + "recommended": False, + 
"type": "FileItem", + }, + { + "path": ["count"], + "pickerLabel": t("Anzahl"), + "detail": t("Anzahl der Dateien."), + "recommended": False, + "type": "int", + }, +] + SHAREPOINT_NODES = [ { "id": "sharepoint.findFile", @@ -23,7 +52,7 @@ SHAREPOINT_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "FileList"}}, + "outputPorts": {0: {"schema": "FileList", "dataPickOptions": FILE_LIST_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-file-search", "color": "#0078D4", "usesAi": False}, "_method": "sharepoint", "_action": "findDocumentPath", @@ -44,7 +73,7 @@ SHAREPOINT_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["FileList", "Transit", "LoopItem"]}}, - "outputPorts": {0: {"schema": "DocumentList"}}, + "outputPorts": {0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-file-document", "color": "#0078D4", "usesAi": False}, "_method": "sharepoint", "_action": "readDocuments", @@ -67,7 +96,7 @@ SHAREPOINT_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-upload", "color": "#0078D4", "usesAi": False}, "_method": "sharepoint", "_action": "uploadFile", @@ -88,7 +117,7 @@ SHAREPOINT_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "FileList"}}, + "outputPorts": {0: {"schema": "FileList", "dataPickOptions": FILE_LIST_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-folder-open", "color": "#0078D4", "usesAi": False}, "_method": "sharepoint", "_action": "listDocuments", @@ -109,7 +138,7 @@ SHAREPOINT_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["FileList", "Transit", "LoopItem"]}}, - "outputPorts": {0: {"schema": "DocumentList"}}, + "outputPorts": 
{0: {"schema": "DocumentList", "dataPickOptions": DOCUMENT_LIST_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-download", "color": "#0078D4", "usesAi": False}, "_method": "sharepoint", "_action": "downloadFileByPath", @@ -133,7 +162,7 @@ SHAREPOINT_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-content-copy", "color": "#0078D4", "usesAi": False}, "_method": "sharepoint", "_action": "copyFile", diff --git a/modules/features/graphicalEditor/nodeDefinitions/triggers.py b/modules/features/graphicalEditor/nodeDefinitions/triggers.py index 443f8c02..074125e2 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/triggers.py +++ b/modules/features/graphicalEditor/nodeDefinitions/triggers.py @@ -1,27 +1,29 @@ # Copyright (c) 2025 Patrick Motsch -# Canvas start nodes — variant reflects workflow configuration (gear in editor). +# Start nodes (palette category ``start``); kinds align with workflow entry points / run envelope. from modules.shared.i18nRegistry import t +from modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS + TRIGGER_NODES = [ { "id": "trigger.manual", - "category": "trigger", + "category": "start", "label": t("Start"), - "description": t("Manuell, API oder Hintergrund-Starts (Webhook, E-Mail, …)."), + "description": t("Manuell Trigger. 
Workflow startet nur, wenn auf Start-Button geklickt wird."), "parameters": [], "inputs": 0, "outputs": 1, "inputPorts": {}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "executor": "trigger", "meta": {"icon": "mdi-play", "color": "#4CAF50", "usesAi": False}, }, { "id": "trigger.form", - "category": "trigger", + "category": "start", "label": t("Start (Formular)"), - "description": t("Felder werden beim Start befüllt; konfigurieren Sie die Felder auf dieser Node."), + "description": t("Formular Trigger. Workflow startet nur, wenn das Formular ausgefüllt und abgeschickt wird."), "parameters": [ { "name": "formFields", @@ -40,9 +42,9 @@ TRIGGER_NODES = [ }, { "id": "trigger.schedule", - "category": "trigger", + "category": "start", "label": t("Start (Zeitplan)"), - "description": t("Cron-Ausdruck für geplante Läufe."), + "description": t("Workflow startet nach dem ausgewählten Zeitplan."), "parameters": [ { "name": "cron", @@ -51,11 +53,18 @@ TRIGGER_NODES = [ "frontendType": "cron", "description": t("Cron-Ausdruck"), }, + { + "name": "schedule", + "type": "json", + "required": False, + "frontendType": "hidden", + "description": t("Zeitplan (intern, für Editor-Roundtrip)"), + }, ], "inputs": 0, "outputs": 1, "inputPorts": {}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "executor": "trigger", "meta": {"icon": "mdi-clock", "color": "#2196F3", "usesAi": False}, }, diff --git a/modules/features/graphicalEditor/nodeDefinitions/trustee.py b/modules/features/graphicalEditor/nodeDefinitions/trustee.py index 3adc9d3f..d6a82e4b 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/trustee.py +++ b/modules/features/graphicalEditor/nodeDefinitions/trustee.py @@ -3,6 +3,8 @@ from modules.shared.i18nRegistry import t +from 
modules.features.graphicalEditor.nodeDefinitions.ai import ACTION_RESULT_DATA_PICK_OPTIONS + # Typed FeatureInstance binding (replaces legacy `string, hidden`). # - type uses the discriminator notation `FeatureInstanceRef[]` so the # DataPicker / RequiredAttributePicker can filter compatible upstream paths. @@ -35,7 +37,7 @@ TRUSTEE_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-database-refresh", "color": "#4CAF50", "usesAi": False}, "_method": "trustee", "_action": "refreshAccountingData", @@ -62,7 +64,7 @@ TRUSTEE_NODES = [ # Runtime returns ActionResult.isSuccess(documents=[...]) — see # actions/extractFromFiles.py. Declaring DocumentList here was adapter # drift and broke the DataPicker for downstream nodes. - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-file-document-scan", "color": "#4CAF50", "usesAi": True}, "_method": "trustee", "_action": "extractFromFiles", @@ -77,13 +79,14 @@ TRUSTEE_NODES = [ # is List[ActionDocument] (see datamodelChat.ActionResult). The # DataPicker uses this string to filter compatible upstream paths. 
{"name": "documentList", "type": "List[ActionDocument]", "required": True, "frontendType": "dataRef", - "description": t("Dokumente aus vorherigen Schritten")}, + "description": t("Dokumente aus vorherigen Schritten"), + "graphInherit": {"port": 0, "kind": "documentListWire"}}, dict(_TRUSTEE_INSTANCE_PARAM), ], "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["ActionResult", "DocumentList", "Transit"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-file-document-check", "color": "#4CAF50", "usesAi": False}, "_method": "trustee", "_action": "processDocuments", @@ -95,13 +98,14 @@ TRUSTEE_NODES = [ "description": t("Trustee-Positionen in Buchhaltungssystem übertragen."), "parameters": [ {"name": "documentList", "type": "List[ActionDocument]", "required": True, "frontendType": "dataRef", - "description": t("Dokumente aus vorherigen Schritten")}, + "description": t("Dokumente aus vorherigen Schritten"), + "graphInherit": {"port": 0, "kind": "documentListWire"}}, dict(_TRUSTEE_INSTANCE_PARAM), ], "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["ActionResult", "DocumentList", "Transit"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-calculator", "color": "#4CAF50", "usesAi": False}, "_method": "trustee", "_action": "syncToAccounting", @@ -138,7 +142,7 @@ TRUSTEE_NODES = [ "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit", "AiResult", "ConsolidateResult", "UdmDocument"]}}, - "outputPorts": {0: {"schema": "ActionResult"}}, + "outputPorts": {0: {"schema": "ActionResult", "dataPickOptions": ACTION_RESULT_DATA_PICK_OPTIONS}}, "meta": {"icon": "mdi-database-search", "color": "#4CAF50", "usesAi": False}, "_method": "trustee", "_action": "queryData", diff --git 
a/modules/features/graphicalEditor/nodeRegistry.py b/modules/features/graphicalEditor/nodeRegistry.py index a3c8bd0b..0b0c09fd 100644 --- a/modules/features/graphicalEditor/nodeRegistry.py +++ b/modules/features/graphicalEditor/nodeRegistry.py @@ -1,13 +1,14 @@ # Copyright (c) 2025 Patrick Motsch # All rights reserved. """ -Node Type Registry for graphicalEditor - static node definitions (ai, email, sharepoint, trigger, flow, data, input). +Node Type Registry for graphicalEditor - static node definitions (start, input, flow, data, ai, email, …). Nodes are defined first; IO/method actions are used at execution time. """ import logging from typing import Dict, List, Any, Optional +from modules.features.graphicalEditor.conditionOperators import localize_operator_catalog from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES from modules.features.graphicalEditor.nodeDefinitions.input import FORM_FIELD_TYPES from modules.features.graphicalEditor.nodeAdapter import bindsActionFromLegacy @@ -82,6 +83,34 @@ def _localizeNode(node: Dict[str, Any], language: str) -> Dict[str, Any]: pc["description"] = resolveText(pd, lang) params.append(pc) out["parameters"] = params + + out_ports: Dict[Any, Dict[str, Any]] = {} + for idx, po in (node.get("outputPorts") or {}).items(): + if not isinstance(po, dict): + continue + port_copy = dict(po) + opts = port_copy.get("dataPickOptions") + if isinstance(opts, list): + loc_opts: List[Dict[str, Any]] = [] + for o in opts: + if not isinstance(o, dict): + continue + oc = dict(o) + pl = oc.get("pickerLabel") + if pl is not None: + oc["pickerLabel"] = resolveText(pl, lang) + dt = oc.get("detail") + if dt is not None: + oc["detail"] = resolveText(dt, lang) + loc_opts.append(oc) + port_copy["dataPickOptions"] = loc_opts + out_ports[idx] = port_copy + if isinstance(node.get("outputPorts"), dict): + out["outputPorts"] = out_ports + + # Legacy node-level key no longer used — do not expose. 
+ out.pop("outputPickHints", None) + return out @@ -95,7 +124,7 @@ def getNodeTypesForApi( nodes = getNodeTypes(services, language) localized = [_localizeNode(n, language) for n in nodes] categories = [ - {"id": "trigger", "label": "Trigger"}, + {"id": "start", "label": "Start"}, {"id": "input", "label": "Eingabe/Mensch"}, {"id": "flow", "label": "Ablauf"}, {"id": "data", "label": "Daten"}, @@ -112,13 +141,14 @@ def getNodeTypesForApi( for name, schema in PORT_TYPE_CATALOG.items(): catalogSerialized[name] = { "name": schema.name, - "fields": [f.model_dump() for f in schema.fields], + "fields": [f.model_dump(by_alias=True, exclude_none=True) for f in schema.fields], } return { "nodeTypes": localized, "categories": categories, "portTypeCatalog": catalogSerialized, + "conditionOperatorCatalog": localize_operator_catalog(language), "systemVariables": SYSTEM_VARIABLES, "formFieldTypes": FORM_FIELD_TYPES, } diff --git a/modules/features/graphicalEditor/portTypes.py b/modules/features/graphicalEditor/portTypes.py index a08ebd12..12c2d90f 100644 --- a/modules/features/graphicalEditor/portTypes.py +++ b/modules/features/graphicalEditor/portTypes.py @@ -13,9 +13,9 @@ import time import uuid from typing import Any, Dict, List, Optional -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field -from modules.shared.i18nRegistry import resolveText +from modules.shared.i18nRegistry import resolveText, t logger = logging.getLogger(__name__) @@ -25,6 +25,8 @@ logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- class PortField(BaseModel): + model_config = ConfigDict(populate_by_name=True) + name: str type: str # str, int, bool, List[str], List[Document], Dict[str,Any], ConnectionRef, … description: str = "" @@ -36,11 +38,19 @@ class PortField(BaseModel): discriminator: bool = False # Surfaces this field at the top of the DataPicker list as the most common pick. 
recommended: bool = False + # Human DataPicker title (camelCase JSON for frontend). Omit for technical paths-only. + picker_label: Optional[str] = Field(default=None, serialization_alias="pickerLabel") + # For List[T] fields: segment between parent and inner field (iteration / one list item). + picker_item_label: Optional[str] = Field(default=None, serialization_alias="pickerItemLabel") class PortSchema(BaseModel): name: str # e.g. "EmailDraft", "AiResult", "Transit" fields: List[PortField] + # Declarative flag for the engine: when True, the executor attaches + # connection provenance ({id, authority, label}) onto the output. Replaces + # hard-coded schema lists in actionNodeExecutor._attachConnectionProvenance. + carriesConnectionProvenance: bool = False class InputPortDef(BaseModel): @@ -153,7 +163,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = { PortField(name="text", type="str", required=False, description="Textinhalt"), PortField(name="children", type="List[Any]", required=False, description="Unterblöcke"), ]), - "DocumentList": PortSchema(name="DocumentList", fields=[ + "DocumentList": PortSchema(name="DocumentList", carriesConnectionProvenance=True, fields=[ PortField(name="documents", type="List[Document]", description="Dokumente aus vorherigen Schritten", recommended=True), PortField(name="connection", type="ConnectionRef", required=False, @@ -163,7 +173,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = { PortField(name="count", type="int", required=False, description="Anzahl Dokumente"), ]), - "FileList": PortSchema(name="FileList", fields=[ + "FileList": PortSchema(name="FileList", carriesConnectionProvenance=True, fields=[ PortField(name="files", type="List[FileItem]", description="Dateiliste"), PortField(name="connection", type="ConnectionRef", required=False, @@ -173,7 +183,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = { PortField(name="count", type="int", required=False, description="Anzahl Dateien"), ]), - "EmailDraft": 
PortSchema(name="EmailDraft", fields=[ + "EmailDraft": PortSchema(name="EmailDraft", carriesConnectionProvenance=True, fields=[ PortField(name="subject", type="str", description="Betreff"), PortField(name="body", type="str", @@ -187,7 +197,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = { PortField(name="connection", type="ConnectionRef", required=False, description="Outlook-/Graph-Verbindung"), ]), - "EmailList": PortSchema(name="EmailList", fields=[ + "EmailList": PortSchema(name="EmailList", carriesConnectionProvenance=True, fields=[ PortField(name="emails", type="List[EmailItem]", description="E-Mails"), PortField(name="connection", type="ConnectionRef", required=False, @@ -195,7 +205,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = { PortField(name="count", type="int", required=False, description="Anzahl"), ]), - "TaskList": PortSchema(name="TaskList", fields=[ + "TaskList": PortSchema(name="TaskList", carriesConnectionProvenance=True, fields=[ PortField(name="tasks", type="List[TaskItem]", description="Aufgaben"), PortField(name="connection", type="ConnectionRef", required=False, @@ -219,15 +229,39 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = { ]), "AiResult": PortSchema(name="AiResult", fields=[ PortField(name="prompt", type="str", - description="Prompt"), + description="Prompt", + picker_label=t("Eingabe (Prompt des Schritts)"), + ), PortField(name="response", type="str", - description="Antworttext", recommended=True), + description=( + "Antworttext (Modell-Fließtext o. ä.; Bilder liegen in documents, nicht hier)." 
+ ), + recommended=True, + picker_label=t("Ausgabetext (Modell)"), + ), PortField(name="responseData", type="Dict", required=False, - description="Strukturierte Antwort (nur bei JSON-Ausgabe)"), + description="Strukturierte Antwort (nur bei JSON-Ausgabe)", + picker_label=t("Strukturierte Antwortdaten")), PortField(name="context", type="str", - description="Kontext"), + description="Kontext", + picker_label=t("Eingabe-Kontext")), PortField(name="documents", type="List[Document]", - description="Dokumente"), + description=( + "Erzeugte oder mitgegebene Dateien (z. B. Bilder); documentData = Nutzlast pro Eintrag." + ), + picker_label=t("Alle Ausgabe-Dateien (Liste)"), + picker_item_label=t("je Datei"), + ), + PortField(name="data", type="Dict", required=False, + description=( + "Internes Payload-Objekt (entspricht ``ActionResult.data``-Semantik). " + "Wird vom Executor gesetzt und enthält denselben Inhalt wie ``response`` " + "in strukturierter Form; primär für nachgelagerte Kontext-Nodes." 
+ ), + picker_label=t("Technische Detaildaten (data)")), + PortField(name="imageDocumentsOnly", type="List[Document]", required=False, + description="Nur Bild-bezogene Einträge aus documents.", + picker_label=t("Nur Bilder (Liste)")), ]), "BoolResult": PortSchema(name="BoolResult", fields=[ PortField(name="result", type="bool", @@ -237,7 +271,8 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = { ]), "TextResult": PortSchema(name="TextResult", fields=[ PortField(name="text", type="str", - description="Text"), + description="Text", + picker_label=t("Text (Schrittausgabe)")), ]), "LoopItem": PortSchema(name="LoopItem", fields=[ PortField(name="currentItem", type="Any", @@ -263,13 +298,32 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = { PortField(name="merged", type="Dict", description="Zusammengeführte Daten"), ]), + "ContextBranch": PortSchema(name="ContextBranch", fields=[ + PortField(name="items", type="List[Any]", + description="Schleifen-fertige Elemente aus dem (gefilterten) Kontext", + recommended=True, + picker_label=t("Gefilterte Elemente")), + PortField(name="data", type="Dict", required=False, + description="Gefilterter Presentation-Umschlag oder Eingabe-Spiegel", + picker_label=t("Kontext (data)")), + PortField(name="filterApplied", type="bool", required=False, + description="True wenn ein Kontext-Inhaltsfilter angewendet wurde"), + PortField(name="contentType", type="str", required=False, + description="Angewendeter Inhaltstyp-Filter (z. B. image)"), + PortField(name="match", type="int", required=False, + description="Aktiver Ausgangs-Index (Fall oder Sonst)"), + ]), "ActionDocument": PortSchema(name="ActionDocument", fields=[ PortField(name="documentName", type="str", - description="Dokumentname"), + description="Dokumentname", + picker_label=t("Dateiname")), PortField(name="documentData", type="Any", - description="Inhalt / Rohdaten (z.B. JSON-String, Bytes)"), + description="Inhalt / Rohdaten (z.B. 
JSON-String, Bytes)", + picker_label=t("Dateiinhalt (JSON, Text oder Bild)"), + recommended=True), PortField(name="mimeType", type="str", - description="MIME-Typ"), + description="MIME-Typ", + picker_label=t("Dateityp (MIME)")), PortField(name="fileId", type="str", required=False, description="Persistierte FileItem.id (vom Engine ergänzt)"), PortField(name="fileName", type="str", required=False, @@ -285,12 +339,62 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = { # Without it in the catalog the DataPicker cannot offer downstream # bindings like `processDocuments → documents → *` for syncToAccounting. PortField(name="documents", type="List[ActionDocument]", required=False, - description="Erzeugte Dokumente (immer befüllt für Trustee/AI/Email/...)"), + description=( + "Dokumentliste für Actions mit echten Artefakt-Dokumenten. " + "Beim Knoten „Inhalt extrahieren“ fehlt dieses Feld in der Knotenausgabe." + ), + picker_label=t("Alle Ausgabe-Dokumente"), + picker_item_label=t("je Dokument"), + ), PortField(name="data", type="Dict", required=False, - description="Ergebnisdaten"), + description=( + "Strukturierter Inhalt. Bei **context.extractContent**: **Presentation**-Root " + "(`schemaVersion`, `kind`, `fileOrder`, `files`) plus **`_meta`** — ohne " + "zusätzliches `response`/`contentExtracted`-Duplikat." + ), + picker_label=t("Technische Detaildaten (data)")), + # Mirror AiResult primary text fields so DataPicker / primaryTextRef behave the same + PortField(name="prompt", type="str", required=False, + description="Optional: auslösender Prompt / Schrittname", + picker_label=t("Auslöser / Prompt (falls vorhanden)")), + PortField(name="response", type="str", required=False, + description=( + "Fließtext wo die Action einen liefert. Bei **„Inhalt extrahieren“** absichtlich leer — " + "Inhalt liegt in ``data``.``files``." 
+ ), + recommended=True, + picker_label=t("Nur Fließtext (gesamt)")), + PortField(name="context", type="str", required=False, + description="Optional: Eingabe-Kontext", + picker_label=t("Mitgegebener Kontext")), + PortField(name="imageDocumentsOnly", type="List[ActionDocument]", required=False, + description=( + "Nur Bild-bezogene Einträge. Bei „Inhalt extrahieren“: synthetische " + "Einträge mit ``fileId`` aus persistierten Extrakt-Bildern (kein separates JSON-Dokument)." + ), + picker_label=t("Nur Bilder (Liste)")), + PortField(name="responseData", type="Dict", required=False, + description="Optional: strukturierte Zusatzdaten", + picker_label=t("Strukturierte Zusatzdaten")), + PortField(name="presentation", type="Dict", required=False, + description=( + "Selten: Top-Level-Spiegel von Präsentationsdaten andere Actions. " + "Bei „Inhalt extrahieren“ liegt alles direkt unter ``data`` (kein zusätzlicher Spiegel)." + ), + picker_label=t("Presentation (Top-Level-Spiegel)")), + PortField(name="presentationSummary", type="Dict", required=False, + description=( + "Kompakte Metadaten zu ``presentation`` (Debugging / traces)." + ), + picker_label=t("Presentation-Zusammenfassung")), + PortField(name="presentationConfig", type="Dict", required=False, + description=( + "Optional: Debugging-Konfiguration; bei Extract liegt die Primärquelle in ``validationMetadata`` des JSON-Dokuments." 
+ ), + picker_label=t("Presentation-Konfiguration")), ]), "Transit": PortSchema(name="Transit", fields=[]), - "UdmDocument": PortSchema(name="UdmDocument", fields=[ + "UdmDocument": PortSchema(name="UdmDocument", carriesConnectionProvenance=True, fields=[ PortField(name="id", type="str", description="Dokument-ID"), PortField(name="sourceType", type="str", description="Quellformat (pdf, docx, …)"), PortField(name="sourcePath", type="str", description="Quellpfad"), @@ -620,6 +724,24 @@ SYSTEM_VARIABLES: Dict[str, Dict[str, str]] = { } +# --------------------------------------------------------------------------- +# Graph inheritance (executeGraph materialization + ActionNodeExecutor wiring) +# --------------------------------------------------------------------------- +# +# When a parameter declares ``graphInherit.kind == "primaryTextRef"``, executeGraph +# inserts an explicit DataRef before run (see pickNotPushMigration.materializePrimaryTextHandover). +# ``recommendedDataPickRef`` uses upstream ``outputPorts.dataPickOptions`` where ``recommended: true`` +# (see pickNotPushMigration.materializeRecommendedDataPickRef). +# Schema names are catalog output port types (e.g. AiResult). 
+ +PRIMARY_TEXT_HANDOVER_REF_PATH: Dict[str, List[Any]] = { + "AiResult": ["response"], + "ActionResult": ["response"], + "TextResult": ["text"], + "ConsolidateResult": ["result"], +} + + def resolveSystemVariable(variable: str, context: Dict[str, Any]) -> Any: """Resolve a system variable name to its runtime value.""" from datetime import datetime, timezone @@ -817,8 +939,22 @@ def _resolveTransitChain( # Schema derivation for dynamic outputs # --------------------------------------------------------------------------- -def deriveFormPayloadSchemaFromParam(node: Dict[str, Any], param_key: str) -> Optional[PortSchema]: - """Derive output schema from a field-builder JSON list (``fields``, ``formFields``, …).""" +def deriveFormPayloadSchemaFromParam( + node: Dict[str, Any], + param_key: str, + name_field: str = "name", + type_field: str = "type", + label_field: str = "label", + schema_name: str = "FormPayload_dynamic", +) -> Optional[PortSchema]: + """Derive an output schema from a graph-defined parameter. + + Supports three parameter shapes: + - List[Dict] with ``name_field`` (e.g. ``fields[].name``, ``entries[].key``, + ``mappings[].outputField``). + - Group-fields: ``type == "group"`` recursed via ``fields``. + - List[str]: each string is taken as a leaf path key (used for ``filterContext.keys``). 
+ """ from modules.features.graphicalEditor.nodeDefinitions.input import FORM_FIELD_TYPES _FORM_TYPE_TO_PORT: Dict[str, str] = {f["id"]: f["portType"] for f in FORM_FIELD_TYPES} @@ -841,21 +977,35 @@ def deriveFormPayloadSchemaFromParam(node: Dict[str, Any], param_key: str) -> Op )) for f in fields_param: - if not isinstance(f, dict) or not f.get("name"): + if isinstance(f, str): + if f.strip(): + _append_field(f.strip(), "str", None, False) continue - fname = str(f["name"]) - if str(f.get("type", "")).lower() == "group" and isinstance(f.get("fields"), list): + if not isinstance(f, dict): + continue + fname_raw = f.get(name_field) + if not fname_raw and name_field == "contextKey": + fname_raw = f.get("key") + if not fname_raw: + continue + fname = str(fname_raw) + if str(f.get(type_field, "")).lower() == "group" and isinstance(f.get("fields"), list): for sub in f["fields"]: - if isinstance(sub, dict) and sub.get("name"): + if isinstance(sub, dict) and sub.get(name_field): _append_field( - f"{fname}.{sub['name']}", - sub.get("type", "str"), - sub.get("label"), + f"{fname}.{sub[name_field]}", + sub.get(type_field, "str"), + sub.get(label_field), bool(sub.get("required", False)), ) continue - _append_field(fname, f.get("type", "str"), f.get("label"), bool(f.get("required", False))) - return PortSchema(name="FormPayload_dynamic", fields=portFields) if portFields else None + _append_field( + fname, + f.get(type_field, "str"), + f.get(label_field), + bool(f.get("required", False)), + ) + return PortSchema(name=schema_name, fields=portFields) if portFields else None def _deriveFormPayloadSchema(node: Dict[str, Any]) -> Optional[PortSchema]: @@ -880,9 +1030,20 @@ def parse_graph_defined_output_schema( schema_spec = output_port.get("schema") if isinstance(schema_spec, dict) and schema_spec.get("kind") == "fromGraph": param_key = str(schema_spec.get("parameter") or "fields") - return deriveFormPayloadSchemaFromParam(node, param_key) + name_field = 
str(schema_spec.get("nameField") or "name") + type_field = str(schema_spec.get("typeField") or "type") + label_field = str(schema_spec.get("labelField") or "label") + schema_name = str(schema_spec.get("schemaName") or "FormPayload_dynamic") + return deriveFormPayloadSchemaFromParam( + node, param_key, + name_field=name_field, type_field=type_field, + label_field=label_field, schema_name=schema_name, + ) if output_port.get("dynamic") and output_port.get("deriveFrom"): - return deriveFormPayloadSchemaFromParam(node, str(output_port.get("deriveFrom"))) + name_field = str(output_port.get("deriveNameField") or "name") + return deriveFormPayloadSchemaFromParam( + node, str(output_port.get("deriveFrom")), name_field=name_field, + ) if isinstance(schema_spec, str) and schema_spec: return PORT_TYPE_CATALOG.get(schema_spec) return None diff --git a/modules/features/graphicalEditor/routeFeatureGraphicalEditor.py b/modules/features/graphicalEditor/routeFeatureGraphicalEditor.py index 4748f39a..663f87e4 100644 --- a/modules/features/graphicalEditor/routeFeatureGraphicalEditor.py +++ b/modules/features/graphicalEditor/routeFeatureGraphicalEditor.py @@ -26,7 +26,8 @@ from modules.workflows.automation2.runEnvelope import ( normalize_run_envelope, ) from modules.features.graphicalEditor.entryPoints import find_invocation -from modules.features.graphicalEditor.upstreamPathsService import compute_upstream_paths +from modules.features.graphicalEditor.conditionOperators import resolve_condition_meta +from modules.features.graphicalEditor.upstreamPathsService import compute_upstream_paths, compute_graph_data_sources from modules.shared.i18nRegistry import apiRouteContext, resolveText routeApiMsg = apiRouteContext("routeFeatureGraphicalEditor") @@ -192,6 +193,56 @@ def post_upstream_paths( return {"paths": paths} +@router.post("/{instanceId}/condition-meta") +@limiter.limit("120/minute") +def post_condition_meta( + request: Request, + instanceId: str = Path(..., description="Feature 
instance ID"), + body: Dict[str, Any] = Body(...), + language: str = Query("de", description="Localization (en, de, fr)"), + context: RequestContext = Depends(getRequestContext), +) -> dict: + """Return valueKind and operators for a DataRef (backend-driven If/Else UI).""" + _validateInstanceAccess(instanceId, context) + graph = body.get("graph") + ref = body.get("ref") + node_id = body.get("nodeId") + if not isinstance(graph, dict) or not isinstance(ref, dict): + raise HTTPException(status_code=400, detail=routeApiMsg("graph and ref are required")) + graph_payload = dict(graph) + if node_id: + graph_payload["targetNodeId"] = str(node_id) + return resolve_condition_meta(graph_payload, ref, lang=language) + + +@router.post("/{instanceId}/graph-data-sources") +@limiter.limit("120/minute") +def post_graph_data_sources( + request: Request, + instanceId: str = Path(..., description="Feature instance ID"), + body: Dict[str, Any] = Body(...), + context: RequestContext = Depends(getRequestContext), +) -> dict: + """Scope-aware data sources for the DataPicker. + + Takes ``{ nodeId, graph: { nodes, connections } }`` and returns:: + + { + "availableSourceIds": [...], # ancestors minus loop-body nodes on Done branch + "portIndexOverrides": {nodeId: n}, # use outputPorts[n] instead of 0 + "loopBodyContextIds": [...], # loops whose body the node is in + } + + All loop scope logic lives here so the frontend has zero topology knowledge. 
+ """ + _validateInstanceAccess(instanceId, context) + graph = body.get("graph") + node_id = body.get("nodeId") + if not isinstance(graph, dict) or not node_id: + raise HTTPException(status_code=400, detail=routeApiMsg("graph and nodeId are required")) + return compute_graph_data_sources(graph, str(node_id)) + + @router.get("/{instanceId}/upstream-paths/{node_id}") @limiter.limit("60/minute") def get_upstream_paths_saved( @@ -1724,6 +1775,51 @@ async def complete_task( ) +@router.post("/{instanceId}/tasks/{taskId}/cancel") +@limiter.limit("30/minute") +def cancel_pending_task_stop_run( + request: Request, + instanceId: str = Path(..., description="Feature instance ID"), + taskId: str = Path(..., description="Human task ID"), + context: RequestContext = Depends(getRequestContext), +) -> dict: + """Cancel a pending human task and stop the workflow run behind it.""" + mandateId = _validateInstanceAccess(instanceId, context) + iface = getGraphicalEditorInterface(context.user, mandateId, instanceId) + task = iface.getTask(taskId) + if not task: + raise HTTPException(status_code=404, detail=routeApiMsg("Task not found")) + + wf_ids = {w.get("id") for w in iface.getWorkflows() if w.get("id")} + if task.get("workflowId") not in wf_ids: + raise HTTPException(status_code=404, detail=routeApiMsg("Task not found")) + + if task.get("status") != "pending": + raise HTTPException(status_code=400, detail=routeApiMsg("Task already completed")) + + run_id = task.get("runId") + + from modules.workflows.automation2.executionEngine import requestRunStop + + if run_id: + requestRunStop(run_id) + db_run = iface.getRun(run_id) + if db_run: + current = db_run.get("status") or "" + if current not in ("completed", "failed", "cancelled"): + iface.updateRun(run_id, status="cancelled") + + pending = iface.getTasks(runId=run_id, status="pending") + for t in pending: + tid = t.get("id") + if tid: + iface.updateTask(tid, status="cancelled") + else: + iface.updateTask(taskId, status="cancelled") + + 
return {"success": True, "runId": run_id, "taskId": taskId} + + # ------------------------------------------------------------------------- # Monitoring / Metrics # ------------------------------------------------------------------------- diff --git a/modules/features/graphicalEditor/switchOutput.py b/modules/features/graphicalEditor/switchOutput.py new file mode 100644 index 00000000..be469ead --- /dev/null +++ b/modules/features/graphicalEditor/switchOutput.py @@ -0,0 +1,308 @@ +# Copyright (c) 2025 Patrick Motsch +"""Build flow.switch branch payloads: filtered context + loop-ready items.""" + +from __future__ import annotations + +import copy +import re +from typing import Any, Dict, List, Optional + +from modules.features.graphicalEditor.portTypes import unwrapTransit + +_CONTEXT_FILTER_OPERATORS = frozenset({"contains_content"}) +_BLOB_IMAGE_CHUNK_RE = re.compile(r"^\[image(?:\:([^\]]+))?\]$") + + +def _artifacts_by_part_id_from_presentation(inp: Any) -> Dict[str, str]: + plain = _unwrap_input(inp) + meta = plain.get("_meta") if isinstance(plain, dict) else None + if not isinstance(meta, dict): + return {} + out: Dict[str, str] = {} + for art in meta.get("persistedImageArtifacts") or []: + if not isinstance(art, dict): + continue + sp = str(art.get("sourcePartId") or "").strip() + fid = str(art.get("fileId") or "").strip() + if sp and fid: + out[sp] = fid + return out + + +def _enrich_image_slot(slot: Dict[str, Any], artifacts_by_part: Dict[str, str]) -> None: + if (slot.get("typeGroup") or "").strip().lower() != "image": + return + existing = str(slot.get("embeddedImageFileId") or "").strip() + if existing and existing in artifacts_by_part.values(): + return + candidates: List[str] = [] + sid = str(slot.get("id") or "").strip() + if sid: + candidates.append(sid) + data = slot.get("data") + if isinstance(data, str): + m = _BLOB_IMAGE_CHUNK_RE.fullmatch(data.strip()) + if m: + tok = (m.group(1) or "").strip() + if tok: + candidates.append(tok) + for cand in 
candidates: + fid = artifacts_by_part.get(cand) + if fid: + slot["embeddedImageFileId"] = fid + return + + +def _slot_matches_content_type(slot: Dict[str, Any], content_type: str) -> bool: + target = (content_type or "").strip().lower() + if not target: + return False + tg = (slot.get("typeGroup") or slot.get("contentType") or "").strip().lower() + if target == "media": + return tg in ("image", "media", "video", "audio") + if target == "text": + return tg in ("text", "table", "structure") + return tg == target + + +def _filter_bucket_slots(bucket: Dict[str, Any], content_type: str) -> Dict[str, Any]: + """Return a copy of a presentation file bucket with filtered ``data`` slots.""" + mode = str(bucket.get("outputMode") or "").strip().lower() + data = bucket.get("data") + if mode == "blob" and isinstance(data, str): + from modules.workflows.methods.methodContext.actions.extractContent import ( + filter_blob_bucket_by_content_type, + ) + + return filter_blob_bucket_by_content_type(bucket, content_type) + out = copy.deepcopy(bucket) + if isinstance(data, list): + out["data"] = [s for s in data if isinstance(s, dict) and _slot_matches_content_type(s, content_type)] + elif isinstance(data, dict) and _slot_matches_content_type(data, content_type): + out["data"] = data + else: + out["data"] = [] if isinstance(data, list) else data + return out + + +def _filter_presentation_envelope(envelope: Dict[str, Any], content_type: str) -> Dict[str, Any]: + """Filter all slots in a presentation envelope by content type group.""" + from modules.workflows.methods.methodContext.actions.extractContent import ( + PRESENTATION_KIND, + PRESENTATION_SCHEMA_VERSION, + ) + + out = copy.deepcopy(envelope) + files = out.get("files") or {} + if not isinstance(files, dict): + return out + filtered_files: Dict[str, Any] = {} + kept_order: List[str] = [] + for fk in out.get("fileOrder") or list(files.keys()): + bucket = files.get(fk) + if not isinstance(bucket, dict): + continue + fb = 
_filter_bucket_slots(bucket, content_type) + data = fb.get("data") + has_data = ( + (isinstance(data, list) and len(data) > 0) + or (isinstance(data, dict)) + or (isinstance(data, str) and str(data).strip()) + ) + if has_data: + filtered_files[str(fk)] = fb + kept_order.append(str(fk)) + out["schemaVersion"] = out.get("schemaVersion") or PRESENTATION_SCHEMA_VERSION + out["kind"] = out.get("kind") or PRESENTATION_KIND + out["fileOrder"] = kept_order + out["files"] = filtered_files + return out + + +def _slots_from_bucket(bucket: Dict[str, Any]) -> List[Any]: + data = bucket.get("data") + mode = str(bucket.get("outputMode") or "").strip().lower() + if mode == "blob" and isinstance(data, str) and data.strip(): + from modules.workflows.methods.methodContext.actions.extractContent import parse_blob_data_segments + + return parse_blob_data_segments(data) + if isinstance(data, list): + return [s for s in data if isinstance(s, dict)] + if isinstance(data, dict): + return [data] + if isinstance(data, str) and data.strip(): + return [{"typeGroup": "text", "data": data}] + items = bucket.get("items") + if isinstance(items, list): + return [i for i in items if isinstance(i, dict)] + return [] + + +def _items_from_presentation_envelope( + envelope: Dict[str, Any], + *, + artifacts_by_part: Optional[Dict[str, str]] = None, +) -> List[Any]: + items: List[Any] = [] + files = envelope.get("files") or {} + if not isinstance(files, dict): + return items + for fk in envelope.get("fileOrder") or list(files.keys()): + bucket = files.get(fk) + if isinstance(bucket, dict): + for slot in _slots_from_bucket(bucket): + if artifacts_by_part: + _enrich_image_slot(slot, artifacts_by_part) + sid = str(slot.get("id") or slot.get("label") or len(items)) + items.append({"name": f"{fk}:{sid}", "value": slot}) + return items + + +def expand_items_from_input(raw: Any) -> List[Any]: + """Best-effort loop items from transit/presentation/list/dict input.""" + if raw is None: + return [] + if 
isinstance(raw, dict) and isinstance(raw.get("items"), list): + return list(raw["items"]) + plain = unwrapTransit(raw) if isinstance(raw, dict) and raw.get("_transit") else raw + if isinstance(plain, dict) and isinstance(plain.get("items"), list): + return list(plain["items"]) + from modules.workflows.methods.methodContext.actions.extractContent import ( + normalize_presentation_envelopes, + ) + + envelopes = normalize_presentation_envelopes(plain) + if envelopes: + out: List[Any] = [] + for env in envelopes: + out.extend(_items_from_presentation_envelope(env)) + return out + if isinstance(plain, list): + return list(plain) + if isinstance(plain, dict): + children = plain.get("children") + if isinstance(children, list) and children: + return list(children) + return [{"name": k, "value": v} for k, v in plain.items()] + return [plain] + + +def _unwrap_input(inp: Any) -> Any: + if isinstance(inp, dict) and inp.get("_transit"): + return unwrapTransit(inp) + return inp + + +def build_switch_branch_payload( + inp: Any, + case: Dict[str, Any], + *, + value_kind: str = "unknown", + match_index: int = 0, +) -> Dict[str, Any]: + """Payload for a matched switch case (ContextBranch inner data).""" + operator = str(case.get("operator") or "eq") + right = case.get("value") + plain_in = _unwrap_input(inp) + + if operator in _CONTEXT_FILTER_OPERATORS and value_kind == "context": + content_type = str(right or "") + from modules.workflows.methods.methodContext.actions.extractContent import ( + normalize_presentation_envelopes, + ) + + source = plain_in + if isinstance(source, dict) and "data" in source and not source.get("kind"): + nested = source.get("data") + if isinstance(nested, dict): + source = nested + envelopes = normalize_presentation_envelopes(source) + if not envelopes and isinstance(plain_in, dict): + envelopes = normalize_presentation_envelopes(plain_in) + filtered_envs = [_filter_presentation_envelope(env, content_type) for env in envelopes] + artifacts_by_part = 
_artifacts_by_part_id_from_presentation(plain_in) + items: List[Any] = [] + for env in filtered_envs: + items.extend(_items_from_presentation_envelope(env, artifacts_by_part=artifacts_by_part)) + if len(filtered_envs) == 1: + data_out: Any = filtered_envs[0] + elif filtered_envs: + data_out = {"envelopes": filtered_envs} + else: + data_out = {} + return { + "data": data_out, + "items": items, + "filterApplied": True, + "contentType": content_type, + "match": match_index, + } + + data_out = plain_in if isinstance(plain_in, dict) else {"value": plain_in} + return { + "data": data_out, + "items": expand_items_from_input(inp), + "filterApplied": False, + "match": match_index, + } + + +def build_switch_default_payload(inp: Any, *, match_index: int) -> Dict[str, Any]: + """Sonst branch: unmodified input passthrough.""" + plain_in = _unwrap_input(inp) + data_out = plain_in if isinstance(plain_in, dict) else {"value": plain_in} + return { + "data": data_out, + "items": expand_items_from_input(inp), + "filterApplied": False, + "match": match_index, + } + + +def build_switch_combined_output( + inp: Any, + cases: List[Any], + *, + matched_indices: List[int], + value_kind: str = "unknown", +) -> Dict[str, Any]: + """Build per-port branch payloads; primary fields mirror the first active match.""" + branches: Dict[str, Dict[str, Any]] = {} + default_idx = len(cases) + for idx in matched_indices: + if idx == default_idx: + branches[str(idx)] = build_switch_default_payload(inp, match_index=default_idx) + elif 0 <= idx < len(cases): + c = cases[idx] if isinstance(cases[idx], dict) else {"operator": "eq", "value": cases[idx]} + branches[str(idx)] = build_switch_branch_payload( + inp, c, value_kind=value_kind, match_index=idx, + ) + primary_idx = matched_indices[0] if matched_indices else default_idx + primary = branches.get(str(primary_idx)) or build_switch_default_payload(inp, match_index=default_idx) + return {**primary, "branches": branches} + + +def 
switch_branch_payload(transit: Any, source_output: int) -> Optional[Dict[str, Any]]: + """Return the ContextBranch inner dict for a specific switch output port.""" + if not isinstance(transit, dict): + return None + data = transit.get("data") if transit.get("_transit") else transit + if not isinstance(data, dict): + return None + branches = data.get("branches") + if isinstance(branches, dict): + branch = branches.get(str(source_output)) + if isinstance(branch, dict): + return branch + if transit.get("_transit"): + return data + return data + + +def unwrap_transit_for_port(output: Any, source_output: Optional[int] = None) -> Any: + """Unwrap transit; when ``source_output`` is set, pick that switch branch payload.""" + if source_output is not None: + branch = switch_branch_payload(output, source_output) + if branch is not None: + return branch + return unwrapTransit(output) diff --git a/modules/features/graphicalEditor/upstreamPathsService.py b/modules/features/graphicalEditor/upstreamPathsService.py index 8075fd00..ade9524a 100644 --- a/modules/features/graphicalEditor/upstreamPathsService.py +++ b/modules/features/graphicalEditor/upstreamPathsService.py @@ -4,9 +4,10 @@ from __future__ import annotations from typing import Any, Dict, List, Set +from modules.features.graphicalEditor.conditionOperators import catalog_type_to_value_kind, resolve_value_kind from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES from modules.features.graphicalEditor.portTypes import PORT_TYPE_CATALOG, PortSchema, parse_graph_defined_output_schema -from modules.workflows.automation2.graphUtils import buildConnectionMap +from modules.workflows.automation2.graphUtils import buildConnectionMap, getLoopBodyNodeIds, getLoopDoneNodeIds _NODE_BY_TYPE = {n["id"]: n for n in STATIC_NODE_TYPES} @@ -36,6 +37,31 @@ def _paths_for_port_schema(schema: PortSchema, producer_node_id: str) -> List[Di return out +def _paths_for_data_pick_options( + options: List[Dict[str, Any]], + 
producer_node_id: str, +) -> List[Dict[str, Any]]: + """Explicit per-port pick list from node definition (authoritative; no catalog expansion).""" + out: List[Dict[str, Any]] = [] + for o in options: + if not isinstance(o, dict): + continue + path = o.get("path") + if not isinstance(path, list): + continue + label = o.get("pickerLabel") + out.append( + { + "producerNodeId": producer_node_id, + "path": path, + "type": o.get("type") or "Any", + "label": label if isinstance(label, str) else ".".join(str(p) for p in path), + "scopeOrigin": "data", + } + ) + return out + + def _paths_for_schema(schema_name: str, producer_node_id: str) -> List[Dict[str, Any]]: if not schema_name or schema_name == "Transit": return [] @@ -83,22 +109,39 @@ def compute_upstream_paths(graph: Dict[str, Any], target_node_id: str) -> List[D if not ndef: continue out0 = (ndef.get("outputPorts") or {}).get(0, {}) - derived = parse_graph_defined_output_schema(anode, out0 if isinstance(out0, dict) else {}) - if derived: - for entry in _paths_for_port_schema(derived, aid): - entry["producerLabel"] = (anode.get("title") or "").strip() or aid - paths.append(entry) - else: - raw_schema = out0.get("schema") if isinstance(out0, dict) else None - schema_name = raw_schema if isinstance(raw_schema, str) and raw_schema else "ActionResult" - for entry in _paths_for_schema(schema_name, aid): - entry["producerLabel"] = (anode.get("title") or "").strip() or aid - paths.append(entry) + out0 = out0 if isinstance(out0, dict) else {} + dpo = out0.get("dataPickOptions") - # Lexical loop hints (flow.loop): any loop node in ancestors adds synthetic paths + bases: List[Dict[str, Any]] = [] + if isinstance(dpo, list): + bases = _paths_for_data_pick_options(dpo, aid) + derived = parse_graph_defined_output_schema(anode, out0) + derived_paths: List[Dict[str, Any]] = [] + if derived: + derived_paths = _paths_for_port_schema(derived, aid) + + merged_list = bases + derived_paths + if merged_list: + plab = (anode.get("title") 
or "").strip() or aid + for entry in merged_list: + entry["producerLabel"] = plab + paths.append(entry) + continue + + raw_schema = out0.get("schema") if isinstance(out0, dict) else None + schema_name = raw_schema if isinstance(raw_schema, str) and raw_schema else "ActionResult" + plab = (anode.get("title") or "").strip() or aid + for entry in _paths_for_schema(schema_name, aid): + entry["producerLabel"] = plab + paths.append(entry) + + # Lexical loop hints (flow.loop): only for nodes inside the loop body for aid in ancestors: anode = node_by_id.get(aid) or {} - if anode.get("type") == "flow.loop": + if anode.get("type") != "flow.loop": + continue + body_ids = getLoopBodyNodeIds(aid, conn_map) + if target_node_id in body_ids: paths.extend( [ { @@ -125,4 +168,93 @@ def compute_upstream_paths(graph: Dict[str, Any], target_node_id: str) -> List[D ] ) + for entry in paths: + ct = str(entry.get("type") or "Any") + vk = catalog_type_to_value_kind(ct) + if vk == "unknown": + ref = { + "nodeId": entry.get("producerNodeId"), + "path": entry.get("path") or [], + } + graph_with_target = {**graph, "targetNodeId": target_node_id} + vk = resolve_value_kind(graph_with_target, ref, _skip_upstream=True) + entry["valueKind"] = vk + return paths + + +def compute_graph_data_sources(graph: Dict[str, Any], target_node_id: str) -> Dict[str, Any]: + """Return scope-aware data sources for the DataPicker. + + Determines which ancestor nodes are valid sources for ``target_node_id``, + taking loop scoping into account: + + - If ``target_node_id`` is on the *Done* branch of a ``flow.loop``, the + loop body nodes are excluded from ``availableSourceIds`` and the loop + node itself is mapped to its *Fertig* output port (index 1) via + ``portIndexOverrides``. + - If ``target_node_id`` is *inside* the loop body, the loop node id is + included in ``loopBodyContextIds`` so the frontend can show the lexical + loop variables (currentItem, currentIndex, count). 
+ + Returns:: + + { + "availableSourceIds": [...], # ordered list + "portIndexOverrides": {nodeId: n}, # non-zero port indices + "loopBodyContextIds": [...], # loops whose body this node is in + } + """ + nodes = graph.get("nodes") or [] + connections = graph.get("connections") or [] + node_by_id: Dict[str, Any] = {n["id"]: n for n in nodes if n.get("id")} + + if target_node_id not in node_by_id: + return {"availableSourceIds": [], "portIndexOverrides": {}, "loopBodyContextIds": []} + + conn_map = buildConnectionMap(connections) + + # Collect all ancestors via backward BFS + preds: Dict[str, Set[str]] = {} + for tgt, pairs in conn_map.items(): + for src, _, _ in pairs: + preds.setdefault(tgt, set()).add(src) + + seen: Set[str] = set() + stack = [target_node_id] + ancestors: Set[str] = set() + while stack: + cur = stack.pop() + for p in preds.get(cur, ()): + if p not in seen: + seen.add(p) + ancestors.add(p) + stack.append(p) + + body_nodes_to_exclude: Set[str] = set() + port_index_overrides: Dict[str, int] = {} + loop_body_context_ids: List[str] = [] + + for aid in ancestors: + anode = node_by_id.get(aid) or {} + if anode.get("type") != "flow.loop": + continue + body_ids = getLoopBodyNodeIds(aid, conn_map) + done_ids = getLoopDoneNodeIds(aid, conn_map) + + if target_node_id in body_ids: + loop_body_context_ids.append(aid) + elif target_node_id in done_ids: + body_nodes_to_exclude.update(body_ids) + port_index_overrides[aid] = 1 + + available_source_ids = [ + aid for aid in sorted(ancestors) + if aid not in body_nodes_to_exclude + ] + + return { + "availableSourceIds": available_source_ids, + "portIndexOverrides": port_index_overrides, + "loopBodyContextIds": loop_body_context_ids, + } diff --git a/modules/features/trustee/accounting/accountingBridge.py b/modules/features/trustee/accounting/accountingBridge.py index fec36d2d..4e0a4d59 100644 --- a/modules/features/trustee/accounting/accountingBridge.py +++ b/modules/features/trustee/accounting/accountingBridge.py @@ 
-151,15 +151,20 @@ class AccountingBridge: logger.info("Accounting sync skipped (no accounts): positionId=%s", positionId) return SyncResult(success=True, errorMessage="Position hat keine Kontierung (Soll-/Haben-Konto) – Sync übersprungen") - # 1) First: ensure all documents are in RMA (upload or duplicate); collect Beleg-IDs for linking + # Collect document references documentIds = [] for key in ("documentId", "bankDocumentId"): docId = position.get(key) if docId: documentIds.append(docId) - if documentIds: + + pendingDocs = [] # [(documentId, fileName, fileContent, mimeType)] for post-booking attach + postBookingAttach = connector.requiresPostBookingDocAttach + + # 1) Pre-booking document upload (RMA-style: upload first, link via belegId) + if documentIds and not postBookingAttach: from modules.features.trustee.datamodelFeatureTrustee import TrusteeDocument as TrusteeDocumentModel - logger.info("Accounting sync: positionId=%s, syncing %s document(s) to RMA ...", positionId, len(documentIds)) + logger.info("Accounting sync: positionId=%s, uploading %s document(s) pre-booking ...", positionId, len(documentIds)) belegIds = [] belegLabels = [] for documentId in documentIds: @@ -185,24 +190,40 @@ class AccountingBridge: comment=booking.reference, ) if not uploadResult.success: - errMsg = f"Dokument konnte nicht nach RMA hochgeladen werden: {uploadResult.errorMessage}" logger.error( "Accounting sync failed (document upload): positionId=%s, documentId=%s, error=%s", positionId, documentId, uploadResult.errorMessage, ) - return SyncResult(success=False, errorMessage=errMsg) + return SyncResult(success=False, errorMessage=f"Dokument-Upload fehlgeschlagen: {uploadResult.errorMessage}") belegId = uploadResult.externalId if belegId: self._trusteeInterface.db.recordModify(TrusteeDocumentModel, documentId, {"externalBelegId": belegId}) logger.info("Accounting sync: document uploaded & belegId=%s stored on document %s", belegId, documentId) - else: - logger.info("Accounting 
sync: document uploaded but no belegId in response (409 duplicate?), fileName=%s", fileName) belegIds.append(belegId) belegLabels.append(fileName) if belegIds or belegLabels: booking.externalDocumentIds = belegIds booking.externalDocumentLabels = belegLabels - logger.info("Accounting sync: positionId=%s, document sync done, pushing GL booking (POST /gl) ...", positionId) + logger.info("Accounting sync: positionId=%s, document upload done, pushing booking ...", positionId) + + # 1b) Post-booking flow: collect raw doc data now, attach after pushBooking + if documentIds and postBookingAttach: + from modules.features.trustee.datamodelFeatureTrustee import TrusteeDocument as TrusteeDocumentModel + for documentId in documentIds: + doc = self._trusteeInterface.getDocument(documentId) + if not doc: + continue + existingBelegId = getattr(doc, "externalBelegId", None) + if existingBelegId: + continue + docData = self._trusteeInterface.getDocumentData(documentId) + if docData is None: + continue + fileName = getattr(doc, "documentName", None) or "beleg.pdf" + mimeType = getattr(doc, "documentMimeType", None) or "application/pdf" + pendingDocs.append((documentId, fileName, docData, mimeType)) + if pendingDocs: + logger.info("Accounting sync: positionId=%s, %s document(s) queued for post-booking attach", positionId, len(pendingDocs)) # Duplicate check: if locally marked as synced, verify with Buha system accountingSyncId = position.get("accountingSyncId") @@ -218,7 +239,6 @@ class AccountingBridge: positionId, booking.reference, ) return SyncResult(success=False, errorMessage="Position already synced to this system") - # Not found in Buha (e.g. 
deleted there): clear local records and re-push logger.info( "Accounting sync: reference %s not found in Buha (deleted?), clearing local records and re-pushing positionId=%s", booking.reference, positionId, @@ -230,9 +250,9 @@ class AccountingBridge: if rid: self._trusteeInterface.db.recordDelete(TrusteeAccountingSync, rid) - # 2) Then: push booking (with reference to document IDs so RMA can link) + # 2) Push booking if not documentIds: - logger.info("Accounting sync: positionId=%s, no documents, pushing GL booking (POST /gl) ...", positionId) + logger.info("Accounting sync: positionId=%s, no documents, pushing booking ...", positionId) result = await connector.pushBooking(plainConfig, booking) if not result.success: logger.error( @@ -241,6 +261,28 @@ class AccountingBridge: result.errorMessage or "unknown", ) + # 3) Post-booking document attach (Abacus-style: entry must exist before attaching docs) + if result.success and pendingDocs and result.externalId: + from modules.features.trustee.datamodelFeatureTrustee import TrusteeDocument as TrusteeDocumentModel + logger.info("Accounting sync: positionId=%s, attaching %s document(s) to entry %s ...", positionId, len(pendingDocs), result.externalId) + for documentId, fileName, docData, mimeType in pendingDocs: + attachResult = await connector.attachDocumentToEntry( + plainConfig, + entryId=result.externalId, + fileName=fileName, + fileContent=docData, + mimeType=mimeType, + ) + if not attachResult.success: + logger.warning( + "Accounting sync: document attach failed (non-blocking): positionId=%s, documentId=%s, error=%s", + positionId, documentId, attachResult.errorMessage, + ) + continue + if attachResult.externalId: + self._trusteeInterface.db.recordModify(TrusteeDocumentModel, documentId, {"externalBelegId": attachResult.externalId}) + logger.info("Accounting sync: document attached, externalId=%s stored on document %s", attachResult.externalId, documentId) + # Save sync record import uuid syncRecord = { diff --git 
a/modules/features/trustee/accounting/accountingConnectorBase.py b/modules/features/trustee/accounting/accountingConnectorBase.py index 5d76c997..6a59509f 100644 --- a/modules/features/trustee/accounting/accountingConnectorBase.py +++ b/modules/features/trustee/accounting/accountingConnectorBase.py @@ -171,6 +171,12 @@ class BaseAccountingConnector(ABC): """ return [] + @property + def requiresPostBookingDocAttach(self) -> bool: + """If True, documents must be attached AFTER pushBooking (e.g. Abacus GeneralLedgerEntryDocuments). + If False (default), documents are uploaded BEFORE the booking (e.g. RMA belege).""" + return False + async def uploadDocument( self, config: Dict[str, Any], @@ -179,5 +185,16 @@ class BaseAccountingConnector(ABC): mimeType: str = "application/pdf", comment: Optional[str] = None, ) -> SyncResult: - """Upload a document/receipt (e.g. beleg). comment can link to booking reference. Override in connectors that support it.""" + """Upload a document/receipt before booking (pre-booking flow). Override in connectors that support it.""" return SyncResult(success=False, errorMessage="Document upload not supported by this connector") + + async def attachDocumentToEntry( + self, + config: Dict[str, Any], + entryId: str, + fileName: str, + fileContent: bytes, + mimeType: str = "application/pdf", + ) -> SyncResult: + """Attach a document to an existing booking/entry (post-booking flow). 
Override in connectors that need it.""" + return SyncResult(success=False, errorMessage="Post-booking document attach not supported by this connector") diff --git a/modules/features/trustee/accounting/connectors/accountingConnectorAbacus.py b/modules/features/trustee/accounting/connectors/accountingConnectorAbacus.py index e03e7df7..a1947b27 100644 --- a/modules/features/trustee/accounting/connectors/accountingConnectorAbacus.py +++ b/modules/features/trustee/accounting/connectors/accountingConnectorAbacus.py @@ -11,7 +11,7 @@ Account balances: Abacus exposes an ``AccountBalances`` entity (per fiscal year), but its availability depends on the customer's Abacus license / Profile and is NOT guaranteed for all instances. The robust default is therefore to - aggregate balances locally from ``GeneralJournalEntries`` (always + aggregate balances locally from ``GeneralLedgerEntries`` (always present). If a future iteration confirms the entity for a specific instance, ``getAccountBalances`` can be extended to prefer that source via a config flag (e.g. ``useAccountBalancesEntity: true``). @@ -58,6 +58,10 @@ class AccountingConnectorAbacus(BaseAccountingConnector): def __init__(self): self._tokenCache: Dict[str, Dict[str, Any]] = {} + @property + def requiresPostBookingDocAttach(self) -> bool: + return True + def getConnectorType(self) -> str: return "abacus" @@ -92,6 +96,14 @@ class AccountingConnectorAbacus(BaseAccountingConnector): fieldType="password", secret=True, ), + ConnectorConfigField( + key="defaultCostCentre", + label=t("Standard-Kostenstelle"), + fieldType="text", + secret=False, + required=False, + placeholder="e.g. 
100", + ), ] def _buildBaseUrl(self, config: Dict[str, Any]) -> str: @@ -165,7 +177,9 @@ class AccountingConnectorAbacus(BaseAccountingConnector): clientName = config.get("clientName") if not clientName: raise ValueError("Missing required config: clientName") - return f"{baseUrl}/{clientName}/{entity}" + if "/api/entity/v1" not in baseUrl: + baseUrl = f"{baseUrl}/api/entity/v1" + return f"{baseUrl}/mandants/{clientName}/{entity}" async def _buildAuthHeaders(self, config: Dict[str, Any]) -> Optional[Dict[str, str]]: token = await self._getAccessToken(config) @@ -218,53 +232,135 @@ class AccountingConnectorAbacus(BaseAccountingConnector): data = await resp.json() for item in data.get("value", []): + label = "" + for d in (item.get("Designations") or []): + if d.get("Language") == "de": + label = d.get("Text", "") + break + if not label: + desigs = item.get("Designations") or [] + label = desigs[0].get("Text", "") if desigs else "" charts.append(AccountingChart( - accountNumber=str(item.get("AccountNumber", item.get("Id", ""))), - label=item.get("Name", item.get("Description", "")), - accountType=item.get("AccountType", None), + accountNumber=str(item.get("Id", "")), + label=label, + accountType=item.get("Segment", None), )) url = data.get("@odata.nextLink") except Exception as e: logger.error(f"Abacus getChartOfAccounts error: {e}") return charts + async def _fetchJournals(self, config: Dict[str, Any], headers: Dict[str, str]) -> List[Dict[str, Any]]: + """Fetch all journals from Abacus.""" + try: + async with aiohttp.ClientSession() as session: + url = self._buildEntityUrl(config, "Journals") + async with session.get(url, headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as resp: + if resp.status != 200: + return [] + data = await resp.json() + return data.get("value", []) + except Exception: + return [] + + async def _resolveJournalId(self, config: Dict[str, Any], headers: Dict[str, str], bookingDate: str) -> Optional[str]: + """Find the open journal that 
covers the booking date.""" + for j in await self._fetchJournals(config, headers): + start = j.get("StartDate", "") + end = j.get("EndDate", "") + if start <= bookingDate <= end: + return j.get("Id") + return None + + async def _buildJournalFilter(self, config: Dict[str, Any], headers: Dict[str, str], dateFrom: Optional[str] = None, dateTo: Optional[str] = None) -> Optional[str]: + """Build an OData $filter on JournalId for journals overlapping the date range. + Abacus only allows filtering by JournalId, not by Date. + """ + journals = await self._fetchJournals(config, headers) + if not journals: + return None + matchingIds = [] + for j in journals: + jStart = j.get("StartDate", "") + jEnd = j.get("EndDate", "") + if dateTo and jStart > dateTo: + continue + if dateFrom and jEnd < dateFrom: + continue + matchingIds.append(j.get("Id")) + if not matchingIds: + return None + if len(matchingIds) == 1: + return f"JournalId eq '{matchingIds[0]}'" + parts = " or ".join(f"JournalId eq '{jid}'" for jid in matchingIds) + return f"({parts})" + async def pushBooking(self, config: Dict[str, Any], booking: AccountingBooking) -> SyncResult: headers = await self._buildAuthHeaders(config) if not headers: return SyncResult(success=False, errorMessage="Failed to obtain access token") + debitLine = None + creditLine = None + for line in booking.lines: + if line.debitAmount > 0: + debitLine = line + if line.creditAmount > 0: + creditLine = line + if not debitLine or not creditLine: + return SyncResult(success=False, errorMessage="Booking must have at least one debit and one credit line") + + amount = debitLine.debitAmount + + journalId = await self._resolveJournalId(config, headers, booking.bookingDate) + if not journalId: + return SyncResult(success=False, errorMessage=f"No open journal found for date {booking.bookingDate}") + try: - lines = [] - for line in booking.lines: - entry: Dict[str, Any] = { - "AccountId": line.accountNumber, - "Text": line.description or booking.description, 
- } - if line.debitAmount > 0: - entry["DebitAmount"] = line.debitAmount - if line.creditAmount > 0: - entry["CreditAmount"] = line.creditAmount - if line.taxCode: - entry["TaxCode"] = line.taxCode - if line.costCenter: - entry["CostCenterId"] = line.costCenter - lines.append(entry) + debitAccountId = int(debitLine.accountNumber) + creditAccountId = int(creditLine.accountNumber) + except ValueError: + return SyncResult(success=False, errorMessage=f"Account numbers must be numeric: debit={debitLine.accountNumber}, credit={creditLine.accountNumber}") - payload = { - "JournalDate": booking.bookingDate, - "Reference": booking.reference, - "Text": booking.description, - "Lines": lines, - } + debitSide: Dict[str, Any] = {"AccountId": debitAccountId, "EnterpriseId": 0, "CrossDivisionId": 0} + creditSide: Dict[str, Any] = {"AccountId": creditAccountId, "EnterpriseId": 0, "CrossDivisionId": 0} + defaultCC = config.get("defaultCostCentre") + for line, side in [(debitLine, debitSide), (creditLine, creditSide)]: + cc = line.costCenter or defaultCC + if cc: + try: + side["CostCentre1Id"] = int(cc) + except ValueError: + side["CostCentre1Id"] = cc + payload: Dict[str, Any] = { + "Date": booking.bookingDate, + "JournalId": journalId, + "DivisionId": 0, + "Direction": "Debit", + "Debit": debitSide, + "Credit": creditSide, + "Amount": {"KeyAmount": amount}, + "Texts": {"Text1": (booking.description or "")[:80]}, + } + ref = (booking.reference or "")[:10] + if ref: + payload["Document"] = {"Number": ref} + if debitLine.taxCode: + payload["Tax"] = {"CodeId": debitLine.taxCode[:3]} + + try: async with aiohttp.ClientSession() as session: - url = self._buildEntityUrl(config, "GeneralJournalEntries") + url = self._buildEntityUrl(config, "GeneralLedgerEntries") async with session.post(url, headers=headers, json=payload, timeout=aiohttp.ClientTimeout(total=30)) as resp: body = await resp.json() if resp.content_type and "json" in resp.content_type else {"raw": await resp.text()} if 
resp.status in (200, 201): externalId = str(body.get("Id", "")) if isinstance(body, dict) else None return SyncResult(success=True, externalId=externalId, rawResponse=body) - return SyncResult(success=False, errorMessage=f"HTTP {resp.status}", rawResponse=body) + errDetail = "" + if isinstance(body, dict) and "error" in body: + errDetail = body["error"].get("message", "") + return SyncResult(success=False, errorMessage=f"HTTP {resp.status}: {errDetail or str(body)[:200]}", rawResponse=body) except Exception as e: return SyncResult(success=False, errorMessage=str(e)) @@ -274,7 +370,7 @@ class AccountingConnectorAbacus(BaseAccountingConnector): return SyncResult(success=False, errorMessage="Failed to obtain access token") try: async with aiohttp.ClientSession() as session: - url = self._buildEntityUrl(config, f"GeneralJournalEntries({externalId})") + url = self._buildEntityUrl(config, f"GeneralLedgerEntries({externalId})") async with session.get(url, headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as resp: if resp.status == 200: return SyncResult(success=True, externalId=externalId) @@ -283,22 +379,20 @@ class AccountingConnectorAbacus(BaseAccountingConnector): return SyncResult(success=False, errorMessage=str(e)) async def getJournalEntries(self, config: Dict[str, Any], dateFrom: Optional[str] = None, dateTo: Optional[str] = None, accountNumbers: Optional[List[str]] = None) -> List[Dict[str, Any]]: - """Read GeneralJournalEntries from Abacus (OData V4, paginated).""" + """Read GeneralLedgerEntries from Abacus (OData V4, paginated). + Each Abacus entry is a single-line (one debit + one credit account). + We map it to our multi-line format with two lines per entry. + Abacus only allows filtering by JournalId, so date filtering is done client-side. 
+ """ headers = await self._buildAuthHeaders(config) if not headers: return [] - filterParts = [] - if dateFrom: - filterParts.append(f"JournalDate ge {dateFrom}") - if dateTo: - filterParts.append(f"JournalDate le {dateTo}") - queryParams = "" - if filterParts: - queryParams = "?$filter=" + " and ".join(filterParts) + journalFilter = await self._buildJournalFilter(config, headers, dateFrom, dateTo) + queryParams = f"?$filter={journalFilter}" if journalFilter else "" entries: List[Dict[str, Any]] = [] - url: Optional[str] = self._buildEntityUrl(config, f"GeneralJournalEntries{queryParams}") + url: Optional[str] = self._buildEntityUrl(config, f"GeneralLedgerEntries{queryParams}") try: async with aiohttp.ClientSession() as session: while url: @@ -308,28 +402,28 @@ class AccountingConnectorAbacus(BaseAccountingConnector): data = await resp.json() for item in data.get("value", []): - lines = [] - totalAmt = 0.0 - for line in (item.get("Lines") or []): - debit = float(line.get("DebitAmount", 0)) - credit = float(line.get("CreditAmount", 0)) - lines.append({ - "accountNumber": str(line.get("AccountId", "")), - "debitAmount": debit, - "creditAmount": credit, - "description": line.get("Text", ""), - "taxCode": line.get("TaxCode"), - "costCenter": line.get("CostCenterId"), - }) - totalAmt += max(debit, credit) + entryDate = str(item.get("Date", "")).split("T")[0] + if dateFrom and entryDate < dateFrom: + continue + if dateTo and entryDate > dateTo: + continue + amt = float((item.get("Amount") or {}).get("KeyAmount", 0)) + debitAcc = str((item.get("Debit") or {}).get("AccountId", "")) + creditAcc = str((item.get("Credit") or {}).get("AccountId", "")) + texts = item.get("Texts") or {} + desc = texts.get("Text1", "") + docInfo = item.get("Document") or {} entries.append({ "externalId": str(item.get("Id", "")), - "bookingDate": str(item.get("JournalDate", "")).split("T")[0], - "reference": item.get("Reference", ""), - "description": item.get("Text", ""), + "bookingDate": 
entryDate, + "reference": docInfo.get("Number", ""), + "description": desc, "currency": "CHF", - "totalAmount": totalAmt, - "lines": lines, + "totalAmount": amt, + "lines": [ + {"accountNumber": debitAcc, "debitAmount": amt, "creditAmount": 0, "description": desc}, + {"accountNumber": creditAcc, "debitAmount": 0, "creditAmount": amt, "description": desc}, + ], }) url = data.get("@odata.nextLink") except Exception as e: @@ -374,23 +468,11 @@ class AccountingConnectorAbacus(BaseAccountingConnector): years: List[int], accountNumbers: Optional[List[str]] = None, ) -> List[AccountingPeriodBalance]: - """Aggregate account balances from ``GeneralJournalEntries`` (OData V4). + """Aggregate account balances from GeneralLedgerEntries (OData V4). - Strategy: - 1. Page through ``GET GeneralJournalEntries?$filter=JournalDate le YYYY-12-31`` - until ``@odata.nextLink`` is exhausted. Including ALL prior years - is required to compute the carry-over for balance-sheet accounts. - 2. Per (account, year, month) accumulate ``DebitAmount``/``CreditAmount`` - from ``Lines``. - 3. Income-statement accounts (3xxx-9xxx) reset to 0 per fiscal year; - balance-sheet accounts (1xxx-2xxx) carry their cumulative balance. - - Optional optimization (not yet active): if the customer's Abacus - instance ships the ``AccountBalances`` OData entity, it can return - authoritative period balances directly. Detect via a probe GET on - ``AccountBalances?$top=1`` and prefer that source. This is intentionally - deferred until we hit a customer where the entity is available -- - the local aggregation is always-correct fallback. + Each Abacus entry is a single line with Debit.AccountId, Credit.AccountId, + and Amount.KeyAmount. We expand this into two movements per entry + (debit account gets +amount, credit account gets -amount). 
""" if not years: return [] @@ -409,7 +491,7 @@ class AccountingConnectorAbacus(BaseAccountingConnector): movements: Dict[Tuple[str, int, int], Dict[str, float]] = {} seenAccounts: set = set() for entry in rawEntries: - dateRaw = str(entry.get("JournalDate") or "")[:10] + dateRaw = str(entry.get("Date") or "")[:10] if len(dateRaw) < 7: continue try: @@ -417,18 +499,15 @@ class AccountingConnectorAbacus(BaseAccountingConnector): month = int(dateRaw[5:7]) except ValueError: continue - for line in (entry.get("Lines") or []): - accNo = str(line.get("AccountId") or "").strip() + amt = float((entry.get("Amount") or {}).get("KeyAmount", 0)) + if amt == 0: + continue + debitAcc = str((entry.get("Debit") or {}).get("AccountId", "")).strip() + creditAcc = str((entry.get("Credit") or {}).get("AccountId", "")).strip() + for accNo, debit, credit in [(debitAcc, amt, 0.0), (creditAcc, 0.0, amt)]: if not accNo: continue seenAccounts.add(accNo) - try: - debit = float(line.get("DebitAmount") or 0) - credit = float(line.get("CreditAmount") or 0) - except (TypeError, ValueError): - continue - if debit == 0 and credit == 0: - continue bucket = movements.setdefault((accNo, year, month), {"debit": 0.0, "credit": 0.0}) bucket["debit"] += debit bucket["credit"] += credit @@ -495,14 +574,13 @@ class AccountingConnectorAbacus(BaseAccountingConnector): headers: Dict[str, str], dateTo: str, ) -> List[Dict[str, Any]]: - """Page through ``GeneralJournalEntries`` (OData V4) following ``@odata.nextLink``. - - We filter ``JournalDate le dateTo`` to bound the result, but include - ALL prior years (no lower bound) so cumulative balance-sheet - carry-over is correct. + """Page through GeneralLedgerEntries (OData V4) following @odata.nextLink. + Abacus only allows filtering by JournalId, so date filtering is done client-side. 
""" results: List[Dict[str, Any]] = [] - baseUrl = self._buildEntityUrl(config, f"GeneralJournalEntries?$filter=JournalDate le {dateTo}") + journalFilter = await self._buildJournalFilter(config, headers, dateTo=dateTo) + queryParams = f"?$filter={journalFilter}" if journalFilter else "" + baseUrl = self._buildEntityUrl(config, f"GeneralLedgerEntries{queryParams}") nextUrl: Optional[str] = baseUrl async with aiohttp.ClientSession() as session: while nextUrl: @@ -510,11 +588,11 @@ class AccountingConnectorAbacus(BaseAccountingConnector): async with session.get(nextUrl, headers=headers, timeout=aiohttp.ClientTimeout(total=60)) as resp: if resp.status != 200: body = await resp.text() - logger.warning("Abacus GeneralJournalEntries HTTP %s: %s", resp.status, body[:200]) + logger.warning("Abacus GeneralLedgerEntries HTTP %s: %s", resp.status, body[:200]) break data = await resp.json() except Exception as ex: - logger.warning("Abacus GeneralJournalEntries request failed: %s", ex) + logger.warning("Abacus GeneralLedgerEntries request failed: %s", ex) break page = data.get("value") or [] if not isinstance(page, list): @@ -522,3 +600,60 @@ class AccountingConnectorAbacus(BaseAccountingConnector): results.extend(page) nextUrl = data.get("@odata.nextLink") return results + + async def attachDocumentToEntry( + self, + config: Dict[str, Any], + entryId: str, + fileName: str, + fileContent: bytes, + mimeType: str = "application/pdf", + ) -> SyncResult: + """Attach a document to a GeneralLedgerEntry via OData V4 two-step flow: + 1) POST GeneralLedgerEntryDocuments (metadata) → get document ID + 2) PUT GeneralLedgerEntryDocuments({id})/Content (binary stream) + """ + headers = await self._buildAuthHeaders(config) + if not headers: + return SyncResult(success=False, errorMessage="Failed to obtain access token") + + try: + async with aiohttp.ClientSession() as session: + # Step 1: create document metadata + docUrl = self._buildEntityUrl(config, "GeneralLedgerEntryDocuments") + payload 
= { + "Name": fileName, + "GeneralLedgerEntryId": entryId, + } + async with session.post(docUrl, headers=headers, json=payload, timeout=aiohttp.ClientTimeout(total=30)) as resp: + body = await resp.text() + if resp.status not in (200, 201): + logger.error("Abacus document create failed: HTTP %s: %s", resp.status, body[:500]) + return SyncResult(success=False, errorMessage=f"HTTP {resp.status}: {body[:200]}") + try: + docData = await resp.json(content_type=None) + except Exception: + docData = {} + docId = docData.get("Id") + + if not docId: + logger.error("Abacus document create: no Id in response: %s", body[:300]) + return SyncResult(success=False, errorMessage="No document Id returned by Abacus") + + # Step 2: upload binary content stream + contentUrl = self._buildEntityUrl(config, f"GeneralLedgerEntryDocuments({docId})/Content") + streamHeaders = { + "Authorization": headers["Authorization"], + "Content-Type": mimeType, + } + async with session.put(contentUrl, headers=streamHeaders, data=fileContent, timeout=aiohttp.ClientTimeout(total=60)) as resp2: + if resp2.status not in (200, 204): + body2 = await resp2.text() + logger.error("Abacus document content upload failed: HTTP %s: %s", resp2.status, body2[:500]) + return SyncResult(success=False, errorMessage=f"Content upload HTTP {resp2.status}: {body2[:200]}") + + logger.info("Abacus document attached: docId=%s, entryId=%s, fileName=%s", docId, entryId, fileName) + return SyncResult(success=True, externalId=str(docId)) + except Exception as e: + logger.error("Abacus attachDocumentToEntry error: %s", e) + return SyncResult(success=False, errorMessage=str(e)) diff --git a/modules/interfaces/interfaceBootstrap.py b/modules/interfaces/interfaceBootstrap.py index b7a56a02..1f450d0c 100644 --- a/modules/interfaces/interfaceBootstrap.py +++ b/modules/interfaces/interfaceBootstrap.py @@ -308,7 +308,6 @@ def _buildSystemTemplates(): "title": "Pro E-Mail", "parameters": { "items": {"type": "ref", "nodeId": "n2", "path": 
["emails"]}, - "level": "auto", "concurrency": 1, }, }, @@ -348,7 +347,6 @@ def _buildSystemTemplates(): "title": "Pro Dokument", "parameters": { "items": {"type": "ref", "nodeId": "n2", "path": ["files"]}, - "level": "auto", "concurrency": 1, }, }, diff --git a/modules/interfaces/interfaceDbManagement.py b/modules/interfaces/interfaceDbManagement.py index 4dc8a206..3b87611d 100644 --- a/modules/interfaces/interfaceDbManagement.py +++ b/modules/interfaces/interfaceDbManagement.py @@ -990,6 +990,10 @@ class ComponentObjects: If pagination is provided: PaginatedResult with items and metadata """ def _convertFileItems(files): + from modules.workflows.automation2.workflowArtifactVisibility import ( + suppress_workflow_file_in_workspace_ui, + ) + fileItems = [] for file in files: try: @@ -1002,6 +1006,8 @@ class ComponentObjects: fileName = file.get("fileName") if not fileName or fileName == "None": continue + if suppress_workflow_file_in_workspace_ui(file): + continue if file.get("scope") is None: file["scope"] = "personal" @@ -1342,16 +1348,34 @@ class ComponentObjects: return newfileName counter += 1 - def createFile(self, name: str, mimeType: str, content: bytes) -> FileItem: + def createFile( + self, + name: str, + mimeType: str, + content: bytes, + folderId: Optional[str] = None, + ) -> FileItem: """Creates a new file entry if user has permission. Computes fileHash and fileSize from content. Duplicate check: if a file with the same user + fileHash + fileName already exists, the existing file is returned instead of creating a new one. Same hash with different name is allowed (intentional copy by user). + + When ``folderId`` is set, the folder must exist and the user must be allowed to modify it. 
""" if not self.checkRbacPermission(FileItem, "create"): raise PermissionError("No permission to create files") + resolved_folder_id: Optional[str] = None + if folderId is not None: + raw = str(folderId).strip() + if raw: + folder = self.getFolder(raw) + if not folder: + raise FileNotFoundError(f"Folder {raw} not found") + self._requireFolderWriteAccess(folder, raw, "update") + resolved_folder_id = raw + # Compute file size and hash fileSize = len(content) fileHash = hashlib.sha256(content).hexdigest() @@ -1383,6 +1407,7 @@ class ComponentObjects: mimeType=mimeType, fileSize=fileSize, fileHash=fileHash, + folderId=resolved_folder_id, ) # Ensure audit user is always stored: workflow/singleton contexts sometimes leave # the connector without _current_user_id, so _saveRecord skips sysCreatedBy → diff --git a/modules/migration/seedData/ui_language_seed.json b/modules/migration/seedData/ui_language_seed.json index 0f769074..060e6c51 100644 --- a/modules/migration/seedData/ui_language_seed.json +++ b/modules/migration/seedData/ui_language_seed.json @@ -3383,6 +3383,116 @@ "key": "Warnschwelle", "value": "" }, + { + "context": "ui", + "key": "Ansicht an Fenster anpassen", + "value": "" + }, + { + "context": "ui", + "key": "Ansicht zurücksetzen", + "value": "" + }, + { + "context": "ui", + "key": "Auswahl löschen", + "value": "" + }, + { + "context": "ui", + "key": "Canvas bearbeiten", + "value": "" + }, + { + "context": "ui", + "key": "Klicken Sie auf einen Ausgang, dann auf einen Eingang", + "value": "" + }, + { + "context": "ui", + "key": "Klicken Sie auf einen Eingang, um die Verbindung zu erstellen", + "value": "" + }, + { + "context": "ui", + "key": "Kommentar (optional)", + "value": "" + }, + { + "context": "ui", + "key": "Kommentar bearbeiten", + "value": "" + }, + { + "context": "ui", + "key": "Knoten duplizieren", + "value": "" + }, + { + "context": "ui", + "key": "Rückgängig", + "value": "" + }, + { + "context": "ui", + "key": "Verbindungen zeichnen", + "value": 
"" + }, + { + "context": "ui", + "key": "Vergrößern", + "value": "" + }, + { + "context": "ui", + "key": "Verkleinern", + "value": "" + }, + { + "context": "ui", + "key": "Wiederholen", + "value": "" + }, + { + "context": "ui", + "key": "Zoom-Voreinstellungen", + "value": "" + }, + { + "context": "ui", + "key": "Zoomstufe (Prozent)", + "value": "" + }, + { + "context": "ui", + "key": "Doppelklick zum Bearbeiten", + "value": "" + }, + { + "context": "ui", + "key": "Kommentar auf dem Canvas einfügen", + "value": "" + }, + { + "context": "ui", + "key": "Kommentar eingeben …", + "value": "" + }, + { + "context": "ui", + "key": "Canvas-Notiz verschieben", + "value": "" + }, + { + "context": "ui", + "key": "Notizfarbe", + "value": "" + }, + { + "context": "ui", + "key": "Notizgröße ändern", + "value": "" + }, { "context": "ui", "key": "✓ Mandat eingereicht", @@ -6776,6 +6886,116 @@ "key": "Warnschwelle", "value": "Warnschwelle" }, + { + "context": "ui", + "key": "Ansicht an Fenster anpassen", + "value": "Ansicht an Fenster anpassen" + }, + { + "context": "ui", + "key": "Ansicht zurücksetzen", + "value": "Ansicht zurücksetzen" + }, + { + "context": "ui", + "key": "Auswahl löschen", + "value": "Auswahl löschen" + }, + { + "context": "ui", + "key": "Canvas bearbeiten", + "value": "Canvas bearbeiten" + }, + { + "context": "ui", + "key": "Klicken Sie auf einen Ausgang, dann auf einen Eingang", + "value": "Klicken Sie auf einen Ausgang, dann auf einen Eingang" + }, + { + "context": "ui", + "key": "Klicken Sie auf einen Eingang, um die Verbindung zu erstellen", + "value": "Klicken Sie auf einen Eingang, um die Verbindung zu erstellen" + }, + { + "context": "ui", + "key": "Kommentar (optional)", + "value": "Kommentar (optional)" + }, + { + "context": "ui", + "key": "Kommentar bearbeiten", + "value": "Kommentar bearbeiten" + }, + { + "context": "ui", + "key": "Knoten duplizieren", + "value": "Knoten duplizieren" + }, + { + "context": "ui", + "key": "Rückgängig", + "value": 
"Rückgängig" + }, + { + "context": "ui", + "key": "Verbindungen zeichnen", + "value": "Verbindungen zeichnen" + }, + { + "context": "ui", + "key": "Vergrößern", + "value": "Vergrößern" + }, + { + "context": "ui", + "key": "Verkleinern", + "value": "Verkleinern" + }, + { + "context": "ui", + "key": "Wiederholen", + "value": "Wiederholen" + }, + { + "context": "ui", + "key": "Zoom-Voreinstellungen", + "value": "Zoom-Voreinstellungen" + }, + { + "context": "ui", + "key": "Zoomstufe (Prozent)", + "value": "Zoomstufe (Prozent)" + }, + { + "context": "ui", + "key": "Doppelklick zum Bearbeiten", + "value": "Doppelklick zum Bearbeiten" + }, + { + "context": "ui", + "key": "Kommentar auf dem Canvas einfügen", + "value": "Kommentar auf dem Canvas einfügen" + }, + { + "context": "ui", + "key": "Kommentar eingeben …", + "value": "Kommentar eingeben …" + }, + { + "context": "ui", + "key": "Canvas-Notiz verschieben", + "value": "Zum Verschieben greifen" + }, + { + "context": "ui", + "key": "Notizfarbe", + "value": "Notizfarbe" + }, + { + "context": "ui", + "key": "Notizgröße ändern", + "value": "Notizgröße ändern" + }, { "context": "ui", "key": "✓ Mandat eingereicht", @@ -9994,6 +10214,116 @@ "key": "Warnschwelle", "value": "Warning threshold" }, + { + "context": "ui", + "key": "Ansicht an Fenster anpassen", + "value": "Fit to window" + }, + { + "context": "ui", + "key": "Ansicht zurücksetzen", + "value": "Reset view" + }, + { + "context": "ui", + "key": "Auswahl löschen", + "value": "Delete selection" + }, + { + "context": "ui", + "key": "Canvas bearbeiten", + "value": "Edit canvas" + }, + { + "context": "ui", + "key": "Klicken Sie auf einen Ausgang, dann auf einen Eingang", + "value": "Click an output, then an input" + }, + { + "context": "ui", + "key": "Klicken Sie auf einen Eingang, um die Verbindung zu erstellen", + "value": "Click an input to create the connection" + }, + { + "context": "ui", + "key": "Kommentar (optional)", + "value": "Comment (optional)" + }, + { + 
"context": "ui", + "key": "Kommentar bearbeiten", + "value": "Edit comment" + }, + { + "context": "ui", + "key": "Knoten duplizieren", + "value": "Duplicate node" + }, + { + "context": "ui", + "key": "Rückgängig", + "value": "Undo" + }, + { + "context": "ui", + "key": "Verbindungen zeichnen", + "value": "Draw connections" + }, + { + "context": "ui", + "key": "Vergrößern", + "value": "Zoom in" + }, + { + "context": "ui", + "key": "Verkleinern", + "value": "Zoom out" + }, + { + "context": "ui", + "key": "Wiederholen", + "value": "Redo" + }, + { + "context": "ui", + "key": "Zoom-Voreinstellungen", + "value": "Zoom presets" + }, + { + "context": "ui", + "key": "Zoomstufe (Prozent)", + "value": "Zoom level (percent)" + }, + { + "context": "ui", + "key": "Doppelklick zum Bearbeiten", + "value": "Double-click to edit" + }, + { + "context": "ui", + "key": "Kommentar auf dem Canvas einfügen", + "value": "Add comment on canvas" + }, + { + "context": "ui", + "key": "Kommentar eingeben …", + "value": "Enter a comment…" + }, + { + "context": "ui", + "key": "Canvas-Notiz verschieben", + "value": "Drag to move note" + }, + { + "context": "ui", + "key": "Notizfarbe", + "value": "Note color" + }, + { + "context": "ui", + "key": "Notizgröße ändern", + "value": "Resize note" + }, { "context": "ui", "key": "✓ Mandat eingereicht", diff --git a/modules/routes/routeAdminDatabaseHealth.py b/modules/routes/routeAdminDatabaseHealth.py index 44b9f0c1..84e3443f 100644 --- a/modules/routes/routeAdminDatabaseHealth.py +++ b/modules/routes/routeAdminDatabaseHealth.py @@ -1,13 +1,16 @@ # Copyright (c) 2025 Patrick Motsch # All rights reserved. """ -SysAdmin API for database table statistics and FK orphan detection/cleanup. +SysAdmin API for database table statistics, FK orphan detection/cleanup, +and database migration (backup / restore). 
""" +import json import logging from typing import Any, Dict, List, Optional -from fastapi import APIRouter, Depends, HTTPException, Request, status +from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile, status +from fastapi.responses import StreamingResponse from pydantic import BaseModel, Field from modules.auth import limiter @@ -22,6 +25,16 @@ from modules.system.databaseHealth import ( _listOrphans, _scanOrphans, ) +from modules.system.databaseMigration import ( + _exportDatabases, + _exportSingleDb, + _getAvailableDatabases, + _getInstanceLabel, + _importDatabases, + _importSingleDb, + _prepareImport, + _validateImportPayload, +) logger = logging.getLogger(__name__) @@ -194,3 +207,307 @@ def postDatabaseOrphansCleanAll( excludeUserFks, ) return {"results": results, "skipped": skipped, "errored": errored, "deleted": deletedTotal} + + +# --------------------------------------------------------------------------- +# Migration (Backup / Restore) +# --------------------------------------------------------------------------- + +class MigrationImportRequest(BaseModel): + """Body for the import endpoint.""" + + payload: dict = Field(..., description="The full export JSON payload") + mode: str = Field( + ..., + description="'replace' (clear + insert) or 'merge' (insert missing only)", + ) + + +@router.get("/migration/databases") +@limiter.limit("30/minute") +def getMigrationDatabases( + request: Request, + currentUser: User = Depends(requireSysAdmin), +) -> Dict[str, Any]: + """List registered databases with table/record counts for the migration UI.""" + databases = _getAvailableDatabases() + return {"databases": databases, "instanceLabel": _getInstanceLabel()} + + +@router.get("/migration/export") +@limiter.limit("2/minute") +def getMigrationExport( + request: Request, + databases: str = "all", + currentUser: User = Depends(requireSysAdmin), +) -> StreamingResponse: + """Export selected databases as a downloadable JSON file. 
+ + ``databases`` is a comma-separated list of database names, or ``"all"``. + """ + if databases == "all": + available = _getAvailableDatabases() + dbList = [db["name"] for db in available] + else: + dbList = [d.strip() for d in databases.split(",") if d.strip()] + + if not dbList: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="No databases selected for export.", + ) + + logger.info( + "SysAdmin migration export: user=%s databases=%s", + currentUser.username, + dbList, + ) + + try: + exportData = _exportDatabases(dbList) + except Exception as e: + logger.error("Migration export failed: %s", e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Export failed: {e}", + ) from e + + from datetime import datetime, timezone + + ts = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M") + filename = f"migration_backup_{ts}.json" + + content = json.dumps(exportData, ensure_ascii=False, default=str) + + return StreamingResponse( + iter([content]), + media_type="application/json", + headers={"Content-Disposition": f'attachment; filename="{filename}"'}, + ) + + +@router.post("/migration/validate") +@limiter.limit("5/minute") +async def postMigrationValidate( + request: Request, + file: UploadFile = File(...), + currentUser: User = Depends(requireSysAdmin), +) -> Dict[str, Any]: + """Validate an uploaded migration JSON file without writing anything.""" + try: + rawBytes = await file.read() + payload = json.loads(rawBytes.decode("utf-8")) + except (json.JSONDecodeError, UnicodeDecodeError) as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid JSON file: {e}", + ) from e + + result = _validateImportPayload(payload) + logger.info( + "SysAdmin migration validate: user=%s valid=%s", + currentUser.username, + result.get("valid"), + ) + return result + + +@router.post("/migration/import") +@limiter.limit("2/minute") +async def postMigrationImport( + request: Request, + file: 
UploadFile = File(...), + mode: str = "merge", + currentUser: User = Depends(requireSysAdmin), +) -> Dict[str, Any]: + """Import a migration JSON file. + + ``mode`` is passed as a form field: + - ``replace``: clear all tables (except system objects) and insert. + - ``merge``: insert only records whose ID does not yet exist. + """ + if mode not in ("replace", "merge"): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid mode: '{mode}'. Must be 'replace' or 'merge'.", + ) + + try: + rawBytes = await file.read() + payload = json.loads(rawBytes.decode("utf-8")) + except (json.JSONDecodeError, UnicodeDecodeError) as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid JSON file: {e}", + ) from e + + validation = _validateImportPayload(payload) + if not validation.get("valid"): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={"message": "Payload validation failed", "warnings": validation.get("warnings", [])}, + ) + + logger.info( + "SysAdmin migration import: user=%s mode=%s", + currentUser.username, + mode, + ) + + try: + result = _importDatabases(payload, mode) + except Exception as e: + logger.error("Migration import failed: %s", e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Import failed: {e}", + ) from e + + logger.info( + "SysAdmin migration import complete: user=%s mode=%s totalRecords=%s warnings=%s", + currentUser.username, + mode, + result.get("totalRecords"), + len(result.get("warnings", [])), + ) + return result + + +# --------------------------------------------------------------------------- +# Per-DB endpoints (progress-friendly) +# --------------------------------------------------------------------------- + +@router.get("/migration/export-single") +@limiter.limit("60/minute") +def getMigrationExportSingle( + request: Request, + database: str, + currentUser: User = Depends(requireSysAdmin), +) -> Dict[str, Any]: + 
"""Export a single database as JSON (used by the frontend for per-DB progress).""" + from modules.shared.dbRegistry import getRegisteredDatabases + + if database not in getRegisteredDatabases(): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Database '{database}' is not registered.", + ) + + logger.info("SysAdmin migration export-single: user=%s db=%s", currentUser.username, database) + + try: + dbPayload = _exportSingleDb(database) + except Exception as e: + logger.error("Export-single failed for %s: %s", database, e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Export failed for '{database}': {e}", + ) from e + + return {"database": database, "data": dbPayload} + + +@router.post("/migration/prepare-import") +@limiter.limit("5/minute") +async def postMigrationPrepareImport( + request: Request, + file: UploadFile = File(...), + currentUser: User = Depends(requireSysAdmin), +) -> Dict[str, Any]: + """Validate + remap system-object IDs and return metadata for per-DB import. + + The remapped payload is stored server-side in memory (returned as opaque token) + so the frontend can drive per-DB import calls without re-uploading. 
+ """ + try: + rawBytes = await file.read() + payload = json.loads(rawBytes.decode("utf-8")) + except (json.JSONDecodeError, UnicodeDecodeError) as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid JSON file: {e}", + ) from e + + logger.info("SysAdmin migration prepare-import: user=%s", currentUser.username) + + result = _prepareImport(payload) + if not result.get("valid"): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={"message": "Payload validation failed", "warnings": result.get("warnings", [])}, + ) + + import uuid + token = str(uuid.uuid4()) + _pendingImports[token] = { + "payload": payload, + "protectedIds": result["protectedIds"], + } + + return { + "valid": True, + "token": token, + "databases": result["databases"], + "warnings": result["warnings"], + "systemObjectsFound": result["systemObjectsFound"], + "protectedIds": result["protectedIds"], + } + + +_pendingImports: Dict[str, dict] = {} + + +@router.post("/migration/import-single") +@limiter.limit("60/minute") +def postMigrationImportSingle( + request: Request, + body: dict, + currentUser: User = Depends(requireSysAdmin), +) -> Dict[str, Any]: + """Import a single database from a previously prepared payload. + + Body: ``{token, database, mode}`` + """ + token = body.get("token", "") + database = body.get("database", "") + mode = body.get("mode", "merge") + + if mode not in ("replace", "merge"): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid mode: '{mode}'.", + ) + + pending = _pendingImports.get(token) + if not pending: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Invalid or expired import token. 
Please re-upload the file.", + ) + + logger.info("SysAdmin migration import-single: user=%s db=%s mode=%s", currentUser.username, database, mode) + + try: + result = _importSingleDb(pending["payload"], database, mode, pending["protectedIds"]) + except Exception as e: + logger.error("Import-single failed for %s: %s", database, e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Import failed for '{database}': {e}", + ) from e + + return result + + +@router.post("/migration/import-done") +@limiter.limit("10/minute") +def postMigrationImportDone( + request: Request, + body: dict, + currentUser: User = Depends(requireSysAdmin), +) -> Dict[str, Any]: + """Clean up the server-side payload cache after import is complete.""" + token = body.get("token", "") + if token in _pendingImports: + del _pendingImports[token] + return {"ok": True} diff --git a/modules/routes/routeAutomationWorkspace.py b/modules/routes/routeAutomationWorkspace.py index b742d7ea..32624363 100644 --- a/modules/routes/routeAutomationWorkspace.py +++ b/modules/routes/routeAutomationWorkspace.py @@ -26,6 +26,7 @@ from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import ( AutoWorkflow, ) from modules.features.graphicalEditor.interfaceFeatureGraphicalEditor import graphicalEditorDatabase +from modules.workflows.automation2.workflowArtifactVisibility import suppress_workflow_file_in_workspace_ui from modules.shared.i18nRegistry import apiRouteContext routeApiMsg = apiRouteContext("routeAutomationWorkspace") @@ -265,7 +266,8 @@ def getWorkspaceRunDetail( logger.warning("getWorkspaceRunDetail: file lookup failed: %s", e) def _resolveFileList(ids: set[str]) -> list[dict]: - return [fileMetaById[fid] for fid in ids if fid in fileMetaById] + rows = [dict(fileMetaById[fid]) for fid in ids if fid in fileMetaById] + return [m for m in rows if not suppress_workflow_file_in_workspace_ui(m)] assignedFileIds: set[str] = set() for step, (inputIds, outputIds) in 
zip(steps, perStepFileIds): diff --git a/modules/routes/routeDataConnections.py b/modules/routes/routeDataConnections.py index 2bc48042..7ab0f6d7 100644 --- a/modules/routes/routeDataConnections.py +++ b/modules/routes/routeDataConnections.py @@ -22,6 +22,7 @@ from fastapi.responses import JSONResponse from modules.datamodels.datamodelUam import User, UserConnection, AuthAuthority, ConnectionStatus from modules.datamodels.datamodelSecurity import Token from modules.auth import getCurrentUser, limiter +from modules.auth.oauthConnectTicket import issue_connect_ticket from modules.auth.tokenRefreshService import token_refresh_service from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata, normalize_pagination_dict from modules.interfaces.interfaceDbApp import getInterface @@ -564,14 +565,30 @@ def connect_service( reauth = bool((body or {}).get("reauth")) if isinstance(body, dict) else False reauthSuffix = "&reauth=1" if reauth else "" - # Data-app OAuth (JWT state issued server-side in /auth/connect) + # Data-app OAuth: issue connect ticket here (Bearer auth) so the popup + # does not depend on httpOnly cookies (UI uses localStorage Bearer). 
auth_url = None if connection.authority == AuthAuthority.MSFT: - auth_url = f"/api/msft/auth/connect?connectionId={quote(connectionId, safe='')}{reauthSuffix}" + ticket = issue_connect_ticket("msft_connect", connectionId, str(currentUser.id)) + ticket_param = f"&connectTicket={quote(ticket, safe='')}" + auth_url = ( + f"/api/msft/auth/connect?connectionId={quote(connectionId, safe='')}" + f"{ticket_param}{reauthSuffix}" + ) elif connection.authority == AuthAuthority.GOOGLE: - auth_url = f"/api/google/auth/connect?connectionId={quote(connectionId, safe='')}{reauthSuffix}" + ticket = issue_connect_ticket("google_connect", connectionId, str(currentUser.id)) + ticket_param = f"&connectTicket={quote(ticket, safe='')}" + auth_url = ( + f"/api/google/auth/connect?connectionId={quote(connectionId, safe='')}" + f"{ticket_param}{reauthSuffix}" + ) elif connection.authority == AuthAuthority.CLICKUP: - auth_url = f"/api/clickup/auth/connect?connectionId={quote(connectionId, safe='')}{reauthSuffix}" + ticket = issue_connect_ticket("clickup_connect", connectionId, str(currentUser.id)) + ticket_param = f"&connectTicket={quote(ticket, safe='')}" + auth_url = ( + f"/api/clickup/auth/connect?connectionId={quote(connectionId, safe='')}" + f"{ticket_param}{reauthSuffix}" + ) elif connection.authority == AuthAuthority.INFOMANIAK: # Infomaniak does not use OAuth for data access; the frontend posts a # Personal Access Token directly to /api/infomaniak/connections/{id}/token. diff --git a/modules/routes/routeDataSources.py b/modules/routes/routeDataSources.py index b2f919b7..9ffd42ed 100644 --- a/modules/routes/routeDataSources.py +++ b/modules/routes/routeDataSources.py @@ -496,7 +496,7 @@ def _getDataSourceCostEstimate( Uses the current effective ragLimits (DataSource.settings.ragLimits with fallback to centralized defaults) as the basis. Returns the same - `{estimatedTokens, estimatedUsd, basis}` shape regardless of source kind. 
+ `{estimatedTokens, estimatedChf, basis}` shape regardless of source kind. """ try: from modules.interfaces.interfaceDbApp import getRootInterface diff --git a/modules/routes/routeSecurityClickup.py b/modules/routes/routeSecurityClickup.py index d6f71d20..935509bc 100644 --- a/modules/routes/routeSecurityClickup.py +++ b/modules/routes/routeSecurityClickup.py @@ -18,6 +18,7 @@ from modules.interfaces.interfaceDbApp import getInterface, getRootInterface from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection from modules.datamodels.datamodelSecurity import Token, TokenPurpose from modules.auth import getCurrentUser, limiter, SECRET_KEY, ALGORITHM +from modules.auth.oauthConnectTicket import resolve_connect_context from modules.shared.timeUtils import createExpirationTimestamp, getUtcTimestamp from modules.shared.i18nRegistry import apiRouteContext routeApiMsg = apiRouteContext("routeSecurityClickup") @@ -76,28 +77,20 @@ router = APIRouter( def auth_connect( request: Request, connectionId: str = Query(..., description="UserConnection id"), - currentUser: User = Depends(getCurrentUser), + connectTicket: str = Query(..., description="Short-lived ticket from POST /api/connections/{id}/connect"), ) -> RedirectResponse: - """Start ClickUp OAuth for an existing connection (requires gateway session).""" + """Start ClickUp OAuth for an existing connection. + + Authenticated via ``connectTicket`` (issued by POST connect) so the popup + works when the UI uses Bearer tokens in localStorage instead of cookies. 
+ """ try: _require_clickup_config() - interface = getInterface(currentUser) - connections = interface.getUserConnections(currentUser.id) - connection = None - for conn in connections: - if conn.id == connectionId and conn.authority == AuthAuthority.CLICKUP: - connection = conn - break - if not connection: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=routeApiMsg("ClickUp connection not found")) - - state_jwt = _issue_oauth_state( - { - "flow": _FLOW_CONNECT, - "connectionId": connectionId, - "userId": str(currentUser.id), - } + _user, connection = resolve_connect_context( + connectTicket, connectionId, _FLOW_CONNECT, AuthAuthority.CLICKUP ) + + state_jwt = connectTicket query = urlencode( { "client_id": CLIENT_ID, diff --git a/modules/routes/routeSecurityGoogle.py b/modules/routes/routeSecurityGoogle.py index 7b6c1c64..87df681a 100644 --- a/modules/routes/routeSecurityGoogle.py +++ b/modules/routes/routeSecurityGoogle.py @@ -22,6 +22,7 @@ from modules.interfaces.interfaceDbApp import getInterface, getRootInterface from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection from modules.datamodels.datamodelSecurity import Token, TokenPurpose from modules.auth import getCurrentUser, limiter, SECRET_KEY, ALGORITHM +from modules.auth.oauthConnectTicket import resolve_connect_context from modules.auth import ( createAccessToken, setAccessTokenCookie, @@ -281,10 +282,13 @@ async def auth_login_callback( def auth_connect( request: Request, connectionId: str = Query(..., description="UserConnection id"), + connectTicket: str = Query(..., description="Short-lived ticket from POST /api/connections/{id}/connect"), reauth: Optional[int] = Query(0, description="If 1, force the consent screen so newly added scopes are granted"), - currentUser: User = Depends(getCurrentUser), ) -> RedirectResponse: - """Start Google Data OAuth for an existing connection (requires gateway session). 
+ """Start Google Data OAuth for an existing connection. + + Authenticated via ``connectTicket`` (issued by POST connect) so the popup + works when the UI uses Bearer tokens in localStorage instead of cookies. Google already defaults to ``prompt=consent`` here, but ``include_granted_scopes=true`` can cause newly added scopes (e.g. calendar.readonly, contacts.readonly) to be @@ -294,23 +298,11 @@ def auth_connect( """ try: _require_google_data_config() - interface = getInterface(currentUser) - connections = interface.getUserConnections(currentUser.id) - connection = None - for conn in connections: - if conn.id == connectionId and conn.authority == AuthAuthority.GOOGLE: - connection = conn - break - if not connection: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=routeApiMsg("Google connection not found")) - - state_jwt = _issue_oauth_state( - { - "flow": _FLOW_CONNECT, - "connectionId": connectionId, - "userId": str(currentUser.id), - } + _user, connection = resolve_connect_context( + connectTicket, connectionId, _FLOW_CONNECT, AuthAuthority.GOOGLE ) + + state_jwt = connectTicket oauth = OAuth2Session( client_id=DATA_CLIENT_ID, redirect_uri=DATA_REDIRECT_URI, diff --git a/modules/routes/routeSecurityMsft.py b/modules/routes/routeSecurityMsft.py index a2768a2b..67a598dd 100644 --- a/modules/routes/routeSecurityMsft.py +++ b/modules/routes/routeSecurityMsft.py @@ -23,6 +23,7 @@ from modules.interfaces.interfaceDbApp import getInterface, getRootInterface from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection from modules.datamodels.datamodelSecurity import Token, TokenPurpose from modules.auth import getCurrentUser, limiter, SECRET_KEY, ALGORITHM +from modules.auth.oauthConnectTicket import resolve_connect_context from modules.auth import ( createAccessToken, setAccessTokenCookie, @@ -244,41 +245,30 @@ async def auth_login_callback( def auth_connect( request: Request, connectionId: str = Query(..., 
description="UserConnection id"), + connectTicket: str = Query(..., description="Short-lived ticket from POST /api/connections/{id}/connect"), reauth: Optional[int] = Query(0, description="If 1, force the consent screen so newly added scopes are granted"), - currentUser: User = Depends(getCurrentUser), ) -> RedirectResponse: """Start Microsoft Data OAuth for an existing connection. + Authenticated via ``connectTicket`` (issued by POST connect) so the popup + works when the UI uses Bearer tokens in localStorage instead of cookies. + With ``reauth=1`` the consent screen is forced (``prompt=consent``) so the user re-grants permissions and any newly added scopes (e.g. Calendars.Read, Contacts.Read) actually land on the access token. """ try: _require_msft_data_config() - interface = getInterface(currentUser) - connections = interface.getUserConnections(currentUser.id) - connection = None - for conn in connections: - if conn.id == connectionId and conn.authority == AuthAuthority.MSFT: - connection = conn - break - if not connection: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, detail=routeApiMsg("Microsoft connection not found") - ) + _user, connection = resolve_connect_context( + connectTicket, connectionId, _FLOW_CONNECT, AuthAuthority.MSFT + ) msal_app = msal.ConfidentialClientApplication( DATA_CLIENT_ID, authority=AUTHORITY, client_credential=DATA_CLIENT_SECRET, ) - state_jwt = _issue_oauth_state( - { - "flow": _FLOW_CONNECT, - "connectionId": connectionId, - "userId": str(currentUser.id), - } - ) + state_jwt = connectTicket login_kwargs: Dict[str, Any] = {"prompt": "select_account", "state": state_jwt} login_hint = connection.externalEmail or connection.externalUsername if login_hint: diff --git a/modules/routes/routeWorkflowDashboard.py b/modules/routes/routeWorkflowDashboard.py index 85b372a1..ea4b8854 100644 --- a/modules/routes/routeWorkflowDashboard.py +++ b/modules/routes/routeWorkflowDashboard.py @@ -58,14 +58,32 @@ def 
_getUserMandateIds(userId: str) -> list[str]: def _getAdminMandateIds(userId: str, mandateIds: list) -> list: - """Batch-check which mandates the user is admin for (2 SQL queries total).""" + """Batch-check which mandates the user is admin for (UserMandate → UserMandateRole → Role).""" if not mandateIds: return [] rootIface = getRootInterface() - from modules.datamodels.datamodelMembership import UserMandateRole - allRoles = rootIface.db.getRecordset(UserMandateRole, recordFilter={ - "userId": userId, "mandateId": mandateIds, - }) + from modules.datamodels.datamodelMembership import UserMandate, UserMandateRole + + memberships = rootIface.db.getRecordset( + UserMandate, + recordFilter={"userId": userId, "mandateId": mandateIds, "enabled": True}, + ) + if not memberships: + return [] + + umIdToMandateId: dict[str, str] = {} + for m in memberships: + row = m if isinstance(m, dict) else m.__dict__ + um_id = row.get("id") + mid = row.get("mandateId") + if um_id and mid: + umIdToMandateId[str(um_id)] = str(mid) + + userMandateIds = list(umIdToMandateId.keys()) + allRoles = rootIface.db.getRecordset( + UserMandateRole, + recordFilter={"userMandateId": userMandateIds}, + ) if not allRoles: return [] @@ -74,23 +92,26 @@ def _getAdminMandateIds(userId: str, mandateIds: list) -> list: for r in allRoles: row = r if isinstance(r, dict) else r.__dict__ rid = row.get("roleId") - mid = row.get("mandateId") - if rid: + um_id = row.get("userMandateId") + mid = umIdToMandateId.get(str(um_id)) if um_id else None + if rid and mid: roleIds.add(rid) roleToMandate.setdefault(rid, set()).add(mid) if not roleIds: return [] - from modules.datamodels.datamodelRbac import MandateRole - roleRecords = rootIface.db.getRecordset(MandateRole, recordFilter={"id": list(roleIds)}) + from modules.datamodels.datamodelRbac import Role + roleRecords = rootIface.db.getRecordset(Role, recordFilter={"id": list(roleIds)}) adminMandates: set = set() for role in (roleRecords or []): row = role if 
isinstance(role, dict) else role.__dict__ - if row.get("isAdmin"): - rid = row.get("id") - if rid and rid in roleToMandate: - adminMandates.update(roleToMandate[rid]) + rid = row.get("id") + if not rid or rid not in roleToMandate: + continue + # Same rule as routeBilling._isAdminOfMandate / notifyMandateAdmins + if row.get("roleLabel") == "admin" and not row.get("featureInstanceId"): + adminMandates.update(roleToMandate[rid]) return [mid for mid in mandateIds if mid in adminMandates] diff --git a/modules/serviceCenter/services/serviceExtraction/extractors/extractorPdf.py b/modules/serviceCenter/services/serviceExtraction/extractors/extractorPdf.py index 1df4e7fc..657e3fc6 100644 --- a/modules/serviceCenter/services/serviceExtraction/extractors/extractorPdf.py +++ b/modules/serviceCenter/services/serviceExtraction/extractors/extractorPdf.py @@ -73,7 +73,30 @@ class PdfExtractor(Extractor): )) return parts - # Extract text per page with PyMuPDF (same lib as in-place search - ensures extraction matches PDF text layer) + file_name = context.get("fileName", "document.pdf") + ordered_ok = False + try: + doc = fitz.open(stream=fileBytes, filetype="pdf") + for page_index in range(len(doc)): + page = doc[page_index] + page_parts = self._extract_page_blocks_in_reading_order( + page, + doc, + page_index=page_index, + root_id=rootId, + file_name=file_name, + ) + if page_parts: + parts.extend(page_parts) + ordered_ok = True + doc.close() + except Exception: + ordered_ok = False + + if ordered_ok and any(getattr(p, "typeGroup", "") in ("text", "image") for p in parts): + return parts + + parts = [parts[0]] # keep container only; fall back below try: doc = fitz.open(stream=fileBytes, filetype="pdf") for i in range(len(doc)): @@ -174,4 +197,196 @@ class PdfExtractor(Extractor): return parts + @staticmethod + def _text_from_text_block(block: Dict[str, Any]) -> str: + lines_out: List[str] = [] + for line in block.get("lines") or []: + if not isinstance(line, dict): + continue + 
spans = line.get("spans") or [] + line_text = "".join( + str(span.get("text") or "") + for span in spans + if isinstance(span, dict) + ) + lines_out.append(line_text) + return "\n".join(lines_out).strip() + @staticmethod + def _bbox_center(bbox: Any) -> tuple[float, float]: + if not isinstance(bbox, (list, tuple)) or len(bbox) < 4: + return 0.0, 0.0 + x0, y0, x1, y1 = float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3]) + return (x0 + x1) / 2.0, (y0 + y1) / 2.0 + + @staticmethod + def _point_inside_bbox(x: float, y: float, bbox: Any) -> bool: + if not isinstance(bbox, (list, tuple)) or len(bbox) < 4: + return False + x0, y0, x1, y1 = float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3]) + return x0 <= x <= x1 and y0 <= y <= y1 + + def _extract_page_blocks_in_reading_order( + self, + page: Any, + doc: Any, + *, + page_index: int, + root_id: str, + file_name: str, + ) -> List[ContentPart]: + """Emit text/image/table parts in on-page reading order (top-to-bottom, left-to-right).""" + entries: List[tuple[float, float, str, Dict[str, Any]]] = [] + table_bboxes: List[Any] = [] + + try: + table_finder = page.find_tables() + for ti, tab in enumerate(getattr(table_finder, "tables", []) or []): + try: + matrix = tab.extract() + except Exception: + matrix = None + if not matrix: + continue + csv_data = self._rows_to_csv_payload(matrix) + if not csv_data.strip(): + continue + bbox = getattr(tab, "bbox", None) + if bbox is not None: + table_bboxes.append(bbox) + cy, cx = self._bbox_center(bbox) + entries.append((cy, cx, "table", { + "label": f"table_{page_index + 1}_{ti}", + "data": csv_data, + "table_index": ti, + })) + except Exception: + pass + + try: + page_dict = page.get_text("dict", sort=True) + except Exception: + page_dict = None + blocks = page_dict.get("blocks") if isinstance(page_dict, dict) else None + if isinstance(blocks, list): + text_block_no = 0 + image_no = 0 + for block in blocks: + if not isinstance(block, dict): + continue + bbox = 
block.get("bbox") + cy, cx = self._bbox_center(bbox) + btype = block.get("type") + if btype == 0: + if any(self._point_inside_bbox(cx, cy, tb) for tb in table_bboxes): + continue + text = self._text_from_text_block(block) + if not text: + continue + label = f"page_{page_index + 1}" if text_block_no == 0 else f"page_{page_index + 1}_t{text_block_no}" + entries.append((cy, cx, "text", { + "label": label, + "data": text, + "text_block_no": text_block_no, + })) + text_block_no += 1 + continue + if btype != 1: + continue + img_bytes = block.get("image") + ext = str(block.get("ext") or "png").lower() + mime = f"image/{ext}" + if not img_bytes: + xref = block.get("xref") + if xref is not None: + try: + extracted = doc.extract_image(int(xref)) + img_bytes = extracted.get("image", b"") + ext = str(extracted.get("ext") or ext).lower() + mime = f"image/{ext}" + except Exception: + img_bytes = b"" + if not img_bytes: + continue + entries.append((cy, cx, "image", { + "label": f"image_{page_index + 1}_{image_no}", + "mime": mime, + "bytes": img_bytes, + "image_no": image_no, + })) + image_no += 1 + + entries.sort(key=lambda item: (item[0], item[1])) + out: List[ContentPart] = [] + for _y, _x, kind, payload in entries: + if kind == "text": + tbno = int(payload.get("text_block_no") or 0) + text = str(payload.get("data") or "") + out.append(ContentPart( + id=makeId(), + parentId=root_id, + label=str(payload.get("label") or f"page_{page_index + 1}"), + typeGroup="text", + mimeType="text/plain", + data=text, + metadata={ + "pages": 1, + "pageIndex": page_index, + "size": len(text.encode("utf-8")), + "contextRef": { + "containerPath": file_name, + "location": f"page:{page_index + 1}/block:{tbno}", + "pageIndex": page_index, + }, + }, + )) + elif kind == "table": + ti = int(payload.get("table_index") or 0) + csv_data = str(payload.get("data") or "") + out.append(ContentPart( + id=makeId(), + parentId=root_id, + label=str(payload.get("label") or f"table_{page_index + 1}_{ti}"), + 
typeGroup="table", + mimeType="text/csv", + data=csv_data, + metadata={ + "pageIndex": page_index, + "size": len(csv_data.encode("utf-8")), + "contextRef": { + "containerPath": file_name, + "location": f"page:{page_index + 1}/table:{ti}", + "pageIndex": page_index, + }, + }, + )) + elif kind == "image": + ino = int(payload.get("image_no") or 0) + img_bytes = payload.get("bytes") or b"" + mime = str(payload.get("mime") or "image/png") + out.append(ContentPart( + id=makeId(), + parentId=root_id, + label=str(payload.get("label") or f"image_{page_index + 1}_{ino}"), + typeGroup="image", + mimeType=mime, + data=base64.b64encode(img_bytes).decode("utf-8"), + metadata={ + "pageIndex": page_index, + "size": len(img_bytes), + "contextRef": { + "containerPath": file_name, + "location": f"page:{page_index + 1}/image:{ino}", + "pageIndex": page_index, + }, + }, + )) + return out + + @staticmethod + def _rows_to_csv_payload(rows: List[List[Any]]) -> str: + lines: List[str] = [] + for row in rows: + cells = [str(c or "").replace('"', '""') for c in row] + lines.append(",".join(f'"{c}"' for c in cells)) + return "\n".join(lines) diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererMarkdown.py index 84649ae7..b70c9dbb 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererMarkdown.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererMarkdown.py @@ -6,7 +6,7 @@ Markdown renderer for report generation. 
from .documentRendererBaseTemplate import BaseRenderer from modules.datamodels.datamodelDocument import RenderedDocument -from typing import Dict, Any, List, Optional +from typing import Any, Dict, List, Optional class RendererMarkdown(BaseRenderer): """Renders content to Markdown format with format-specific extraction.""" @@ -33,12 +33,72 @@ class RendererMarkdown(BaseRenderer): @classmethod def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]: - """ - Return list of section content types that Markdown renderer accepts. - Markdown renderer accepts all section types except images. + """Markdown accepts all section types including images. + + Images are emitted as sibling files (``extract_media_….png``) with + ``![alt](filename)`` relative links in the ``.md`` — same pattern as + ``RendererHtml`` (main document + sidecar assets). """ from modules.datamodels.datamodelJson import supportedSectionTypes - return [st for st in supportedSectionTypes if st != "image"] + return list(supportedSectionTypes) + + def _collectImageDocuments(self, jsonContent: Dict[str, Any]) -> List[Dict[str, Any]]: + """Extract image sections into sidecar file payloads for markdown export.""" + import base64 as _b64 + + out: List[Dict[str, Any]] = [] + documents = jsonContent.get("documents") + if not isinstance(documents, list): + raise ValueError("extractedContent.documents must be a list") + + for doc in documents: + if not isinstance(doc, dict): + continue + for section in doc.get("sections") or []: + if not isinstance(section, dict): + continue + if section.get("content_type") != "image": + continue + for element in section.get("elements") or []: + if not isinstance(element, dict): + raise ValueError("image section element must be a dict") + content = element.get("content") + if not isinstance(content, dict): + raise ValueError("image section element missing content dict") + + b64 = content.get("base64Data") + if not isinstance(b64, str) or not b64: + raise 
ValueError( + "image section missing base64Data — markdown export " + "requires binary payload to write sidecar image files" + ) + alt = content.get("altText") + if not isinstance(alt, str) or not alt.strip(): + raise ValueError("image section missing altText") + mime = content.get("mimeType") + if not isinstance(mime, str) or not mime.strip().startswith("image/"): + raise ValueError("image section missing mimeType") + fname = content.get("fileName") + if not isinstance(fname, str) or not fname.strip(): + raise ValueError("image section missing fileName") + + safe_name = "".join( + c if c.isalnum() or c in "._-" else "_" for c in fname.strip() + ) + if not safe_name: + raise ValueError(f"image fileName sanitized to empty: {fname!r}") + + blob = _b64.b64decode(b64, validate=True) + if not blob: + raise ValueError(f"image base64Data decoded to empty bytes ({fname!r})") + + out.append({ + "filename": safe_name, + "altText": alt.strip(), + "mimeType": mime.strip(), + "bytes": blob, + }) + return out async def render( self, @@ -49,311 +109,281 @@ class RendererMarkdown(BaseRenderer): *, style: Dict[str, Any] = None, ) -> List[RenderedDocument]: - """Render extracted JSON content to Markdown format.""" + """Render markdown plus sidecar image files (same folder as the ``.md``). + + Returns ``[main.md, image1.png, image2.jpg, …]``. Relative ``![alt](file)`` + links in the markdown point at those sibling files — no API URLs, no + base64 inlined in the markdown text. 
+ """ _ = style - try: - # Generate markdown from JSON structure - markdownContent = self._generateMarkdownFromJson(extractedContent, title) - - # Determine filename from document or title - documents = extractedContent.get("documents", []) - if documents and isinstance(documents[0], dict): - filename = documents[0].get("filename") - if not filename: - filename = self._determineFilename(title, "text/markdown") - else: - filename = self._determineFilename(title, "text/markdown") - - # Extract metadata for document type and other info - metadata = extractedContent.get("metadata", {}) if extractedContent else {} - documentType = metadata.get("documentType") if isinstance(metadata, dict) else None - - return [ + image_docs = self._collectImageDocuments(extractedContent) + markdownContent = self._generateMarkdownFromJson(extractedContent, title) + + documents = extractedContent.get("documents") or [] + filename: Optional[str] = None + if documents and isinstance(documents[0], dict): + filename = documents[0].get("filename") + if not filename: + filename = self._determineFilename(title, "text/markdown") + + metadata = extractedContent.get("metadata") if isinstance(extractedContent, dict) else None + if not isinstance(metadata, dict): + metadata = None + documentType = metadata.get("documentType") if metadata else None + + result: List[RenderedDocument] = [ + RenderedDocument( + documentData=markdownContent.encode("utf-8"), + mimeType="text/markdown", + filename=filename, + documentType=documentType, + metadata=metadata, + ) + ] + for img in image_docs: + result.append( RenderedDocument( - documentData=markdownContent.encode('utf-8'), - mimeType="text/markdown", - filename=filename, - documentType=documentType, - metadata=metadata if isinstance(metadata, dict) else None + documentData=img["bytes"], + mimeType=img["mimeType"], + filename=img["filename"], ) - ] - - except Exception as e: - self.logger.error(f"Error rendering markdown: {str(e)}") - # Return minimal markdown 
fallback - fallbackContent = f"# {title}\n\nError rendering report: {str(e)}" - metadata = extractedContent.get("metadata", {}) if extractedContent else {} - documentType = metadata.get("documentType") if isinstance(metadata, dict) else None - return [ - RenderedDocument( - documentData=fallbackContent.encode('utf-8'), - mimeType="text/markdown", - filename=self._determineFilename(title, "text/markdown"), - documentType=documentType, - metadata=metadata if isinstance(metadata, dict) else None - ) - ] - + ) + return result + def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str: """Generate markdown content from structured JSON document.""" - try: - # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) - if not self._validateJsonStructure(jsonContent): - raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - - # Extract sections and metadata from standardized schema - sections = self._extractSections(jsonContent) - metadata = self._extractMetadata(jsonContent) - - # Use provided title (which comes from documents[].title) as primary source - # Fallback to metadata.title only if title parameter is empty - documentTitle = title if title else metadata.get("title", "Generated Document") - - # Build markdown content - markdownParts = [] - - # Document title - markdownParts.append(f"# {documentTitle}") - markdownParts.append("") - - # Process each section - for section in sections: - sectionMarkdown = self._renderJsonSection(section) - if sectionMarkdown: - markdownParts.append(sectionMarkdown) - markdownParts.append("") # Add spacing between sections - - # Add generation info - markdownParts.append("---") - markdownParts.append(f"*Generated: {self._formatTimestamp()}*") - - return '\n'.join(markdownParts) - - except Exception as e: - self.logger.error(f"Error generating markdown from JSON: {str(e)}") - raise Exception(f"Markdown 
generation failed: {str(e)}") + if not self._validateJsonStructure(jsonContent): + raise ValueError( + "JSON content must follow standardized schema: " + "{metadata: {...}, documents: [{sections: [...]}]}" + ) + + sections = self._extractSections(jsonContent) + metadata = self._extractMetadata(jsonContent) + + documentTitle = title or (metadata.get("title") if isinstance(metadata, dict) else None) + if not documentTitle: + raise ValueError( + "markdown render: no title given and metadata.title missing — " + "callers must pass an explicit title" + ) + + markdownParts: List[str] = [f"# {documentTitle}", ""] + + for section in sections: + sectionMarkdown = self._renderJsonSection(section) + if sectionMarkdown: + markdownParts.append(sectionMarkdown) + markdownParts.append("") + + markdownParts.append("---") + markdownParts.append(f"*Generated: {self._formatTimestamp()}*") + + return "\n".join(markdownParts) def _renderJsonSection(self, section: Dict[str, Any]) -> str: """Render a single JSON section to markdown. - Supports three content formats: reference, object (base64), extracted_text. + + Errors propagate: unknown section types or malformed payloads must surface, + not be swallowed into a fallback paragraph or ``[Error rendering section]`` + marker that hides the real problem. 
""" - try: - sectionType = self._getSectionType(section) - sectionData = self._getSectionData(section) - - # Check for three content formats from Phase 5D in elements - if isinstance(sectionData, list): - markdownParts = [] - for element in sectionData: - element_type = element.get("type", "") if isinstance(element, dict) else "" - - # Support three content formats from Phase 5D - if element_type == "reference": - # Document reference format - doc_ref = element.get("documentReference", "") - label = element.get("label", "Reference") - markdownParts.append(f"*[Reference: {label}]*") - continue - elif element_type == "extracted_text": - # Extracted text format - content = element.get("content", "") - source = element.get("source", "") - if content: - source_text = f" *(Source: {source})*" if source else "" - markdownParts.append(f"{content}{source_text}") - continue - - # If we processed reference/extracted_text elements, return them - if markdownParts: - return '\n\n'.join(markdownParts) - - if sectionType == "table": - # Work directly with elements like other renderers - if isinstance(sectionData, list) and sectionData: - element = sectionData[0] if isinstance(sectionData[0], dict) else {} - return self._renderJsonTable(element) - return "" - elif sectionType == "bullet_list": - # Work directly with elements like other renderers - if isinstance(sectionData, list) and sectionData: - element = sectionData[0] if isinstance(sectionData[0], dict) else {} - return self._renderJsonBulletList(element) - return "" - elif sectionType == "heading": - # Work directly with elements like other renderers - if isinstance(sectionData, list) and sectionData: - element = sectionData[0] if isinstance(sectionData[0], dict) else {} - return self._renderJsonHeading(element) - return "" - elif sectionType == "paragraph": - # Work directly with elements like other renderers - if isinstance(sectionData, list) and sectionData: - element = sectionData[0] if isinstance(sectionData[0], dict) 
else {} - return self._renderJsonParagraph(element) - elif isinstance(sectionData, dict): - return self._renderJsonParagraph(sectionData) - return "" - elif sectionType == "code_block": - # Work directly with elements like other renderers - if isinstance(sectionData, list) and sectionData: - element = sectionData[0] if isinstance(sectionData[0], dict) else {} - return self._renderJsonCodeBlock(element) - return "" - elif sectionType == "image": - # Work directly with elements like other renderers - if isinstance(sectionData, list) and sectionData: - element = sectionData[0] if isinstance(sectionData[0], dict) else {} - return self._renderJsonImage(element) - return "" - else: - # Fallback to paragraph for unknown types - if isinstance(sectionData, list) and sectionData: - element = sectionData[0] if isinstance(sectionData[0], dict) else {} - return self._renderJsonParagraph(element) - elif isinstance(sectionData, dict): - return self._renderJsonParagraph(sectionData) - return "" - - except Exception as e: - self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}") - return f"*[Error rendering section: {str(e)}]*" + sectionType = self._getSectionType(section) + sectionData = self._getSectionData(section) + + if isinstance(sectionData, list): + markdownParts: List[str] = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + if element_type == "reference": + label = element.get("label", "Reference") + markdownParts.append(f"*[Reference: {label}]*") + continue + if element_type == "extracted_text": + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f" *(Source: {source})*" if source else "" + markdownParts.append(f"{content}{source_text}") + continue + if markdownParts: + return "\n\n".join(markdownParts) + + def _first_element(data: Any) -> Dict[str, Any]: + if isinstance(data, list) and data and isinstance(data[0], dict): + 
return data[0] + if isinstance(data, dict): + return data + raise ValueError( + f"section type {sectionType!r} expects elements list / dict, got {type(data).__name__}" + ) + + if sectionType == "table": + return self._renderJsonTable(_first_element(sectionData)) + if sectionType == "bullet_list": + return self._renderJsonBulletList(_first_element(sectionData)) + if sectionType == "heading": + return self._renderJsonHeading(_first_element(sectionData)) + if sectionType == "paragraph": + return self._renderJsonParagraph(_first_element(sectionData)) + if sectionType == "code_block": + return self._renderJsonCodeBlock(_first_element(sectionData)) + if sectionType == "image": + return self._renderJsonImage(_first_element(sectionData)) + + raise ValueError( + f"unsupported section content_type {sectionType!r} " + f"(section id={self._getSectionId(section)!r})" + ) def _renderJsonTable(self, tableData: Dict[str, Any]) -> str: """Render a JSON table to markdown.""" - try: - # Extract from nested content structure: element.content.{headers, rows} - content = tableData.get("content", {}) - if not isinstance(content, dict): - return "" - headers = content.get("headers", []) - rows = content.get("rows", []) - - if not headers or not rows: - return "" - - markdownParts = [] - - # Create table header - headerLine = " | ".join(str(header) for header in headers) - markdownParts.append(headerLine) - - # Add separator line - separatorLine = " | ".join("---" for _ in headers) - markdownParts.append(separatorLine) - - # Add data rows - for row in rows: - rowLine = " | ".join(str(cellData) for cellData in row) - markdownParts.append(rowLine) - - return '\n'.join(markdownParts) - - except Exception as e: - self.logger.warning(f"Error rendering table: {str(e)}") + content = tableData.get("content") + if not isinstance(content, dict): + raise ValueError( + f"table section has invalid content (type={type(content).__name__})" + ) + headers = content.get("headers") or [] + rows = 
content.get("rows") or [] + if not headers or not rows: return "" + + lines = [ + " | ".join(str(h) for h in headers), + " | ".join("---" for _ in headers), + ] + for row in rows: + lines.append(" | ".join(str(cell) for cell in row)) + return "\n".join(lines) + def _renderInlineRunsMarkdown(self, runs: Any) -> str: + """Turn Phase-5 inlineRuns (from markdownToDocumentJson) into markdown text.""" + if not runs: + return "" + if not isinstance(runs, list): + return str(runs) + parts: List[str] = [] + for run in runs: + if not isinstance(run, dict): + parts.append(str(run)) + continue + run_type = run.get("type", "text") + value = str(run.get("value", "")) + if run_type == "text": + parts.append(value) + elif run_type == "bold": + parts.append(f"**{value}**") + elif run_type == "italic": + parts.append(f"*{value}*") + elif run_type == "code": + if not value: + parts.append("``") + elif "`" not in value: + parts.append(f"`{value}`") + else: + parts.append(f"``{value}``") + elif run_type == "link": + href = str(run.get("href", "")) + parts.append(f"[{value}]({href})") + elif run_type == "image": + parts.append(f"![{value}](image)") + else: + parts.append(value) + return "".join(parts) + def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str: """Render a JSON bullet list to markdown.""" - try: - # Extract from nested content structure: element.content.{items} - content = listData.get("content", {}) - if not isinstance(content, dict): - return "" - items = content.get("items", []) - - if not items: - return "" - - markdownParts = [] - for item in items: - if isinstance(item, str): - markdownParts.append(f"- {item}") - elif isinstance(item, dict) and "text" in item: - markdownParts.append(f"- {item['text']}") - - return '\n'.join(markdownParts) - - except Exception as e: - self.logger.warning(f"Error rendering bullet list: {str(e)}") + content = listData.get("content") + if not isinstance(content, dict): + raise ValueError( + f"bullet_list section has invalid 
content (type={type(content).__name__})" + ) + items = content.get("items") or [] + if not items: return "" - + + lines: List[str] = [] + for item in items: + if isinstance(item, str): + lines.append(f"- {item}") + elif isinstance(item, list): + lines.append(f"- {self._renderInlineRunsMarkdown(item)}") + elif isinstance(item, dict) and "text" in item: + lines.append(f"- {item['text']}") + else: + raise ValueError( + f"bullet_list item has unsupported shape (type={type(item).__name__})" + ) + return "\n".join(lines) + def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str: """Render a JSON heading to markdown.""" - try: - # Extract from nested content structure: element.content.{text, level} - content = headingData.get("content", {}) - if not isinstance(content, dict): - return "" - text = content.get("text", "") - level = content.get("level", 1) - - if text: - level = max(1, min(6, level)) - md_level = min(6, level + 1) - return f"{'#' * md_level} {text}" - - return "" - - except Exception as e: - self.logger.warning(f"Error rendering heading: {str(e)}") - return "" - + content = headingData.get("content") + if not isinstance(content, dict): + raise ValueError( + f"heading section has invalid content (type={type(content).__name__})" + ) + text = content.get("text") + if not isinstance(text, str) or not text: + raise ValueError("heading section has empty 'text'") + level = content.get("level", 1) + if not isinstance(level, int): + raise ValueError(f"heading 'level' must be int, got {type(level).__name__}") + level = max(1, min(6, level)) + md_level = min(6, level + 1) + return f"{'#' * md_level} {text}" + def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str: """Render a JSON paragraph to markdown.""" - try: - # Extract from nested content structure - content = paragraphData.get("content", {}) - if isinstance(content, dict): - text = content.get("text", "") - elif isinstance(content, str): - text = content - else: - text = "" - return text 
if text else "" - - except Exception as e: - self.logger.warning(f"Error rendering paragraph: {str(e)}") - return "" - + content = paragraphData.get("content") + top = paragraphData.get("text") + if isinstance(top, str) and top.strip(): + if not isinstance(content, dict) or ( + not content.get("text") and not content.get("inlineRuns") + ): + return top + + if isinstance(content, dict): + runs = self._inlineRunsFromContent(content) + if runs: + return self._renderInlineRunsMarkdown(runs) + text = content.get("text", "") + return text if isinstance(text, str) else "" + if isinstance(content, str): + return content + raise ValueError( + f"paragraph section has invalid content (type={type(content).__name__})" + ) + def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str: """Render a JSON code block to markdown.""" - try: - # Extract from nested content structure - content = codeData.get("content", {}) - if not isinstance(content, dict): - return "" - code = content.get("code", "") - language = content.get("language", "") - - if code: - if language: - return f"```{language}\n{code}\n```" - else: - return f"```\n{code}\n```" - - return "" - - except Exception as e: - self.logger.warning(f"Error rendering code block: {str(e)}") - return "" + content = codeData.get("content") + if not isinstance(content, dict): + raise ValueError( + f"code_block section has invalid content (type={type(content).__name__})" + ) + code = content.get("code") + if not isinstance(code, str) or not code: + raise ValueError("code_block section has empty 'code'") + language = content.get("language") or "" + return f"```{language}\n{code}\n```" if language else f"```\n{code}\n```" def _renderJsonImage(self, imageData: Dict[str, Any]) -> str: - """Render a JSON image to markdown.""" - try: - # Extract from nested content structure: element.content.{base64Data, altText, caption} - content = imageData.get("content", {}) - if not isinstance(content, dict): - return "" - altText = 
content.get("altText", "Image") - base64Data = content.get("base64Data", "") - - if base64Data: - # For base64 images, we can't embed them directly in markdown - # So we'll use a placeholder with the alt text - return f"![{altText}](data:image/png;base64,{base64Data[:50]}...)" - else: - return f"![{altText}](image-placeholder)" - - except Exception as e: - self.logger.warning(f"Error rendering image: {str(e)}") - return f"![{imageData.get('altText', 'Image')}](image-error)" + """Render image as relative ``![alt](fileName)`` link to a sidecar file.""" + content = imageData.get("content") + if not isinstance(content, dict): + raise ValueError( + f"image section has invalid content (type={type(content).__name__})" + ) + altText = content.get("altText") + if not isinstance(altText, str) or not altText.strip(): + raise ValueError("image section is missing 'altText'") + fileName = content.get("fileName") + if not isinstance(fileName, str) or not fileName.strip(): + raise ValueError("image section is missing 'fileName' for relative markdown link") + safe_name = "".join( + c if c.isalnum() or c in "._-" else "_" for c in fileName.strip() + ) + if not safe_name: + raise ValueError(f"image fileName sanitized to empty: {fileName!r}") + return f"![{altText.strip()}]({safe_name})" diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py index f75a5108..7ec05c5c 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py @@ -670,7 +670,7 @@ class RendererPdf(BaseRenderer): runType = run.get("type", "text") value = self._escapeReportlabXml(run.get("value", "")) if runType == "text": - parts.append(value) + parts.append(value.replace("\n", "
")) elif runType == "bold": parts.append(f"{value}") elif runType == "italic": @@ -691,6 +691,7 @@ class RendererPdf(BaseRenderer): if not text: return "" s = self._escapeReportlabXml(text) + s = s.replace("\n", "
") s = _re_pdf.sub(r"\*\*(.+?)\*\*", r"\1", s, flags=_re_pdf.DOTALL) s = _re_pdf.sub(r"__(.+?)__", r"\1", s, flags=_re_pdf.DOTALL) s = _re_pdf.sub(r"(?\1", s) diff --git a/modules/serviceCenter/services/serviceGeneration/subDocumentUtility.py b/modules/serviceCenter/services/serviceGeneration/subDocumentUtility.py index 594fbe02..d3fddeb1 100644 --- a/modules/serviceCenter/services/serviceGeneration/subDocumentUtility.py +++ b/modules/serviceCenter/services/serviceGeneration/subDocumentUtility.py @@ -4,10 +4,76 @@ import json import logging import os import re -from typing import Any, Dict +from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) +_MAX_AUTO_TABLE_COLS = 64 +_MAX_AUTO_TABLE_ROWS = 5000 +_MAX_AUTO_CELL_CHARS = 8000 + + +def _sanitize_cell_for_pipe_table(cell: str) -> str: + """Single-line cell safe for markdown pipe tables (no raw ``|``).""" + s = str(cell).replace("\r\n", "\n").replace("\r", "\n") + s = " ".join(line.strip() for line in s.split("\n") if line.strip()).strip() + return s.replace("|", "·") + + +def _try_delimited_block_as_markdown_table(block: str) -> Optional[str]: + """If ``block`` is a uniform tab- or semicolon-separated grid, return a pipe markdown table.""" + lines = [ln.strip() for ln in block.replace("\r\n", "\n").replace("\r", "\n").split("\n")] + lines = [ln for ln in lines if ln] + if len(lines) < 2: + return None + for sep in ("\t", ";"): + rows: List[List[str]] = [] + bad = False + for ln in lines: + cells = [c.strip() for c in ln.split(sep)] + if len(cells) < 2: + bad = True + break + rows.append(cells) + if bad: + continue + ncols = len(rows[0]) + if ncols > _MAX_AUTO_TABLE_COLS or len(rows) > _MAX_AUTO_TABLE_ROWS: + continue + if any(len(r) != ncols for r in rows): + continue + if any(len(_sanitize_cell_for_pipe_table(c)) > _MAX_AUTO_CELL_CHARS for r in rows for c in r): + continue + + def _row_md(r: List[str]) -> str: + return "| " + " | ".join(_sanitize_cell_for_pipe_table(c) for c in r) + " |" 
+ + header = _row_md(rows[0]) + divider = "| " + " | ".join(["---"] * ncols) + " |" + body = "\n".join(_row_md(r) for r in rows[1:]) + return "\n".join([header, divider, body]) + return None + + +def enhancePlainTextWithMarkdownTables(body: str) -> str: + """Detect delimiter-separated grids in plain paragraphs and convert them to markdown pipe tables. + + Extractors often emit CSV-like blocks (``;`` or TAB) without markdown markers; passing those + straight into ``markdownToDocumentJson`` produced one giant paragraph. This pass runs only + on whitespace-separated blocks so normal prose stays unchanged. + """ + if not isinstance(body, str) or not body.strip(): + return body if isinstance(body, str) else "" + chunks = re.split(r"\n\s*\n", body.strip()) + out_parts: List[str] = [] + for ch in chunks: + ch = ch.strip() + if not ch: + continue + md_table = _try_delimited_block_as_markdown_table(ch) + out_parts.append(md_table if md_table else ch) + return "\n\n".join(out_parts) + def _parseInlineRuns(text: str) -> list: """ diff --git a/modules/serviceCenter/services/serviceKnowledge/_costEstimate.py b/modules/serviceCenter/services/serviceKnowledge/_costEstimate.py index 565c219d..c50da1fa 100644 --- a/modules/serviceCenter/services/serviceKnowledge/_costEstimate.py +++ b/modules/serviceCenter/services/serviceKnowledge/_costEstimate.py @@ -3,15 +3,17 @@ """Indicative cost estimation for a RAG bootstrap run. This is **not** a billing-grade forecast: it gives the user a back-of-the-envelope -USD figure for the worst-case full sync, so they can sanity-check before raising +CHF figure for the worst-case full sync, so they can sanity-check before raising `maxBytes`/`maxItems`. The output always carries the underlying assumptions (`basis`) so the user can judge plausibility. 
Heuristic: estimatedTokens = ceil(maxBytes / CHARS_PER_TOKEN_BYTES_FACTOR) - estimatedUsd = estimatedTokens / 1_000_000 * EMBEDDING_USD_PER_MTOKEN + estimatedChf = estimatedTokens / 1_000_000 * EMBEDDING_CHF_PER_MTOKEN -Defaults match OpenAI `text-embedding-3-small` pricing (2026-Q2). +Defaults match OpenAI `text-embedding-3-small` published pricing (2026-Q2); +the project convention treats provider list prices as CHF directly (see +`calculatepriceCHF` in `aicorePluginOpenai.py`), so no FX conversion applies. """ from __future__ import annotations @@ -21,7 +23,7 @@ from typing import Any, Dict CHARS_PER_TOKEN = 4 -EMBEDDING_USD_PER_MTOKEN = 0.02 +EMBEDDING_CHF_PER_MTOKEN = 0.02 DEFAULT_TOKENS_PER_ITEM = 1500 BYTES_PER_TOKEN_TEXT_FACTOR = 4 EXTRACTABLE_FRACTION = 0.4 @@ -34,12 +36,12 @@ def estimateBootstrapCost(limits: Dict[str, int], kind: str = "files") -> Dict[s { "estimatedTokens": int, - "estimatedUsd": float, # rounded to 4 decimals + "estimatedChf": float, # rounded to 4 decimals "basis": { "kind": "files"|"clickup", "limits": {...}, "assumptions": { - "embeddingUsdPerMToken": 0.02, + "embeddingChfPerMToken": 0.02, "charsPerToken": 4, "extractableFraction": 0.4, "tokensPerItem": 1500 # only for clickup-like item counts @@ -49,7 +51,7 @@ def estimateBootstrapCost(limits: Dict[str, int], kind: str = "files") -> Dict[s } """ assumptions: Dict[str, Any] = { - "embeddingUsdPerMToken": EMBEDDING_USD_PER_MTOKEN, + "embeddingChfPerMToken": EMBEDDING_CHF_PER_MTOKEN, "charsPerToken": CHARS_PER_TOKEN, } @@ -69,11 +71,11 @@ def estimateBootstrapCost(limits: Dict[str, int], kind: str = "files") -> Dict[s estimatedTokens = 0 assumptions["formula"] = "unknown kind, returning zero" - estimatedUsd = round(estimatedTokens / 1_000_000 * EMBEDDING_USD_PER_MTOKEN, 4) + estimatedChf = round(estimatedTokens / 1_000_000 * EMBEDDING_CHF_PER_MTOKEN, 4) return { "estimatedTokens": estimatedTokens, - "estimatedUsd": estimatedUsd, + "estimatedChf": estimatedChf, "basis": { "kind": kind, 
"limits": dict(limits), diff --git a/modules/shared/debugLogger.py b/modules/shared/debugLogger.py index d1b22abc..9062ed53 100644 --- a/modules/shared/debugLogger.py +++ b/modules/shared/debugLogger.py @@ -19,6 +19,12 @@ def _resolveLogDir() -> str: logDir = os.path.join(gatewayDir, logDir) return logDir + +def resolve_app_log_dir() -> str: + """Absolute filesystem path for ``APP_LOGGING_LOG_DIR``.""" + return _resolveLogDir() + + def ensureDir(path: str) -> None: """Create directory if it does not exist.""" os.makedirs(path, exist_ok=True) diff --git a/modules/shared/frontendTypes.py b/modules/shared/frontendTypes.py index 9d73ee03..46b142a1 100644 --- a/modules/shared/frontendTypes.py +++ b/modules/shared/frontendTypes.py @@ -88,6 +88,15 @@ class FrontendType(str, Enum): FILTER_EXPRESSION = "filterExpression" """Filter expression builder for data.filter""" + CONTEXT_BUILDER = "contextBuilder" + """Upstream handover picker (graph editor): DataRef / path selection from prior nodes.""" + + CONTEXT_ASSIGNMENTS = "contextAssignments" + """Context set assignments: target key, picker | literal | human task (graph editor).""" + + USER_FILE_FOLDER = "userFileFolder" + """User file storage folder (graph editor): browse My Files tree or create folders.""" + # Mapping of custom types to their API endpoint for dynamic options CUSTOM_TYPE_OPTIONS_API: Dict[FrontendType, str] = { diff --git a/modules/system/databaseMigration.py b/modules/system/databaseMigration.py new file mode 100644 index 00000000..2637a61d --- /dev/null +++ b/modules/system/databaseMigration.py @@ -0,0 +1,611 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Database migration utilities — backup (export) and restore (import) for all +registered PowerOn databases. + +System objects (root mandate, admin user, event user) are protected: they are +never deleted or overwritten during import. 
Their IDs in the backup payload +are remapped to the IDs of the corresponding live objects so that all FK +references stay consistent. + +All functions are intended for SysAdmin use only (access control in the route layer). +""" + +import logging +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional, Set, Tuple + +import psycopg2 +import psycopg2.extras + +from modules.shared.configuration import APP_CONFIG +from modules.shared.dbRegistry import getRegisteredDatabases +from modules.system.databaseHealth import _getConnection, _jsonSafe + +logger = logging.getLogger(__name__) + +_EXPORT_FORMAT_VERSION = "1.0" +_SYSTEM_TABLE = "_system" + + +# --------------------------------------------------------------------------- +# Instance label +# --------------------------------------------------------------------------- + +def _getInstanceLabel() -> str: + """Return the instance type from APP_ENV_TYPE (e.g. 'dev', 'int', 'prod').""" + return APP_CONFIG.get("APP_ENV_TYPE", "unknown") + + +# --------------------------------------------------------------------------- +# Database list +# --------------------------------------------------------------------------- + +def _getAvailableDatabases() -> List[dict]: + """Return registered databases with table/row counts for the UI.""" + registeredDbs = getRegisteredDatabases() + results: List[dict] = [] + for dbName in sorted(registeredDbs): + if dbName == "poweron_test": + continue + entry: dict = {"name": dbName, "tableCount": 0, "recordCount": 0} + try: + conn = _getConnection(dbName) + try: + with conn.cursor() as cur: + cur.execute(""" + SELECT relname, n_live_tup + FROM pg_stat_user_tables + WHERE schemaname = 'public' + AND relname NOT LIKE '\\_%%' + """) + for row in cur.fetchall(): + entry["tableCount"] += 1 + entry["recordCount"] += int(row["n_live_tup"]) + finally: + conn.close() + except Exception as e: + logger.warning("Could not stat database %s: %s", dbName, e) + results.append(entry) + 
return results + + +# --------------------------------------------------------------------------- +# Export +# --------------------------------------------------------------------------- + +def _exportDatabases(databases: List[str]) -> dict: + """Export selected databases as a JSON-serialisable dict. + + Returns ``{meta: {...}, databases: {dbName: {tables: {tbl: [rows]}, summary: {...}}}}`` + """ + registeredDbs = getRegisteredDatabases() + + if not databases: + raise ValueError("No databases selected for export.") + + exportData: dict = { + "meta": { + "exportedAt": datetime.now(timezone.utc).isoformat(), + "version": _EXPORT_FORMAT_VERSION, + "databaseCount": 0, + "totalTables": 0, + "totalRecords": 0, + }, + "databases": {}, + } + + for dbName in databases: + if dbName not in registeredDbs: + logger.warning("Export: skipping unregistered database %s", dbName) + continue + try: + dbPayload = _exportSingleDb(dbName) + exportData["databases"][dbName] = dbPayload + exportData["meta"]["databaseCount"] += 1 + exportData["meta"]["totalTables"] += dbPayload["tableCount"] + exportData["meta"]["totalRecords"] += dbPayload["totalRecords"] + except Exception as e: + logger.error("Export failed for database %s: %s", dbName, e) + + return exportData + + +def _exportSingleDb(dbName: str) -> dict: + conn = _getConnection(dbName) + try: + tables = _listTables(conn) + dbPayload: dict = {"tables": {}, "summary": {}, "tableCount": 0, "totalRecords": 0} + for tbl in tables: + rows = _readTableRows(conn, tbl) + dbPayload["tables"][tbl] = rows + dbPayload["summary"][tbl] = {"recordCount": len(rows)} + dbPayload["tableCount"] += 1 + dbPayload["totalRecords"] += len(rows) + return dbPayload + finally: + conn.close() + + +def _listTables(conn) -> List[str]: + with conn.cursor() as cur: + cur.execute(""" + SELECT table_name + FROM information_schema.tables + WHERE table_schema = 'public' + AND table_type = 'BASE TABLE' + AND table_name != %s + ORDER BY table_name + """, (_SYSTEM_TABLE,)) 
+ return [row["table_name"] for row in cur.fetchall()] + + +def _readTableRows(conn, tableName: str) -> List[dict]: + with conn.cursor() as cur: + cur.execute(f'SELECT * FROM "{tableName}"') + return [{k: _jsonSafe(v) for k, v in dict(row).items()} for row in cur.fetchall()] + + +# --------------------------------------------------------------------------- +# Validate +# --------------------------------------------------------------------------- + +def _validateImportPayload(payload: dict) -> dict: + """Validate an import payload without writing anything. + + Returns ``{valid, summary, warnings, systemObjectsFound}``. + """ + warnings: List[str] = [] + summary: List[dict] = [] + + meta = payload.get("meta") + if not meta or not isinstance(meta, dict): + return {"valid": False, "summary": [], "warnings": ["Fehlende oder ungueltige 'meta'-Sektion"], "systemObjectsFound": []} + + version = meta.get("version", "") + if version != _EXPORT_FORMAT_VERSION: + warnings.append(f"Unbekannte Format-Version: {version} (erwartet: {_EXPORT_FORMAT_VERSION})") + + databases = payload.get("databases") + if not databases or not isinstance(databases, dict): + return {"valid": False, "summary": [], "warnings": ["Fehlende oder ungueltige 'databases'-Sektion"], "systemObjectsFound": []} + + registeredDbs = getRegisteredDatabases() + + for dbName, dbData in databases.items(): + tables = dbData.get("tables", {}) + tableCount = len(tables) + recordCount = sum(len(rows) for rows in tables.values() if isinstance(rows, list)) + registered = dbName in registeredDbs + if not registered: + warnings.append(f"Datenbank '{dbName}' ist nicht registriert und wird uebersprungen") + summary.append({ + "database": dbName, + "tableCount": tableCount, + "recordCount": recordCount, + "registered": registered, + }) + + systemObjectsFound = _detectSystemObjectsInPayload(payload) + + valid = any(s["registered"] for s in summary) + return { + "valid": valid, + "summary": summary, + "warnings": warnings, + 
"systemObjectsFound": systemObjectsFound, + } + + +def _detectSystemObjectsInPayload(payload: dict) -> List[dict]: + """Find system objects (root mandate, admin user, event user) in a payload.""" + found: List[dict] = [] + appData = payload.get("databases", {}).get("poweron_app", {}).get("tables", {}) + + for row in appData.get("Mandate", []): + if row.get("name") == "root" and row.get("isSystem") is True: + found.append({"type": "mandate", "label": "Root Mandate", "payloadId": row.get("id")}) + + for row in appData.get("UserInDB", []): + if row.get("username") == "admin": + found.append({"type": "user", "label": "Admin User", "payloadId": row.get("id")}) + elif row.get("username") == "event": + found.append({"type": "user", "label": "Event User", "payloadId": row.get("id")}) + + return found + + +# --------------------------------------------------------------------------- +# System-object ID remapping +# --------------------------------------------------------------------------- + +def _loadLiveSystemObjectIds() -> Dict[str, str]: + """Load the IDs of the 3 protected system objects from the live DB. + + Returns a dict like ``{"rootMandate": "", "adminUser": "", "eventUser": ""}``. 
+ """ + registeredDbs = getRegisteredDatabases() + if "poweron_app" not in registeredDbs: + return {} + + result: Dict[str, str] = {} + conn = _getConnection("poweron_app") + try: + with conn.cursor() as cur: + cur.execute("""SELECT id FROM "Mandate" WHERE "name" = 'root' AND "isSystem" = true LIMIT 1""") + row = cur.fetchone() + if row: + result["rootMandate"] = str(row["id"]) + + cur.execute("""SELECT id FROM "UserInDB" WHERE "username" = 'admin' LIMIT 1""") + row = cur.fetchone() + if row: + result["adminUser"] = str(row["id"]) + + cur.execute("""SELECT id FROM "UserInDB" WHERE "username" = 'event' LIMIT 1""") + row = cur.fetchone() + if row: + result["eventUser"] = str(row["id"]) + finally: + conn.close() + + return result + + +def _buildIdRemapFromPayload(payload: dict, liveIds: Dict[str, str]) -> Dict[str, str]: + """Build an ``{oldId: newId}`` mapping for system objects. + + Compares IDs found in the payload with the live system-object IDs. + Only entries where the IDs actually differ are included. 
+ """ + remap: Dict[str, str] = {} + appTables = payload.get("databases", {}).get("poweron_app", {}).get("tables", {}) + + for row in appTables.get("Mandate", []): + if row.get("name") == "root" and row.get("isSystem") is True: + oldId = str(row.get("id", "")) + newId = liveIds.get("rootMandate", "") + if oldId and newId and oldId != newId: + remap[oldId] = newId + + for row in appTables.get("UserInDB", []): + username = row.get("username") + oldId = str(row.get("id", "")) + if username == "admin": + newId = liveIds.get("adminUser", "") + elif username == "event": + newId = liveIds.get("eventUser", "") + else: + continue + if oldId and newId and oldId != newId: + remap[oldId] = newId + + return remap + + +def _remapSystemObjectIds(payload: dict, remap: Dict[str, str]) -> dict: + """Walk the entire payload and replace every value that matches an old system-object ID.""" + if not remap: + return payload + + remapSet = set(remap.keys()) + + databases = payload.get("databases", {}) + for dbName, dbData in databases.items(): + tables = dbData.get("tables", {}) + for tableName, rows in tables.items(): + if not isinstance(rows, list): + continue + for row in rows: + _remapRowValues(row, remap, remapSet) + + return payload + + +def _remapRowValues(row: dict, remap: Dict[str, str], remapSet: Set[str]) -> None: + """In-place replace string values in a row dict that match a remap key.""" + for key, val in row.items(): + if isinstance(val, str) and val in remapSet: + row[key] = remap[val] + elif isinstance(val, dict): + _remapRowValues(val, remap, remapSet) + elif isinstance(val, list): + for i, item in enumerate(val): + if isinstance(item, str) and item in remapSet: + val[i] = remap[item] + elif isinstance(item, dict): + _remapRowValues(item, remap, remapSet) + + +# --------------------------------------------------------------------------- +# Import +# --------------------------------------------------------------------------- + +_PROTECTED_ROWS: Dict[str, List[dict]] = { + 
"Mandate": [{"name": "root", "isSystem": True}], + "UserInDB": [{"username": "admin"}, {"username": "event"}], +} + + +def _isProtectedRow(tableName: str, row: dict) -> bool: + """Return True if a row represents a protected system object.""" + patterns = _PROTECTED_ROWS.get(tableName, []) + for pattern in patterns: + if all(row.get(k) == v for k, v in pattern.items()): + return True + return False + + +def _importDatabases(payload: dict, mode: str) -> dict: + """Import databases from a validated payload. + + ``mode`` is ``"replace"`` (clear + insert) or ``"merge"`` (insert missing only). + """ + if mode not in ("replace", "merge"): + raise ValueError(f"Invalid import mode: {mode}") + + registeredDbs = getRegisteredDatabases() + + liveIds = _loadLiveSystemObjectIds() + remap = _buildIdRemapFromPayload(payload, liveIds) + if remap: + logger.info("System-object ID remap: %s", remap) + _remapSystemObjectIds(payload, remap) + + protectedIdSet = set(liveIds.values()) + + imported: Dict[str, dict] = {} + warnings: List[str] = [] + databases = payload.get("databases", {}) + + for dbName, dbData in databases.items(): + if dbName not in registeredDbs: + warnings.append(f"Datenbank '{dbName}' uebersprungen (nicht registriert)") + continue + + tables = dbData.get("tables", {}) + dbResult: Dict[str, int] = {} + + conn = _getConnection(dbName) + try: + conn.autocommit = False + existingTables = set(_listTables(conn)) + + for tableName, rows in tables.items(): + if not isinstance(rows, list): + continue + if tableName not in existingTables: + warnings.append(f"Tabelle '{dbName}.{tableName}' existiert nicht, uebersprungen") + continue + + physicalCols = _getPhysicalColumns(conn, tableName) + if not physicalCols: + continue + + filteredRows = [] + for row in rows: + if _isProtectedRow(tableName, row): + continue + if row.get("id") and str(row["id"]) in protectedIdSet: + continue + filteredRows.append(row) + + if mode == "replace": + _deleteNonProtected(conn, tableName, 
protectedIdSet) + + insertedCount = _insertRows(conn, tableName, filteredRows, physicalCols, mode) + dbResult[tableName] = insertedCount + + conn.commit() + except Exception as e: + conn.rollback() + logger.error("Import failed for database %s: %s", dbName, e) + warnings.append(f"Import fuer '{dbName}' fehlgeschlagen: {e}") + continue + finally: + conn.close() + + imported[dbName] = dbResult + + totalRecords = sum(sum(v.values()) for v in imported.values()) + return { + "success": True, + "imported": imported, + "totalRecords": totalRecords, + "warnings": warnings, + } + + +def _getPhysicalColumns(conn, tableName: str) -> List[str]: + with conn.cursor() as cur: + cur.execute(""" + SELECT column_name + FROM information_schema.columns + WHERE table_schema = 'public' AND table_name = %s + ORDER BY ordinal_position + """, (tableName,)) + return [row["column_name"] for row in cur.fetchall()] + + +def _deleteNonProtected(conn, tableName: str, protectedIds: Set[str]) -> int: + """Delete all rows except protected system objects.""" + if not protectedIds: + with conn.cursor() as cur: + cur.execute(f'DELETE FROM "{tableName}"') + return cur.rowcount + + idList = list(protectedIds) + with conn.cursor() as cur: + cur.execute( + f'DELETE FROM "{tableName}" WHERE "id"::text != ALL(%(ids)s)', + {"ids": idList}, + ) + return cur.rowcount + + +def _insertRows( + conn, + tableName: str, + rows: List[dict], + physicalCols: List[str], + mode: str, +) -> int: + """Insert rows into a table. 
In merge mode, skip rows whose id already exists.""" + if not rows: + return 0 + + physicalColSet = set(physicalCols) + inserted = 0 + + for row in rows: + cols = [c for c in row.keys() if c in physicalColSet] + if not cols: + continue + + values = [_pgSafe(row[c]) for c in cols] + colNames = ", ".join(f'"{c}"' for c in cols) + placeholders = ", ".join(["%s"] * len(cols)) + + if mode == "merge": + sql = f'INSERT INTO "{tableName}" ({colNames}) VALUES ({placeholders}) ON CONFLICT ("id") DO NOTHING' + else: + sql = f'INSERT INTO "{tableName}" ({colNames}) VALUES ({placeholders})' + + try: + with conn.cursor() as cur: + cur.execute(sql, values) + inserted += cur.rowcount + except Exception as e: + logger.warning("Insert failed for %s row: %s", tableName, e) + conn.rollback() + conn.autocommit = False + + return inserted + + +def _pgSafe(v: Any) -> Any: + """Convert Python values to psycopg2-compatible types.""" + import json as _json + + if v is None or isinstance(v, (str, int, float, bool)): + return v + if isinstance(v, (dict, list)): + return _json.dumps(v) + return str(v) + + +# --------------------------------------------------------------------------- +# Prepare import (validate + remap, return context for per-DB import) +# --------------------------------------------------------------------------- + +def _prepareImport(payload: dict) -> dict: + """Validate, remap system-object IDs, and return the prepared payload + together with metadata the frontend needs to drive per-DB import. + + Returns ``{valid, warnings, systemObjectsFound, databases, protectedIds, remappedPayload}``. 
+ """ + validation = _validateImportPayload(payload) + if not validation.get("valid"): + return { + "valid": False, + "warnings": validation.get("warnings", []), + "systemObjectsFound": validation.get("systemObjectsFound", []), + "databases": [], + "protectedIds": [], + } + + liveIds = _loadLiveSystemObjectIds() + remap = _buildIdRemapFromPayload(payload, liveIds) + if remap: + logger.info("System-object ID remap: %s", remap) + _remapSystemObjectIds(payload, remap) + + protectedIdSet = set(liveIds.values()) + + registeredDbs = getRegisteredDatabases() + dbList = [] + for dbName, dbData in payload.get("databases", {}).items(): + if dbName not in registeredDbs: + continue + tables = dbData.get("tables", {}) + recordCount = sum(len(rows) for rows in tables.values() if isinstance(rows, list)) + dbList.append({ + "database": dbName, + "tableCount": len(tables), + "recordCount": recordCount, + }) + + return { + "valid": True, + "warnings": validation.get("warnings", []), + "systemObjectsFound": validation.get("systemObjectsFound", []), + "databases": dbList, + "protectedIds": list(protectedIdSet), + } + + +def _importSingleDb(payload: dict, dbName: str, mode: str, protectedIds: List[str]) -> dict: + """Import a single database from the (already remapped) payload. + + Returns ``{database, tables: {tableName: insertedCount}, recordCount, warnings}``. 
+ """ + if mode not in ("replace", "merge"): + raise ValueError(f"Invalid import mode: {mode}") + + registeredDbs = getRegisteredDatabases() + if dbName not in registeredDbs: + return {"database": dbName, "tables": {}, "recordCount": 0, + "warnings": [f"Datenbank '{dbName}' nicht registriert"]} + + dbData = payload.get("databases", {}).get(dbName) + if not dbData: + return {"database": dbName, "tables": {}, "recordCount": 0, + "warnings": [f"Keine Daten fuer '{dbName}' im Payload"]} + + protectedIdSet = set(protectedIds) + tables = dbData.get("tables", {}) + warnings: List[str] = [] + dbResult: Dict[str, int] = {} + + conn = _getConnection(dbName) + try: + conn.autocommit = False + existingTables = set(_listTables(conn)) + + for tableName, rows in tables.items(): + if not isinstance(rows, list): + continue + if tableName not in existingTables: + warnings.append(f"Tabelle '{dbName}.{tableName}' existiert nicht, uebersprungen") + continue + + physicalCols = _getPhysicalColumns(conn, tableName) + if not physicalCols: + continue + + filteredRows = [] + for row in rows: + if _isProtectedRow(tableName, row): + continue + if row.get("id") and str(row["id"]) in protectedIdSet: + continue + filteredRows.append(row) + + if mode == "replace": + _deleteNonProtected(conn, tableName, protectedIdSet) + + insertedCount = _insertRows(conn, tableName, filteredRows, physicalCols, mode) + dbResult[tableName] = insertedCount + + conn.commit() + except Exception as e: + conn.rollback() + logger.error("Import failed for database %s: %s", dbName, e) + return {"database": dbName, "tables": {}, "recordCount": 0, + "warnings": [f"Import fuer '{dbName}' fehlgeschlagen: {e}"]} + finally: + conn.close() + + recordCount = sum(dbResult.values()) + return {"database": dbName, "tables": dbResult, "recordCount": recordCount, "warnings": warnings} diff --git a/modules/workflows/automation2/executionEngine.py b/modules/workflows/automation2/executionEngine.py index e49754f8..8efe9339 100644 --- 
a/modules/workflows/automation2/executionEngine.py +++ b/modules/workflows/automation2/executionEngine.py @@ -15,6 +15,8 @@ from modules.workflows.automation2.graphUtils import ( topoSort, getInputSources, getLoopBodyNodeIds, + getLoopDoneNodeIds, + getLoopPrimaryInputSource, ) from modules.workflows.automation2.executors import ( @@ -26,10 +28,15 @@ from modules.workflows.automation2.executors import ( PauseForHumanTaskError, PauseForEmailWaitError, ) -from modules.features.graphicalEditor.portTypes import normalizeToSchema +from modules.features.graphicalEditor.portTypes import normalizeToSchema, wrapTransit, unwrapTransit from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES from modules.serviceCenter.services.serviceSubscription.mainServiceSubscription import SubscriptionInactiveException as _SubscriptionInactiveException from modules.serviceCenter.services.serviceBilling.mainServiceBilling import BillingContextError as _BillingContextError +from modules.workflows.automation2.graphicalEditorRunFileLogger import ( + GraphicalEditorRunFileLogger, + graphical_editor_run_file_logging_enabled, + merge_run_context_with_ge_log_prefix, +) from modules.workflows.automation2.runEnvelope import normalize_run_envelope logger = logging.getLogger(__name__) @@ -79,14 +86,32 @@ def _outputSchemaForNode(nodeType: str) -> Optional[str]: if isinstance(p0, dict): spec = p0.get("schema") if isinstance(spec, dict) and spec.get("kind") == "fromGraph": - return "FormPayload" + # Read override from the port definition — ``FormPayload`` is the + # fallback for true form nodes; dynamic context nodes (e.g. + # context.transformContext) declare ``fromGraphResultSchema`` to + # avoid wrong normalization. 
+ return p0.get("fromGraphResultSchema") or "FormPayload" if isinstance(spec, str): return spec return None -def _isMergeNode(nodeType: str) -> bool: - return nodeType == "flow.merge" +def _isBarrierNode(nodeType: str) -> bool: + """Barrier nodes wait for all connected predecessors before executing. + + Backwards compatible: ``flow.merge`` is always a barrier. Any other node may + declare ``waitsForAllPredecessors: True`` in its STATIC_NODE_TYPES entry. + + Note: ``context.mergeContext`` is NOT a barrier — it receives its list of + inputs via the ``dataSource`` DataRef parameter (typically ``loop.bodyResults``) + and executes once its single upstream edge is satisfied. + """ + if nodeType == "flow.merge": + return True + for nd in STATIC_NODE_TYPES: + if nd.get("id") == nodeType: + return bool(nd.get("waitsForAllPredecessors")) + return False def _allMergePredecessorsReady( @@ -94,7 +119,7 @@ def _allMergePredecessorsReady( connectionMap: Dict[str, List], nodeOutputs: Dict[str, Any], ) -> bool: - """For flow.merge: check that every connected predecessor has produced output or was skipped.""" + """For barrier nodes: check that every connected predecessor has produced output or was skipped.""" for src, _, _ in connectionMap.get(nodeId, []): if src not in nodeOutputs: return False @@ -138,10 +163,15 @@ def _is_node_on_active_path( meta = out.get("_meta", {}) if out.get("_transit") else out branch = meta.get("branch") match = meta.get("match") + matches = meta.get("matches") active_output = None if branch is not None: active_output = branch + elif isinstance(matches, list) and matches: + if source_output not in matches: + return False + continue elif match is not None: if match < 0: return False @@ -195,6 +225,30 @@ def _serializableOutputs(nodeOutputs: Dict[str, Any]) -> Dict[str, Any]: return _stripBinaryValues(cleaned) +def _merge_node_parameters_into_snap( + snap: Optional[Dict[str, Any]], + *, + node_id: Optional[str], + context: Optional[Dict[str, Any]], +) -> 
Dict[str, Any]: + """Copy wire snapshot and attach **nodeParameters** from the graph definition (by ``node_id``). + + Uses ``context['graphNodesById']`` populated at executeGraph start — stable even when + per-step node dict references differ. Field name is ``nodeParameters`` (no leading + underscore) so it survives consumers that hide ``_*`` keys.""" + merged: Dict[str, Any] = dict(snap or {}) + if not node_id or not isinstance(context, dict): + return merged + cmap = context.get("graphNodesById") + if not isinstance(cmap, dict): + return merged + gnode = cmap.get(node_id) + if not isinstance(gnode, dict): + return merged + merged["nodeParameters"] = dict(gnode.get("parameters") or {}) + return merged + + def _emitStepEvent(runId: str, stepData: Dict[str, Any]) -> None: """Emit a step-log SSE event to any listening client for this run.""" try: @@ -203,7 +257,6 @@ def _emitStepEvent(runId: str, stepData: Dict[str, Any]) -> None: queueId = f"run-trace-{runId}" if not em.has_queue(queueId): return - import asyncio loop = asyncio.get_event_loop() if loop.is_running(): asyncio.ensure_future(em.emit_event(queueId, "step", stepData, event_category="tracing")) @@ -274,6 +327,80 @@ def _updateStepLog(iface, stepId: str, status: str, output: Dict = None, error: logger.debug("Could not update AutoStepLog %s: %s", stepId, e) +def _ge_iso_timestamp() -> str: + """UTC timestamp for NDJSON logs (readable, milliseconds).""" + return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" + + +async def _ge_log_node_finished( + file_logger: Optional[GraphicalEditorRunFileLogger], + *, + run_id: Optional[str], + node_outputs: Dict[str, Any], + run_envelope: Optional[Dict[str, Any]], + node_id: str, + node_type: str, + status: str, + input_snap: Optional[Dict[str, Any]], + output: Any = None, + error: Optional[str] = None, + duration_ms: Optional[int] = None, + retry_count: Optional[int] = None, + skip_reason: Optional[str] = None, + loop_index: Optional[int] = None, 
+ loop_node_id: Optional[str] = None, + loop_item: Optional[Any] = None, + exec_context: Optional[Dict[str, Any]] = None, +) -> None: + """Append one execution line + one workflow-context snapshot (NDJSON).""" + if file_logger is None or not run_id: + return + ts = _ge_iso_timestamp() + snap = _merge_node_parameters_into_snap(input_snap, node_id=node_id, context=exec_context) + exec_rec: Dict[str, Any] = { + "timestamp": ts, + "runId": run_id, + "nodeId": node_id, + "nodeType": node_type, + "status": status, + "input": _stripBinaryValues(snap), + } + if skip_reason: + exec_rec["skipReason"] = skip_reason + if duration_ms is not None: + exec_rec["durationMs"] = duration_ms + if retry_count is not None: + exec_rec["retryCount"] = retry_count + if loop_index is not None: + exec_rec["loopIndex"] = loop_index + if loop_node_id is not None: + exec_rec["loopNodeId"] = loop_node_id + if loop_item is not None: + exec_rec["loopItem"] = _stripBinaryValues(loop_item) + if error is not None: + exec_rec["error"] = error + if output is not None: + exec_rec["output"] = ( + _stripBinaryValues(output) if isinstance(output, dict) else {"value": _stripBinaryValues(output)} + ) + await file_logger.append_node_execution_line(exec_rec) + + ctx_rec: Dict[str, Any] = { + "timestamp": ts, + "runId": run_id, + "afterNodeId": node_id, + "afterNodeType": node_type, + "afterStatus": status, + "nodeOutputsSnapshot": _serializableOutputs(node_outputs), + "runEnvelope": _stripBinaryValues(dict(run_envelope or {})), + } + if loop_index is not None: + ctx_rec["loopIndex"] = loop_index + if loop_node_id is not None: + ctx_rec["loopNodeId"] = loop_node_id + await file_logger.append_context_snapshot_line(ctx_rec) + + async def _executeWithRetry(executor, node, context, maxRetries: int = 0, retryDelaySeconds: float = 1.0): """Execute a node with optional retry policy from node parameters.""" params = node.get("parameters") or {} @@ -326,6 +453,179 @@ def _substituteFeatureInstancePlaceholders( return 
_json.loads(replaced) +async def _run_post_loop_done_nodes( + *, + loop_node_id: str, + body_ids: Set[str], + items: List[Any], + ordered: List[Dict], + connectionMap: Dict[str, List], + nodeOutputs: Dict[str, Any], + context: Dict[str, Any], + services: Any, + automation2_interface: Optional[Any], + runId: Optional[str], + processed_in_loop: Set[str], + ge_file_logger: Optional[GraphicalEditorRunFileLogger] = None, +) -> Optional[Dict[str, Any]]: + """After all loop iterations: merge upstream into loop output and run the Done (output 1) branch once.""" + _prim_in = getLoopPrimaryInputSource(loop_node_id, connectionMap, body_ids) + _upstream_loop = nodeOutputs.get(_prim_in[0]) if _prim_in else None + _base_raw = unwrapTransit(_upstream_loop) if isinstance(_upstream_loop, dict) and _upstream_loop.get("_transit") else _upstream_loop + _prev_loop_out = nodeOutputs.get(loop_node_id) + # ``bodyResults`` lives on the plain iteration-state dict; after resume / edge + # cases the loop slot may still be wrapped in Transit — unwrap before read. 
+ _prev_plain = _prev_loop_out + if isinstance(_prev_loop_out, dict) and _prev_loop_out.get("_transit"): + _prev_plain = unwrapTransit(_prev_loop_out) + _body_results = ( + _prev_plain.get("bodyResults") if isinstance(_prev_plain, dict) else None + ) + if not isinstance(_base_raw, dict): + raise RuntimeError( + f"flow.loop {loop_node_id}: primary upstream output must be a dict (JSON handover / node output); " + f"got {type(_base_raw).__name__}" + ) + _merged_loop = {**_base_raw, "items": items, "count": len(items)} + if _body_results is not None: + _merged_loop["bodyResults"] = _body_results + nodeOutputs[loop_node_id] = wrapTransit(_merged_loop, {"loopCompleted": True, "loopNodeId": loop_node_id}) + + _done_all = getLoopDoneNodeIds(loop_node_id, connectionMap) + _done_only = _done_all - body_ids + _done_ordered = [n for n in ordered if n.get("id") in _done_only] + for _dn in _done_ordered: + _dnid = _dn.get("id") + if not _dnid or context.get("_stopped"): + break + if not _is_node_on_active_path(_dnid, connectionMap, nodeOutputs): + _skipSnap = {"_skipReason": "inactive_branch"} + for _sSrc, _, _ in connectionMap.get(_dnid, []): + if _sSrc in nodeOutputs: + _skipSnap[_sSrc] = nodeOutputs[_sSrc] + _skipSnap = _merge_node_parameters_into_snap(_skipSnap, node_id=_dnid, context=context) + _skId = _createStepLog(automation2_interface, runId, _dnid, _dn.get("type", ""), status="skipped", inputSnapshot=_skipSnap) + if _skId: + _updateStepLog(automation2_interface, _skId, "skipped") + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=_dnid, + node_type=_dn.get("type", ""), + status="skipped", + input_snap=_skipSnap, + skip_reason=str(_skipSnap.get("_skipReason") or "inactive_branch"), + ) + continue + _dexec = _getExecutor(_dn.get("type", ""), services, automation2_interface) + if not _dexec: + nodeOutputs[_dnid] = None + continue + _dStart = time.time() + 
_dIn = {} + for _src, _, _ in connectionMap.get(_dnid, []): + if _src in nodeOutputs: + _dIn[_src] = nodeOutputs[_src] + _dIn = _merge_node_parameters_into_snap(_dIn, node_id=_dnid, context=context) + _dStepId = _createStepLog(automation2_interface, runId, _dnid, _dn.get("type", ""), "running", _dIn) + try: + _dres, _dRetry = await _executeWithRetry(_dexec, _dn, context) + _dres = _normalizeResult(_dres, _dn.get("type", "")) + nodeOutputs[_dnid] = _dres + _dDur = int((time.time() - _dStart) * 1000) + _dTok = _dres.get("tokensUsed", 0) if isinstance(_dres, dict) else 0 + _updateStepLog(automation2_interface, _dStepId, "completed", + output=_dres if isinstance(_dres, dict) else {"value": _dres}, + durationMs=_dDur, tokensUsed=_dTok, retryCount=_dRetry) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=_dnid, + node_type=_dn.get("type", ""), + status="completed", + input_snap=_dIn, + output=_dres, + duration_ms=_dDur, + retry_count=_dRetry, + ) + except PauseForHumanTaskError: + _updateStepLog(automation2_interface, _dStepId, "completed", + durationMs=int((time.time() - _dStart) * 1000)) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=_dnid, + node_type=_dn.get("type", ""), + status="completed", + input_snap=_dIn, + duration_ms=int((time.time() - _dStart) * 1000), + ) + raise + except PauseForEmailWaitError: + _updateStepLog(automation2_interface, _dStepId, "completed", + durationMs=int((time.time() - _dStart) * 1000)) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=_dnid, + node_type=_dn.get("type", ""), + status="completed", + input_snap=_dIn, + duration_ms=int((time.time() - _dStart) * 1000), + ) + raise + 
except (_SubscriptionInactiveException, _BillingContextError): + _dFailDur = int((time.time() - _dStart) * 1000) + _updateStepLog(automation2_interface, _dStepId, "failed", + error="Subscription/Billing error", durationMs=_dFailDur) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=_dnid, + node_type=_dn.get("type", ""), + status="failed", + input_snap=_dIn, + error="Subscription/Billing error", + duration_ms=_dFailDur, + ) + raise + except Exception as _dex: + _dFailDur2 = int((time.time() - _dStart) * 1000) + _updateStepLog(automation2_interface, _dStepId, "failed", + error=str(_dex), durationMs=_dFailDur2) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=_dnid, + node_type=_dn.get("type", ""), + status="failed", + input_snap=_dIn, + error=str(_dex), + duration_ms=_dFailDur2, + ) + raise + processed_in_loop.update(_done_only) + return None + + async def executeGraph( graph: Dict[str, Any], services: Any, @@ -360,7 +660,12 @@ async def executeGraph( ) from modules.workflows.processing.shared.methodDiscovery import discoverMethods discoverMethods(services) - from modules.workflows.automation2.pickNotPushMigration import materializeConnectionRefs + from modules.workflows.automation2.pickNotPushMigration import ( + materializeConnectionRefs, + materializePrimaryTextHandover, + materializeRecommendedDataPickRef, + normalizeFileCreatePresentationRefs, + ) from modules.workflows.automation2.featureInstanceRefMigration import ( materializeFeatureInstanceRefs, ) @@ -372,6 +677,9 @@ async def executeGraph( # subsequent connection-ref pass and validation see the canonical shape. 
graph = materializeFeatureInstanceRefs(graph) graph = materializeConnectionRefs(graph) + graph = materializePrimaryTextHandover(graph) + graph = materializeRecommendedDataPickRef(graph) + graph = normalizeFileCreatePresentationRefs(graph) nodeTypeIds = _getNodeTypeIds(services) logger.debug("executeGraph nodeTypeIds (%d): %s", len(nodeTypeIds), sorted(nodeTypeIds)) errors = validateGraph(graph, nodeTypeIds) @@ -410,6 +718,7 @@ async def executeGraph( except Exception as valErr: logger.warning("executeGraph resume: schema validation failed for %s: %s", startAfterNodeId, valErr) + ge_file_logger: Optional[GraphicalEditorRunFileLogger] = None nodeOutputs: Dict[str, Any] = dict(initialNodeOutputs or {}) if not runId and automation2_interface and workflowId and not is_resume: run_context = { @@ -447,9 +756,18 @@ async def executeGraph( ) runId = run.get("id") if run else None logger.info("executeGraph created run %s label=%s", runId, run_label) + if runId and graphical_editor_run_file_logging_enabled(): + ge_file_logger = GraphicalEditorRunFileLogger.bootstrap_new_run( + automation2_interface, + runId, + run_context, + ) env_for_run = normalize_run_envelope(run_envelope, user_id=userId) + graph_nodes_by_id: Dict[str, Any] = { + str(n["id"]): n for n in nodes if n.get("id") + } context = { "workflowId": workflowId, "instanceId": instanceId, @@ -462,13 +780,29 @@ async def executeGraph( "_runId": runId, "_orderedNodes": ordered, "runEnvelope": env_for_run, + "graphNodesById": graph_nodes_by_id, } + # Lets graph actions (e.g. ``context.setContext`` human-task mode) call + # ``createTask`` / ``updateRun`` without threading the interface through services. 
+ if automation2_interface: + context["_automation2Interface"] = automation2_interface # _context key in nodeOutputs for system variable resolution nodeOutputs["_context"] = context if runId: _activeRunContexts[runId] = context + if ( + graphical_editor_run_file_logging_enabled() + and automation2_interface + and runId + and ge_file_logger is None + ): + ge_file_logger = GraphicalEditorRunFileLogger.ensure_attached( + automation2_interface, + runId, + ) + skip_until_passed = bool(startAfterNodeId) processed_in_loop: Set[str] = set() _aggregateAccumulators: Dict[str, list] = {} @@ -487,6 +821,14 @@ async def executeGraph( body_ids = getLoopBodyNodeIds(loop_node_id, connectionMap) if loop_node_id else set() body_ordered = [n for n in ordered if n.get("id") in body_ids] processed_in_loop = set(body_ids) | {loop_node_id} if loop_node_id else set() + _resume_feedback_body_node_id = None + for _fb_src, _fb_so, _fb_ti in (connectionMap.get(loop_node_id) or []): + if _fb_src in body_ids and _fb_ti == 0: + _resume_feedback_body_node_id = _fb_src + break + if not _resume_feedback_body_node_id and body_ordered: + _resume_feedback_body_node_id = body_ordered[-1].get("id") + _resume_body_results: List[Any] = [] while next_index < len(items) and loop_node_id: nodeOutputs[loop_node_id] = { "items": items, @@ -510,6 +852,7 @@ async def executeGraph( for _rSrc, _, _ in connectionMap.get(bnid, []): if _rSrc in nodeOutputs: _rInputSnap[_rSrc] = nodeOutputs[_rSrc] + _rInputSnap = _merge_node_parameters_into_snap(_rInputSnap, node_id=bnid, context=context) _rStepId = _createStepLog(automation2_interface, runId, bnid, body_node.get("type", ""), "running", _rInputSnap) try: result, _rRetry = await _executeWithRetry(executor, body_node, context) @@ -523,26 +866,111 @@ async def executeGraph( _updateStepLog(automation2_interface, _rStepId, "completed", output=result if isinstance(result, dict) else {"value": result}, durationMs=_rDur, retryCount=_rRetry) + await _ge_log_node_finished( + 
ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=bnid, + node_type=body_node.get("type", ""), + status="completed", + input_snap=_rInputSnap, + output=result, + duration_ms=_rDur, + retry_count=_rRetry, + loop_index=next_index, + loop_node_id=loop_node_id, + loop_item=items[next_index], + ) logger.info("executeGraph loop resume body node %s done (iter %d, retries=%d)", bnid, next_index, _rRetry) + if _resume_feedback_body_node_id and bnid == _resume_feedback_body_node_id: + _resume_body_results.append(result) except PauseForHumanTaskError as e: + _rPauseDur = int((time.time() - _rStepStart) * 1000) _updateStepLog(automation2_interface, _rStepId, "completed", - durationMs=int((time.time() - _rStepStart) * 1000)) + durationMs=_rPauseDur) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=bnid, + node_type=body_node.get("type", ""), + status="completed", + input_snap=_rInputSnap, + duration_ms=_rPauseDur, + loop_index=next_index, + loop_node_id=loop_node_id, + loop_item=items[next_index], + ) if automation2_interface: run_ctx = dict(run.get("context") or {}) run_ctx["_loopState"] = {"loopNodeId": loop_node_id, "currentIndex": next_index, "items": items} - automation2_interface.updateRun(e.runId, status="paused", nodeOutputs=_serializableOutputs(nodeOutputs), currentNodeId=e.nodeId, context=run_ctx) + automation2_interface.updateRun(runId, status="paused", nodeOutputs=_serializableOutputs(nodeOutputs), currentNodeId=e.nodeId, context=run_ctx) return {"success": False, "paused": True, "taskId": e.taskId, "runId": e.runId, "nodeId": e.nodeId, "nodeOutputs": _serializableOutputs(nodeOutputs)} except PauseForEmailWaitError as e: + _rEmailDur = int((time.time() - _rStepStart) * 1000) _updateStepLog(automation2_interface, _rStepId, "completed", - 
durationMs=int((time.time() - _rStepStart) * 1000)) + durationMs=_rEmailDur) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=bnid, + node_type=body_node.get("type", ""), + status="completed", + input_snap=_rInputSnap, + duration_ms=_rEmailDur, + loop_index=next_index, + loop_node_id=loop_node_id, + loop_item=items[next_index], + ) raise except (_SubscriptionInactiveException, _BillingContextError): + _rFailDurSb = int((time.time() - _rStepStart) * 1000) _updateStepLog(automation2_interface, _rStepId, "failed", - error="Subscription/Billing error", durationMs=int((time.time() - _rStepStart) * 1000)) + error="Subscription/Billing error", durationMs=_rFailDurSb) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=bnid, + node_type=body_node.get("type", ""), + status="failed", + input_snap=_rInputSnap, + error="Subscription/Billing error", + duration_ms=_rFailDurSb, + loop_index=next_index, + loop_node_id=loop_node_id, + loop_item=items[next_index], + ) raise except Exception as ex: + _rFailDurEx = int((time.time() - _rStepStart) * 1000) _updateStepLog(automation2_interface, _rStepId, "failed", - error=str(ex), durationMs=int((time.time() - _rStepStart) * 1000)) + error=str(ex), durationMs=_rFailDurEx) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=bnid, + node_type=body_node.get("type", ""), + status="failed", + input_snap=_rInputSnap, + error=str(ex), + duration_ms=_rFailDurEx, + loop_index=next_index, + loop_node_id=loop_node_id, + loop_item=items[next_index], + ) logger.exception("executeGraph loop body node %s FAILED: %s", bnid, ex) nodeOutputs[bnid] = {"error": str(ex), "success": False} if runId and 
automation2_interface: @@ -552,11 +980,28 @@ async def executeGraph( return {"success": False, "error": str(ex), "nodeOutputs": _serializableOutputs(nodeOutputs), "failedNode": bnid, "runId": runId} next_index += 1 if loop_node_id: - nodeOutputs[loop_node_id] = {"items": items, "count": len(items)} for aggId, accItems in _aggregateAccumulators.items(): nodeOutputs[aggId] = {"items": accItems, "count": len(accItems), "_success": True} _aggregateAccumulators.clear() - processed_in_loop = set(body_ids) | {loop_node_id} + if _resume_body_results: + _rlo = nodeOutputs.get(loop_node_id) + if isinstance(_rlo, dict): + _rlo["bodyResults"] = _resume_body_results + nodeOutputs[loop_node_id] = _rlo + await _run_post_loop_done_nodes( + loop_node_id=loop_node_id, + body_ids=body_ids, + items=items, + ordered=ordered, + connectionMap=connectionMap, + nodeOutputs=nodeOutputs, + context=context, + services=services, + automation2_interface=automation2_interface, + runId=runId, + processed_in_loop=processed_in_loop, + ge_file_logger=ge_file_logger, + ) for i, node in enumerate(ordered): if skip_until_passed: @@ -570,15 +1015,41 @@ async def executeGraph( break nodeId = node.get("id") nodeType = node.get("type", "") - if not _is_node_on_active_path(nodeId, connectionMap, nodeOutputs): + # flow.loop: the feedback edge (body → loop input 0) hasn't run yet on the first + # pass → would make _is_node_on_active_path return False. Only check the + # *primary* predecessor (the one outside the loop body). 
+ if nodeType == "flow.loop": + _loop_body_ids = getLoopBodyNodeIds(nodeId, connectionMap) + _loop_primary = getLoopPrimaryInputSource(nodeId, connectionMap, _loop_body_ids) + _loop_check_map = ( + {nodeId: [(_loop_primary[0], _loop_primary[1], 0)]} + if _loop_primary else connectionMap + ) + _loop_active = _is_node_on_active_path(nodeId, _loop_check_map, nodeOutputs) + else: + _loop_active = _is_node_on_active_path(nodeId, connectionMap, nodeOutputs) + if not _loop_active: logger.info("executeGraph step %d/%d: nodeId=%s SKIP (inactive branch)", i + 1, len(ordered), nodeId) _skipInputSnap = {"_skipReason": "inactive_branch"} for _sSrc, _, _ in connectionMap.get(nodeId, []): if _sSrc in nodeOutputs: _skipInputSnap[_sSrc] = nodeOutputs[_sSrc] + _skipInputSnap = _merge_node_parameters_into_snap(_skipInputSnap, node_id=nodeId, context=context) _skipStepId = _createStepLog(automation2_interface, runId, nodeId, nodeType, status="skipped", inputSnapshot=_skipInputSnap) if _skipStepId: _updateStepLog(automation2_interface, _skipStepId, "skipped") + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=nodeId, + node_type=nodeType, + status="skipped", + input_snap=_skipInputSnap, + skip_reason=str(_skipInputSnap.get("_skipReason") or "inactive_branch"), + ) continue executor = _getExecutor(nodeType, services, automation2_interface) logger.info( @@ -601,6 +1072,7 @@ async def executeGraph( for _lSrc, _, _ in connectionMap.get(nodeId, []): if _lSrc in nodeOutputs: _loopInputSnap[_lSrc] = nodeOutputs[_lSrc] + _loopInputSnap = _merge_node_parameters_into_snap(_loopInputSnap, node_id=nodeId, context=context) _stepId = _createStepLog(automation2_interface, runId, nodeId, nodeType, "running", _loopInputSnap) result = await executor.execute(node, context) items = result.get("items") or [] @@ -612,6 +1084,17 @@ async def executeGraph( _loopConcurrency = max(1, 
min(_loopConcurrency, 20)) _batchMode = len(items) > STEPLOG_BATCH_THRESHOLD _aggLock = asyncio.Lock() + # Prefer the *last* body node wired to loop input 0 (feedback / + # pipeline end) — first matching inbound edge can be a shallow node. + _feedback_candidates = [ + _fb_src + for _fb_src, _fb_so, _fb_ti in (connectionMap.get(nodeId) or []) + if _fb_src in body_ids and _fb_ti == 0 + ] + _feedback_body_node_id = _feedback_candidates[-1] if _feedback_candidates else None + if not _feedback_body_node_id and body_ordered: + _feedback_body_node_id = body_ordered[-1].get("id") + _bodyResultsPerIter: List[Any] = [None] * len(items) async def _runLoopIteration(_idx: int, _item: Any) -> Optional[Dict]: """Execute all body nodes for one iteration. Returns error dict or None.""" @@ -639,10 +1122,23 @@ async def executeGraph( _activeOutputs[bnid] = None continue _bStepStart = time.time() + _bInputSnapAlways: Dict[str, Any] = {"_loopItem": _item, "_loopIndex": _idx} + for _bSnapSrc, _, _ in connectionMap.get(bnid, []): + if _bSnapSrc in _activeOutputs: + _bInputSnapAlways[_bSnapSrc] = _activeOutputs[_bSnapSrc] + _bInputSnapAlways = _merge_node_parameters_into_snap( + _bInputSnapAlways, node_id=bnid, context=context + ) _bStepId = None if not _batchMode or _idx == 0 or _idx == len(items) - 1: - _bInputSnap = {"_loopItem": _item, "_loopIndex": _idx} - _bStepId = _createStepLog(automation2_interface, runId, bnid, body_node.get("type", ""), "running", _bInputSnap) + _bStepId = _createStepLog( + automation2_interface, + runId, + bnid, + body_node.get("type", ""), + "running", + _bInputSnapAlways, + ) try: bres, _bRetry = await _executeWithRetry(bexec, body_node, _activeCtx) if body_node.get("type") == "data.aggregate": @@ -655,17 +1151,50 @@ async def executeGraph( _aggregateTempChunks.setdefault(bnid, []).append(_aggregateAccumulators[bnid]) _aggregateAccumulators[bnid] = [] _activeOutputs[bnid] = bres + _bDur = int((time.time() - _bStepStart) * 1000) if _bStepId: - _bDur = 
int((time.time() - _bStepStart) * 1000) _updateStepLog(automation2_interface, _bStepId, "completed", output=bres if isinstance(bres, dict) else {"value": bres}, durationMs=_bDur, retryCount=_bRetry) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=_activeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=bnid, + node_type=body_node.get("type", ""), + status="completed", + input_snap=_bInputSnapAlways, + output=bres, + duration_ms=_bDur, + retry_count=_bRetry, + loop_index=_idx, + loop_node_id=nodeId, + loop_item=_item, + ) if _loopConcurrency == 1: nodeOutputs[bnid] = bres except PauseForHumanTaskError as e: + _bHd = int((time.time() - _bStepStart) * 1000) if _bStepId: _updateStepLog(automation2_interface, _bStepId, "completed", - durationMs=int((time.time() - _bStepStart) * 1000)) + durationMs=_bHd) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=_activeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=bnid, + node_type=body_node.get("type", ""), + status="completed", + input_snap=_bInputSnapAlways, + duration_ms=_bHd, + loop_index=_idx, + loop_node_id=nodeId, + loop_item=_item, + ) if runId and automation2_interface: _run = automation2_interface.getRun(runId) or {} _run_ctx = dict(_run.get("context") or {}) @@ -673,22 +1202,76 @@ async def executeGraph( automation2_interface.updateRun(e.runId, status="paused", nodeOutputs=_serializableOutputs(nodeOutputs), currentNodeId=e.nodeId, context=_run_ctx) return {"_pause": True, "taskId": e.taskId, "runId": e.runId, "nodeId": e.nodeId} except PauseForEmailWaitError: + _bEd = int((time.time() - _bStepStart) * 1000) if _bStepId: _updateStepLog(automation2_interface, _bStepId, "completed", - durationMs=int((time.time() - _bStepStart) * 1000)) + durationMs=_bEd) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=_activeOutputs, + 
run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=bnid, + node_type=body_node.get("type", ""), + status="completed", + input_snap=_bInputSnapAlways, + duration_ms=_bEd, + loop_index=_idx, + loop_node_id=nodeId, + loop_item=_item, + ) raise except (_SubscriptionInactiveException, _BillingContextError): + _bSb = int((time.time() - _bStepStart) * 1000) if _bStepId: _updateStepLog(automation2_interface, _bStepId, "failed", - error="Subscription/Billing error", durationMs=int((time.time() - _bStepStart) * 1000)) + error="Subscription/Billing error", durationMs=_bSb) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=_activeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=bnid, + node_type=body_node.get("type", ""), + status="failed", + input_snap=_bInputSnapAlways, + error="Subscription/Billing error", + duration_ms=_bSb, + loop_index=_idx, + loop_node_id=nodeId, + loop_item=_item, + ) raise except Exception as ex: + _bFail = int((time.time() - _bStepStart) * 1000) if _bStepId: _updateStepLog(automation2_interface, _bStepId, "failed", - error=str(ex), durationMs=int((time.time() - _bStepStart) * 1000)) + error=str(ex), durationMs=_bFail) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=_activeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=bnid, + node_type=body_node.get("type", ""), + status="failed", + input_snap=_bInputSnapAlways, + error=str(ex), + duration_ms=_bFail, + loop_index=_idx, + loop_node_id=nodeId, + loop_item=_item, + ) logger.exception("executeGraph loop body node %s FAILED (iter %d): %s", bnid, _idx, ex) return {"_error": str(ex), "failedNode": bnid} + if _feedback_body_node_id: + async with _aggLock: + if _idx < len(_bodyResultsPerIter): + _bodyResultsPerIter[_idx] = _activeOutputs.get(_feedback_body_node_id) if _batchMode and _idx > 0 and _idx % STEPLOG_BATCH_THRESHOLD == 0 and runId: 
_emitStepEvent(runId, {"type": "loop_progress", "nodeId": nodeId, "iteration": _idx, "total": len(items)}) return None @@ -732,7 +1315,6 @@ async def executeGraph( _activeRunContexts.pop(runId, None) return {"success": False, "error": _rval["_error"], "nodeOutputs": _serializableOutputs(nodeOutputs), "failedNode": _rval.get("failedNode"), "runId": runId} - nodeOutputs[nodeId] = {"items": items, "count": len(items)} for aggId, accItems in _aggregateAccumulators.items(): allChunks = _aggregateTempChunks.pop(aggId, []) finalItems = [] @@ -741,13 +1323,57 @@ async def executeGraph( finalItems.extend(accItems) nodeOutputs[aggId] = {"items": finalItems, "count": len(finalItems), "_success": True} _aggregateAccumulators.clear() + + # Always attach ``bodyResults`` (list per iteration, possibly None + # placeholders) so DataRefs to ``bodyResults`` resolve and + # ``context.mergeContext`` can fall back to the wired loop output. + _lo = nodeOutputs.get(nodeId) + if isinstance(_lo, dict): + _lo["bodyResults"] = _bodyResultsPerIter + nodeOutputs[nodeId] = _lo + + await _run_post_loop_done_nodes( + loop_node_id=nodeId, + body_ids=body_ids, + items=items, + ordered=ordered, + connectionMap=connectionMap, + nodeOutputs=nodeOutputs, + context=context, + services=services, + automation2_interface=automation2_interface, + runId=runId, + processed_in_loop=processed_in_loop, + ge_file_logger=ge_file_logger, + ) + + _loopDurMs = int((time.time() - _stepStartMs) * 1000) + _loopStepOut = { + "iterationCount": len(items), + "items": len(items), + "concurrency": _loopConcurrency, + "batchMode": _batchMode, + } _updateStepLog(automation2_interface, _stepId, "completed", - output={"iterationCount": len(items), "items": len(items), "concurrency": _loopConcurrency, "batchMode": _batchMode}, - durationMs=int((time.time() - _stepStartMs) * 1000)) + output=_loopStepOut, + durationMs=_loopDurMs) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + 
run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=nodeId, + node_type=nodeType, + status="completed", + input_snap=_loopInputSnap, + output=_loopStepOut, + duration_ms=_loopDurMs, + ) logger.info("executeGraph flow.loop done: %d iterations (concurrency=%d, batchMode=%s)", len(items), _loopConcurrency, _batchMode) - elif _isMergeNode(nodeType): + elif _isBarrierNode(nodeType): if not _allMergePredecessorsReady(nodeId, connectionMap, nodeOutputs): - logger.info("executeGraph node %s (flow.merge): waiting — not all predecessors ready, deferring", nodeId) + logger.info("executeGraph node %s (%s): waiting — not all predecessors ready, deferring", nodeId, nodeType) nodeOutputs[nodeId] = None continue _stepStartMs = time.time() @@ -755,16 +1381,37 @@ async def executeGraph( for src, _, _ in connectionMap.get(nodeId, []): if src in nodeOutputs: _inputSnap[src] = nodeOutputs[src] + _inputSnap = _merge_node_parameters_into_snap(_inputSnap, node_id=nodeId, context=context) _stepId = _createStepLog(automation2_interface, runId, nodeId, nodeType, "running", _inputSnap) result, retryCount = await _executeWithRetry(executor, node, context) result = _normalizeResult(result, nodeType) nodeOutputs[nodeId] = result + _mergeDurMs = int((time.time() - _stepStartMs) * 1000) + _mergeTok = result.get("tokensUsed", 0) if isinstance(result, dict) else 0 + _updateStepLog(automation2_interface, _stepId, "completed", + output=result if isinstance(result, dict) else {"value": result}, + durationMs=_mergeDurMs, tokensUsed=_mergeTok, retryCount=retryCount) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=nodeId, + node_type=nodeType, + status="completed", + input_snap=_inputSnap, + output=result, + duration_ms=_mergeDurMs, + retry_count=retryCount, + ) else: _stepStartMs = time.time() _inputSnap = {} for src, _, _ in connectionMap.get(nodeId, []): if 
src in nodeOutputs: _inputSnap[src] = nodeOutputs[src] + _inputSnap = _merge_node_parameters_into_snap(_inputSnap, node_id=nodeId, context=context) _stepId = _createStepLog(automation2_interface, runId, nodeId, nodeType, "running", _inputSnap) result, retryCount = await _executeWithRetry(executor, node, context) result = _normalizeResult(result, nodeType) @@ -774,6 +1421,20 @@ async def executeGraph( _updateStepLog(automation2_interface, _stepId, "completed", output=result if isinstance(result, dict) else {"value": result}, durationMs=_durMs, tokensUsed=_tokens, retryCount=retryCount) + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=nodeId, + node_type=nodeType, + status="completed", + input_snap=_inputSnap, + output=result, + duration_ms=_durMs, + retry_count=retryCount, + ) logger.info( "executeGraph node %s done: result_type=%s result_keys=%s retries=%d duration=%dms", nodeId, @@ -783,8 +1444,24 @@ async def executeGraph( _durMs, ) except PauseForHumanTaskError as e: + _huPauseMs = int((time.time() - _stepStartMs) * 1000) _updateStepLog(automation2_interface, _stepId, "completed", - durationMs=int((time.time() - _stepStartMs) * 1000)) + durationMs=_huPauseMs) + _ge_in = locals().get("_inputSnap") + if _ge_in is None: + _ge_in = locals().get("_loopInputSnap") or {} + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=nodeId, + node_type=nodeType, + status="completed", + input_snap=_ge_in, + duration_ms=_huPauseMs, + ) logger.info("executeGraph paused for human task %s", e.taskId) return { "success": False, @@ -795,8 +1472,24 @@ async def executeGraph( "nodeOutputs": _serializableOutputs(nodeOutputs), } except PauseForEmailWaitError as e: + _emailPauseMs = int((time.time() - _stepStartMs) * 1000) 
_updateStepLog(automation2_interface, _stepId, "completed", - durationMs=int((time.time() - _stepStartMs) * 1000)) + durationMs=_emailPauseMs) + _ge_email_in = locals().get("_inputSnap") + if _ge_email_in is None: + _ge_email_in = locals().get("_loopInputSnap") or {} + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=nodeId, + node_type=nodeType, + status="completed", + input_snap=_ge_email_in, + duration_ms=_emailPauseMs, + ) logger.info("executeGraph paused for email wait (run %s, node %s)", e.runId, e.nodeId) try: from modules.interfaces.interfaceDbApp import getRootInterface @@ -820,6 +1513,9 @@ async def executeGraph( "mandateId": context.get("mandateId"), "instanceId": context.get("instanceId"), } + if automation2_interface and e.runId: + prev_ctx = dict((automation2_interface.getRun(e.runId) or {}).get("context") or {}) + run_ctx = merge_run_context_with_ge_log_prefix(prev_ctx, run_ctx) automation2_interface.updateRun( e.runId, status="paused", @@ -840,6 +1536,22 @@ async def executeGraph( nodeOutputs[nodeId] = {"error": str(e), "success": False} _durMs = int((time.time() - _stepStartMs) * 1000) _updateStepLog(automation2_interface, _stepId, "failed", error=str(e), durationMs=_durMs) + _ge_fail_in = locals().get("_inputSnap") + if _ge_fail_in is None: + _ge_fail_in = locals().get("_loopInputSnap") or {} + await _ge_log_node_finished( + ge_file_logger, + run_id=runId, + node_outputs=nodeOutputs, + run_envelope=context.get("runEnvelope"), + exec_context=context, + node_id=nodeId, + node_type=nodeType, + status="failed", + input_snap=_ge_fail_in, + error=str(e), + duration_ms=_durMs, + ) if runId and automation2_interface: automation2_interface.updateRun(runId, status="failed", nodeOutputs=_serializableOutputs(nodeOutputs)) if runId: diff --git a/modules/workflows/automation2/executors/actionNodeExecutor.py 
b/modules/workflows/automation2/executors/actionNodeExecutor.py index 409fa54d..5783b108 100644 --- a/modules/workflows/automation2/executors/actionNodeExecutor.py +++ b/modules/workflows/automation2/executors/actionNodeExecutor.py @@ -1,10 +1,14 @@ # Copyright (c) 2025 Patrick Motsch -# Action node executor - maps ai.*, email.*, sharepoint.*, clickup.*, file.*, trustee.* to method actions. +# Action node executor — maps ai.*, email.*, sharepoint.*, clickup.*, file.*, trustee.* to method actions. # -# Typed Port System: explicit DataRefs / static parameters; optional ``documentList`` from input port 0 -# when the param is empty (same idea as IOExecutor wire fill). -# ``materializeConnectionRefs`` (see pickNotPushMigration) may still rewrite empty connectionReference at run start. +# Typed port system: parameters resolve via DataRefs / static values. Declarative port inheritance +# uses ``graphInherit`` on parameter definitions in node JSON (see STATIC_NODE_TYPES): e.g. +# ``primaryTextRef`` is materialized to explicit refs in pickNotPushMigration.materializePrimaryTextHandover; +# ``documentListWire`` is applied at runtime in this executor via graphUtils.extract_wired_document_list. 
+ +import base64 +import binascii import json import logging import re @@ -16,12 +20,125 @@ from modules.features.graphicalEditor.portTypes import ( ) from modules.serviceCenter.services.serviceSubscription.mainServiceSubscription import SubscriptionInactiveException as _SubscriptionInactiveException from modules.serviceCenter.services.serviceBilling.mainServiceBilling import BillingContextError as _BillingContextError +from modules.workflows.automation2.executors.inputExecutor import PauseForHumanTaskError +from modules.workflows.methods.methodContext.actions.extractContent import ( + PRESENTATION_KIND, + build_presentation_envelope_from_plain_text, + presentation_dict_without_meta, + presentation_response_text, +) logger = logging.getLogger(__name__) +_FILE_CREATE_CTX_LOG_MAX = 500 + + +def _attach_unified_presentation_data(out: Dict[str, Any], *, node_def: Dict[str, Any]) -> None: + """Ensure ``out[\"data\"]`` carries ``context.extractContent.presentation.v1`` for ``file.create``.""" + if node_def.get("skipUnifiedPresentation"): + return + data = out.get("data") + if isinstance(data, dict) and data.get("kind") == PRESENTATION_KIND: + return + text = str(out.get("response") or "").strip() + if not text and isinstance(data, dict): + text = str(data.get("response") or "").strip() + if not text: + return + pres = build_presentation_envelope_from_plain_text(text, source_name=node_type or "content") + if not pres: + return + meta: Dict[str, Any] = {"actionType": node_type} + if isinstance(data, dict): + prev = data.get("_meta") + if isinstance(prev, dict): + meta = {**prev, **meta} + out["data"] = {**pres, "_meta": meta} + + +def _truncate_for_log(val: Any, max_len: int = _FILE_CREATE_CTX_LOG_MAX) -> str: + s = val if isinstance(val, str) else repr(val) + s = s.replace("\r", "\\r").replace("\n", "\\n") + if len(s) <= max_len: + return s + return s[:max_len] + f"...<{len(s)} chars>" + + +def _log_file_create_context_resolution( + node_id: str, + raw_params: Dict[str, 
Any], + resolved_params: Dict[str, Any], + exec_context: Dict[str, Any], +) -> None: + """Debug ``file.create`` when ``context`` resolves empty — trace refs and upstream output.""" + raw_c = raw_params.get("context") + res_c = resolved_params.get("context") + node_outputs = exec_context.get("nodeOutputs") or {} + input_sources = (exec_context.get("inputSources") or {}).get(node_id) or {} + src_entry = input_sources.get(0) + src_id = src_entry[0] if src_entry else None + upstream = node_outputs.get(src_id) if src_id else None + + up_summary = "missing" + up_resp_len = -1 + up_transit = False + if isinstance(upstream, dict): + up_transit = bool(upstream.get("_transit")) + inner = upstream.get("data") if up_transit else upstream + up_keys = sorted(k for k in upstream.keys() if not str(k).startswith("_") or k in ("_transit", "_success")) + up_resp_len = len(str((inner if isinstance(inner, dict) else upstream).get("response") or "")) + up_summary = "keys=%s transit=%s response_len=%s _success=%s" % ( + up_keys[:25], + up_transit, + up_resp_len, + upstream.get("_success"), + ) + + def _shape(name: str, v: Any) -> str: + if v is None: + return f"{name}=None" + if isinstance(v, dict) and v.get("type") == "ref": + return f"{name}=ref(nodeId={v.get('nodeId')!r}, path={v.get('path')!r})" + if isinstance(v, list): + if v and all(isinstance(x, dict) and x.get("type") == "ref" for x in v): + bits = [ + f"ref({x.get('nodeId')!r},{x.get('path')!r})" + for x in v[:5] + ] + return f"{name}=contextBuilder[{len(v)} refs: {', '.join(bits)}{'…' if len(v) > 5 else ''}]" + return f"{name}=list(len={len(v)}, elem0_type={type(v[0]).__name__})" + if isinstance(v, str): + return f"{name}=str(len={len(v)}, preview={_truncate_for_log(v, 240)!r})" + return f"{name}={type(v).__name__}({_truncate_for_log(v)!r})" + + logger.info( + "file.create context resolution node=%s port0=%r upstream_node=%s upstream: %s | %s | %s", + node_id, + src_id, + src_id, + up_summary, + _shape("raw", raw_c), + 
_shape("resolved", res_c), + ) + + +def _looks_like_ascii_base64_payload(s: str) -> bool: + """Heuristic: ActionDocument binary payloads use standard ASCII base64; markdown/text uses other chars (#, *, -, …).""" + t = "".join(s.split()) + if len(t) < 8: + return False + if not t.isascii(): + return False + return bool(re.fullmatch(r"[A-Za-z0-9+/]+=*", t)) and len(t) % 4 == 0 + def _coerce_document_data_to_bytes(raw: Any) -> Optional[bytes]: - """Normalize documentData (bytes/str/buffer) for DB file persistence.""" + """Normalize documentData for DB file persistence. + + ActionDocument conventions (see methodFile.create): binary bodies are carried as ASCII + base64 strings; plain markdown/text stays as Unicode. Do not UTF-8-encode a base64 + literal — that persists the ASCII of the encoding (file looks like base64 gibberish). + """ if raw is None: return None if isinstance(raw, bytes): @@ -33,11 +150,67 @@ def _coerce_document_data_to_bytes(raw: Any) -> Optional[bytes]: b = raw.tobytes() return b if len(b) > 0 else None if isinstance(raw, str): - b = raw.encode("utf-8") + stripped = raw.strip() + if not stripped: + return None + if _looks_like_ascii_base64_payload(stripped): + try: + decoded = base64.b64decode(stripped, validate=True) + except (TypeError, binascii.Error, ValueError): + try: + decoded = base64.b64decode(stripped) + except (binascii.Error, ValueError): + decoded = b"" + if decoded: + return decoded + b = stripped.encode("utf-8") return b if len(b) > 0 else None return None +def _image_documents_from_docs_list(docs_list: list) -> list: + """All image/* ActionDocument dicts (generic — no assumptions about index 0).""" + return [ + d for d in (docs_list or []) + if isinstance(d, dict) and str(d.get("mimeType") or "").strip().lower().startswith("image/") + ] + + +def _image_refs_from_extract_node_data(extract_data: Any) -> list: + """Synthetic image document dicts from ``context.extractContent`` ``_meta.persistedImageArtifacts``.""" + if not 
isinstance(extract_data, dict): + return [] + meta = extract_data.get("_meta") + if not isinstance(meta, dict): + return [] + arts = meta.get("persistedImageArtifacts") + if not isinstance(arts, list): + return [] + out: list = [] + for a in arts: + if not isinstance(a, dict): + continue + fid = a.get("fileId") + if not fid: + continue + out.append( + { + "documentName": a.get("fileName") or f"extract_image_{fid}", + "mimeType": str(a.get("mimeType") or "application/octet-stream"), + "documentData": None, + "fileId": str(fid), + "_hasBinaryData": True, + "validationMetadata": { + "actionType": "context.extractContent", + "handoverRole": "extractedMedia", + "suppressInWorkflowFileLists": True, + "sourcePartId": a.get("sourcePartId"), + }, + } + ) + return out + + _USER_CONNECTION_ID_RE = re.compile( r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", re.IGNORECASE, @@ -174,6 +347,13 @@ def _buildConnectionRefDict(connRef: str, chatService, services) -> Optional[Dic return {"id": conn_id, "authority": authority, "label": label or f"{authority}:{user}"} +def _schemaCarriesConnectionProvenance(outputSchema: str) -> bool: + """True iff the port schema declares ``carriesConnectionProvenance`` in the catalog.""" + from modules.features.graphicalEditor.portTypes import PORT_TYPE_CATALOG + schema = PORT_TYPE_CATALOG.get(outputSchema) + return bool(getattr(schema, "carriesConnectionProvenance", False)) + + def _attachConnectionProvenance( out: Dict[str, Any], resolvedParams: Dict[str, Any], @@ -187,7 +367,7 @@ def _attachConnectionProvenance( cref = resolvedParams.get("connectionReference") if not cref: return - if outputSchema not in ("FileList", "DocumentList", "EmailList", "TaskList", "EmailDraft", "UdmDocument"): + if not _schemaCarriesConnectionProvenance(outputSchema): return payload = _buildConnectionRefDict(str(cref), chatService, services) if payload: @@ -203,8 +383,7 @@ def _resolveConnectionParam(params: Dict, chatService, services) -> None: 
params["connectionReference"] = resolved -def _applyEmailCheckFilter(params: Dict) -> None: - """Build filter from discrete email params for email.checkEmail.""" +def _mapper_emailCheckFilter(params: Dict, **_) -> None: built = _buildEmailFilter( fromAddress=params.get("fromAddress"), subjectContains=params.get("subjectContains"), @@ -216,8 +395,7 @@ def _applyEmailCheckFilter(params: Dict) -> None: params.pop(k, None) -def _applyEmailSearchQuery(params: Dict) -> None: - """Build query from discrete email params for email.searchEmail.""" +def _mapper_emailSearchQuery(params: Dict, **_) -> None: built = _buildSearchQuery( query=params.get("query"), fromAddress=params.get("fromAddress"), @@ -232,6 +410,56 @@ def _applyEmailSearchQuery(params: Dict) -> None: params.pop(k, None) +def _mapper_aiPromptLegacyAlias(params: Dict, **_) -> None: + """Backwards-compatible alias: legacy ``prompt`` parameter is exposed as ``aiPrompt``.""" + if "aiPrompt" not in params and "prompt" in params: + params["aiPrompt"] = params.pop("prompt") + + +def _mapper_emailDraftContextFromSubjectBody(params: Dict, **_) -> None: + """Build ``context`` from discrete subject + body fields and drop them.""" + subject = params.get("subject", "") + body = params.get("body", "") + if not (subject or body): + return + parts = [] + if subject: + parts.append(f"Subject: {subject}") + if body: + parts.append(f"Body:\n{body}") + params["context"] = "\n\n".join(parts) + params.pop("subject", None) + params.pop("body", None) + + +def _mapper_clickupTaskUpdateMerge(params: Dict, **_) -> None: + from modules.workflows.automation2.clickupTaskUpdateMerge import merge_clickup_task_update_entries + merge_clickup_task_update_entries(params) + + +_PARAM_MAPPERS: Dict[str, Any] = { + "emailCheckFilter": _mapper_emailCheckFilter, + "emailSearchQuery": _mapper_emailSearchQuery, + "aiPromptLegacyAlias": _mapper_aiPromptLegacyAlias, + "emailDraftContextFromSubjectBody": _mapper_emailDraftContextFromSubjectBody, + 
"clickupTaskUpdateMerge": _mapper_clickupTaskUpdateMerge, +} + + +def _applyParamMappers(nodeDef: Dict[str, Any], resolvedParams: Dict[str, Any]) -> None: + """Run declared ``paramMappers`` from the node definition (no node-id branching).""" + mappers = nodeDef.get("paramMappers") or [] + for name in mappers: + fn = _PARAM_MAPPERS.get(name) + if not fn: + logger.warning("Unknown paramMapper %r — node %s; skipping", name, nodeDef.get("id")) + continue + try: + fn(resolvedParams) + except Exception as e: + logger.warning("paramMapper %r failed for node %s: %s", name, nodeDef.get("id"), e) + + def _getOutputSchemaName(nodeDef: Dict) -> str: """Get the output schema name from the node definition.""" outputPorts = nodeDef.get("outputPorts", {}) @@ -239,76 +467,55 @@ def _getOutputSchemaName(nodeDef: Dict) -> str: return port0.get("schema", "ActionResult") -def _extract_wired_document_list(inp: Any) -> Optional[Dict[str, Any]]: +def _resolveUpstreamPayload(nodeId: str, context: Dict[str, Any]) -> Any: + """Return the unwrapped output of the primary inbound wire to ``nodeId``. + + Prefer logical input port 0. Some persisted graphs register the only edge + under a non-zero ``targetInput`` — fall back to the sole inbound port or + the first ``connectionMap`` entry so ``injectUpstreamPayload`` (e.g. + ``context.mergeContext`` after ``flow.loop``) still receives data. """ - Build a DocumentList-shaped dict from upstream node output (matches IOExecutor wire behavior). - Handles DocumentList, human upload shapes (file / files / fileIds), FileList, loop file items. - During flow.loop body execution the loop node's output is - {items, count, currentItem, currentIndex}; wired document actions must use currentItem. 
- """ - if inp is None: + from modules.features.graphicalEditor.switchOutput import unwrap_transit_for_port + + nodeOutputs = context.get("nodeOutputs") or {} + connectionMap = context.get("connectionMap") or {} + src_map = (context.get("inputSources") or {}).get(nodeId) or {} + + entry = src_map.get(0) + if not entry and src_map: + if len(src_map) == 1: + entry = next(iter(src_map.values())) + else: + mi = min(src_map.keys()) + entry = src_map.get(mi) + if not entry and connectionMap.get(nodeId): + inc = connectionMap[nodeId] + if inc: + src_node_id, _so, _ti = inc[0] + entry = (src_node_id, _so) + + if not entry: return None - from modules.features.graphicalEditor.portTypes import ( - unwrapTransit, - _coerce_document_list_upload_fields, - _file_record_to_document, - ) - - data = unwrapTransit(inp) - if isinstance(data, str): - one = _file_record_to_document(data) - return {"documents": [one], "count": 1} if one else None - if not isinstance(data, dict): - return None - d = dict(data) - _coerce_document_list_upload_fields(d) - # Per-iteration payload from executionEngine (flow.loop → downstream in loop body) - if "currentItem" in d: - ci = d.get("currentItem") - if ci is not None: - nested = _extract_wired_document_list(ci) - if nested: - return nested - docs = d.get("documents") - if isinstance(docs, list) and len(docs) > 0: - return {"documents": docs, "count": d.get("count", len(docs))} - raw_list = d.get("documentList") - if isinstance(raw_list, list) and len(raw_list) > 0 and isinstance(raw_list[0], dict): - return {"documents": raw_list, "count": len(raw_list)} - doc_id = d.get("documentId") or d.get("id") - if doc_id and str(doc_id).strip(): - one: Dict[str, Any] = {"id": str(doc_id).strip()} - fn = d.get("fileName") or d.get("name") - if fn: - one["name"] = str(fn) - mt = d.get("mimeType") - if mt: - one["mimeType"] = str(mt) - return {"documents": [one], "count": 1} - files = d.get("files") - if isinstance(files, list) and files: - collected = [] - for 
item in files: - conv = _file_record_to_document(item) if isinstance(item, dict) else None - if conv: - collected.append(conv) - if collected: - return {"documents": collected, "count": len(collected)} - return None + src_node_id, src_out = entry + upstream = nodeOutputs.get(src_node_id) + return unwrap_transit_for_port(upstream, src_out) -def _document_list_param_is_empty(val: Any) -> bool: - if val is None or val == "": - return True - if isinstance(val, list) and len(val) == 0: - return True - if isinstance(val, dict): - if val.get("documents") or val.get("references") or val.get("items"): - return False - if val.get("documentId") or val.get("id"): - return False - return True - return False +def _resolveBranchInputs(nodeId: str, context: Dict[str, Any]) -> Dict[int, Any]: + """Return ``Dict[port_index → unwrapped upstream output]`` for every wired input port.""" + from modules.features.graphicalEditor.switchOutput import unwrap_transit_for_port + src_map = (context.get("inputSources") or {}).get(nodeId) or {} + nodeOutputs = context.get("nodeOutputs") or {} + out: Dict[int, Any] = {} + for port_ix, entry in src_map.items(): + if not entry: + continue + src_node_id, src_out = entry + upstream = nodeOutputs.get(src_node_id) + if upstream is None: + continue + out[int(port_ix)] = unwrap_transit_for_port(upstream, src_out) + return out class ActionNodeExecutor: @@ -323,7 +530,11 @@ class ActionNodeExecutor: context: Dict[str, Any], ) -> Any: from modules.features.graphicalEditor.nodeRegistry import getNodeTypeToMethodAction - from modules.workflows.automation2.graphUtils import resolveParameterReferences + from modules.workflows.automation2.graphUtils import ( + document_list_param_is_empty, + extract_wired_document_list, + resolveParameterReferences, + ) from modules.workflows.processing.core.actionExecutor import ActionExecutor nodeType = node.get("type", "") @@ -343,7 +554,12 @@ class ActionNodeExecutor: # 1. 
Resolve parameters (DataRef, SystemVar, Static) params = dict(node.get("parameters") or {}) logger.debug("ActionNodeExecutor node %s raw params keys=%s", nodeId, list(params.keys())) - resolvedParams = resolveParameterReferences(params, context.get("nodeOutputs", {})) + resolvedParams = resolveParameterReferences( + params, + context.get("nodeOutputs", {}), + consumer_node_id=nodeId, + input_sources=context.get("inputSources"), + ) logger.debug("ActionNodeExecutor node %s resolved params keys=%s documentList_present=%s documentList_type=%s", nodeId, list(resolvedParams.keys()), "documentList" in resolvedParams, type(resolvedParams.get("documentList")).__name__) # 2. Apply defaults from parameter definitions @@ -352,29 +568,45 @@ class ActionNodeExecutor: if pName and pName not in resolvedParams and "default" in pDef: resolvedParams[pName] = pDef["default"] - _param_names = {p.get("name") for p in nodeDef.get("parameters", []) if p.get("name")} - if "documentList" in _param_names and _document_list_param_is_empty(resolvedParams.get("documentList")): + for pDef in nodeDef.get("parameters") or []: + gi = pDef.get("graphInherit") or {} + if gi.get("kind") != "documentListWire": + continue + pname = pDef.get("name") + if not pname or not document_list_param_is_empty(resolvedParams.get(pname)): + continue + port_ix = int(gi.get("port", 0)) _src_map = (context.get("inputSources") or {}).get(nodeId) or {} - _entry = _src_map.get(0) - if _entry: - _src_node_id, _ = _entry - _upstream = (context.get("nodeOutputs") or {}).get(_src_node_id) - _wired = _extract_wired_document_list(_upstream) - if _wired: - resolvedParams["documentList"] = _wired + _entry = _src_map.get(port_ix) + if not _entry: + continue + _src_node_id, _ = _entry + _upstream = (context.get("nodeOutputs") or {}).get(_src_node_id) + _wired = extract_wired_document_list(_upstream) + if _wired: + resolvedParams[pname] = _wired # 3. 
Resolve connectionReference chatService = getattr(self.services, "chat", None) _resolveConnectionParam(resolvedParams, chatService, self.services) - # 4. Node-type-specific param transformations - if nodeType == "email.checkEmail": - _applyEmailCheckFilter(resolvedParams) - elif nodeType == "email.searchEmail": - _applyEmailSearchQuery(resolvedParams) - elif nodeType == "clickup.updateTask": - from modules.workflows.automation2.clickupTaskUpdateMerge import merge_clickup_task_update_entries - merge_clickup_task_update_entries(resolvedParams) + # 3b. Optional graph-level injections declared on the node definition. + # - injectUpstreamPayload: True → ``_upstreamPayload`` (port 0 source output, transit-unwrapped) + # - injectBranchInputs: True → ``_branchInputs`` (Dict[port_index, output] for all wired ports) + # - injectRunContext: True → ``_runContext`` (the live execution context dict) + if nodeDef.get("injectUpstreamPayload"): + resolvedParams["_upstreamPayload"] = _resolveUpstreamPayload(nodeId, context) + if nodeDef.get("injectBranchInputs"): + resolvedParams["_branchInputs"] = _resolveBranchInputs(nodeId, context) + if nodeDef.get("injectRunContext"): + resolvedParams["_runContext"] = context + resolvedParams["_workflowNodeId"] = nodeId + + # 4. Apply declarative paramMappers from the node definition + _applyParamMappers(nodeDef, resolvedParams) + + if nodeDef.get("logContextResolution"): + _log_file_create_context_resolution(nodeId, params, resolvedParams, context) # 5. email.checkEmail pause for email wait if nodeType == "email.checkEmail": @@ -391,26 +623,7 @@ class ActionNodeExecutor: } raise PauseForEmailWaitError(runId=runId, nodeId=nodeId, waitConfig=waitConfig) - # 6. AI nodes: normalize legacy "prompt" -> "aiPrompt" - if nodeType == "ai.prompt": - if "aiPrompt" not in resolvedParams and "prompt" in resolvedParams: - resolvedParams["aiPrompt"] = resolvedParams.pop("prompt") - - # 7. 
Build context for email.draftEmail from subject + body - if nodeType == "email.draftEmail": - subject = resolvedParams.get("subject", "") - body = resolvedParams.get("body", "") - if subject or body: - contextParts = [] - if subject: - contextParts.append(f"Subject: {subject}") - if body: - contextParts.append(f"Body:\n{body}") - resolvedParams["context"] = "\n\n".join(contextParts) - resolvedParams.pop("subject", None) - resolvedParams.pop("body", None) - - # 8. Create progress parent so nested actions have a hierarchy + # 6. Create progress parent so nested actions have a hierarchy import time as _time nodeOperationId = f"node_{nodeId}_{context.get('_runId', 'x')}_{int(_time.time())}" chatService = getattr(self.services, "chat", None) @@ -440,10 +653,27 @@ class ActionNodeExecutor: except Exception: pass - # 9. Persist generated documents as files and build JSON-safe output + # 7. Persist generated documents as files and build JSON-safe output + _raw_folder_id = resolvedParams.get("folderId") + persist_folder_id: Optional[str] = None + if _raw_folder_id is not None: + _s = str(_raw_folder_id).strip() + if _s: + persist_folder_id = _s + docsList = [] for d in (result.documents or []): dumped = d.model_dump() if hasattr(d, "model_dump") else dict(d) if isinstance(d, dict) else d + if isinstance(dumped, dict): + _meta = dumped.get("validationMetadata") if isinstance(dumped.get("validationMetadata"), dict) else {} + _existing = dumped.get("fileId") or _meta.get("fileId") + # e.g. 
file.create already persisted inside the action — avoid a second FileItem with wrong bytes + if _existing and str(_existing).strip(): + dumped["documentData"] = None + dumped.setdefault("_hasBinaryData", True) + docsList.append(dumped) + continue + rawData = getattr(d, "documentData", None) if hasattr(d, "documentData") else (dumped.get("documentData") if isinstance(dumped, dict) else None) rawBytes = _coerce_document_data_to_bytes(rawData) if isinstance(dumped, dict) and rawBytes: @@ -470,7 +700,7 @@ class ActionNodeExecutor: _mgmt = _getMgmtInterface(_owner, mandateId=_mandateId, featureInstanceId=_instanceId) _docName = dumped.get("documentName") or f"workflow-result-{nodeId}.bin" _mimeType = dumped.get("mimeType") or "application/octet-stream" - _fileItem = _mgmt.createFile(_docName, _mimeType, rawBytes) + _fileItem = _mgmt.createFile(_docName, _mimeType, rawBytes, folderId=persist_folder_id) _mgmt.createFileData(_fileItem.id, rawBytes) dumped["fileId"] = _fileItem.id dumped["id"] = _fileItem.id @@ -482,8 +712,8 @@ class ActionNodeExecutor: logger.warning("Could not persist workflow document: %s", _fe) docsList.append(dumped) - # Clean DocumentList shape for document nodes (match file.create: documents + count, no AiResult fields) - if outputSchema == "DocumentList" and nodeType in ("ai.generateDocument", "ai.convertDocument"): + # Clean DocumentList shape for document nodes (documents + count, no ActionResult/AiResult noise) + if outputSchema == "DocumentList": if not result.success: return _normalizeError( RuntimeError(str(result.error or "document action failed")), @@ -497,17 +727,14 @@ class ActionNodeExecutor: return normalizeToSchema(list_out, outputSchema) extractedContext = "" - if result.documents: - doc = result.documents[0] - raw = getattr(doc, "documentData", None) if hasattr(doc, "documentData") else (doc.get("documentData") if isinstance(doc, dict) else None) - if isinstance(raw, bytes): - try: - extractedContext = raw.decode("utf-8").strip() - 
except (UnicodeDecodeError, ValueError): - extractedContext = "" - elif raw: - extractedContext = str(raw).strip() - + rd_early = getattr(result, "data", None) + if isinstance(rd_early, dict): + if rd_early.get("kind") == PRESENTATION_KIND: + extractedContext = presentation_response_text(presentation_dict_without_meta(rd_early)).strip() + else: + _r = rd_early.get("response") + if _r is not None and str(_r).strip(): + extractedContext = str(_r).strip() promptText = str(resolvedParams.get("aiPrompt") or resolvedParams.get("prompt") or "").strip() resultData = getattr(result, "data", None) @@ -525,7 +752,7 @@ class ActionNodeExecutor: "data": dataField, } - if nodeType.startswith("ai."): + if outputSchema == "AiResult": out["prompt"] = promptText out["response"] = extractedContext inputContext = resolvedParams.get("context") @@ -541,8 +768,38 @@ class ActionNodeExecutor: out["responseData"] = parsed except (json.JSONDecodeError, TypeError): pass + if outputSchema == "AiResult" and result.success: + out["imageDocumentsOnly"] = _image_documents_from_docs_list(docsList) - if nodeType.startswith("clickup.") and result.success and docsList: + if outputSchema == "ActionResult": + # Unified handover: mirror AiResult primary paths for DataRefs / primaryTextRef + inp_ctx = resolvedParams.get("context") + ctx_str = "" + if inp_ctx is not None: + ctx_str = inp_ctx if isinstance(inp_ctx, str) else json.dumps(inp_ctx, ensure_ascii=False, default=str) + out.setdefault("prompt", "") + out.setdefault("context", ctx_str if ctx_str else "") + rsp = str(out.get("response") or "").strip() + if not rsp: + if nodeDef.get("clearResponse"): + out["response"] = "" + else: + out["response"] = extractedContext or "" + if result.success: + img_only = _image_documents_from_docs_list(docsList) + if nodeDef.get("imageDocumentsFromExtractData") and isinstance(result.data, dict): + img_only = list(img_only) + _image_refs_from_extract_node_data(result.data) + if 
nodeDef.get("imageDocumentsFromMerged") and isinstance(result.data, dict): + # mergeContext packs iterated image sidecars under ``data.merged.imageDocumentsOnly`` + # rather than the top-level ``documents`` list which is always empty. + merged_blob = result.data.get("merged") + if isinstance(merged_blob, dict): + merged_imgs = merged_blob.get("imageDocumentsOnly") + if isinstance(merged_imgs, list) and merged_imgs: + img_only = merged_imgs + out["imageDocumentsOnly"] = img_only + + if outputSchema == "TaskResult" and result.success and docsList: try: d0 = docsList[0] if isinstance(docsList[0], dict) else {} raw = d0.get("documentData") @@ -554,7 +811,7 @@ class ActionNodeExecutor: except (json.JSONDecodeError, TypeError, ValueError): pass - if outputSchema == "ConsolidateResult" and nodeType == "ai.consolidate": + if outputSchema == "ConsolidateResult": data_dict = result.data if isinstance(getattr(result, "data", None), dict) else {} cr_out = { "result": data_dict.get("result", ""), @@ -564,5 +821,22 @@ class ActionNodeExecutor: _attachConnectionProvenance(cr_out, resolvedParams, outputSchema, chatService, self.services) return normalizeToSchema(cr_out, outputSchema) + if nodeDef.get("popDocumentsFromOutput"): + out.pop("documents", None) + + if outputSchema in ("AiResult", "ActionResult") and result.success: + _attach_unified_presentation_data(out, node_def=nodeDef) + _attachConnectionProvenance(out, resolvedParams, outputSchema, chatService, self.services) - return normalizeToSchema(out, outputSchema) + + # When the node declares ``surfaceDataAsTopLevel`` (typical for + # dynamic-schema context nodes whose output keys are graph-defined), + # surface ``data.`` to ``out.`` so DataRefs from downstream + # nodes hit the user-defined keys without needing a ``data.`` prefix. 
+ if nodeDef.get("surfaceDataAsTopLevel") and isinstance(dataField, dict): + for k, v in dataField.items(): + if k not in out and not str(k).startswith("_"): + out[k] = v + + normalized_schema = outputSchema if isinstance(outputSchema, str) else "Transit" + return normalizeToSchema(out, normalized_schema) diff --git a/modules/workflows/automation2/executors/flowExecutor.py b/modules/workflows/automation2/executors/flowExecutor.py index 511be6ff..00ede971 100644 --- a/modules/workflows/automation2/executors/flowExecutor.py +++ b/modules/workflows/automation2/executors/flowExecutor.py @@ -2,8 +2,9 @@ # Flow control node executor (ifElse, switch, loop, merge). import logging -from typing import Any, Dict +from typing import Any, Dict, List, Optional +from modules.features.graphicalEditor.conditionOperators import apply_condition_operator, resolve_value_kind from modules.features.graphicalEditor.portTypes import wrapTransit, unwrapTransit logger = logging.getLogger(__name__) @@ -65,20 +66,29 @@ class FlowExecutor: nodeId: str, inputSources: Dict, ) -> Any: - condParam = (node.get("parameters") or {}).get("condition") + params = node.get("parameters") or {} + condParam = params.get("condition") + itemParam = params.get("Item") inp = self._getInputData(nodeId, {nodeId: inputSources}, nodeOutputs) - ok = self._evalConditionParam(condParam, nodeOutputs) + ok = self._evalConditionParam(condParam, nodeOutputs, item_param=itemParam, node=node) return wrapTransit( unwrapTransit(inp) if inp else inp, {"branch": 0 if ok else 1, "conditionResult": ok}, ) - def _evalConditionParam(self, condParam: Any, nodeOutputs: Dict) -> bool: - """Evaluate condition: structured {type,ref,operator,value} or legacy string/ref.""" + def _evalConditionParam( + self, + condParam: Any, + nodeOutputs: Dict, + *, + item_param: Any = None, + node: Optional[Dict] = None, + ) -> bool: + """Evaluate condition: structured {operator,value} with Item dataRef, or legacy.""" if condParam is None: return False 
if isinstance(condParam, dict) and condParam.get("type") == "condition": - return self._evalStructuredCondition(condParam, nodeOutputs) + return self._evalStructuredCondition(condParam, nodeOutputs, item_param=item_param, node=node) from modules.workflows.automation2.graphUtils import resolveParameterReferences resolved = resolveParameterReferences(condParam, nodeOutputs) return self._evalCondition(resolved) @@ -101,57 +111,45 @@ class FlowExecutor: return None return current - def _evalStructuredCondition(self, cond: Dict, nodeOutputs: Dict) -> bool: - """Evaluate structured {ref, operator, value} condition.""" - ref = cond.get("ref") - if not ref or ref.get("type") != "ref": - return False - node_id = ref.get("nodeId") - path = ref.get("path") or [] - left = self._get_by_path(nodeOutputs.get(node_id), list(path)) + def _evalStructuredCondition( + self, + cond: Dict, + nodeOutputs: Dict, + *, + item_param: Any = None, + node: Optional[Dict] = None, + ) -> bool: + """Evaluate structured {operator, value} with Item dataRef (legacy: condition.ref).""" + from modules.workflows.automation2.graphUtils import resolveParameterReferences + + left_ref = item_param + if left_ref is None or (isinstance(left_ref, dict) and not left_ref): + left_ref = cond.get("ref") + left = resolveParameterReferences(left_ref, nodeOutputs) if left_ref is not None else None operator = cond.get("operator", "eq") right = cond.get("value") - if operator == "eq": - return left == right - if operator == "neq": - return left != right - if operator in ("lt", "lte", "gt", "gte"): - try: - l, r = float(left) if left is not None else 0, float(right) if right is not None else 0 - if operator == "lt": - return l < r - if operator == "lte": - return l <= r - if operator == "gt": - return l > r - if operator == "gte": - return l >= r - except (TypeError, ValueError): - return False - if operator == "contains": - return right is not None and str(right) in str(left or "") - if operator == "not_contains": - 
return right is None or str(right) not in str(left or "") - if operator == "empty": - return left is None or left == "" or (isinstance(left, (list, dict)) and len(left) == 0) - if operator == "not_empty": - return left is not None and left != "" and (not isinstance(left, (list, dict)) or len(left) > 0) - if operator == "is_true": - return bool(left) - if operator == "is_false": - return not bool(left) - if operator == "before": - return self._compare_dates(left, right, lambda a, b: a < b) - if operator == "after": - return self._compare_dates(left, right, lambda a, b: a > b) - if operator == "exists": - return self._file_exists(left) - if operator == "not_exists": - return not self._file_exists(left) - return False + value_kind = "unknown" + ref_for_kind = left_ref if isinstance(left_ref, dict) else cond.get("ref") + if isinstance(ref_for_kind, dict) and ref_for_kind.get("nodeId") and node: + graph_stub = self._graph_stub_for_ref(node, ref_for_kind, nodeOutputs) + value_kind = resolve_value_kind(graph_stub, ref_for_kind) - def _compare_dates(self, left: Any, right: Any, op) -> bool: + return apply_condition_operator(left, str(operator), right, value_kind) + + def _graph_stub_for_ref(self, node: Dict, ref: Dict, nodeOutputs: Dict) -> Dict[str, Any]: + """Minimal graph for ``resolve_value_kind`` (includes value producer when known).""" + nodes: List[Dict[str, Any]] = [{"id": node.get("id"), "type": node.get("type")}] + producer_id = ref.get("nodeId") + if producer_id: + ctx = nodeOutputs.get("_context") if isinstance(nodeOutputs.get("_context"), dict) else {} + graph_nodes = ctx.get("graphNodesById") if isinstance(ctx.get("graphNodesById"), dict) else {} + pnode = graph_nodes.get(producer_id) if isinstance(graph_nodes, dict) else None + if isinstance(pnode, dict): + nodes.append({"id": producer_id, "type": pnode.get("type", "")}) + else: + nodes.append({"id": producer_id, "type": ""}) + return {"nodes": nodes, "targetNodeId": node.get("id")} """Compare left/right as 
dates; op(a,b) is the comparison.""" def parse(v): @@ -208,23 +206,42 @@ class FlowExecutor: return bool(resolved) async def _switch(self, node: Dict, nodeOutputs: Dict, nodeId: str, inputSources: Dict) -> Any: - valueExpr = (node.get("parameters") or {}).get("value", "") + params = node.get("parameters") or {} + valueExpr = params.get("value", "") from modules.workflows.automation2.graphUtils import resolveParameterReferences - value = resolveParameterReferences(valueExpr, nodeOutputs) - cases = (node.get("parameters") or {}).get("cases", []) - inp = self._getInputData(nodeId, {nodeId: inputSources}, nodeOutputs) - for i, c in enumerate(cases): - if self._evalSwitchCase(value, c): - return wrapTransit( - unwrapTransit(inp) if inp else inp, - {"match": i, "value": value}, - ) - return wrapTransit( - unwrapTransit(inp) if inp else inp, - {"match": -1, "value": value}, + from modules.features.graphicalEditor.switchOutput import ( + build_switch_combined_output, + build_switch_default_payload, ) - def _evalSwitchCase(self, left: Any, case: Any) -> bool: + value = resolveParameterReferences(valueExpr, nodeOutputs) + cases = params.get("cases", []) or [] + value_kind = "unknown" + if isinstance(valueExpr, dict) and valueExpr.get("type") == "ref": + graph_stub = self._graph_stub_for_ref(node, valueExpr, nodeOutputs) + value_kind = resolve_value_kind(graph_stub, valueExpr) + inp = self._getInputData(nodeId, {nodeId: inputSources}, nodeOutputs) + matched: List[int] = [ + i for i, c in enumerate(cases) + if self._evalSwitchCase(value, c, value_kind=value_kind) + ] + default_idx = len(cases) if isinstance(cases, list) else 0 + if not matched: + matched = [default_idx] + combined = build_switch_combined_output( + inp, cases, matched_indices=matched, value_kind=value_kind, + ) + return wrapTransit( + combined, + { + "match": matched[0], + "matches": matched, + "value": value, + "filterApplied": bool(combined.get("filterApplied")), + }, + ) + + def _evalSwitchCase(self, left: 
Any, case: Any, *, value_kind: Optional[str] = None) -> bool: """ Evaluate a switch case. Case can be: - dict: {operator, value} - use operator to compare left vs value @@ -236,69 +253,90 @@ class FlowExecutor: else: operator = "eq" right = case - # Same logic as _evalStructuredCondition but with explicit left/right - if operator == "eq": - return left == right - if operator == "neq": - return left != right - if operator in ("lt", "lte", "gt", "gte"): - try: - l, r = float(left) if left is not None else 0, float(right) if right is not None else 0 - if operator == "lt": - return l < r - if operator == "lte": - return l <= r - if operator == "gt": - return l > r - if operator == "gte": - return l >= r - except (TypeError, ValueError): - return False - if operator == "contains": - return right is not None and str(right) in str(left or "") - if operator == "not_contains": - return right is None or str(right) not in str(left or "") - if operator == "empty": - return left is None or left == "" or (isinstance(left, (list, dict)) and len(left) == 0) - if operator == "not_empty": - return left is not None and left != "" and (not isinstance(left, (list, dict)) or len(left) > 0) - if operator == "is_true": - return bool(left) - if operator == "is_false": - return not bool(left) - if operator == "before": - return self._compare_dates(left, right, lambda a, b: a < b) - if operator == "after": - return self._compare_dates(left, right, lambda a, b: a > b) - if operator == "exists": - return self._file_exists(left) - if operator == "not_exists": - return not self._file_exists(left) - return False + return apply_condition_operator(left, str(operator), right, value_kind) async def _loop(self, node: Dict, nodeOutputs: Dict, nodeId: str, inputSources: Dict) -> Any: params = node.get("parameters") or {} itemsPath = params.get("items", "[]") - level = params.get("level", "auto") from modules.workflows.automation2.graphUtils import resolveParameterReferences - items = 
resolveParameterReferences(itemsPath, nodeOutputs) - if level != "auto" and isinstance(items, dict): - items = self._resolveUdmLevel(items, level) - elif isinstance(items, list): - pass - elif isinstance(items, dict): - children = items.get("children") - if isinstance(children, list) and children: - items = children - else: - items = [{"name": k, "value": v} for k, v in items.items()] - else: - items = [items] if items is not None else [] + raw = resolveParameterReferences( + itemsPath, + nodeOutputs, + consumer_node_id=nodeId, + input_sources=inputSources, + ) + items = self._normalize_loop_items(raw) + mode = (params.get("iterationMode") or "all").strip().lower() + stride = params.get("iterationStride", 2) + try: + stride_int = int(stride) + except (TypeError, ValueError): + stride_int = 2 + items = self._apply_iteration_mode(items, mode, stride_int) return {"items": items, "count": len(items)} + def _normalize_loop_items(self, raw: Any) -> List[Any]: + """Coerce resolved `items` into a list (lists, dict children, or scalars).""" + if isinstance(raw, dict) and isinstance(raw.get("items"), list): + return self._expand_presentation_lines_loop_items(raw["items"]) + if isinstance(raw, list): + return self._expand_presentation_lines_loop_items(raw) + if isinstance(raw, dict): + children = raw.get("children") + if isinstance(children, list) and len(children) > 0: + return self._expand_presentation_lines_loop_items(children) + items = [{"name": k, "value": v} for k, v in raw.items()] + return self._expand_presentation_lines_loop_items(items) + return [raw] if raw is not None else [] + + def _expand_presentation_lines_loop_items(self, items: List[Any]) -> List[Any]: + """When looping ``presentation.files`` in ``lines`` mode, iterate per slot (e.g. 
CSV row).""" + if not items: + return items + expanded: List[Any] = [] + saw_lines_bucket = False + for it in items: + if not isinstance(it, dict): + expanded.append(it) + continue + val = it.get("value") + if not isinstance(val, dict) or val.get("outputMode") != "lines": + expanded.append(it) + continue + data = val.get("data") + if not isinstance(data, list) or len(data) <= 1: + expanded.append(it) + continue + saw_lines_bucket = True + base_name = str(it.get("name") or val.get("sourceFileName") or "line") + for idx, slot in enumerate(data): + if not isinstance(slot, dict): + continue + sid = str(slot.get("id") or slot.get("label") or idx) + expanded.append({"name": f"{base_name}:{sid}", "value": slot}) + return expanded if saw_lines_bucket else items + + def _apply_iteration_mode(self, items: List[Any], mode: str, stride: int) -> List[Any]: + """Select which elements to iterate over (backend-defined modes).""" + if not items: + return [] + m = (mode or "all").strip().lower() + if m == "first": + return items[:1] + if m == "last": + return items[-1:] + if m == "every_second": + return items[::2] + if m == "every_third": + return items[::3] + if m == "every_nth": + step = max(2, min(100, int(stride))) + return items[::step] + return list(items) + def _resolveUdmLevel(self, udm: Dict, level: str) -> list: - """Extract items from a UDM document/node at the requested structural level.""" + """Extract items from a UDM document/node at the requested structural level (test / tooling).""" children = udm.get("children") or [] if level == "documents": return [c for c in children if isinstance(c, dict) and c.get("role") in ("document", "archive")] diff --git a/modules/workflows/automation2/executors/inputExecutor.py b/modules/workflows/automation2/executors/inputExecutor.py index 22fa2eba..4ccef725 100644 --- a/modules/workflows/automation2/executors/inputExecutor.py +++ b/modules/workflows/automation2/executors/inputExecutor.py @@ -65,16 +65,23 @@ class InputExecutor: ) 
taskId = task.get("id") - self.automation2.updateRun( + from modules.workflows.automation2.graphicalEditorRunFileLogger import merge_persisted_run_context + + _pause_ctx = merge_persisted_run_context( + self.automation2, runId, - status="paused", - nodeOutputs=context.get("nodeOutputs"), - currentNodeId=nodeId, - context={ + { "connectionMap": context.get("connectionMap"), "inputSources": context.get("inputSources"), "orderedNodeIds": [n.get("id") for n in context.get("_orderedNodes", []) if n.get("id")], }, ) + self.automation2.updateRun( + runId, + status="paused", + nodeOutputs=context.get("nodeOutputs"), + currentNodeId=nodeId, + context=_pause_ctx, + ) logger.info("InputExecutor node %s: created task %s, run %s paused", nodeId, taskId, runId) raise PauseForHumanTaskError(runId=runId, taskId=taskId, nodeId=nodeId) diff --git a/modules/workflows/automation2/executors/ioExecutor.py b/modules/workflows/automation2/executors/ioExecutor.py index f6d40b05..14bc8f91 100644 --- a/modules/workflows/automation2/executors/ioExecutor.py +++ b/modules/workflows/automation2/executors/ioExecutor.py @@ -37,7 +37,7 @@ class IOExecutor: nodeOutputs = context.get("nodeOutputs", {}) params = dict(node.get("parameters") or {}) - from modules.workflows.automation2.graphUtils import resolveParameterReferences + from modules.workflows.automation2.graphUtils import extract_wired_document_list, resolveParameterReferences resolvedParams = resolveParameterReferences(params, nodeOutputs) logger.info("IOExecutor node %s resolvedParams keys=%s", nodeId, list(resolvedParams.keys())) @@ -45,9 +45,7 @@ class IOExecutor: if 0 in inputSources: srcId, _ = inputSources[0] inp = nodeOutputs.get(srcId) - from modules.workflows.automation2.executors.actionNodeExecutor import _extract_wired_document_list - - wired = _extract_wired_document_list(inp) + wired = extract_wired_document_list(inp) docs = (wired or {}).get("documents") if isinstance(wired, dict) else None if docs: 
resolvedParams.setdefault("documentList", wired) diff --git a/modules/workflows/automation2/executors/triggerExecutor.py b/modules/workflows/automation2/executors/triggerExecutor.py index 6fd32b80..cd2d118e 100644 --- a/modules/workflows/automation2/executors/triggerExecutor.py +++ b/modules/workflows/automation2/executors/triggerExecutor.py @@ -21,6 +21,7 @@ class TriggerExecutor: context: Dict[str, Any], ) -> Any: node_id = node.get("id", "") + node_type = str(node.get("type") or "") base = context.get("runEnvelope") if not isinstance(base, dict): out = normalize_run_envelope(None, user_id=context.get("userId")) @@ -31,4 +32,11 @@ class TriggerExecutor: node_id, (out.get("trigger") or {}).get("type"), ) + # Form start: port schema is FormPayload — downstream refs use payload.. + # Do not emit the full run envelope on this port. + if node_type == "trigger.form": + payload = out.get("payload") + if not isinstance(payload, dict): + payload = {} + return {"payload": payload, "_success": True} return out diff --git a/modules/workflows/automation2/graphUtils.py b/modules/workflows/automation2/graphUtils.py index 7ea3b4e8..b31dd7bb 100644 --- a/modules/workflows/automation2/graphUtils.py +++ b/modules/workflows/automation2/graphUtils.py @@ -7,50 +7,6 @@ from typing import Dict, List, Any, Tuple, Set, Optional logger = logging.getLogger(__name__) -def _ai_result_text_from_documents(d: Dict[str, Any]) -> Optional[str]: - """Extract plain-text body from AiResult-style ``documents[0].documentData``.""" - docs = d.get("documents") - if not isinstance(docs, list) or not docs: - return None - d0 = docs[0] - raw: Any = None - if isinstance(d0, dict): - raw = d0.get("documentData") - elif d0 is not None: - raw = getattr(d0, "documentData", None) - if raw is None: - return None - if isinstance(raw, bytes): - try: - t = raw.decode("utf-8").strip() - return t or None - except (UnicodeDecodeError, ValueError): - return None - if isinstance(raw, str): - s = raw.strip() - return s or 
None - return None - - -def _ref_coalesce_empty_ai_result_text(data: Any, path: List[Any], resolved: Any) -> Any: - """If a ref targets AiResult text fields but resolves empty/missing, fall back to documents. - - Needed when: optional ``responseData`` is absent (no synthetic ``{}``), ``response`` is - still empty but ``documents`` hold the model output, or legacy graphs bind responseData only. - """ - if resolved not in (None, ""): - return resolved - if not isinstance(data, dict) or not path: - return resolved - head = path[0] - if head not in ("response", "responseData", "context"): - return resolved - if head == "context" and len(path) != 1: - return resolved - fb = _ai_result_text_from_documents(data) - return fb if fb is not None else resolved - - def parseGraph(graph: Dict[str, Any]) -> Tuple[List[Dict], List[Dict], Set[str]]: """ Parse graph into nodes, connections, and node IDs. @@ -92,26 +48,93 @@ def buildConnectionMap(connections: List[Dict]) -> Dict[str, List[Tuple[str, int def getLoopBodyNodeIds(loopNodeId: str, connectionMap: Dict[str, List[Tuple[str, int, int]]]) -> Set[str]: - """Nodes reachable from loop's output (BFS forward). Body = downstream nodes that receive from loop.""" + """Nodes reachable from flow.loop output port 0 only (loop body), BFS forward. + + Edges vom Rumpf zurück in den Loop-Knoten (gleicher Eingang wie der Hauptfluss) beenden die + Expansion am Loop-Knoten — der Loop-Knoten selbst ist nie Teil des Rumpfes. 
+ """ from collections import deque - body = set() - # connectionMap: target -> [(source, sourceOutput, targetInput)] - rev: Dict[str, List[str]] = {} # source -> [targets] + + body: Set[str] = set() + rev: Dict[str, List[Tuple[str, int, int]]] = {} for tgt, pairs in connectionMap.items(): - for src, _, _ in pairs: - if src not in rev: - rev[src] = [] - rev[src].append(tgt) - q = deque([loopNodeId]) + for src, so, ti in pairs: + rev.setdefault(src, []).append((tgt, so, ti)) + + q: deque = deque() + for tgt, so, ti in rev.get(loopNodeId, []): + if so != 0: + continue + if tgt == loopNodeId: + continue + q.append(tgt) + while q: nid = q.popleft() - for tgt in rev.get(nid, []): - if tgt not in body: - body.add(tgt) - q.append(tgt) + if nid == loopNodeId: + continue + if nid not in body: + body.add(nid) + for tgt, _so, _ti in rev.get(nid, []): + if tgt == loopNodeId: + continue + if tgt not in body: + q.append(tgt) return body +def getLoopPrimaryInputSource( + loop_node_id: str, + connectionMap: Dict[str, List[Tuple[str, int, int]]], + body_ids: Set[str], +) -> Optional[Tuple[str, int]]: + """Pick the inbound edge for ``flow.loop`` when several wires hit the same input (0). + + The Schleifen-Rücklauf vom Rumpf und der „normale“ Vorgänger enden auf demselben Port; + für die Datenzusammenführung (Fertig-Ausgang, Logs) zählt der Vorgänger **außerhalb** des Rumpfes. 
+ """ + incoming = connectionMap.get(loop_node_id, []) + candidates = [(src, so) for src, so, ti in incoming if ti == 0] + if not candidates: + return None + outside = [(src, so) for src, so in candidates if src not in body_ids] + if outside: + return outside[0] + return candidates[0] + + +def getLoopDoneNodeIds(loopNodeId: str, connectionMap: Dict[str, List[Tuple[str, int, int]]]) -> Set[str]: + """Nodes reachable from flow.loop output port 1 (runs once after all iterations).""" + from collections import deque + + done: Set[str] = set() + rev: Dict[str, List[Tuple[str, int, int]]] = {} + for tgt, pairs in connectionMap.items(): + for src, so, ti in pairs: + rev.setdefault(src, []).append((tgt, so, ti)) + + q: deque = deque() + for tgt, so, ti in rev.get(loopNodeId, []): + if so != 1: + continue + if tgt == loopNodeId: + continue + q.append(tgt) + + while q: + nid = q.popleft() + if nid == loopNodeId: + continue + if nid not in done: + done.add(nid) + for tgt, _so, _ti in rev.get(nid, []): + if tgt == loopNodeId: + continue + if tgt not in done: + q.append(tgt) + return done + + def getInputSources(nodeId: str, connectionMap: Dict[str, List[Tuple[str, int, int]]]) -> Dict[int, Tuple[str, int]]: """ For a node, return targetInput -> (sourceNodeId, sourceOutput). 
@@ -123,8 +146,15 @@ def getInputSources(nodeId: str, connectionMap: Dict[str, List[Tuple[str, int, i def getTriggerNodes(nodes: List[Dict]) -> List[Dict]: - """Return nodes with category=trigger or type starting with trigger.""" - return [n for n in nodes if (n.get("type", "").startswith("trigger.") or n.get("category") == "trigger")] + """Return start/trigger nodes: type ``trigger.*``, or category ``trigger`` / ``start``.""" + return [ + n + for n in nodes + if ( + str(n.get("type", "")).startswith("trigger.") + or n.get("category") in ("trigger", "start") + ) + ] def validateGraph(graph: Dict[str, Any], nodeTypeIds: Set[str]) -> List[str]: @@ -163,6 +193,11 @@ def validateGraph(graph: Dict[str, Any], nodeTypeIds: Set[str]) -> List[str]: logger.warning("validateGraph port mismatches: %s", port_errors) errors.extend(port_errors) + if nodes and not getTriggerNodes(nodes): + errors.append( + "Workflow has no start node: add a node from the Start category before running." + ) + if errors: logger.debug("validateGraph errors: %s", errors) else: @@ -218,6 +253,8 @@ def _checkPortCompatibility( continue srcOutputPorts = srcDef.get("outputPorts", {}) srcPort = srcOutputPorts.get(srcOut, {}) or {} + if srcNode.get("type") == "flow.switch" and not srcPort.get("schema"): + srcPort = srcOutputPorts.get(0, {}) or srcPort tgtPort = tgtInputPorts.get(tgtIn, {}) or {} if not isinstance(srcPort, dict): @@ -229,6 +266,9 @@ def _checkPortCompatibility( continue if src_schema in accepts: continue + # ContextBranch is a typed Transit envelope (switch filtered branches). + if src_schema == "ContextBranch" and ("Transit" in accepts or "ContextBranch" in accepts): + continue # Port that only declares Transit behaves as an untyped sink (legacy graphs). 
if len(accepts) == 1 and accepts[0] == "Transit": continue @@ -374,12 +414,21 @@ def _unwrapTypedRef(value: Any) -> Any: return value.get(primary, value) -def resolveParameterReferences(value: Any, nodeOutputs: Dict[str, Any]) -> Any: +def resolveParameterReferences( + value: Any, + nodeOutputs: Dict[str, Any], + *, + consumer_node_id: Optional[str] = None, + input_sources: Optional[Dict[str, Dict[int, tuple]]] = None, +) -> Any: """ Resolve parameter references: - {{nodeId.output}} or {{nodeId.output.path}} in strings (legacy) - { "type": "ref", "nodeId": "...", "path": ["field", "nested"] } -> resolved value - { "type": "value", "value": ... } -> value (then recursively resolve) + + When ``consumer_node_id`` and ``input_sources`` are set, refs to the wired + upstream switch use that connection's output port (per-branch payload). """ import json import re @@ -395,11 +444,23 @@ def resolveParameterReferences(value: Any, nodeOutputs: Dict[str, Any]) -> Any: path = value.get("path") if node_id is not None and isinstance(path, (list, tuple)): data = nodeOutputs.get(node_id) - # Unwrap transit envelopes to access the real data - if isinstance(data, dict) and data.get("_transit"): + wired = None + if consumer_node_id and input_sources: + wired = (input_sources.get(consumer_node_id) or {}).get(0) + if wired and wired[0] == node_id: + from modules.features.graphicalEditor.switchOutput import unwrap_transit_for_port + data = unwrap_transit_for_port(data, wired[1]) + elif isinstance(data, dict) and data.get("_transit"): data = data.get("data", data) plist = list(path) resolved = _get_by_path(data, plist) + if resolved is None: + from modules.workflows.automation2.pickNotPushMigration import ( + remap_stale_presentation_ref_path, + ) + alt_path = remap_stale_presentation_ref_path(plist) + if alt_path != plist: + resolved = _get_by_path(data, alt_path) if resolved is None and isinstance(data, dict) and plist: if plist[0] == "payload" and len(plist) > 1: # Strip explicit 
"payload" prefix (legacy DataPicker paths) @@ -408,17 +469,34 @@ def resolveParameterReferences(value: Any, nodeOutputs: Dict[str, Any]) -> Any: # Form nodes store fields under {"payload": {fieldName: …}}. # DataPicker emits bare field paths like ["url"]; try under payload. resolved = _get_by_path(data["payload"], plist) - resolved = _ref_coalesce_empty_ai_result_text(data, plist, resolved) - return resolveParameterReferences(resolved, nodeOutputs) + return resolveParameterReferences( + resolved, + nodeOutputs, + consumer_node_id=consumer_node_id, + input_sources=input_sources, + ) return value if value.get("type") == "value": inner = value.get("value") - return resolveParameterReferences(inner, nodeOutputs) + return resolveParameterReferences( + inner, + nodeOutputs, + consumer_node_id=consumer_node_id, + input_sources=input_sources, + ) if value.get("type") == "system": variable = value.get("variable", "") from modules.features.graphicalEditor.portTypes import resolveSystemVariable return resolveSystemVariable(variable, nodeOutputs.get("_context", {})) - return {k: resolveParameterReferences(v, nodeOutputs) for k, v in value.items()} + return { + k: resolveParameterReferences( + v, + nodeOutputs, + consumer_node_id=consumer_node_id, + input_sources=input_sources, + ) + for k, v in value.items() + } if isinstance(value, str): def repl(m): @@ -455,10 +533,97 @@ def resolveParameterReferences(value: Any, nodeOutputs: Dict[str, Any]) -> Any: return re.sub(r"\{\{\s*([^}]+)\s*\}\}", repl, value) if isinstance(value, list): # contextBuilder: list where every item is a `{"type":"ref",...}` envelope. - # Resolve each ref and join the serialised parts into a single prompt string. + # Resolve each part; a single ref preserves the resolved type (str, list, dict). 
if value and all(isinstance(v, dict) and v.get("type") == "ref" for v in value): - from modules.workflows.methods.methodAi._common import serialize_context - parts = [serialize_context(resolveParameterReferences(v, nodeOutputs)) for v in value] - return "\n\n".join(p for p in parts if p) - return [resolveParameterReferences(v, nodeOutputs) for v in value] + resolved_parts = [ + resolveParameterReferences( + v, + nodeOutputs, + consumer_node_id=consumer_node_id, + input_sources=input_sources, + ) + for v in value + ] + if len(resolved_parts) == 1: + return resolved_parts[0] + return resolved_parts + return [ + resolveParameterReferences( + v, + nodeOutputs, + consumer_node_id=consumer_node_id, + input_sources=input_sources, + ) + for v in value + ] return value + + +def document_list_param_is_empty(val: Any) -> bool: + """True when a documentList-style parameter has not been set (wire + DataRef may fill).""" + if val is None or val == "": + return True + if isinstance(val, list) and len(val) == 0: + return True + if isinstance(val, dict): + if val.get("documents") or val.get("references") or val.get("items"): + return False + if val.get("documentId") or val.get("id"): + return False + return True + return False + + +def extract_wired_document_list(inp: Any) -> Optional[Dict[str, Any]]: + """ + Build a DocumentList-shaped dict from an upstream node output (port wire). + Used when a parameter declares ``graphInherit.kind == "documentListWire"``. 
+ """ + if inp is None: + return None + from modules.features.graphicalEditor.portTypes import ( + unwrapTransit, + _coerce_document_list_upload_fields, + _file_record_to_document, + ) + + data = unwrapTransit(inp) + if isinstance(data, str): + one = _file_record_to_document(data) + return {"documents": [one], "count": 1} if one else None + if not isinstance(data, dict): + return None + d = dict(data) + _coerce_document_list_upload_fields(d) + if "currentItem" in d: + ci = d.get("currentItem") + if ci is not None: + nested = extract_wired_document_list(ci) + if nested: + return nested + docs = d.get("documents") + if isinstance(docs, list) and len(docs) > 0: + return {"documents": docs, "count": d.get("count", len(docs))} + raw_list = d.get("documentList") + if isinstance(raw_list, list) and len(raw_list) > 0 and isinstance(raw_list[0], dict): + return {"documents": raw_list, "count": len(raw_list)} + doc_id = d.get("documentId") or d.get("id") + if doc_id and str(doc_id).strip(): + one: Dict[str, Any] = {"id": str(doc_id).strip()} + fn = d.get("fileName") or d.get("name") + if fn: + one["name"] = str(fn) + mt = d.get("mimeType") + if mt: + one["mimeType"] = str(mt) + return {"documents": [one], "count": 1} + files = d.get("files") + if isinstance(files, list) and files: + collected = [] + for item in files: + conv = _file_record_to_document(item) if isinstance(item, dict) else None + if conv: + collected.append(conv) + if collected: + return {"documents": collected, "count": len(collected)} + return None diff --git a/modules/workflows/automation2/graphicalEditorRunFileLogger.py b/modules/workflows/automation2/graphicalEditorRunFileLogger.py new file mode 100644 index 00000000..ac28ddb1 --- /dev/null +++ b/modules/workflows/automation2/graphicalEditorRunFileLogger.py @@ -0,0 +1,215 @@ +# Copyright (c) 2025 Patrick Motsch +"""Per-run NDJSON logs for persisted Automation2 / graphical-editor runs.""" + +from __future__ import annotations + +import asyncio +import json 
+import logging +import os +from datetime import datetime, timezone +from typing import Any, Dict, Optional + +from modules.shared.configuration import APP_CONFIG +from modules.shared.debugLogger import ensureDir, resolve_app_log_dir + +logger = logging.getLogger(__name__) + + +RUN_FILE_LOG_RELATIVE_ROOT = "graphical_editor_runs" +CONTEXT_KEY = "_geRunFileLogRelativeDir" +EXECUTION_FILENAME = "node_execution.ndjson" +CONTEXT_SNAPSHOT_FILENAME = "workflow_context.ndjson" + + +def graphical_editor_run_file_logging_enabled() -> bool: + """True when NDJSON files should be written for each persisted run.""" + raw = APP_CONFIG.get("APP_GRAPHICAL_EDITOR_RUN_FILE_LOGGING", False) + if isinstance(raw, bool): + return raw + s = str(raw).strip().lower() + return s in ("1", "true", "yes", "on") + + +def merge_run_context_with_ge_log_prefix( + base_context: Optional[Dict[str, Any]], + incoming: Dict[str, Any], +) -> Dict[str, Any]: + """Copy ``CONTEXT_KEY`` from *base_context* onto *incoming* if present (pause paths).""" + out = dict(incoming or {}) + prev = (base_context or {}).get(CONTEXT_KEY) + if prev is not None: + out[CONTEXT_KEY] = prev + return out + + +def merge_persisted_run_context( + automation2_interface: Any, + run_id: str, + replacement: Dict[str, Any], +) -> Dict[str, Any]: + """``{**db_context, **replacement}`` so *_geRunFileLogRelativeDir* and other keys survive pause updates.""" + prev = dict((automation2_interface.getRun(run_id) or {}).get("context") or {}) + return {**prev, **(replacement or {})} + + +class GraphicalEditorRunFileLogger: + """Append-only NDJSON log for one run folder under ``resolve_app_log_dir()``.""" + + __slots__ = ("_exec_path", "_ctx_path", "_lock", "_run_id") + + def __init__(self, run_id: str, absolute_run_dir: str) -> None: + self._run_id = run_id + ensureDir(absolute_run_dir) + self._exec_path = os.path.join(absolute_run_dir, EXECUTION_FILENAME) + self._ctx_path = os.path.join(absolute_run_dir, CONTEXT_SNAPSHOT_FILENAME) + 
self._lock = asyncio.Lock() + + @property + def run_id(self) -> str: + return self._run_id + + @staticmethod + def fresh_run_subdirectory_name(run_id: str) -> str: + ts = datetime.now(timezone.utc).strftime("%Y_%m_%d_%H_%M_%S") + return f"{ts}__{run_id}" + + @staticmethod + def relative_run_path(subdir_name: str) -> str: + """Path relative to ``APP_LOGGING_LOG_DIR`` (POSIX-style segments).""" + return "/".join((RUN_FILE_LOG_RELATIVE_ROOT, subdir_name)) + + @classmethod + def bootstrap_new_run(cls, automation2_interface: Any, run_id: str, run_context: Dict[str, Any]) -> GraphicalEditorRunFileLogger | None: + """Create filesystem folder + persist CONTEXT_KEY via ``updateRun``.""" + if not graphical_editor_run_file_logging_enabled(): + return None + if not automation2_interface or not run_id: + return None + subdir = cls.fresh_run_subdirectory_name(run_id) + rel = cls.relative_run_path(subdir) + base = resolve_app_log_dir() + absolute = os.path.join(base, RUN_FILE_LOG_RELATIVE_ROOT, subdir) + + merged = dict(run_context or {}) + merged[CONTEXT_KEY] = rel + try: + automation2_interface.updateRun(run_id, context=merged) + except Exception as ex: + logger.warning("GeRunFileLog: could not persist log dir on run=%s: %s", run_id, ex) + return None + + logger.info( + "GeRunFileLog: created run folder %s (run=%s)", + absolute, + run_id, + ) + return cls(run_id, absolute) + + @classmethod + def open_from_run_record(cls, automation2_interface: Any, run_id: str) -> GraphicalEditorRunFileLogger | None: + """Open logger for an existing run using CONTEXT_KEY from DB.""" + if not graphical_editor_run_file_logging_enabled(): + return None + if not automation2_interface or not run_id: + return None + try: + run = automation2_interface.getRun(run_id) or {} + except Exception as ex: + logger.debug("GeRunFileLog: getRun failed run=%s: %s", run_id, ex) + return None + rel = (run.get("context") or {}).get(CONTEXT_KEY) + if not rel or not isinstance(rel, str): + return None + base_norm = 
os.path.realpath(resolve_app_log_dir()) + allowed_root = os.path.realpath(os.path.join(base_norm, RUN_FILE_LOG_RELATIVE_ROOT)) + cand = os.path.realpath(os.path.join(base_norm, *rel.replace("\\", "/").split("/"))) + if cand != allowed_root and not cand.startswith(allowed_root + os.sep): + logger.warning( + "GeRunFileLog: path outside log root denied for run=%s rel=%s", + run_id, + rel, + ) + return None + absolute = cand + return cls(run_id, absolute) + + @classmethod + def find_existing_absolute_dir(cls, run_id: str) -> Optional[str]: + """If a folder named ``*{timestamp}__{run_id}`` exists under the log root, return its absolute path.""" + root = os.path.realpath(os.path.join(resolve_app_log_dir(), RUN_FILE_LOG_RELATIVE_ROOT)) + if not os.path.isdir(root): + return None + suffix = f"__{run_id}" + try: + names = sorted((n for n in os.listdir(root) if n.endswith(suffix)), reverse=True) + except OSError: + return None + if not names: + return None + cand = os.path.realpath(os.path.join(root, names[0])) + allowed_root = root + if cand != allowed_root and not cand.startswith(allowed_root + os.sep): + return None + return cand if os.path.isdir(cand) else None + + @classmethod + def ensure_attached(cls, automation2_interface: Any, run_id: str) -> GraphicalEditorRunFileLogger | None: + """Open logger from DB, or reattach an on-disk folder for *run_id*, or create a new one.""" + opened = cls.open_from_run_record(automation2_interface, run_id) + if opened is not None: + return opened + if not graphical_editor_run_file_logging_enabled(): + return None + if not automation2_interface or not run_id: + return None + try: + run = automation2_interface.getRun(run_id) or {} + except Exception as ex: + logger.debug("GeRunFileLog: ensure getRun failed run=%s: %s", run_id, ex) + return None + prev_ctx = dict(run.get("context") or {}) + + existing_abs = cls.find_existing_absolute_dir(run_id) + if existing_abs: + base_norm = os.path.realpath(resolve_app_log_dir()) + rel = 
os.path.relpath(existing_abs, base_norm).replace(os.sep, "/") + merged = {**prev_ctx, CONTEXT_KEY: rel} + try: + automation2_interface.updateRun(run_id, context=merged) + except Exception as ex: + logger.warning("GeRunFileLog: reattach persist failed run=%s: %s", run_id, ex) + return None + logger.info("GeRunFileLog: reattached existing folder for run=%s -> %s", run_id, existing_abs) + return cls(run_id, existing_abs) + + subdir = cls.fresh_run_subdirectory_name(run_id) + rel = cls.relative_run_path(subdir) + base = resolve_app_log_dir() + absolute = os.path.join(base, RUN_FILE_LOG_RELATIVE_ROOT, subdir) + merged = {**prev_ctx, CONTEXT_KEY: rel} + try: + automation2_interface.updateRun(run_id, context=merged) + except Exception as ex: + logger.warning("GeRunFileLog: ensure new folder persist failed run=%s: %s", run_id, ex) + return None + logger.info("GeRunFileLog: created late attach folder %s (run=%s)", absolute, run_id) + return cls(run_id, absolute) + + async def append_node_execution_line(self, record: Dict[str, Any]) -> None: + line = json.dumps(record, ensure_ascii=False, default=str) + async with self._lock: + try: + with open(self._exec_path, "a", encoding="utf-8") as f: + f.write(line + "\n") + except Exception as ex: + logger.warning("GeRunFileLog: append execution failed run=%s: %s", self._run_id, ex) + + async def append_context_snapshot_line(self, record: Dict[str, Any]) -> None: + line = json.dumps(record, ensure_ascii=False, default=str) + async with self._lock: + try: + with open(self._ctx_path, "a", encoding="utf-8") as f: + f.write(line + "\n") + except Exception as ex: + logger.warning("GeRunFileLog: append context snapshot failed run=%s: %s", self._run_id, ex) diff --git a/modules/workflows/automation2/pickNotPushMigration.py b/modules/workflows/automation2/pickNotPushMigration.py index fe347761..a40e6c33 100644 --- a/modules/workflows/automation2/pickNotPushMigration.py +++ b/modules/workflows/automation2/pickNotPushMigration.py @@ -1,18 +1,26 
@@ # Copyright (c) 2025 Patrick Motsch """ -Graph helpers for Pick-not-Push: materialize connectionReference as explicit DataRefs. +Graph helpers for Pick-not-Push: materialize typed DataRefs before executeGraph runs. -Runtime: executeGraph deep-copies the version graph and applies materialize_connection_refs -so downstream nodes resolve connection UUIDs from upstream output.connection.id. +- ``materializeConnectionRefs``: empty ``connectionReference`` from upstream connection provenance. +- ``materializePrimaryTextHandover``: parameters whose static definition includes + ``graphInherit.kind == "primaryTextRef"`` (canonical paths: ``PRIMARY_TEXT_HANDOVER_REF_PATH``). +- ``materializeRecommendedDataPickRef``: parameters with ``graphInherit.kind == "recommendedDataPickRef"`` + use the upstream output port's ``dataPickOptions`` entry with ``recommended: true``. + +Runtime: executeGraph deep-copies the version graph and applies these passes in order. """ from __future__ import annotations import copy import logging -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES -from modules.features.graphicalEditor.portTypes import resolve_output_schema_name +from modules.features.graphicalEditor.portTypes import ( + PRIMARY_TEXT_HANDOVER_REF_PATH, + resolve_output_schema_name, +) from modules.workflows.automation2.graphUtils import buildConnectionMap, getInputSources logger = logging.getLogger(__name__) @@ -81,3 +89,207 @@ def materializeConnectionRefs(graph: Dict[str, Any]) -> Dict[str, Any]: logger.debug("materializeConnectionRefs: %s.connectionReference -> ref %s.connection.id", nid, src_id) return g + + +def _slot_empty_for_primary_text_inherit(val: Any) -> bool: + return val is None or val == "" or val == [] + + +def materializePrimaryTextHandover(graph: Dict[str, Any]) -> Dict[str, Any]: + """ + For parameters declaring ``graphInherit.kind == "primaryTextRef"`` 
(optional ``port``, default 0) with an + empty value, set an explicit ``DataRef`` to the canonical text field of the producer on + that port (see ``PRIMARY_TEXT_HANDOVER_REF_PATH`` keyed by upstream output schema name). + """ + g = copy.deepcopy(graph) + nodes: List[Dict[str, Any]] = g.get("nodes") or [] + connections = g.get("connections") or [] + if not nodes: + return g + + conn_map = buildConnectionMap(connections) + node_by_id = {n["id"]: n for n in nodes if n.get("id")} + + for node in nodes: + nid = node.get("id") + ntype = node.get("type") + if not nid or not ntype: + continue + node_def = _NODE_DEF_BY_ID.get(ntype) + if not node_def: + continue + params = node.get("parameters") + if not isinstance(params, dict): + node["parameters"] = {} + params = node["parameters"] + + for pdef in node_def.get("parameters") or []: + gi = pdef.get("graphInherit") + if not isinstance(gi, dict) or gi.get("kind") != "primaryTextRef": + continue + pname = pdef.get("name") + if not pname: + continue + port_ix = int(gi.get("port", 0)) + if not _slot_empty_for_primary_text_inherit(params.get(pname)): + continue + input_sources = getInputSources(nid, conn_map) + if port_ix not in input_sources: + continue + src_id, _ = input_sources[port_ix] + src_node = node_by_id.get(src_id) or {} + src_def = _NODE_DEF_BY_ID.get(src_node.get("type") or "") + if not src_def: + continue + out_port = (src_def.get("outputPorts") or {}).get(0, {}) or {} + out_schema = resolve_output_schema_name(src_node, out_port if isinstance(out_port, dict) else {}) + # Port-level override takes precedence over the schema-wide default path. + # Example: context.extractContent sets primaryTextRefPath=["data"] because + # its ``response`` field is intentionally empty. 
+ ref_path = ( + out_port.get("primaryTextRefPath") + if isinstance(out_port, dict) and out_port.get("primaryTextRefPath") + else PRIMARY_TEXT_HANDOVER_REF_PATH.get(out_schema) + ) + if not ref_path: + continue + params[pname] = _data_ref(src_id, list(ref_path)) + logger.debug( + "materializePrimaryTextHandover: %s.%s -> ref %s path=%s", + nid, + pname, + src_id, + ref_path, + ) + + return g + + +def _recommended_data_pick_path(out_port: Dict[str, Any]) -> Optional[List[Any]]: + opts = out_port.get("dataPickOptions") if isinstance(out_port, dict) else None + if not isinstance(opts, list): + return None + for opt in opts: + if not isinstance(opt, dict): + continue + if opt.get("recommended") is True: + path = opt.get("path") + if isinstance(path, list) and path: + return list(path) + return None + + +def materializeRecommendedDataPickRef(graph: Dict[str, Any]) -> Dict[str, Any]: + """Materialize empty parameters that declare ``graphInherit.kind == \"recommendedDataPickRef\"``.""" + g = copy.deepcopy(graph) + nodes: List[Dict[str, Any]] = g.get("nodes") or [] + connections = g.get("connections") or [] + if not nodes: + return g + + conn_map = buildConnectionMap(connections) + node_by_id = {n["id"]: n for n in nodes if n.get("id")} + + for node in nodes: + nid = node.get("id") + ntype = node.get("type") + if not nid or not ntype: + continue + node_def = _NODE_DEF_BY_ID.get(ntype) + if not node_def: + continue + params = node.get("parameters") + if not isinstance(params, dict): + node["parameters"] = {} + params = node["parameters"] + + for pdef in node_def.get("parameters") or []: + gi = pdef.get("graphInherit") + if not isinstance(gi, dict) or gi.get("kind") != "recommendedDataPickRef": + continue + pname = pdef.get("name") + if not pname: + continue + port_ix = int(gi.get("port", 0)) + if not _slot_empty_for_primary_text_inherit(params.get(pname)): + continue + input_sources = getInputSources(nid, conn_map) + if port_ix not in input_sources: + continue + src_id, _ = 
input_sources[port_ix] + src_node = node_by_id.get(src_id) or {} + src_def = _NODE_DEF_BY_ID.get(src_node.get("type") or "") + if not src_def: + continue + out_port = (src_def.get("outputPorts") or {}).get(port_ix, {}) or {} + if not isinstance(out_port, dict): + out_port = (src_def.get("outputPorts") or {}).get(0, {}) or {} + ref_path = _recommended_data_pick_path(out_port if isinstance(out_port, dict) else {}) + if not ref_path: + continue + ref = _data_ref(src_id, ref_path) + if pdef.get("frontendType") == "contextBuilder": + params[pname] = [ref] + else: + params[pname] = ref + logger.debug( + "materializeRecommendedDataPickRef: %s.%s -> ref %s path=%s", + nid, + pname, + src_id, + ref_path, + ) + + return g + + +_STALE_FILE_CREATE_CONTEXT_PATHS = frozenset({ + ("responseData",), + ("response",), + ("merged",), + ("documents", 0, "documentData"), +}) + + +def remap_stale_presentation_ref_path(path: List[Any]) -> List[Any]: + """Map legacy text-handover paths to unified presentation ``data``.""" + if tuple(path) in _STALE_FILE_CREATE_CONTEXT_PATHS: + return ["data"] + return list(path) + + +def _normalize_presentation_refs_in_value(val: Any) -> Any: + """Rewrite stale ref paths inside ``contextBuilder`` lists or bare refs.""" + if isinstance(val, dict) and val.get("type") == "ref": + path = val.get("path") + if isinstance(path, list) and path: + new_path = remap_stale_presentation_ref_path(path) + if new_path != path: + return {**val, "path": new_path} + return val + if isinstance(val, list): + return [_normalize_presentation_refs_in_value(item) for item in val] + return val + + +def normalizeFileCreatePresentationRefs(graph: Dict[str, Any]) -> Dict[str, Any]: + """Remap legacy ``file.create`` context refs to unified presentation ``data``.""" + g = copy.deepcopy(graph) + nodes: List[Dict[str, Any]] = g.get("nodes") or [] + for node in nodes: + if node.get("type") != "file.create": + continue + params = node.get("parameters") + if not isinstance(params, dict): + 
continue + ctx = params.get("context") + if ctx in (None, "", []): + continue + normalized = _normalize_presentation_refs_in_value(ctx) + if normalized != ctx: + params["context"] = normalized + logger.debug( + "normalizeFileCreatePresentationRefs: %s.context remapped to presentation data ref", + node.get("id"), + ) + return g diff --git a/modules/workflows/automation2/workflowArtifactVisibility.py b/modules/workflows/automation2/workflowArtifactVisibility.py new file mode 100644 index 00000000..0eb8d4bd --- /dev/null +++ b/modules/workflows/automation2/workflowArtifactVisibility.py @@ -0,0 +1,32 @@ +# Copyright (c) 2025 Patrick Motsch +"""Heuristics for hiding internal workflow artefacts from user-facing file lists.""" + +from __future__ import annotations + +from typing import Any, Mapping, Optional + + +_WORKFLOW_INTERNAL_FILE_TAG = "_workflowInternal" + + +def suppress_workflow_file_in_workspace_ui(meta: Optional[Mapping[str, Any]]) -> bool: + """True when a file row should not appear in user-facing file lists. + + Used by Automation Workspace **and** ``/api/files/list`` (Meine Dateien). + Matches persisted JSON handovers from transient runs (``extracted_content_transient*``), + internal extract image files (``extract_media_*``), the ``_workflowInternal`` tag, and + optional explicit flags. 
+ """ + if not isinstance(meta, Mapping): + return False + tags = meta.get("tags") + if isinstance(tags, list) and _WORKFLOW_INTERNAL_FILE_TAG in tags: + return True + fn = str(meta.get("fileName") or "").lower() + if "extracted_content_transient" in fn: + return True + if "extract_media_" in fn: + return True + if meta.get("suppressInWorkflowFileLists") is True: + return True + return False diff --git a/modules/workflows/methods/methodAi/_common.py b/modules/workflows/methods/methodAi/_common.py index c2812a5c..27b36663 100644 --- a/modules/workflows/methods/methodAi/_common.py +++ b/modules/workflows/methods/methodAi/_common.py @@ -4,27 +4,101 @@ """Shared helpers for AI workflow actions.""" import json -from typing import Any +from typing import Any, Optional -def serialize_context(val: Any) -> str: +def is_image_action_document_list(val: Any) -> bool: + """True if ``val`` is a non-empty list of ActionDocument-shaped dicts (mimeType image/*).""" + if not isinstance(val, list) or not val: + return False + for item in val: + if not isinstance(item, dict): + return False + mime = str(item.get("mimeType") or "").strip().lower() + if not mime.startswith("image/"): + return False + return True + + +def _handover_response_plain(val: Any) -> Optional[str]: + """If ``val`` is a dict with a non-empty ``response`` string, return it (BOM-stripped).""" + if not isinstance(val, dict): + return None + r = val.get("response") + if r is None or not str(r).strip(): + return None + return str(r).strip().lstrip("\ufeff") + + +def primary_text_for_prompt_context(val: Any) -> str: + """Flatten ActionResult / presentation / merge payloads to readable text. + + Used when merging multiple context-builder refs so extract outputs are not + turned into giant JSON via ``serialize_context`` (empty ``response``). 
+ """ + if val is None: + return "" + if isinstance(val, str): + s = val.strip().lstrip("\ufeff") + if not s: + return "" + if len(s) >= 2 and ((s.startswith("[") and s.endswith("]")) or (s.startswith("{") and s.endswith("}"))): + try: + return primary_text_for_prompt_context(json.loads(s)) + except (json.JSONDecodeError, TypeError, ValueError): + pass + return s + if isinstance(val, list): + chunks = [primary_text_for_prompt_context(item) for item in val] + chunks = [c for c in chunks if c] + return "\n\n".join(chunks) + if isinstance(val, dict): + got = _handover_response_plain(val) + if got is not None: + return got + inner = val.get("data") + if isinstance(inner, dict): + from modules.workflows.methods.methodContext.actions.extractContent import ( + joined_text_from_extract_node_data, + ) + + t = (joined_text_from_extract_node_data(inner) or "").strip() + if t: + return t + from modules.workflows.methods.methodContext.actions.extractContent import ( + joined_text_from_extract_node_data, + ) + + return (joined_text_from_extract_node_data(val) or "").strip() + return str(val).strip() if str(val).strip() else "" + + +def serialize_context(val: Any, *, prefer_handover_primary: bool = False) -> str: """Convert any context value to a readable string for use in AI prompts. - None / empty string → "" - empty dict (no keys) → "" (avoids literal "{}" in file.create / prompts) - str → as-is - - dict / list → pretty-printed JSON + - dict / list → pretty-printed JSON (unless ``prefer_handover_primary`` and dict has ``response``) + - if JSON encoding fails (cycles, etc.) 
but dict has ``response``, return that text instead of ``str(dict)`` - anything else → str() """ if val is None or val == "" or val == []: return "" if isinstance(val, dict) and len(val) == 0: return "" + if prefer_handover_primary: + got = _handover_response_plain(val) + if got is not None: + return got if isinstance(val, str): - return val.strip() + return val.strip().lstrip("\ufeff") try: - return json.dumps(val, ensure_ascii=False, indent=2) + return json.dumps(val, ensure_ascii=False, indent=2, default=str) except Exception: + got = _handover_response_plain(val) + if got is not None: + return got return str(val) diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py index f4380ae0..46aac70d 100644 --- a/modules/workflows/methods/methodAi/actions/process.py +++ b/modules/workflows/methods/methodAi/actions/process.py @@ -389,34 +389,33 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: )) final_documents = action_documents + handover_data = None else: - # Text response - create document from content - # If no extension provided, use "txt" (required for filename) - extension = output_extension.lstrip('.') if output_extension else "txt" - meaningful_name = self._generateMeaningfulFileName( - base_name="ai", - extension=extension, - action_name="result" - ) - validationMetadata = { - "actionType": "ai.process", - "resultType": normalized_result_type if normalized_result_type else None, - "outputFormat": output_format if output_format else None, - "hasDocuments": False, - "contentType": "text" + # Text-only response: keep handover in ActionResult.data (no ActionDocument). + # Avoids automation2 persisting a synthetic file per run; use ai.generateDocument for files. 
+ body = aiResponse.content + if body is None: + body = "" + elif not isinstance(body, str): + body = str(body) + final_documents = [] + handover_data = { + "response": body, + "resultType": normalized_result_type, + "outputFormat": output_format, + "contentType": "text", } - action_document = ActionDocument( - documentName=meaningful_name, - documentData=aiResponse.content, - mimeType=output_mime_type, - validationMetadata=validationMetadata - ) - final_documents = [action_document] + md = getattr(aiResponse, "metadata", None) + if md is not None: + extra = getattr(md, "additionalData", None) + if isinstance(extra, dict): + for k, v in extra.items(): + handover_data.setdefault(k, v) # Complete progress tracking self.services.chat.progressLogFinish(operationId, True) - return ActionResult.isSuccess(documents=final_documents) + return ActionResult.isSuccess(documents=final_documents, data=handover_data) except (SubscriptionInactiveException, BillingContextError): try: diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py index 3a47518f..64fc4f0f 100644 --- a/modules/workflows/methods/methodAi/methodAi.py +++ b/modules/workflows/methods/methodAi/methodAi.py @@ -230,7 +230,14 @@ class MethodAi(MethodBase): required=False, default="txt", description="Output file extension" - ) + ), + "folderId": WorkflowActionParameter( + name="folderId", + type="str", + frontendType=FrontendType.USER_FILE_FOLDER, + required=False, + description="Target folder in My Files when persisting workflow output", + ), }, execute=summarizeDocument.__get__(self, self.__class__) ), @@ -275,7 +282,14 @@ class MethodAi(MethodBase): frontendType=FrontendType.TEXT, required=False, description="Output file extension. 
If not specified, uses same format as input" - ) + ), + "folderId": WorkflowActionParameter( + name="folderId", + type="str", + frontendType=FrontendType.USER_FILE_FOLDER, + required=False, + description="Target folder in My Files when persisting workflow output", + ), }, execute=translateDocument.__get__(self, self.__class__) ), @@ -307,7 +321,14 @@ class MethodAi(MethodBase): required=False, default=True, description="Whether to preserve document structure (headings, tables, etc.)" - ) + ), + "folderId": WorkflowActionParameter( + name="folderId", + type="str", + frontendType=FrontendType.USER_FILE_FOLDER, + required=False, + description="Target folder in My Files when persisting workflow output", + ), }, execute=convertDocument.__get__(self, self.__class__) ), @@ -371,6 +392,13 @@ class MethodAi(MethodBase): required=False, description="Legacy/API output format extension (e.g. txt, docx). Ignored when outputFormat is set." ), + "folderId": WorkflowActionParameter( + name="folderId", + type="str", + frontendType=FrontendType.USER_FILE_FOLDER, + required=False, + description="Target folder in My Files when persisting workflow output", + ), }, execute=generateDocument.__get__(self, self.__class__) ), @@ -411,6 +439,13 @@ class MethodAi(MethodBase): default="", description="Additional context from upstream steps.", ), + "folderId": WorkflowActionParameter( + name="folderId", + type="str", + frontendType=FrontendType.USER_FILE_FOLDER, + required=False, + description="Target folder in My Files when persisting workflow output", + ), }, execute=generateCode.__get__(self, self.__class__) ), diff --git a/modules/workflows/methods/methodBase.py b/modules/workflows/methods/methodBase.py index 02cae134..5a766563 100644 --- a/modules/workflows/methods/methodBase.py +++ b/modules/workflows/methods/methodBase.py @@ -194,40 +194,41 @@ class MethodBase: return wrapper def _validateParameters(self, parameters: Dict[str, Any], paramDefs: Dict[str, WorkflowActionParameter]) -> 
Dict[str, Any]: - """Validate parameters against definitions - - IMPORTANT: System parameters (like parentOperationId, expectedDocumentFormats) are preserved - even if they're not in the parameter definitions, as they're used internally by the framework. + """Validate declared parameters; pass through unknown ones from the node definition. + + The graphical-editor node definition is the source of truth for the full UI parameter + list. Actions only need to declare the parameters they want validated/defaulted; any + additional parameter passed in by the executor (e.g. contentFilter, pdfExtractMode, + outputMode for context.extractContent) is preserved so the action can read it. + + System parameters (parentOperationId, _runContext, _upstreamPayload, ...) are always + preserved as before. """ - validated = {} - - # System parameters that should always be preserved, even if not in paramDefs - systemParams = ['parentOperationId', 'expectedDocumentFormats'] - for sysParam in systemParams: - if sysParam in parameters: - validated[sysParam] = parameters[sysParam] - + validated: Dict[str, Any] = {} + for paramName, paramDef in paramDefs.items(): value = parameters.get(paramName) - - # Check required + if paramDef.required and value is None: raise ValueError(f"Required parameter '{paramName}' is missing") - - # Use default if not provided + if value is None and paramDef.default is not None: value = paramDef.default - - # Type validation + if value is not None: value = self._validateType(value, paramDef.type) - - # Custom validation rules + if paramDef.validation and value is not None: self._applyValidationRules(value, paramDef.validation) - + validated[paramName] = value - + + # Preserve every additional parameter the executor passed in (node-defined params, + # system params, declarative injections). This keeps the node definition authoritative. 
+ for k, v in parameters.items(): + if k not in validated: + validated[k] = v + return validated def _validateType(self, value: Any, expectedType: str) -> Any: diff --git a/modules/workflows/methods/methodContext/actions/extractContent.py b/modules/workflows/methods/methodContext/actions/extractContent.py index 19100fb7..52d07b34 100644 --- a/modules/workflows/methods/methodContext/actions/extractContent.py +++ b/modules/workflows/methods/methodContext/actions/extractContent.py @@ -1,149 +1,1874 @@ # Copyright (c) 2025 Patrick Motsch # All rights reserved. +"""context.extractContent — extracts content without AI. + +``ActionResult.data`` is one **presentation** envelope (`schemaVersion`, `kind`, +`outputMode`, `fileOrder`, `files`) matching node parameters plus ``_meta`` (operation refs, +persisted-image trace, presentation config). + +Raw ``ContentExtracted`` is not emitted on the automation output; persistence still uses it +internally when ``_runContext`` enables image uploads. + +Older ``kind: context.extractContent.handover.v1`` is legacy-only (merge/tests), not produced here.""" + +import base64 as _b64 +import binascii as _binascii +import copy +import csv +import json import logging +import re +from io import BytesIO, StringIO import time -from typing import Dict, Any -from modules.datamodels.datamodelChat import ActionResult, ActionDocument -from modules.datamodels.datamodelDocref import ( - DocumentReferenceList, - coerceDocumentReferenceList, -) -from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy +from typing import Any, Dict, List, Optional, Tuple + +from modules.datamodels.datamodelChat import ActionResult +from modules.datamodels.datamodelDocref import coerceDocumentReferenceList +from modules.datamodels.datamodelExtraction import ContentExtracted, ExtractionOptions logger = logging.getLogger(__name__) -async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: - operationId = None - try: - workflowId 
= self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" - operationId = f"context_extract_{workflowId}_{int(time.time())}" - - documentListParam = parameters.get("documentList") - if not documentListParam: - return ActionResult.isFailure(error="documentList is required") +_UNSAFE_FILE_KEY = re.compile(r"[^\w\-.\(\)\[\]%@+]") - documentList = coerceDocumentReferenceList(documentListParam) - if not documentList.references: - return ActionResult.isFailure( - error=f"documentList could not be parsed (type={type(documentListParam).__name__}); " - f"expected DocumentReferenceList, list of strings/dicts, or " - f"a wrapper dict like {{'documents': [...]}}" - ) - - # Start progress tracking - parentOperationId = parameters.get('parentOperationId') - self.services.chat.progressLogStart( - operationId, - "Extracting content from documents", - "Content Extraction", - f"Documents: {len(documentList.references)}", - parentOperationId=parentOperationId - ) - - # Get ChatDocuments from documentList - self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents") - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) - - if not chatDocuments: - self.services.chat.progressLogFinish(operationId, False) - return ActionResult.isFailure(error="No documents found in documentList") - - logger.info(f"Extracting content from {len(chatDocuments)} documents") - - # Prepare extraction options - self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options") - extractionOptionsParam = parameters.get("extractionOptions") - - # Convert dict to ExtractionOptions object if needed, or create defaults - if extractionOptionsParam: - if isinstance(extractionOptionsParam, dict): - # Ensure required fields are present - if "prompt" not in extractionOptionsParam: - extractionOptionsParam["prompt"] = "Extract all content from the document" - if "mergeStrategy" not in extractionOptionsParam: - 
extractionOptionsParam["mergeStrategy"] = MergeStrategy( - mergeType="concatenate", - groupBy="typeGroup", - orderBy="id" - ) - # Convert dict to ExtractionOptions object - try: - extractionOptions = ExtractionOptions(**extractionOptionsParam) - except Exception as e: - logger.warning(f"Failed to create ExtractionOptions from dict: {str(e)}, using defaults") - extractionOptions = None - elif isinstance(extractionOptionsParam, ExtractionOptions): - extractionOptions = extractionOptionsParam - else: - # Invalid type, use defaults - logger.warning(f"Invalid extractionOptions type: {type(extractionOptionsParam)}, using defaults") - extractionOptions = None +# Bumped when ``ActionResult.data`` shape changes (`_meta.extractPayloadSchemaVersion`). +EXTRACT_PAYLOAD_SCHEMA_VERSION = 3 + +LEGACY_HANDOVER_KIND = "context.extractContent.handover.v1" +HANDOVER_KIND = LEGACY_HANDOVER_KIND +PRESENTATION_KIND = "context.extractContent.presentation.v1" + +_CONTENT_FILTER_OPTIONS = ("all", "textOnly", "imagesOnly", "noImages") +_CONTENT_FILTER_BY_LOWER = {k.lower(): k for k in _CONTENT_FILTER_OPTIONS} + + +def _canonical_content_filter(raw: Any) -> str: + """Map JSON / UI values to canonical ``_CONTENT_FILTER_OPTIONS`` keys (case-insensitive).""" + s = str(raw if raw is not None else "all").strip() + if not s: + return "all" + if s in _CONTENT_FILTER_OPTIONS: + return s + return _CONTENT_FILTER_BY_LOWER.get(s.lower()) or "all" + + +PRESENTATION_SCHEMA_VERSION = 1 + +_PDF_EXTRACT_PRESENTATION_MODES = ("text", "tables", "images", "all") +_OUTPUT_MODES = ("blob", "lines", "pages", "chunks", "structured") +_SPLIT_BY_VALUES = ("newline", "paragraph", "sentence") +_CHUNK_UNITS = ("tokens", "characters", "words") + +def _apply_content_filter(payload: Dict[str, Any], content_filter: str) -> Dict[str, Any]: + """Filter parts in the handover payload by content_filter. + + Semantics: + - all: keep every part (no-op). + - textOnly: whitelist — only typeGroup in (text, table, structure). 
+ - imagesOnly: whitelist — only typeGroup == image. + - noImages: blacklist — every typeGroup except image (wider than textOnly; + future non-image types are retained). + """ + import copy + + if content_filter == "all": + return payload + result = copy.deepcopy(payload) + for bucket in (result.get("files") or {}).values(): + if not isinstance(bucket, dict): + continue + parts = bucket.get("parts") or [] + if content_filter == "textOnly": + parts = [p for p in parts if isinstance(p, dict) and (p.get("typeGroup") or "") in ("text", "table", "structure")] + elif content_filter == "imagesOnly": + parts = [p for p in parts if isinstance(p, dict) and (p.get("typeGroup") or "") == "image"] + elif content_filter == "noImages": + parts = [p for p in parts if isinstance(p, dict) and (p.get("typeGroup") or "") != "image"] + bucket["parts"] = parts + bucket["byTypeGroup"] = _rebuild_by_type_group(parts) + return result + + +def _filter_extractions_by_content_filter( + extracted_results: List[ContentExtracted], + content_filter: str, +) -> List[ContentExtracted]: + """Return copies with ``parts`` trimmed (same semantics as ``_apply_content_filter``).""" + if content_filter == "all": + return extracted_results + out: List[ContentExtracted] = [] + for ec in extracted_results: + parts = list(ec.parts or []) + if content_filter == "textOnly": + parts = [ + p + for p in parts + if (getattr(p, "typeGroup", None) or "") in ("text", "table", "structure") + ] + elif content_filter == "imagesOnly": + parts = [p for p in parts if (getattr(p, "typeGroup", None) or "") == "image"] + elif content_filter == "noImages": + parts = [p for p in parts if (getattr(p, "typeGroup", None) or "") != "image"] + copied = ec.model_copy(update={"parts": parts}) + out.append(copied) + return out + + +def _serialize_content_extracted_for_output(ec: ContentExtracted) -> Dict[str, Any]: + """Serialize for internal persist path (no exported ``summary``); not emitted on ``ActionResult.data``.""" + d = 
ec.model_dump(mode="json", exclude_none=True) if hasattr(ec, "model_dump") else ec.dict(exclude_none=True) + if isinstance(d, dict): + d.pop("summary", None) + return d + + +def _default_extraction_options() -> ExtractionOptions: + """No merge — keep all parts for downstream JSON selection.""" + return ExtractionOptions( + prompt="Extract all content from the document", + mergeStrategy=None, + processDocumentsIndividually=True, + outputFormat="parts", + outputDetail="full", + ) + + +def _file_json_key(display_name: str, index: int, key_counts: Dict[str, int]) -> str: + stem = (display_name or "").strip() or f"document_{index + 1}" + slug = stem.replace("/", "_").replace("\\", "_").replace(" ", "_") + slug = _UNSAFE_FILE_KEY.sub("_", slug).strip("_") or f"document_{index + 1}" + base = f"file_{index + 1}_{slug}" + n = key_counts.get(base, 0) + key_counts[base] = n + 1 + return base if n == 0 else f"{base}__{n}" + + +def _serialize_parts(parts: Any) -> List[Dict[str, Any]]: + out: List[Dict[str, Any]] = [] + for p in parts or []: + if hasattr(p, "model_dump"): + out.append(p.model_dump(mode="json")) + elif isinstance(p, dict): + out.append(dict(p)) + return out + + +def _rebuild_by_type_group(parts_ser: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]: + by_type: Dict[str, List[Dict[str, Any]]] = {} + for entry in parts_ser: + if not isinstance(entry, dict): + continue + tg = (entry.get("typeGroup") or "").strip() or "_other" + by_type.setdefault(tg, []).append(entry) + return by_type + + +def _part_carries_plain_text(p: dict) -> bool: + """Whether a serialized extraction part contributes to a flat ``response`` string.""" + if not isinstance(p, dict): + return False + tg = (p.get("typeGroup") or "").strip() + if tg in ("text", "table"): + return True + mime = (p.get("mimeType") or "").strip().lower() + if tg == "structure" and mime in ("text/plain", "text/html", "text/markdown"): + return True + return False + + +def 
_joined_text_from_handover_payload(payload: Dict[str, Any]) -> str: + """Concatenate text parts across fileOrder for AiResult-compatible ``response``.""" + files_section = payload.get("files") or {} + ordered = payload.get("fileOrder") + keys: List[str] = ordered if isinstance(ordered, list) and ordered else list(files_section.keys()) + chunks: List[str] = [] + for fk in keys: + bucket = files_section.get(fk) + if not isinstance(bucket, dict): + continue + for p in bucket.get("parts") or []: + if not isinstance(p, dict): + continue + if not _part_carries_plain_text(p): + continue + raw = p.get("data") + if raw is None: + continue + s = str(raw).strip() + if s: + chunks.append(s) + return "\n\n".join(chunks) + + +def _normalize_bool_select(value: Any, *, default: bool) -> bool: + s = str(value if value is not None else "").strip().lower() + if s in ("true", "1", "yes", "on"): + return True + if s in ("false", "0", "no", "off"): + return False + return default + + +def _parse_positive_int(value: Any, default: int) -> int: + try: + n = int(str(value).strip()) + return n if n > 0 else default + except (TypeError, ValueError): + return default + + +def _parse_non_negative_int(value: Any, default: int) -> int: + try: + n = int(str(value).strip()) + return n if n >= 0 else default + except (TypeError, ValueError): + return default + + +def parse_presentation_parameters(parameters: Dict[str, Any]) -> Dict[str, Any]: + """Defaults match ``context.extractContent`` node schema in ``context.py``. + + ``contentFilter=all`` plus legacy default ``pdfExtractMode=text`` would drop + image parts from **presentation** even though extraction kept them — we + coerce that combination to ``all``. When ``pdfExtractMode`` is omitted, + sensible defaults derive from ``contentFilter``. 
+ """ + output_mode = str(parameters.get("outputMode") or "lines").strip().lower() + if output_mode not in _OUTPUT_MODES: + output_mode = "lines" + split_by = str(parameters.get("splitBy") or "newline").strip().lower() + if split_by not in _SPLIT_BY_VALUES: + split_by = "newline" + chunk_unit = str(parameters.get("chunkSizeUnit") or "tokens").strip().lower() + if chunk_unit not in _CHUNK_UNITS: + chunk_unit = "tokens" + content_filter = _canonical_content_filter(parameters.get("contentFilter")) + raw_pdf = parameters.get("pdfExtractMode") + raw_pdf_str = str(raw_pdf).strip() if raw_pdf is not None else "" + if raw_pdf_str: + pdf_mode = raw_pdf_str.lower() + elif content_filter == "imagesOnly": + pdf_mode = "images" + elif content_filter in ("textOnly", "noImages"): + pdf_mode = "text" + else: + pdf_mode = "all" + if pdf_mode not in _PDF_EXTRACT_PRESENTATION_MODES: + pdf_mode = "all" + # Coerce pdfExtractMode to match contentFilter intent. contentFilter is the + # authoritative user choice; pdfExtractMode is a presentation-layer detail that + # must stay consistent with it. 
+ if content_filter == "all" and pdf_mode == "text": + pdf_mode = "all" + elif content_filter == "imagesOnly" and pdf_mode != "images": + pdf_mode = "images" + elif content_filter == "textOnly" and pdf_mode not in ("text", "tables"): + pdf_mode = "text" + elif content_filter == "noImages" and pdf_mode == "images": + pdf_mode = "text" + return { + "outputMode": output_mode, + "splitBy": split_by, + "chunkSizeUnit": chunk_unit, + "chunkSize": _parse_positive_int(parameters.get("chunkSize"), 500), + "chunkOverlap": _parse_non_negative_int(parameters.get("chunkOverlap"), 0), + "filterEmptyLines": _normalize_bool_select(parameters.get("filterEmptyLines"), default=True), + "trimWhitespace": _normalize_bool_select(parameters.get("trimWhitespace"), default=True), + "includeLineNumbers": _normalize_bool_select(parameters.get("includeLineNumbers"), default=False), + "includeMetadata": _normalize_bool_select(parameters.get("includeMetadata"), default=False), + "csvHeaderRow": _normalize_bool_select(parameters.get("csvHeaderRow"), default=True), + "pdfExtractMode": pdf_mode, + "markdownPreserveFormatting": _normalize_bool_select( + parameters.get("markdownPreserveFormatting"), + default=False, + ), + } + + +def _copy_part(p: Dict[str, Any]) -> Dict[str, Any]: + return dict(p) + + +def _presentation_filter_parts(parts: List[Dict[str, Any]], pdf_mode: str) -> List[Dict[str, Any]]: + """Filter **copies** of parts for the presentation layer (``pdfExtractMode``).""" + if pdf_mode == "all": + return [_copy_part(p) for p in parts if isinstance(p, dict)] + out: List[Dict[str, Any]] = [] + for p in parts: + if not isinstance(p, dict): + continue + tg = (p.get("typeGroup") or "").strip() + if pdf_mode == "text": + if tg == "image": + continue + if tg in ("text", "table", "structure"): + out.append(_copy_part(p)) + elif pdf_mode == "tables": + if tg == "table": + out.append(_copy_part(p)) + elif pdf_mode == "images": + if tg == "image": + out.append(_copy_part(p)) + return out + + +def 
_simplify_markdown_light(text: str) -> str: + """Cheap markdown-to-plain pass (no tokenizer library).""" + s = text + s = re.sub(r"`([^`]*)`", r"\1", s) + s = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", s) + s = re.sub(r"^#+\s*", "", s, flags=re.MULTILINE) + s = s.replace("**", "").replace("__", "") + s = re.sub(r"[*_]{1,2}([^*_]+)[*_]{1,2}", r"\1", s) + return s.strip() + + +def _apply_markdown_presentation_on_parts(parts: List[Dict[str, Any]], preserve: bool) -> None: + if preserve: + return + for p in parts: + mime = (p.get("mimeType") or "").strip().lower() + if mime != "text/markdown": + continue + raw = p.get("data") + if raw is None: + continue + p["data"] = _simplify_markdown_light(str(raw)) + + +def _part_metadata_dict(p: Dict[str, Any]) -> Dict[str, Any]: + meta = p.get("metadata") + if isinstance(meta, dict): + return dict(meta) + return {} + + +def _page_index_from_part(p: Dict[str, Any]) -> int: + meta = _part_metadata_dict(p) + pi = meta.get("pageIndex") + try: + return int(pi) if pi is not None else 0 + except (TypeError, ValueError): + return 0 + + +def _is_csv_source(source_file_name: str, parts: List[Dict[str, Any]]) -> bool: + low = (source_file_name or "").lower() + if low.endswith(".csv"): + return True + for p in parts: + if not isinstance(p, dict): + continue + mime = (p.get("mimeType") or "").strip().lower() + if mime == "text/csv" or mime.endswith("csv"): + return True + return False + + +def _csv_text_from_parts(parts: List[Dict[str, Any]]) -> Optional[str]: + """Prefer explicit CSV table part payload; else None.""" + for p in parts: + if not isinstance(p, dict): + continue + if (p.get("typeGroup") or "").strip() != "table": + continue + mime = (p.get("mimeType") or "").strip().lower() + if "csv" in mime or mime == "text/plain": + raw = p.get("data") + if raw is None: + continue + return str(raw) + for p in parts: + if not isinstance(p, dict): + continue + if (p.get("typeGroup") or "").strip() == "text": + mime = (p.get("mimeType") or 
"").strip().lower() + if mime == "text/csv": + raw = p.get("data") + if raw is not None: + return str(raw) + return None + + +def _parse_csv_rows(csv_text: str, header_row: bool) -> Optional[Dict[str, Any]]: + try: + reader = csv.reader(StringIO(csv_text)) + rows = [list(r) for r in reader] + except csv.Error: + return None + if not rows: + return {"headers": [], "rows": []} + if not header_row: + return {"headers": [], "rows": rows} + headers = [str(c).strip() for c in rows[0]] + body = rows[1:] + dict_rows: List[Dict[str, str]] = [] + for r in body: + item: Dict[str, str] = {} + for i, h in enumerate(headers): + key = h or f"column_{i + 1}" + item[key] = str(r[i]).strip() if i < len(r) else "" + dict_rows.append(item) + return {"headers": headers, "rows": dict_rows} + + +def _segment_merged_text(merged: str, split_by: str) -> List[str]: + if split_by == "paragraph": + return [s for s in re.split(r"\n\s*\n+", merged) if s != ""] + if split_by == "sentence": + pieces = re.split(r"(?<=[.!?])\s+", merged) + return [s for s in pieces if s.strip() != ""] + return merged.split("\n") + + +def _apply_line_filters( + segments: List[str], + *, + filter_empty: bool, + trim_ws: bool, +) -> List[str]: + out: List[str] = [] + for seg in segments: + s = seg + if trim_ws: + s = s.strip() else: - extractionOptions = None - - # If extractionOptions not provided, create defaults - if not extractionOptions: - # Default extraction options for pure content extraction (no AI processing) - extractionOptions = ExtractionOptions( - prompt="Extract all content from the document", - mergeStrategy=MergeStrategy( - mergeType="concatenate", - groupBy="typeGroup", - orderBy="id" - ), - processDocumentsIndividually=True - ) - - # Call extraction service with hierarchical progress logging - self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating") - self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents") - # Pass operationId for 
hierarchical per-document progress logging - extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId) - - # Build ActionDocuments from ContentExtracted results - self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents") - actionDocuments = [] - # Map extracted results back to original documents by index (results are in same order) - for i, extracted in enumerate(extractedResults): - # Get original document name if available - originalDoc = chatDocuments[i] if i < len(chatDocuments) else None - if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName: - # Use original filename with "extracted_" prefix - baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName - documentName = f"{baseName}_extracted_{extracted.id}.json" + s = str(s) + if filter_empty and (not s or not s.strip()): + continue + out.append(s) + return out + + +def _chars_per_unit(unit: str, chunk_size: int) -> int: + # Token path: rough heuristic ~4 characters per token (documented convention). 
+ if unit == "tokens": + return max(1, chunk_size * 4) + if unit == "words": + return max(1, chunk_size * 6) + return max(1, chunk_size) + + +def _overlap_chars(unit: str, overlap: int, chunk_size: int) -> int: + return min(_chars_per_unit(unit, overlap), _chars_per_unit(unit, chunk_size)) + + +def _chunk_plain_text(text: str, cfg: Dict[str, Any]) -> List[str]: + unit = cfg["chunkSizeUnit"] + size = cfg["chunkSize"] + overlap_amount = cfg["chunkOverlap"] + if unit == "words": + words = text.split() + if not words: + return [] + out: List[str] = [] + step = max(1, size - overlap_amount) + i = 0 + while i < len(words): + chunk_words = words[i : i + size] + out.append(" ".join(chunk_words)) + if len(chunk_words) < size: + break + i += step + return out + csize = _chars_per_unit(unit, size) + ovl = min(_overlap_chars(unit, overlap_amount, size), csize - 1) if csize > 1 else 0 + if not text: + return [] + out: List[str] = [] + start = 0 + while start < len(text): + end = min(len(text), start + csize) + out.append(text[start:end]) + if end >= len(text): + break + start = max(0, end - ovl) + return out + + +def _base_item_meta( + source_file_name: str, + cfg: Dict[str, Any], + *, + segment_index: int, + offset_hint: Optional[int] = None, + page_index: Optional[int] = None, +) -> Optional[Dict[str, Any]]: + if not cfg.get("includeMetadata"): + return None + m: Dict[str, Any] = {"segmentIndex": segment_index} + if source_file_name: + m["sourceFileName"] = source_file_name + if offset_hint is not None: + m["charOffsetApprox"] = offset_hint + if page_index is not None: + m["pageIndex"] = page_index + return m + + +def summarize_presentation_payload(presentation: Dict[str, Any]) -> Dict[str, Any]: + """Compact shape for logs / run traces (no full ``data`` payload).""" + files_out: Dict[str, Any] = {} + for fk, bucket in (presentation.get("files") or {}).items(): + if not isinstance(bucket, dict): + continue + om = bucket.get("outputMode") + d = bucket.get("data") + shape: 
Dict[str, Any] = {"outputMode": om, "dataPythonType": type(d).__name__} + if isinstance(d, str): + shape["stringLength"] = len(d) + shape["head"] = d[:200] + shape["tail"] = d[-120:] if len(d) > 320 else None + elif isinstance(d, list): + shape["listLength"] = len(d) + if d: + el0 = d[0] + shape["firstElementPythonType"] = type(el0).__name__ + if isinstance(el0, str): + shape["firstStringLength"] = len(el0) + shape["firstHead"] = el0[:160] + elif isinstance(el0, dict): + shape["firstKeys"] = list(el0.keys())[:12] + files_out[str(fk)] = shape + return { + "schemaVersion": presentation.get("schemaVersion"), + "kind": presentation.get("kind"), + "rootOutputMode": presentation.get("outputMode"), + "fileOrder": presentation.get("fileOrder"), + "files": files_out, + } + + +def _joined_text_from_content_extracted_serial(items: List[Any]) -> str: + """Plain text from serialized ``contentExtracted`` list (dict items with ``parts``).""" + chunks: List[str] = [] + for item in items: + if not isinstance(item, dict): + continue + for p in item.get("parts") or []: + if not isinstance(p, dict): + continue + if not _part_carries_plain_text(p): + continue + raw = p.get("data") + if raw is None: + continue + s = str(raw).strip() + if s: + chunks.append(s) + return "\n\n".join(chunks) + + +def presentation_dict_without_meta(data: Dict[str, Any]) -> Dict[str, Any]: + """Strip ``_meta`` for helpers that expect a bare presentation envelope.""" + return {k: v for k, v in data.items() if k != "_meta"} + + +def joined_text_from_extract_node_data(data: Any) -> str: + """Primary text / mergeContext: presentation-root ``data``, ``contentExtracted``, or legacy handover.""" + if not isinstance(data, dict): + return "" + if data.get("kind") == PRESENTATION_KIND: + return presentation_response_text(presentation_dict_without_meta(data)) + ce = data.get("contentExtracted") + if isinstance(ce, list) and ce: + return _joined_text_from_content_extracted_serial(ce) + if data.get("files") is not None: + 
return _joined_text_from_handover_payload(data) + return "" + + +def presentation_response_text( + presentation: Dict[str, Any], + file_order_hint: Optional[Any] = None, +) -> str: + """Derive flattened ``response`` text from ``presentation.files``.""" + files_section = presentation.get("files") or {} + keys: List[str] = [] + if isinstance(file_order_hint, dict): + ord0 = file_order_hint.get("fileOrder") + keys = ord0 if isinstance(ord0, list) and ord0 else [] + elif isinstance(file_order_hint, list): + keys = file_order_hint + if not keys: + po = presentation.get("fileOrder") + keys = po if isinstance(po, list) and po else list(files_section.keys()) + chunks_out: List[str] = [] + for fk in keys: + bucket = files_section.get(fk) + if not isinstance(bucket, dict): + continue + texts = _flat_text_segments_from_presentation_bucket(bucket) + chunks_out.extend(texts) + return "\n\n".join(chunks_out) + + +def _flat_text_segments_from_presentation_bucket(bucket: Dict[str, Any]) -> List[str]: + """Derive plain-text segments from ``presentation.files[*]``. + + Prefer **data** when set (canonical shape for tooling): + - ``blob``: ``data`` is a single ``str``. + - ``lines``: ``data`` is a ``list[dict]``, one dict per extraction part (order preserved): same + fields as serialised ``ContentPart`` (image ``data`` redacted) plus ``lines`` (split/filtered text; + empty for non-text/table/structure plain-text parts). + - ``chunks``: ``data`` is ``list[str]``. + - ``pages``: ``data`` is ``list[{"pageIndex": int, "lines": [...]}]``. + - ``structured``: ``data`` mirrors ``items`` — list of part-like dicts; text from ``data`` fields. 
+ """ + if not isinstance(bucket, dict): + return [] + raw_data = bucket.get("data") + mode = str(bucket.get("outputMode") or "").strip() + + if isinstance(raw_data, str): + s = raw_data.strip() + return [s] if s else [] + if isinstance(raw_data, list): + extracted: List[str] = [] + for el in raw_data: + if isinstance(el, str): + lt = el.strip() + if lt: + extracted.append(lt) + elif isinstance(el, dict): + if el.get("type") == "image": + continue + if el.get("typeGroup") == "image": + continue + line_block = el.get("lines") + if isinstance(line_block, list): + for ln in line_block: + if isinstance(ln, str): + s = ln.strip() + if s: + extracted.append(s) + elif ln is not None: + s = str(ln).strip() + if s: + extracted.append(s) + elif _part_carries_plain_text(el): + d = el.get("data") + if isinstance(d, str): + s = d.strip() + if s: + extracted.append(s) + if extracted: + return extracted + + # Legacy layouts (omit ``data`` or empty list interpreted as fallback) + out: List[str] = [] + if mode == "blob": + t = bucket.get("text") + if isinstance(t, str) and t.strip(): + out.append(t.strip()) + elif mode == "lines": + for it in bucket.get("items") or []: + if isinstance(it, dict): + tx = it.get("text") + if isinstance(tx, str) and tx.strip(): + out.append(tx.strip()) + elif mode == "pages": + for pg in bucket.get("pages") or []: + if not isinstance(pg, dict): + continue + for it in pg.get("items") or []: + if isinstance(it, dict): + tx = it.get("text") + if isinstance(tx, str) and tx.strip(): + out.append(tx.strip()) + elif mode == "chunks": + for it in bucket.get("chunks") or []: + if isinstance(it, dict): + tx = it.get("text") + if isinstance(tx, str) and tx.strip(): + out.append(tx.strip()) + elif mode == "structured": + for it in bucket.get("items") or []: + if isinstance(it, dict): + if not _part_carries_plain_text(it): + continue + tx = it.get("data") + if isinstance(tx, str) and tx.strip(): + out.append(tx.strip()) + return out + + +def 
build_presentation_for_payload(payload: Dict[str, Any], cfg: Dict[str, Any]) -> Dict[str, Any]: + """Build root ``presentation`` object (does not mutate ``payload``).""" + files_section = payload.get("files") or {} + ordered = payload.get("fileOrder") + keys: List[str] = ordered if isinstance(ordered, list) and ordered else list(files_section.keys()) + out_files: Dict[str, Any] = {} + for fk in keys: + bucket = files_section.get(fk) + if not isinstance(bucket, dict): + continue + source_name = str(bucket.get("sourceFileName") or "") + raw_parts = [p for p in (bucket.get("parts") or []) if isinstance(p, dict)] + parts = _presentation_filter_parts(raw_parts, cfg["pdfExtractMode"]) + _apply_markdown_presentation_on_parts(parts, cfg["markdownPreserveFormatting"]) + out_files[fk] = _build_file_presentation(source_name, parts, cfg) + return { + "schemaVersion": PRESENTATION_SCHEMA_VERSION, + "kind": PRESENTATION_KIND, + "outputMode": cfg["outputMode"], + "fileOrder": keys, + "files": out_files, + } + + +def build_presentation_for_serial_extractions( + serial_docs: List[Dict[str, Any]], + source_file_names: List[str], + cfg: Dict[str, Any], +) -> Dict[str, Any]: + """Build presentation from serialized extraction dicts (possibly after image persist).""" + key_counts: Dict[str, int] = {} + keys: List[str] = [] + out_files: Dict[str, Any] = {} + for i, blob in enumerate(serial_docs): + if not isinstance(blob, dict): + continue + name = source_file_names[i] if i < len(source_file_names) else "" + fk = _file_json_key(str(name), i, key_counts) + keys.append(fk) + raw_parts = [p for p in (blob.get("parts") or []) if isinstance(p, dict)] + parts = _presentation_filter_parts(raw_parts, cfg["pdfExtractMode"]) + _apply_markdown_presentation_on_parts(parts, cfg["markdownPreserveFormatting"]) + out_files[fk] = _build_file_presentation(str(name), parts, cfg) + return { + "schemaVersion": PRESENTATION_SCHEMA_VERSION, + "kind": PRESENTATION_KIND, + "outputMode": cfg["outputMode"], + 
"fileOrder": keys, + "files": out_files, + } + + +def build_presentation_for_extractions( + extracted_results: List[ContentExtracted], + source_file_names: List[str], + cfg: Dict[str, Any], +) -> Dict[str, Any]: + """Build ``presentation`` from [`mainServiceExtraction.extractContent`] results.""" + serial = [_serialize_content_extracted_for_output(ec) for ec in extracted_results] + return build_presentation_for_serial_extractions(serial, source_file_names, cfg) + + +def build_presentation_envelope_from_plain_text( + text: str, + *, + source_name: str = "content", + output_mode: str = "lines", +) -> Dict[str, Any]: + """Wrap plain text in ``context.extractContent.presentation.v1`` for unified ``file.create`` handover.""" + t = (text or "").strip() + if not t: + return {} + cfg = parse_presentation_parameters({"outputMode": output_mode}) + label = (source_name or "content").strip() or "content" + serial = [{ + "parts": [{ + "typeGroup": "text", + "mimeType": "text/plain", + "data": t, + "label": label, + "id": f"plain_{label}", + }], + }] + return build_presentation_for_serial_extractions(serial, [label], cfg) + + +def _join_parts_plain_text(parts: List[Dict[str, Any]]) -> str: + blocks: List[str] = [] + for p in parts: + if not _part_carries_plain_text(p): + continue + raw = p.get("data") + if raw is None: + continue + s = str(raw).strip() + if s: + blocks.append(s) + return "\n\n".join(blocks) + + +def _redact_large_part_payload(p: Dict[str, Any]) -> Dict[str, Any]: + pc = dict(p) + tg = (pc.get("typeGroup") or "").strip().lower() + mime = (pc.get("mimeType") or "").strip().lower() + if tg == "image" or mime.startswith("image/"): + pc["data"] = "" + return pc + + +def _attach_redacted_image_parts(bucket: Dict[str, Any], parts: List[Dict[str, Any]]) -> None: + """Attach aggregate ``imageParts`` for ``pages`` / ``chunks`` where ``data`` stays non-part-shaped. + + ``lines`` mode carries each image as its own entry in ``data`` (same order as extraction parts). 
+ """ + imgs = [_redact_large_part_payload(_copy_part(p)) for p in parts if (p.get("typeGroup") or "").strip() == "image"] + if imgs: + bucket["imageParts"] = imgs + + +def _line_segments_filtered_for_text_fragment(fragment: str, cfg: Dict[str, Any]) -> List[str]: + frag = fragment.strip() + if not frag: + return [] + segs = _segment_merged_text(frag, cfg["splitBy"]) + return _apply_line_filters(segs, filter_empty=cfg["filterEmptyLines"], trim_ws=cfg["trimWhitespace"]) + + +def _rows_to_csv_payload(rows: List[List[Any]]) -> str: + lines: List[str] = [] + for row in rows: + cells = [str(c or "").replace('"', '""') for c in row] + lines.append(",".join(f'"{c}"' for c in cells)) + return "\n".join(lines) + + +def _table_matrix_from_csv(csv_text: str, *, header_row: bool) -> Optional[tuple[List[str], List[List[str]]]]: + """Parse CSV table payload into (headers, body rows) for ``renderReport`` tables.""" + parsed = _parse_csv_rows(csv_text, header_row) + if not parsed: + return None + headers = [str(h) for h in (parsed.get("headers") or [])] + raw_rows = parsed.get("rows") or [] + if not raw_rows: + return None + if isinstance(raw_rows[0], dict): + if not headers: + headers = list(raw_rows[0].keys()) + body = [[str(row.get(h, "")) for h in headers] for row in raw_rows] + return headers, body + body = [[str(c) for c in row] for row in raw_rows if isinstance(row, list)] + if not body: + return None + if not headers: + headers = [f"Column {i + 1}" for i in range(len(body[0]))] + return headers, body + + +def _presentation_line_slot_from_part(part: Dict[str, Any], cfg: Dict[str, Any]) -> Dict[str, Any]: + """One presentation row per extraction part: serialised part (redacted) + ``lines`` for this part only.""" + slot = _redact_large_part_payload(_copy_part(part)) + if (part.get("typeGroup") or "").strip() == "table": + # Keep CSV / structured table payload intact — do not split into ``lines``. 
+ slot["lines"] = [] + return slot + if _part_carries_plain_text(part): + slot["lines"] = _line_segments_filtered_for_text_fragment(str(part.get("data") or ""), cfg) + else: + slot["lines"] = [] + return slot + + +def _presentation_line_slots_from_part(part: Dict[str, Any], cfg: Dict[str, Any]) -> List[Dict[str, Any]]: + """Expand one extraction part to presentation slots (CSV tables → one slot per row in ``lines`` mode).""" + if (part.get("typeGroup") or "").strip() != "table": + return [_presentation_line_slot_from_part(part, cfg)] + if cfg.get("outputMode") != "lines": + return [_presentation_line_slot_from_part(part, cfg)] + csv_txt = str(part.get("data") or "") + if not csv_txt.strip(): + return [_presentation_line_slot_from_part(part, cfg)] + segs = _segment_merged_text(csv_txt, cfg["splitBy"]) + segs = _apply_line_filters( + segs, + filter_empty=cfg["filterEmptyLines"], + trim_ws=cfg["trimWhitespace"], + ) + if len(segs) <= 1: + return [_presentation_line_slot_from_part(part, cfg)] + out: List[Dict[str, Any]] = [] + part_id = str(part.get("id") or "table") + for idx, seg in enumerate(segs, start=1): + row_part = _copy_part(part) + row_part["typeGroup"] = "text" + row_part["mimeType"] = "text/plain" + row_part["data"] = seg + row_part["label"] = str(part.get("label") or "row") + row_part["id"] = f"{part_id}_line_{idx}" + slot = _redact_large_part_payload(row_part) + slot["lines"] = [seg] + out.append(slot) + return out + + +def _presentation_image_marker_in_data(part: Dict[str, Any]) -> Dict[str, Any]: + """Builds an image reference blob (used by ``blob`` output as ``[image:]`` token only).""" + rp = _redact_large_part_payload(_copy_part(part)) + marker: Dict[str, Any] = {"type": "image", "typeGroup": "image", "partId": rp.get("id")} + mime = rp.get("mimeType") + if mime: + marker["mimeType"] = str(mime).strip() + lbl = rp.get("label") + if lbl: + marker["label"] = lbl + eid = rp.get("embeddedImageFileId") + if eid: + marker["embeddedImageFileId"] = str(eid) 
+ enfn = rp.get("embeddedImageFileName") + if enfn: + marker["embeddedImageFileName"] = str(enfn) + meta = rp.get("metadata") + extra: Dict[str, Any] = {} + if isinstance(meta, dict): + pi = meta.get("pageIndex") + if pi is not None: + try: + extra["pageIndex"] = int(pi) + except (TypeError, ValueError): + extra["pageIndex"] = pi + cr = meta.get("contextRef") + if isinstance(cr, dict): + loc = cr.get("location") + if loc: + extra["contextLocation"] = loc + cp = cr.get("containerPath") + if cp: + extra["contextContainerPath"] = cp + if extra: + marker["extra"] = extra + return marker + + +_BLOB_IMAGE_CHUNK_RE = re.compile(r"^\[image(?:\:([^\]]+))?\]$") + + +def parse_blob_data_segments(data: str) -> List[Dict[str, Any]]: + """Split presentation ``blob`` ``data`` into virtual slots (text chunks + image markers).""" + segments: List[Dict[str, Any]] = [] + if not isinstance(data, str) or not data.strip(): + return segments + for idx, chunk in enumerate(data.split("\n\n")): + piece = chunk.strip() + if not piece: + continue + m = _BLOB_IMAGE_CHUNK_RE.fullmatch(piece) + if m: + token = (m.group(1) or "").strip() + seg: Dict[str, Any] = {"typeGroup": "image", "mimeType": "image/*", "data": piece} + if token: + seg["id"] = token else: - # Fallback to generic name with index - documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json" - - # Store ContentExtracted object in ActionDocument.documentData - validationMetadata = { - "actionType": "context.extractContent", - "documentIndex": i, - "extractedId": extracted.id, - "partCount": len(extracted.parts) if extracted.parts else 0, - "originalFileName": originalDoc.fileName if originalDoc and hasattr(originalDoc, 'fileName') else None - } - actionDoc = ActionDocument( - documentName=documentName, - documentData=extracted, # ContentExtracted object - mimeType="application/json", - validationMetadata=validationMetadata + seg["id"] = f"blob_image_{idx}" + segments.append(seg) + else: + segments.append({"typeGroup": 
"text", "mimeType": "text/plain", "data": piece, "id": f"blob_text_{idx}"}) + return segments + + +def filter_blob_bucket_by_content_type(bucket: Dict[str, Any], content_type: str) -> Dict[str, Any]: + """Keep only blob segments matching ``content_type`` (re-join as ``\\n\\n`` string).""" + out = copy.deepcopy(bucket) + raw = out.get("data") + if not isinstance(raw, str): + return out + target = (content_type or "").strip().lower() + kept: List[str] = [] + for seg in parse_blob_data_segments(raw): + tg = (seg.get("typeGroup") or "").strip().lower() + if target == "media" and tg in ("image", "media", "video", "audio"): + kept.append(str(seg.get("data") or "")) + elif target == "text" and tg in ("text", "table", "structure"): + kept.append(str(seg.get("data") or "")) + elif tg == target: + kept.append(str(seg.get("data") or "")) + out["data"] = "\n\n".join(s for s in kept if s.strip()) + return out + + +def _build_file_presentation( + source_file_name: str, + parts: List[Dict[str, Any]], + cfg: Dict[str, Any], +) -> Dict[str, Any]: + output_mode = cfg["outputMode"] + merge_plain = _join_parts_plain_text(parts) + + csv_block: Optional[Dict[str, Any]] = None + if _is_csv_source(source_file_name, parts): + csv_txt = _csv_text_from_parts(parts) + if csv_txt is not None: + csv_block = _parse_csv_rows(csv_txt, cfg["csvHeaderRow"]) + + base: Dict[str, Any] = { + "outputMode": output_mode, + "sourceFileName": source_file_name or None, + } + + if output_mode == "blob": + chunks_blob: List[str] = [] + for p in parts: + tg = (p.get("typeGroup") or "").strip() + if tg == "image": + m = _presentation_image_marker_in_data(p) + token = str(m.get("embeddedImageFileId") or m.get("partId") or "").strip() + chunks_blob.append(f"[image:{token}]" if token else "[image]") + continue + if _part_carries_plain_text(p): + raw = p.get("data") + if raw is None: + continue + s = str(raw).strip() + if not s: + continue + chunks_blob.append(s) + base["data"] = "\n\n".join(chunks_blob) + return 
base + + if output_mode == "structured": + if csv_block is not None: + base["csv"] = csv_block + items_list = [_redact_large_part_payload(_copy_part(p)) for p in parts] + base["items"] = items_list + base["data"] = list(items_list) + return base + + if output_mode == "pages": + by_page: Dict[int, List[str]] = {} + for p in parts: + if not _part_carries_plain_text(p): + continue + raw = p.get("data") + if raw is None: + continue + s = str(raw).strip() + if not s: + continue + pi = _page_index_from_part(p) + by_page.setdefault(pi, []).append(s) + ordered_pages = sorted(by_page.keys()) + page_objs: List[Dict[str, Any]] = [] + for pi in ordered_pages: + merged = "\n\n".join(by_page[pi]) + segs = _segment_merged_text(merged, cfg["splitBy"]) + segs = _apply_line_filters( + segs, + filter_empty=cfg["filterEmptyLines"], + trim_ws=cfg["trimWhitespace"], ) - actionDocuments.append(actionDoc) - - self.services.chat.progressLogFinish(operationId, True) - - return ActionResult.isSuccess(documents=actionDocuments) - - except Exception as e: - logger.error(f"Error in content extraction: {str(e)}") - + items: List[Dict[str, Any]] = [] + offset = 0 + for idx, seg in enumerate(segs, start=1): + meta = _base_item_meta( + source_file_name, + cfg, + segment_index=idx, + offset_hint=offset, + page_index=pi, + ) + row: Dict[str, Any] = {"text": seg} + if cfg["includeLineNumbers"]: + row["lineNumber"] = idx + if meta: + row["metadata"] = meta + items.append(row) + offset += len(seg) + 1 + page_objs.append({"pageIndex": pi, "items": items}) + base["pages"] = page_objs + base["data"] = [ + { + "pageIndex": int(po["pageIndex"]), + "lines": [ + str(it["text"]) + for it in (po.get("items") or []) + if isinstance(it, dict) and isinstance(it.get("text"), str) + ], + } + for po in page_objs + if isinstance(po, dict) + ] + _attach_redacted_image_parts(base, parts) + return base + + if output_mode == "chunks": + segs = _segment_merged_text(merge_plain, cfg["splitBy"]) + segs = _apply_line_filters( 
+ segs, + filter_empty=cfg["filterEmptyLines"], + trim_ws=cfg["trimWhitespace"], + ) + flat = "\n".join(segs) + chunk_texts = _chunk_plain_text(flat, cfg) + chunk_objs: List[Dict[str, Any]] = [] + for idx, ct in enumerate(chunk_texts, start=1): + meta = _base_item_meta(source_file_name, cfg, segment_index=idx) + row: Dict[str, Any] = {"index": idx, "text": ct} + if meta: + row["metadata"] = meta + chunk_objs.append(row) + base["chunks"] = chunk_objs + base["data"] = [str(row["text"]) for row in chunk_objs if isinstance(row.get("text"), str)] + _attach_redacted_image_parts(base, parts) + return base + + # lines (default): same part order/cardinality as extraction; segmentation inside each part. + slots: List[Dict[str, Any]] = [] + for p in parts: + if isinstance(p, dict): + slots.extend(_presentation_line_slots_from_part(p, cfg)) + base["data"] = slots + if cfg["includeLineNumbers"] or cfg["includeMetadata"]: + flat_items: List[Dict[str, Any]] = [] + line_no = 0 + seg_off = 0 + for slot in slots: + tg_slot = (slot.get("typeGroup") or "").strip() + part_id = slot.get("id") + page_ix = _page_index_from_part(slot) + + if tg_slot == "image": + line_no += 1 + meta_i = _base_item_meta( + source_file_name, + cfg, + segment_index=line_no, + offset_hint=seg_off, + page_index=page_ix, + ) + row_im: Dict[str, Any] = {"type": "image", "partId": slot.get("id"), "mimeType": slot.get("mimeType")} + if cfg["includeLineNumbers"]: + row_im["lineNumber"] = line_no + if meta_i: + row_im["metadata"] = meta_i + flat_items.append(row_im) + seg_off += 1 + continue + + for ln in slot.get("lines") or []: + if not isinstance(ln, str): + continue + line_no += 1 + meta_t = _base_item_meta( + source_file_name, + cfg, + segment_index=line_no, + offset_hint=seg_off, + page_index=page_ix, + ) + row_t: Dict[str, Any] = {"text": ln} + if cfg["includeLineNumbers"]: + row_t["lineNumber"] = line_no + if meta_t: + row_t["metadata"] = meta_t + flat_items.append(row_t) + seg_off += len(ln) + 1 + 
base["items"] = flat_items + return base + + +def _mime_to_file_extension(mime: str) -> str: + m = (mime or "").split(";")[0].strip().lower() + mapping = { + "image/jpeg": "jpg", + "image/jpg": "jpg", + "image/png": "png", + "image/gif": "gif", + "image/webp": "webp", + "image/bmp": "bmp", + "image/tiff": "tiff", + } + return mapping.get(m, m.rsplit("/", 1)[-1] if "/" in m else "bin") + + +def _persist_extracted_image_parts( + content_extracted_serial: List[Dict[str, Any]], + *, + name_stem: str, + run_context: Optional[Dict[str, Any]], +) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: + """Decode base64 image parts, persist bytes, replace with ``embeddedImageFileId``; return artifacts meta.""" + artifacts: List[Dict[str, Any]] = [] + if not run_context or not isinstance(run_context, dict): + logger.warning("extractContent image persist: _runContext missing — images not stored") + return content_extracted_serial, artifacts + mandate_id = run_context.get("mandateId") + instance_id = run_context.get("instanceId") + if not mandate_id or not instance_id: + logger.warning( + "extractContent image persist: mandateId/instanceId missing in _runContext (mandate=%r instance=%r)", + mandate_id, + instance_id, + ) + return content_extracted_serial, artifacts + + try: + from modules.interfaces.interfaceDbManagement import getInterface as _get_mgmt + from modules.interfaces.interfaceDbApp import getInterface as _get_app + from modules.security.rootAccess import getRootUser + except Exception as exc: + logger.warning("extractContent image persist: import failed: %s", exc) + return content_extracted_serial, artifacts + + owner = getRootUser() + uid = run_context.get("userId") + if uid: try: - if operationId: - self.services.chat.progressLogFinish(operationId, False) + umap = _get_app(getRootUser()).getUsersByIds([str(uid)]) + owner = umap.get(str(uid)) or owner except Exception: pass - - return ActionResult.isFailure(error=str(e)) + try: + mgmt = _get_mgmt(owner, 
mandateId=str(mandate_id), featureInstanceId=str(instance_id)) + except Exception as exc: + logger.warning("extractContent image persist: mgmt interface failed: %s", exc) + return content_extracted_serial, artifacts + + stem = re.sub(r"[^\w\-]+", "_", name_stem).strip("_") or "extract" + + for doc_idx, blob in enumerate(content_extracted_serial): + if not isinstance(blob, dict): + continue + parts = blob.get("parts") + if not isinstance(parts, list): + continue + new_parts: List[Any] = [] + for p in parts: + if not isinstance(p, dict): + new_parts.append(p) + continue + tg = (p.get("typeGroup") or "").strip() + mime = (p.get("mimeType") or "").strip() + raw_data = p.get("data") + if tg != "image" or not mime.lower().startswith("image/") or not raw_data: + new_parts.append(p) + continue + raw_s = raw_data.strip() if isinstance(raw_data, str) else "" + try: + img_bytes = _b64.b64decode(raw_s, validate=True) if raw_s else b"" + except (_binascii.Error, TypeError, ValueError): + new_parts.append(p) + continue + if not img_bytes: + new_parts.append(p) + continue + part_id = str(p.get("id") or "part") + safe_id = re.sub(r"[^\w\-.]+", "_", part_id).strip("_") or "media" + if len(safe_id) > 200: + safe_id = safe_id[:200] + ext = _mime_to_file_extension(mime) + # Stable name (no run timestamp) so duplicate content reuses the same FileItem. 
+ media_name = f"extract_media_{safe_id}.{ext}" + try: + file_item = mgmt.createFile(media_name, mime, img_bytes, folderId=None) + mgmt.createFileData(file_item.id, img_bytes) + try: + mgmt.updateFile(str(file_item.id), {"tags": ["_workflowInternal"]}) + except Exception as tag_exc: + logger.warning( + "extractContent image persist: could not tag internal file %s: %s", + file_item.id, + tag_exc, + ) + except Exception as exc: + logger.warning("extractContent image persist: createFile failed %s: %s", part_id, exc) + new_parts.append(p) + continue + p_new = dict(p) + p_new["data"] = "" + p_new["embeddedImageFileId"] = str(file_item.id) + p_new["embeddedImageFileName"] = str(getattr(file_item, "fileName", media_name)) + new_parts.append(p_new) + artifacts.append( + { + "fileId": str(file_item.id), + "fileName": str(getattr(file_item, "fileName", media_name)), + "mimeType": mime, + "sourcePartId": part_id, + "documentIndex": doc_idx, + "suppressInWorkflowFileLists": True, + } + ) + blob["parts"] = new_parts + + return content_extracted_serial, artifacts + + +def _one_file_bucket(ec: ContentExtracted, source_file_name: str) -> Dict[str, Any]: + parts_ser = _serialize_parts(ec.parts) + + ud = getattr(ec, "udm", None) + if hasattr(ud, "model_dump"): + ud = ud.model_dump(mode="json") + + summary = getattr(ec, "summary", None) + if hasattr(summary, "model_dump"): + summary = summary.model_dump(mode="json") + elif isinstance(summary, dict): + summary = dict(summary) + elif summary is None: + summary = {} + + return { + "sourceFileName": source_file_name, + "extractedId": getattr(ec, "id", ""), + "summary": summary, + "udm": ud, + "parts": parts_ser, + "byTypeGroup": _rebuild_by_type_group(parts_ser), + } + + + +_MAX_IMAGE_EMBED_BYTES = 300_000 +_IMAGE_MAX_DIMENSION = 1200 + + +def _get_mgmt_for_presentation_render(services: Any) -> Optional[Any]: + mgmt = getattr(services, "interfaceDbComponent", None) if services else None + if mgmt: + return mgmt + if not services: + 
return None + try: + import modules.interfaces.interfaceDbManagement as iface + + user = getattr(services, "user", None) + if not user: + return None + return iface.getInterface( + user, + mandateId=getattr(services, "mandateId", None) or "", + featureInstanceId=getattr(services, "featureInstanceId", None) or "", + ) + except Exception as exc: + logger.warning("presentation render: mgmt interface failed: %s", exc) + return None + + +def _sniff_image_mime(image_bytes: bytes) -> str: + """Detect image mime type from raw bytes (magic numbers). + + Raises ``ValueError`` for unknown / unreadable signatures — callers must NOT + silently fall back to a guessed mime type, because that produces broken + renders downstream (wrong content-type in data URIs, wrong file extensions). + """ + if not image_bytes or len(image_bytes) < 12: + raise ValueError( + f"image bytes too short to detect mime type ({len(image_bytes) if image_bytes else 0} bytes)" + ) + head = image_bytes[:12] + if head[:8] == b"\x89PNG\r\n\x1a\n": + return "image/png" + if head[:3] == b"\xff\xd8\xff": + return "image/jpeg" + if head[:6] in (b"GIF87a", b"GIF89a"): + return "image/gif" + if head[:4] == b"RIFF" and head[8:12] == b"WEBP": + return "image/webp" + if head[:2] == b"BM": + return "image/bmp" + if head[:4] in (b"II*\x00", b"MM\x00*"): + return "image/tiff" + raise ValueError(f"unknown image signature: {head[:8]!r}") + + +def _resize_image_bytes_for_document(image_bytes: bytes) -> bytes: + from PIL import Image as PILImage + + img = PILImage.open(BytesIO(image_bytes)) + if img.mode in ("RGBA", "LA"): + bg = PILImage.new("RGB", img.size, (255, 255, 255)) + bg.paste(img, mask=img.split()[-1]) + img = bg + elif img.mode == "P": + img = img.convert("RGBA") + bg = PILImage.new("RGB", img.size, (255, 255, 255)) + bg.paste(img, mask=img.split()[-1]) + img = bg + elif img.mode != "RGB": + img = img.convert("RGB") + if max(img.size) > _IMAGE_MAX_DIMENSION: + img.thumbnail((_IMAGE_MAX_DIMENSION, 
def _load_image_bytes_by_file_id(services: Any, file_id: str) -> Optional[bytes]:
    """Fetch the stored bytes for ``file_id`` through the management interface.

    Raises ``ValueError`` when no interface offering ``getFileData`` can be
    obtained from ``services``.
    """
    storage = _get_mgmt_for_presentation_render(services)
    usable = bool(storage) and hasattr(storage, "getFileData")
    if not usable:
        raise ValueError(
            "no management interface available to load persisted image bytes — "
            "services.interfaceDbComponent / mandate / instance must be set"
        )
    return storage.getFileData(str(file_id))


def _inline_runs_from_presentation_lines(lines: List[Any]) -> List[Dict[str, Any]]:
    """Convert presentation ``lines`` to inline runs, one explicit break between lines.

    ``None`` and empty lines contribute only their line break. When nothing at
    all is produced, a single empty text run is returned.
    """
    from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import _parseInlineRuns

    collected: List[Dict[str, Any]] = []
    for position, entry in enumerate(lines):
        if position > 0:
            collected.append({"type": "text", "value": "\n"})
        text = "" if entry is None else str(entry)
        if text:
            collected.extend(_parseInlineRuns(text))
    if collected:
        return collected
    return [{"type": "text", "value": ""}]


def _is_presentation_file_bucket(d: Dict[str, Any]) -> bool:
    """True for one ``presentation.files[*]`` bucket (loop item value / per-file handover)."""
    if d.get("kind") == PRESENTATION_KIND:
        return False
    if not isinstance(d.get("data"), (list, str)):
        return False
    return any(marker in d for marker in ("outputMode", "sourceFileName"))


def _is_loop_presentation_file_item(d: Dict[str, Any]) -> bool:
    """True for a ``{"name": str, "value": <file bucket>}`` loop item."""
    if not isinstance(d.get("name"), str):
        return False
    candidate = d.get("value")
    return isinstance(candidate, dict) and _is_presentation_file_bucket(candidate)


def _is_presentation_line_slot(d: Dict[str, Any]) -> bool:
    """True for one slot of ``presentation.files[*].data[]`` (e.g. loop iteration over one CSV row)."""
    if d.get("kind") == PRESENTATION_KIND:
        return False
    if _is_presentation_file_bucket(d):
        return False
    type_group = (d.get("typeGroup") or "").strip()
    if type_group in ("text", "table", "image", "structure"):
        return True
    return isinstance(d.get("lines"), list)
def presentation_envelope_from_file_bucket(
    bucket: Dict[str, Any],
    *,
    file_key: Optional[str] = None,
) -> Dict[str, Any]:
    """Wrap a single ``presentation.files`` bucket in a complete presentation envelope.

    When ``file_key`` is blank the key is derived from the bucket's
    ``sourceFileName`` (``file_1_<name>``), or ``file_1`` without one.
    """
    key = (file_key or "").strip()
    if not key:
        source_name = str(bucket.get("sourceFileName") or "").strip()
        key = f"file_1_{source_name}" if source_name else "file_1"
    envelope: Dict[str, Any] = {
        "schemaVersion": PRESENTATION_SCHEMA_VERSION,
        "kind": PRESENTATION_KIND,
        "outputMode": bucket.get("outputMode") or "lines",
        "fileOrder": [key],
        "files": {key: bucket},
    }
    return envelope


def normalize_presentation_envelopes(raw: Any) -> List[Dict[str, Any]]:
    """Collect ``context.extractContent.presentation.v1`` dicts from ActionResult / list shapes.

    Lists are flattened recursively. A dict is accepted, in this order, as:
    an envelope itself, a loop item wrapping a file bucket, a loop item
    wrapping a line slot, a bare file bucket, a bare line slot, or a wrapper
    holding an envelope under ``data`` / ``merged`` / ``value``.
    """
    if isinstance(raw, list):
        collected: List[Dict[str, Any]] = []
        for entry in raw:
            collected += normalize_presentation_envelopes(entry)
        return collected
    if not isinstance(raw, dict):
        return []
    if raw.get("kind") == PRESENTATION_KIND:
        return [raw]

    item_name = raw.get("name")
    item_value = raw.get("value")
    if isinstance(item_name, str) and isinstance(item_value, dict):
        loop_key = str(item_name or "file_1")
        if _is_presentation_file_bucket(item_value):
            return [presentation_envelope_from_file_bucket(item_value, file_key=loop_key)]
        if _is_presentation_line_slot(item_value):
            slot_bucket = {
                "outputMode": item_value.get("outputMode") or "lines",
                "sourceFileName": "",
                "data": [item_value],
            }
            return [presentation_envelope_from_file_bucket(slot_bucket, file_key=loop_key)]

    if _is_presentation_file_bucket(raw):
        return [presentation_envelope_from_file_bucket(raw)]
    if _is_presentation_line_slot(raw):
        return [
            presentation_envelope_from_file_bucket(
                {"outputMode": "lines", "sourceFileName": "", "data": [raw]}
            )
        ]

    direct = raw.get("data")
    if isinstance(direct, dict) and direct.get("kind") == PRESENTATION_KIND:
        return [direct]
    for wrapper_key in ("data", "merged", "value"):
        nested = raw.get(wrapper_key)
        if not isinstance(nested, dict) or nested is raw:
            continue
        found = normalize_presentation_envelopes(nested)
        if found:
            return found
    return []
[raw]} + return [presentation_envelope_from_file_bucket(bucket)] + inner = raw.get("data") + if isinstance(inner, dict) and inner.get("kind") == PRESENTATION_KIND: + return [inner] + for key in ("data", "merged", "value"): + nested = raw.get(key) + if isinstance(nested, dict) and nested is not raw: + found = normalize_presentation_envelopes(nested) + if found: + return found + return [] + + +def _artifacts_by_part_id_from_meta(meta: Any) -> Dict[str, str]: + out: Dict[str, str] = {} + if not isinstance(meta, dict): + return out + for art in meta.get("persistedImageArtifacts") or []: + if not isinstance(art, dict): + continue + sp = str(art.get("sourcePartId") or "").strip() + fid = str(art.get("fileId") or "").strip() + if sp and fid: + out[sp] = fid + return out + + +def _collect_artifacts_by_part_id(envelopes: List[Dict[str, Any]]) -> Dict[str, str]: + merged: Dict[str, str] = {} + for envelope in envelopes: + merged.update(_artifacts_by_part_id_from_meta(envelope.get("_meta"))) + return merged + + +def presentation_envelopes_to_document_json( + raw: Any, + *, + title: str, + language: str, + services: Any = None, +) -> Dict[str, Any]: + """Map presentation envelope(s) to ``renderReport`` ``extractedContent`` (documents/sections).""" + from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import _parseInlineRuns + + envelopes = normalize_presentation_envelopes(raw) + if not envelopes: + raise ValueError( + "context must be presentation data from Inhalt extrahieren (kind=context.extractContent.presentation.v1)" + ) + + artifacts_by_part = _collect_artifacts_by_part_id(envelopes) + + sections: List[Dict[str, Any]] = [] + order = 0 + + def _next_id() -> str: + nonlocal order + order += 1 + return f"s_{order}" + + def _append_heading(text: str, level: int = 2) -> None: + t = (text or "").strip() + if not t: + return + sections.append({ + "id": _next_id(), + "content_type": "heading", + "order": order, + "elements": [{"content": {"text": t, "level": 
level}}], + }) + + def _append_paragraph(text: str) -> None: + t = (text or "").strip() + if not t: + return + sections.append({ + "id": _next_id(), + "content_type": "paragraph", + "order": order, + "elements": [{"content": {"inlineRuns": _parseInlineRuns(t)}}], + }) + + def _resolve_image_file_id(slot: Dict[str, Any]) -> Optional[str]: + fid = slot.get("embeddedImageFileId") + if fid: + return str(fid).strip() or None + candidates: List[str] = [] + sid = str(slot.get("id") or "").strip() + if sid: + candidates.append(sid) + raw_d = slot.get("data") + if isinstance(raw_d, str): + m = _BLOB_IMAGE_CHUNK_RE.fullmatch(raw_d.strip()) + if m: + tok = (m.group(1) or "").strip() + if tok: + candidates.append(tok) + for cand in candidates: + if cand in artifacts_by_part: + return artifacts_by_part[cand] + # Marker may already carry the persisted storage file id. + try: + blob = _load_image_bytes_by_file_id(services, cand) + if blob: + return cand + except Exception: + pass + return None + + def _append_image_slot(slot: Dict[str, Any]) -> None: + fid = _resolve_image_file_id(slot) + if not fid: + raise ValueError( + "image slot is missing embeddedImageFileId — " + "extractContent must persist every image part before handover" + ) + blob = _load_image_bytes_by_file_id(services, str(fid)) + if not blob: + raise ValueError( + f"could not load persisted image bytes for fileId={fid!r}" + ) + if len(blob) > _MAX_IMAGE_EMBED_BYTES: + blob = _resize_image_bytes_for_document(blob) + name = slot.get("embeddedImageFileName") or slot.get("label") + if not name: + raise ValueError( + f"image slot is missing embeddedImageFileName/label for fileId={fid!r}" + ) + mime = _sniff_image_mime(blob) + sections.append({ + "id": _next_id(), + "content_type": "image", + "order": order, + "elements": [{ + "content": { + "altText": str(name), + "base64Data": _b64.b64encode(blob).decode("ascii"), + "fileId": str(fid), + "fileName": str(name), + "mimeType": mime, + }, + }], + }) + + def 
_append_text_slot(slot: Dict[str, Any]) -> None: + lines = slot.get("lines") + if isinstance(lines, list) and lines: + sections.append({ + "id": _next_id(), + "content_type": "paragraph", + "order": order, + "elements": [{"content": {"inlineRuns": _inline_runs_from_presentation_lines(lines)}}], + }) + return + raw_d = slot.get("data") + if isinstance(raw_d, str) and raw_d.strip(): + sections.append({ + "id": _next_id(), + "content_type": "paragraph", + "order": order, + "elements": [{"content": {"inlineRuns": _inline_runs_from_presentation_lines(raw_d.splitlines())}}], + }) + + def _append_table_slot(slot: Dict[str, Any]) -> None: + raw = slot.get("data") + if not isinstance(raw, str) or not raw.strip(): + return + header_row = True + meta = slot.get("metadata") + if isinstance(meta, dict) and meta.get("csvHeaderRow") is False: + header_row = False + parsed = _table_matrix_from_csv(raw, header_row=header_row) + if not parsed: + return + headers, body = parsed + sections.append({ + "id": _next_id(), + "content_type": "table", + "order": order, + "elements": [{"content": {"headers": headers, "rows": body}}], + }) + + def _append_slot(slot: Dict[str, Any]) -> None: + tg = (slot.get("typeGroup") or "").strip().lower() + mime = (slot.get("mimeType") or "").strip().lower() + if tg == "image" or mime.startswith("image/"): + _append_image_slot(slot) + return + if tg == "container": + return + if tg == "table" or ("csv" in mime and slot.get("data")): + _append_table_slot(slot) + return + if _part_carries_plain_text(slot): + _append_text_slot(slot) + + def _append_bucket(bucket: Dict[str, Any], *, show_file_heading: bool) -> None: + if show_file_heading: + src = str(bucket.get("sourceFileName") or "").strip() + if src: + _append_heading(src) + raw_data = bucket.get("data") + mode = str(bucket.get("outputMode") or "").strip().lower() + if isinstance(raw_data, str) and mode == "blob": + for seg in parse_blob_data_segments(raw_data): + _append_slot(seg) + return + if 
isinstance(raw_data, str): + _append_paragraph(raw_data) + return + if isinstance(raw_data, list): + for el in raw_data: + if isinstance(el, dict): + _append_slot(el) + elif isinstance(el, str): + _append_paragraph(el) + return + if isinstance(raw_data, dict): + _append_slot(raw_data) + + for envelope in envelopes: + files_section = envelope.get("files") or {} + file_order = envelope.get("fileOrder") + keys: List[str] = ( + list(file_order) if isinstance(file_order, list) and file_order else list(files_section.keys()) + ) + multi_files = len(keys) > 1 + for fk in keys: + bucket = files_section.get(fk) + if isinstance(bucket, dict): + _append_bucket(bucket, show_file_heading=multi_files) + + if not sections: + raise ValueError("presentation produced no renderable sections") + + lang = (language or "de").strip() or "de" + doc_title = (title or "Document").strip() or "Document" + return { + "metadata": { + "split_strategy": "single_document", + "source_documents": [], + "extraction_method": "context_extract_presentation", + "title": doc_title, + "language": lang, + }, + "documents": [{ + "id": "doc_1", + "title": doc_title, + "language": lang, + "sections": sections, + }], + } + + +async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: + operation_id = None + try: + wf = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + operation_id = f"context_extract_{wf}_{int(time.time())}" + + document_list_param = parameters.get("documentList") + if not document_list_param: + return ActionResult.isFailure(error="documentList is required") + + dl = coerceDocumentReferenceList(document_list_param) + if not dl.references: + return ActionResult.isFailure( + error=( + f"documentList could not be parsed (type={type(document_list_param).__name__}); " + "expected DocumentReferenceList, list of strings/dicts, or " + "a wrapper dict like {'documents': [...]}" + ), + ) + + parent_operation_id = 
parameters.get("parentOperationId") + self.services.chat.progressLogStart( + operation_id, + "Extracting content from documents", + "Content Extraction", + f"Documents: {len(dl.references)}", + parentOperationId=parent_operation_id, + ) + + self.services.chat.progressLogUpdate(operation_id, 0.2, "Loading documents") + chat_documents = self.services.chat.getChatDocumentsFromDocumentList(dl) + if not chat_documents: + self.services.chat.progressLogFinish(operation_id, False) + return ActionResult.isFailure(error="No documents found in documentList") + + logger.info(f"Extracting content from {len(chat_documents)} documents") + + self.services.chat.progressLogUpdate(operation_id, 0.3, "Preparing extraction options") + + eo_param = parameters.get("extractionOptions") + extraction_options: ExtractionOptions + if isinstance(eo_param, dict) and eo_param: + eo = dict(eo_param) + eo.setdefault("prompt", "Extract all content from the document") + if "mergeStrategy" not in eo: + eo["mergeStrategy"] = None + try: + extraction_options = ExtractionOptions(**eo) + except Exception as e: + logger.warning(f"Invalid extractionOptions, using defaults: {e}") + extraction_options = _default_extraction_options() + elif isinstance(eo_param, ExtractionOptions): + extraction_options = eo_param + else: + extraction_options = _default_extraction_options() + + self.services.chat.progressLogUpdate(operation_id, 0.4, "Extracting …") + self.services.chat.progressLogUpdate(operation_id, 0.5, f"Extracting {len(chat_documents)} document(s)") + extracted_results = self.services.extraction.extractContent(chat_documents, extraction_options, operationId=operation_id) + + file_names = [getattr(cd, "fileName", "") or "" for cd in chat_documents] + + content_filter = _canonical_content_filter(parameters.get("contentFilter")) + filtered_extractions = _filter_extractions_by_content_filter(extracted_results, content_filter) + + pres_cfg = parse_presentation_parameters(parameters) + + stem = 
f"{wf}_{int(time.time())}" + run_ctx = parameters.get("_runContext") + + content_extracted_serial = [_serialize_content_extracted_for_output(ec) for ec in filtered_extractions] + image_artifacts: List[Dict[str, Any]] = [] + if content_filter in ("all", "imagesOnly"): + content_extracted_serial, image_artifacts = _persist_extracted_image_parts( + content_extracted_serial, + name_stem=stem, + run_context=run_ctx if isinstance(run_ctx, dict) else None, + ) + + presentation = build_presentation_for_serial_extractions(content_extracted_serial, file_names, pres_cfg) + + try: + _pc_json = json.dumps(dict(pres_cfg), ensure_ascii=False, default=str) + _sum = summarize_presentation_payload(presentation) + _sum_json = json.dumps(_sum, ensure_ascii=False, default=str) + logger.info( + "extractContent op=%s presentationConfig=%s presentationSummary=%s", + operation_id, + _pc_json, + _sum_json[:8000] + ("…" if len(_sum_json) > 8000 else ""), + ) + except Exception as _log_e: + logger.debug("extractContent presentation trace log skipped: %s", _log_e) + + data_out: Dict[str, Any] = { + **presentation, + "_meta": { + "actionType": "context.extractContent", + "operationRef": operation_id, + "sourceFileNames": list(file_names), + "documentCountInput": len(chat_documents), + "documentCountRoots": len(extracted_results), + "extractPayloadSchemaVersion": EXTRACT_PAYLOAD_SCHEMA_VERSION, + "presentationConfig": dict(pres_cfg), + "persistedImageArtifacts": image_artifacts, + "suppressInWorkflowFileLists": True, + "persistedImageCount": len(image_artifacts), + }, + } + + self.services.chat.progressLogFinish(operation_id, True) + return ActionResult.isSuccess(documents=[], data=data_out) + + except Exception as e: + logger.error(f"Error in content extraction: {str(e)}") + try: + if operation_id: + self.services.chat.progressLogFinish(operation_id, False) + except Exception: + pass + return ActionResult.isFailure(error=str(e)) diff --git 
a/modules/workflows/methods/methodContext/actions/filterContext.py b/modules/workflows/methods/methodContext/actions/filterContext.py new file mode 100644 index 00000000..6087b380 --- /dev/null +++ b/modules/workflows/methods/methodContext/actions/filterContext.py @@ -0,0 +1,141 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights reserved. +"""Action ``context.filterContext``. + +Allow- or block-lists keys/paths from the upstream payload using simple glob +patterns. Implementation uses ``fnmatch`` (no regex) and traverses dotted paths +on dicts. +""" + +from __future__ import annotations + +import copy +import fnmatch +import logging +from typing import Any, Dict, List, Optional, Tuple + +from modules.datamodels.datamodelChat import ActionResult + +logger = logging.getLogger(__name__) + + +_META_KEYS = ("_success", "_error", "_transit", "_meta", "_warnings") + + +def _flatten(payload: Any, prefix: str = "") -> Dict[str, Any]: + """Yield ``{dotted.path: value}`` for every leaf in a dict tree.""" + out: Dict[str, Any] = {} + if not isinstance(payload, dict): + if prefix: + out[prefix] = payload + return out + for k, v in payload.items(): + path = f"{prefix}.{k}" if prefix else str(k) + if isinstance(v, dict): + out.update(_flatten(v, path)) + else: + out[path] = v + return out + + +def _set_path(target: Dict[str, Any], dotted: str, value: Any) -> None: + parts = dotted.split(".") + cur = target + for seg in parts[:-1]: + nxt = cur.get(seg) + if not isinstance(nxt, dict): + nxt = {} + cur[seg] = nxt + cur = nxt + cur[parts[-1]] = value + + +def _del_path(target: Dict[str, Any], dotted: str) -> bool: + parts = dotted.split(".") + cur: Any = target + stack: List[Tuple[Dict[str, Any], str]] = [] + for seg in parts[:-1]: + if not isinstance(cur, dict) or seg not in cur: + return False + stack.append((cur, seg)) + cur = cur[seg] + if not isinstance(cur, dict) or parts[-1] not in cur: + return False + del cur[parts[-1]] + return True + + +def _match_any(pattern: str, 
def _dict_node_paths(payload: Any, prefix: str = "") -> Dict[str, Any]:
    """Return ``{dotted.path: subtree}`` for every dict-valued key below ``payload``."""
    out: Dict[str, Any] = {}
    if not isinstance(payload, dict):
        return out
    for k, v in payload.items():
        if isinstance(v, dict):
            path = f"{prefix}.{k}" if prefix else str(k)
            out[path] = v
            out.update(_dict_node_paths(v, path))
    return out


async def filterContext(self, parameters: Dict[str, Any]) -> ActionResult:
    """Allow- or block-list keys/paths of the upstream payload using glob patterns.

    Parameters read from ``parameters``:
      * ``mode``: ``"allow"`` (default) keeps only matches, ``"block"`` removes them.
      * ``keys``: non-empty list of dotted paths or ``fnmatch`` patterns.
      * ``missingKeyBehavior``: ``"skip"`` (default), ``"nullFill"`` or ``"error"``;
        only used in allow mode for patterns without any match.
      * ``preserveMeta``: keep the ``_META_KEYS`` entries (default ``True``).
      * ``_upstreamPayload``: the payload; a non-dict is wrapped as ``{"value": ...}``.

    Patterns are matched against leaf paths and against dict-valued paths, so
    naming a key that holds a dict selects that whole subtree.

    Returns a success result with the filtered dict (plus ``_missingKeys`` in
    allow mode or ``_removedKeys`` in block mode), or a failure result for
    invalid parameters, missing keys under ``"error"``, or unexpected errors.
    """
    try:
        mode = str(parameters.get("mode") or "allow")
        if mode not in ("allow", "block"):
            return ActionResult.isFailure(error=f"Invalid mode '{mode}', expected 'allow' or 'block'")

        keys: List[str] = parameters.get("keys") or []
        if not isinstance(keys, list) or not keys:
            return ActionResult.isFailure(error="'keys' must be a non-empty list of paths or patterns")

        missing_behavior = str(parameters.get("missingKeyBehavior") or "skip")
        if missing_behavior not in ("skip", "nullFill", "error"):
            return ActionResult.isFailure(error=f"Invalid missingKeyBehavior '{missing_behavior}'")

        preserve_meta = bool(parameters.get("preserveMeta", True))
        upstream = parameters.get("_upstreamPayload") or {}
        if not isinstance(upstream, dict):
            upstream = {"value": upstream}

        # Dict nodes first, then leaves: a subtree copied for a parent match is
        # in place before any of its leaves are written into it.
        flat = _dict_node_paths(upstream)
        flat.update(_flatten(upstream))
        all_paths = list(flat)

        if mode == "allow":
            result: Dict[str, Any] = {}
            missing: List[str] = []
            for pat in keys:
                p = str(pat).strip()
                if not p:
                    continue
                matches = _match_any(p, all_paths)
                if not matches:
                    missing.append(p)
                    if missing_behavior == "nullFill":
                        _set_path(result, p, None)
                    continue
                for m in matches:
                    value = flat[m]
                    # Copy subtrees so later writes never reach into upstream.
                    _set_path(result, m, copy.deepcopy(value) if isinstance(value, dict) else value)

            if missing and missing_behavior == "error":
                return ActionResult.isFailure(error=f"Missing keys: {missing}")

            if preserve_meta:
                for mk in _META_KEYS:
                    if mk in upstream:
                        result[mk] = upstream[mk]

            data: Dict[str, Any] = result
            if missing and missing_behavior != "error":
                data["_missingKeys"] = missing
            return ActionResult.isSuccess(data=data)

        # mode == "block"
        cloned = copy.deepcopy(upstream)
        removed: List[str] = []
        for pat in keys:
            p = str(pat).strip()
            if not p:
                continue
            for m in _match_any(p, all_paths):
                # Protect the meta keys and everything nested below them.
                if preserve_meta and m.split(".", 1)[0] in _META_KEYS:
                    continue
                if _del_path(cloned, m):
                    removed.append(m)
        cloned["_removedKeys"] = removed
        return ActionResult.isSuccess(data=cloned)
    except Exception as exc:
        logger.exception("filterContext failed")
        return ActionResult.isFailure(error=str(exc))
copy.deepcopy(v) if isinstance(v, (dict, list)) else v + + +def _coerce_to_list(value: Any) -> List[Any]: + """Normalise ``value`` to a list of items to merge.""" + if isinstance(value, list): + return value + if value is None: + return [] + return [value] + + +def _strip_document_data(doc: Any) -> Any: + """Keep document metadata but drop the raw blob so deep-merge stays small.""" + if not isinstance(doc, dict): + return doc + out = dict(doc) + out["documentData"] = None + return out + + +def _merge_payload(item: Any) -> Optional[Dict[str, Any]]: + """Return the dict to deep-merge for this item, or ``None`` to skip. + + ``documents[n].documentData`` is nulled before merging so large blobs + (e.g. ~3–4 MB handover-JSON per extractContent iteration) don't accumulate. + ``imageDocumentsOnly`` is left intact — ``_deep_merge`` list-concats it + across iterations, giving downstream nodes all images from all iterations. + """ + if not isinstance(item, dict): + return None + # Opt-in: only merge items that explicitly report success. + # Items without a ``success`` key (e.g. DocumentList, Transit outputs) are + # still included so non-action node results are not silently dropped. 
+ success_val = item.get("success") + if success_val is not None and success_val is not True: + return None + out = dict(item) + if isinstance(out.get("documents"), list): + out["documents"] = [_strip_document_data(d) for d in out["documents"]] + return out + + +def _primary_text_from_item(it: Any) -> str: + """Same sources as ``actionNodeExecutor`` / ``context.extractContent`` for primary text.""" + if not isinstance(it, dict): + return "" + r = it.get("response") + if r is not None and str(r).strip(): + return str(r).strip() + inner = it.get("data") + if isinstance(inner, dict): + r = inner.get("response") + if r is not None and str(r).strip(): + return str(r).strip() + ce_text = joined_text_from_extract_node_data(inner) + if ce_text.strip(): + return ce_text.strip() + docs = it.get("documents") + if not isinstance(docs, list) or not docs: + return "" + doc0 = docs[0] + raw: Any = None + if isinstance(doc0, dict): + raw = doc0.get("documentData") + elif hasattr(doc0, "documentData"): + raw = getattr(doc0, "documentData", None) + if isinstance(raw, bytes): + try: + return raw.decode("utf-8").strip() + except (UnicodeDecodeError, ValueError): + return "" + if isinstance(raw, dict): + return (joined_text_from_extract_node_data(raw) or "").strip() + if isinstance(raw, str) and raw.strip(): + s = raw.strip() + if s.startswith("{") and s.endswith("}"): + try: + parsed = json.loads(s) + if isinstance(parsed, dict): + return (joined_text_from_extract_node_data(parsed) or "").strip() + except (json.JSONDecodeError, TypeError): + pass + return s + return "" + + +def _sanitize_heading_title(name: str) -> str: + t = " ".join(name.replace("\r", " ").replace("\n", " ").split()).strip() + return t[:160] if len(t) > 160 else t + + +def _iteration_heading_from_item(it: Any) -> Optional[str]: + if not isinstance(it, dict): + return None + inner = it.get("data") + if isinstance(inner, dict): + meta = inner.get("_meta") if isinstance(inner.get("_meta"), dict) else {} + sf = 
def _synthesize_primary_response(merged: Dict[str, Any], inputs: List[Any]) -> str:
    """Produce the flat text used as ``ActionResult.response`` / by file.create.

    The primary texts of all inputs are joined with blank lines so that no
    loop iteration is lost (the deep-merged ``response`` only keeps the last
    one). With more than one input, each text is prefixed by a markdown
    ``###`` heading when a heading can be derived for it. Without any
    per-input text the merged ``response`` is used, then a JSON dump of
    ``merged``, then ``""``.
    """
    use_headings = len(inputs) > 1
    sections: List[str] = []
    for entry in inputs:
        text = _primary_text_from_item(entry)
        if not text:
            continue
        heading = _iteration_heading_from_item(entry) if use_headings else None
        sections.append(f"### {heading}\n\n{text}" if heading else text)
    if sections:
        return "\n\n".join(sections)

    if not isinstance(merged, dict):
        return ""
    fallback = merged.get("response")
    if fallback is not None:
        stripped = str(fallback).strip()
        if stripped:
            return stripped
    if not merged:
        return ""
    try:
        return json.dumps(merged, ensure_ascii=False, indent=2, default=str)
    except Exception:
        return str(merged)
async def mergeContext(self, parameters: Dict[str, Any]) -> ActionResult:
    """Deep-merge the results referenced by ``dataSource`` into one payload.

    ``parameters["dataSource"]`` must be set explicitly (a resolved DataRef);
    there is no fallback to upstream or loop payloads. A single value is
    treated as a one-item list and ``None`` entries are ignored.

    Returns a success result whose data wraps ``merged``, ``inputs``,
    ``first``, ``count``, ``conflicts`` (sorted, unique) and ``response``, or
    a failure result when ``dataSource`` is missing or empty, or when an
    unexpected error occurs.
    """
    try:
        # Parameter problems are reported directly; the except branch below is
        # for unexpected errors only and logs a traceback.
        if "dataSource" not in parameters:
            return ActionResult.isFailure(error="dataSource is required (set a DataRef on the merge node)")
        raw = parameters["dataSource"]
        if isinstance(raw, str) and not raw.strip():
            raw = None
        if raw is None:
            return ActionResult.isFailure(error="dataSource ist erforderlich (DataRef auf die Quelle setzen).")

        items = _coerce_to_list(raw)
        if not items:
            return ActionResult.isFailure(error="Keine Datenquelle angegeben oder Datenquelle ist leer.")

        merged: Dict[str, Any] = {}
        conflicts: List[str] = []
        inputs: List[Any] = []

        for item in items:
            if item is None:
                continue
            inputs.append(item)
            mergeable = _merge_payload(item)
            if mergeable:
                _deep_merge(merged, mergeable, conflicts)

        if not inputs:
            return ActionResult.isFailure(error="Alle Einträge in der Datenquelle sind leer.")

        primary = _synthesize_primary_response(merged, inputs)
        # ``response`` lives only at the top level of the data envelope. It is
        # deliberately not written into ``merged``, which keeps whatever the
        # natural merge produced (useful for debugging).

        preview = (primary[:200] + "\u2026") if len(primary) > 200 else primary
        logger.info(
            "mergeContext: inputs=%d merged_keys=%s primary_len=%d primary_preview=%r conflicts=%d",
            len(inputs),
            list(merged.keys())[:20],
            len(primary),
            preview,
            len(conflicts),
        )
        envelope: Dict[str, Any] = {
            "merged": merged,
            "inputs": inputs,
            "first": inputs[0],
            "count": len(inputs),
            "conflicts": sorted(set(conflicts)),
            "response": primary,
        }
        return ActionResult.isSuccess(data=wrap_merge_context_data(envelope))
    except Exception as exc:
        logger.exception("mergeContext failed")
        return ActionResult.isFailure(error=str(exc))
async def _neutralize_one_content_extracted(
    *,
    svc,
    content_extracted: ContentExtracted,
    operation_id: str,
    chat_doc_slot: int,
    chat_documents_len: int,
) -> ContentExtracted:
    """Run every part of ``content_extracted`` through the neutralization service.

    Per part:
    - entries that are neither ``ContentPart`` nor ``dict`` are kept untouched;
    - ``dict`` entries are parsed into ``ContentPart``; when parsing fails the
      raw dict is kept untouched (legacy behaviour);
    - parts without ``data`` are kept untouched;
    - image parts are base64-decoded and checked via ``processImageAsync``;
      they are kept only when the result status is ``"ok"``;
    - all other parts are sent to ``processTextAsync`` and replaced by a new
      ``ContentPart`` carrying ``neutralized_text``.

    Fail-safe: a part whose check/neutralization fails or is incomplete is
    dropped, never forwarded in its original form.

    ``chat_doc_slot`` / ``chat_documents_len`` only drive the progress value
    reported through ``svc.services.chat.progressLogUpdate``.

    Returns a new ``ContentExtracted`` with the same ``id`` and ``summary``.
    """
    kept = []
    for part in content_extracted.parts:
        if not isinstance(part, ContentPart):
            if not isinstance(part, dict):
                kept.append(part)
                continue
            try:
                part = ContentPart(**part)
            except Exception as e:
                logger.warning(f"Could not parse ContentPart: {str(e)}")
                kept.append(part)
                continue

        group = getattr(part, "typeGroup", "") or ""
        progress = 0.3 + (chat_doc_slot / max(1, chat_documents_len)) * 0.6

        # Nothing to neutralize: forward the part as it is.
        if not part.data:
            kept.append(part)
            continue

        if group == "image":
            try:
                svc.services.chat.progressLogUpdate(
                    operation_id,
                    progress,
                    f"Checking image part {len(kept) + 1}",
                )
                image_bytes = _b64.b64decode(str(part.data))
                verdict = await svc.services.neutralization.processImageAsync(image_bytes, f"part_{part.id}")
                if verdict.get("status") == "ok":
                    kept.append(part)
                else:
                    logger.warning("Fail-Safe: Image part %s blocked (PII), SKIPPING", part.id)
            except Exception as _img_err:
                logger.error(f"Fail-Safe: Image check failed for part {part.id}: {_img_err}, SKIPPING")
            continue

        try:
            svc.services.chat.progressLogUpdate(
                operation_id,
                progress,
                f"Neutralizing part {len(kept) + 1}",
            )
            outcome = await svc.services.neutralization.processTextAsync(part.data)
            if not (outcome and "neutralized_text" in outcome):
                logger.warning(
                    "Fail-Safe: Neutralization incomplete for part %s — SKIPPING (not passing original)",
                    part.id,
                )
                continue
            kept.append(
                ContentPart(
                    id=part.id,
                    parentId=part.parentId,
                    label=part.label,
                    typeGroup=part.typeGroup,
                    mimeType=part.mimeType,
                    data=outcome["neutralized_text"],
                    metadata=part.metadata.copy() if part.metadata else {},
                )
            )
        except Exception as e:
            logger.error(f"Fail-Safe: Error neutralizing part {part.id}: {str(e)}, SKIPPING")

    return ContentExtracted(
        id=content_extracted.id,
        parts=kept,
        summary=content_extracted.summary,
    )
parameters.get("documentList") - if not documentListParam: + document_list_param = parameters.get("documentList") + if not document_list_param: return ActionResult.isFailure(error="documentList is required") - documentList = coerceDocumentReferenceList(documentListParam) - if not documentList.references: - return ActionResult.isFailure( - error=f"documentList could not be parsed (type={type(documentListParam).__name__})" - ) - - # Get ChatDocuments from documentList - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) - if not chatDocuments: + doc_list = coerceDocumentReferenceList(document_list_param) + if not doc_list.references: + return ActionResult.isFailure(error=f"documentList invalid (empty)") + + chat_docs = self.services.chat.getChatDocumentsFromDocumentList(doc_list) + if not chat_docs: return ActionResult.isFailure(error="No documents found in documentList") - - # Return original documents as ActionDocuments - actionDocuments = [] - for chatDoc in chatDocuments: - # Extract ContentExtracted from documentData if available - if hasattr(chatDoc, 'documentData') and chatDoc.documentData: - actionDoc = ActionDocument( - documentName=getattr(chatDoc, 'fileName', 'unknown'), - documentData=chatDoc.documentData, - mimeType=getattr(chatDoc, 'mimeType', 'application/json'), - validationMetadata={ - "actionType": "context.neutralizeData", - "neutralized": False, - "reason": "Neutralization disabled" - } + + action_documents = [] + for chat_doc in chat_docs: + if hasattr(chat_doc, "documentData") and chat_doc.documentData: + action_documents.append( + ActionDocument( + documentName=getattr(chat_doc, "fileName", "unknown"), + documentData=chat_doc.documentData, + mimeType=getattr(chat_doc, "mimeType", "application/json"), + validationMetadata={ + "actionType": "context.neutralizeData", + "neutralized": False, + "reason": "Neutralization disabled", + }, + ) ) - actionDocuments.append(actionDoc) - - return 
ActionResult.isSuccess(documents=actionDocuments) - - documentListParam = parameters.get("documentList") - if not documentListParam: + return ActionResult.isSuccess(documents=action_documents) + + document_list_param = parameters.get("documentList") + if not document_list_param: return ActionResult.isFailure(error="documentList is required") - documentList = coerceDocumentReferenceList(documentListParam) - if not documentList.references: - return ActionResult.isFailure( - error=f"documentList could not be parsed (type={type(documentListParam).__name__})" - ) - - # Start progress tracking - parentOperationId = parameters.get('parentOperationId') + doc_list = coerceDocumentReferenceList(document_list_param) + if not doc_list.references: + return ActionResult.isFailure(error=f"documentList invalid") + + parent_operation_id = parameters.get("parentOperationId") self.services.chat.progressLogStart( - operationId, + operation_id, "Neutralizing data from documents", "Data Neutralization", - f"Documents: {len(documentList.references)}", - parentOperationId=parentOperationId + f"Documents: {len(doc_list.references)}", + parentOperationId=parent_operation_id, ) - - # Get ChatDocuments from documentList - self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents") - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) - - if not chatDocuments: - self.services.chat.progressLogFinish(operationId, False) + + self.services.chat.progressLogUpdate(operation_id, 0.2, "Loading documents") + chat_documents = self.services.chat.getChatDocumentsFromDocumentList(doc_list) + if not chat_documents: + self.services.chat.progressLogFinish(operation_id, False) return ActionResult.isFailure(error="No documents found in documentList") - - logger.info(f"Neutralizing data from {len(chatDocuments)} documents") - - # Process each document - self.services.chat.progressLogUpdate(operationId, 0.3, "Processing documents") - actionDocuments = [] - - for i, 
chatDoc in enumerate(chatDocuments): + + logger.info(f"Neutralizing data from {len(chat_documents)} document(s)") + self.services.chat.progressLogUpdate(operation_id, 0.3, "Processing documents") + action_documents = [] + + for i, chat_doc in enumerate(chat_documents): try: - # Extract ContentExtracted from documentData - if not hasattr(chatDoc, 'documentData') or not chatDoc.documentData: - logger.warning(f"Document {i+1} has no documentData, skipping") + dd = getattr(chat_doc, "documentData", None) + if not dd: + logger.warning(f"Document {i + 1} has no documentData, skipping") continue - - documentData = chatDoc.documentData - - # Check if it's a ContentExtracted object - if isinstance(documentData, ContentExtracted): - contentExtracted = documentData - elif isinstance(documentData, dict): - # Try to parse as ContentExtracted + + fn = str(getattr(chat_doc, "fileName", "") or "") + mime_guess = str(getattr(chat_doc, "mimeType", "") or "").lower() + if ( + mime_guess.startswith("image/") + and fn.startswith("extract_media_") + and not (isinstance(dd, dict) and dd.get("kind") == HANDOVER_KIND) + ): + action_documents.append( + ActionDocument( + documentName=fn or f"media_{i + 1}", + documentData=dd, + mimeType=mime_guess or "application/octet-stream", + validationMetadata={ + "actionType": "context.neutralizeData", + "neutralized": False, + "reason": "extractContent_media_sidecar_pass_through", + }, + ) + ) + continue + + # --- Unified JSON envelope from context.extractContent (v1) --- + if isinstance(dd, dict) and dd.get("kind") == HANDOVER_KIND: + bundle = dict(dd) + files_section = dd.get("files") or {} + new_files = {} + for fk, bucket in files_section.items(): + if not isinstance(bucket, dict): + continue + parts_raw = bucket.get("parts") or [] + parsed_parts = [] + for pd in parts_raw: + parsed_parts.append(ContentPart(**pd) if isinstance(pd, dict) else pd) + + summary = bucket.get("summary") or {} + if hasattr(summary, "model_dump"): + summary = 
summary.model_dump(mode="json") + + ce = ContentExtracted( + id=str(bucket.get("extractedId") or ""), + parts=parsed_parts, + summary=summary if isinstance(summary, dict) else {}, + ) + + ce_out = await _neutralize_one_content_extracted( + svc=self, + content_extracted=ce, + operation_id=operation_id, + chat_doc_slot=i, + chat_documents_len=max(len(chat_documents), 1), + ) + new_files[fk] = _one_file_bucket(ce_out, str(bucket.get("sourceFileName") or fk)) + + bundle["files"] = new_files + original_filename = getattr(chat_doc, "fileName", f"neutralized_bundle_{workflow_id}.json") + bn = original_filename.rsplit(".", 1)[0] if "." in original_filename else original_filename + action_documents.append( + ActionDocument( + documentName=f"{bn}_neutralized.json", + documentData=bundle, + mimeType="application/json", + validationMetadata={ + "actionType": "context.neutralizeData", + "neutralized": True, + "handoverKind": HANDOVER_KIND, + "bundleFileCount": len(new_files), + }, + ) + ) + continue + + # --- Legacy ContentExtracted per persisted document --- + if isinstance(dd, ContentExtracted): + content_extracted = dd + elif isinstance(dd, dict): try: - contentExtracted = ContentExtracted(**documentData) - except Exception as e: - logger.warning(f"Document {i+1} documentData is not ContentExtracted: {str(e)}") + content_extracted = ContentExtracted(**dd) + except Exception: + logger.warning(f"Document {i + 1} documentData cannot be parsed as ContentExtracted dict") continue else: - logger.warning(f"Document {i+1} documentData is not ContentExtracted or dict") + logger.warning(f"Document {i + 1} documentData is not supported") continue - - # Neutralize each ContentPart's data field - neutralizedParts = [] - for part in contentExtracted.parts: - if not isinstance(part, ContentPart): - # Try to parse as ContentPart - if isinstance(part, dict): - try: - part = ContentPart(**part) - except Exception as e: - logger.warning(f"Could not parse ContentPart: {str(e)}") - 
neutralizedParts.append(part) - continue - else: - neutralizedParts.append(part) - continue - - # Neutralize the data field based on typeGroup - _typeGroup = getattr(part, 'typeGroup', '') or '' - if _typeGroup == 'image' and part.data: - import base64 as _b64 - try: - self.services.chat.progressLogUpdate( - operationId, - 0.3 + (i / len(chatDocuments)) * 0.6, - f"Checking image part {len(neutralizedParts) + 1} of document {i+1}" - ) - _imgBytes = _b64.b64decode(str(part.data)) - _imgResult = await self.services.neutralization.processImageAsync(_imgBytes, f"part_{part.id}") - if _imgResult.get("status") == "ok": - neutralizedParts.append(part) - else: - logger.warning(f"Fail-Safe: Image part {part.id} blocked (PII detected), SKIPPING") - except Exception as _imgErr: - logger.error(f"Fail-Safe: Image check failed for part {part.id}: {_imgErr}, SKIPPING") - elif part.data: - try: - self.services.chat.progressLogUpdate( - operationId, - 0.3 + (i / len(chatDocuments)) * 0.6, - f"Neutralizing part {len(neutralizedParts) + 1} of document {i+1}" - ) - - neutralizationResult = await self.services.neutralization.processTextAsync(part.data) - - if neutralizationResult and 'neutralized_text' in neutralizationResult: - neutralizedData = neutralizationResult['neutralized_text'] - - neutralizedPart = ContentPart( - id=part.id, - parentId=part.parentId, - label=part.label, - typeGroup=part.typeGroup, - mimeType=part.mimeType, - data=neutralizedData, - metadata=part.metadata.copy() if part.metadata else {} - ) - neutralizedParts.append(neutralizedPart) - else: - logger.warning(f"Fail-Safe: Neutralization incomplete for part {part.id}, SKIPPING (not passing original)") - continue - except Exception as e: - logger.error(f"Fail-Safe: Error neutralizing part {part.id}, SKIPPING document (not passing original): {str(e)}") - continue - else: - neutralizedParts.append(part) - - # Create neutralized ContentExtracted object - neutralizedContentExtracted = ContentExtracted( - 
id=contentExtracted.id, - parts=neutralizedParts, - summary=contentExtracted.summary + + neut_out = await _neutralize_one_content_extracted( + svc=self, + content_extracted=content_extracted, + operation_id=operation_id, + chat_doc_slot=i, + chat_documents_len=max(len(chat_documents), 1), ) - - # Create ActionDocument - originalFileName = getattr(chatDoc, 'fileName', f"document_{i+1}.json") - baseName = originalFileName.rsplit('.', 1)[0] if '.' in originalFileName else originalFileName - documentName = f"{baseName}_neutralized_{contentExtracted.id}.json" - - validationMetadata = { - "actionType": "context.neutralizeData", - "documentIndex": i, - "extractedId": contentExtracted.id, - "partCount": len(neutralizedParts), - "neutralized": True, - "originalFileName": originalFileName - } - - actionDoc = ActionDocument( - documentName=documentName, - documentData=neutralizedContentExtracted, - mimeType="application/json", - validationMetadata=validationMetadata + + original_file_name = getattr(chat_doc, "fileName", f"document_{i + 1}.json") + base_name = original_file_name.rsplit(".", 1)[0] if "." 
in original_file_name else original_file_name + document_name = f"{base_name}_neutralized_{neut_out.id}.json" + + action_documents.append( + ActionDocument( + documentName=document_name, + documentData=neut_out, + mimeType="application/json", + validationMetadata={ + "actionType": "context.neutralizeData", + "documentIndex": i, + "extractedId": neut_out.id, + "partCount": len(neut_out.parts), + "neutralized": True, + "originalFileName": original_file_name, + }, + ) ) - actionDocuments.append(actionDoc) - + except Exception as e: - logger.error(f"Error processing document {i+1}: {str(e)}") - # Continue with other documents + logger.error(f"Error processing document {i + 1}: {str(e)}") continue - - if not actionDocuments: - self.services.chat.progressLogFinish(operationId, False) - return ActionResult.isFailure(error="No valid ContentExtracted documents found to neutralize") - - self.services.chat.progressLogFinish(operationId, True) - - return ActionResult.isSuccess(documents=actionDocuments) - + + if not action_documents: + self.services.chat.progressLogFinish(operation_id, False) + return ActionResult.isFailure(error="No valid documents found to neutralize") + + self.services.chat.progressLogFinish(operation_id, True) + return ActionResult.isSuccess(documents=action_documents) + except Exception as e: logger.error(f"Error in data neutralization: {str(e)}") - try: - if operationId: - self.services.chat.progressLogFinish(operationId, False) + if operation_id: + self.services.chat.progressLogFinish(operation_id, False) except Exception: pass - + return ActionResult.isFailure(error=str(e)) diff --git a/modules/workflows/methods/methodContext/actions/setContext.py b/modules/workflows/methods/methodContext/actions/setContext.py new file mode 100644 index 00000000..10f292b7 --- /dev/null +++ b/modules/workflows/methods/methodContext/actions/setContext.py @@ -0,0 +1,459 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights reserved. +"""Action ``context.setContext``. 
+ +Stores values in the workflow context (``local`` | ``global`` | ``session``). + +Each **assignment** row defines a target ``contextKey`` and how to obtain the value: + +- ``valueSource=pickUpstream`` — use ``upstreamRef`` (DataRef resolved by the graph) or, + for experts, a dotted ``sourcePath`` on ``_upstreamPayload``. +- ``valueSource=literal`` — use ``literal`` (with ``valueType`` coercion). +- ``valueSource=humanTask`` — pause and create a task (requires ``_automation2Interface``). + +Legacy graphs may still send ``entries`` / ``upstreamPick`` + ``targetKey``; those are +normalized into the same shape before processing. +""" + +from __future__ import annotations + +import json +import logging +from typing import Any, Dict, List, Optional, Tuple + +from modules.datamodels.datamodelChat import ActionResult +from modules.workflows.automation2.executors.inputExecutor import PauseForHumanTaskError + +logger = logging.getLogger(__name__) + + +_VALID_MODES = {"set", "setIfEmpty", "append", "increment"} +_VALID_SCOPES = {"local", "global", "session"} +_VALID_VALUE_SOURCES = {"pickUpstream", "literal", "humanTask"} + + +def _get_by_path(data: Any, dotted: str) -> Any: + """Traverse dict/list by dotted path (``payload.status``, ``items.0.name``).""" + if not dotted or not str(dotted).strip(): + return None + cur: Any = data + for seg in str(dotted).strip().split("."): + if cur is None: + return None + if isinstance(cur, dict) and seg in cur: + cur = cur[seg] + continue + if isinstance(cur, (list, tuple)): + try: + idx = int(seg) + except ValueError: + return None + if 0 <= idx < len(cur): + cur = cur[idx] + continue + return None + return cur + + +def _is_unresolved_ref(value: Any) -> bool: + return isinstance(value, dict) and value.get("type") == "ref" + + +def _coerce_type(value: Any, type_str: str) -> Any: + """Best-effort coerce ``value`` into the declared entry ``type``.""" + if type_str in (None, "", "any", "Any"): + return value + try: + if type_str == "str": + 
return "" if value is None else str(value) + if type_str == "int": + if isinstance(value, bool): + return int(value) + if value is None or value == "": + return 0 + return int(float(value)) + if type_str == "float": + if value is None or value == "": + return 0.0 + return float(value) + if type_str == "bool": + if isinstance(value, bool): + return value + if isinstance(value, (int, float)): + return bool(value) + return str(value).strip().lower() in ("1", "true", "yes", "on", "ja") + if type_str in ("list", "List", "array"): + if value is None: + return [] + if isinstance(value, str) and value.strip().startswith(("[", "{")): + try: + parsed = json.loads(value) + return parsed if isinstance(parsed, list) else [parsed] + except json.JSONDecodeError: + pass + return value if isinstance(value, list) else [value] + if type_str in ("object", "dict", "Dict"): + if isinstance(value, str) and value.strip().startswith("{"): + try: + parsed = json.loads(value) + return parsed if isinstance(value, dict) else {"value": parsed} + except json.JSONDecodeError: + pass + return value if isinstance(value, dict) else {"value": value} + except (TypeError, ValueError) as exc: + logger.warning("setContext._coerce_type %r → %s failed: %s", value, type_str, exc) + return value + + +def _resolve_store(scope: str, run_context: Optional[Dict[str, Any]]) -> Dict[str, Any]: + """Return the dict that backs the requested scope.""" + if not isinstance(run_context, dict): + return {} + if scope == "global": + return run_context.setdefault("_globalContext", {}) + if scope == "session": + return run_context.setdefault("_sessionContext", {}) + return run_context.setdefault("_localContext", {}) + + +def _entry_context_key(entry: Dict[str, Any]) -> Optional[str]: + ck = entry.get("contextKey") or entry.get("key") + if ck is None: + return None + s = str(ck).strip() + return s or None + + +def _apply_value_to_store( + store: Dict[str, Any], + context_key: str, + value: Any, + mode: str, + type_str: str, 
+) -> Optional[str]: + """Apply coerced ``value`` to ``store[context_key]``. Returns error string or None.""" + if mode not in _VALID_MODES: + return f"unknown mode '{mode}' on key '{context_key}'" + + coerced = _coerce_type(value, str(type_str or "")) + + if mode == "set": + store[context_key] = coerced + return None + if mode == "setIfEmpty": + if context_key not in store or store.get(context_key) in (None, "", [], {}): + store[context_key] = coerced + return None + if mode == "append": + existing = store.get(context_key) + if existing is None: + store[context_key] = [coerced] if not isinstance(coerced, list) else list(coerced) + elif isinstance(existing, list): + if isinstance(coerced, list): + existing.extend(coerced) + else: + existing.append(coerced) + elif isinstance(existing, str): + store[context_key] = existing + ("" if coerced is None else str(coerced)) + else: + store[context_key] = [existing, coerced] + return None + if mode == "increment": + existing = store.get(context_key, 0) + try: + store[context_key] = ( + float(existing) + float(coerced) + if isinstance(existing, float) or isinstance(coerced, float) + else int(existing) + int(coerced) + ) + except (TypeError, ValueError): + return f"increment requires numeric value/state for key '{context_key}'" + return None + return None + + +def _value_source(row: Dict[str, Any]) -> str: + vs = row.get("valueSource") + if isinstance(vs, str) and vs.strip() in _VALID_VALUE_SOURCES: + return vs.strip() + am = str(row.get("assignmentMode") or "direct").strip() + if am == "fromUpstream": + return "pickUpstream" + if am == "humanTask": + return "humanTask" + if am == "direct": + return "literal" + return "literal" + + +def _normalize_assignments(parameters: Dict[str, Any]) -> List[Dict[str, Any]]: + """Build a single list of assignment dicts from new or legacy parameters.""" + raw = parameters.get("assignments") + if isinstance(raw, list) and raw: + out: List[Dict[str, Any]] = [] + for item in raw: + if 
isinstance(item, dict): + out.append(dict(item)) + if out: + return out + + legacy_entries = parameters.get("entries") + global_pick = parameters.get("upstreamPick") + + if isinstance(legacy_entries, list) and legacy_entries: + out = [] + for entry in legacy_entries: + if not isinstance(entry, dict): + continue + row = dict(entry) + row["valueSource"] = _value_source(entry) + am = str(entry.get("assignmentMode") or "direct").strip() + if am == "fromUpstream" and not str(entry.get("sourcePath") or "").strip(): + if global_pick is not None and not (isinstance(global_pick, str) and not global_pick.strip()): + if not (isinstance(global_pick, (list, dict)) and len(global_pick) == 0): + row["upstreamRef"] = global_pick + if am == "direct": + row["literal"] = entry.get("value") + row["valueSource"] = "literal" + out.append(row) + if out: + return out + + tk = str(parameters.get("targetKey") or "").strip() + if tk and global_pick is not None: + if isinstance(global_pick, str) and not global_pick.strip(): + pass + elif isinstance(global_pick, (list, dict)) and len(global_pick) == 0: + pass + else: + return [ + { + "contextKey": tk, + "valueSource": "pickUpstream", + "upstreamRef": global_pick, + "mode": "set", + "valueType": "str", + } + ] + + return [] + + +def _resolve_pick_upstream( + row: Dict[str, Any], + upstream: Any, + parameters: Dict[str, Any], +) -> Tuple[Optional[Any], Optional[str]]: + path = str(row.get("sourcePath") or "").strip() + ref_val = row.get("upstreamRef") + + if ref_val is not None and ref_val != "": + if _is_unresolved_ref(ref_val): + return None, "upstream DataRef konnte nicht aufgelöst werden" + base: Any = ref_val + if path: + hit = _get_by_path(base, path) + if hit is None and isinstance(upstream, dict): + hit = _get_by_path(upstream, path) + if hit is not None: + return hit, None + return None, f"path '{path}' not found under picked value or upstream payload" + return base, None + + if path: + if not isinstance(upstream, dict): + return None, 
"sourcePath benötigt ein strukturiertes Upstream-Payload (dict)" + return _get_by_path(upstream, path), None + + return None, "Picker: Datenquelle wählen oder sourcePath (z. B. payload.status) setzen" + + +def _resolve_literal(row: Dict[str, Any]) -> Tuple[Optional[Any], Optional[str]]: + raw = row.get("literal") + if raw is None and "value" in row: + raw = row.get("value") + if raw is None: + return None, "literal value missing" + if isinstance(raw, (dict, list, bool, int, float)) or raw is None: + return raw, None + s = str(raw) + type_str = str(row.get("valueType") or row.get("type") or "str") + if type_str in ("object", "dict", "Dict", "list", "List", "array") and s.strip().startswith(("[", "{")): + try: + return json.loads(s), None + except json.JSONDecodeError as exc: + return None, f"invalid JSON literal: {exc}" + return s, None + + +def _pause_for_human_tasks( + *, + iface: Any, + run_context: Dict[str, Any], + parameters: Dict[str, Any], + pending_entries: List[Dict[str, Any]], + scope: str, +) -> None: + """Create a single human task for all ``humanTask`` rows and pause the run.""" + run_id = str(run_context.get("_runId") or "") + workflow_id = str(run_context.get("workflowId") or "") + node_id = str(parameters.get("_workflowNodeId") or "") + user_id = run_context.get("userId") + + cfg = { + "kind": "contextSetAssignment", + "scope": scope, + "entries": pending_entries, + "description": ( + "Set or confirm workflow context keys. After completion, resume the run;" + " submitted values should be merged into context by the task handler." 
async def setContext(self, parameters: Dict[str, Any]) -> ActionResult:
    """Apply the configured assignments to the workflow context store.

    Reads from ``parameters``:
    - ``scope``: ``local`` (default), ``global`` or ``session``;
    - assignment rows (new ``assignments`` or legacy shapes, normalized by
      ``_normalize_assignments``);
    - ``_runContext``: dict backing the context stores (required);
    - ``_upstreamPayload``: optional payload used for upstream picks and whose
      ``_meta`` dict is forwarded into the result.

    Returns a failure when the scope is invalid, no assignment is configured,
    the run context is missing, or every row failed and nothing was applied.
    Otherwise returns success with the applied key/value pairs plus
    ``_scope``, ``_appliedKeys`` and, when some rows failed, ``_warnings``.

    Raises:
        PauseForHumanTaskError: re-raised untouched when ``humanTask`` rows
            exist and an ``_automation2Interface`` is available, so the
            engine can pause the run. Every other exception is logged and
            converted into a failure result.
    """
    try:
        scope = str(parameters.get("scope") or "local")
        if scope not in _VALID_SCOPES:
            return ActionResult.isFailure(error=f"Invalid scope '{scope}', expected one of {sorted(_VALID_SCOPES)}")

        entries: List[Dict[str, Any]] = _normalize_assignments(parameters)
        if not entries:
            return ActionResult.isFailure(
                error="Mindestens eine Zuweisung konfigurieren (Ziel-Schlüssel, Quelle und Wert / Picker / Task).",
            )

        run_context = parameters.get("_runContext")
        if not isinstance(run_context, dict):
            return ActionResult.isFailure(error="internal: execution context missing")

        # ``store`` is the live dict inside ``run_context``; writes below are
        # visible to the rest of the run immediately.
        store = _resolve_store(scope, run_context)
        upstream = parameters.get("_upstreamPayload")

        applied: Dict[str, Any] = {}
        errors: List[str] = []
        human_rows: List[Dict[str, Any]] = []

        for entry in entries:
            if not isinstance(entry, dict):
                errors.append("entry is not an object")
                continue

            ck = _entry_context_key(entry)
            if not ck:
                errors.append("assignment needs contextKey")
                continue

            vs = _value_source(entry)
            # Defensive guard; kept in case the helper's contract changes.
            if vs not in _VALID_VALUE_SOURCES:
                errors.append(f"{ck}: unknown valueSource '{vs}'")
                continue

            # humanTask rows are only collected here; they are handled in one
            # batch after all direct assignments were applied.
            if vs == "humanTask":
                human_rows.append(
                    {
                        "contextKey": ck,
                        "sourcePath": entry.get("sourcePath"),
                        "taskTitle": entry.get("taskTitle"),
                        "taskDescription": entry.get("taskDescription"),
                        "type": entry.get("valueType") or entry.get("type"),
                        "mode": entry.get("mode") or "set",
                    }
                )
                continue

            val: Any = None
            err: Optional[str] = None

            if vs == "pickUpstream":
                val, err = _resolve_pick_upstream(entry, upstream, parameters)
            else:
                val, err = _resolve_literal(entry)

            if err:
                errors.append(f"{ck}: {err}")
                continue

            err2 = _apply_value_to_store(
                store,
                ck,
                val,
                str(entry.get("mode") or "set"),
                str(entry.get("valueType") or entry.get("type") or ""),
            )
            if err2:
                errors.append(f"{ck}: {err2}")
                continue
            # Report the stored value (after mode/coercion), not the raw input.
            applied[ck] = store.get(ck)

        iface = run_context.get("_automation2Interface")
        if human_rows:
            if iface:
                # Does not return normally: the helper ends by raising
                # (PauseForHumanTaskError, or RuntimeError when ids are missing).
                _pause_for_human_tasks(
                    iface=iface,
                    run_context=run_context,
                    parameters=parameters,
                    pending_entries=human_rows,
                    scope=scope,
                )
            else:
                # No interface to create a task on: surface the situation in
                # the result under underscore-prefixed keys instead of failing.
                applied["_humanTaskFallback"] = (
                    "humanTask requires a live automation2 interface on the run; "
                    "configure execution via the graphical editor API or add an input.human node."
                )
                applied["_pendingHumanContextKeys"] = [r["contextKey"] for r in human_rows]

        if errors and not applied and not human_rows:
            return ActionResult.isFailure(error="; ".join(errors))

        data: Dict[str, Any] = dict(applied)
        data["_scope"] = scope
        # Underscore-prefixed entries are bookkeeping, not user context keys.
        data["_appliedKeys"] = [k for k in applied if not str(k).startswith("_")]
        if errors:
            data["_warnings"] = errors

        if isinstance(upstream, dict):
            meta = upstream.get("_meta")
            if isinstance(meta, dict):
                data["_meta"] = meta
        # NOTE(review): the nesting level of this statement could not be
        # recovered from the whitespace-collapsed patch — confirm it is not
        # meant to sit inside the ``isinstance(upstream, dict)`` branch.
        data.setdefault("_transit", True)

        return ActionResult.isSuccess(data=data)
    except PauseForHumanTaskError:
        # Control-flow signal for the engine; must not be turned into a failure.
        raise
    except Exception as exc:
        logger.exception("setContext failed")
        return ActionResult.isFailure(error=str(exc))
``address.city``) +- ``flatten`` — copy a nested dict's leaves up to the configured ``flattenDepth`` +- ``compute`` — render a ``{{...}}`` template using the upstream payload as scope +""" + +from __future__ import annotations + +import logging +import re +from typing import Any, Dict, List, Optional + +from modules.datamodels.datamodelChat import ActionResult +from modules.workflows.methods.methodContext.contextEnvelope import wrap_transform_context_data + +logger = logging.getLogger(__name__) + + +_VALID_OPERATIONS = {"rename", "cast", "nest", "flatten", "compute"} + + +def _get_path(payload: Any, dotted: str) -> Any: + cur = payload + for seg in str(dotted).split("."): + if cur is None: + return None + if isinstance(cur, dict): + cur = cur.get(seg) + continue + if isinstance(cur, list): + try: + cur = cur[int(seg)] + except (ValueError, IndexError): + return None + continue + return None + return cur + + +def _set_path(target: Dict[str, Any], dotted: str, value: Any) -> None: + parts = str(dotted).split(".") + cur = target + for seg in parts[:-1]: + nxt = cur.get(seg) + if not isinstance(nxt, dict): + nxt = {} + cur[seg] = nxt + cur = nxt + cur[parts[-1]] = value + + +def _coerce_type(value: Any, type_str: str) -> Any: + if type_str in (None, "", "any", "Any"): + return value + if type_str == "str": + return "" if value is None else str(value) + if type_str == "int": + if isinstance(value, bool): + return int(value) + if value is None or value == "": + raise ValueError("empty value") + return int(float(value)) + if type_str == "float": + if value is None or value == "": + raise ValueError("empty value") + return float(value) + if type_str == "bool": + if isinstance(value, bool): + return value + if isinstance(value, (int, float)): + return bool(value) + return str(value).strip().lower() in ("1", "true", "yes", "on", "ja") + if type_str in ("list", "List", "array"): + return value if isinstance(value, list) else ([value] if value is not None else []) + if 
type_str in ("object", "dict", "Dict"): + return value if isinstance(value, dict) else {"value": value} + return value + + +_TEMPLATE_RE = re.compile(r"\{\{\s*([^{}\s|]+)(?:\s*\|\s*([^{}]*))?\s*\}\}") + + +def _apply_filter(value: Any, filter_chain: str) -> Any: + """Minimal filter pipeline: ``upper``, ``lower``, ``trim``, ``default:foo``.""" + out = value + for token in filter_chain.split("|"): + f = token.strip() + if not f: + continue + if f == "upper": + out = "" if out is None else str(out).upper() + elif f == "lower": + out = "" if out is None else str(out).lower() + elif f == "trim": + out = "" if out is None else str(out).strip() + elif f.startswith("default:"): + if out is None or out == "": + out = f.split(":", 1)[1] + else: + logger.debug("transformContext: unknown filter '%s' ignored", f) + return out + + +def _render_template(template: str, scope: Dict[str, Any]) -> str: + def replace(match: re.Match) -> str: + path = match.group(1) + filters = match.group(2) or "" + value = _get_path(scope, path) + if filters: + value = _apply_filter(value, filters) + return "" if value is None else str(value) + + return _TEMPLATE_RE.sub(replace, template) + + +def _flatten_with_depth(node: Any, depth: int, prefix: str = "") -> Dict[str, Any]: + out: Dict[str, Any] = {} + if not isinstance(node, dict) or depth == 0: + if prefix: + out[prefix] = node + return out + for k, v in node.items(): + path = f"{prefix}.{k}" if prefix else str(k) + if isinstance(v, dict) and depth != 1: + out.update(_flatten_with_depth(v, depth - 1 if depth > 0 else -1, path)) + elif isinstance(v, dict): + out[path] = v + else: + out[path] = v + return out + + +async def transformContext(self, parameters: Dict[str, Any]) -> ActionResult: + try: + mappings: List[Dict[str, Any]] = parameters.get("mappings") or [] + if not isinstance(mappings, list) or not mappings: + return ActionResult.isFailure(error="'mappings' must be a non-empty list") + + passthrough = 
bool(parameters.get("passthroughUnmapped", False)) + flatten_depth = int(parameters.get("flattenDepth") or 1) + + upstream = parameters.get("_upstreamPayload") + if not isinstance(upstream, dict): + upstream = {"value": upstream} if upstream is not None else {} + + result: Dict[str, Any] = {} + consumed_paths: set = set() + cast_errors: Dict[str, str] = {} + + for m in mappings: + if not isinstance(m, dict): + continue + op = str(m.get("operation") or "rename") + if op not in _VALID_OPERATIONS: + cast_errors[str(m.get("outputField") or "?")] = f"unknown operation '{op}'" + continue + output_field = str(m.get("outputField") or "").strip() + if not output_field: + continue + source_field = str(m.get("sourceField") or "").strip() + target_type = str(m.get("type") or "") + + if op == "compute": + expression = str(m.get("expression") or m.get("sourceField") or "") + value = _render_template(expression, upstream) + if target_type: + try: + value = _coerce_type(value, target_type) + except (TypeError, ValueError) as exc: + cast_errors[output_field] = str(exc) + value = None + _set_path(result, output_field, value) + continue + + if op == "flatten": + base = _get_path(upstream, source_field) if source_field else upstream + flat = _flatten_with_depth(base, flatten_depth, output_field if source_field else "") + for path, val in flat.items(): + _set_path(result, path or output_field, val) + if source_field: + consumed_paths.add(source_field) + continue + + value = _get_path(upstream, source_field) if source_field else None + if source_field: + consumed_paths.add(source_field) + + if op == "cast" and target_type: + try: + value = _coerce_type(value, target_type) + except (TypeError, ValueError) as exc: + cast_errors[output_field] = str(exc) + value = None + elif op == "rename" and target_type: + # Optional explicit type on rename is treated like cast best-effort. 
+ try: + value = _coerce_type(value, target_type) + except (TypeError, ValueError) as exc: + cast_errors[output_field] = str(exc) + # ``nest`` is implicit: dotted ``outputField`` writes into a nested dict + _set_path(result, output_field, value) + + if passthrough: + for k, v in upstream.items(): + if k.startswith("_"): + continue + if k in result or k in consumed_paths: + continue + result[k] = v + + if cast_errors: + result["_castErrors"] = cast_errors + return ActionResult.isSuccess(data=wrap_transform_context_data(result)) + except Exception as exc: + logger.exception("transformContext failed") + return ActionResult.isFailure(error=str(exc)) diff --git a/modules/workflows/methods/methodContext/contextEnvelope.py b/modules/workflows/methods/methodContext/contextEnvelope.py new file mode 100644 index 00000000..c35836cf --- /dev/null +++ b/modules/workflows/methods/methodContext/contextEnvelope.py @@ -0,0 +1,42 @@ +# Copyright (c) 2026 Patrick Motsch +"""Versioned ``ActionResult.data`` envelope for context.* actions (merge, transform).""" + +from __future__ import annotations + +from typing import Any, Dict + +CONTEXT_MERGE_KIND = "context.mergeContext.v1" +CONTEXT_MERGE_SCHEMA_VERSION = 1 + +CONTEXT_TRANSFORM_KIND = "context.transformContext.v1" +CONTEXT_TRANSFORM_SCHEMA_VERSION = 1 + + +def wrap_merge_context_data(body: Dict[str, Any]) -> Dict[str, Any]: + """Wrap merge payload: ``schemaVersion``, ``kind``, body fields, ``_meta`` last.""" + meta: Dict[str, Any] = { + "actionType": "context.mergeContext", + "mergePayloadSchemaVersion": CONTEXT_MERGE_SCHEMA_VERSION, + } + out: Dict[str, Any] = { + "schemaVersion": CONTEXT_MERGE_SCHEMA_VERSION, + "kind": CONTEXT_MERGE_KIND, + } + out.update(body) + out["_meta"] = meta + return out + + +def wrap_transform_context_data(fields: Dict[str, Any]) -> Dict[str, Any]: + """Wrap transform output fields under a versioned envelope (``_meta`` overwrites same key in fields).""" + meta: Dict[str, Any] = { + "actionType": 
"context.transformContext", + "transformPayloadSchemaVersion": CONTEXT_TRANSFORM_SCHEMA_VERSION, + } + out: Dict[str, Any] = { + "schemaVersion": CONTEXT_TRANSFORM_SCHEMA_VERSION, + "kind": CONTEXT_TRANSFORM_KIND, + } + out.update(fields) + out["_meta"] = meta + return out diff --git a/modules/workflows/methods/methodContext/methodContext.py b/modules/workflows/methods/methodContext/methodContext.py index d5f62772..80e0c089 100644 --- a/modules/workflows/methods/methodContext/methodContext.py +++ b/modules/workflows/methods/methodContext/methodContext.py @@ -15,6 +15,10 @@ from .actions.getDocumentIndex import getDocumentIndex from .actions.extractContent import extractContent from .actions.neutralizeData import neutralizeData from .actions.triggerPreprocessingServer import triggerPreprocessingServer +from .actions.setContext import setContext +from .actions.mergeContext import mergeContext +from .actions.filterContext import filterContext +from .actions.transformContext import transformContext logger = logging.getLogger(__name__) @@ -52,7 +56,11 @@ class MethodContext(MethodBase): ), "extractContent": WorkflowActionDefinition( actionId="context.extractContent", - description="Extract raw content parts from documents without AI processing. Returns ContentParts with different typeGroups (text, image, table, structure, container). Images are returned as base64 data, not as extracted text. Text content is extracted from text-based formats (PDF text layers, Word docs, etc.) but NOT from images (no OCR). Use this action to prepare documents for subsequent AI processing actions.", + description=( + "Extract document content without AI. Returns `data` as the configured presentation " + "envelope (`fileOrder`, `files`, …) plus `_meta`; no duplicated service payload or bundled " + "plain-text column. Persisted images appear via `embeddedImageFileId` in internal serial only." 
+ ), dynamicMode=True, outputType="UdmDocument", parameters={ @@ -61,15 +69,125 @@ class MethodContext(MethodBase): type="DocumentList", frontendType=FrontendType.DOCUMENT_REFERENCE, required=True, - description="Document reference(s) to extract content from" + description="Document reference(s) to extract content from", ), - "extractionOptions": WorkflowActionParameter( - name="extractionOptions", - type="Dict[str,Any]", - frontendType=FrontendType.JSON, + "contentFilter": WorkflowActionParameter( + name="contentFilter", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["all", "textOnly", "imagesOnly", "noImages"], required=False, - description="Extraction options (if not provided, defaults are used). Note: This action does NOT use AI - it performs pure content extraction. Images are preserved as base64 data, not converted to text." - ) + default="all", + description="Which extracted parts to keep (text, images, etc.).", + ), + "outputMode": WorkflowActionParameter( + name="outputMode", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["blob", "lines", "pages", "chunks", "structured"], + required=False, + default="lines", + description="How results are structured under presentation files.", + ), + "splitBy": WorkflowActionParameter( + name="splitBy", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["newline", "paragraph", "sentence"], + required=False, + default="newline", + description="Delimiter for lines/chunks output modes.", + ), + "chunkSizeUnit": WorkflowActionParameter( + name="chunkSizeUnit", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["tokens", "characters", "words"], + required=False, + default="tokens", + description="Unit for chunkSize and chunkOverlap when outputMode is chunks.", + ), + "chunkSize": WorkflowActionParameter( + name="chunkSize", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["256", "500", "1000", "2000", "4000"], + required=False, + 
default="500", + description="Target chunk size when outputMode is chunks.", + ), + "chunkOverlap": WorkflowActionParameter( + name="chunkOverlap", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["0", "25", "50", "100", "200"], + required=False, + default="0", + description="Overlap between consecutive chunks.", + ), + "filterEmptyLines": WorkflowActionParameter( + name="filterEmptyLines", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["true", "false"], + required=False, + default="true", + description="Remove empty segments for lines/chunks modes.", + ), + "trimWhitespace": WorkflowActionParameter( + name="trimWhitespace", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["true", "false"], + required=False, + default="true", + description="Trim leading/trailing whitespace per segment.", + ), + "includeLineNumbers": WorkflowActionParameter( + name="includeLineNumbers", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["true", "false"], + required=False, + default="false", + description="Prefix line numbers when outputMode is lines.", + ), + "includeMetadata": WorkflowActionParameter( + name="includeMetadata", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["true", "false"], + required=False, + default="false", + description="Attach filename and offsets to line/chunk entries.", + ), + "csvHeaderRow": WorkflowActionParameter( + name="csvHeaderRow", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["true", "false"], + required=False, + default="true", + description="Treat first CSV row as column headers.", + ), + "pdfExtractMode": WorkflowActionParameter( + name="pdfExtractMode", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["text", "tables", "images", "all"], + required=False, + default="all", + description="Presentation-layer filter by part type group.", + ), + "markdownPreserveFormatting": WorkflowActionParameter( + 
name="markdownPreserveFormatting", + type="str", + frontendType=FrontendType.SELECT, + frontendOptions=["true", "false"], + required=False, + default="false", + description="Keep markdown markup on text parts when false strips lightly.", + ), }, execute=extractContent.__get__(self, self.__class__) ), @@ -116,7 +234,123 @@ class MethodContext(MethodBase): ) }, execute=triggerPreprocessingServer.__get__(self, self.__class__) - ) + ), + "setContext": WorkflowActionDefinition( + actionId="context.setContext", + description=( + "Set workflow context: list of assignments with target key, then upstream picker, " + "fixed literal, or human task per row." + ), + outputType="Transit", + parameters={ + "scope": WorkflowActionParameter( + name="scope", type="str", required=False, + frontendType=FrontendType.SELECT, + frontendOptions=["local", "global", "session"], + default="local", + description="Storage scope for keys written by this node", + ), + "assignments": WorkflowActionParameter( + name="assignments", type="List[Any]", required=True, + frontendType=FrontendType.CONTEXT_ASSIGNMENTS, + default=[], + description=( + "List of rows: contextKey, valueSource (pickUpstream | literal | humanTask), " + "upstreamRef, literal, sourcePath, mode, valueType, task fields." + ), + ), + }, + execute=setContext.__get__(self, self.__class__), + ), + "mergeContext": WorkflowActionDefinition( + actionId="context.mergeContext", + description=( + "Führt Schritte zu einem Dict zusammen. ``data`` enthält einen versionierten Umschlag " + "(``context.mergeContext.v1``, ``merged``, ``response``, …) und ``_meta``." + ), + outputType="ActionResult", + parameters={ + "dataSource": WorkflowActionParameter( + name="dataSource", + type="Any", + frontendType=FrontendType.CONTEXT_BUILDER, + required=True, + description=( + "Datenquelle (DataRef), meist Schleife → Alle Schleifen-Ergebnisse. " + "Pflichtfeld — die Implementierung wirft einen Fehler wenn kein Wert übergeben wird." 
+ ), + ), + }, + execute=mergeContext.__get__(self, self.__class__), + ), + "filterContext": WorkflowActionDefinition( + actionId="context.filterContext", + description=( + "Allow- or block-list keys/paths from the upstream payload. " + "Supports glob patterns (user.*, *.id) and dotted paths (address.city). " + "Missing-key behaviour is configurable (skip, nullFill, error)." + ), + outputType="Transit", + parameters={ + "mode": WorkflowActionParameter( + name="mode", type="str", required=False, + frontendType=FrontendType.SELECT, + frontendOptions=["allow", "block"], + default="allow", + description="allow = only these keys pass; block = these keys are removed", + ), + "keys": WorkflowActionParameter( + name="keys", type="List[str]", required=True, + frontendType=FrontendType.JSON, + default=[], + description="Key paths or glob patterns", + ), + "missingKeyBehavior": WorkflowActionParameter( + name="missingKeyBehavior", type="str", required=False, + frontendType=FrontendType.SELECT, + frontendOptions=["skip", "nullFill", "error"], + default="skip", + description="What to do when an allowed key is missing in the input", + ), + "preserveMeta": WorkflowActionParameter( + name="preserveMeta", type="bool", required=False, + frontendType=FrontendType.CHECKBOX, + default=True, + description="Always pass through internal meta fields (_success, _error, _transit)", + ), + }, + execute=filterContext.__get__(self, self.__class__), + ), + "transformContext": WorkflowActionDefinition( + actionId="context.transformContext", + description=( + "Transform mappings on the upstream payload. ``data`` trägt " + "``schemaVersion``, ``kind: context.transformContext.v1``, die gemappten Felder " + "und optional ``_castErrors``, plus ``_meta``." 
+ ), + outputType="Transit", + parameters={ + "mappings": WorkflowActionParameter( + name="mappings", type="List[Any]", required=True, + frontendType=FrontendType.MAPPING_TABLE, + default=[], + description="List of mapping entries", + ), + "passthroughUnmapped": WorkflowActionParameter( + name="passthroughUnmapped", type="bool", required=False, + frontendType=FrontendType.CHECKBOX, + default=False, + description="Forward fields of the upstream payload that no mapping consumed", + ), + "flattenDepth": WorkflowActionParameter( + name="flattenDepth", type="int", required=False, + frontendType=FrontendType.NUMBER, + default=1, + description="Depth for flatten operation (1 = one level, -1 = full)", + ), + }, + execute=transformContext.__get__(self, self.__class__), + ), } # Validate actions after definition @@ -127,4 +361,8 @@ class MethodContext(MethodBase): self.extractContent = extractContent.__get__(self, self.__class__) self.neutralizeData = neutralizeData.__get__(self, self.__class__) self.triggerPreprocessingServer = triggerPreprocessingServer.__get__(self, self.__class__) + self.setContext = setContext.__get__(self, self.__class__) + self.mergeContext = mergeContext.__get__(self, self.__class__) + self.filterContext = filterContext.__get__(self, self.__class__) + self.transformContext = transformContext.__get__(self, self.__class__) diff --git a/modules/workflows/methods/methodFile/actions/create.py b/modules/workflows/methods/methodFile/actions/create.py index 2fef9e9e..9342767f 100644 --- a/modules/workflows/methods/methodFile/actions/create.py +++ b/modules/workflows/methods/methodFile/actions/create.py @@ -1,20 +1,60 @@ # Copyright (c) 2025 Patrick Motsch # All rights reserved. 
+from typing import Any, Dict, List, Optional + +import ast import base64 +import binascii +import io +import json import logging -from typing import Dict, Any +import re from modules.datamodels.datamodelChat import ActionResult, ActionDocument -from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson from modules.shared.i18nRegistry import normalizePrimaryLanguageTag +from modules.workflows.automation2.executors.actionNodeExecutor import _coerce_document_data_to_bytes +from modules.workflows.methods.methodAi._common import is_image_action_document_list +from modules.workflows.methods.methodContext.actions.extractContent import ( + presentation_envelopes_to_document_json, +) logger = logging.getLogger(__name__) +_SAFE_FILENAME = re.compile(r'[^\w\-.\(\)\s\[\]%@+]') + + +def _coerce_structured_context(raw: Any) -> Any: + """Undo legacy ``str`` coercion on structured refs (loop ``bodyResults``, presentation).""" + if not isinstance(raw, str): + return raw + stripped = raw.strip() + if not stripped or stripped[0] not in ("[", "{"): + return raw + for loader in (json.loads, ast.literal_eval): + try: + parsed = loader(stripped) + except (json.JSONDecodeError, ValueError, SyntaxError, TypeError): + continue + if isinstance(parsed, (dict, list)): + return parsed + return raw + + +def _raw_context_preview_for_log(raw: Any, max_len: int = 500) -> str: + if raw is None: + return "None" + s = raw if isinstance(raw, str) else repr(raw) + s = s.replace("\r", "\\r").replace("\n", "\\n") + if len(s) <= max_len: + return s + return s[:max_len] + f"...<{len(s)} chars>" + def _persistDocumentsToUserFiles( action_documents: list, services, + folder_id: Optional[str] = None, ) -> None: """Persist file.create output documents to user's file storage (like upload). 
Adds fileId to each document's validationMetadata for download links in UI.""" @@ -35,19 +75,16 @@ def _persistDocumentsToUserFiles( return if not mgmt: return - logger.info( - "file.create persist: mgmt=%s id(mgmt)=%s has_createFileData=%s", - type(mgmt).__name__, - id(mgmt), - hasattr(mgmt, "createFileData"), - ) for doc in action_documents: try: doc_data = doc.documentData if hasattr(doc, "documentData") else doc.get("documentData") if not doc_data: continue if isinstance(doc_data, str): - content = base64.b64decode(doc_data) + try: + content = base64.b64decode(doc_data, validate=True) + except (TypeError, ValueError, binascii.Error): + content = doc_data.encode("utf-8") else: content = doc_data doc_name = ( @@ -60,15 +97,8 @@ def _persistDocumentsToUserFiles( or doc.get("mimeType") or "application/octet-stream" ) - logger.info( - "file.create persist: calling createFile name=%s bytes=%s", - doc_name, - len(content), - ) - file_item = mgmt.createFile(doc_name, mime, content) - logger.info("file.create persist: createFile returned id=%s", file_item.id) - ok = mgmt.createFileData(file_item.id, content) - logger.info("file.create persist: createFileData returned %s for id=%s", ok, file_item.id) + file_item = mgmt.createFile(doc_name, mime, content, folderId=folder_id) + mgmt.createFileData(file_item.id, content) meta = getattr(doc, "validationMetadata", None) or doc.get("validationMetadata") or {} if isinstance(meta, dict): meta["fileId"] = file_item.id @@ -76,27 +106,154 @@ def _persistDocumentsToUserFiles( doc.validationMetadata = meta elif isinstance(doc, dict): doc["validationMetadata"] = meta - logger.info("file.create: persisted %s to user files (id=%s)", doc_name, file_item.id) except Exception as e: dname = getattr(doc, "documentName", None) or doc.get("documentName", "?") logger.warning("file.create: failed to persist document %s: %s", dname, e) -async def create(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Create a file from context 
(text/markdown from upstream AI node). - Uses GenerationService.renderReport to produce docx, pdf, txt, md, html, xlsx, etc. - """ - from modules.workflows.methods.methodAi._common import serialize_context - raw_context = parameters.get("context", "") or parameters.get("text", "") or "" - context = serialize_context(raw_context) +def _sanitize_output_stem(title: str) -> str: + t = (title or "").strip() or "Document" + stem = _SAFE_FILENAME.sub("_", t).strip("._") + return stem[:120] if stem else "Document" - if not context: - logger.warning( - "file.create: context empty after resolve — check DataRefs (e.g. Antworttext / " - "documents[0].documentData from the AI step)." + +def _get_management_interface(services) -> Optional[Any]: + mgmt = getattr(services, "interfaceDbComponent", None) + if mgmt: + return mgmt + try: + import modules.interfaces.interfaceDbManagement as iface + + user = getattr(services, "user", None) + if not user: + return None + return iface.getInterface( + user, + mandateId=getattr(services, "mandateId", None) or "", + featureInstanceId=getattr(services, "featureInstanceId", None) or "", ) - return ActionResult.isFailure(error="context is required (connect an AI node or provide text)") + except Exception as e: + logger.warning("file.create: could not get management interface: %s", e) + return None + + +def _load_image_bytes_from_action_doc(doc: dict, services) -> Optional[bytes]: + raw = doc.get("documentData") + blob = _coerce_document_data_to_bytes(raw) + if blob: + return blob + fid = doc.get("fileId") + if not fid and isinstance(doc.get("validationMetadata"), dict): + fid = (doc.get("validationMetadata") or {}).get("fileId") + if fid and str(fid).strip(): + mgmt = _get_management_interface(services) + if mgmt and hasattr(mgmt, "getFileData"): + try: + return mgmt.getFileData(str(fid)) + except Exception as e: + logger.warning("file.create: getFileData(%s) failed: %s", fid, e) + return None + + +def _images_list_to_pdf(image_bytes_list: 
List[bytes]) -> bytes: + import fitz + + pdf = fitz.open() + try: + for blob in image_bytes_list: + page = pdf.new_page() + page.insert_image(page.rect, stream=blob, keep_proportion=True) + return pdf.tobytes() + finally: + pdf.close() + + +def _images_list_to_docx(image_bytes_list: List[bytes]) -> bytes: + from docx import Document + from docx.shared import Inches + + doc = Document() + for blob in image_bytes_list: + p = doc.add_paragraph() + run = p.add_run() + run.add_picture(io.BytesIO(blob), width=Inches(6.5)) + doc.add_paragraph() + out = io.BytesIO() + doc.save(out) + return out.getvalue() + + +async def _create_merged_image_documents( + self, + parameters: Dict[str, Any], + image_docs: List[dict], +) -> ActionResult: + """Build one PDF or DOCX containing all extracted images (``imageDocumentsOnly``).""" + output_format = (parameters.get("outputFormat") or "docx").strip().lower().lstrip(".") + title = (parameters.get("title") or "Document").strip() + stem = _sanitize_output_stem(title) + folder_id: Optional[str] = None + raw_folder = parameters.get("folderId") + if raw_folder is not None and str(raw_folder).strip(): + folder_id = str(raw_folder).strip() + + if output_format not in ("pdf", "docx"): + return ActionResult.isFailure( + error=( + f"Nur-Bilder-Kontext: „{output_format}“ wird nicht unterstützt. " + "Bitte Ausgabeformat „pdf“ oder „docx“ wählen." + ) + ) + + blobs: List[bytes] = [] + for d in image_docs: + b = _load_image_bytes_from_action_doc(d, self.services) + if not b: + name = d.get("documentName") or "?" + return ActionResult.isFailure( + error=f"Bilddaten fehlen oder sind nicht lesbar (Datei: {name})." 
+ ) + blobs.append(b) + + if output_format == "pdf": + try: + combined = _images_list_to_pdf(blobs) + except Exception as e: + logger.warning("file.create: PDF merge failed: %s", e, exc_info=True) + return ActionResult.isFailure(error=f"PDF aus Bildern konnte nicht erzeugt werden: {e}") + out_name = f"{stem}.pdf" + mime = "application/pdf" + else: + combined = _images_list_to_docx(blobs) + out_name = f"{stem}.docx" + mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + + if not combined: + return ActionResult.isFailure(error="Zusammenfügen der Bilder ergab leere Ausgabe") + + doc_b64 = base64.b64encode(combined).decode("ascii") + action_documents = [ + ActionDocument( + documentName=out_name, + documentData=doc_b64, + mimeType=mime, + validationMetadata={ + "actionType": "file.create", + "outputFormat": output_format, + "source": "mergedImageDocumentsOnly", + }, + ) + ] + _persistDocumentsToUserFiles(action_documents, self.services, folder_id=folder_id) + return ActionResult.isSuccess(documents=action_documents) + + +async def create(self, parameters: Dict[str, Any]) -> ActionResult: + """Create a file from ``context.extractContent`` presentation data via ``renderReport``.""" + raw_context = _coerce_structured_context(parameters.get("context", "")) + + if isinstance(raw_context, list) and is_image_action_document_list(raw_context): + return await _create_merged_image_documents(self, parameters, raw_context) outputFormat = (parameters.get("outputFormat") or "docx").strip().lower().lstrip(".") title = (parameters.get("title") or "Document").strip() @@ -106,16 +263,36 @@ async def create(self, parameters: Dict[str, Any]) -> ActionResult: "de", ) + folder_id: Optional[str] = None + raw_folder = parameters.get("folderId") + if raw_folder is not None and str(raw_folder).strip(): + folder_id = str(raw_folder).strip() + try: - structured_content = markdownToDocumentJson(context, title, language) - if templateName: - 
structured_content.setdefault("metadata", {})["templateName"] = templateName + structured_content = presentation_envelopes_to_document_json( + raw_context, + title=title, + language=language, + services=self.services, + ) + except ValueError as e: + logger.warning( + "file.create: invalid presentation context type=%s preview=%r: %s", + type(raw_context).__name__, + _raw_context_preview_for_log(raw_context), + e, + ) + return ActionResult.isFailure(error=str(e)) - generation = getattr(self.services, "generation", None) - if not generation: - return ActionResult.isFailure(error="Generation service not available") + if templateName: + structured_content.setdefault("metadata", {})["templateName"] = templateName - ai_service = getattr(self.services, "ai", None) + generation = getattr(self.services, "generation", None) + if not generation: + return ActionResult.isFailure(error="Generation service not available") + + ai_service = getattr(self.services, "ai", None) + try: rendered_docs = await generation.renderReport( extractedContent=structured_content, outputFormat=outputFormat, @@ -125,43 +302,50 @@ async def create(self, parameters: Dict[str, Any]) -> ActionResult: aiService=ai_service, parentOperationId=parameters.get("parentOperationId"), ) - - if not rendered_docs: - return ActionResult.isFailure(error="Rendering produced no output") - - action_documents = [] - mime_map = { - "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "pdf": "application/pdf", - "txt": "text/plain", - "md": "text/markdown", - "html": "text/html", - "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "csv": "text/csv", - "json": "application/json", - } - for rd in rendered_docs: - doc_data = rd.documentData if hasattr(rd, "documentData") else getattr(rd, "document_data", None) - doc_name = getattr(rd, "filename", None) or getattr(rd, "documentName", None) or getattr(rd, "document_name", f"output.{outputFormat}") - mime = getattr(rd, 
"mimeType", None) or getattr(rd, "mime_type", None) or mime_map.get(outputFormat, "application/octet-stream") - - if isinstance(doc_data, bytes): - doc_data = base64.b64encode(doc_data).decode("ascii") - - action_documents.append(ActionDocument( - documentName=doc_name, - documentData=doc_data, - mimeType=mime, - validationMetadata={ - "actionType": "file.create", - "outputFormat": outputFormat, - "templateName": templateName, - }, - )) - - _persistDocumentsToUserFiles(action_documents, self.services) - return ActionResult.isSuccess(documents=action_documents) - except Exception as e: - logger.error(f"file.create failed: {e}", exc_info=True) + logger.error("file.create failed: %s", e, exc_info=True) return ActionResult.isFailure(error=str(e)) + + if not rendered_docs: + return ActionResult.isFailure(error="Rendering produced no output") + + action_documents = [] + mime_map = { + "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "pdf": "application/pdf", + "txt": "text/plain", + "md": "text/markdown", + "html": "text/html", + "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "csv": "text/csv", + "json": "application/json", + } + for rd in rendered_docs: + doc_data = rd.documentData if hasattr(rd, "documentData") else getattr(rd, "document_data", None) + doc_name = ( + getattr(rd, "filename", None) + or getattr(rd, "documentName", None) + or getattr(rd, "document_name", f"output.{outputFormat}") + ) + mime = ( + getattr(rd, "mimeType", None) + or getattr(rd, "mime_type", None) + or mime_map.get(outputFormat, "application/octet-stream") + ) + + if isinstance(doc_data, bytes): + doc_data = base64.b64encode(doc_data).decode("ascii") + + action_documents.append(ActionDocument( + documentName=doc_name, + documentData=doc_data, + mimeType=mime, + validationMetadata={ + "actionType": "file.create", + "outputFormat": outputFormat, + "templateName": templateName, + }, + )) + + 
_persistDocumentsToUserFiles(action_documents, self.services, folder_id=folder_id) + return ActionResult.isSuccess(documents=action_documents) diff --git a/modules/workflows/methods/methodFile/methodFile.py b/modules/workflows/methods/methodFile/methodFile.py index 8724ab11..c30f86a4 100644 --- a/modules/workflows/methods/methodFile/methodFile.py +++ b/modules/workflows/methods/methodFile/methodFile.py @@ -35,10 +35,13 @@ class MethodFile(MethodBase): ), "context": WorkflowActionParameter( name="context", - type="str", + type="Any", frontendType=FrontendType.HIDDEN, required=False, - description="Injected from contentSource or upstream connection", + description=( + "Resolved context: presentation envelope(s) from context.extractContent " + "(dict or list, e.g. loop bodyResults), or legacy plain text string." + ), ), "outputFormat": WorkflowActionParameter( name="outputFormat", @@ -73,6 +76,13 @@ class MethodFile(MethodBase): default="de", description="Language code", ), + "folderId": WorkflowActionParameter( + name="folderId", + type="str", + frontendType=FrontendType.USER_FILE_FOLDER, + required=False, + description="Optional My Files folder to store created documents", + ), }, execute=create.__get__(self, self.__class__), ), diff --git a/modules/workflows/processing/core/actionExecutor.py b/modules/workflows/processing/core/actionExecutor.py index 2cb216f9..3d4ed7fc 100644 --- a/modules/workflows/processing/core/actionExecutor.py +++ b/modules/workflows/processing/core/actionExecutor.py @@ -251,6 +251,7 @@ class ActionExecutor: return ActionResult( success=result.success, documents=result.documents, # Return original ActionDocument objects + data=result.data, resultLabel=action.execResultLabel, # Always use action's execResultLabel error=result.error or "" ) @@ -265,18 +266,21 @@ class ActionExecutor: ) def _extractResultText(self, result: ActionResult) -> str: - """Extract result text from ActionResult documents""" - if not result.success or not 
result.documents: + """Extract result text from ActionResult documents or structured data (e.g. ai.process handover).""" + if not result.success: return "" - - # Extract text directly from ActionDocument objects - resultParts = [] - for doc in result.documents: - if hasattr(doc, 'documentData') and doc.documentData: - resultParts.append(str(doc.documentData)) - - # Join all document results with separators - return "\n\n---\n\n".join(resultParts) if resultParts else "" + if result.documents: + resultParts = [] + for doc in result.documents: + if hasattr(doc, "documentData") and doc.documentData: + resultParts.append(str(doc.documentData)) + return "\n\n---\n\n".join(resultParts) if resultParts else "" + data = getattr(result, "data", None) + if isinstance(data, dict): + handover = data.get("response") + if handover is not None: + return str(handover) + return "" async def _createActionCompletionMessage(self, action: ActionItem, result: ActionResult, workflow: ChatWorkflow, taskStep: TaskStep, taskIndex: int, actionIndex: int): diff --git a/modules/workflows/processing/core/messageCreator.py b/modules/workflows/processing/core/messageCreator.py index 48df832d..e0c49a52 100644 --- a/modules/workflows/processing/core/messageCreator.py +++ b/modules/workflows/processing/core/messageCreator.py @@ -161,6 +161,17 @@ class MessageCreator: messageText = f"**Action {currentAction} ({action.execMethod}.{action.execAction})**\n\n" messageText += f"❌ {userFriendlyText}\n\n" messageText += f"{errorDetails}\n\n" + + # Text handover without attachment (e.g. 
ai.process): show content in the message body + if ( + result.success + and not createdDocuments + and getattr(result, "data", None) + and isinstance(result.data, dict) + ): + handover_txt = result.data.get("response") + if handover_txt is not None and str(handover_txt).strip(): + messageText += "\n\n" + str(handover_txt).strip() # Build concise summary to persist for history context doc_count = len(createdDocuments) if createdDocuments else 0 diff --git a/tests/integration/workflows/test_execute_graph_loop_aggregate_consolidate.py b/tests/integration/workflows/test_execute_graph_loop_aggregate_consolidate.py index 428fcd25..751de6d4 100644 --- a/tests/integration/workflows/test_execute_graph_loop_aggregate_consolidate.py +++ b/tests/integration/workflows/test_execute_graph_loop_aggregate_consolidate.py @@ -28,15 +28,14 @@ async def test_execute_graph_loop_and_aggregate_collects_items(): "type": "flow.loop", "parameters": { "items": {"type": "ref", "nodeId": "t1", "path": ["payload", "items"]}, - "level": "auto", "concurrency": 1, }, }, {"id": "agg1", "type": "data.aggregate", "parameters": {"mode": "collect"}}, ], "connections": [ - {"source": "t1", "target": "loop1"}, - {"source": "loop1", "target": "agg1"}, + {"source": "t1", "target": "loop1", "targetInput": 0}, + {"source": "loop1", "target": "agg1", "sourceOutput": 0, "targetInput": 0}, ], } run_envelope = default_run_envelope( @@ -72,15 +71,14 @@ async def test_data_consolidate_after_aggregate_same_context_as_post_loop(): "type": "flow.loop", "parameters": { "items": {"type": "ref", "nodeId": "t1", "path": ["payload", "items"]}, - "level": "auto", "concurrency": 1, }, }, {"id": "agg1", "type": "data.aggregate", "parameters": {"mode": "collect"}}, ], "connections": [ - {"source": "t1", "target": "loop1"}, - {"source": "loop1", "target": "agg1"}, + {"source": "t1", "target": "loop1", "targetInput": 0}, + {"source": "loop1", "target": "agg1", "sourceOutput": 0, "targetInput": 0}, ], } run_envelope = 
default_run_envelope( @@ -121,3 +119,43 @@ async def test_data_consolidate_after_aggregate_same_context_as_post_loop(): assert len(result["rows"]) == 2 assert result["rows"][0].get("currentItem", {}).get("a") == 1 assert result["rows"][1].get("currentItem", {}).get("b") == "y" + + +@pytest.mark.asyncio +async def test_loop_done_branch_runs_once_after_body(): + """Loop output 1 (Fertig) runs downstream once; body stays on output 0.""" + graph = { + "nodes": [ + {"id": "t1", "type": "trigger.manual", "parameters": {}}, + { + "id": "loop1", + "type": "flow.loop", + "parameters": { + "items": {"type": "ref", "nodeId": "t1", "path": ["payload", "items"]}, + "concurrency": 1, + }, + }, + {"id": "agg1", "type": "data.aggregate", "parameters": {"mode": "collect"}}, + {"id": "c1", "type": "data.consolidate", "parameters": {"mode": "table"}}, + ], + "connections": [ + {"source": "t1", "target": "loop1", "targetInput": 0}, + {"source": "loop1", "target": "agg1", "sourceOutput": 0, "targetInput": 0}, + {"source": "loop1", "target": "c1", "sourceOutput": 1, "targetInput": 0}, + ], + } + run_envelope = default_run_envelope( + "manual", + payload={"items": [{"a": 1}, {"a": 2}]}, + ) + res = await executeGraph( + graph, + services=_minimal_services(), + run_envelope=run_envelope, + userId="test-user", + ) + assert res.get("success") is True, res + out = res["nodeOutputs"] + assert out["agg1"]["count"] == 2 + assert out["c1"]["count"] == 2 + assert out["c1"]["mode"] == "table" diff --git a/tests/unit/features/trustee/test_accountingConnectorAbacus_balances.py b/tests/unit/features/trustee/test_accountingConnectorAbacus_balances.py index ad84e171..ae1a39ad 100644 --- a/tests/unit/features/trustee/test_accountingConnectorAbacus_balances.py +++ b/tests/unit/features/trustee/test_accountingConnectorAbacus_balances.py @@ -27,18 +27,16 @@ class TestAbacusGetAccountBalances: rawEntries = [ { - "Id": "e1", "JournalDate": "2025-01-15T00:00:00", - "Lines": [ - {"AccountId": "1020", 
"DebitAmount": 1000.0, "CreditAmount": 0.0}, - {"AccountId": "6000", "DebitAmount": 0.0, "CreditAmount": 1000.0}, - ], + "Id": "e1", "Date": "2025-01-15T00:00:00", + "Amount": {"KeyAmount": 1000.0}, + "Debit": {"AccountId": "1020"}, + "Credit": {"AccountId": "6000"}, }, { - "Id": "e2", "JournalDate": "2025-12-20T00:00:00", - "Lines": [ - {"AccountId": "1020", "DebitAmount": 500.0, "CreditAmount": 0.0}, - {"AccountId": "6000", "DebitAmount": 0.0, "CreditAmount": 500.0}, - ], + "Id": "e2", "Date": "2025-12-20T00:00:00", + "Amount": {"KeyAmount": 500.0}, + "Debit": {"AccountId": "1020"}, + "Credit": {"AccountId": "6000"}, }, ] @@ -68,11 +66,10 @@ class TestAbacusGetAccountBalances: rawEntries = [ { - "Id": "e1", "JournalDate": "2024-06-30T00:00:00", - "Lines": [ - {"AccountId": "1020", "DebitAmount": 7000.0, "CreditAmount": 0.0}, - {"AccountId": "9999", "DebitAmount": 0.0, "CreditAmount": 7000.0}, - ], + "Id": "e1", "Date": "2024-06-30T00:00:00", + "Amount": {"KeyAmount": 7000.0}, + "Debit": {"AccountId": "1020"}, + "Credit": {"AccountId": "9999"}, }, ] diff --git a/tests/unit/graphicalEditor/test_condition_operator_catalog.py b/tests/unit/graphicalEditor/test_condition_operator_catalog.py new file mode 100644 index 00000000..a1954448 --- /dev/null +++ b/tests/unit/graphicalEditor/test_condition_operator_catalog.py @@ -0,0 +1,49 @@ +# Copyright (c) 2025 Patrick Motsch +"""Tests for backend-driven condition operator catalog.""" + +from modules.features.graphicalEditor.conditionOperators import ( + CONDITION_OPERATOR_CATALOG, + VALUE_KINDS, + apply_condition_operator, + catalog_type_to_value_kind, + localize_operator_catalog, +) + + +def test_all_value_kinds_have_operators(): + for kind in VALUE_KINDS: + assert kind in CONDITION_OPERATOR_CATALOG + assert len(CONDITION_OPERATOR_CATALOG[kind]) > 0 + + +def test_operator_ids_unique_per_kind(): + for kind, ops in CONDITION_OPERATOR_CATALOG.items(): + ids = [o["id"] for o in ops] + assert len(ids) == len(set(ids)), 
f"duplicate operator id in {kind}" + + +def test_localize_operator_catalog_has_labels(): + loc = localize_operator_catalog("de") + assert "string" in loc + assert all("label" in o and o["label"] for o in loc["string"]) + + +def test_catalog_type_mapping(): + assert catalog_type_to_value_kind("str") == "string" + assert catalog_type_to_value_kind("int") == "number" + assert catalog_type_to_value_kind("bool") == "boolean" + assert catalog_type_to_value_kind("List[Any]") == "array" + assert catalog_type_to_value_kind("Dict") == "object" + + +def test_string_operators_apply(): + assert apply_condition_operator("hello", "starts_with", "he", "string") + assert apply_condition_operator("hello", "ends_with", "lo", "string") + assert apply_condition_operator("hello", "regex", "ell", "string") + assert not apply_condition_operator("hello", "contains", "xyz", "string") + + +def test_array_length_operators(): + assert apply_condition_operator([1, 2, 3], "length_eq", 3, "array") + assert apply_condition_operator([1, 2, 3], "length_gt", 2, "array") + assert apply_condition_operator([], "empty", None, "array") diff --git a/tests/unit/graphicalEditor/test_resolve_value_kind.py b/tests/unit/graphicalEditor/test_resolve_value_kind.py new file mode 100644 index 00000000..35b53e07 --- /dev/null +++ b/tests/unit/graphicalEditor/test_resolve_value_kind.py @@ -0,0 +1,60 @@ +# Copyright (c) 2025 Patrick Motsch +"""Tests for condition valueKind resolution.""" + +from modules.features.graphicalEditor.conditionOperators import resolve_value_kind + + +def _graph(nodes, connections=None, target=None): + return { + "nodes": nodes, + "connections": connections or [], + "targetNodeId": target or nodes[-1]["id"], + } + + +def test_form_payload_field_is_string(): + graph = _graph( + [ + {"id": "f1", "type": "input.form", "parameters": {"formFields": [{"name": "email", "type": "email"}]}}, + {"id": "if1", "type": "flow.ifElse", "parameters": {}}, + ], + target="if1", + ) + ref = {"nodeId": "f1", 
"path": ["payload", "email"]} + assert resolve_value_kind(graph, ref) == "string" + + +def test_extract_content_data_is_context(): + graph = _graph( + [ + {"id": "ext1", "type": "context.extractContent", "parameters": {}}, + {"id": "if1", "type": "flow.ifElse", "parameters": {}}, + ], + target="if1", + ) + ref = {"nodeId": "ext1", "path": ["data"]} + assert resolve_value_kind(graph, ref) == "context" + + +def test_upload_file_is_file(): + graph = _graph( + [ + {"id": "up1", "type": "input.upload", "parameters": {}}, + {"id": "if1", "type": "flow.ifElse", "parameters": {}}, + ], + target="if1", + ) + ref = {"nodeId": "up1", "path": ["file"]} + assert resolve_value_kind(graph, ref) == "file" + + +def test_upload_mime_is_string(): + graph = _graph( + [ + {"id": "up1", "type": "input.upload", "parameters": {}}, + {"id": "if1", "type": "flow.ifElse", "parameters": {}}, + ], + target="if1", + ) + ref = {"nodeId": "up1", "path": ["file", "mimeType"]} + assert resolve_value_kind(graph, ref) == "string" diff --git a/tests/unit/graphicalEditor/test_upstream_paths_and_graph_schema.py b/tests/unit/graphicalEditor/test_upstream_paths_and_graph_schema.py index 16aec90d..13072b3f 100644 --- a/tests/unit/graphicalEditor/test_upstream_paths_and_graph_schema.py +++ b/tests/unit/graphicalEditor/test_upstream_paths_and_graph_schema.py @@ -50,6 +50,25 @@ def test_parse_graph_defined_schema_nested_group(): assert "addr.zip" in names +def test_compute_upstream_paths_switch_context_branch_items(): + graph = { + "nodes": [ + {"id": "ext1", "type": "context.extractContent", "parameters": {}}, + {"id": "sw1", "type": "flow.switch", "parameters": {"cases": [{"operator": "contains_content", "value": "image"}]}}, + {"id": "ai1", "type": "ai.prompt", "parameters": {"aiPrompt": "summarize"}}, + ], + "connections": [ + {"source": "ext1", "target": "sw1", "sourceOutput": 0, "targetInput": 0}, + {"source": "sw1", "target": "ai1", "sourceOutput": 0, "targetInput": 0}, + ], + } + paths = 
compute_upstream_paths(graph, "ai1") + sw_paths = [p for p in paths if p.get("producerNodeId") == "sw1"] + items_paths = [p for p in sw_paths if p.get("path") == ["items"]] + assert items_paths, sw_paths + assert items_paths[0].get("type") == "List[Any]" + + def test_validate_graph_port_mismatch_errors(): node_type_ids = {n["id"] for n in STATIC_NODE_TYPES} graph = { diff --git a/tests/unit/services/test_costEstimate.py b/tests/unit/services/test_costEstimate.py index e49aca6a..00fbb6b6 100644 --- a/tests/unit/services/test_costEstimate.py +++ b/tests/unit/services/test_costEstimate.py @@ -17,7 +17,7 @@ class TestCostEstimate(unittest.TestCase): {"maxBytes": 200 * 1024 * 1024}, kind="files", ) self.assertIn("estimatedTokens", result) - self.assertIn("estimatedUsd", result) + self.assertIn("estimatedChf", result) self.assertIn("basis", result) self.assertIn("assumptions", result["basis"]) self.assertIn("formula", result["basis"]["assumptions"]) @@ -39,12 +39,12 @@ class TestCostEstimate(unittest.TestCase): def test_unknown_kind_returns_zero(self): result = _costEstimate.estimateBootstrapCost({}, kind="totally-unknown") self.assertEqual(result["estimatedTokens"], 0) - self.assertEqual(result["estimatedUsd"], 0.0) + self.assertEqual(result["estimatedChf"], 0.0) - def test_usd_is_rounded_4_decimals(self): + def test_chf_is_rounded_4_decimals(self): result = _costEstimate.estimateBootstrapCost({"maxBytes": 1024 * 1024}, kind="files") - rounded = round(result["estimatedUsd"], 4) - self.assertEqual(result["estimatedUsd"], rounded) + rounded = round(result["estimatedChf"], 4) + self.assertEqual(result["estimatedChf"], rounded) def test_basis_includes_input_limits(self): result = _costEstimate.estimateBootstrapCost({"maxBytes": 42}, kind="files") diff --git a/tests/unit/workflow/test_extract_content_handover.py b/tests/unit/workflow/test_extract_content_handover.py new file mode 100644 index 00000000..c0009251 --- /dev/null +++ 
b/tests/unit/workflow/test_extract_content_handover.py @@ -0,0 +1,681 @@ +# Unit tests: context.extractContent serialize + presentation helpers (legacy handover dicts vs new paths). + +import base64 +import copy as _copy + +from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart + +from modules.workflows.methods.methodContext.actions.extractContent import ( + HANDOVER_KIND, + EXTRACT_PAYLOAD_SCHEMA_VERSION, + _apply_content_filter, + _canonical_content_filter, + _joined_text_from_content_extracted_serial, + _filter_extractions_by_content_filter, + _joined_text_from_handover_payload, + _persist_extracted_image_parts, + _serialize_content_extracted_for_output, + build_presentation_for_extractions, + build_presentation_for_payload, + joined_text_from_extract_node_data, + parse_presentation_parameters, + presentation_response_text, + summarize_presentation_payload, +) + + +def test_joined_text_orders_text_table_and_skips_container(): + payload = { + "kind": HANDOVER_KIND, + "fileOrder": ["f1"], + "files": { + "f1": { + "parts": [ + {"typeGroup": "text", "data": " A\n", "id": "x"}, + {"typeGroup": "container", "data": "", "id": "c"}, + {"typeGroup": "text", "data": "B", "id": "y"}, + ] + } + }, + } + assert _joined_text_from_handover_payload(payload) == "A\n\nB" + + +def test_joined_text_from_extract_node_data_prefers_content_extracted(): + data = { + "contentExtracted": [ + {"id": "x", "parts": [{"typeGroup": "text", "mimeType": "text/plain", "data": "Z", "id": "p"}]} + ] + } + assert joined_text_from_extract_node_data(data) == "Z" + + +def test_joined_text_serial_list(): + items = [{"parts": [{"typeGroup": "text", "mimeType": "text/plain", "data": "a", "id": "1"}]}] + assert _joined_text_from_content_extracted_serial(items) == "a" + + +def test_serialize_content_extracted_drops_summary(): + ce = ContentExtracted( + id="doc1", + parts=[ContentPart(id="p", label="main", typeGroup="text", mimeType="text/plain", data="hi")], + summary={"ignored": 
True}, + ) + d = _serialize_content_extracted_for_output(ce) + assert "summary" not in d + + +def test_persist_images_without_run_context_is_noop(): + raw = b"fake-binary-image" + b64 = base64.b64encode(raw).decode("ascii") + serial = [ + { + "id": "1", + "parts": [ + {"typeGroup": "text", "data": "x", "mimeType": "text/plain", "id": "t1"}, + {"typeGroup": "image", "mimeType": "image/png", "data": b64, "id": "img1"}, + ], + } + ] + original = _copy.deepcopy(serial) + out, arts = _persist_extracted_image_parts(serial, name_stem="stem", run_context=None) + assert arts == [] + assert out == original + + +def test_filter_extractions_by_content_filter_text_only(): + ec = ContentExtracted( + id="id1", + parts=[ + ContentPart(id="t", label="t", typeGroup="text", mimeType="text/plain", data="a"), + ContentPart(id="i", label="i", typeGroup="image", mimeType="image/png", data=""), + ], + ) + out = _filter_extractions_by_content_filter([ec], "textOnly") + assert len(out) == 1 + assert len(out[0].parts) == 1 + assert out[0].parts[0].typeGroup == "text" + + +def test_canonical_content_filter_is_case_insensitive(): + assert _canonical_content_filter("imagesOnly") == "imagesOnly" + assert _canonical_content_filter("IMAGESONLY") == "imagesOnly" + assert _canonical_content_filter("textOnly") == "textOnly" + assert _canonical_content_filter("unknown") == "all" + + +def test_parse_presentation_parameters_content_filter_all_coerces_legacy_pdf_text(): + """Graphs with „Alles“ but stored pdfExtractMode ``text`` must not drop image parts in presentation.""" + cfg = parse_presentation_parameters({"contentFilter": "all", "pdfExtractMode": "text"}) + assert cfg["pdfExtractMode"] == "all" + + +def test_parse_presentation_parameters_images_only_defaults_pdf_mode(): + cfg = parse_presentation_parameters({"contentFilter": "imagesOnly"}) + assert cfg["pdfExtractMode"] == "images" + + +def test_presentation_lines_includes_redacted_image_parts_when_pdf_mode_all(): + payload = { + "fileOrder": 
["f1"], + "files": { + "f1": { + "sourceFileName": "x.pdf", + "parts": [ + {"typeGroup": "text", "data": "body", "id": "t"}, + {"typeGroup": "image", "mimeType": "image/png", "data": "YQ==", "id": "img1"}, + ], + }, + }, + } + cfg = parse_presentation_parameters({"contentFilter": "all", "outputMode": "lines", "pdfExtractMode": "all"}) + pres = build_presentation_for_payload(payload, cfg) + bf = pres["files"]["f1"] + assert len(bf["data"]) == 2 + assert bf["data"][0]["typeGroup"] == "text" + assert bf["data"][0]["lines"] == ["body"] + assert bf["data"][1]["typeGroup"] == "image" + assert bf["data"][1]["lines"] == [] + assert bf["data"][1].get("data") == "" + assert "imageParts" not in bf + + +def test_build_presentation_for_extractions_matches_payload_path(): + ce = ContentExtracted( + id="id", + parts=[ContentPart(id="p", label="main", typeGroup="text", mimeType="text/plain", data="a\n\nb")], + ) + cfg = parse_presentation_parameters({"outputMode": "lines", "splitBy": "paragraph"}) + pres = build_presentation_for_extractions([ce], ["f.txt"], cfg) + fk = pres["fileOrder"][0] + b1 = pres["files"][fk] + assert b1["outputMode"] == "lines" + assert len(b1["data"]) == 1 + assert b1["data"][0]["lines"] == ["a", "b"] + assert "items" not in b1 + + +def test_joined_text_includes_csv_table_parts(): + payload = { + "fileOrder": ["f1"], + "files": { + "f1": { + "parts": [ + {"typeGroup": "table", "mimeType": "text/csv", "data": "a,b\n1,2", "id": "t"}, + ] + } + }, + } + assert _joined_text_from_handover_payload(payload) == "a,b\n1,2" + + +def _mixed_payload(): + return { + "kind": HANDOVER_KIND, + "schemaVersion": 1, + "fileOrder": ["f1"], + "files": { + "f1": { + "parts": [ + {"typeGroup": "text", "data": "hello", "id": "t1"}, + {"typeGroup": "table", "mimeType": "text/csv", "data": "a,b", "id": "tb1"}, + {"typeGroup": "image", "mimeType": "image/png", "data": "abc=", "id": "i1"}, + {"typeGroup": "structure", "mimeType": "text/html", "data": "

", "id": "s1"}, + ], + } + }, + } + + +def test_content_filter_all_is_noop(): + payload = _mixed_payload() + result = _apply_content_filter(payload, "all") + assert result is payload + + +def test_content_filter_text_only_keeps_text_table_structure(): + result = _apply_content_filter(_mixed_payload(), "textOnly") + parts = result["files"]["f1"]["parts"] + type_groups = {p["typeGroup"] for p in parts} + assert type_groups == {"text", "table", "structure"} + assert "image" not in type_groups + + +def test_content_filter_images_only(): + result = _apply_content_filter(_mixed_payload(), "imagesOnly") + parts = result["files"]["f1"]["parts"] + assert all(p["typeGroup"] == "image" for p in parts) + assert len(parts) == 1 + + +def test_content_filter_no_images_removes_only_images(): + result = _apply_content_filter(_mixed_payload(), "noImages") + parts = result["files"]["f1"]["parts"] + type_groups = {p["typeGroup"] for p in parts} + assert "image" not in type_groups + assert {"text", "table", "structure"} == type_groups + + +def test_content_filter_text_only_joined_text_has_no_image_data(): + result = _apply_content_filter(_mixed_payload(), "textOnly") + text = _joined_text_from_handover_payload(result) + assert "hello" in text + assert "abc=" not in text + + +def test_presentation_lines_and_response(): + payload = { + "kind": HANDOVER_KIND, + "fileOrder": ["f1"], + "files": { + "f1": { + "sourceFileName": "x.txt", + "parts": [ + {"typeGroup": "text", "data": "a\n\nb", "id": "1"}, + ], + }, + }, + } + cfg = parse_presentation_parameters({"outputMode": "lines", "splitBy": "paragraph"}) + pres = build_presentation_for_payload(payload, cfg) + b1 = pres["files"]["f1"] + assert b1["outputMode"] == "lines" + assert isinstance(b1["data"], list) + assert len(b1["data"]) == 1 + assert b1["data"][0]["lines"] == ["a", "b"] + assert presentation_response_text(pres) == "a\n\nb" + + +def test_presentation_pdf_mode_tables_only(): + payload = { + "fileOrder": ["f1"], + "files": { + 
"f1": { + "sourceFileName": "d.pdf", + "parts": [ + {"typeGroup": "text", "data": "t", "id": "a"}, + {"typeGroup": "table", "mimeType": "text/csv", "data": "h1,h2\n1,2", "id": "b"}, + ], + }, + }, + } + cfg = parse_presentation_parameters({"pdfExtractMode": "tables", "outputMode": "blob"}) + pres = build_presentation_for_payload(payload, cfg) + bf = pres["files"]["f1"] + assert isinstance(bf["data"], str) + assert bf["data"] == "h1,h2\n1,2" + + +def test_presentation_csv_rows(): + payload = { + "fileOrder": ["f1"], + "files": { + "f1": { + "sourceFileName": "f.csv", + "parts": [{"typeGroup": "table", "mimeType": "text/csv", "data": "a,b\n1,2", "id": "t"}], + }, + }, + } + cfg = parse_presentation_parameters({"outputMode": "structured", "csvHeaderRow": "true"}) + pres = build_presentation_for_payload(payload, cfg) + csv = pres["files"]["f1"]["csv"] + assert csv["headers"] == ["a", "b"] + assert csv["rows"] == [{"a": "1", "b": "2"}] + + +def test_presentation_pages_groups_by_page_index(): + payload = { + "fileOrder": ["f1"], + "files": { + "f1": { + "sourceFileName": "p.pdf", + "parts": [ + {"typeGroup": "text", "data": "p0", "metadata": {"pageIndex": 0}, "id": "a"}, + {"typeGroup": "text", "data": "p1a\np1b", "metadata": {"pageIndex": 1}, "id": "b"}, + ], + }, + }, + } + cfg = parse_presentation_parameters({"outputMode": "pages", "splitBy": "newline"}) + pres = build_presentation_for_payload(payload, cfg) + pages = pres["files"]["f1"]["pages"] + assert [(p["pageIndex"], [it["text"] for it in p["items"]]) for p in pages] == [ + (0, ["p0"]), + (1, ["p1a", "p1b"]), + ] + pdata = pres["files"]["f1"]["data"] + assert pdata == [ + {"pageIndex": 0, "lines": ["p0"]}, + {"pageIndex": 1, "lines": ["p1a", "p1b"]}, + ] + + +def test_presentation_chunks_with_overlap_chars(): + payload = { + "fileOrder": ["f1"], + "files": {"f1": {"sourceFileName": "t.txt", "parts": [{"typeGroup": "text", "data": "abcdefghij", "id": "a"}]}}, + } + cfg = parse_presentation_parameters( + 
{"outputMode": "chunks", "chunkSizeUnit": "characters", "chunkSize": "4", "chunkOverlap": "2"} + ) + pres = build_presentation_for_payload(payload, cfg) + texts = [c["text"] for c in pres["files"]["f1"]["chunks"]] + assert texts == ["abcd", "cdef", "efgh", "ghij"] + assert pres["files"]["f1"]["data"] == texts + + +def test_presentation_keeps_pres_key_after_inline_image_strip_simulation(): + raw = b"x" + b64 = base64.b64encode(raw).decode("ascii") + payload = { + "kind": HANDOVER_KIND, + "schemaVersion": 1, + "fileOrder": ["f1"], + "files": { + "f1": { + "parts": [ + {"typeGroup": "text", "data": "txt", "id": "t"}, + {"typeGroup": "image", "mimeType": "image/png", "data": b64, "id": "img"}, + ] + } + }, + } + pres = build_presentation_for_payload(payload, parse_presentation_parameters({})) + serial = _copy.deepcopy([{"id": "1", "parts": payload["files"]["f1"]["parts"]}]) + stayed, arts = _persist_extracted_image_parts(serial, name_stem="s", run_context=None) + assert arts == [] + wrapper = {**pres, "_meta": {}} + fk = pres["fileOrder"][0] + assert isinstance(wrapper["files"][fk].get("data"), list) + assert len(wrapper["files"][fk]["data"]) == 2 + + +def test_summarize_presentation_payload_shape(): + payload = { + "fileOrder": ["f1"], + "files": {"f1": {"sourceFileName": "t.txt", "parts": [{"typeGroup": "text", "data": "hello", "id": "a"}]}}, + } + pres = build_presentation_for_payload(payload, parse_presentation_parameters({"outputMode": "blob"})) + s = summarize_presentation_payload(pres) + assert s["fileOrder"] == ["f1"] + assert "f1" in s["files"] + assert s["files"]["f1"]["outputMode"] == "blob" + assert s["files"]["f1"]["stringLength"] == 5 + assert "hello" in (s["files"]["f1"].get("head") or "") + + +def test_joined_text_from_extract_node_data_uses_presentation_root(): + from modules.workflows.methods.methodContext.actions.extractContent import PRESENTATION_KIND + + data = { + "schemaVersion": 1, + "kind": PRESENTATION_KIND, + "outputMode": "lines", + 
"fileOrder": ["f1"], + "files": {"f1": {"outputMode": "lines", "sourceFileName": "x.txt", "data": ["body"]}}, + "_meta": {"extractPayloadSchemaVersion": EXTRACT_PAYLOAD_SCHEMA_VERSION}, + } + assert joined_text_from_extract_node_data(data) == "body" + assert data["_meta"]["extractPayloadSchemaVersion"] == EXTRACT_PAYLOAD_SCHEMA_VERSION + + +def test_action_result_contract_new_extract_payload_keys(): + from modules.workflows.methods.methodContext.actions.extractContent import PRESENTATION_KIND + + data = { + "schemaVersion": 1, + "kind": PRESENTATION_KIND, + "outputMode": "lines", + "fileOrder": ["f1"], + "files": {"f1": {"outputMode": "lines", "sourceFileName": "x.txt", "data": ["body"]}}, + "_meta": {"actionType": "context.extractContent", "extractPayloadSchemaVersion": EXTRACT_PAYLOAD_SCHEMA_VERSION}, + } + assert data["kind"] == PRESENTATION_KIND + assert joined_text_from_extract_node_data(data) == "body" + + +def test_automation_workspace_suppresses_extract_artifacts(): + from modules.workflows.automation2.workflowArtifactVisibility import suppress_workflow_file_in_workspace_ui + + assert suppress_workflow_file_in_workspace_ui({"fileName": "extracted_content_transient-abc_99.json"}) + assert suppress_workflow_file_in_workspace_ui({"fileName": "extract_media_stem_uuid.png"}) + assert not suppress_workflow_file_in_workspace_ui({"fileName": "export_2026.csv"}) + assert suppress_workflow_file_in_workspace_ui({"fileName": "", "suppressInWorkflowFileLists": True}) + assert suppress_workflow_file_in_workspace_ui({"fileName": "report.pdf", "tags": ["_workflowInternal"]}) + assert not suppress_workflow_file_in_workspace_ui({"fileName": "report.pdf", "tags": ["invoice"]}) + + +def test_normalize_presentation_envelopes_action_result_and_list(): + from modules.workflows.methods.methodContext.actions.extractContent import ( + PRESENTATION_KIND, + normalize_presentation_envelopes, + ) + + pres = { + "kind": PRESENTATION_KIND, + "fileOrder": ["f1"], + "files": {"f1": 
{"outputMode": "lines", "sourceFileName": "x.txt", "data": []}}, + } + wrapped = {"success": True, "data": pres} + assert len(normalize_presentation_envelopes(wrapped)) == 1 + assert len(normalize_presentation_envelopes([wrapped])) == 1 + + +def test_method_base_preserves_run_context_injection(): + from modules.workflows.methods.methodFile.methodFile import MethodFile + + class _Svc: + pass + + action_def = MethodFile(_Svc())._actions["create"] + validated = MethodFile(_Svc())._validateParameters( + {"context": "x", "outputFormat": "pdf", "_runContext": {"mandateId": "m", "instanceId": "i"}}, + action_def.parameters, + ) + assert validated.get("_runContext") == {"mandateId": "m", "instanceId": "i"} + + +def test_presentation_envelopes_to_document_json_one_section_per_data_slot(): + from modules.workflows.methods.methodContext.actions.extractContent import ( + PRESENTATION_KIND, + presentation_envelopes_to_document_json, + ) + + pres = { + "kind": PRESENTATION_KIND, + "outputMode": "lines", + "fileOrder": ["f1"], + "files": { + "f1": { + "outputMode": "lines", + "sourceFileName": "a.pdf", + "data": [ + { + "typeGroup": "text", + "mimeType": "text/plain", + "data": "ignored", + "lines": ["Line A", "Line B"], + }, + ], + }, + }, + } + out = presentation_envelopes_to_document_json( + {"success": True, "data": pres}, + title="T", + language="de", + ) + paragraphs = [ + s for s in out["documents"][0]["sections"] + if s.get("content_type") == "paragraph" + ] + assert len(paragraphs) == 1 + runs = paragraphs[0]["elements"][0]["content"]["inlineRuns"] + joined = "".join(r.get("value", "") for r in runs) + assert "Line A" in joined + assert "Line B" in joined + assert "\n" in joined + + +def test_presentation_envelopes_table_slot_becomes_table_section(): + from modules.workflows.methods.methodContext.actions.extractContent import ( + PRESENTATION_KIND, + presentation_envelopes_to_document_json, + ) + + pres = { + "kind": PRESENTATION_KIND, + "outputMode": "lines", + 
"fileOrder": ["f1"], + "files": { + "f1": { + "outputMode": "lines", + "sourceFileName": "sheet.csv", + "data": [ + { + "typeGroup": "table", + "mimeType": "text/csv", + "data": '"Name","Amount"\n"Alice","100"\n"Bob","200"', + "lines": [], + }, + ], + }, + }, + } + out = presentation_envelopes_to_document_json( + {"success": True, "data": pres}, + title="T", + language="de", + ) + tables = [s for s in out["documents"][0]["sections"] if s.get("content_type") == "table"] + assert len(tables) == 1 + content = tables[0]["elements"][0]["content"] + assert content["headers"] == ["Name", "Amount"] + assert content["rows"] == [["Alice", "100"], ["Bob", "200"]] + + +def test_presentation_line_slot_preserves_table_without_lines(): + from modules.workflows.methods.methodContext.actions.extractContent import ( + _presentation_line_slot_from_part, + _presentation_line_slots_from_part, + parse_presentation_parameters, + ) + + cfg = parse_presentation_parameters({"outputMode": "lines", "splitBy": "newline"}) + part = { + "typeGroup": "table", + "mimeType": "text/csv", + "data": '"A","B"\n"1","2"\n"3","4"', + "id": "t1", + } + slot = _presentation_line_slot_from_part(part, cfg) + assert slot.get("lines") == [] + assert slot.get("data") == part["data"] + slots = _presentation_line_slots_from_part(part, cfg) + assert len(slots) == 3 + assert slots[0]["lines"] == ['"A","B"'] + assert slots[1]["lines"] == ['"1","2"'] + + +def test_presentation_envelopes_preserves_data_slot_order_text_image_text(): + import base64 + + from modules.workflows.methods.methodContext.actions.extractContent import ( + PRESENTATION_KIND, + presentation_envelopes_to_document_json, + ) + + class _Mgmt: + def getFileData(self, _fid: str) -> bytes: + return base64.b64decode( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==" + ) + + class _Svc: + interfaceDbComponent = _Mgmt() + + pres = { + "kind": PRESENTATION_KIND, + "outputMode": "lines", + "fileOrder": 
["f1"], + "files": { + "f1": { + "outputMode": "lines", + "sourceFileName": "a.pdf", + "data": [ + {"typeGroup": "text", "mimeType": "text/plain", "lines": ["Before"]}, + { + "typeGroup": "image", + "mimeType": "image/png", + "embeddedImageFileId": "00000000-0000-0000-0000-000000000001", + "embeddedImageFileName": "img.png", + }, + {"typeGroup": "text", "mimeType": "text/plain", "lines": ["After"]}, + ], + }, + }, + } + out = presentation_envelopes_to_document_json( + {"success": True, "data": pres}, + title="T", + language="de", + services=_Svc(), + ) + types = [s.get("content_type") for s in out["documents"][0]["sections"]] + assert types == ["paragraph", "image", "paragraph"] + + +def test_presentation_envelopes_to_document_json_text_slots(): + from modules.workflows.methods.methodContext.actions.extractContent import ( + PRESENTATION_KIND, + presentation_envelopes_to_document_json, + ) + + pres = { + "kind": PRESENTATION_KIND, + "outputMode": "lines", + "fileOrder": ["f1"], + "files": { + "f1": { + "outputMode": "lines", + "sourceFileName": "a.pdf", + "data": [ + { + "typeGroup": "text", + "mimeType": "text/plain", + "data": "Hello", + "lines": ["Hello", "World"], + }, + ], + }, + }, + } + out = presentation_envelopes_to_document_json( + [{"success": True, "data": pres}], + title="T", + language="de", + ) + paragraphs = [ + s for s in out["documents"][0]["sections"] + if s.get("content_type") == "paragraph" + ] + assert len(paragraphs) == 1 + all_text = [] + for p in paragraphs: + runs = p["elements"][0]["content"]["inlineRuns"] + all_text.append("".join(r.get("value", "") for r in runs)) + assert any("Hello" in t for t in all_text) + assert any("World" in t for t in all_text) + + +def test_presentation_envelopes_to_document_json_image_slot(): + import base64 + + from modules.workflows.methods.methodContext.actions.extractContent import ( + PRESENTATION_KIND, + presentation_envelopes_to_document_json, + ) + + fid = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" + pres 
= { + "kind": PRESENTATION_KIND, + "outputMode": "lines", + "fileOrder": ["f1"], + "files": { + "f1": { + "outputMode": "lines", + "sourceFileName": "a.pdf", + "data": [ + { + "typeGroup": "image", + "mimeType": "image/png", + "embeddedImageFileId": fid, + "embeddedImageFileName": "clip.png", + }, + ], + }, + }, + } + + class _Mgmt: + def getFileData(self, file_id): + assert file_id == fid + # Valid PNG signature + enough bytes for mime sniffing (>= 12 bytes). + return b"\x89PNG\r\n\x1a\n" + b"\x00" * 16 + + class _Svc: + interfaceDbComponent = _Mgmt() + + out = presentation_envelopes_to_document_json( + pres, + title="Img", + language="de", + services=_Svc(), + ) + img_secs = [ + s for s in out["documents"][0]["sections"] + if s.get("content_type") == "image" + ] + assert len(img_secs) == 1 + b64 = img_secs[0]["elements"][0]["content"]["base64Data"] + assert base64.b64decode(b64).startswith(b"\x89PNG") diff --git a/tests/unit/workflow/test_flow_executor_conditions.py b/tests/unit/workflow/test_flow_executor_conditions.py new file mode 100644 index 00000000..70cc84f4 --- /dev/null +++ b/tests/unit/workflow/test_flow_executor_conditions.py @@ -0,0 +1,94 @@ +# Copyright (c) 2025 Patrick Motsch +"""FlowExecutor structured condition evaluation with Item dataRef.""" + +import pytest + +from modules.workflows.automation2.executors.flowExecutor import FlowExecutor +from modules.workflows.methods.methodContext.actions.extractContent import PRESENTATION_KIND + + +@pytest.fixture +def executor(): + return FlowExecutor() + + +def test_if_else_uses_item_param(executor): + node_outputs = { + "n1": {"payload": {"status": "ok"}}, + } + node = { + "id": "if1", + "type": "flow.ifElse", + "parameters": { + "Item": {"type": "ref", "nodeId": "n1", "path": ["payload", "status"]}, + "condition": {"type": "condition", "operator": "eq", "value": "ok"}, + }, + } + ok = executor._evalStructuredCondition( + node["parameters"]["condition"], + node_outputs, + 
item_param=node["parameters"]["Item"], + node=node, + ) + assert ok is True + + +def test_legacy_condition_ref_fallback(executor): + node_outputs = {"n1": {"count": 5}} + node = {"id": "if1", "type": "flow.ifElse", "parameters": {}} + cond = { + "type": "condition", + "ref": {"type": "ref", "nodeId": "n1", "path": ["count"]}, + "operator": "gt", + "value": 3, + } + assert executor._evalStructuredCondition(cond, node_outputs, node=node) is True + + +def test_context_contains_content(executor): + presentation = { + "kind": PRESENTATION_KIND, + "outputMode": "lines", + "fileOrder": ["f1"], + "files": { + "f1": { + "outputMode": "lines", + "data": [{"typeGroup": "text", "lines": ["Hallo Welt"]}], + } + }, + } + cond = {"type": "condition", "operator": "contains_content", "value": "text"} + assert executor._evalStructuredCondition(cond, {"n1": presentation}, item_param={"type": "ref", "nodeId": "n1", "path": []}, node={"id": "if1", "type": "flow.ifElse"}) is True + + +def test_context_contains_content_blob_mode(executor): + presentation = { + "kind": PRESENTATION_KIND, + "outputMode": "blob", + "fileOrder": ["f1"], + "files": { + "f1": { + "outputMode": "blob", + "data": "Invoice text\n\n[image:abc123]", + } + }, + } + img_cond = {"type": "condition", "operator": "contains_content", "value": "image"} + txt_cond = {"type": "condition", "operator": "contains_content", "value": "text"} + item = {"type": "ref", "nodeId": "n1", "path": []} + node = {"id": "if1", "type": "flow.ifElse"} + assert executor._evalStructuredCondition(img_cond, {"n1": presentation}, item_param=item, node=node) is True + assert executor._evalStructuredCondition(txt_cond, {"n1": presentation}, item_param=item, node=node) is True + + +def test_switch_uses_shared_operators(executor): + assert executor._evalSwitchCase("abc", {"operator": "starts_with", "value": "ab"}) is True + assert executor._evalSwitchCase([1, 2], {"operator": "length_eq", "value": 2}) is True + + +def 
test_switch_resolves_value_kind_for_string_ops(executor): + assert executor._evalSwitchCase( + "hello", + {"operator": "starts_with", "value": "he"}, + value_kind="string", + ) is True diff --git a/tests/unit/workflow/test_merge_context_handover.py b/tests/unit/workflow/test_merge_context_handover.py new file mode 100644 index 00000000..30a60b8f --- /dev/null +++ b/tests/unit/workflow/test_merge_context_handover.py @@ -0,0 +1,206 @@ +# Unit tests: context.mergeContext primary text from extract handover (documents[0]). + +import json + +import pytest + +from modules.workflows.methods.methodContext.actions.extractContent import HANDOVER_KIND +from modules.workflows.methods.methodContext.actions.mergeContext import mergeContext + + +def _handover(text: str) -> dict: + return { + "kind": HANDOVER_KIND, + "fileOrder": ["f1"], + "files": { + "f1": { + "parts": [ + {"typeGroup": "text", "data": text, "id": "t1"}, + ] + } + }, + } + + +@pytest.mark.asyncio +async def test_mergeContext_requires_dataSource(): + result = await mergeContext(object(), {}) + assert not result.success + err = result.error or "" + assert "dataSource" in err or "erforderlich" in err.lower() + + +@pytest.mark.asyncio +async def test_mergeContext_handover_only_in_documents_yields_data_response(): + item = { + "success": True, + "data": {}, + "documents": [ + { + "documentName": "handover.json", + "mimeType": "application/json", + "documentData": _handover("only-from-handover"), + } + ], + } + result = await mergeContext(object(), {"dataSource": [item]}) + assert result.success + assert result.data.get("kind") == "context.mergeContext.v1" + assert result.data.get("response") == "only-from-handover" + + +@pytest.mark.asyncio +async def test_mergeContext_handover_json_string_in_documentData(): + payload = _handover("from-json-string") + item = { + "success": True, + "data": {}, + "documents": [ + { + "documentName": "handover.json", + "mimeType": "application/json", + "documentData": 
json.dumps(payload), + } + ], + } + result = await mergeContext(object(), {"dataSource": [item]}) + assert result.success + assert result.data.get("response") == "from-json-string" + + +@pytest.mark.asyncio +async def test_mergeContext_joins_multiple_handover_items(): + items = [ + { + "success": True, + "data": {}, + "documents": [{"documentData": _handover("alpha"), "documentName": "a.json"}], + }, + { + "success": True, + "data": {}, + "documents": [{"documentData": _handover("beta"), "documentName": "b.json"}], + }, + ] + result = await mergeContext(object(), {"dataSource": items}) + assert result.success + assert result.data.get("response") == "### a.json\n\nalpha\n\n### b.json\n\nbeta" + + +@pytest.mark.asyncio +async def test_mergeContext_merged_response_wins_over_handover_chunks(): + items = [ + { + "success": True, + "data": {"response": "merged-wins"}, + "documents": [{"documentData": _handover("ignored"), "documentName": "a.json"}], + }, + ] + result = await mergeContext(object(), {"dataSource": items}) + assert result.success + assert result.data.get("response") == "merged-wins" + + +@pytest.mark.asyncio +async def test_mergeContext_concatenates_each_iteration_data_response_not_only_last(): + """Synthesized response must include every loop body chunk, not just the last one. + + ``response`` lives only at the top level of the data envelope (``data["response"]``). + The deep-merged ``data["merged"]`` dict retains whatever the natural merge produced + for per-item fields — it is NOT overwritten with the synthesized primary text. 
+ """ + items = [ + {"success": True, "data": {"response": "chunk-aaa"}}, + {"success": True, "data": {"response": "chunk-bbb"}}, + {"success": True, "data": {"response": "chunk-ccc"}}, + ] + result = await mergeContext(object(), {"dataSource": items}) + assert result.success + r = result.data.get("response") or "" + assert "chunk-aaa" in r + assert "chunk-bbb" in r + assert "chunk-ccc" in r + assert r == "chunk-aaa\n\nchunk-bbb\n\nchunk-ccc" + # ``merged["response"]`` reflects the natural deep-merge result (last chunk wins), + # NOT the synthesized primary. The canonical synthesized text is at data["response"]. + assert result.data["merged"].get("response") != r or len(items) == 1 + + +@pytest.mark.asyncio +async def test_mergeContext_primary_serializes_as_plain_text_for_file_create(): + from modules.workflows.methods.methodAi._common import serialize_context + + items = [ + {"success": True, "data": {"response": "section-one"}}, + {"success": True, "data": {"response": "section-two"}}, + ] + result = await mergeContext(object(), {"dataSource": items}) + primary = result.data.get("response") + assert isinstance(primary, str) + assert serialize_context(primary) == primary + + +@pytest.mark.asyncio +async def test_mergeContext_strips_document_data_from_merged_documents(): + """documentData must be None in merged.documents — blobs must not accumulate.""" + big_blob = "x" * 100_000 + items = [ + { + "success": True, + "data": {"response": "a"}, + "documents": [ + {"documentName": "a.json", "mimeType": "application/json", "documentData": big_blob}, + ], + }, + { + "success": True, + "data": {"response": "b"}, + "documents": [ + {"documentName": "b.json", "mimeType": "application/json", "documentData": big_blob}, + ], + }, + ] + result = await mergeContext(object(), {"dataSource": items}) + assert result.success + merged_docs = result.data["merged"].get("documents") or [] + assert len(merged_docs) >= 1 + for doc in merged_docs: + assert doc.get("documentData") is None, 
"documentData must be stripped before deep-merge" + + +@pytest.mark.asyncio +async def test_mergeContext_accumulates_image_documents_only_across_iterations(): + """imageDocumentsOnly from every iteration must be list-concat in merged.""" + img_a = {"documentName": "img_a.png", "mimeType": "image/png", "documentData": "aaa="} + img_b = {"documentName": "img_b.png", "mimeType": "image/png", "documentData": "bbb="} + items = [ + {"success": True, "data": {"response": "a"}, "imageDocumentsOnly": [img_a]}, + {"success": True, "data": {"response": "b"}, "imageDocumentsOnly": [img_b]}, + ] + result = await mergeContext(object(), {"dataSource": items}) + assert result.success + imgs = result.data["merged"].get("imageDocumentsOnly") or [] + names = [d.get("documentName") for d in imgs] + assert "img_a.png" in names + assert "img_b.png" in names + + +@pytest.mark.asyncio +async def test_transform_context_envelope_has_kind_and_meta(): + from modules.workflows.methods.methodContext.actions.transformContext import transformContext + + svc = object() + result = await transformContext( + svc, + { + "mappings": [{"operation": "rename", "sourceField": "a", "outputField": "b"}], + "_upstreamPayload": {"a": 42}, + }, + ) + assert result.success and result.data + assert result.data.get("kind") == "context.transformContext.v1" + assert result.data.get("schemaVersion") == 1 + assert result.data.get("b") == 42 + meta = result.data.get("_meta") + assert isinstance(meta, dict) + assert meta.get("actionType") == "context.transformContext" diff --git a/tests/unit/workflow/test_node_combinations.py b/tests/unit/workflow/test_node_combinations.py new file mode 100644 index 00000000..2fd5dd00 --- /dev/null +++ b/tests/unit/workflow/test_node_combinations.py @@ -0,0 +1,702 @@ +# Tests: node handover compatibility across all major node combinations. 
+# +# Covers: +# - extractContent → file.create (direct, no loop) +# - loop.bodyResults → mergeContext → file.create +# - ai.prompt → transformContext → file.create +# - flow.merge with mixed upstream schemas (AiResult + ActionResult) +# - flow.ifElse Transit output accepted by downstream nodes +# - extractContent fan-in → mergeContext (multiple items, no loop) +# - data.aggregate → data.consolidate path +# - Node flags for executor behaviour (no hardcoded type strings) + +import json + +import pytest + +from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES +from modules.features.graphicalEditor.portTypes import PORT_TYPE_CATALOG +from modules.workflows.methods.methodContext.actions.extractContent import ( + PRESENTATION_KIND, + build_presentation_envelope_from_plain_text, + normalize_presentation_envelopes, +) +from modules.workflows.methods.methodContext.actions.mergeContext import mergeContext + +_NODE_BY_ID = {n["id"]: n for n in STATIC_NODE_TYPES} + + +# --------------------------------------------------------------------------- +# Helper builders +# --------------------------------------------------------------------------- + +def _extract_output(text: str) -> dict: + """Minimal extractContent-style output (presentation envelope in ``data``).""" + pres = build_presentation_envelope_from_plain_text(text, source_name="test") + return {"success": True, "response": "", "data": pres, "documents": []} + + +def _ai_output(response: str) -> dict: + """Minimal ai.prompt-style output.""" + return {"success": True, "response": response, "data": {}, "documents": []} + + +# --------------------------------------------------------------------------- +# 1. 
extractContent → file.create (direct path) +# --------------------------------------------------------------------------- + +def test_extract_to_file_create_recommended_ref_is_data(): + """materializeRecommendedDataPickRef must resolve extractContent port 0 to path ['data'].""" + from modules.workflows.automation2.pickNotPushMigration import materializeRecommendedDataPickRef + + graph = { + "nodes": [ + {"id": "ex1", "type": "context.extractContent", "parameters": {}}, + { + "id": "fc1", + "type": "file.create", + "parameters": {"context": "", "outputFormat": "docx"}, + }, + ], + "connections": [{"source": "ex1", "target": "fc1", "sourceOutput": 0, "targetInput": 0}], + } + migrated = materializeRecommendedDataPickRef(graph) + fc = next(n for n in migrated["nodes"] if n["id"] == "fc1") + ctx_ref = fc["parameters"].get("context") + # file.create.context has frontendType="contextBuilder" → materialized as a list + assert isinstance(ctx_ref, list), "context should be materialized as a contextBuilder list" + assert len(ctx_ref) == 1 + assert ctx_ref[0]["nodeId"] == "ex1" + assert ctx_ref[0]["path"] == ["data"] + + +def test_extract_output_is_accepted_as_file_create_context(): + """extractContent presentation envelope must be normalizable for file.create.""" + out = _extract_output("Hello world") + envelopes = normalize_presentation_envelopes(out["data"]) + assert len(envelopes) == 1 + assert envelopes[0].get("kind") == PRESENTATION_KIND + + +def test_extract_output_response_is_empty(): + """extractContent must leave ``response`` empty — canonical text is in ``data``.""" + out = _extract_output("Some extracted content") + assert out["response"] == "" + + +# --------------------------------------------------------------------------- +# 2. 
primaryTextRef: extractContent overrides path to ["data"] +# --------------------------------------------------------------------------- + +def test_extract_primary_text_ref_override_materializes_to_data(): + """When ai.prompt connects to extractContent, primaryTextRef must resolve to ['data'].""" + from modules.workflows.automation2.pickNotPushMigration import materializePrimaryTextHandover + + graph = { + "nodes": [ + {"id": "ex1", "type": "context.extractContent", "parameters": {}}, + { + "id": "ai1", + "type": "ai.prompt", + "parameters": {"context": "", "aiPrompt": "Summarize"}, + }, + ], + "connections": [{"source": "ex1", "target": "ai1", "sourceOutput": 0, "targetInput": 0}], + } + migrated = materializePrimaryTextHandover(graph) + ai = next(n for n in migrated["nodes"] if n["id"] == "ai1") + ctx_ref = ai["parameters"].get("context") + # ai.prompt.context is primaryTextRef → single DataRef dict (not wrapped in list) + assert isinstance(ctx_ref, dict), f"Expected a DataRef dict, got {ctx_ref!r}" + assert ctx_ref["nodeId"] == "ex1" + assert ctx_ref["path"] == ["data"], ( + "extractContent.response is empty; primaryTextRef must point to ['data']" + ) + + +# --------------------------------------------------------------------------- +# 3. 
loop.bodyResults → mergeContext → file.create +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_loop_body_results_into_merge_context_produces_file_create_compatible_envelope(): + """bodyResults from a loop (list of extractContent outputs) must merge correctly.""" + body_results = [ + _extract_output("Page 1 text"), + _extract_output("Page 2 text"), + ] + result = await mergeContext(object(), {"dataSource": body_results}) + assert result.success + data = result.data + assert data.get("kind") == "context.mergeContext.v1" + assert "response" in data + assert data["response"] + # Downstream file.create uses normalize_presentation_envelopes on the full payload + envelopes = normalize_presentation_envelopes(data) + assert len(envelopes) >= 1 + + +@pytest.mark.asyncio +async def test_merge_context_response_not_duplicated_in_merged_blob(): + """``response`` must live only at the top-level of ``data``, not inside ``data.merged``.""" + body_results = [_extract_output("Item A"), _extract_output("Item B")] + result = await mergeContext(object(), {"dataSource": body_results}) + assert result.success + merged_blob = result.data.get("merged", {}) + # The natural deep-merge may include response from individual items — but + # _synthesize_primary_response no longer OVERWRITES merged["response"]. + # Verify canonical response is the synthesized one at top-level. + assert result.data.get("response") + assert "Item A" in result.data["response"] or "Item B" in result.data["response"] + + +@pytest.mark.asyncio +async def test_merge_context_skips_failed_items(): + """Items with ``success=False`` must be excluded from the deep-merge. + + Note: ``count`` reflects total inputs (including failed ones since they were + received); only the deep-merge payload excludes failed items. 
+ """ + good = _extract_output("Good text") + bad = {"success": False, "error": "something failed", "data": {}, "documents": []} + result = await mergeContext(object(), {"dataSource": [good, bad]}) + assert result.success + # response is synthesized only from good items + assert "Good text" in result.data.get("response", "") + # merged blob should not contain the error or failed item's data + merged = result.data.get("merged", {}) + assert merged.get("error") != "something failed" + + +@pytest.mark.asyncio +async def test_merge_context_items_without_success_key_are_included(): + """Items without a ``success`` key (e.g. DocumentList output) must not be dropped.""" + no_success = {"documents": [{"documentName": "a.pdf"}], "count": 1} + result = await mergeContext(object(), {"dataSource": [no_success]}) + assert result.success + assert result.data.get("count") == 1 + + +# --------------------------------------------------------------------------- +# 4. ai.prompt → transformContext (primaryTextRef) +# --------------------------------------------------------------------------- + +def test_ai_prompt_primary_text_ref_materializes_to_response(): + """primaryTextRef from ai.prompt output must resolve to ['response'].""" + from modules.workflows.automation2.pickNotPushMigration import materializePrimaryTextHandover + + graph = { + "nodes": [ + {"id": "ai1", "type": "ai.prompt", "parameters": {}}, + { + "id": "ai2", + "type": "ai.prompt", + "parameters": {"context": "", "aiPrompt": "Continue"}, + }, + ], + "connections": [{"source": "ai1", "target": "ai2", "sourceOutput": 0, "targetInput": 0}], + } + migrated = materializePrimaryTextHandover(graph) + ai2 = next(n for n in migrated["nodes"] if n["id"] == "ai2") + ctx_ref = ai2["parameters"].get("context") + assert isinstance(ctx_ref, dict), f"Expected DataRef dict, got {ctx_ref!r}" + assert ctx_ref["path"] == ["response"] + + +def test_transform_context_from_graph_result_schema_is_action_result(): + """context.transformContext 
must declare ``fromGraphResultSchema: ActionResult``.""" + node = _NODE_BY_ID["context.transformContext"] + port = node["outputPorts"][0] + assert port.get("fromGraphResultSchema") == "ActionResult", ( + "fromGraph port on transformContext must be normalized as ActionResult, not FormPayload" + ) + + +# --------------------------------------------------------------------------- +# 5. flow.merge with mixed upstream schemas +# --------------------------------------------------------------------------- + +def test_flow_merge_accepts_ai_result_and_action_result(): + """Both AiResult and ActionResult must be in flow.merge input accepts.""" + node = _NODE_BY_ID["flow.merge"] + all_accepts = set() + for port in node.get("inputPorts", {}).values(): + all_accepts.update(port.get("accepts", [])) + assert "AiResult" in all_accepts + assert "ActionResult" in all_accepts + assert "Transit" in all_accepts + + +def test_flow_merge_input_count_parameter_exists_with_correct_range(): + """inputCount parameter must allow 2–5 inputs.""" + node = _NODE_BY_ID["flow.merge"] + ic_param = next((p for p in node["parameters"] if p["name"] == "inputCount"), None) + assert ic_param is not None + opts = ic_param.get("frontendOptions", {}) + assert opts.get("min") == 2 + assert opts.get("max") == 5 + + +# --------------------------------------------------------------------------- +# 6. 
flow.ifElse Transit output accepted downstream +# --------------------------------------------------------------------------- + +def test_flow_if_else_output_is_transit(): + """flow.ifElse must output Transit on both branches.""" + node = _NODE_BY_ID["flow.ifElse"] + for port_ix in (0, 1): + schema = node["outputPorts"][port_ix].get("schema") + assert schema == "Transit", f"ifElse port {port_ix} must be Transit, got {schema!r}" + + +def test_transit_accepted_by_all_major_downstream_nodes(): + """All major action nodes must accept Transit input on port 0.""" + expected_transit_accepting = [ + "context.extractContent", + "context.mergeContext", + "context.transformContext", + "ai.prompt", + "file.create", + ] + for node_id in expected_transit_accepting: + node = _NODE_BY_ID[node_id] + accepts = node["inputPorts"][0].get("accepts", []) + assert "Transit" in accepts, f"{node_id} port 0 must accept Transit" + + +# --------------------------------------------------------------------------- +# 7. extractContent fan-in → mergeContext (multiple items, no loop) +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_multiple_extract_outputs_fan_in_to_merge_context(): + """Multiple extractContent outputs passed as a list must merge into one envelope.""" + items = [_extract_output(f"Document {i}") for i in range(3)] + result = await mergeContext(object(), {"dataSource": items}) + assert result.success + assert result.data.get("count") == 3 + assert result.data.get("kind") == "context.mergeContext.v1" + response = result.data.get("response", "") + for i in range(3): + assert f"Document {i}" in response + + +# --------------------------------------------------------------------------- +# 8. 
data.aggregate → data.consolidate schema compatibility +# --------------------------------------------------------------------------- + +def test_data_aggregate_output_accepted_by_consolidate(): + """data.consolidate must accept AggregateResult from data.aggregate.""" + agg_node = _NODE_BY_ID["data.aggregate"] + con_node = _NODE_BY_ID["data.consolidate"] + agg_schema = agg_node["outputPorts"][0].get("schema") + con_accepts = con_node["inputPorts"][0].get("accepts", []) + assert agg_schema in con_accepts, ( + f"data.consolidate port 0 must accept {agg_schema!r} output from data.aggregate" + ) + + +# --------------------------------------------------------------------------- +# 9. Node executor flags (no hardcoded type strings in executor) +# --------------------------------------------------------------------------- + +def test_extract_content_executor_flags(): + """context.extractContent must carry all executor-behaviour flags.""" + node = _NODE_BY_ID["context.extractContent"] + assert node.get("skipUnifiedPresentation") is True + assert node.get("clearResponse") is True + assert node.get("imageDocumentsFromExtractData") is True + assert node.get("popDocumentsFromOutput") is True + + +def test_extract_content_primary_text_ref_path_override(): + """context.extractContent output port 0 must declare primaryTextRefPath=['data'].""" + node = _NODE_BY_ID["context.extractContent"] + port = node["outputPorts"][0] + assert port.get("primaryTextRefPath") == ["data"] + + +def test_merge_context_image_documents_flag(): + """context.mergeContext must carry imageDocumentsFromMerged flag.""" + node = _NODE_BY_ID["context.mergeContext"] + assert node.get("imageDocumentsFromMerged") is True + + +def test_file_create_log_context_resolution_flag(): + """file.create must carry logContextResolution flag.""" + node = _NODE_BY_ID["file.create"] + assert node.get("logContextResolution") is True + + +# --------------------------------------------------------------------------- +# 10. 
AiResult catalog must include data field +# --------------------------------------------------------------------------- + +def test_ai_result_catalog_has_data_field(): + """AiResult in PORT_TYPE_CATALOG must document the ``data`` field.""" + schema = PORT_TYPE_CATALOG["AiResult"] + field_names = [f.name for f in schema.fields] + assert "data" in field_names, "AiResult must document the data field set by executor" + + +# --------------------------------------------------------------------------- +# 11. _outputSchemaForNode returns ActionResult for context.transformContext +# --------------------------------------------------------------------------- + +def test_output_schema_for_transform_context_is_action_result(): + """_outputSchemaForNode must return ActionResult for context.transformContext.""" + from modules.workflows.automation2.executionEngine import _outputSchemaForNode + schema = _outputSchemaForNode("context.transformContext") + assert schema == "ActionResult", ( + f"Expected ActionResult, got {schema!r}. fromGraph port must use fromGraphResultSchema." + ) + + +# --------------------------------------------------------------------------- +# 12. 
flow.merge barrier, context.mergeContext NOT a barrier +# --------------------------------------------------------------------------- + +def test_flow_merge_is_barrier(): + from modules.workflows.automation2.executionEngine import _isBarrierNode + assert _isBarrierNode("flow.merge") is True + + +def test_context_merge_context_is_not_barrier(): + """context.mergeContext is not a barrier — it receives data via dataSource DataRef.""" + from modules.workflows.automation2.executionEngine import _isBarrierNode + assert _isBarrierNode("context.mergeContext") is False + + +def test_no_node_named_is_merge_node_in_engine(): + """Legacy _isMergeNode alias must be removed from executionEngine.""" + import modules.workflows.automation2.executionEngine as eng + assert not hasattr(eng, "_isMergeNode"), "_isMergeNode legacy alias must be deleted" + + +# --------------------------------------------------------------------------- +# 13. methodBase parameter passthrough — node-defined params must reach the action +# --------------------------------------------------------------------------- + +def test_method_base_validate_parameters_passes_through_undeclared_keys(): + """_validateParameters must keep parameters the action did not formally declare. + + Regression: WorkflowActionDefinition for context.extractContent only declares + ``documentList``, but the node exposes contentFilter, pdfExtractMode, outputMode, ... + Those MUST reach the action implementation. 
+ """ + from modules.workflows.methods.methodBase import MethodBase + from modules.datamodels.datamodelWorkflowActions import WorkflowActionParameter + from modules.shared.frontendTypes import FrontendType + + paramDefs = { + "documentList": WorkflowActionParameter( + name="documentList", type="Any", frontendType=FrontendType.HIDDEN, + required=True, description="docs", + ), + } + + class _Svc: + pass + + mb = MethodBase.__new__(MethodBase) + mb.services = _Svc() + + incoming = { + "documentList": ["doc1"], + "contentFilter": "imagesOnly", + "pdfExtractMode": "all", + "outputMode": "lines", + "_runContext": {"mandateId": "m"}, + "parentOperationId": "op1", + } + validated = mb._validateParameters(incoming, paramDefs) + assert validated["documentList"] == ["doc1"] + assert validated["contentFilter"] == "imagesOnly", ( + "contentFilter must pass through even though the action did not declare it" + ) + assert validated["pdfExtractMode"] == "all" + assert validated["outputMode"] == "lines" + assert validated["_runContext"] == {"mandateId": "m"} + assert validated["parentOperationId"] == "op1" + + +def test_parse_presentation_parameters_imagesonly_coerces_pdf_mode_to_images(): + """contentFilter=imagesOnly must override pdfExtractMode=all (node default).""" + from modules.workflows.methods.methodContext.actions.extractContent import ( + parse_presentation_parameters, + ) + cfg = parse_presentation_parameters({"contentFilter": "imagesOnly", "pdfExtractMode": "all"}) + assert cfg["pdfExtractMode"] == "images", ( + "imagesOnly + pdfExtractMode=all must coerce to 'images' — otherwise text parts " + "leak into the presentation layer." 
+ ) + + +def test_parse_presentation_parameters_textonly_coerces_pdf_mode(): + from modules.workflows.methods.methodContext.actions.extractContent import ( + parse_presentation_parameters, + ) + cfg = parse_presentation_parameters({"contentFilter": "textOnly", "pdfExtractMode": "images"}) + assert cfg["pdfExtractMode"] == "text" + + +def test_sniff_image_mime_recognizes_common_signatures(): + from modules.workflows.methods.methodContext.actions.extractContent import ( + _sniff_image_mime, + ) + assert _sniff_image_mime(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8) == "image/png" + assert _sniff_image_mime(b"\xff\xd8\xff\xe0" + b"\x00" * 8) == "image/jpeg" + assert _sniff_image_mime(b"GIF89a" + b"\x00" * 8) == "image/gif" + assert _sniff_image_mime(b"RIFF" + b"\x00\x00\x00\x00" + b"WEBP") == "image/webp" + + +def test_sniff_image_mime_raises_on_unknown_signature(): + """No silent fallback to image/png — unknown signatures must error out.""" + import pytest as _pt + from modules.workflows.methods.methodContext.actions.extractContent import ( + _sniff_image_mime, + ) + with _pt.raises(ValueError): + _sniff_image_mime(b"NOT_AN_IMAGE_" + b"\x00" * 8) + with _pt.raises(ValueError): + _sniff_image_mime(b"") + + +def test_markdown_renderer_image_uses_relative_path_and_emits_sidecar_files(): + """Images: relative ![alt](file.png) in md + separate image RenderedDocuments.""" + import asyncio + import base64 as _b64 + from modules.serviceCenter.services.serviceGeneration.renderers.rendererMarkdown import ( + RendererMarkdown, + ) + + png_b64 = _b64.b64encode( + b"\x89PNG\r\n\x1a\n" + b"\x00" * 16 + ).decode("ascii") + + content = { + "metadata": {"title": "doc"}, + "documents": [{ + "id": "d1", + "title": "doc", + "outputFormat": "md", + "language": "de", + "sections": [{ + "id": "s1", + "content_type": "image", + "order": 1, + "elements": [{ + "content": { + "altText": "alpha.png", + "fileName": "alpha.png", + "mimeType": "image/png", + "base64Data": png_b64, + }, + }], + }], + }], + 
} + r = RendererMarkdown() + rendered = asyncio.run(r.render(content, title="doc")) + assert len(rendered) == 2, "markdown render must return .md + sidecar image" + md = rendered[0].documentData.decode("utf-8") + assert "![alpha.png](alpha.png)" in md + assert "/api/files/" not in md + assert "base64" not in md.lower() + assert rendered[1].filename == "alpha.png" + assert rendered[1].mimeType == "image/png" + assert len(rendered[1].documentData) > 0 + + +def test_markdown_renderer_image_raises_without_base64_data(): + """Missing base64Data must fail — no API URL fallback.""" + import asyncio + import pytest as _pt + from modules.serviceCenter.services.serviceGeneration.renderers.rendererMarkdown import ( + RendererMarkdown, + ) + + content = { + "metadata": {}, + "documents": [{ + "id": "d1", + "title": "doc", + "outputFormat": "md", + "language": "de", + "sections": [{ + "id": "s1", + "content_type": "image", + "order": 1, + "elements": [{ + "content": { + "altText": "beta.jpg", + "fileName": "beta.jpg", + "mimeType": "image/jpeg", + "fileId": "FILE-1", + }, + }], + }], + }], + } + r = RendererMarkdown() + with _pt.raises(ValueError, match="base64Data"): + asyncio.run(r.render(content, title="doc")) + + +def test_markdown_renderer_unknown_section_type_raises(): + """No fallback to paragraph — unknown section types must surface.""" + import asyncio + import pytest as _pt + from modules.serviceCenter.services.serviceGeneration.renderers.rendererMarkdown import ( + RendererMarkdown, + ) + + content = { + "metadata": {}, + "documents": [{ + "id": "d1", + "title": "doc", + "outputFormat": "md", + "language": "de", + "sections": [{ + "id": "s1", + "content_type": "totally_unknown", + "order": 1, + "elements": [{"content": {"text": "x"}}], + }], + }], + } + r = RendererMarkdown() + with _pt.raises(ValueError, match="unsupported section content_type"): + asyncio.run(r.render(content, title="doc")) + + +def test_markdown_renderer_accepts_image_section_type(): + 
"""Regression: markdown must declare 'image' as accepted to avoid silent filtering.""" + from modules.serviceCenter.services.serviceGeneration.renderers.rendererMarkdown import ( + RendererMarkdown, + ) + accepted = RendererMarkdown.getAcceptedSectionTypes("md") + assert "image" in accepted, "image must be in accepted section types for markdown" + + +def test_extract_image_slot_carries_file_id_and_mime(): + """Presentation→document conversion must propagate fileId & mimeType to renderers.""" + from modules.workflows.methods.methodContext.actions.extractContent import ( + presentation_envelopes_to_document_json, + PRESENTATION_KIND, + PRESENTATION_SCHEMA_VERSION, + ) + + class _MgmtStub: + def getFileData(self, fid): + return b"\xff\xd8\xff\xe0" + b"\x00" * 100 + + class _Services: + def __init__(self): + self.interfaceDbComponent = _MgmtStub() + + envelope = { + "schemaVersion": PRESENTATION_SCHEMA_VERSION, + "kind": PRESENTATION_KIND, + "outputMode": "lines", + "fileOrder": ["file_1_x.pdf"], + "files": { + "file_1_x.pdf": { + "outputMode": "lines", + "sourceFileName": "x.pdf", + "data": [ + { + "id": "img1", + "typeGroup": "image", + "mimeType": "image/jpeg", + "embeddedImageFileId": "FILE-7", + "embeddedImageFileName": "extract_media_x.jpg", + "label": "x", + } + ], + } + }, + } + doc_json = presentation_envelopes_to_document_json( + envelope, title="t", language="de", services=_Services() + ) + sections = doc_json["documents"][0]["sections"] + image_sections = [s for s in sections if s.get("content_type") == "image"] + assert len(image_sections) == 1 + content = image_sections[0]["elements"][0]["content"] + assert content.get("fileId") == "FILE-7" + assert content.get("mimeType") == "image/jpeg", ( + f"mime must be sniffed from bytes (JPEG magic), got {content.get('mimeType')!r}" + ) + assert content.get("base64Data"), "base64Data must be present for embed-capable renderers" + + +def test_extract_image_slot_raises_when_file_id_missing(): + """No silent skip — 
missing embeddedImageFileId must fail loudly.""" + import pytest as _pt + from modules.workflows.methods.methodContext.actions.extractContent import ( + presentation_envelopes_to_document_json, + PRESENTATION_KIND, + PRESENTATION_SCHEMA_VERSION, + ) + + class _Services: + interfaceDbComponent = None + + envelope = { + "schemaVersion": PRESENTATION_SCHEMA_VERSION, + "kind": PRESENTATION_KIND, + "outputMode": "lines", + "fileOrder": ["file_1_x.pdf"], + "files": { + "file_1_x.pdf": { + "outputMode": "lines", + "sourceFileName": "x.pdf", + "data": [ + { + "id": "img1", + "typeGroup": "image", + "mimeType": "image/jpeg", + "label": "x", + } + ], + } + }, + } + with _pt.raises(ValueError, match="embeddedImageFileId"): + presentation_envelopes_to_document_json( + envelope, title="t", language="de", services=_Services() + ) + + +def test_parse_presentation_parameters_noimages_drops_images_mode(): + from modules.workflows.methods.methodContext.actions.extractContent import ( + parse_presentation_parameters, + ) + cfg = parse_presentation_parameters({"contentFilter": "noImages", "pdfExtractMode": "images"}) + assert cfg["pdfExtractMode"] == "text" + + +def test_method_base_validate_parameters_applies_defaults_for_declared(): + """Declared parameters still get defaults applied even when undeclared keys pass through.""" + from modules.workflows.methods.methodBase import MethodBase + from modules.datamodels.datamodelWorkflowActions import WorkflowActionParameter + from modules.shared.frontendTypes import FrontendType + + paramDefs = { + "outputFormat": WorkflowActionParameter( + name="outputFormat", type="str", frontendType=FrontendType.TEXT, + required=False, default="docx", description="fmt", + ), + } + + class _Svc: + pass + + mb = MethodBase.__new__(MethodBase) + mb.services = _Svc() + + validated = mb._validateParameters({"unknown": "x"}, paramDefs) + assert validated["outputFormat"] == "docx" + assert validated["unknown"] == "x" diff --git 
a/tests/unit/workflow/test_phase3_context_node.py b/tests/unit/workflow/test_phase3_context_node.py index 7172c6e7..76fbc972 100644 --- a/tests/unit/workflow/test_phase3_context_node.py +++ b/tests/unit/workflow/test_phase3_context_node.py @@ -18,11 +18,42 @@ def test_context_extractContent_node_exists(): def test_context_extractContent_node_shape(): node = next(n for n in STATIC_NODE_TYPES if n["id"] == "context.extractContent") assert node["category"] == "context" + assert node.get("injectRunContext") is True assert node["meta"]["usesAi"] is False assert node["_method"] == "context" assert node["_action"] == "extractContent" - assert node["outputPorts"][0]["schema"] == "UdmDocument" + assert node["outputPorts"][0]["schema"] == "ActionResult" assert "DocumentList" in node["inputPorts"][0]["accepts"] + assert "LoopItem" in node["inputPorts"][0]["accepts"] + names = [p["name"] for p in node["parameters"]] + assert names == [ + "documentList", + "contentFilter", + "outputMode", + "splitBy", + "chunkSizeUnit", + "chunkSize", + "chunkOverlap", + "filterEmptyLines", + "trimWhitespace", + "includeLineNumbers", + "includeMetadata", + "csvHeaderRow", + "pdfExtractMode", + "markdownPreserveFormatting", + ] + + pick_paths = [opt["path"] for opt in node["outputPorts"][0]["dataPickOptions"]] + assert ["data", "files"] in pick_paths + assert ["data", "_meta"] in pick_paths + + + +def test_context_transformContext_has_envelope_data_pick_paths(): + node = next(n for n in STATIC_NODE_TYPES if n["id"] == "context.transformContext") + pick_paths = [opt["path"] for opt in node["outputPorts"][0]["dataPickOptions"]] + assert ["data"] in pick_paths + assert ["data", "_meta"] in pick_paths def test_udm_port_types_registered(): @@ -62,3 +93,17 @@ def test_getExecutor_dispatches_context(): from modules.workflows.automation2.executors import ActionNodeExecutor executor = _getExecutor("context.extractContent", None) assert isinstance(executor, ActionNodeExecutor) + + +def 
test_context_mergeContext_has_envelope_data_pick_paths(): + node = next(n for n in STATIC_NODE_TYPES if n["id"] == "context.mergeContext") + pick_paths = [opt["path"] for opt in node["outputPorts"][0]["dataPickOptions"]] + assert ["data"] in pick_paths + assert ["data", "_meta"] in pick_paths + assert ["merged"] in pick_paths + + +def test_context_mergeContext_surfaces_data_pick_paths_match_node_outputs(): + """DataPicker uses paths like ``merged``; executor must surface ``data.*`` to top level.""" + node = next(n for n in STATIC_NODE_TYPES if n["id"] == "context.mergeContext") + assert node.get("surfaceDataAsTopLevel") is True diff --git a/tests/unit/workflow/test_phase4_workflow_nodes.py b/tests/unit/workflow/test_phase4_workflow_nodes.py index 69f16f89..eb478bda 100644 --- a/tests/unit/workflow/test_phase4_workflow_nodes.py +++ b/tests/unit/workflow/test_phase4_workflow_nodes.py @@ -27,10 +27,13 @@ class TestNodeDefinitions: assert "iterationMode" in paramNames assert "iterationStride" in paramNames assert "concurrency" in paramNames + assert "level" not in paramNames modeParam = next(p for p in node["parameters"] if p["name"] == "iterationMode") - assert "all" in modeParam["frontendOptions"]["options"] + assert "every_nth" in modeParam["frontendOptions"]["options"] concParam = next(p for p in node["parameters"] if p["name"] == "concurrency") assert concParam["default"] == 1 + assert node["inputs"] == 1 + assert node["outputs"] == 2 def test_flow_loop_accepts_udm(self): node = next(n for n in STATIC_NODE_TYPES if n["id"] == "flow.loop") @@ -146,13 +149,27 @@ class TestFlowLoopUdmLevel: ex = FlowExecutor() udm = {"id": "d1", "role": "document", "children": [{"id": "p1"}, {"id": "p2"}]} node = {"type": "flow.loop", "id": "loop1", - "parameters": {"items": "direct", "level": "auto"}} + "parameters": {"items": "direct"}} ctx = {"nodeOutputs": {"loop1": udm, "direct": udm}, "connectionMap": {}, "inputSources": {"loop1": {0: ("direct", 0)}}} from unittest.mock import 
patch with patch("modules.workflows.automation2.graphUtils.resolveParameterReferences", return_value=udm): result = await ex.execute(node, ctx) assert result["count"] == 2 + @pytest.mark.asyncio + async def test_loop_every_nth_stride(self): + from modules.workflows.automation2.executors.flowExecutor import FlowExecutor + ex = FlowExecutor() + node = {"type": "flow.loop", "id": "loop1", "parameters": { + "items": {"type": "value", "value": [10, 20, 30, 40, 50]}, + "iterationMode": "every_nth", + "iterationStride": 2, + }} + ctx = {"nodeOutputs": {}, "connectionMap": {}, "inputSources": {"loop1": {}}} + result = await ex.execute(node, ctx) + assert result["count"] == 3 + assert result["items"] == [10, 30, 50] + @pytest.mark.asyncio class TestDataFilterUdm: diff --git a/tests/unit/workflow/test_switch_filtered_output.py b/tests/unit/workflow/test_switch_filtered_output.py new file mode 100644 index 00000000..1cfac160 --- /dev/null +++ b/tests/unit/workflow/test_switch_filtered_output.py @@ -0,0 +1,359 @@ +# Copyright (c) 2025 Patrick Motsch +"""flow.switch ContextBranch: filtered presentation + loop-ready items.""" + +import pytest + +from modules.features.graphicalEditor.portTypes import unwrapTransit, wrapTransit +from modules.features.graphicalEditor.switchOutput import ( + build_switch_branch_payload, + build_switch_combined_output, + build_switch_default_payload, + unwrap_transit_for_port, +) +from modules.workflows.automation2.executionEngine import _is_node_on_active_path +from modules.workflows.automation2.executors.flowExecutor import FlowExecutor +from modules.workflows.automation2.graphUtils import resolveParameterReferences +from modules.workflows.methods.methodContext.actions.extractContent import PRESENTATION_KIND + + +def _presentation_with_text_and_image(): + return { + "kind": PRESENTATION_KIND, + "schemaVersion": "1", + "outputMode": "parts", + "fileOrder": ["doc"], + "files": { + "doc": { + "outputMode": "parts", + "data": [ + {"typeGroup": "text", 
"id": "t1", "data": "Hello"}, + {"typeGroup": "image", "id": "i1", "mimeType": "image/png", "data": "YQ=="}, + ], + } + }, + } + + +def _presentation_blob_with_text_and_image(): + blob_data = "Hello world\n\n[image:img1]\n\nMore text" + return { + "kind": PRESENTATION_KIND, + "schemaVersion": "1", + "outputMode": "blob", + "fileOrder": ["doc"], + "files": { + "doc": { + "outputMode": "blob", + "sourceFileName": "test.pdf", + "data": blob_data, + } + }, + } + + +def test_build_switch_branch_payload_filters_blob_image(): + pres = _presentation_blob_with_text_and_image() + payload = build_switch_branch_payload( + pres, + {"operator": "contains_content", "value": "image"}, + value_kind="context", + match_index=0, + ) + assert payload["filterApplied"] is True + assert len(payload["items"]) == 1 + assert payload["items"][0]["value"]["typeGroup"] == "image" + assert "[image:img1]" in payload["data"]["files"]["doc"]["data"] + + +def test_build_switch_branch_payload_filters_blob_text(): + pres = _presentation_blob_with_text_and_image() + payload = build_switch_branch_payload( + pres, + {"operator": "contains_content", "value": "text"}, + value_kind="context", + match_index=1, + ) + assert payload["filterApplied"] is True + assert len(payload["items"]) == 2 + assert all(i["value"]["typeGroup"] == "text" for i in payload["items"]) + filtered = payload["data"]["files"]["doc"]["data"] + assert "Hello world" in filtered + assert "[image:" not in filtered + + +@pytest.mark.asyncio +async def test_switch_blob_multi_match(): + executor = FlowExecutor() + pres = _presentation_blob_with_text_and_image() + sw_id = "sw1" + node_outputs = { + "ext1": pres, + "_context": { + "graphNodesById": { + "ext1": {"id": "ext1", "type": "context.extractContent"}, + sw_id: {"id": sw_id, "type": "flow.switch"}, + } + }, + } + executor._getInputData = lambda *_a, **_k: pres # type: ignore[method-assign] + node = { + "id": sw_id, + "type": "flow.switch", + "parameters": { + "value": {"type": "ref", 
"nodeId": "ext1", "path": []}, + "cases": [ + {"operator": "contains_content", "value": "image"}, + {"operator": "contains_content", "value": "text"}, + ], + }, + } + out = await executor._switch(node, node_outputs, sw_id, {}) + assert out["_meta"]["matches"] == [0, 1] + assert len(unwrap_transit_for_port(out, 0)["items"]) == 1 + assert len(unwrap_transit_for_port(out, 1)["items"]) == 2 + + +def test_switch_blob_image_items_get_embedded_file_id(): + part_id = "dbd27119-cd21-4a62-b5e2-b06d3b81470b" + file_id = "storage-file-uuid-1" + pres = { + "kind": PRESENTATION_KIND, + "outputMode": "blob", + "fileOrder": ["doc"], + "files": { + "doc": { + "outputMode": "blob", + "data": f"Hello\n\n[image:{part_id}]", + } + }, + "_meta": { + "persistedImageArtifacts": [ + {"sourcePartId": part_id, "fileId": file_id, "mimeType": "image/png"}, + ] + }, + } + payload = build_switch_branch_payload( + pres, + {"operator": "contains_content", "value": "image"}, + value_kind="context", + match_index=0, + ) + assert len(payload["items"]) == 1 + slot = payload["items"][0]["value"] + assert slot.get("embeddedImageFileId") == file_id + + +def test_build_switch_branch_payload_filters_images(): + pres = _presentation_with_text_and_image() + case = {"operator": "contains_content", "value": "image"} + payload = build_switch_branch_payload( + pres, + case, + value_kind="context", + match_index=0, + ) + assert payload["filterApplied"] is True + assert payload["contentType"] == "image" + assert len(payload["items"]) == 1 + assert payload["items"][0]["value"]["typeGroup"] == "image" + data = payload["data"] + assert data["kind"] == PRESENTATION_KIND + slots = data["files"]["doc"]["data"] + assert len(slots) == 1 + assert slots[0]["typeGroup"] == "image" + + +def test_build_switch_default_payload_passthrough(): + pres = _presentation_with_text_and_image() + payload = build_switch_default_payload(pres, match_index=2) + assert payload["filterApplied"] is False + assert payload["match"] == 2 + assert 
payload["data"]["fileOrder"] == pres["fileOrder"] + assert len(payload["items"]) == 2 + + +@pytest.mark.asyncio +async def test_switch_executor_match_and_default_branch(): + executor = FlowExecutor() + pres = _presentation_with_text_and_image() + ext_id = "ext1" + sw_id = "sw1" + node_outputs = { + ext_id: pres, + "_context": { + "graphNodesById": { + ext_id: {"id": ext_id, "type": "context.extractContent"}, + sw_id: {"id": sw_id, "type": "flow.switch"}, + } + }, + } + + def _inp(_nid, _sources, _outputs, _output_index=0): + return pres + + executor._getInputData = _inp # type: ignore[method-assign] + + match_node = { + "id": sw_id, + "type": "flow.switch", + "parameters": { + "value": {"type": "ref", "nodeId": ext_id, "path": []}, + "cases": [{"operator": "contains_content", "value": "image"}], + }, + } + match_out = await executor._switch(match_node, node_outputs, sw_id, {}) + match_payload = unwrapTransit(match_out) + assert match_out["_meta"]["match"] == 0 + assert match_out["_meta"]["matches"] == [0] + assert match_payload["filterApplied"] is True + assert len(match_payload["items"]) == 1 + assert match_payload["branches"]["0"]["contentType"] == "image" + + default_node = { + **match_node, + "parameters": { + **match_node["parameters"], + "cases": [{"operator": "contains_content", "value": "video"}], + }, + } + default_out = await executor._switch(default_node, node_outputs, sw_id, {}) + assert default_out["_meta"]["match"] == 1 + assert default_out["_meta"]["matches"] == [1] + default_payload = unwrapTransit(default_out) + assert default_payload["filterApplied"] is False + assert default_payload["data"]["fileOrder"] == pres["fileOrder"] + + +@pytest.mark.asyncio +async def test_switch_multi_match_text_and_image_branches(): + executor = FlowExecutor() + pres = _presentation_with_text_and_image() + sw_id = "sw1" + node_outputs = { + "ext1": pres, + "_context": { + "graphNodesById": { + "ext1": {"id": "ext1", "type": "context.extractContent"}, + sw_id: {"id": 
sw_id, "type": "flow.switch"}, + } + }, + } + executor._getInputData = lambda *_a, **_k: pres # type: ignore[method-assign] + + node = { + "id": sw_id, + "type": "flow.switch", + "parameters": { + "value": {"type": "ref", "nodeId": "ext1", "path": []}, + "cases": [ + {"operator": "contains_content", "value": "image"}, + {"operator": "contains_content", "value": "text"}, + ], + }, + } + out = await executor._switch(node, node_outputs, sw_id, {}) + assert out["_meta"]["matches"] == [0, 1] + img = unwrap_transit_for_port(out, 0) + txt = unwrap_transit_for_port(out, 1) + assert img["contentType"] == "image" + assert txt["contentType"] == "text" + assert len(img["items"]) == 1 + assert len(txt["items"]) == 1 + assert img["items"][0]["value"]["typeGroup"] == "image" + assert txt["items"][0]["value"]["typeGroup"] == "text" + + +def test_active_path_allows_all_matching_switch_ports(): + combined = build_switch_combined_output( + _presentation_with_text_and_image(), + [ + {"operator": "contains_content", "value": "image"}, + {"operator": "contains_content", "value": "text"}, + ], + matched_indices=[0, 1], + value_kind="context", + ) + sw_out = wrapTransit(combined, {"match": 0, "matches": [0, 1]}) + node_outputs = {"sw1": sw_out} + conn_map = { + "loop_img": [("sw1", 0, 0)], + "file_txt": [("sw1", 1, 0)], + } + assert _is_node_on_active_path("loop_img", conn_map, node_outputs) + assert _is_node_on_active_path("file_txt", conn_map, node_outputs) + assert not _is_node_on_active_path("other", {"other": [("sw1", 2, 0)]}, node_outputs) + + +@pytest.mark.asyncio +async def test_loop_uses_switch_items_ref(): + executor = FlowExecutor() + pres = _presentation_with_text_and_image() + branch = build_switch_branch_payload( + pres, + {"operator": "contains_content", "value": "image"}, + value_kind="context", + match_index=0, + ) + sw_id = "sw1" + node_outputs = {sw_id: wrapTransit(branch, {"match": 0})} + + loop_node = { + "id": "loop1", + "type": "flow.loop", + "parameters": { + 
"items": {"type": "ref", "nodeId": sw_id, "path": ["items"]}, + }, + } + out = await executor._loop(loop_node, node_outputs, "loop1", {}) + assert out["count"] == 1 + assert out["items"][0]["value"]["typeGroup"] == "image" + + +def test_resolve_context_builder_ref_uses_switch_output_port(): + """file.create context ref to switch.items must use the wired source output port.""" + pres = _presentation_with_text_and_image() + combined = build_switch_combined_output( + pres, + [ + {"operator": "contains_content", "value": "image"}, + {"operator": "contains_content", "value": "text"}, + ], + matched_indices=[0, 1], + value_kind="context", + ) + sw_id = "sw1" + consumer_id = "fc1" + node_outputs = {sw_id: wrapTransit(combined, {"match": 0, "matches": [0, 1]})} + input_sources = {consumer_id: {0: (sw_id, 1)}} + resolved = resolveParameterReferences( + { + "context": [ + { + "type": "ref", + "nodeId": sw_id, + "path": ["items"], + } + ], + }, + node_outputs, + consumer_node_id=consumer_id, + input_sources=input_sources, + ) + items = resolved["context"] + assert isinstance(items, list) + assert len(items) == 1 + assert items[0]["value"]["typeGroup"] == "text" + branch = build_switch_branch_payload( + _presentation_with_text_and_image(), + {"operator": "contains_content", "value": "image"}, + value_kind="context", + match_index=0, + ) + node_outputs = {"sw1": wrapTransit(branch, {"match": 0})} + resolved = resolveParameterReferences( + {"type": "ref", "nodeId": "sw1", "path": ["items"]}, + node_outputs, + ) + assert isinstance(resolved, list) + assert len(resolved) == 1 + assert resolved[0]["value"]["typeGroup"] == "image" diff --git a/tests/unit/workflows/test_automation2_graphUtils.py b/tests/unit/workflows/test_automation2_graphUtils.py index 5ea7126a..f76b9545 100644 --- a/tests/unit/workflows/test_automation2_graphUtils.py +++ b/tests/unit/workflows/test_automation2_graphUtils.py @@ -5,7 +5,64 @@ Unit tests for automation2 graphUtils - resolveParameterReferences 
(ref/value fo import pytest -from modules.workflows.automation2.graphUtils import resolveParameterReferences +from modules.workflows.automation2.graphUtils import resolveParameterReferences, validateGraph + + +_KNOWN_TYPES = frozenset({"trigger.manual", "trigger.form", "ai.prompt", "flow.pass"}) + + +class TestValidateGraphStartNode: + """Non-empty graphs must include at least one start (trigger.*) node.""" + + def test_empty_graph_ok_without_start(self): + assert validateGraph({"nodes": [], "connections": []}, _KNOWN_TYPES) == [] + + def test_non_empty_graph_without_start_fails(self): + graph = { + "nodes": [{"id": "a", "type": "ai.prompt", "parameters": {}}], + "connections": [], + } + errs = validateGraph(graph, _KNOWN_TYPES) + assert any("no start node" in e.lower() for e in errs) + + def test_non_empty_graph_with_start_ok(self): + graph = { + "nodes": [ + {"id": "t", "type": "trigger.manual", "parameters": {}}, + {"id": "a", "type": "ai.prompt", "parameters": {}}, + ], + "connections": [], + } + errs = validateGraph(graph, _KNOWN_TYPES) + assert not any("no start node" in e.lower() for e in errs) + + + def test_switch_second_output_to_ai_prompt_ok(self): + from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES + + node_type_ids = {n["id"] for n in STATIC_NODE_TYPES} + graph = { + "nodes": [ + {"id": "t", "type": "trigger.manual", "parameters": {}}, + { + "id": "sw", + "type": "flow.switch", + "parameters": { + "cases": [ + {"operator": "contains_content", "value": "image"}, + {"operator": "contains_content", "value": "text"}, + ], + }, + }, + {"id": "ai", "type": "ai.prompt", "parameters": {"aiPrompt": "hi"}}, + ], + "connections": [ + {"source": "sw", "target": "ai", "sourceOutput": 1, "targetInput": 0}, + ], + } + errs = validateGraph(graph, node_type_ids) + port_errs = [e for e in errs if "Port mismatch" in e] + assert port_errs == [], port_errs class TestResolveParameterReferences: @@ -175,3 +232,37 @@ class 
TestPathContainsWildcard: def test_literal_star_in_int_segment_does_not_match(self): from modules.workflows.automation2.graphUtils import _pathContainsWildcard assert _pathContainsWildcard([1, 2, 3]) is False + + +class TestLoopBodyAndDoneReachability: + """flow.loop: body only from output 0; done branch from output 1 (engine helpers).""" + + def test_body_only_output_0_not_done_chain(self): + from modules.workflows.automation2.graphUtils import buildConnectionMap, getLoopBodyNodeIds, getLoopDoneNodeIds + + conns = [ + {"source": "tr", "target": "loop", "targetInput": 0}, + {"source": "loop", "target": "a", "sourceOutput": 0, "targetInput": 0}, + {"source": "loop", "target": "d", "sourceOutput": 1, "targetInput": 0}, + {"source": "a", "target": "b"}, + ] + cm = buildConnectionMap(conns) + assert getLoopBodyNodeIds("loop", cm) == {"a", "b"} + assert getLoopDoneNodeIds("loop", cm) == {"d"} + + def test_primary_input_prefers_outside_body(self): + from modules.workflows.automation2.graphUtils import ( + buildConnectionMap, + getLoopBodyNodeIds, + getLoopPrimaryInputSource, + ) + + conns = [ + {"source": "tr", "target": "loop", "targetInput": 0}, + {"source": "a", "target": "loop", "targetInput": 0}, + {"source": "loop", "target": "a", "sourceOutput": 0, "targetInput": 0}, + ] + cm = buildConnectionMap(conns) + body = getLoopBodyNodeIds("loop", cm) + assert body == {"a"} + assert getLoopPrimaryInputSource("loop", cm, body) == ("tr", 0) diff --git a/tests/unit/workflows/test_trigger_executor.py b/tests/unit/workflows/test_trigger_executor.py new file mode 100644 index 00000000..446d92da --- /dev/null +++ b/tests/unit/workflows/test_trigger_executor.py @@ -0,0 +1,31 @@ +# Copyright (c) 2025 Patrick Motsch +"""TriggerExecutor: form start output must match FormPayload (payload.* refs).""" + +import pytest + +from modules.workflows.automation2.executors.triggerExecutor import TriggerExecutor +from modules.workflows.automation2.runEnvelope import default_run_envelope + + 
+@pytest.mark.asyncio +async def test_trigger_form_returns_payload_only(): + ex = TriggerExecutor() + node = { + "id": "f1", + "type": "trigger.form", + "parameters": {"formFields": [{"name": "q", "type": "str", "label": "Q"}]}, + } + env = default_run_envelope("form", entry_point_id="f1", payload={"q": "hello"}) + out = await ex.execute(node, {"runEnvelope": env, "userId": "u1"}) + assert out == {"payload": {"q": "hello"}, "_success": True} + + +@pytest.mark.asyncio +async def test_trigger_manual_still_returns_full_envelope(): + ex = TriggerExecutor() + node = {"id": "m1", "type": "trigger.manual", "parameters": {}} + env = default_run_envelope("manual", payload={"x": 1}) + out = await ex.execute(node, {"runEnvelope": env, "userId": "u1"}) + assert isinstance(out, dict) + assert out.get("trigger", {}).get("type") == "manual" + assert out.get("payload") == {"x": 1}