From 56d6ecf978799f6333d427a4ac52e18e5b20c7a7 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Wed, 17 Dec 2025 10:45:09 +0100
Subject: [PATCH] Refactor action handlers
---
modules/MODULE_DEPENDENCIES.md | 292 ----------
.../datamodels/datamodelWorkflowActions.py | 88 ++++
modules/interfaces/interfaceBootstrap.py | 105 ++++
modules/interfaces/interfaceDbAppObjects.py | 114 +++-
modules/routes/routeRbac.py | 112 +++-
modules/routes/routeWorkflows.py | 244 +++++++++
.../methods/{methodAi.py => methodAi.py.old} | 0
.../workflows/methods/methodAi/__init__.py | 7 +
.../methods/methodAi/actions/__init__.py | 26 +
.../methods/methodAi/actions/convert.py | 157 ++++++
.../methodAi/actions/convertDocument.py | 52 ++
.../methods/methodAi/actions/extractData.py | 59 +++
.../methodAi/actions/generateDocument.py | 53 ++
.../methods/methodAi/actions/process.py | 219 ++++++++
.../methodAi/actions/summarizeDocument.py | 55 ++
.../methodAi/actions/translateDocument.py | 60 +++
.../methods/methodAi/actions/webResearch.py | 117 +++++
.../methods/methodAi/helpers/__init__.py | 5 +
.../methods/methodAi/helpers/csvProcessing.py | 59 +++
.../workflows/methods/methodAi/methodAi.py | 383 ++++++++++++++
modules/workflows/methods/methodBase.py | 218 +++++++-
...{methodContext.py => methodContext.py.old} | 0
.../methods/methodContext/__init__.py | 7 +
.../methods/methodContext/actions/__init__.py | 16 +
.../methodContext/actions/extractContent.py | 156 ++++++
.../methodContext/actions/getDocumentIndex.py | 94 ++++
.../actions/triggerPreprocessingServer.py | 121 +++++
.../methods/methodContext/helpers/__init__.py | 5 +
.../methodContext/helpers/documentIndex.py | 89 ++++
.../methodContext/helpers/formatting.py | 75 +++
.../methods/methodContext/methodContext.py | 108 ++++
.../{methodJira.py => methodJira.py.old} | 0
.../workflows/methods/methodJira/__init__.py | 7 +
.../methods/methodJira/actions/__init__.py | 26 +
.../methods/methodJira/actions/connectJira.py | 139 +++++
.../methodJira/actions/createCsvContent.py | 157 ++++++
.../methodJira/actions/createExcelContent.py | 157 ++++++
.../methodJira/actions/exportTicketsAsJson.py | 84 +++
.../actions/importTicketsFromJson.py | 101 ++++
.../methodJira/actions/mergeTicketData.py | 157 ++++++
.../methodJira/actions/parseCsvContent.py | 112 ++++
.../methodJira/actions/parseExcelContent.py | 121 +++++
.../methods/methodJira/helpers/__init__.py | 5 +
.../methodJira/helpers/adfConverter.py | 180 +++++++
.../methodJira/helpers/documentParsing.py | 81 +++
.../methods/methodJira/methodJira.py | 322 ++++++++++++
...{methodOutlook.py => methodOutlook.py.old} | 0
.../methods/methodOutlook/__init__.py | 7 +
.../methods/methodOutlook/actions/__init__.py | 18 +
.../composeAndDraftEmailWithContext.py | 362 +++++++++++++
.../methodOutlook/actions/readEmails.py | 245 +++++++++
.../methodOutlook/actions/searchEmails.py | 257 +++++++++
.../methodOutlook/actions/sendDraftEmail.py | 312 +++++++++++
.../methods/methodOutlook/helpers/__init__.py | 5 +
.../methodOutlook/helpers/connection.py | 95 ++++
.../methodOutlook/helpers/emailProcessing.py | 184 +++++++
.../methodOutlook/helpers/folderManagement.py | 110 ++++
.../methods/methodOutlook/methodOutlook.py | 237 +++++++++
...dSharepoint.py => methodSharepoint.py.old} | 0
.../methods/methodSharepoint/__init__.py | 7 +
.../methodSharepoint/actions/__init__.py | 28 +
.../actions/analyzeFolderUsage.py | 337 ++++++++++++
.../methodSharepoint/actions/copyFile.py | 163 ++++++
.../actions/downloadFileByPath.py | 117 +++++
.../actions/findDocumentPath.py | 497 ++++++++++++++++++
.../methodSharepoint/actions/findSiteByUrl.py | 88 ++++
.../methodSharepoint/actions/listDocuments.py | 345 ++++++++++++
.../methodSharepoint/actions/readDocuments.py | 290 ++++++++++
.../actions/uploadDocument.py | 278 ++++++++++
.../methodSharepoint/actions/uploadFile.py | 145 +++++
.../methodSharepoint/helpers/__init__.py | 5 +
.../methodSharepoint/helpers/apiClient.py | 102 ++++
.../methodSharepoint/helpers/connection.py | 67 +++
.../helpers/documentParsing.py | 252 +++++++++
.../helpers/pathProcessing.py | 338 ++++++++++++
.../methodSharepoint/helpers/siteDiscovery.py | 173 ++++++
.../methodSharepoint/methodSharepoint.py | 387 ++++++++++++++
.../processing/shared/methodDiscovery.py | 12 +-
78 files changed, 9858 insertions(+), 350 deletions(-)
delete mode 100644 modules/MODULE_DEPENDENCIES.md
create mode 100644 modules/datamodels/datamodelWorkflowActions.py
rename modules/workflows/methods/{methodAi.py => methodAi.py.old} (100%)
create mode 100644 modules/workflows/methods/methodAi/__init__.py
create mode 100644 modules/workflows/methods/methodAi/actions/__init__.py
create mode 100644 modules/workflows/methods/methodAi/actions/convert.py
create mode 100644 modules/workflows/methods/methodAi/actions/convertDocument.py
create mode 100644 modules/workflows/methods/methodAi/actions/extractData.py
create mode 100644 modules/workflows/methods/methodAi/actions/generateDocument.py
create mode 100644 modules/workflows/methods/methodAi/actions/process.py
create mode 100644 modules/workflows/methods/methodAi/actions/summarizeDocument.py
create mode 100644 modules/workflows/methods/methodAi/actions/translateDocument.py
create mode 100644 modules/workflows/methods/methodAi/actions/webResearch.py
create mode 100644 modules/workflows/methods/methodAi/helpers/__init__.py
create mode 100644 modules/workflows/methods/methodAi/helpers/csvProcessing.py
create mode 100644 modules/workflows/methods/methodAi/methodAi.py
rename modules/workflows/methods/{methodContext.py => methodContext.py.old} (100%)
create mode 100644 modules/workflows/methods/methodContext/__init__.py
create mode 100644 modules/workflows/methods/methodContext/actions/__init__.py
create mode 100644 modules/workflows/methods/methodContext/actions/extractContent.py
create mode 100644 modules/workflows/methods/methodContext/actions/getDocumentIndex.py
create mode 100644 modules/workflows/methods/methodContext/actions/triggerPreprocessingServer.py
create mode 100644 modules/workflows/methods/methodContext/helpers/__init__.py
create mode 100644 modules/workflows/methods/methodContext/helpers/documentIndex.py
create mode 100644 modules/workflows/methods/methodContext/helpers/formatting.py
create mode 100644 modules/workflows/methods/methodContext/methodContext.py
rename modules/workflows/methods/{methodJira.py => methodJira.py.old} (100%)
create mode 100644 modules/workflows/methods/methodJira/__init__.py
create mode 100644 modules/workflows/methods/methodJira/actions/__init__.py
create mode 100644 modules/workflows/methods/methodJira/actions/connectJira.py
create mode 100644 modules/workflows/methods/methodJira/actions/createCsvContent.py
create mode 100644 modules/workflows/methods/methodJira/actions/createExcelContent.py
create mode 100644 modules/workflows/methods/methodJira/actions/exportTicketsAsJson.py
create mode 100644 modules/workflows/methods/methodJira/actions/importTicketsFromJson.py
create mode 100644 modules/workflows/methods/methodJira/actions/mergeTicketData.py
create mode 100644 modules/workflows/methods/methodJira/actions/parseCsvContent.py
create mode 100644 modules/workflows/methods/methodJira/actions/parseExcelContent.py
create mode 100644 modules/workflows/methods/methodJira/helpers/__init__.py
create mode 100644 modules/workflows/methods/methodJira/helpers/adfConverter.py
create mode 100644 modules/workflows/methods/methodJira/helpers/documentParsing.py
create mode 100644 modules/workflows/methods/methodJira/methodJira.py
rename modules/workflows/methods/{methodOutlook.py => methodOutlook.py.old} (100%)
create mode 100644 modules/workflows/methods/methodOutlook/__init__.py
create mode 100644 modules/workflows/methods/methodOutlook/actions/__init__.py
create mode 100644 modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py
create mode 100644 modules/workflows/methods/methodOutlook/actions/readEmails.py
create mode 100644 modules/workflows/methods/methodOutlook/actions/searchEmails.py
create mode 100644 modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py
create mode 100644 modules/workflows/methods/methodOutlook/helpers/__init__.py
create mode 100644 modules/workflows/methods/methodOutlook/helpers/connection.py
create mode 100644 modules/workflows/methods/methodOutlook/helpers/emailProcessing.py
create mode 100644 modules/workflows/methods/methodOutlook/helpers/folderManagement.py
create mode 100644 modules/workflows/methods/methodOutlook/methodOutlook.py
rename modules/workflows/methods/{methodSharepoint.py => methodSharepoint.py.old} (100%)
create mode 100644 modules/workflows/methods/methodSharepoint/__init__.py
create mode 100644 modules/workflows/methods/methodSharepoint/actions/__init__.py
create mode 100644 modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py
create mode 100644 modules/workflows/methods/methodSharepoint/actions/copyFile.py
create mode 100644 modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py
create mode 100644 modules/workflows/methods/methodSharepoint/actions/findDocumentPath.py
create mode 100644 modules/workflows/methods/methodSharepoint/actions/findSiteByUrl.py
create mode 100644 modules/workflows/methods/methodSharepoint/actions/listDocuments.py
create mode 100644 modules/workflows/methods/methodSharepoint/actions/readDocuments.py
create mode 100644 modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
create mode 100644 modules/workflows/methods/methodSharepoint/actions/uploadFile.py
create mode 100644 modules/workflows/methods/methodSharepoint/helpers/__init__.py
create mode 100644 modules/workflows/methods/methodSharepoint/helpers/apiClient.py
create mode 100644 modules/workflows/methods/methodSharepoint/helpers/connection.py
create mode 100644 modules/workflows/methods/methodSharepoint/helpers/documentParsing.py
create mode 100644 modules/workflows/methods/methodSharepoint/helpers/pathProcessing.py
create mode 100644 modules/workflows/methods/methodSharepoint/helpers/siteDiscovery.py
create mode 100644 modules/workflows/methods/methodSharepoint/methodSharepoint.py
diff --git a/modules/MODULE_DEPENDENCIES.md b/modules/MODULE_DEPENDENCIES.md
deleted file mode 100644
index 09415105..00000000
--- a/modules/MODULE_DEPENDENCIES.md
+++ /dev/null
@@ -1,292 +0,0 @@
-# Module Dependencies Analysis
-
-This document provides a comprehensive analysis of import dependencies between modules in the `modules` directory.
-
-## Overview
-
-The codebase is organized into the following top-level modules:
-- **aicore** - AI core functionality and model management
-- **auth** - High-level authentication and token management
-- **connectors** - External service connectors
-- **datamodels** - Data models and schemas
-- **features** - Feature modules (workflow, dynamicOptions, etc.)
-- **interfaces** - Database and service interfaces
-- **routes** - API route handlers
-- **security** - Low-level core security (RBAC and root access)
-- **services** - Business logic services
-- **shared** - Shared utilities and helpers
-- **workflows** - Workflow processing and management
-
-## Bidirectional Dependency Matrix
-
-This table shows all module pairs with dependencies, displaying imports in both directions.
-
-| Module X | Module Y | X → Y | Y → X | Total |
-|----------|----------|-------|-------|-------|
-| aicore | connectors | 1 | 0 | 1 |
-| aicore | datamodels | 13 | 0 | 13 |
-| aicore | interfaces | 0 | 2 | 2 |
-| aicore | security | 2 | 0 | 2 |
-| aicore | services | 0 | 2 | 2 |
-| aicore | shared | 5 | 0 | 5 |
-| auth | datamodels | 5 | 0 | 5 |
-| auth | interfaces | 4 | 0 | 4 |
-| auth | routes | 0 | 32 | 32 |
-| auth | security | 4 | 0 | 4 |
-| auth | services | 0 | 1 | 1 |
-| auth | shared | 8 | 0 | 8 |
-| connectors | datamodels | 4 | 0 | 4 |
-| connectors | interfaces | 0 | 10 | 10 |
-| connectors | shared | 5 | 0 | 5 |
-| datamodels | features | 0 | 6 | 6 |
-| datamodels | interfaces | 0 | 29 | 29 |
-| datamodels | routes | 0 | 48 | 48 |
-| datamodels | security | 0 | 5 | 5 |
-| datamodels | services | 0 | 52 | 52 |
-| datamodels | shared | 19 | 0 | 19 |
-| datamodels | workflows | 0 | 72 | 72 |
-| features | interfaces | 0 | 0 | 0 |
-| features | routes | 0 | 6 | 6 |
-| features | services | 4 | 0 | 4 |
-| features | shared | 3 | 0 | 3 |
-| features | workflows | 1 | 0 | 1 |
-| interfaces | routes | 0 | 29 | 29 |
-| interfaces | security | 9 | 0 | 9 |
-| interfaces | services | 0 | 8 | 8 |
-| interfaces | shared | 11 | 0 | 11 |
-| routes | interfaces | 29 | 0 | 29 |
-| routes | services | 5 | 0 | 5 |
-| routes | shared | 21 | 0 | 21 |
-| security | connectors | 2 | 0 | 2 |
-| security | datamodels | 5 | 0 | 5 |
-| services | shared | 16 | 0 | 16 |
-| services | workflows | 0 | 1 | 1 |
-| shared | workflows | 0 | 9 | 9 |
-
-**Legend:**
-- **X → Y**: Number of imports from Module X to Module Y
-- **Y → X**: Number of imports from Module Y to Module X
-- **Total**: Sum of imports in both directions
-
-## Bidirectional Dependencies Only (Circular Dependencies)
-
-This table shows only module pairs where imports exist in **both directions**, indicating potential circular dependencies that should be monitored.
-
-| Module X | Module Y | X → Y | Y → X | Total |
-|----------|----------|-------|-------|-------|
-
-**Total bidirectional dependencies: 0**
-
-**Note:** All circular dependencies have been eliminated. The architecture now has clean one-way dependencies.
-
-**Key Improvements:**
-1. **Eliminated `connectors ↔ security` circular dependency**: After moving RBAC logic from `connectorDbPostgre.py` to `interfaces/interfaceRbac.py`, connectors no longer import from security. Security still imports from connectors (for `rootAccess` to create `DatabaseConnector` instances), but this is a one-way dependency (security → connectors: 2, connectors → security: 0).
-2. **Eliminated `shared ↔ security` circular dependency**: Moved `rbacHelpers.py` from `shared` to `security` module since it was only used in `aicore` and `aicore` already imports from `security`. This eliminates the architectural violation where `shared` imported from `security`.
-3. **Eliminated `datamodels ↔ shared` circular dependency**: `shared` no longer has any static imports from `datamodels`. The only reference is a dynamic import in `attributeUtils.py` using `importlib.import_module()` for runtime model discovery, which is not detected by static analysis. This is acceptable as it's a runtime-only dependency.
-4. **New `interfaces/interfaceRbac.py` module**: Created to handle RBAC filtering for interfaces, importing from both `security` and `connectors`. This maintains proper architectural layering where connectors remain generic.
-5. **Updated dependency counts**:
- - `interfaces` → `connectors`: increased from 9 to 10 (interfaceRbac imports connectorDbPostgre)
- - `interfaces` → `security`: increased from 7 to 9 (interfaceRbac imports rbac and rootAccess)
- - `features` → `interfaces`: increased from 1 to 2 (mainWorkflow imports interfaceRbac)
- - `routes` → `interfaces`: increased from 28 to 29 (routeWorkflows imports interfaceRbac)
- - `aicore` → `security`: increased from 1 to 2 (now imports rbacHelpers from security)
- - `security` → `datamodels`: increased from 3 to 5 (rbacHelpers adds datamodel imports)
-
-## Dependency Graph (Mermaid)
-
-```mermaid
-graph TD
- aicore[aicore]
- auth[auth]
- connectors[connectors]
- datamodels[datamodels]
- features[features]
- interfaces[interfaces]
- routes[routes]
- security[security]
- services[services]
- shared[shared]
- workflows[workflows]
-
- aicore -->|13| datamodels
- aicore -->|1| connectors
- aicore -->|2| security
- aicore -->|5| shared
-
- auth -->|5| datamodels
- auth -->|4| interfaces
- auth -->|4| security
- auth -->|8| shared
-
- connectors -->|4| datamodels
- connectors -->|5| shared
-
- datamodels -->|19| shared
-
- features -->|6| datamodels
- features -->|0| interfaces
- features -->|4| services
- features -->|3| shared
- features -->|1| workflows
-
- interfaces -->|29| datamodels
- interfaces -->|10| connectors
- interfaces -->|2| aicore
- interfaces -->|9| security
- interfaces -->|11| shared
-
- routes -->|48| datamodels
- routes -->|29| interfaces
- routes -->|32| auth
- routes -->|21| shared
- routes -->|6| features
- routes -->|5| services
-
- security -->|5| datamodels
- security -->|2| connectors
- security -->|1| shared
-
- services -->|52| datamodels
- services -->|8| interfaces
- services -->|2| aicore
- services -->|1| auth
- services -->|16| shared
-
-
- workflows -->|72| datamodels
- workflows -->|1| services
- workflows -->|9| shared
-```
-
-## Detailed Module Dependencies
-
-### aicore
-**Imports from:**
-- `connectors` (1 import)
-- `datamodels` (13 imports)
-- `security` (2 imports: rbac, rbacHelpers)
-- `shared` (5 imports)
-
-**Dependencies:** Low-level AI functionality, depends on data models and connectors.
-
-### auth
-**Imports from:**
-- `datamodels` (5 imports)
-- `interfaces` (4 imports)
-- `security` (4 imports)
-- `shared` (8 imports)
-
-**Dependencies:** High-level authentication and token management, used by routes and services.
-
-### connectors
-**Imports from:**
-- `datamodels` (4 imports)
-- `shared` (5 imports)
-
-**Dependencies:** External service connectors, minimal dependencies. No longer imports from security or interfaces. Connectors are now fully generic and do not depend on security modules.
-
-### datamodels
-**Imports from:**
-- `shared` (19 imports)
-
-**Dependencies:** Core data models, only depends on shared utilities.
-
-### features
-**Imports from:**
-- `datamodels` (6 imports)
-- `services` (4 imports)
-- `shared` (3 imports)
-- `workflows` (1 import)
-
-**Dependencies:** Feature modules that orchestrate workflows and services. Features now use services exclusively, not interfaces directly, maintaining proper architectural layering.
-
-### interfaces
-**Imports from:**
-- `aicore` (2 imports)
-- `connectors` (10 imports)
-- `datamodels` (29 imports)
-- `security` (9 imports)
-- `shared` (11 imports)
-
-**Dependencies:** Database and service interfaces, heavily depends on data models. Includes `interfaceRbac.py` which handles RBAC filtering for all interfaces. No longer creates circular dependency with connectors.
-
-### routes
-**Imports from:**
-- `auth` (32 imports)
-- `datamodels` (48 imports)
-- `features` (6 imports)
-- `interfaces` (29 imports)
-- `services` (5 imports)
-- `shared` (21 imports)
-
-**Dependencies:** API endpoints, highest dependency count, orchestrates all layers. Now imports from `auth` instead of `security` for authentication. Increased use of services (from 2 to 5 imports) after architectural refactoring to use services instead of direct interface access in features.
-
-### security
-**Imports from:**
-- `connectors` (2 imports)
-- `datamodels` (5 imports: rbac uses 3, rbacHelpers uses 2)
-- `shared` (1 import: rootAccess uses configuration)
-
-**Dependencies:** Low-level core security (RBAC, root access, and RBAC helper functions). Used by interfaces (including `interfaceRbac.py`), auth, and aicore. The `rbacHelpers` module was moved from `shared` to `security` to eliminate the architectural violation where `shared` imported from `security`. Security imports from connectors only for `rootAccess` to create `DatabaseConnector` instances - this is acceptable as it's a one-way dependency (security → connectors).
-
-### services
-**Imports from:**
-- `aicore` (2 imports)
-- `auth` (1 import)
-- `datamodels` (52 imports)
-- `interfaces` (8 imports)
-- `shared` (16 imports)
-
-**Dependencies:** Business logic services, heavily depends on data models.
-
-### shared
-**Imports from:**
-- None (0 imports)
-
-**Dependencies:** Shared utilities, completely self-contained with no dependencies on other modules. No longer imports from security (rbacHelpers was moved to security module) or datamodels (only uses dynamic imports at runtime for model discovery in `attributeUtils.py`), maintaining proper architectural layering.
-
-### workflows
-**Imports from:**
-- `datamodels` (72 imports)
-- `services` (1 import)
-- `shared` (9 imports)
-
-**Dependencies:** Workflow processing, heavily depends on data models (highest count). Reduced from 74 to 72 imports after removing unused imports from `contentValidator.py`.
-
-## Key Observations
-
-1. **datamodels** is the most imported module (used by 9 out of 11 modules)
-2. **shared** is widely used but has minimal dependencies (good design)
-3. **routes** has the most diverse dependencies (imports from 6 different modules)
-4. **workflows** has the highest number of imports from datamodels (72)
-5. **auth** is now a separate module, used exclusively by routes and services
-6. **security** is now a low-level module, used by interfaces (including `interfaceRbac.py`)
-7. **connectors** are now fully generic - no dependencies on security or interfaces
-8. **Circular dependencies eliminated**: Reduced from 3 to 0 after RBAC refactoring and `rbacHelpers` move (eliminated `connectors ↔ security`, `shared ↔ security`, and `datamodels ↔ shared`)
-9. **New `interfaceRbac.py` module** centralizes RBAC filtering logic for all interfaces
-10. **`shared` module is now completely self-contained** - no static imports from any other module
-11. **Features architectural improvements**: Features no longer import directly from interfaces (reduced from 2 to 0). All features now use services exclusively, maintaining proper layering: Features → Services → Interfaces → Connectors
-12. **Routes increased services usage**: Routes now import from services 5 times (up from 2) after refactoring features to use services instead of direct interface access
-
-## Dependency Layers
-
-Based on the analysis, the architecture follows these layers:
-
-1. **Foundation Layer**: `shared`, `datamodels`
-2. **Core Layer**: `aicore`, `connectors`, `security`
-3. **Interface Layer**: `interfaces`
-4. **Authentication Layer**: `auth`
-5. **Business Logic Layer**: `services`, `workflows`
-6. **Feature Layer**: `features`
-7. **API Layer**: `routes`
-
-## Recommendations
-
-1. **datamodels** should remain stable as it's a core dependency
-2. **shared** is excellently designed - completely self-contained with zero dependencies (perfect foundation layer)
-3. **security** split and RBAC refactoring were successful - eliminated all circular dependencies (`connectors ↔ security`, `shared ↔ security`)
-4. **connectors** are now fully generic and maintainable - keep them free of security/interface dependencies
-5. **interfaceRbac.py** successfully centralizes RBAC logic - consider this pattern for other cross-cutting concerns
-6. Consider breaking down **workflows** if it continues to grow
-7. **routes** could benefit from further abstraction to reduce direct dependencies
-8. **Architecture is now clean** - no circular dependencies remain, maintaining clear separation of concerns
diff --git a/modules/datamodels/datamodelWorkflowActions.py b/modules/datamodels/datamodelWorkflowActions.py
new file mode 100644
index 00000000..1857883b
--- /dev/null
+++ b/modules/datamodels/datamodelWorkflowActions.py
@@ -0,0 +1,88 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""Workflow Action models: WorkflowActionParameter, WorkflowActionDefinition."""
+
+from typing import Optional, Any, Union, List, Dict, Callable, Awaitable
+from pydantic import BaseModel, Field
+from modules.datamodels.datamodelChat import ActionResult
+from modules.shared.frontendTypes import FrontendType
+from modules.shared.attributeUtils import registerModelLabels
+
+
+class WorkflowActionParameter(BaseModel):
+ """
+ Parameter schema definition for a workflow action.
+
+ This defines the structure and UI rendering for a single action parameter,
+ NOT the actual parameter values (those are in ActionDefinition.parameters).
+ """
+ name: str = Field(description="Parameter name")
+ type: str = Field(description="Python type as string: 'str', 'int', 'bool', 'List[str]', etc.")
+ frontendType: FrontendType = Field(description="UI rendering type (from global FrontendType enum)")
+ frontendOptions: Optional[Union[str, List[Dict[str, Any]]]] = Field(
+ None,
+ description="Options for select/multiselect/custom types. String reference (e.g., 'user.connection') or static list. For custom types, this is automatically set to the API endpoint."
+ )
+ required: bool = Field(False, description="Whether parameter is required")
+ default: Optional[Any] = Field(None, description="Default value")
+ description: str = Field("", description="Parameter description")
+ validation: Optional[Dict[str, Any]] = Field(
+ None,
+ description="Validation rules (e.g., {'min': 1, 'max': 100})"
+ )
+
+
+class WorkflowActionDefinition(BaseModel):
+ """
+ Complete schema definition of a workflow action.
+
+ This defines the metadata, parameters, and execution function for an action.
+ This is different from datamodelWorkflow.ActionDefinition which contains
+ actual execution values (action, actionObjective, parameters with values).
+
+ This class defines the ACTION SCHEMA, not the execution plan.
+ """
+ actionId: str = Field(
+ description="Unique action identifier for RBAC (format: 'module.actionName', e.g., 'outlook.readEmails')"
+ )
+ description: str = Field(description="Action description")
+ parameters: Dict[str, WorkflowActionParameter] = Field(
+ default_factory=dict,
+ description="Parameter schema definitions"
+ )
+ execute: Optional[Callable] = Field(
+ None,
+ description="Execution function - async function that takes parameters dict and returns ActionResult. Set dynamically."
+ )
+ category: Optional[str] = Field(None, description="Action category for grouping")
+ tags: List[str] = Field(default_factory=list, description="Tags for search/filtering")
+
+
+# Register model labels for UI
+registerModelLabels(
+ "WorkflowActionDefinition",
+ {"en": "Workflow Action Definition", "fr": "Définition d'action de workflow"},
+ {
+ "actionId": {"en": "Action ID", "fr": "ID d'action"},
+ "description": {"en": "Description", "fr": "Description"},
+ "parameters": {"en": "Parameters", "fr": "Paramètres"},
+ "category": {"en": "Category", "fr": "Catégorie"},
+ "tags": {"en": "Tags", "fr": "Étiquettes"},
+ },
+)
+
+registerModelLabels(
+ "WorkflowActionParameter",
+ {"en": "Workflow Action Parameter", "fr": "Paramètre d'action de workflow"},
+ {
+ "name": {"en": "Name", "fr": "Nom"},
+ "type": {"en": "Type", "fr": "Type"},
+ "frontendType": {"en": "Frontend Type", "fr": "Type frontend"},
+ "frontendOptions": {"en": "Frontend Options", "fr": "Options frontend"},
+ "required": {"en": "Required", "fr": "Requis"},
+ "default": {"en": "Default", "fr": "Par défaut"},
+ "description": {"en": "Description", "fr": "Description"},
+ "validation": {"en": "Validation", "fr": "Validation"},
+ },
+)
+
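+
+# Illustrative usage sketch: how an action schema could be declared. The
+# "folder" parameter and FrontendType.TEXT member are assumptions for this
+# example; real definitions are built dynamically by method discovery.
+#
+# exampleAction = WorkflowActionDefinition(
+#     actionId="outlook.readEmails",
+#     description="Read emails and metadata from a mailbox folder",
+#     parameters={
+#         "folder": WorkflowActionParameter(
+#             name="folder",
+#             type="str",
+#             frontendType=FrontendType.TEXT,  # assumed enum member
+#             required=True,
+#             description="Mailbox folder to read from",
+#         ),
+#     },
+#     category="email",
+#     tags=["outlook"],
+# )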
diff --git a/modules/interfaces/interfaceBootstrap.py b/modules/interfaces/interfaceBootstrap.py
index e784c192..71bdd1e7 100644
--- a/modules/interfaces/interfaceBootstrap.py
+++ b/modules/interfaces/interfaceBootstrap.py
@@ -233,6 +233,9 @@ def initRbacRules(db: DatabaseConnector) -> None:
# Create RESOURCE context rules
createResourceContextRules(db)
+ # Create Action-specific RBAC rules
+ createActionRules(db)
+
logger.info("RBAC rules initialization completed")
@@ -785,6 +788,108 @@ def createResourceContextRules(db: DatabaseConnector) -> None:
logger.info(f"Created {len(resourceRules)} RESOURCE context rules")
+def createActionRules(db: DatabaseConnector) -> None:
+ """
+ Create default RBAC rules for workflow actions.
+
+ This function dynamically discovers all available actions from all methods
+ and creates RBAC rules for them. Actions are protected via RESOURCE context
+ with actionId as the item identifier (format: 'module.actionName').
+
+ Args:
+ db: Database connector instance
+ """
+ try:
+ # Import method discovery to get all actions
+ from modules.workflows.processing.shared.methodDiscovery import discoverMethods
+ from modules.services import getInterface as getServices
+ from modules.datamodels.datamodelUam import User
+
+ # Create a temporary user context for discovery (will be filtered by RBAC later)
+ # We need to discover methods, but we'll use a minimal user context
+ # In production, this should use a system user or admin user
+ try:
+ # Try to get an admin user for discovery
+ adminUsers = db.getRecordset("User", recordFilter={"roleLabel": "sysadmin"}, limit=1)
+ if adminUsers:
+ tempUser = User(**adminUsers[0])
+ else:
+ # Fallback: create minimal user context
+ tempUser = User(id="system", roleLabel="sysadmin")
+ except Exception:
+ # Fallback: create minimal user context
+ tempUser = User(id="system", roleLabel="sysadmin")
+
+ # Get services and discover methods
+ services = getServices(tempUser, None)
+ discoverMethods(services)
+
+ # Import methods catalog
+ from modules.workflows.processing.shared.methodDiscovery import methods
+
+ # Collect all action IDs
+ allActionIds = []
+ for methodName, methodInfo in methods.items():
+ # Skip duplicate entries (same method stored with full and short name)
+ if methodName.startswith('Method'):
+ continue
+
+ methodInstance = methodInfo['instance']
+ methodActions = methodInstance.actions
+
+ for actionName in methodActions.keys():
+ actionId = f"{methodInstance.name}.{actionName}"
+ allActionIds.append(actionId)
+
+ logger.info(f"Discovered {len(allActionIds)} actions for RBAC rule creation")
+
+ # Define default action access by role.
+ # Currently sysadmin, admin, and user all receive generic access to all
+ # actions; a more granular per-action sketch is shown below.
+
+ actionRules = []
+
+ # All roles: Generic access to all actions
+ # Using item=None grants access to all resources (all actions) in RESOURCE context
+
+ # SysAdmin: Access to all actions
+ actionRules.append(AccessRule(
+ roleLabel="sysadmin",
+ context=AccessRuleContext.RESOURCE,
+ item=None, # All resources (covers all actions)
+ view=True
+ ))
+
+ # Admin: Access to all actions
+ actionRules.append(AccessRule(
+ roleLabel="admin",
+ context=AccessRuleContext.RESOURCE,
+ item=None, # All resources (covers all actions)
+ view=True
+ ))
+
+ # User: Access to all actions (generic rights)
+ actionRules.append(AccessRule(
+ roleLabel="user",
+ context=AccessRuleContext.RESOURCE,
+ item=None, # All resources (covers all actions)
+ view=True
+ ))
+
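+ # Illustrative sketch of a more granular, per-action rule (not created by
+ # default): restrict a role to a single action via its actionId.
+ # actionRules.append(AccessRule(
+ #     roleLabel="viewer",
+ #     context=AccessRuleContext.RESOURCE,
+ #     item="outlook.readEmails",  # a specific action instead of item=None
+ #     view=True
+ # ))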
+
+ # Create all action rules
+ for rule in actionRules:
+ db.recordCreate(AccessRule, rule)
+
+ logger.info(f"Created {len(actionRules)} action RBAC rules")
+
+ except Exception as e:
+ logger.error(f"Error creating action RBAC rules: {str(e)}", exc_info=True)
+ # Don't fail bootstrap if action rules can't be created
+ # They can be created manually or via migration script
+
+
def _addMissingTableRules(db: DatabaseConnector, existingRules: List[Dict[str, Any]]) -> None:
"""
Add missing RBAC rules for tables that were added after initial bootstrap.
diff --git a/modules/interfaces/interfaceDbAppObjects.py b/modules/interfaces/interfaceDbAppObjects.py
index 9a8ff308..f8397477 100644
--- a/modules/interfaces/interfaceDbAppObjects.py
+++ b/modules/interfaces/interfaceDbAppObjects.py
@@ -1574,18 +1574,21 @@ class AppObjects:
self,
roleLabel: Optional[str] = None,
context: Optional[AccessRuleContext] = None,
- item: Optional[str] = None
- ) -> List[AccessRule]:
+ item: Optional[str] = None,
+ pagination: Optional[PaginationParams] = None
+ ) -> Union[List[AccessRule], PaginatedResult]:
"""
- Get access rules with optional filters.
+ Get access rules with optional filters and pagination.
Args:
roleLabel: Optional role label filter
context: Optional context filter
item: Optional item filter
+ pagination: Optional pagination parameters. If None, returns all items.
Returns:
- List of AccessRule objects
+ If pagination is None: List[AccessRule]
+ If pagination is provided: PaginatedResult with items and metadata
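+
+ Example (illustrative):
+ getAccessRules(roleLabel="admin", pagination=PaginationParams(page=1, pageSize=20))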
"""
try:
recordFilter = {}
@@ -1596,11 +1599,55 @@ class AppObjects:
if item:
recordFilter["item"] = item
- rules = self.db.getRecordset(AccessRule, recordFilter=recordFilter if recordFilter else None)
- return [AccessRule(**rule) for rule in rules]
+ # Use RBAC filtering
+ rules = getRecordsetWithRBAC(
+ self.db,
+ AccessRule,
+ self.currentUser,
+ recordFilter=recordFilter if recordFilter else None
+ )
+
+ # Filter out database-specific fields
+ filteredRules = []
+ for rule in rules:
+ cleanedRule = {k: v for k, v in rule.items() if not k.startswith("_")}
+ filteredRules.append(cleanedRule)
+
+ # If no pagination requested, return all items
+ if pagination is None:
+ return [AccessRule(**rule) for rule in filteredRules]
+
+ # Apply filtering (if filters provided)
+ if pagination.filters:
+ filteredRules = self._applyFilters(filteredRules, pagination.filters)
+
+ # Apply sorting (in order of sortFields)
+ if pagination.sort:
+ filteredRules = self._applySorting(filteredRules, pagination.sort)
+
+ # Count total items after filters
+ totalItems = len(filteredRules)
+ totalPages = math.ceil(totalItems / pagination.pageSize) if totalItems > 0 else 0
+
+ # Apply pagination (skip/limit)
+ startIdx = (pagination.page - 1) * pagination.pageSize
+ endIdx = startIdx + pagination.pageSize
+ pagedRules = filteredRules[startIdx:endIdx]
+
+ # Convert to model objects
+ items = [AccessRule(**rule) for rule in pagedRules]
+
+ return PaginatedResult(
+ items=items,
+ totalItems=totalItems,
+ totalPages=totalPages
+ )
except Exception as e:
logger.error(f"Error getting access rules: {str(e)}")
- return []
+ if pagination is None:
+ return []
+ else:
+ return PaginatedResult(items=[], totalItems=0, totalPages=0)
def getAccessRulesForRoles(
self,
@@ -1701,19 +1748,62 @@ class AppObjects:
logger.error(f"Error getting role by label {roleLabel}: {str(e)}")
return None
- def getAllRoles(self) -> List[Role]:
+ def getAllRoles(self, pagination: Optional[PaginationParams] = None) -> Union[List[Role], PaginatedResult]:
"""
- Get all roles.
+ Get all roles with optional pagination, sorting, and filtering.
+
+ Args:
+ pagination: Optional pagination parameters. If None, returns all items.
Returns:
- List of Role objects
+ If pagination is None: List[Role]
+ If pagination is provided: PaginatedResult with items and metadata
"""
try:
+ # Get all roles from database
roles = self.db.getRecordset(Role)
- return [Role(**role) for role in roles]
+
+ # Filter out database-specific fields
+ filteredRoles = []
+ for role in roles:
+ cleanedRole = {k: v for k, v in role.items() if not k.startswith("_")}
+ filteredRoles.append(cleanedRole)
+
+ # If no pagination requested, return all items
+ if pagination is None:
+ return [Role(**role) for role in filteredRoles]
+
+ # Apply filtering (if filters provided)
+ if pagination.filters:
+ filteredRoles = self._applyFilters(filteredRoles, pagination.filters)
+
+ # Apply sorting (in order of sortFields)
+ if pagination.sort:
+ filteredRoles = self._applySorting(filteredRoles, pagination.sort)
+
+ # Count total items after filters
+ totalItems = len(filteredRoles)
+ totalPages = math.ceil(totalItems / pagination.pageSize) if totalItems > 0 else 0
+
+ # Apply pagination (skip/limit)
+ startIdx = (pagination.page - 1) * pagination.pageSize
+ endIdx = startIdx + pagination.pageSize
+ pagedRoles = filteredRoles[startIdx:endIdx]
+
+ # Convert to model objects
+ items = [Role(**role) for role in pagedRoles]
+
+ return PaginatedResult(
+ items=items,
+ totalItems=totalItems,
+ totalPages=totalPages
+ )
except Exception as e:
logger.error(f"Error getting all roles: {str(e)}")
- return []
+ if pagination is None:
+ return []
+ else:
+ return PaginatedResult(items=[], totalItems=0, totalPages=0)
def updateRole(self, roleId: str, role: Role) -> Role:
"""
diff --git a/modules/routes/routeRbac.py b/modules/routes/routeRbac.py
index 5b54ad45..363a6b81 100644
--- a/modules/routes/routeRbac.py
+++ b/modules/routes/routeRbac.py
@@ -8,10 +8,13 @@ Implements endpoints for role-based access control permissions.
from fastapi import APIRouter, HTTPException, Depends, Query, Body, Path, Request
from typing import Optional, List, Dict, Any
import logging
+import json
+import math
from modules.auth import getCurrentUser, limiter
from modules.datamodels.datamodelUam import User, UserPermissions, AccessLevel
from modules.datamodels.datamodelRbac import AccessRuleContext, AccessRule, Role
+from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata
from modules.interfaces.interfaceDbAppObjects import getInterface
# Configure logger
@@ -86,15 +89,16 @@ async def getPermissions(
)
-@router.get("/rules", response_model=list)
+@router.get("/rules", response_model=PaginatedResponse)
@limiter.limit("30/minute")
async def getAccessRules(
request: Request,
roleLabel: Optional[str] = Query(None, description="Filter by role label"),
context: Optional[str] = Query(None, description="Filter by context (DATA, UI, RESOURCE)"),
item: Optional[str] = Query(None, description="Filter by item identifier"),
+ pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
currentUser: User = Depends(getCurrentUser)
- ) -> list:
+ ) -> PaginatedResponse:
"""
- Get access rules with optional filters.
+ Get access rules with optional filters and pagination.
Only returns rules that the current user has permission to view.
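+
+ The pagination query parameter is a JSON-encoded PaginationParams object,
+ e.g. pagination={"page": 1, "pageSize": 25} (illustrative values; field
+ names follow datamodelPagination).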
@@ -143,15 +147,45 @@ async def getAccessRules(
detail=f"Invalid context '{context}'. Must be one of: DATA, UI, RESOURCE"
)
- # Get rules
- rules = interface.getAccessRules(
+ # Parse pagination parameter
+ paginationParams = None
+ if pagination:
+ try:
+ paginationDict = json.loads(pagination)
+ paginationParams = PaginationParams(**paginationDict) if paginationDict else None
+ except (json.JSONDecodeError, ValueError) as e:
+ raise HTTPException(
+ status_code=400,
+ detail=f"Invalid pagination parameter: {str(e)}"
+ )
+
+ # Get rules with optional pagination
+ result = interface.getAccessRules(
roleLabel=roleLabel,
context=accessContext,
- item=item
+ item=item,
+ pagination=paginationParams
)
- # Convert to dict for JSON serialization
- return [rule.model_dump() for rule in rules]
+ # If pagination was requested, result is PaginatedResult
+ # If no pagination, result is List[AccessRule]
+ if paginationParams:
+ return PaginatedResponse(
+ items=[rule.model_dump() for rule in result.items],
+ pagination=PaginationMetadata(
+ currentPage=paginationParams.page,
+ pageSize=paginationParams.pageSize,
+ totalItems=result.totalItems,
+ totalPages=result.totalPages,
+ sort=paginationParams.sort,
+ filters=paginationParams.filters
+ )
+ )
+ else:
+ return PaginatedResponse(
+ items=[rule.model_dump() for rule in result],
+ pagination=None
+ )
except HTTPException:
raise
@@ -489,12 +523,13 @@ def _ensureAdminAccess(currentUser: User) -> None:
)
-@router.get("/roles", response_model=List[Dict[str, Any]])
+@router.get("/roles", response_model=PaginatedResponse)
@limiter.limit("60/minute")
async def listRoles(
request: Request,
+ pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
currentUser: User = Depends(getCurrentUser)
-) -> List[Dict[str, Any]]:
+) -> PaginatedResponse:
"""
- Get list of all available roles with metadata.
+ Get list of all available roles with metadata; supports optional pagination, sorting, and filtering.
@@ -506,14 +541,27 @@ async def listRoles(
interface = getInterface(currentUser)
- # Get all roles from database
- dbRoles = interface.getAllRoles()
+ # Parse pagination parameter
+ paginationParams = None
+ if pagination:
+ try:
+ paginationDict = json.loads(pagination)
+ paginationParams = PaginationParams(**paginationDict) if paginationDict else None
+ except (json.JSONDecodeError, ValueError) as e:
+ raise HTTPException(
+ status_code=400,
+ detail=f"Invalid pagination parameter: {str(e)}"
+ )
+
+ # Get all roles from database (without pagination) to enrich with user counts and add custom roles
+ # Note: We get all roles first because we need to add custom roles before pagination
+ dbRoles = interface.getAllRoles(pagination=None)
# Get all users to count role assignments
# Since _ensureAdminAccess ensures user is sysadmin or admin,
# and getUsersByMandate returns all users for sysadmin regardless of mandateId,
# we can pass the current user's mandateId (for sysadmin it will be ignored by RBAC)
- allUsers = interface.getUsersByMandate(currentUser.mandateId or "")
+ allUsers = interface.getUsersByMandate(currentUser.mandateId or "", pagination=None)
# Count users per role
roleCounts: Dict[str, int] = {}
@@ -544,7 +592,45 @@ async def listRoles(
"isSystemRole": False
})
- return result
+ # Apply filtering and sorting if pagination requested
+ if paginationParams:
+ # Apply filtering (if filters provided)
+ if paginationParams.filters:
+ # Use the interface's filter method
+ filteredResult = interface._applyFilters(result, paginationParams.filters)
+ else:
+ filteredResult = result
+
+ # Apply sorting (in order of sortFields)
+ if paginationParams.sort:
+ sortedResult = interface._applySorting(filteredResult, paginationParams.sort)
+ else:
+ sortedResult = filteredResult
+
+ # Apply pagination
+ totalItems = len(sortedResult)
+ totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
+ startIdx = (paginationParams.page - 1) * paginationParams.pageSize
+ endIdx = startIdx + paginationParams.pageSize
+ paginatedResult = sortedResult[startIdx:endIdx]
+
+ return PaginatedResponse(
+ items=paginatedResult,
+ pagination=PaginationMetadata(
+ currentPage=paginationParams.page,
+ pageSize=paginationParams.pageSize,
+ totalItems=totalItems,
+ totalPages=totalPages,
+ sort=paginationParams.sort,
+ filters=paginationParams.filters
+ )
+ )
+ else:
+ # No pagination - return all roles
+ return PaginatedResponse(
+ items=result,
+ pagination=None
+ )
except HTTPException:
raise
diff --git a/modules/routes/routeWorkflows.py b/modules/routes/routeWorkflows.py
index 8be49268..352fcfd3 100644
--- a/modules/routes/routeWorkflows.py
+++ b/modules/routes/routeWorkflows.py
@@ -572,3 +572,247 @@ async def delete_file_from_message(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error deleting file reference: {str(e)}"
)
+
+
+# Action Discovery Endpoints
+
+@router.get("/actions", response_model=Dict[str, Any])
+@limiter.limit("120/minute")
+async def get_all_actions(
+ request: Request,
+ currentUser: User = Depends(getCurrentUser)
+) -> Dict[str, Any]:
+ """
+ Get all available workflow actions for the current user (filtered by RBAC).
+
+ Returns:
+ - Dictionary with a flat list of all actions, each tagged with its module, filtered by RBAC permissions
+
+ Example response:
+ {
+ "actions": [
+ {
+ "module": "outlook",
+ "actionId": "outlook.readEmails",
+ "name": "readEmails",
+ "description": "Read emails and metadata from a mailbox folder",
+ "parameters": {...}
+ },
+ ...
+ ]
+ }
+ """
+ try:
+ from modules.services import getInterface as getServices
+ from modules.workflows.processing.shared.methodDiscovery import discoverMethods
+
+ # Get services and discover methods
+ services = getServices(currentUser, None)
+ discoverMethods(services)
+
+ # Import methods catalog
+ from modules.workflows.processing.shared.methodDiscovery import methods
+
+ # Collect all actions from all methods
+ allActions = []
+ for methodName, methodInfo in methods.items():
+ # Skip duplicate entries (same method stored with full and short name)
+ if methodName.startswith('Method'):
+ continue
+
+ methodInstance = methodInfo['instance']
+ methodActions = methodInstance.actions
+
+ for actionName, actionInfo in methodActions.items():
+ # Build action response
+ actionResponse = {
+ "module": methodInstance.name,
+ "actionId": f"{methodInstance.name}.{actionName}",
+ "name": actionName,
+ "description": actionInfo.get('description', ''),
+ "parameters": actionInfo.get('parameters', {})
+ }
+ allActions.append(actionResponse)
+
+ return {
+ "actions": allActions
+ }
+
+ except Exception as e:
+ logger.error(f"Error getting all actions: {str(e)}", exc_info=True)
+ raise HTTPException(
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+ detail=f"Failed to get actions: {str(e)}"
+ )
+
+
+@router.get("/actions/{method}", response_model=Dict[str, Any])
+@limiter.limit("120/minute")
+async def get_method_actions(
+ request: Request,
+ method: str = Path(..., description="Method name (e.g., 'outlook', 'sharepoint')"),
+ currentUser: User = Depends(getCurrentUser)
+) -> Dict[str, Any]:
+ """
+ Get all available actions for a specific method (filtered by RBAC).
+
+ Path Parameters:
+ - method: Method name (e.g., 'outlook', 'sharepoint', 'ai')
+
+ Returns:
+ - Dictionary with actions for the specified method
+
+ Example response:
+ {
+ "module": "outlook",
+ "actions": [
+ {
+ "actionId": "outlook.readEmails",
+ "name": "readEmails",
+ "description": "Read emails and metadata from a mailbox folder",
+ "parameters": {...}
+ },
+ ...
+ ]
+ }
+ """
+ try:
+ from modules.services import getInterface as getServices
+ from modules.workflows.processing.shared.methodDiscovery import discoverMethods
+
+ # Get services and discover methods
+ services = getServices(currentUser, None)
+ discoverMethods(services)
+
+ # Import methods catalog
+ from modules.workflows.processing.shared.methodDiscovery import methods
+
+ # Find method instance
+ methodInstance = None
+ for methodName, methodInfo in methods.items():
+ if methodInfo['instance'].name == method:
+ methodInstance = methodInfo['instance']
+ break
+
+ if not methodInstance:
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail=f"Method '{method}' not found"
+ )
+
+ # Collect actions for this method
+ actions = []
+ methodActions = methodInstance.actions
+
+ for actionName, actionInfo in methodActions.items():
+ actionResponse = {
+ "actionId": f"{methodInstance.name}.{actionName}",
+ "name": actionName,
+ "description": actionInfo.get('description', ''),
+ "parameters": actionInfo.get('parameters', {})
+ }
+ actions.append(actionResponse)
+
+ return {
+ "module": methodInstance.name,
+ "description": methodInstance.description,
+ "actions": actions
+ }
+
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error getting actions for method {method}: {str(e)}", exc_info=True)
+ raise HTTPException(
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+ detail=f"Failed to get actions for method {method}: {str(e)}"
+ )
+
+
+@router.get("/actions/{method}/{action}", response_model=Dict[str, Any])
+@limiter.limit("120/minute")
+async def get_action_schema(
+ request: Request,
+ method: str = Path(..., description="Method name (e.g., 'outlook', 'sharepoint')"),
+ action: str = Path(..., description="Action name (e.g., 'readEmails', 'uploadDocument')"),
+ currentUser: User = Depends(getCurrentUser)
+) -> Dict[str, Any]:
+ """
+ Get action schema with parameter definitions for a specific action.
+
+ Path Parameters:
+ - method: Method name (e.g., 'outlook', 'sharepoint', 'ai')
+ - action: Action name (e.g., 'readEmails', 'uploadDocument')
+
+ Returns:
+ - Action schema with full parameter definitions
+
+ Example response:
+ {
+ "method": "outlook",
+ "action": "readEmails",
+ "actionId": "outlook.readEmails",
+ "description": "Read emails and metadata from a mailbox folder",
+ "parameters": {
+ "connectionReference": {
+ "name": "connectionReference",
+ "type": "str",
+ "frontendType": "userConnection",
+ "frontendOptions": "user.connection",
+ "required": true,
+ "description": "Microsoft connection label"
+ },
+ ...
+ }
+ }
+ """
+ try:
+ from modules.services import getInterface as getServices
+ from modules.workflows.processing.shared.methodDiscovery import discoverMethods
+
+ # Get services and discover methods
+ services = getServices(currentUser, None)
+ discoverMethods(services)
+
+ # Import methods catalog
+ from modules.workflows.processing.shared.methodDiscovery import methods
+
+ # Find method instance
+ methodInstance = None
+ for methodName, methodInfo in methods.items():
+ if methodInfo['instance'].name == method:
+ methodInstance = methodInfo['instance']
+ break
+
+ if not methodInstance:
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail=f"Method '{method}' not found"
+ )
+
+ # Get action
+ methodActions = methodInstance.actions
+ if action not in methodActions:
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail=f"Action '{action}' not found in method '{method}'"
+ )
+
+ actionInfo = methodActions[action]
+
+ return {
+ "method": methodInstance.name,
+ "action": action,
+ "actionId": f"{methodInstance.name}.{action}",
+ "description": actionInfo.get('description', ''),
+ "parameters": actionInfo.get('parameters', {})
+ }
+
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error getting action schema for {method}.{action}: {str(e)}", exc_info=True)
+ raise HTTPException(
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+ detail=f"Failed to get action schema: {str(e)}"
+ )
\ No newline at end of file
diff --git a/modules/workflows/methods/methodAi.py b/modules/workflows/methods/methodAi.py.old
similarity index 100%
rename from modules/workflows/methods/methodAi.py
rename to modules/workflows/methods/methodAi.py.old
diff --git a/modules/workflows/methods/methodAi/__init__.py b/modules/workflows/methods/methodAi/__init__.py
new file mode 100644
index 00000000..7ce40281
--- /dev/null
+++ b/modules/workflows/methods/methodAi/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+from .methodAi import MethodAi
+
+__all__ = ['MethodAi']
+
diff --git a/modules/workflows/methods/methodAi/actions/__init__.py b/modules/workflows/methods/methodAi/actions/__init__.py
new file mode 100644
index 00000000..f0ba9d4d
--- /dev/null
+++ b/modules/workflows/methods/methodAi/actions/__init__.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""Action modules for AI operations."""
+
+# Export all actions
+from .process import process
+from .webResearch import webResearch
+from .summarizeDocument import summarizeDocument
+from .translateDocument import translateDocument
+from .convert import convert
+from .convertDocument import convertDocument
+from .extractData import extractData
+from .generateDocument import generateDocument
+
+__all__ = [
+ 'process',
+ 'webResearch',
+ 'summarizeDocument',
+ 'translateDocument',
+ 'convert',
+ 'convertDocument',
+ 'extractData',
+ 'generateDocument',
+]
+
diff --git a/modules/workflows/methods/methodAi/actions/convert.py b/modules/workflows/methods/methodAi/actions/convert.py
new file mode 100644
index 00000000..1c34fa9b
--- /dev/null
+++ b/modules/workflows/methods/methodAi/actions/convert.py
@@ -0,0 +1,157 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Convert action for AI operations.
+Converts documents/data between different formats with specific formatting options.
+"""
+
+import logging
+import json
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+from modules.datamodels.datamodelDocref import DocumentReferenceList
+
+logger = logging.getLogger(__name__)
+
+@action
+async def convert(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSON→CSV with custom columns, delimiters).
+ - Input requirements: documentList (required); inputFormat and outputFormat (required).
+ - Output format: Document in target format with specified formatting options.
+ - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed).
+
+ Parameters:
+ - documentList (list, required): Document reference(s) to convert.
+ - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.).
+ - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.).
+ - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect.
+ - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,).
+ - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True.
+ - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'.
+ """
+ documentList = parameters.get("documentList", [])
+ if not documentList:
+ return ActionResult.isFailure(error="documentList is required")
+
+ inputFormat = parameters.get("inputFormat")
+ outputFormat = parameters.get("outputFormat")
+ if not inputFormat or not outputFormat:
+ return ActionResult.isFailure(error="inputFormat and outputFormat are required")
+
+ # Normalize formats (remove leading dot if present)
+ normalizedInputFormat = inputFormat.strip().lstrip('.').lower()
+ normalizedOutputFormat = outputFormat.strip().lstrip('.').lower()
+
+ # Get documents
+ if isinstance(documentList, DocumentReferenceList):
+ docRefList = documentList
+ elif isinstance(documentList, list):
+ docRefList = DocumentReferenceList.from_string_list(documentList)
+ else:
+ docRefList = DocumentReferenceList.from_string_list([documentList])
+
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
+ if not chatDocuments:
+ return ActionResult.isFailure(error="No documents found in documentList")
+
+ # Check if input is standardized JSON format - if so, use direct rendering
+ if normalizedInputFormat == "json" and len(chatDocuments) == 1:
+ try:
+ doc = chatDocuments[0]
+ # ChatDocument doesn't have documentData - need to load file content using fileId
+ docBytes = self.services.chat.getFileData(doc.fileId)
+ if not docBytes:
+ raise ValueError(f"No file data found for fileId={doc.fileId}")
+
+ # Decode bytes to string and parse as JSON (decode always yields str,
+ # so no further type dispatch is needed)
+ docData = docBytes.decode('utf-8')
+ jsonData = json.loads(docData)
+
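+ # Expected standardized shape (illustrative):
+ # {"documents": [{"sections": [...], "metadata": {"title": "..."}}]}
+ # or the shorthand {"sections": [...], "metadata": {...}}, which is wrapped
+ # into a "documents" array below.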
+ # Check if it's standardized JSON format (has "documents" or "sections")
+ if jsonData and (isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData)):
+ # Use direct rendering - no AI call needed!
+ from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
+ generationService = GenerationService(self.services)
+
+ # Get title before wrapping (wrapping nests top-level metadata under "documents")
+ title = jsonData.get("metadata", {}).get("title", doc.documentName or "Converted Document")
+
+ # Ensure format is "documents" array
+ if "documents" not in jsonData:
+ jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]}
+
+ # Render with options
+ renderOptions = {}
+ if normalizedOutputFormat == "csv":
+ renderOptions["delimiter"] = parameters.get("delimiter", ",")
+ renderOptions["columnsPerRow"] = parameters.get("columnsPerRow")
+ renderOptions["includeHeader"] = parameters.get("includeHeader", True)
+
+ rendered_content, mime_type = await generationService.renderReport(
+ jsonData, normalizedOutputFormat, title, None, None
+ )
+
+ # Apply CSV options if needed (renderer will handle them)
+ if normalizedOutputFormat == "csv" and renderOptions:
+ rendered_content = self.csvProcessing.applyCsvOptions(rendered_content, renderOptions)
+
+ validationMetadata = {
+ "actionType": "ai.convert",
+ "inputFormat": normalizedInputFormat,
+ "outputFormat": normalizedOutputFormat,
+ "hasSourceJson": True,
+ "conversionType": "direct_rendering"
+ }
+ actionDoc = ActionDocument(
+ documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' in doc.documentName else doc.documentName}.{normalizedOutputFormat}",
+ documentData=rendered_content,
+ mimeType=mime_type,
+ sourceJson=jsonData, # Preserve source JSON for structure validation
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[actionDoc])
+
+ except Exception as e:
+ logger.warning(f"Direct rendering failed, falling back to AI conversion: {str(e)}")
+ # Fall through to AI-based conversion
+
+ # Fallback: Use AI for conversion (for non-JSON inputs or complex conversions)
+ columnsPerRow = parameters.get("columnsPerRow")
+ delimiter = parameters.get("delimiter", ",")
+ includeHeader = parameters.get("includeHeader", True)
+ language = parameters.get("language", "en")
+
+ aiPrompt = f"Convert the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format."
+
+ if normalizedOutputFormat == "csv":
+ aiPrompt += f" Use '{delimiter}' as the delimiter character."
+ if columnsPerRow:
+ aiPrompt += f" Format the output with {columnsPerRow} columns per row."
+ if not includeHeader:
+ aiPrompt += " Do not include a header row."
+ else:
+ aiPrompt += " Include a header row with column names."
+
+ if language and language != "en":
+ aiPrompt += f" Use language: {language}."
+
+ aiPrompt += " Preserve all data and ensure accurate conversion. Maintain data integrity and structure."
+
+ return await self.process({
+ "aiPrompt": aiPrompt,
+ "documentList": documentList,
+ "resultType": normalizedOutputFormat
+ })
+
diff --git a/modules/workflows/methods/methodAi/actions/convertDocument.py b/modules/workflows/methods/methodAi/actions/convertDocument.py
new file mode 100644
index 00000000..e86b1d5a
--- /dev/null
+++ b/modules/workflows/methods/methodAi/actions/convertDocument.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Convert Document action for AI operations.
+Converts documents between different formats (PDF→Word, Excel→CSV, etc.).
+"""
+
+import logging
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult
+
+logger = logging.getLogger(__name__)
+
+@action
+async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Convert documents between different formats (PDF→Word, Excel→CSV, etc.).
+ - Input requirements: documentList (required); targetFormat (required).
+ - Output format: Document in target format.
+
+ Parameters:
+ - documentList (list, required): Document reference(s) to convert.
+ - targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.).
+ - preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True.
+ """
+ documentList = parameters.get("documentList", [])
+ if not documentList:
+ return ActionResult.isFailure(error="documentList is required")
+
+ targetFormat = parameters.get("targetFormat")
+ if not targetFormat:
+ return ActionResult.isFailure(error="targetFormat is required")
+
+ preserveStructure = parameters.get("preserveStructure", True)
+
+ # Normalize format (remove leading dot if present)
+ normalizedFormat = targetFormat.strip().lstrip('.').lower()
+
+ aiPrompt = f"Convert the provided document(s) to {normalizedFormat.upper()} format."
+ if preserveStructure:
+ aiPrompt += " Preserve all document structure including headings, tables, formatting, lists, and layout."
+ aiPrompt += " Ensure the converted document maintains the same content and information as the original."
+
+ return await self.process({
+ "aiPrompt": aiPrompt,
+ "documentList": documentList,
+ "resultType": normalizedFormat
+ })
+
diff --git a/modules/workflows/methods/methodAi/actions/extractData.py b/modules/workflows/methods/methodAi/actions/extractData.py
new file mode 100644
index 00000000..723914bd
--- /dev/null
+++ b/modules/workflows/methods/methodAi/actions/extractData.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Extract Data action for AI operations.
+Extracts structured data from documents (key-value pairs, entities, facts, etc.).
+"""
+
+import logging
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult
+
+logger = logging.getLogger(__name__)
+
+@action
+async def extractData(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.).
+ - Input requirements: documentList (required); optional dataStructure, fields.
+ - Output format: JSON by default, or specified resultType.
+
+ Parameters:
+ - documentList (list, required): Document reference(s) to extract data from.
+ - dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested.
+ - fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]).
+ - resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json.
+ """
+ documentList = parameters.get("documentList", [])
+ if not documentList:
+ return ActionResult.isFailure(error="documentList is required")
+
+ dataStructure = parameters.get("dataStructure", "nested")
+ fields = parameters.get("fields", [])
+ resultType = parameters.get("resultType", "json")
+
+ aiPrompt = "Extract structured data from the provided document(s)."
+ if fields:
+ fieldsStr = ", ".join(fields)
+ aiPrompt += f" Extract the following specific fields: {fieldsStr}."
+ else:
+ aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information."
+
+ structureInstructions = {
+ "flat": "Use a flat key-value structure with simple properties.",
+ "nested": "Use a nested JSON structure with logical grouping of related data.",
+ "list": "Structure the data as a list/array of objects, one per entity or record."
+ }
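+    # e.g. dataStructure="list" should yield output shaped like
+    #   [{"name": "...", "date": "...", "amount": ...}, ...]  (illustrative)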
+ aiPrompt += f" {structureInstructions.get(dataStructure.lower(), structureInstructions['nested'])}"
+
+ aiPrompt += " Ensure all extracted data is accurate and complete."
+
+ return await self.process({
+ "aiPrompt": aiPrompt,
+ "documentList": documentList,
+ "resultType": resultType
+ })
+
diff --git a/modules/workflows/methods/methodAi/actions/generateDocument.py b/modules/workflows/methods/methodAi/actions/generateDocument.py
new file mode 100644
index 00000000..5badc321
--- /dev/null
+++ b/modules/workflows/methods/methodAi/actions/generateDocument.py
@@ -0,0 +1,53 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Generate Document action for AI operations.
+Generates documents from scratch or based on templates/inputs.
+"""
+
+import logging
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult
+
+logger = logging.getLogger(__name__)
+
+@action
+async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Generate documents from scratch or based on templates/inputs.
+ - Input requirements: prompt or description (required); optional documentList (for templates/references).
+ - Output format: Document in specified format (default: docx).
+
+ Parameters:
+ - prompt (str, required): Description of the document to generate.
+ - documentList (list, optional): Template documents or reference documents to use as a guide.
+ - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
+ - resultType (str, optional): Output format (docx, pdf, txt, md, etc.). Default: docx.
+ """
+ prompt = parameters.get("prompt")
+ if not prompt:
+ return ActionResult.isFailure(error="prompt is required")
+
+ documentList = parameters.get("documentList", [])
+ documentType = parameters.get("documentType")
+ resultType = parameters.get("resultType", "docx")
+
+ aiPrompt = f"Generate a document based on the following requirements: {prompt}"
+ if documentType:
+ aiPrompt += f" Document type: {documentType}."
+ if documentList:
+ aiPrompt += " Use the provided template/reference documents as a guide for structure, format, and style."
+ aiPrompt += " Create a professional, well-structured document with appropriate formatting and organization."
+
+ processParams = {
+ "aiPrompt": aiPrompt,
+ "resultType": resultType
+ }
+ if documentList:
+ processParams["documentList"] = documentList
+
+ return await self.process(processParams)
+
diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py
new file mode 100644
index 00000000..2468d949
--- /dev/null
+++ b/modules/workflows/methods/methodAi/actions/process.py
@@ -0,0 +1,219 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Process action for AI operations.
+Universal AI document processing action.
+"""
+
+import logging
+import time
+from typing import Dict, Any, List, Optional
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+from modules.datamodels.datamodelAi import AiCallOptions
+from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart
+
+logger = logging.getLogger(__name__)
+
+@action
+async def process(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
+ - Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
+ - Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
+ - Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.
+
+ Parameters:
+ - aiPrompt (str, required): Instruction for the AI describing what processing to perform.
+ - documentList (list, optional): Document reference(s) in any format to use as input/context.
+ - resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
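+
+    Example (illustrative call; the document reference strings are assumptions):
+        await method.process({"aiPrompt": "Summarize the attached reports",
+                              "documentList": ["report_q1.pdf", "report_q2.pdf"],
+                              "resultType": "docx"})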
+ """
+ try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"ai_process_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "Generate",
+ "AI Processing",
+ f"Format: {parameters.get('resultType', 'txt')}",
+ parentOperationId=parentOperationId
+ )
+
+ aiPrompt = parameters.get("aiPrompt")
+ logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")
+
+ # Update progress - preparing parameters
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")
+
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+
+ documentListParam = parameters.get("documentList")
+ # Convert to DocumentReferenceList if needed
+ if documentListParam is None:
+ documentList = DocumentReferenceList(references=[])
+ elif isinstance(documentListParam, DocumentReferenceList):
+ documentList = documentListParam
+ elif isinstance(documentListParam, str):
+ documentList = DocumentReferenceList.from_string_list([documentListParam])
+ elif isinstance(documentListParam, list):
+ documentList = DocumentReferenceList.from_string_list(documentListParam)
+ else:
+ logger.error(f"Invalid documentList type: {type(documentListParam)}")
+ documentList = DocumentReferenceList(references=[])
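+        # e.g. a single reference string "report.docx" or a plain list
+        # ["a.pdf", "b.xlsx"] both normalize to a DocumentReferenceList above
+        # (illustrative reference names)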
+
+ resultType = parameters.get("resultType", "txt")
+
+ if not aiPrompt:
+ logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
+ return ActionResult.isFailure(
+ error="AI prompt is required"
+ )
+
+ # Determine output extension and default MIME type without duplicating service logic
+ normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
+ output_extension = f".{normalized_result_type}"
+ output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
+ logger.info(f"Using result type: {resultType} -> {output_extension}")
+
+ # Phase 7.3: Extract content first if documents provided, then use contentParts
+ # Check if contentParts are already provided (preferred path)
+ contentParts: Optional[List[ContentPart]] = None
+ if "contentParts" in parameters:
+ contentParts = parameters.get("contentParts")
+ if contentParts and not isinstance(contentParts, list):
+ # Try to extract from ContentExtracted if it's an ActionDocument
+ if hasattr(contentParts, 'parts'):
+ contentParts = contentParts.parts
+ else:
+ logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
+ contentParts = None
+
+ # If contentParts not provided but documentList is, extract content first
+ if not contentParts and documentList.references:
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")
+
+ # Get ChatDocuments
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
+ if not chatDocuments:
+ logger.warning("No documents found in documentList")
+ else:
+ logger.info(f"Extracting content from {len(chatDocuments)} documents")
+
+ # Prepare extraction options (use defaults if not provided)
+ extractionOptions = parameters.get("extractionOptions")
+ if not extractionOptions:
+ extractionOptions = ExtractionOptions(
+ prompt="Extract all content from the document",
+ mergeStrategy=MergeStrategy(
+ mergeType="concatenate",
+ groupBy="typeGroup",
+ orderBy="id"
+ ),
+ processDocumentsIndividually=True
+ )
+
+ # Extract content using extraction service with hierarchical progress logging
+ # Pass operationId for per-document progress tracking
+ extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
+
+ # Combine all ContentParts from all extracted results
+ contentParts = []
+ for extracted in extractedResults:
+ if extracted.parts:
+ contentParts.extend(extracted.parts)
+
+ logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")
+
+ # Update progress - preparing AI call
+ self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
+
+ # Build options with only resultFormat - let service layer handle all other parameters
+        output_format = normalized_result_type
+ options = AiCallOptions(
+ resultFormat=output_format
+ # Removed all model parameters - service layer will analyze prompt and determine optimal parameters
+ )
+
+ # Update progress - calling AI
+ self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
+
+ # Use unified callAiContent method with contentParts (extraction is now separate)
+ aiResponse = await self.services.ai.callAiContent(
+ prompt=aiPrompt,
+ options=options,
+ contentParts=contentParts, # Already extracted (or None if no documents)
+ outputFormat=output_format,
+ parentOperationId=operationId
+ )
+
+ # Update progress - processing result
+ self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")
+
+ # Extract documents from AiResponse
+ if aiResponse.documents and len(aiResponse.documents) > 0:
+ action_documents = []
+ for doc in aiResponse.documents:
+ validationMetadata = {
+ "actionType": "ai.process",
+ "resultType": normalized_result_type,
+ "outputFormat": output_format,
+ "hasDocuments": True,
+ "documentCount": len(aiResponse.documents)
+ }
+ action_documents.append(ActionDocument(
+ documentName=doc.documentName,
+ documentData=doc.documentData,
+ mimeType=doc.mimeType or output_mime_type,
+ sourceJson=getattr(doc, 'sourceJson', None), # Preserve source JSON for structure validation
+ validationMetadata=validationMetadata
+ ))
+
+ final_documents = action_documents
+ else:
+ # Text response - create document from content
+ extension = output_extension.lstrip('.')
+ meaningful_name = self._generateMeaningfulFileName(
+ base_name="ai",
+ extension=extension,
+ action_name="result"
+ )
+ validationMetadata = {
+ "actionType": "ai.process",
+ "resultType": normalized_result_type,
+ "outputFormat": output_format,
+ "hasDocuments": False,
+ "contentType": "text"
+ }
+ action_document = ActionDocument(
+ documentName=meaningful_name,
+ documentData=aiResponse.content,
+ mimeType=output_mime_type,
+ validationMetadata=validationMetadata
+ )
+ final_documents = [action_document]
+
+ # Complete progress tracking
+ self.services.chat.progressLogFinish(operationId, True)
+
+ return ActionResult.isSuccess(documents=final_documents)
+
+ except Exception as e:
+ logger.error(f"Error in AI processing: {str(e)}")
+
+ # Complete progress tracking with failure
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+        except Exception:
+ pass # Don't fail on progress logging errors
+
+ return ActionResult.isFailure(
+ error=str(e)
+ )
+
diff --git a/modules/workflows/methods/methodAi/actions/summarizeDocument.py b/modules/workflows/methods/methodAi/actions/summarizeDocument.py
new file mode 100644
index 00000000..80588712
--- /dev/null
+++ b/modules/workflows/methods/methodAi/actions/summarizeDocument.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Summarize Document action for AI operations.
+Summarizes one or more documents, extracting key points and main ideas.
+"""
+
+import logging
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult
+
+logger = logging.getLogger(__name__)
+
+@action
+async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Summarize one or more documents, extracting key points and main ideas.
+ - Input requirements: documentList (required); optional summaryLength, focus.
+ - Output format: Text document with summary (default: txt, can be overridden with resultType).
+
+ Parameters:
+ - documentList (list, required): Document reference(s) to summarize.
+ - summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
+ - focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
+ - resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
+ """
+ documentList = parameters.get("documentList", [])
+ if not documentList:
+ return ActionResult.isFailure(error="documentList is required")
+
+ summaryLength = parameters.get("summaryLength", "medium")
+ focus = parameters.get("focus")
+ resultType = parameters.get("resultType", "txt")
+
+ lengthInstructions = {
+ "brief": "Create a brief summary (2-3 paragraphs)",
+ "medium": "Create a medium-length summary (comprehensive but concise)",
+ "detailed": "Create a detailed summary covering all major points"
+ }
+ lengthInstruction = lengthInstructions.get(summaryLength.lower(), lengthInstructions["medium"])
+
+ aiPrompt = f"Summarize the provided document(s). {lengthInstruction}."
+ if focus:
+ aiPrompt += f" Focus specifically on: {focus}."
+ aiPrompt += " Extract and present the key points, main ideas, and important information in a clear, well-structured format."
+
+ return await self.process({
+ "aiPrompt": aiPrompt,
+ "documentList": documentList,
+ "resultType": resultType
+ })
+
diff --git a/modules/workflows/methods/methodAi/actions/translateDocument.py b/modules/workflows/methods/methodAi/actions/translateDocument.py
new file mode 100644
index 00000000..12264e39
--- /dev/null
+++ b/modules/workflows/methods/methodAi/actions/translateDocument.py
@@ -0,0 +1,60 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Translate Document action for AI operations.
+Translates documents to a target language while preserving formatting and structure.
+"""
+
+import logging
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult
+
+logger = logging.getLogger(__name__)
+
+@action
+async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Translate documents to a target language while preserving formatting and structure.
+ - Input requirements: documentList (required); targetLanguage (required).
+ - Output format: Translated document in same format as input (default) or specified resultType.
+
+ Parameters:
+ - documentList (list, required): Document reference(s) to translate.
+ - targetLanguage (str, required): Target language code or name (e.g., "de", "German", "French", "es").
+ - sourceLanguage (str, optional): Source language if known (e.g., "en", "English"). If not provided, AI will detect.
+ - preserveFormatting (bool, optional): Whether to preserve original formatting. Default: True.
+ - resultType (str, optional): Output file extension. If not specified, uses same format as input.
+ """
+ documentList = parameters.get("documentList", [])
+ if not documentList:
+ return ActionResult.isFailure(error="documentList is required")
+
+ targetLanguage = parameters.get("targetLanguage")
+ if not targetLanguage:
+ return ActionResult.isFailure(error="targetLanguage is required")
+
+ sourceLanguage = parameters.get("sourceLanguage")
+ preserveFormatting = parameters.get("preserveFormatting", True)
+ resultType = parameters.get("resultType")
+
+ aiPrompt = f"Translate the provided document(s) to {targetLanguage}."
+ if sourceLanguage:
+ aiPrompt += f" The source language is {sourceLanguage}."
+ if preserveFormatting:
+ aiPrompt += " Preserve all formatting, structure, tables, and layout exactly as they appear in the original document."
+ else:
+ aiPrompt += " Focus on accurate translation of content."
+ aiPrompt += " Maintain the same document structure, headings, and organization."
+
+ processParams = {
+ "aiPrompt": aiPrompt,
+ "documentList": documentList
+ }
+ if resultType:
+ processParams["resultType"] = resultType
+
+ return await self.process(processParams)
+
diff --git a/modules/workflows/methods/methodAi/actions/webResearch.py b/modules/workflows/methods/methodAi/actions/webResearch.py
new file mode 100644
index 00000000..2bd5c3dd
--- /dev/null
+++ b/modules/workflows/methods/methodAi/actions/webResearch.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Web Research action for AI operations.
+Web research with two-step process: search for URLs, then crawl content.
+"""
+
+import logging
+import time
+import re
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Web research with two-step process: search for URLs, then crawl content.
+    - Input requirements: prompt (required); optional urlList, country, language, researchDepth.
+ - Output format: JSON with research results including URLs and content.
+
+ Parameters:
+ - prompt (str, required): Natural language research instruction.
+ - urlList (list, optional): Specific URLs to crawl, if needed.
+ - country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
+ - language (str, optional): Language code (lowercase, e.g., de, en, fr).
+ - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
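+
+    Example (illustrative call):
+        await method.webResearch({"prompt": "Current market overview of grid storage vendors",
+                                  "country": "ch", "language": "de",
+                                  "researchDepth": "deep"})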
+ """
+ try:
+ prompt = parameters.get("prompt")
+ if not prompt:
+ return ActionResult.isFailure(error="Research prompt is required")
+
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"web_research_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "Web Research",
+ "Searching and Crawling",
+ "Extracting URLs and Content",
+ parentOperationId=parentOperationId
+ )
+
+ # Call webcrawl service - service handles all AI intention analysis and processing
+ result = await self.services.web.performWebResearch(
+ prompt=prompt,
+ urls=parameters.get("urlList", []),
+ country=parameters.get("country"),
+ language=parameters.get("language"),
+ researchDepth=parameters.get("researchDepth", "general"),
+ operationId=operationId
+ )
+
+ # Complete progress tracking
+ self.services.chat.progressLogFinish(operationId, True)
+
+ # Get meaningful filename from research result (generated by intent analyzer)
+ suggestedFilename = result.get("suggested_filename")
+ if suggestedFilename:
+ # Clean and validate filename
+ cleaned = suggestedFilename.strip().strip('"\'')
+ cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()
+ # Ensure it doesn't already have extension
+ if cleaned.lower().endswith('.json'):
+ cleaned = cleaned[:-5]
+ # Validate: should be reasonable length and contain only safe characters
+ if cleaned and len(cleaned) <= 60 and re.match(r'^[a-zA-Z0-9_\-]+$', cleaned):
+ meaningfulName = f"{cleaned}.json"
+ else:
+ # Fallback to generic meaningful filename
+ meaningfulName = self._generateMeaningfulFileName(
+ base_name="web_research",
+ extension="json",
+ action_name="research"
+ )
+ else:
+ # Fallback to generic meaningful filename
+ meaningfulName = self._generateMeaningfulFileName(
+ base_name="web_research",
+ extension="json",
+ action_name="research"
+ )
+
+ validationMetadata = {
+ "actionType": "ai.webResearch",
+ "prompt": prompt,
+ "urlList": parameters.get("urlList", []),
+ "country": parameters.get("country"),
+ "language": parameters.get("language"),
+ "researchDepth": parameters.get("researchDepth", "general"),
+ "resultFormat": "json"
+ }
+ actionDocument = ActionDocument(
+ documentName=meaningfulName,
+ documentData=result,
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[actionDocument])
+
+ except Exception as e:
+ logger.error(f"Error in web research: {str(e)}")
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+        except Exception:
+ pass
+ return ActionResult.isFailure(error=str(e))
+
diff --git a/modules/workflows/methods/methodAi/helpers/__init__.py b/modules/workflows/methods/methodAi/helpers/__init__.py
new file mode 100644
index 00000000..4833e0e7
--- /dev/null
+++ b/modules/workflows/methods/methodAi/helpers/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""Helper modules for AI method operations."""
+
diff --git a/modules/workflows/methods/methodAi/helpers/csvProcessing.py b/modules/workflows/methods/methodAi/helpers/csvProcessing.py
new file mode 100644
index 00000000..9121f43c
--- /dev/null
+++ b/modules/workflows/methods/methodAi/helpers/csvProcessing.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+CSV Processing helper for AI operations.
+Handles CSV content processing with options.
+"""
+
+import csv
+import io
+import logging
+from typing import Dict, Any
+
+logger = logging.getLogger(__name__)
+
+class CsvProcessingHelper:
+ """Helper for CSV processing operations"""
+
+ def __init__(self, methodInstance):
+ """
+ Initialize CSV processing helper.
+
+ Args:
+ methodInstance: Instance of MethodAi (for access to services)
+ """
+ self.method = methodInstance
+ self.services = methodInstance.services
+
+ def applyCsvOptions(self, csvContent: str, options: Dict[str, Any]) -> str:
+ """
+ Apply CSV processing options to CSV content.
+
+ Args:
+ csvContent: CSV content as string
+ options: Dictionary with CSV processing options
+
+ Returns:
+ Processed CSV content as string
+ """
+        if not csvContent or not options:
+            return csvContent
+
+        delimiter = options.get("delimiter", ",")
+        quotechar = options.get("quotechar", '"')
+
+        # Nothing to rewrite if the requested dialect matches the defaults
+        if delimiter == "," and quotechar == '"':
+            return csvContent
+
+        # Parse the CSV first and re-emit it with the requested dialect;
+        # a plain string replace would corrupt delimiters or quote characters
+        # that appear inside quoted field values
+        reader = csv.reader(io.StringIO(csvContent))
+        output = io.StringIO()
+        writer = csv.writer(output, delimiter=delimiter, quotechar=quotechar)
+        for row in reader:
+            writer.writerow(row)
+        return output.getvalue()
+
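+# Illustrative usage from the convert action; csv.writer terminates rows
+# with "\r\n" by default:
+#   helper.applyCsvOptions("a,b\n1,2\n", {"delimiter": ";"})  -> "a;b\r\n1;2\r\n"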
diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py
new file mode 100644
index 00000000..101c8586
--- /dev/null
+++ b/modules/workflows/methods/methodAi/methodAi.py
@@ -0,0 +1,383 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+import logging
+from datetime import datetime, UTC
+from modules.workflows.methods.methodBase import MethodBase
+from modules.datamodels.datamodelWorkflowActions import WorkflowActionDefinition, WorkflowActionParameter
+from modules.shared.frontendTypes import FrontendType
+
+# Import helpers
+from .helpers.csvProcessing import CsvProcessingHelper
+
+# Import actions
+from .actions.process import process
+from .actions.webResearch import webResearch
+from .actions.summarizeDocument import summarizeDocument
+from .actions.translateDocument import translateDocument
+from .actions.convert import convert
+from .actions.convertDocument import convertDocument
+from .actions.extractData import extractData
+from .actions.generateDocument import generateDocument
+
+logger = logging.getLogger(__name__)
+
+class MethodAi(MethodBase):
+ """AI processing methods."""
+
+ def __init__(self, services):
+ super().__init__(services)
+ self.name = "ai"
+ self.description = "AI processing methods"
+
+ # Initialize helper modules
+ self.csvProcessing = CsvProcessingHelper(self)
+
+        # RBAC integration: action definitions with actionId
+ self._actions = {
+ "process": WorkflowActionDefinition(
+ actionId="ai.process",
+ description="Universal AI document processing action - accepts multiple input documents in any format and processes them together with a prompt",
+ parameters={
+ "aiPrompt": WorkflowActionParameter(
+ name="aiPrompt",
+ type="str",
+ frontendType=FrontendType.TEXTAREA,
+ required=True,
+ description="Instruction for the AI describing what processing to perform"
+ ),
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=False,
+ description="Document reference(s) in any format to use as input/context"
+ ),
+ "resultType": WorkflowActionParameter(
+ name="resultType",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["txt", "json", "md", "csv", "xml", "html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"],
+ required=False,
+ default="txt",
+ description="Output file extension. All output documents will use this format"
+ )
+ },
+ execute=process.__get__(self, self.__class__)
+ ),
+ "webResearch": WorkflowActionDefinition(
+ actionId="ai.webResearch",
+ description="Web research with two-step process: search for URLs, then crawl content",
+ parameters={
+ "prompt": WorkflowActionParameter(
+ name="prompt",
+ type="str",
+ frontendType=FrontendType.TEXTAREA,
+ required=True,
+ description="Natural language research instruction"
+ ),
+ "urlList": WorkflowActionParameter(
+ name="urlList",
+ type="List[str]",
+ frontendType=FrontendType.MULTISELECT,
+ required=False,
+ description="Specific URLs to crawl, if needed"
+ ),
+ "country": WorkflowActionParameter(
+ name="country",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=False,
+ description="Two-digit country code (lowercase, e.g., ch, us, de)"
+ ),
+ "language": WorkflowActionParameter(
+ name="language",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["de", "en", "fr", "it", "es"],
+ required=False,
+ description="Language code (lowercase, e.g., de, en, fr)"
+ ),
+ "researchDepth": WorkflowActionParameter(
+ name="researchDepth",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["fast", "general", "deep"],
+ required=False,
+ default="general",
+ description="Research depth"
+ )
+ },
+ execute=webResearch.__get__(self, self.__class__)
+ ),
+ "summarizeDocument": WorkflowActionDefinition(
+ actionId="ai.summarizeDocument",
+ description="Summarize one or more documents, extracting key points and main ideas",
+ parameters={
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference(s) to summarize"
+ ),
+ "summaryLength": WorkflowActionParameter(
+ name="summaryLength",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["brief", "medium", "detailed"],
+ required=False,
+ default="medium",
+ description="Desired summary length"
+ ),
+ "focus": WorkflowActionParameter(
+ name="focus",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=False,
+ description="Specific aspect to focus on in the summary (e.g., financial data, key decisions)"
+ ),
+ "resultType": WorkflowActionParameter(
+ name="resultType",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["txt", "md", "docx"],
+ required=False,
+ default="txt",
+ description="Output file extension"
+ )
+ },
+ execute=summarizeDocument.__get__(self, self.__class__)
+ ),
+ "translateDocument": WorkflowActionDefinition(
+ actionId="ai.translateDocument",
+ description="Translate documents to a target language while preserving formatting and structure",
+ parameters={
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference(s) to translate"
+ ),
+ "targetLanguage": WorkflowActionParameter(
+ name="targetLanguage",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="Target language code or name (e.g., de, German, French, es)"
+ ),
+ "sourceLanguage": WorkflowActionParameter(
+ name="sourceLanguage",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=False,
+ description="Source language if known (e.g., en, English). If not provided, AI will detect"
+ ),
+ "preserveFormatting": WorkflowActionParameter(
+ name="preserveFormatting",
+ type="bool",
+ frontendType=FrontendType.CHECKBOX,
+ required=False,
+ default=True,
+ description="Whether to preserve original formatting"
+ ),
+ "resultType": WorkflowActionParameter(
+ name="resultType",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=False,
+ description="Output file extension. If not specified, uses same format as input"
+ )
+ },
+ execute=translateDocument.__get__(self, self.__class__)
+ ),
+ "convert": WorkflowActionDefinition(
+ actionId="ai.convert",
+ description="Convert documents/data between different formats with specific formatting options",
+ parameters={
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference(s) to convert"
+ ),
+ "inputFormat": WorkflowActionParameter(
+ name="inputFormat",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["json", "csv", "xlsx", "txt"],
+ required=True,
+ description="Source format"
+ ),
+ "outputFormat": WorkflowActionParameter(
+ name="outputFormat",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["csv", "json", "xlsx", "txt"],
+ required=True,
+ description="Target format"
+ ),
+ "columnsPerRow": WorkflowActionParameter(
+ name="columnsPerRow",
+ type="int",
+ frontendType=FrontendType.NUMBER,
+ required=False,
+ description="For CSV output, number of columns per row. Default: auto-detect",
+ validation={"min": 1, "max": 100}
+ ),
+ "delimiter": WorkflowActionParameter(
+ name="delimiter",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=False,
+ default=",",
+ description="For CSV output, delimiter character"
+ ),
+ "includeHeader": WorkflowActionParameter(
+ name="includeHeader",
+ type="bool",
+ frontendType=FrontendType.CHECKBOX,
+ required=False,
+ default=True,
+ description="For CSV output, whether to include header row"
+ ),
+ "language": WorkflowActionParameter(
+ name="language",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["de", "en", "fr"],
+ required=False,
+ default="en",
+ description="Language for output"
+ )
+ },
+ execute=convert.__get__(self, self.__class__)
+ ),
+ "convertDocument": WorkflowActionDefinition(
+ actionId="ai.convertDocument",
+ description="Convert documents between different formats (PDF→Word, Excel→CSV, etc.)",
+ parameters={
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference(s) to convert"
+ ),
+ "targetFormat": WorkflowActionParameter(
+ name="targetFormat",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["docx", "pdf", "xlsx", "csv", "txt", "html", "json", "md"],
+ required=True,
+ description="Target format extension"
+ ),
+ "preserveStructure": WorkflowActionParameter(
+ name="preserveStructure",
+ type="bool",
+ frontendType=FrontendType.CHECKBOX,
+ required=False,
+ default=True,
+ description="Whether to preserve document structure (headings, tables, etc.)"
+ )
+ },
+ execute=convertDocument.__get__(self, self.__class__)
+ ),
+ "extractData": WorkflowActionDefinition(
+ actionId="ai.extractData",
+ description="Extract structured data from documents (key-value pairs, entities, facts, etc.)",
+ parameters={
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference(s) to extract data from"
+ ),
+ "dataStructure": WorkflowActionParameter(
+ name="dataStructure",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["flat", "nested", "list"],
+ required=False,
+ default="nested",
+ description="Desired data structure"
+ ),
+ "fields": WorkflowActionParameter(
+ name="fields",
+ type="List[str]",
+ frontendType=FrontendType.MULTISELECT,
+ required=False,
+ description="Specific fields/properties to extract (e.g., [name, date, amount])"
+ ),
+ "resultType": WorkflowActionParameter(
+ name="resultType",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["json", "csv", "xlsx"],
+ required=False,
+ default="json",
+ description="Output format"
+ )
+ },
+ execute=extractData.__get__(self, self.__class__)
+ ),
+ "generateDocument": WorkflowActionDefinition(
+ actionId="ai.generateDocument",
+ description="Generate documents from scratch or based on templates/inputs",
+ parameters={
+ "prompt": WorkflowActionParameter(
+ name="prompt",
+ type="str",
+ frontendType=FrontendType.TEXTAREA,
+ required=True,
+ description="Description of the document to generate"
+ ),
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=False,
+ description="Template documents or reference documents to use as a guide"
+ ),
+ "documentType": WorkflowActionParameter(
+ name="documentType",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["letter", "memo", "proposal", "contract", "report", "email"],
+ required=False,
+ description="Type of document"
+ ),
+ "resultType": WorkflowActionParameter(
+ name="resultType",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["docx", "pdf", "txt", "md"],
+ required=False,
+ default="docx",
+ description="Output format"
+ )
+ },
+ execute=generateDocument.__get__(self, self.__class__)
+ )
+ }
+
+ # Validate actions after definition
+ self._validateActions()
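+
+        # Illustrative dispatch path: the workflow engine reads self.actions
+        # (which RBAC-filters this dictionary) and awaits the wrapped method, e.g.
+        #   await self.actions["process"]["method"]({"aiPrompt": "...", "resultType": "pdf"})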
+
+        # Register actions as methods (optional, for direct access)
+ self.process = process.__get__(self, self.__class__)
+ self.webResearch = webResearch.__get__(self, self.__class__)
+ self.summarizeDocument = summarizeDocument.__get__(self, self.__class__)
+ self.translateDocument = translateDocument.__get__(self, self.__class__)
+ self.convert = convert.__get__(self, self.__class__)
+ self.convertDocument = convertDocument.__get__(self, self.__class__)
+ self.extractData = extractData.__get__(self, self.__class__)
+ self.generateDocument = generateDocument.__get__(self, self.__class__)
+
+ def _format_timestamp_for_filename(self) -> str:
+ """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
+ return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+
diff --git a/modules/workflows/methods/methodBase.py b/modules/workflows/methods/methodBase.py
index a29d63f5..72f35c19 100644
--- a/modules/workflows/methods/methodBase.py
+++ b/modules/workflows/methods/methodBase.py
@@ -7,6 +7,9 @@ import logging
from functools import wraps
import inspect
+from modules.datamodels.datamodelWorkflowActions import WorkflowActionDefinition, WorkflowActionParameter
+from modules.datamodels.datamodelRbac import AccessRuleContext
+
logger = logging.getLogger(__name__)
def action(func):
@@ -57,37 +60,194 @@ class MethodBase:
self.description: str
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+        # Actions MUST be defined as a dictionary
+        # Every method class must define the _actions dictionary in __init__
+ self._actions: Dict[str, WorkflowActionDefinition] = {}
+
+        # After initialization: validate actions (runs once _actions is set)
+        # Validation only happens after the subclass is fully initialized
+
+ def _validateActions(self):
+ """Validate that _actions dictionary is properly defined"""
+ if not hasattr(self, '_actions') or not isinstance(self._actions, dict):
+ raise ValueError(f"Method {self.name} must define _actions dictionary in __init__")
+
+ for actionName, actionDef in self._actions.items():
+ if not isinstance(actionDef, WorkflowActionDefinition):
+ raise ValueError(f"Action '{actionName}' in {self.name} must be WorkflowActionDefinition instance")
+
+ if not actionDef.actionId:
+ raise ValueError(f"Action '{actionName}' in {self.name} must have actionId")
+
+ if not actionDef.execute:
+ raise ValueError(f"Action '{actionName}' in {self.name} must have execute function")
+
@property
def actions(self) -> Dict[str, Dict[str, Any]]:
- """Dynamically collect all actions decorated with @action in the class."""
- actions = {}
- for attr_name in dir(self):
- # Skip the actions property itself to avoid recursion
- if attr_name == 'actions':
- continue
- try:
- attr = getattr(self, attr_name)
- if callable(attr) and getattr(attr, 'is_action', False):
- sig = inspect.signature(attr)
- params = {}
- for param_name, param in sig.parameters.items():
- if param_name not in ['self', 'parameters']:
- param_type = param.annotation if param.annotation != param.empty else Any
- params[param_name] = {
- 'type': param_type,
- 'required': param.default == param.empty,
- 'description': None,
- 'default': param.default if param.default != param.empty else None
- }
- actions[attr_name] = {
- 'description': attr.__doc__ or '',
- 'parameters': params,
- 'method': attr
- }
- except (AttributeError, RecursionError):
- # Skip attributes that cause issues
- continue
- return actions
+ """
+ Dynamically collect all actions from _actions dictionary.
+ Returns format for API/UI consumption.
+
+        REQUIREMENT: All actions must be defined in the _actions dictionary.
+        Actions without an _actions definition are not available.
+ """
+ result = {}
+
+        # Actions must be defined in the _actions dictionary
+ if not hasattr(self, '_actions') or not self._actions:
+ self.logger.error(f"Method {self.name} has no _actions dictionary defined. Actions will not be available.")
+ return result
+
+ for actionName, actionDef in self._actions.items():
+            # RBAC check: verify the action is available to the current user
+ if not self._checkActionPermission(actionDef.actionId):
+ continue # Skip if user doesn't have permission
+
+            # Convert WorkflowActionDefinition to the system format
+ result[actionName] = {
+ 'description': actionDef.description,
+ 'parameters': self._convertParametersToSystemFormat(actionDef.parameters),
+ 'method': self._createActionWrapper(actionDef)
+ }
+
+ return result
+
+ def _checkActionPermission(self, actionId: str) -> bool:
+ """
+ Check if current user has permission to execute this action.
+ Uses RBAC RESOURCE context.
+
+        REQUIREMENT: The RBAC service must be available.
+ """
+ if not hasattr(self.services, 'rbac') or not self.services.rbac:
+ self.logger.error(f"RBAC service not available. Action {actionId} will be denied.")
+ return False
+
+ currentUser = self.services.chat.getCurrentUser()
+ if not currentUser:
+ self.logger.warning(f"No current user found. Action {actionId} will be denied.")
+ return False
+
+        # RBAC check: RESOURCE context, item = actionId
+ permissions = self.services.rbac.getUserPermissions(
+ user=currentUser,
+ context=AccessRuleContext.RESOURCE,
+ item=actionId
+ )
+
+ return permissions.view
+
+ def _convertParametersToSystemFormat(self, parameters: Dict[str, WorkflowActionParameter]) -> Dict[str, Dict[str, Any]]:
+ """Convert WorkflowActionParameter dict to system format for API/UI consumption"""
+ result = {}
+ for paramName, param in parameters.items():
+ result[paramName] = {
+ 'type': param.type,
+ 'required': param.required,
+ 'description': param.description,
+ 'default': param.default,
+ 'frontendType': param.frontendType.value,
+ 'frontendOptions': param.frontendOptions,
+ 'validation': param.validation
+ }
+ return result
+
+ def _createActionWrapper(self, actionDef: WorkflowActionDefinition):
+ """Create wrapper function for action execution with parameter validation"""
+ async def wrapper(parameters: Dict[str, Any], *args, **kwargs):
+            # Parameter validation based on the WorkflowActionParameter definitions
+ validatedParams = self._validateParameters(parameters, actionDef.parameters)
+
+ # Execute action
+ return await actionDef.execute(validatedParams, *args, **kwargs)
+
+ wrapper.is_action = True
+ return wrapper
+
+ def _validateParameters(self, parameters: Dict[str, Any], paramDefs: Dict[str, WorkflowActionParameter]) -> Dict[str, Any]:
+ """Validate parameters against definitions"""
+ validated = {}
+
+ for paramName, paramDef in paramDefs.items():
+ value = parameters.get(paramName)
+
+ # Check required
+ if paramDef.required and value is None:
+ raise ValueError(f"Required parameter '{paramName}' is missing")
+
+ # Use default if not provided
+ if value is None and paramDef.default is not None:
+ value = paramDef.default
+
+ # Type validation
+ if value is not None:
+ value = self._validateType(value, paramDef.type)
+
+ # Custom validation rules
+ if paramDef.validation and value is not None:
+ self._applyValidationRules(value, paramDef.validation)
+
+ validated[paramName] = value
+
+ return validated
+
+ def _validateType(self, value: Any, expectedType: str) -> Any:
+ """Validate and convert value to expected type"""
+ # Type validation logic
+ typeMap = {
+ 'str': str,
+ 'int': int,
+ 'float': float,
+ 'bool': bool,
+ 'list': list,
+ 'dict': dict,
+ }
+
+ # Handle List[str], List[int], etc.
+ if expectedType.startswith('List['):
+ if not isinstance(value, list):
+ raise ValueError(f"Expected list for type '{expectedType}', got {type(value).__name__}")
+ # Extract inner type
+ innerType = expectedType[5:-1].strip() # Remove "List[" and "]"
+ if innerType in typeMap:
+ return [typeMap[innerType](v) for v in value]
+ return value
+
+ # Handle Dict[str, Any], etc.
+ if expectedType.startswith('Dict['):
+ if not isinstance(value, dict):
+ raise ValueError(f"Expected dict for type '{expectedType}', got {type(value).__name__}")
+ return value
+
+ # Handle simple types
+ if expectedType in typeMap:
+ expectedTypeClass = typeMap[expectedType]
+ if not isinstance(value, expectedTypeClass):
+ try:
+ return expectedTypeClass(value)
+ except (ValueError, TypeError) as e:
+ raise ValueError(f"Cannot convert {value} to {expectedType}: {str(e)}")
+
+ return value
+
+ def _applyValidationRules(self, value: Any, rules: Dict[str, Any]):
+ """Apply custom validation rules"""
+ if 'min' in rules:
+ if isinstance(value, (int, float)) and value < rules['min']:
+ raise ValueError(f"Value must be >= {rules['min']}")
+ elif isinstance(value, str) and len(value) < rules['min']:
+ raise ValueError(f"String length must be >= {rules['min']}")
+
+ if 'max' in rules:
+ if isinstance(value, (int, float)) and value > rules['max']:
+ raise ValueError(f"Value must be <= {rules['max']}")
+ elif isinstance(value, str) and len(value) > rules['max']:
+ raise ValueError(f"String length must be <= {rules['max']}")
+
+ if 'pattern' in rules:
+ import re
+ if not re.match(rules['pattern'], str(value)):
+ raise ValueError(f"Value does not match required pattern: {rules['pattern']}")
def getActionSignature(self, actionName: str) -> str:
"""Get formatted action signature for AI prompt generation (detailed version)"""
diff --git a/modules/workflows/methods/methodContext.py b/modules/workflows/methods/methodContext.py.old
similarity index 100%
rename from modules/workflows/methods/methodContext.py
rename to modules/workflows/methods/methodContext.py.old
diff --git a/modules/workflows/methods/methodContext/__init__.py b/modules/workflows/methods/methodContext/__init__.py
new file mode 100644
index 00000000..8d6c7823
--- /dev/null
+++ b/modules/workflows/methods/methodContext/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+from .methodContext import MethodContext
+
+__all__ = ['MethodContext']
+
diff --git a/modules/workflows/methods/methodContext/actions/__init__.py b/modules/workflows/methods/methodContext/actions/__init__.py
new file mode 100644
index 00000000..9059d6bc
--- /dev/null
+++ b/modules/workflows/methods/methodContext/actions/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""Action modules for Context operations."""
+
+# Export all actions
+from .getDocumentIndex import getDocumentIndex
+from .extractContent import extractContent
+from .triggerPreprocessingServer import triggerPreprocessingServer
+
+__all__ = [
+ 'getDocumentIndex',
+ 'extractContent',
+ 'triggerPreprocessingServer',
+]
+
diff --git a/modules/workflows/methods/methodContext/actions/extractContent.py b/modules/workflows/methods/methodContext/actions/extractContent.py
new file mode 100644
index 00000000..799ce61d
--- /dev/null
+++ b/modules/workflows/methods/methodContext/actions/extractContent.py
@@ -0,0 +1,156 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Extract Content action for Context operations.
+Extracts content from documents (separate from AI calls).
+"""
+
+import logging
+import time
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+from modules.datamodels.datamodelDocref import DocumentReferenceList
+from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
+
+logger = logging.getLogger(__name__)
+
+@action
+async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Extract content from documents (separate from AI calls).
+
+ This action performs pure content extraction without AI processing.
+ The extracted ContentParts can then be used by subsequent AI processing actions.
+
+ Parameters:
+ - documentList (list, required): Document reference(s) to extract content from.
+ - extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).
+
+ Returns:
+ - ActionResult with ActionDocument containing ContentExtracted objects
+ - ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
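+
+    Example extractionOptions dict (illustrative; assumes ExtractionOptions
+    accepts nested dicts, e.g. via pydantic):
+        {"prompt": "Extract all content from the document",
+         "mergeStrategy": {"mergeType": "concatenate", "groupBy": "typeGroup", "orderBy": "id"},
+         "processDocumentsIndividually": True}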
+ """
+ try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"context_extract_{workflowId}_{int(time.time())}"
+
+ # Extract documentList from parameters dict
+ documentListParam = parameters.get("documentList")
+ if not documentListParam:
+ return ActionResult.isFailure(error="documentList is required")
+
+ # Convert to DocumentReferenceList if needed
+ if isinstance(documentListParam, DocumentReferenceList):
+ documentList = documentListParam
+ elif isinstance(documentListParam, str):
+ documentList = DocumentReferenceList.from_string_list([documentListParam])
+ elif isinstance(documentListParam, list):
+ documentList = DocumentReferenceList.from_string_list(documentListParam)
+ else:
+ return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "Extracting content from documents",
+ "Content Extraction",
+ f"Documents: {len(documentList.references)}",
+ parentOperationId=parentOperationId
+ )
+
+ # Get ChatDocuments from documentList
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
+
+ if not chatDocuments:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No documents found in documentList")
+
+ logger.info(f"Extracting content from {len(chatDocuments)} documents")
+
+ # Prepare extraction options
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
+ extractionOptionsParam = parameters.get("extractionOptions")
+
+ # Convert dict to ExtractionOptions object if needed, or create defaults
+ if extractionOptionsParam:
+ if isinstance(extractionOptionsParam, dict):
+ # Convert dict to ExtractionOptions object
+ extractionOptions = ExtractionOptions(**extractionOptionsParam)
+ elif isinstance(extractionOptionsParam, ExtractionOptions):
+ extractionOptions = extractionOptionsParam
+ else:
+ # Invalid type, use defaults
+ extractionOptions = None
+ else:
+ extractionOptions = None
+
+ # If extractionOptions not provided, create defaults
+ if not extractionOptions:
+ # Default extraction options for pure content extraction (no AI processing)
+ extractionOptions = ExtractionOptions(
+ prompt="Extract all content from the document",
+ mergeStrategy=MergeStrategy(
+ mergeType="concatenate",
+ groupBy="typeGroup",
+ orderBy="id"
+ ),
+ processDocumentsIndividually=True
+ )
+
+ # Call extraction service with hierarchical progress logging
+ self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
+ self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents")
+ # Pass operationId for hierarchical per-document progress logging
+ extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
+
+ # Build ActionDocuments from ContentExtracted results
+ self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
+ actionDocuments = []
+ # Map extracted results back to original documents by index (results are in same order)
+ for i, extracted in enumerate(extractedResults):
+ # Get original document name if available
+ originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
+ if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
+ # Use original filename with "extracted_" prefix
+ baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
+ documentName = f"{baseName}_extracted_{extracted.id}.json"
+ else:
+ # Fallback to generic name with index
+ documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"
+
+ # Store ContentExtracted object in ActionDocument.documentData
+ validationMetadata = {
+ "actionType": "context.extractContent",
+ "documentIndex": i,
+ "extractedId": extracted.id,
+ "partCount": len(extracted.parts) if extracted.parts else 0,
+ "originalFileName": originalDoc.fileName if originalDoc and hasattr(originalDoc, 'fileName') else None
+ }
+ actionDoc = ActionDocument(
+ documentName=documentName,
+ documentData=extracted, # ContentExtracted object
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+ actionDocuments.append(actionDoc)
+
+ self.services.chat.progressLogFinish(operationId, True)
+
+ return ActionResult.isSuccess(documents=actionDocuments)
+
+ except Exception as e:
+ logger.error(f"Error in content extraction: {str(e)}")
+
+ # Complete progress tracking with failure
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+        except Exception:
+ pass # Don't fail on progress logging errors
+
+ return ActionResult.isFailure(error=str(e))
+
diff --git a/modules/workflows/methods/methodContext/actions/getDocumentIndex.py b/modules/workflows/methods/methodContext/actions/getDocumentIndex.py
new file mode 100644
index 00000000..6c9a6700
--- /dev/null
+++ b/modules/workflows/methods/methodContext/actions/getDocumentIndex.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Get Document Index action for Context operations.
+Generates a comprehensive index of all documents available in the current workflow.
+"""
+
+import logging
+import json
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
+ - Input requirements: No input documents required. Optional resultType parameter.
+    - Output format: Structured document index as JSON (default), plain text, or Markdown, listing all documents with their references, metadata, and organization by rounds/tasks.
+
+ Parameters:
+ - resultType (str, optional): Output format (json, txt, md). Default: json.
+ """
+ try:
+ workflow = self.services.workflow
+ if not workflow:
+ return ActionResult.isFailure(
+ error="No workflow available"
+ )
+
+ resultType = parameters.get("resultType", "json").lower().strip().lstrip('.')
+
+ # Get available documents index from chat service
+ documentsIndex = self.services.chat.getAvailableDocuments(workflow)
+
+        noDocumentsMarkers = ("No documents available", "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.")
+        if not documentsIndex or documentsIndex in noDocumentsMarkers:
+ # Return empty index structure
+ if resultType == "json":
+ indexData = {
+ "workflowId": getattr(workflow, 'id', 'unknown'),
+ "totalDocuments": 0,
+ "rounds": [],
+ "documentReferences": []
+ }
+ indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
+ else:
+ indexContent = "Document Index\n==============\n\nNo documents available in this workflow.\n"
+ else:
+ # Parse the document index string to extract structured information
+ indexData = self.documentIndex.parseDocumentIndex(documentsIndex, workflow)
+
+ if resultType == "json":
+ indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
+ elif resultType == "md":
+ indexContent = self.formatting.formatAsMarkdown(indexData)
+ else: # txt
+ indexContent = self.formatting.formatAsText(indexData, documentsIndex)
+
+ # Generate meaningful filename
+ workflowContext = self.services.chat.getWorkflowContext()
+ filename = self._generateMeaningfulFileName(
+ "document_index",
+ resultType if resultType in ["json", "txt", "md"] else "json",
+ workflowContext,
+ "getDocumentIndex"
+ )
+
+ validationMetadata = {
+ "actionType": "context.getDocumentIndex",
+ "resultType": resultType,
+ "workflowId": getattr(workflow, 'id', 'unknown'),
+ "totalDocuments": indexData.get("totalDocuments", 0) if isinstance(indexData, dict) else 0
+ }
+
+ # Create ActionDocument
+ document = ActionDocument(
+ documentName=filename,
+ documentData=indexContent,
+ mimeType="application/json" if resultType == "json" else "text/plain",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ logger.error(f"Error generating document index: {str(e)}")
+ return ActionResult.isFailure(
+ error=f"Failed to generate document index: {str(e)}"
+ )
+
diff --git a/modules/workflows/methods/methodContext/actions/triggerPreprocessingServer.py b/modules/workflows/methods/methodContext/actions/triggerPreprocessingServer.py
new file mode 100644
index 00000000..7ef16d5f
--- /dev/null
+++ b/modules/workflows/methods/methodContext/actions/triggerPreprocessingServer.py
@@ -0,0 +1,121 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Trigger Preprocessing Server action for Context operations.
+Triggers preprocessing server at customer tenant to update database with configuration.
+"""
+
+import logging
+import json
+import aiohttp
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+from modules.shared.configuration import APP_CONFIG
+
+logger = logging.getLogger(__name__)
+
+@action
+async def triggerPreprocessingServer(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Trigger preprocessing server at customer tenant to update database with configuration.
+
+ This action makes a POST request to the preprocessing server endpoint with the provided
+ configuration JSON. The authorization secret is retrieved from APP_CONFIG using the provided config key.
+
+ Parameters:
+ - endpoint (str, required): The full URL endpoint for the preprocessing server API.
+ - configJson (dict or str, required): Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string that will be parsed.
+ - authSecretConfigKey (str, required): The APP_CONFIG key name to retrieve the authorization secret from.
+
+ Returns:
+ - ActionResult with ActionDocument containing "ok" on success, or error message on failure.
+ """
+ try:
+ endpoint = parameters.get("endpoint")
+ if not endpoint:
+ return ActionResult.isFailure(error="endpoint parameter is required")
+
+ configJsonParam = parameters.get("configJson")
+ if not configJsonParam:
+ return ActionResult.isFailure(error="configJson parameter is required")
+
+ authSecretConfigKey = parameters.get("authSecretConfigKey")
+ if not authSecretConfigKey:
+ return ActionResult.isFailure(error="authSecretConfigKey parameter is required")
+
+ # Handle configJson as either dict or JSON string
+ if isinstance(configJsonParam, str):
+ try:
+ configJson = json.loads(configJsonParam)
+ except json.JSONDecodeError as e:
+ return ActionResult.isFailure(error=f"configJson is not valid JSON: {str(e)}")
+ elif isinstance(configJsonParam, dict):
+ configJson = configJsonParam
+ else:
+ return ActionResult.isFailure(error=f"configJson must be a dict or JSON string, got {type(configJsonParam)}")
+
+ # Get authorization secret from APP_CONFIG using the provided config key
+ authSecret = APP_CONFIG.get(authSecretConfigKey)
+ if not authSecret:
+ errorMsg = f"{authSecretConfigKey} not found in APP_CONFIG"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
+ # Prepare headers with authorization (default headers as in original function)
+ headers = {
+ "X-PP-API-Key": authSecret,
+ "Content-Type": "application/json"
+ }
+
+ # Make POST request
+ timeout = aiohttp.ClientTimeout(total=60)
+ async with aiohttp.ClientSession(timeout=timeout) as session:
+ async with session.post(
+ endpoint,
+ headers=headers,
+ json=configJson
+ ) as response:
+ if response.status in [200, 201]:
+ responseText = await response.text()
+ logger.info(f"Preprocessing server trigger successful: {response.status}")
+ logger.debug(f"Response: {responseText}")
+
+ # Generate meaningful filename
+ workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+ filename = self._generateMeaningfulFileName(
+ "preprocessing_result",
+ "txt",
+ workflowContext,
+ "triggerPreprocessingServer"
+ )
+
+ # Create validation metadata
+ validationMetadata = self._createValidationMetadata(
+ "triggerPreprocessingServer",
+ endpoint=endpoint,
+ statusCode=response.status,
+ responseText=responseText
+ )
+
+ # Return success with "ok" document
+ document = ActionDocument(
+ documentName=filename,
+ documentData="ok",
+ mimeType="text/plain",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+ else:
+ errorText = await response.text()
+ errorMsg = f"Preprocessing server trigger failed: {response.status} - {errorText}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
+ except Exception as e:
+ errorMsg = f"Error triggering preprocessing server: {str(e)}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
diff --git a/modules/workflows/methods/methodContext/helpers/__init__.py b/modules/workflows/methods/methodContext/helpers/__init__.py
new file mode 100644
index 00000000..e1e2ab56
--- /dev/null
+++ b/modules/workflows/methods/methodContext/helpers/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""Helper modules for Context method operations."""
+
diff --git a/modules/workflows/methods/methodContext/helpers/documentIndex.py b/modules/workflows/methods/methodContext/helpers/documentIndex.py
new file mode 100644
index 00000000..bba349cf
--- /dev/null
+++ b/modules/workflows/methods/methodContext/helpers/documentIndex.py
@@ -0,0 +1,89 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Document Index helper for Context operations.
+Handles parsing and formatting of document indexes.
+"""
+
+import logging
+from typing import Dict, Any
+from datetime import datetime, UTC
+
+logger = logging.getLogger(__name__)
+
+class DocumentIndexHelper:
+ """Helper for document index operations"""
+
+ def __init__(self, methodInstance):
+ """
+ Initialize document index helper.
+
+ Args:
+ methodInstance: Instance of MethodContext (for access to services)
+ """
+ self.method = methodInstance
+ self.services = methodInstance.services
+
+ def parseDocumentIndex(self, documentsIndex: str, workflow: Any) -> Dict[str, Any]:
+ """Parse the document index string into structured data."""
+ try:
+ indexData = {
+ "workflowId": getattr(workflow, 'id', 'unknown'),
+ "generatedAt": datetime.now(UTC).isoformat(),
+ "totalDocuments": 0,
+ "rounds": [],
+ "documentReferences": []
+ }
+
+ # Extract document references from the index string
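+            # Expected shape of the raw index string (illustrative sketch):
+            #   Current round documents:
+            #   - docList:<list reference>
+            #     - docItem:<document reference>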
+ lines = documentsIndex.split('\n')
+ currentRound = None
+ currentDocList = None
+
+ for line in lines:
+ line = line.strip()
+ if not line:
+ continue
+
+ # Check for round headers
+ if "Current round documents:" in line:
+ currentRound = "current"
+ continue
+ elif "Past rounds documents:" in line:
+ currentRound = "past"
+ continue
+
+ # Check for document list references (docList:...)
+ if line.startswith("- docList:"):
+ docListRef = line.replace("- docList:", "").strip()
+ currentDocList = {
+ "reference": docListRef,
+ "round": currentRound,
+ "documents": []
+ }
+ indexData["rounds"].append(currentDocList)
+ continue
+
+ # Check for individual document references (docItem:...)
+ if line.startswith(" - docItem:") or line.startswith("- docItem:"):
+ docItemRef = line.replace(" - docItem:", "").replace("- docItem:", "").strip()
+ indexData["documentReferences"].append({
+ "reference": docItemRef,
+ "round": currentRound,
+ "docList": currentDocList["reference"] if currentDocList else None
+ })
+ indexData["totalDocuments"] += 1
+ if currentDocList:
+ currentDocList["documents"].append(docItemRef)
+
+ return indexData
+
+ except Exception as e:
+ logger.error(f"Error parsing document index: {str(e)}")
+ return {
+ "workflowId": getattr(workflow, 'id', 'unknown'),
+ "error": f"Failed to parse document index: {str(e)}",
+ "rawIndex": documentsIndex
+ }
+
diff --git a/modules/workflows/methods/methodContext/helpers/formatting.py b/modules/workflows/methods/methodContext/helpers/formatting.py
new file mode 100644
index 00000000..ac38fb86
--- /dev/null
+++ b/modules/workflows/methods/methodContext/helpers/formatting.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Formatting helper for Context operations.
+Handles formatting of document indexes in different formats.
+"""
+
+import logging
+from typing import Dict, Any
+
+logger = logging.getLogger(__name__)
+
+class FormattingHelper:
+ """Helper for formatting operations"""
+
+ def __init__(self, methodInstance):
+ """
+ Initialize formatting helper.
+
+ Args:
+ methodInstance: Instance of MethodContext (for access to services)
+ """
+ self.method = methodInstance
+ self.services = methodInstance.services
+
+ def formatAsMarkdown(self, indexData: Dict[str, Any]) -> str:
+ """Format document index as Markdown."""
+ try:
+ md = f"# Document Index\n\n"
+ md += f"**Workflow ID:** {indexData.get('workflowId', 'unknown')}\n\n"
+ md += f"**Generated At:** {indexData.get('generatedAt', 'unknown')}\n\n"
+ md += f"**Total Documents:** {indexData.get('totalDocuments', 0)}\n\n"
+
+ if indexData.get('rounds'):
+ md += "## Documents by Round\n\n"
+ for roundInfo in indexData['rounds']:
+                    # round may be stored as None, so fall back before calling .title()
+                    roundLabel = (roundInfo.get('round') or 'unknown').title()
+ md += f"### {roundLabel} Round\n\n"
+ md += f"**Document List:** `{roundInfo.get('reference', 'unknown')}`\n\n"
+ if roundInfo.get('documents'):
+ md += "**Documents:**\n\n"
+ for docRef in roundInfo['documents']:
+ md += f"- `{docRef}`\n"
+ md += "\n"
+
+ if indexData.get('documentReferences'):
+ md += "## All Document References\n\n"
+ for docRef in indexData['documentReferences']:
+ md += f"- `{docRef.get('reference', 'unknown')}`\n"
+
+ return md
+
+ except Exception as e:
+ logger.error(f"Error formatting as Markdown: {str(e)}")
+ return f"# Document Index\n\nError formatting index: {str(e)}\n"
+
+ def formatAsText(self, indexData: Dict[str, Any], rawIndex: str) -> str:
+ """Format document index as plain text."""
+ try:
+ text = "Document Index\n"
+ text += "=" * 50 + "\n\n"
+ text += f"Workflow ID: {indexData.get('workflowId', 'unknown')}\n"
+ text += f"Generated At: {indexData.get('generatedAt', 'unknown')}\n"
+ text += f"Total Documents: {indexData.get('totalDocuments', 0)}\n\n"
+
+ # Include the raw formatted index for readability
+ text += rawIndex
+
+ return text
+
+ except Exception as e:
+ logger.error(f"Error formatting as text: {str(e)}")
+ return f"Document Index\n\nError formatting index: {str(e)}\n\nRaw index:\n{rawIndex}\n"
+
diff --git a/modules/workflows/methods/methodContext/methodContext.py b/modules/workflows/methods/methodContext/methodContext.py
new file mode 100644
index 00000000..5481f70b
--- /dev/null
+++ b/modules/workflows/methods/methodContext/methodContext.py
@@ -0,0 +1,108 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+import logging
+from modules.workflows.methods.methodBase import MethodBase
+from modules.datamodels.datamodelWorkflowActions import WorkflowActionDefinition, WorkflowActionParameter
+from modules.shared.frontendTypes import FrontendType
+
+# Import helpers
+from .helpers.documentIndex import DocumentIndexHelper
+from .helpers.formatting import FormattingHelper
+
+# Import actions
+from .actions.getDocumentIndex import getDocumentIndex
+from .actions.extractContent import extractContent
+from .actions.triggerPreprocessingServer import triggerPreprocessingServer
+
+logger = logging.getLogger(__name__)
+
+class MethodContext(MethodBase):
+ """Context and workflow information methods."""
+
+ def __init__(self, services):
+ super().__init__(services)
+ self.name = "context"
+ self.description = "Context and workflow information methods"
+
+ # Initialize helper modules
+ self.documentIndex = DocumentIndexHelper(self)
+ self.formatting = FormattingHelper(self)
+
+        # RBAC integration: action definitions with actionId
+ self._actions = {
+ "getDocumentIndex": WorkflowActionDefinition(
+ actionId="context.getDocumentIndex",
+ description="Generate a comprehensive index of all documents available in the current workflow",
+ parameters={
+ "resultType": WorkflowActionParameter(
+ name="resultType",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["json", "txt", "md"],
+ required=False,
+ default="json",
+ description="Output format"
+ )
+ },
+ execute=getDocumentIndex.__get__(self, self.__class__)
+ ),
+ "extractContent": WorkflowActionDefinition(
+ actionId="context.extractContent",
+ description="Extract content from documents (separate from AI calls)",
+ parameters={
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference(s) to extract content from"
+ ),
+ "extractionOptions": WorkflowActionParameter(
+ name="extractionOptions",
+ type="dict",
+ frontendType=FrontendType.JSON,
+ required=False,
+ description="Extraction options (if not provided, defaults are used)"
+ )
+ },
+ execute=extractContent.__get__(self, self.__class__)
+ ),
+ "triggerPreprocessingServer": WorkflowActionDefinition(
+ actionId="context.triggerPreprocessingServer",
+ description="Trigger preprocessing server at customer tenant to update database with configuration",
+ parameters={
+ "endpoint": WorkflowActionParameter(
+ name="endpoint",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="The full URL endpoint for the preprocessing server API"
+ ),
+ "configJson": WorkflowActionParameter(
+ name="configJson",
+ type="str",
+ frontendType=FrontendType.JSON,
+ required=True,
+ description="Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string"
+ ),
+ "authSecretConfigKey": WorkflowActionParameter(
+ name="authSecretConfigKey",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="The APP_CONFIG key name to retrieve the authorization secret from"
+ )
+ },
+ execute=triggerPreprocessingServer.__get__(self, self.__class__)
+ )
+ }
+
+ # Validate actions after definition
+ self._validateActions()
+
+        # Register actions as methods (optional, for direct access)
+ self.getDocumentIndex = getDocumentIndex.__get__(self, self.__class__)
+ self.extractContent = extractContent.__get__(self, self.__class__)
+ self.triggerPreprocessingServer = triggerPreprocessingServer.__get__(self, self.__class__)
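+
+        # Illustrative direct invocation (assuming the @action wrapper preserves
+        # the coroutine signature):
+        #   method = MethodContext(services)
+        #   result = await method.getDocumentIndex({"resultType": "md"})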
+
diff --git a/modules/workflows/methods/methodJira.py b/modules/workflows/methods/methodJira.py.old
similarity index 100%
rename from modules/workflows/methods/methodJira.py
rename to modules/workflows/methods/methodJira.py.old
diff --git a/modules/workflows/methods/methodJira/__init__.py b/modules/workflows/methods/methodJira/__init__.py
new file mode 100644
index 00000000..e8b3822d
--- /dev/null
+++ b/modules/workflows/methods/methodJira/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+from .methodJira import MethodJira
+
+__all__ = ['MethodJira']
+
diff --git a/modules/workflows/methods/methodJira/actions/__init__.py b/modules/workflows/methods/methodJira/actions/__init__.py
new file mode 100644
index 00000000..67b0d38d
--- /dev/null
+++ b/modules/workflows/methods/methodJira/actions/__init__.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""Action modules for JIRA operations."""
+
+# Export all actions
+from .connectJira import connectJira
+from .exportTicketsAsJson import exportTicketsAsJson
+from .importTicketsFromJson import importTicketsFromJson
+from .mergeTicketData import mergeTicketData
+from .parseCsvContent import parseCsvContent
+from .parseExcelContent import parseExcelContent
+from .createCsvContent import createCsvContent
+from .createExcelContent import createExcelContent
+
+__all__ = [
+ 'connectJira',
+ 'exportTicketsAsJson',
+ 'importTicketsFromJson',
+ 'mergeTicketData',
+ 'parseCsvContent',
+ 'parseExcelContent',
+ 'createCsvContent',
+ 'createExcelContent',
+]
+
diff --git a/modules/workflows/methods/methodJira/actions/connectJira.py b/modules/workflows/methods/methodJira/actions/connectJira.py
new file mode 100644
index 00000000..8200514a
--- /dev/null
+++ b/modules/workflows/methods/methodJira/actions/connectJira.py
@@ -0,0 +1,139 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Connect JIRA action for JIRA operations.
+Connects to JIRA instance and creates ticket interface.
+"""
+
+import logging
+import json
+import uuid
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+from modules.shared.configuration import APP_CONFIG
+
+logger = logging.getLogger(__name__)
+
+@action
+async def connectJira(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Connect to JIRA instance and create ticket interface.
+
+ Parameters:
+ - apiUsername (str, required): JIRA API username/email
+ - apiTokenConfigKey (str, required): APP_CONFIG key name for JIRA API token
+ - apiUrl (str, required): JIRA instance URL (e.g., https://example.atlassian.net)
+ - projectCode (str, required): JIRA project code (e.g., "DCS")
+ - issueType (str, required): JIRA issue type (e.g., "Task")
+ - taskSyncDefinition (str or dict, required): Field mapping definition as JSON string or dict
+
+ Returns:
+ - ActionResult with ActionDocument containing connection ID
+ """
+ try:
+ apiUsername = parameters.get("apiUsername")
+ if not apiUsername:
+ return ActionResult.isFailure(error="apiUsername parameter is required")
+
+ apiTokenConfigKey = parameters.get("apiTokenConfigKey")
+ if not apiTokenConfigKey:
+ return ActionResult.isFailure(error="apiTokenConfigKey parameter is required")
+
+ apiUrl = parameters.get("apiUrl")
+ if not apiUrl:
+ return ActionResult.isFailure(error="apiUrl parameter is required")
+
+ projectCode = parameters.get("projectCode")
+ if not projectCode:
+ return ActionResult.isFailure(error="projectCode parameter is required")
+
+ issueType = parameters.get("issueType")
+ if not issueType:
+ return ActionResult.isFailure(error="issueType parameter is required")
+
+ taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
+ if not taskSyncDefinitionParam:
+ return ActionResult.isFailure(error="taskSyncDefinition parameter is required")
+
+ # Parse taskSyncDefinition
+ if isinstance(taskSyncDefinitionParam, str):
+ try:
+ taskSyncDefinition = json.loads(taskSyncDefinitionParam)
+ except json.JSONDecodeError as e:
+ return ActionResult.isFailure(error=f"taskSyncDefinition is not valid JSON: {str(e)}")
+ elif isinstance(taskSyncDefinitionParam, dict):
+ taskSyncDefinition = taskSyncDefinitionParam
+ else:
+ return ActionResult.isFailure(error=f"taskSyncDefinition must be a dict or JSON string, got {type(taskSyncDefinitionParam)}")
+
+ # Get API token from APP_CONFIG
+ apiToken = APP_CONFIG.get(apiTokenConfigKey)
+ if not apiToken:
+ errorMsg = f"{apiTokenConfigKey} not found in APP_CONFIG"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
+ # Create ticket interface
+ syncInterface = await self.services.ticket.connectTicket(
+ taskSyncDefinition=taskSyncDefinition,
+ connectorType="Jira",
+ connectorParams={
+ "apiUsername": apiUsername,
+ "apiToken": apiToken,
+ "apiUrl": apiUrl,
+ "projectCode": projectCode,
+ "ticketType": issueType,
+ },
+ )
+
+ # Store connection with unique ID
+ connectionId = str(uuid.uuid4())
+ self._connections[connectionId] = {
+ "interface": syncInterface,
+ "taskSyncDefinition": taskSyncDefinition,
+ "apiUrl": apiUrl,
+ "projectCode": projectCode,
+ }
+
+ logger.info(f"JIRA connection established: {connectionId} (Project: {projectCode})")
+
+ # Generate filename
+ workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+ filename = self._generateMeaningfulFileName(
+ "jira_connection",
+ "json",
+ workflowContext,
+ "connectJira"
+ )
+
+ # Create connection info document
+ connectionInfo = {
+ "connectionId": connectionId,
+ "apiUrl": apiUrl,
+ "projectCode": projectCode,
+ "issueType": issueType,
+ }
+
+ validationMetadata = self._createValidationMetadata(
+ "connectJira",
+ connectionId=connectionId,
+ apiUrl=apiUrl,
+ projectCode=projectCode
+ )
+
+ document = ActionDocument(
+ documentName=filename,
+ documentData=json.dumps(connectionInfo, indent=2),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ errorMsg = f"Error connecting to JIRA: {str(e)}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
diff --git a/modules/workflows/methods/methodJira/actions/createCsvContent.py b/modules/workflows/methods/methodJira/actions/createCsvContent.py
new file mode 100644
index 00000000..c856760e
--- /dev/null
+++ b/modules/workflows/methods/methodJira/actions/createCsvContent.py
@@ -0,0 +1,157 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Create CSV Content action for JIRA operations.
+Creates CSV content with custom headers.
+"""
+
+import logging
+import json
+import base64
+import pandas as pd
+import csv as csv_module
+from io import StringIO
+from datetime import datetime, UTC
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def createCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Create CSV content with custom headers.
+
+ Parameters:
+ - data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
+ - headers (str, optional): Document reference containing headers JSON (from parseCsvContent/parseExcelContent)
+ - columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
+ - taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
+
+ Returns:
+ - ActionResult with ActionDocument containing CSV content as bytes
+ """
+ try:
+ dataParam = parameters.get("data")
+ if not dataParam:
+ return ActionResult.isFailure(error="data parameter is required")
+
+ headersParam = parameters.get("headers")
+ columnsParam = parameters.get("columns")
+ taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
+
+ # Get data from document
+ dataJson = self.documentParsing.parseJsonFromDocument(dataParam)
+ if dataJson is None:
+ return ActionResult.isFailure(error="Could not parse data from document reference")
+
+ # Extract data array if wrapped in object
+ if isinstance(dataJson, dict) and "data" in dataJson:
+ dataList = dataJson["data"]
+ elif isinstance(dataJson, list):
+ dataList = dataJson
+ else:
+ return ActionResult.isFailure(error="Data must be a JSON array or object with 'data' field")
+
+ # Get headers
+ headers = {"header1": "Header 1", "header2": "Header 2"}
+ if headersParam:
+ headersJson = self.documentParsing.parseJsonFromDocument(headersParam)
+ if headersJson and isinstance(headersJson, dict) and "headers" in headersJson:
+ headers = headersJson["headers"]
+ elif headersJson and isinstance(headersJson, dict):
+ headers = headersJson
+
+ # Get columns
+ if columnsParam:
+ if isinstance(columnsParam, str):
+ try:
+                    columns = json.loads(columnsParam) if columnsParam.startswith(('[', '{')) else columnsParam.split(',')
+                except json.JSONDecodeError:
+                    columns = columnsParam.split(',')
+ elif isinstance(columnsParam, list):
+ columns = columnsParam
+ else:
+ columns = None
+ elif taskSyncDefinitionParam:
+ # Extract columns from taskSyncDefinition
+ if isinstance(taskSyncDefinitionParam, str):
+ taskSyncDefinition = json.loads(taskSyncDefinitionParam)
+ else:
+ taskSyncDefinition = taskSyncDefinitionParam
+ columns = list(taskSyncDefinition.keys())
+ elif dataList and len(dataList) > 0:
+ columns = list(dataList[0].keys())
+ else:
+ columns = []
+
+ # Create DataFrame
+ if not dataList:
+ df = pd.DataFrame(columns=columns)
+ else:
+ df = pd.DataFrame(dataList)
+ # Ensure all columns exist
+ for col in columns:
+ if col not in df.columns:
+ df[col] = ""
+ # Reorder columns
+ df = df[columns]
+
+ # Clean data
+ for column in df.columns:
+ df[column] = df[column].astype("object").fillna("")
+ df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False).str.replace('"', '""', regex=False)
+
+ # Create headers with timestamp
+ timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
+ header1Row = next(csv_module.reader([headers.get("header1", "Header 1")]), [])
+ header2Row = next(csv_module.reader([headers.get("header2", "Header 2")]), [])
+ if len(header2Row) > 1:
+ header2Row[1] = timestamp
+
+ headerRow1 = pd.DataFrame([header1Row + [""] * (len(df.columns) - len(header1Row))], columns=df.columns)
+ headerRow2 = pd.DataFrame([header2Row + [""] * (len(df.columns) - len(header2Row))], columns=df.columns)
+ tableHeaders = pd.DataFrame([df.columns.tolist()], columns=df.columns)
+ finalDf = pd.concat([headerRow1, headerRow2, tableHeaders, df], ignore_index=True)
+
+ # Convert to CSV bytes
+ out = StringIO()
+        finalDf.to_csv(out, index=False, header=False, quoting=csv_module.QUOTE_ALL, escapechar='\\')
+ csvBytes = out.getvalue().encode('utf-8')
+
+ logger.info(f"Created CSV content: {len(dataList)} rows, {len(columns)} columns")
+
+ # Generate filename
+ workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+ filename = self._generateMeaningfulFileName(
+ "ticket_sync",
+ "csv",
+ workflowContext,
+ "createCsvContent"
+ )
+
+ validationMetadata = self._createValidationMetadata(
+ "createCsvContent",
+ rowCount=len(dataList),
+ columnCount=len(columns)
+ )
+
+ # Store as base64 for document
+ csvBase64 = base64.b64encode(csvBytes).decode('utf-8')
+
+ document = ActionDocument(
+ documentName=filename,
+ documentData=csvBase64,
+ mimeType="application/octet-stream",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ errorMsg = f"Error creating CSV content: {str(e)}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
diff --git a/modules/workflows/methods/methodJira/actions/createExcelContent.py b/modules/workflows/methods/methodJira/actions/createExcelContent.py
new file mode 100644
index 00000000..fbf54299
--- /dev/null
+++ b/modules/workflows/methods/methodJira/actions/createExcelContent.py
@@ -0,0 +1,157 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Create Excel Content action for JIRA operations.
+Creates Excel content with custom headers.
+"""
+
+import logging
+import json
+import base64
+import pandas as pd
+import csv as csv_module
+from io import BytesIO
+from datetime import datetime, UTC
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def createExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Create Excel content with custom headers.
+
+ Parameters:
+ - data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
+ - headers (str, optional): Document reference containing headers JSON (from parseExcelContent)
+ - columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
+ - taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
+
+ Returns:
+ - ActionResult with ActionDocument containing Excel content as bytes
+ """
+ try:
+ dataParam = parameters.get("data")
+ if not dataParam:
+ return ActionResult.isFailure(error="data parameter is required")
+
+ headersParam = parameters.get("headers")
+ columnsParam = parameters.get("columns")
+ taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
+
+ # Get data from document
+ dataJson = self.documentParsing.parseJsonFromDocument(dataParam)
+ if dataJson is None:
+ return ActionResult.isFailure(error="Could not parse data from document reference")
+
+ # Extract data array if wrapped in object
+ if isinstance(dataJson, dict) and "data" in dataJson:
+ dataList = dataJson["data"]
+ elif isinstance(dataJson, list):
+ dataList = dataJson
+ else:
+ return ActionResult.isFailure(error="Data must be a JSON array or object with 'data' field")
+
+ # Get headers
+ headers = {"header1": "Header 1", "header2": "Header 2"}
+ if headersParam:
+ headersJson = self.documentParsing.parseJsonFromDocument(headersParam)
+ if headersJson and isinstance(headersJson, dict) and "headers" in headersJson:
+ headers = headersJson["headers"]
+ elif headersJson and isinstance(headersJson, dict):
+ headers = headersJson
+
+ # Get columns
+ if columnsParam:
+ if isinstance(columnsParam, str):
+ try:
+                    columns = json.loads(columnsParam) if columnsParam.startswith(('[', '{')) else columnsParam.split(',')
+                except json.JSONDecodeError:
+                    columns = columnsParam.split(',')
+ elif isinstance(columnsParam, list):
+ columns = columnsParam
+ else:
+ columns = None
+ elif taskSyncDefinitionParam:
+ # Extract columns from taskSyncDefinition
+ if isinstance(taskSyncDefinitionParam, str):
+ taskSyncDefinition = json.loads(taskSyncDefinitionParam)
+ else:
+ taskSyncDefinition = taskSyncDefinitionParam
+ columns = list(taskSyncDefinition.keys())
+ elif dataList and len(dataList) > 0:
+ columns = list(dataList[0].keys())
+ else:
+ columns = []
+
+ # Create DataFrame
+ if not dataList:
+ df = pd.DataFrame(columns=columns)
+ else:
+ df = pd.DataFrame(dataList)
+ # Ensure all columns exist
+ for col in columns:
+ if col not in df.columns:
+ df[col] = ""
+ # Reorder columns
+ df = df[columns]
+
+ # Clean data
+ for column in df.columns:
+ df[column] = df[column].astype("object").fillna("")
+ df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False).str.replace('"', '""', regex=False)
+
+ # Create headers with timestamp
+ timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
+ header1Row = next(csv_module.reader([headers.get("header1", "Header 1")]), [])
+ header2Row = next(csv_module.reader([headers.get("header2", "Header 2")]), [])
+ if len(header2Row) > 1:
+ header2Row[1] = timestamp
+
+ headerRow1 = pd.DataFrame([header1Row + [""] * (len(df.columns) - len(header1Row))], columns=df.columns)
+ headerRow2 = pd.DataFrame([header2Row + [""] * (len(df.columns) - len(header2Row))], columns=df.columns)
+ tableHeaders = pd.DataFrame([df.columns.tolist()], columns=df.columns)
+ finalDf = pd.concat([headerRow1, headerRow2, tableHeaders, df], ignore_index=True)
+
+ # Convert to Excel bytes
+ buf = BytesIO()
+ finalDf.to_excel(buf, index=False, header=False, engine='openpyxl')
+ excelBytes = buf.getvalue()
+
+ logger.info(f"Created Excel content: {len(dataList)} rows, {len(columns)} columns")
+
+ # Generate filename
+ workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+ filename = self._generateMeaningfulFileName(
+ "ticket_sync",
+ "xlsx",
+ workflowContext,
+ "createExcelContent"
+ )
+
+ validationMetadata = self._createValidationMetadata(
+ "createExcelContent",
+ rowCount=len(dataList),
+ columnCount=len(columns)
+ )
+
+ # Store as base64 for document
+ excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
+
+ document = ActionDocument(
+ documentName=filename,
+ documentData=excelBase64,
+ mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ errorMsg = f"Error creating Excel content: {str(e)}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
diff --git a/modules/workflows/methods/methodJira/actions/exportTicketsAsJson.py b/modules/workflows/methods/methodJira/actions/exportTicketsAsJson.py
new file mode 100644
index 00000000..85926851
--- /dev/null
+++ b/modules/workflows/methods/methodJira/actions/exportTicketsAsJson.py
@@ -0,0 +1,84 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Export Tickets As JSON action for JIRA operations.
+Exports tickets from JIRA as JSON list.
+"""
+
+import logging
+import json
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def exportTicketsAsJson(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Export tickets from JIRA as JSON list.
+
+ Parameters:
+ - connectionId (str, required): Connection ID from connectJira action result
+ - taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
+
+ Returns:
+ - ActionResult with ActionDocument containing list of tickets as JSON
+ """
+ try:
+ connectionIdParam = parameters.get("connectionId")
+ if not connectionIdParam:
+ return ActionResult.isFailure(error="connectionId parameter is required")
+
+ # Get connection ID from document if it's a reference
+ connectionId = None
+ if isinstance(connectionIdParam, str):
+ # Try to parse from document reference
+ connectionInfo = self.documentParsing.parseJsonFromDocument(connectionIdParam)
+ if connectionInfo and "connectionId" in connectionInfo:
+ connectionId = connectionInfo["connectionId"]
+ else:
+ # Assume it's the connection ID directly
+ connectionId = connectionIdParam
+
+ if not connectionId or connectionId not in self._connections:
+ return ActionResult.isFailure(error=f"Connection ID {connectionIdParam} not found. Ensure connectJira was called first.")
+
+ connection = self._connections[connectionId]
+ syncInterface = connection["interface"]
+
+ # Export tickets
+ dataList = await syncInterface.exportTicketsAsList()
+
+ logger.info(f"Exported {len(dataList)} tickets from JIRA")
+
+ # Generate filename
+ workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+ filename = self._generateMeaningfulFileName(
+ "jira_tickets_export",
+ "json",
+ workflowContext,
+ "exportTicketsAsJson"
+ )
+
+ validationMetadata = self._createValidationMetadata(
+ "exportTicketsAsJson",
+ connectionId=connectionId,
+ ticketCount=len(dataList)
+ )
+
+ document = ActionDocument(
+ documentName=filename,
+ documentData=json.dumps(dataList, indent=2, ensure_ascii=False),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ errorMsg = f"Error exporting tickets from JIRA: {str(e)}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
diff --git a/modules/workflows/methods/methodJira/actions/importTicketsFromJson.py b/modules/workflows/methods/methodJira/actions/importTicketsFromJson.py
new file mode 100644
index 00000000..b17519ea
--- /dev/null
+++ b/modules/workflows/methods/methodJira/actions/importTicketsFromJson.py
@@ -0,0 +1,101 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Import Tickets From JSON action for JIRA operations.
+Imports ticket data from JSON back to JIRA.
+"""
+
+import logging
+import json
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def importTicketsFromJson(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Import ticket data from JSON back to JIRA.
+
+ Parameters:
+ - connectionId (str, required): Connection ID from connectJira action result
+ - ticketData (str, required): Document reference containing ticket data as JSON
+ - taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
+
+ Returns:
+ - ActionResult with ActionDocument containing import result with counts
+ """
+ try:
+ connectionIdParam = parameters.get("connectionId")
+ if not connectionIdParam:
+ return ActionResult.isFailure(error="connectionId parameter is required")
+
+ ticketDataParam = parameters.get("ticketData")
+ if not ticketDataParam:
+ return ActionResult.isFailure(error="ticketData parameter is required")
+
+ # Get connection ID from document if it's a reference
+ connectionId = None
+ if isinstance(connectionIdParam, str):
+ connectionInfo = self.documentParsing.parseJsonFromDocument(connectionIdParam)
+ if connectionInfo and "connectionId" in connectionInfo:
+ connectionId = connectionInfo["connectionId"]
+ else:
+ connectionId = connectionIdParam
+
+ if not connectionId or connectionId not in self._connections:
+ return ActionResult.isFailure(error=f"Connection ID {connectionIdParam} not found. Ensure connectJira was called first.")
+
+ connection = self._connections[connectionId]
+ syncInterface = connection["interface"]
+
+ # Get ticket data from document
+ ticketDataJson = self.documentParsing.parseJsonFromDocument(ticketDataParam)
+ if ticketDataJson is None:
+ return ActionResult.isFailure(error="Could not parse ticket data from document reference")
+
+ # Ensure it's a list
+ if not isinstance(ticketDataJson, list):
+ return ActionResult.isFailure(error="ticketData must be a JSON array")
+
+ # Import tickets
+ await syncInterface.importListToTickets(ticketDataJson)
+
+ logger.info(f"Imported {len(ticketDataJson)} tickets to JIRA")
+
+ # Generate filename
+ workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+ filename = self._generateMeaningfulFileName(
+ "jira_import_result",
+ "json",
+ workflowContext,
+ "importTicketsFromJson"
+ )
+
+ importResult = {
+ "imported": len(ticketDataJson),
+ "connectionId": connectionId,
+ }
+
+ validationMetadata = self._createValidationMetadata(
+ "importTicketsFromJson",
+ connectionId=connectionId,
+ importedCount=len(ticketDataJson)
+ )
+
+ document = ActionDocument(
+ documentName=filename,
+ documentData=json.dumps(importResult, indent=2),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ errorMsg = f"Error importing tickets to JIRA: {str(e)}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
diff --git a/modules/workflows/methods/methodJira/actions/mergeTicketData.py b/modules/workflows/methods/methodJira/actions/mergeTicketData.py
new file mode 100644
index 00000000..a8f8b486
--- /dev/null
+++ b/modules/workflows/methods/methodJira/actions/mergeTicketData.py
@@ -0,0 +1,157 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Merge Ticket Data action for JIRA operations.
+Merges JIRA export data with existing SharePoint data.
+"""
+
+import logging
+import json
+from typing import Dict, Any, List
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def mergeTicketData(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Merge JIRA export data with existing SharePoint data.
+
+ Parameters:
+ - jiraData (str, required): Document reference containing JIRA ticket data as JSON array
+ - existingData (str, required): Document reference containing existing SharePoint data as JSON array
+ - taskSyncDefinition (str or dict, required): Field mapping definition
+ - idField (str, optional): Field name to use as ID for merging (default: "ID")
+
+ Returns:
+ - ActionResult with ActionDocument containing merged data and merge details
+ """
+ try:
+ jiraDataParam = parameters.get("jiraData")
+ if not jiraDataParam:
+ return ActionResult.isFailure(error="jiraData parameter is required")
+
+ existingDataParam = parameters.get("existingData")
+ if not existingDataParam:
+ return ActionResult.isFailure(error="existingData parameter is required")
+
+ taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
+ if not taskSyncDefinitionParam:
+ return ActionResult.isFailure(error="taskSyncDefinition parameter is required")
+
+ idField = parameters.get("idField", "ID")
+
+ # Parse taskSyncDefinition
+ if isinstance(taskSyncDefinitionParam, str):
+ try:
+ taskSyncDefinition = json.loads(taskSyncDefinitionParam)
+ except json.JSONDecodeError as e:
+ return ActionResult.isFailure(error=f"taskSyncDefinition is not valid JSON: {str(e)}")
+ elif isinstance(taskSyncDefinitionParam, dict):
+ taskSyncDefinition = taskSyncDefinitionParam
+ else:
+ return ActionResult.isFailure(error=f"taskSyncDefinition must be a dict or JSON string, got {type(taskSyncDefinitionParam)}")
+
+ # Get data from documents
+ jiraDataJson = self.documentParsing.parseJsonFromDocument(jiraDataParam)
+ if jiraDataJson is None or not isinstance(jiraDataJson, list):
+ return ActionResult.isFailure(error="Could not parse jiraData as JSON array")
+
+ existingDataJson = self.documentParsing.parseJsonFromDocument(existingDataParam)
+ if existingDataJson is None or not isinstance(existingDataJson, list):
+ # Empty existing data is OK
+ existingDataJson = []
+
+ # Perform merge
+ existingLookup = {row.get(idField): row for row in existingDataJson if row.get(idField)}
+ mergedData: List[dict] = []
+ changes: List[str] = []
+ updatedCount = addedCount = unchangedCount = 0
+
+ for jiraRow in jiraDataJson:
+ jiraId = jiraRow.get(idField)
+ if jiraId and jiraId in existingLookup:
+ existingRow = existingLookup[jiraId].copy()
+ rowChanges: List[str] = []
+
+                for fieldName, fieldConfig in taskSyncDefinition.items():
+                    if fieldConfig[0] == 'get':
+                        rawNewValue = jiraRow.get(fieldName)
+                        oldValue = "" if existingRow.get(fieldName) is None else str(existingRow.get(fieldName))
+
+                        # Convert ADF data to readable text for logging
+                        # (check the raw value; a stringified value can never be a dict)
+                        if isinstance(rawNewValue, dict) and rawNewValue.get("type") == "doc":
+                            newValueReadable = self.adfConverter.convertAdfToText(rawNewValue)
+                            if oldValue != newValueReadable:
+                                rowChanges.append(f"{fieldName}: '{oldValue[:100]}...' -> '{newValueReadable[:100]}...'")
+                        else:
+                            newValue = "" if rawNewValue is None else str(rawNewValue)
+                            if oldValue != newValue:
+                                # Truncate long values for logging
+                                oldTruncated = oldValue[:100] + "..." if len(oldValue) > 100 else oldValue
+                                newTruncated = newValue[:100] + "..." if len(newValue) > 100 else newValue
+                                rowChanges.append(f"{fieldName}: '{oldTruncated}' -> '{newTruncated}'")
+
+                        existingRow[fieldName] = rawNewValue
+
+ mergedData.append(existingRow)
+ if rowChanges:
+ updatedCount += 1
+ changes.append(f"Row ID {jiraId} updated: {', '.join(rowChanges)}")
+ else:
+ unchangedCount += 1
+ del existingLookup[jiraId]
+ else:
+ mergedData.append(jiraRow)
+ addedCount += 1
+ changes.append(f"Row ID {jiraId} added as new record")
+
+ # Add remaining existing rows
+ for remaining in existingLookup.values():
+ mergedData.append(remaining)
+ unchangedCount += 1
+
+ mergeDetails = {
+ "updated": updatedCount,
+ "added": addedCount,
+ "unchanged": unchangedCount,
+ "changes": changes
+ }
+
+ logger.info(f"Merged ticket data: {updatedCount} updated, {addedCount} added, {unchangedCount} unchanged")
+
+ # Generate filename
+ workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+ filename = self._generateMeaningfulFileName(
+ "merged_ticket_data",
+ "json",
+ workflowContext,
+ "mergeTicketData"
+ )
+
+ result = {
+ "data": mergedData,
+ "mergeDetails": mergeDetails
+ }
+
+ validationMetadata = self._createValidationMetadata(
+ "mergeTicketData",
+ updated=updatedCount,
+ added=addedCount,
+ unchanged=unchangedCount
+ )
+
+ document = ActionDocument(
+ documentName=filename,
+ documentData=json.dumps(result, indent=2, ensure_ascii=False),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ errorMsg = f"Error merging ticket data: {str(e)}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
diff --git a/modules/workflows/methods/methodJira/actions/parseCsvContent.py b/modules/workflows/methods/methodJira/actions/parseCsvContent.py
new file mode 100644
index 00000000..3038e566
--- /dev/null
+++ b/modules/workflows/methods/methodJira/actions/parseCsvContent.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Parse CSV Content action for JIRA operations.
+Parses CSV content with custom headers.
+"""
+
+import logging
+import json
+import io
+import pandas as pd
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Parse CSV content with custom headers.
+
+ Parameters:
+ - csvContent (str, required): Document reference containing CSV file content as bytes
+ - skipRows (int, optional): Number of header rows to skip (default: 2)
+ - hasCustomHeaders (bool, optional): Whether CSV has custom header rows (default: true)
+
+ Returns:
+ - ActionResult with ActionDocument containing parsed data and headers as JSON
+ """
+ try:
+ csvContentParam = parameters.get("csvContent")
+ if not csvContentParam:
+ return ActionResult.isFailure(error="csvContent parameter is required")
+
+ skipRows = parameters.get("skipRows", 2)
+ hasCustomHeaders = parameters.get("hasCustomHeaders", True)
+
+ # Get CSV content from document
+ csvBytes = self.documentParsing.getDocumentData(csvContentParam)
+ if csvBytes is None:
+ return ActionResult.isFailure(error="Could not get CSV content from document reference")
+
+ # Convert to bytes if needed
+ if isinstance(csvBytes, str):
+ csvBytes = csvBytes.encode('utf-8')
+ elif not isinstance(csvBytes, bytes):
+ return ActionResult.isFailure(error="CSV content must be bytes or string")
+
+ # Parse headers if hasCustomHeaders
+ headers = {"header1": "Header 1", "header2": "Header 2"}
+ if hasCustomHeaders:
+ csvLines = csvBytes.decode('utf-8').split('\n')
+ if len(csvLines) >= 2:
+ headers["header1"] = csvLines[0].rstrip('\r\n')
+ headers["header2"] = csvLines[1].rstrip('\r\n')
+
+ # Parse CSV data
+ df = pd.read_csv(
+ io.BytesIO(csvBytes),
+ skiprows=skipRows,
+ quoting=1,
+ escapechar='\\',
+ on_bad_lines='skip',
+ engine='python'
+ )
+
+ # Convert to dict records
+ for column in df.columns:
+ df[column] = df[column].astype('object').fillna('')
+ data = df.to_dict(orient='records')
+
+ logger.info(f"Parsed CSV: {len(data)} rows, {len(df.columns)} columns")
+
+ # Generate filename
+ workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+ filename = self._generateMeaningfulFileName(
+ "parsed_csv_data",
+ "json",
+ workflowContext,
+ "parseCsvContent"
+ )
+
+ result = {
+ "data": data,
+ "headers": headers,
+ "rowCount": len(data),
+ "columnCount": len(df.columns)
+ }
+
+ validationMetadata = self._createValidationMetadata(
+ "parseCsvContent",
+ rowCount=len(data),
+ columnCount=len(df.columns),
+ skipRows=skipRows
+ )
+
+ document = ActionDocument(
+ documentName=filename,
+ documentData=json.dumps(result, indent=2, ensure_ascii=False),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ errorMsg = f"Error parsing CSV content: {str(e)}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
diff --git a/modules/workflows/methods/methodJira/actions/parseExcelContent.py b/modules/workflows/methods/methodJira/actions/parseExcelContent.py
new file mode 100644
index 00000000..c0d64325
--- /dev/null
+++ b/modules/workflows/methods/methodJira/actions/parseExcelContent.py
@@ -0,0 +1,121 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Parse Excel Content action for JIRA operations.
+Parses Excel content with custom headers.
+"""
+
+import logging
+import json
+import pandas as pd
+from io import BytesIO
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def parseExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Parse Excel content with custom headers.
+
+ Parameters:
+ - excelContent (str, required): Document reference containing Excel file content as bytes
+ - skipRows (int, optional): Number of header rows to skip (default: 3)
+ - hasCustomHeaders (bool, optional): Whether Excel has custom header rows (default: true)
+
+ Returns:
+ - ActionResult with ActionDocument containing parsed data and headers as JSON
+ """
+ try:
+ excelContentParam = parameters.get("excelContent")
+ if not excelContentParam:
+ return ActionResult.isFailure(error="excelContent parameter is required")
+
+ skipRows = parameters.get("skipRows", 3)
+ hasCustomHeaders = parameters.get("hasCustomHeaders", True)
+
+ # Get Excel content from document
+ excelBytes = self.documentParsing.getDocumentData(excelContentParam)
+ if excelBytes is None:
+ return ActionResult.isFailure(error="Could not get Excel content from document reference")
+
+ # Convert to bytes if needed
+ if isinstance(excelBytes, str):
+            excelBytes = excelBytes.encode('latin-1')  # latin-1 round-trips raw 0-255 byte values stored in a str
+ elif not isinstance(excelBytes, bytes):
+ return ActionResult.isFailure(error="Excel content must be bytes or string")
+
+ # Parse Excel
+ df = pd.read_excel(BytesIO(excelBytes), engine='openpyxl', header=None)
+
+ # Extract headers if hasCustomHeaders
+ headers = {"header1": "Header 1", "header2": "Header 2"}
+ if hasCustomHeaders and len(df) >= 3:
+ headerRow1 = df.iloc[0:1].copy()
+ headerRow2 = df.iloc[1:2].copy()
+ tableHeaders = df.iloc[2:3].copy()
+ dfData = df.iloc[skipRows:].copy()
+ dfData.columns = tableHeaders.iloc[0]
+
+ headers = {
+ "header1": ",".join([str(x) if pd.notna(x) else "" for x in headerRow1.iloc[0].tolist()]),
+ "header2": ",".join([str(x) if pd.notna(x) else "" for x in headerRow2.iloc[0].tolist()]),
+ }
+ else:
+ # No custom headers, use standard parsing
+ if skipRows > 0:
+ dfData = df.iloc[skipRows:].copy()
+ if len(df) > skipRows:
+ dfData.columns = df.iloc[skipRows-1]
+ else:
+ dfData = df.copy()
+
+ # Reset index and clean data
+ dfData = dfData.reset_index(drop=True)
+ for column in dfData.columns:
+ dfData[column] = dfData[column].astype('object').fillna('')
+
+ data = dfData.to_dict(orient='records')
+
+ logger.info(f"Parsed Excel: {len(data)} rows, {len(dfData.columns)} columns")
+
+ # Generate filename
+ workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+ filename = self._generateMeaningfulFileName(
+ "parsed_excel_data",
+ "json",
+ workflowContext,
+ "parseExcelContent"
+ )
+
+ result = {
+ "data": data,
+ "headers": headers,
+ "rowCount": len(data),
+ "columnCount": len(dfData.columns)
+ }
+
+ validationMetadata = self._createValidationMetadata(
+ "parseExcelContent",
+ rowCount=len(data),
+ columnCount=len(dfData.columns),
+ skipRows=skipRows
+ )
+
+ document = ActionDocument(
+ documentName=filename,
+ documentData=json.dumps(result, indent=2, ensure_ascii=False),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ errorMsg = f"Error parsing Excel content: {str(e)}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
diff --git a/modules/workflows/methods/methodJira/helpers/__init__.py b/modules/workflows/methods/methodJira/helpers/__init__.py
new file mode 100644
index 00000000..cf2fc4c7
--- /dev/null
+++ b/modules/workflows/methods/methodJira/helpers/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""Helper modules for JIRA method operations."""
+
diff --git a/modules/workflows/methods/methodJira/helpers/adfConverter.py b/modules/workflows/methods/methodJira/helpers/adfConverter.py
new file mode 100644
index 00000000..d8619989
--- /dev/null
+++ b/modules/workflows/methods/methodJira/helpers/adfConverter.py
@@ -0,0 +1,180 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+ADF Converter helper for JIRA operations.
+Handles conversion of Atlassian Document Format (ADF) to plain text.
+"""
+
+import logging
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+class AdfConverterHelper:
+ """Helper for ADF conversion operations"""
+
+ def __init__(self, methodInstance):
+ """
+ Initialize ADF converter helper.
+
+ Args:
+ methodInstance: Instance of MethodJira (for access to services)
+ """
+ self.method = methodInstance
+ self.services = methodInstance.services
+
+    def convertAdfToText(self, adfData: Any) -> str:
+ """Convert Atlassian Document Format (ADF) to plain text.
+
+ Based on Atlassian Document Format specification for JIRA fields.
+ Handles paragraphs, lists, text formatting, and other ADF node types.
+
+ Args:
+ adfData: ADF object or None
+
+ Returns:
+ str: Plain text content, or empty string if None/invalid
+ """
+ if not adfData or not isinstance(adfData, dict):
+ return ""
+
+ if adfData.get("type") != "doc":
+ return str(adfData) if adfData else ""
+
+ content = adfData.get("content", [])
+ if not isinstance(content, list):
+ return ""
+
+ def extractTextFromContent(contentList, listLevel=0):
+ """Recursively extract text from ADF content with proper formatting."""
+ textParts = []
+ listCounter = 1
+
+ for item in contentList:
+ if not isinstance(item, dict):
+ continue
+
+ itemType = item.get("type", "")
+
+ if itemType == "text":
+ # Extract text content, preserving formatting
+ text = item.get("text", "")
+ marks = item.get("marks", [])
+
+ # Handle text formatting (bold, italic, etc.)
+ if marks:
+ for mark in marks:
+ if mark.get("type") == "strong":
+ text = f"**{text}**"
+ elif mark.get("type") == "em":
+ text = f"*{text}*"
+ elif mark.get("type") == "code":
+ text = f"`{text}`"
+ elif mark.get("type") == "link":
+ attrs = mark.get("attrs", {})
+ href = attrs.get("href", "")
+ if href:
+ text = f"[{text}]({href})"
+
+ textParts.append(text)
+
+ elif itemType == "hardBreak":
+ textParts.append("\n")
+
+ elif itemType == "paragraph":
+ paragraphContent = item.get("content", [])
+ if paragraphContent:
+ paragraphText = extractTextFromContent(paragraphContent, listLevel)
+ if paragraphText.strip():
+ textParts.append(paragraphText)
+
+ elif itemType == "bulletList":
+ listContent = item.get("content", [])
+ if listContent:
+ listText = extractTextFromContent(listContent, listLevel + 1)
+ if listText.strip():
+ textParts.append(listText)
+
+ elif itemType == "orderedList":
+ listContent = item.get("content", [])
+ if listContent:
+ listText = extractTextFromContent(listContent, listLevel + 1)
+ if listText.strip():
+ textParts.append(listText)
+
+ elif itemType == "listItem":
+ itemContent = item.get("content", [])
+ if itemContent:
+ indent = " " * listLevel
+ itemText = extractTextFromContent(itemContent, listLevel)
+ if itemText.strip():
+ prefix = f"{indent}- " if listLevel > 0 else "- "
+ textParts.append(f"{prefix}{itemText}")
+
+ elif itemType == "heading":
+ level = item.get("attrs", {}).get("level", 1)
+ headingContent = item.get("content", [])
+ if headingContent:
+ headingText = extractTextFromContent(headingContent, listLevel)
+ if headingText.strip():
+ prefix = "#" * level + " "
+ textParts.append(f"{prefix}{headingText}")
+
+ elif itemType == "codeBlock":
+ codeContent = item.get("content", [])
+ if codeContent:
+ codeText = extractTextFromContent(codeContent, listLevel)
+ if codeText.strip():
+ textParts.append(f"```\n{codeText}\n```")
+
+ elif itemType == "blockquote":
+ quoteContent = item.get("content", [])
+ if quoteContent:
+ quoteText = extractTextFromContent(quoteContent, listLevel)
+ if quoteText.strip():
+ textParts.append(f"> {quoteText}")
+
+ elif itemType == "table":
+ tableContent = item.get("content", [])
+ if tableContent:
+ tableText = extractTextFromContent(tableContent, listLevel)
+ if tableText.strip():
+ textParts.append(tableText)
+
+ elif itemType == "tableRow":
+ rowContent = item.get("content", [])
+ if rowContent:
+ rowText = extractTextFromContent(rowContent, listLevel)
+ if rowText.strip():
+ textParts.append(rowText)
+
+ elif itemType == "tableCell":
+ cellContent = item.get("content", [])
+ if cellContent:
+ cellText = extractTextFromContent(cellContent, listLevel)
+ if cellText.strip():
+ textParts.append(cellText)
+
+ elif itemType == "mediaGroup":
+ # Skip media groups for now
+ pass
+
+ elif itemType == "media":
+ # Skip media for now
+ pass
+
+ else:
+ # Unknown type - try to extract content if available
+ if "content" in item:
+ unknownContent = item.get("content", [])
+ if unknownContent:
+ unknownText = extractTextFromContent(unknownContent, listLevel)
+ if unknownText.strip():
+ textParts.append(unknownText)
+
+ return "".join(textParts)
+
+ result = extractTextFromContent(content)
+ return result.strip() if result else ""
+
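
For a quick sense of the output, this sketch runs a small ADF document through convertAdfToText. The SimpleNamespace stub is hypothetical; the constructor only stores the method instance and its services attribute. Note that top-level nodes are concatenated without separators:

    from types import SimpleNamespace
    from modules.workflows.methods.methodJira.helpers.adfConverter import AdfConverterHelper

    converter = AdfConverterHelper(SimpleNamespace(services=None))  # hypothetical stub

    adf = {
        "type": "doc",
        "content": [
            {"type": "heading", "attrs": {"level": 2},
             "content": [{"type": "text", "text": "Release notes"}]},
            {"type": "paragraph",
             "content": [{"type": "text", "text": "Fixed the sync job.",
                          "marks": [{"type": "strong"}]}]},
        ],
    }

    print(converter.convertAdfToText(adf))
    # -> "## Release notes**Fixed the sync job.**"
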
diff --git a/modules/workflows/methods/methodJira/helpers/documentParsing.py b/modules/workflows/methods/methodJira/helpers/documentParsing.py
new file mode 100644
index 00000000..b0608524
--- /dev/null
+++ b/modules/workflows/methods/methodJira/helpers/documentParsing.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Document Parsing helper for JIRA operations.
+Handles parsing of document references and JSON content.
+"""
+
+import logging
+import json
+from typing import Any, Optional, Dict
+from modules.datamodels.datamodelDocref import DocumentReferenceList
+
+logger = logging.getLogger(__name__)
+
+class DocumentParsingHelper:
+ """Helper for document parsing operations"""
+
+ def __init__(self, methodInstance):
+ """
+ Initialize document parsing helper.
+
+ Args:
+ methodInstance: Instance of MethodJira (for access to services)
+ """
+ self.method = methodInstance
+ self.services = methodInstance.services
+
+ def getDocumentData(self, documentReference: str) -> Any:
+ """
+ Get document data from a document reference.
+
+ Args:
+ documentReference: Document reference string
+
+ Returns:
+ Document data (bytes, str, or None)
+ """
+ try:
+ docList = DocumentReferenceList.from_string_list([documentReference])
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
+ if not chatDocuments:
+ return None
+
+ doc = chatDocuments[0]
+ fileId = getattr(doc, 'fileId', None)
+ if not fileId:
+ return None
+
+ return self.services.chat.getFileData(fileId)
+ except Exception as e:
+ logger.error(f"Error getting document data: {str(e)}")
+ return None
+
+ def parseJsonFromDocument(self, documentReference: str) -> Optional[Dict[str, Any]]:
+ """
+ Parse JSON content from a document reference.
+
+ Args:
+ documentReference: Document reference string
+
+ Returns:
+ Parsed JSON dictionary or None
+ """
+ try:
+ fileData = self.getDocumentData(documentReference)
+ if not fileData:
+ return None
+
+ # Handle bytes
+ if isinstance(fileData, bytes):
+ jsonStr = fileData.decode('utf-8')
+ else:
+ jsonStr = str(fileData)
+
+ # Parse JSON
+ return json.loads(jsonStr)
+ except Exception as e:
+ logger.error(f"Error parsing JSON from document: {str(e)}")
+ return None
+
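
A minimal usage sketch, assuming a constructed MethodJira instance and a hypothetical docItem::tickets.json reference in the current chat:

    from modules.workflows.methods.methodJira.helpers.documentParsing import DocumentParsingHelper

    def loadTickets(methodInstance):
        """methodInstance: a constructed MethodJira (hypothetical caller context)."""
        helper = DocumentParsingHelper(methodInstance)
        # Returns the parsed JSON, or None on any error (errors are logged, not raised).
        return helper.parseJsonFromDocument("docItem::tickets.json")
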
diff --git a/modules/workflows/methods/methodJira/methodJira.py b/modules/workflows/methods/methodJira/methodJira.py
new file mode 100644
index 00000000..d7baacf0
--- /dev/null
+++ b/modules/workflows/methods/methodJira/methodJira.py
@@ -0,0 +1,322 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+import logging
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import MethodBase
+from modules.datamodels.datamodelWorkflowActions import WorkflowActionDefinition, WorkflowActionParameter
+from modules.shared.frontendTypes import FrontendType
+
+# Import helpers
+from .helpers.adfConverter import AdfConverterHelper
+from .helpers.documentParsing import DocumentParsingHelper
+
+# Import actions
+from .actions.connectJira import connectJira
+from .actions.exportTicketsAsJson import exportTicketsAsJson
+from .actions.importTicketsFromJson import importTicketsFromJson
+from .actions.mergeTicketData import mergeTicketData
+from .actions.parseCsvContent import parseCsvContent
+from .actions.parseExcelContent import parseExcelContent
+from .actions.createCsvContent import createCsvContent
+from .actions.createExcelContent import createExcelContent
+
+logger = logging.getLogger(__name__)
+
+class MethodJira(MethodBase):
+ """JIRA operations methods."""
+
+ def __init__(self, services):
+ super().__init__(services)
+ self.name = "jira"
+ self.description = "JIRA operations methods"
+ # Store connections in memory (keyed by connectionId)
+ self._connections: Dict[str, Any] = {}
+
+ # Initialize helper modules
+ self.adfConverter = AdfConverterHelper(self)
+ self.documentParsing = DocumentParsingHelper(self)
+
+ # RBAC integration: action definitions with actionId
+ self._actions = {
+ "connectJira": WorkflowActionDefinition(
+ actionId="jira.connectJira",
+ description="Connect to JIRA instance and create ticket interface",
+ parameters={
+ "apiUsername": WorkflowActionParameter(
+ name="apiUsername",
+ type="str",
+ frontendType=FrontendType.EMAIL,
+ required=True,
+ description="JIRA API username/email"
+ ),
+ "apiTokenConfigKey": WorkflowActionParameter(
+ name="apiTokenConfigKey",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="APP_CONFIG key name for JIRA API token"
+ ),
+ "apiUrl": WorkflowActionParameter(
+ name="apiUrl",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="JIRA instance URL (e.g., https://example.atlassian.net)"
+ ),
+ "projectCode": WorkflowActionParameter(
+ name="projectCode",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="JIRA project code (e.g., DCS)"
+ ),
+ "issueType": WorkflowActionParameter(
+ name="issueType",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="JIRA issue type (e.g., Task)"
+ ),
+ "taskSyncDefinition": WorkflowActionParameter(
+ name="taskSyncDefinition",
+ type="str",
+ frontendType=FrontendType.TEXTAREA,
+ required=True,
+ description="Field mapping definition as JSON string or dict"
+ )
+ },
+ execute=connectJira.__get__(self, self.__class__)
+ ),
+ "exportTicketsAsJson": WorkflowActionDefinition(
+ actionId="jira.exportTicketsAsJson",
+ description="Export tickets from JIRA as JSON list",
+ parameters={
+ "connectionId": WorkflowActionParameter(
+ name="connectionId",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="Connection ID from connectJira action result"
+ ),
+ "taskSyncDefinition": WorkflowActionParameter(
+ name="taskSyncDefinition",
+ type="str",
+ frontendType=FrontendType.TEXTAREA,
+ required=False,
+ description="Field mapping definition (if not provided, uses stored definition)"
+ )
+ },
+ execute=exportTicketsAsJson.__get__(self, self.__class__)
+ ),
+ "importTicketsFromJson": WorkflowActionDefinition(
+ actionId="jira.importTicketsFromJson",
+ description="Import ticket data from JSON back to JIRA",
+ parameters={
+ "connectionId": WorkflowActionParameter(
+ name="connectionId",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="Connection ID from connectJira action result"
+ ),
+ "ticketData": WorkflowActionParameter(
+ name="ticketData",
+ type="str",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference containing ticket data as JSON"
+ ),
+ "taskSyncDefinition": WorkflowActionParameter(
+ name="taskSyncDefinition",
+ type="str",
+ frontendType=FrontendType.TEXTAREA,
+ required=False,
+ description="Field mapping definition (if not provided, uses stored definition)"
+ )
+ },
+ execute=importTicketsFromJson.__get__(self, self.__class__)
+ ),
+ "mergeTicketData": WorkflowActionDefinition(
+ actionId="jira.mergeTicketData",
+ description="Merge JIRA export data with existing SharePoint data",
+ parameters={
+ "jiraData": WorkflowActionParameter(
+ name="jiraData",
+ type="str",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference containing JIRA ticket data as JSON array"
+ ),
+ "existingData": WorkflowActionParameter(
+ name="existingData",
+ type="str",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference containing existing SharePoint data as JSON array"
+ ),
+ "taskSyncDefinition": WorkflowActionParameter(
+ name="taskSyncDefinition",
+ type="str",
+ frontendType=FrontendType.TEXTAREA,
+ required=True,
+ description="Field mapping definition"
+ ),
+ "idField": WorkflowActionParameter(
+ name="idField",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=False,
+ default="ID",
+ description="Field name to use as ID for merging"
+ )
+ },
+ execute=mergeTicketData.__get__(self, self.__class__)
+ ),
+ "parseCsvContent": WorkflowActionDefinition(
+ actionId="jira.parseCsvContent",
+ description="Parse CSV content with custom headers",
+ parameters={
+ "csvContent": WorkflowActionParameter(
+ name="csvContent",
+ type="str",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference containing CSV file content as bytes"
+ ),
+ "skipRows": WorkflowActionParameter(
+ name="skipRows",
+ type="int",
+ frontendType=FrontendType.NUMBER,
+ required=False,
+ default=2,
+ description="Number of header rows to skip",
+ validation={"min": 0, "max": 100}
+ ),
+ "hasCustomHeaders": WorkflowActionParameter(
+ name="hasCustomHeaders",
+ type="bool",
+ frontendType=FrontendType.CHECKBOX,
+ required=False,
+ default=True,
+ description="Whether CSV has custom header rows"
+ )
+ },
+ execute=parseCsvContent.__get__(self, self.__class__)
+ ),
+ "parseExcelContent": WorkflowActionDefinition(
+ actionId="jira.parseExcelContent",
+ description="Parse Excel content with custom headers",
+ parameters={
+ "excelContent": WorkflowActionParameter(
+ name="excelContent",
+ type="str",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference containing Excel file content as bytes"
+ ),
+ "skipRows": WorkflowActionParameter(
+ name="skipRows",
+ type="int",
+ frontendType=FrontendType.NUMBER,
+ required=False,
+ default=3,
+ description="Number of header rows to skip",
+ validation={"min": 0, "max": 100}
+ ),
+ "hasCustomHeaders": WorkflowActionParameter(
+ name="hasCustomHeaders",
+ type="bool",
+ frontendType=FrontendType.CHECKBOX,
+ required=False,
+ default=True,
+ description="Whether Excel has custom header rows"
+ )
+ },
+ execute=parseExcelContent.__get__(self, self.__class__)
+ ),
+ "createCsvContent": WorkflowActionDefinition(
+ actionId="jira.createCsvContent",
+ description="Create CSV content with custom headers",
+ parameters={
+ "data": WorkflowActionParameter(
+ name="data",
+ type="str",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference containing data as JSON (with data field from mergeTicketData)"
+ ),
+ "headers": WorkflowActionParameter(
+ name="headers",
+ type="str",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=False,
+ description="Document reference containing headers JSON (from parseCsvContent/parseExcelContent)"
+ ),
+ "columns": WorkflowActionParameter(
+ name="columns",
+ type="List[str]",
+ frontendType=FrontendType.MULTISELECT,
+ required=False,
+ description="List of column names (if not provided, extracted from taskSyncDefinition or data)"
+ ),
+ "taskSyncDefinition": WorkflowActionParameter(
+ name="taskSyncDefinition",
+ type="str",
+ frontendType=FrontendType.TEXTAREA,
+ required=False,
+ description="Field mapping definition (used to extract column names if columns not provided)"
+ )
+ },
+ execute=createCsvContent.__get__(self, self.__class__)
+ ),
+ "createExcelContent": WorkflowActionDefinition(
+ actionId="jira.createExcelContent",
+ description="Create Excel content with custom headers",
+ parameters={
+ "data": WorkflowActionParameter(
+ name="data",
+ type="str",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference containing data as JSON (with data field from mergeTicketData)"
+ ),
+ "headers": WorkflowActionParameter(
+ name="headers",
+ type="str",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=False,
+ description="Document reference containing headers JSON (from parseExcelContent)"
+ ),
+ "columns": WorkflowActionParameter(
+ name="columns",
+ type="List[str]",
+ frontendType=FrontendType.MULTISELECT,
+ required=False,
+ description="List of column names (if not provided, extracted from taskSyncDefinition or data)"
+ ),
+ "taskSyncDefinition": WorkflowActionParameter(
+ name="taskSyncDefinition",
+ type="str",
+ frontendType=FrontendType.TEXTAREA,
+ required=False,
+ description="Field mapping definition (used to extract column names if columns not provided)"
+ )
+ },
+ execute=createExcelContent.__get__(self, self.__class__)
+ )
+ }
+
+ # Validate actions after definition
+ self._validateActions()
+
+ # Register actions as bound methods (optional, for direct access)
+ self.connectJira = connectJira.__get__(self, self.__class__)
+ self.exportTicketsAsJson = exportTicketsAsJson.__get__(self, self.__class__)
+ self.importTicketsFromJson = importTicketsFromJson.__get__(self, self.__class__)
+ self.mergeTicketData = mergeTicketData.__get__(self, self.__class__)
+ self.parseCsvContent = parseCsvContent.__get__(self, self.__class__)
+ self.parseExcelContent = parseExcelContent.__get__(self, self.__class__)
+ self.createCsvContent = createCsvContent.__get__(self, self.__class__)
+ self.createExcelContent = createExcelContent.__get__(self, self.__class__)
+
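
The repeated __get__(self, self.__class__) calls above use the descriptor protocol: fetching a plain module-level function through __get__ yields a method bound to this instance, which lets each action live in its own file while still receiving self. A minimal sketch of the idiom with hypothetical names:

    # hypothetical module-level action, as in the actions/ package
    def greet(self, name: str) -> str:
        return f"{self.prefix} {name}"

    class MethodDemo:
        def __init__(self):
            self.prefix = "Hello,"
            # functions are descriptors: __get__(instance, owner) returns a bound method
            self.greet = greet.__get__(self, self.__class__)

    demo = MethodDemo()
    print(demo.greet("JIRA"))  # -> "Hello, JIRA"
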
diff --git a/modules/workflows/methods/methodOutlook.py b/modules/workflows/methods/methodOutlook.py.old
similarity index 100%
rename from modules/workflows/methods/methodOutlook.py
rename to modules/workflows/methods/methodOutlook.py.old
diff --git a/modules/workflows/methods/methodOutlook/__init__.py b/modules/workflows/methods/methodOutlook/__init__.py
new file mode 100644
index 00000000..c7653010
--- /dev/null
+++ b/modules/workflows/methods/methodOutlook/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+from .methodOutlook import MethodOutlook
+
+__all__ = ['MethodOutlook']
+
diff --git a/modules/workflows/methods/methodOutlook/actions/__init__.py b/modules/workflows/methods/methodOutlook/actions/__init__.py
new file mode 100644
index 00000000..f62e3e0a
--- /dev/null
+++ b/modules/workflows/methods/methodOutlook/actions/__init__.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""Action modules for Outlook operations."""
+
+# Export all actions
+from .readEmails import readEmails
+from .searchEmails import searchEmails
+from .composeAndDraftEmailWithContext import composeAndDraftEmailWithContext
+from .sendDraftEmail import sendDraftEmail
+
+__all__ = [
+ 'readEmails',
+ 'searchEmails',
+ 'composeAndDraftEmailWithContext',
+ 'sendDraftEmail',
+]
+
diff --git a/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py b/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py
new file mode 100644
index 00000000..2bad3838
--- /dev/null
+++ b/modules/workflows/methods/methodOutlook/actions/composeAndDraftEmailWithContext.py
@@ -0,0 +1,362 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Compose And Draft Email With Context action for Outlook operations.
+Composes email content using AI from context and optional documents, then creates a draft.
+"""
+
+import logging
+import json
+import base64
+import requests
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def composeAndDraftEmailWithContext(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Compose email content using AI from context and optional documents, then create a draft.
+ - Input requirements: connectionReference (required); to (required); context (required); optional documentList, cc, bcc, emailStyle, maxLength.
+ - Output format: JSON confirmation with AI-generated draft metadata.
+
+ Parameters:
+ - connectionReference (str, required): Microsoft connection label.
+ - to (list, required): Recipient email addresses.
+ - context (str, required): Detailed context for composing the email.
+ - documentList (list, optional): Document references for context/attachments.
+ - cc (list, optional): CC recipients.
+ - bcc (list, optional): BCC recipients.
+ - emailStyle (str, optional): formal | casual | business. Default: business.
+ - maxLength (int, optional): Maximum length for generated content. Default: 1000.
+ """
+ try:
+ connectionReference = parameters.get("connectionReference")
+ to = parameters.get("to")
+ context = parameters.get("context")
+ documentList = parameters.get("documentList", [])
+ cc = parameters.get("cc", [])
+ bcc = parameters.get("bcc", [])
+ emailStyle = parameters.get("emailStyle", "business")
+ maxLength = parameters.get("maxLength", 1000)
+
+ if not connectionReference or not to or not context:
+ return ActionResult.isFailure(error="connectionReference, to, and context are required")
+
+ # Convert single values to lists for all recipient parameters
+ if isinstance(to, str):
+ to = [to]
+ if isinstance(cc, str):
+ cc = [cc]
+ if isinstance(bcc, str):
+ bcc = [bcc]
+ if isinstance(documentList, str):
+ documentList = [documentList]
+
+ # Get Microsoft connection
+ connection = self.connection.getMicrosoftConnection(connectionReference)
+ if not connection:
+ return ActionResult.isFailure(error="No valid Microsoft connection found")
+
+ # Check permissions
+ permissions_ok = await self.connection.checkPermissions(connection)
+ if not permissions_ok:
+ return ActionResult.isFailure(error="Connection lacks necessary permissions for Outlook operations")
+
+ # Prepare documents for AI processing
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+ chatDocuments = []
+ if documentList:
+ # Convert to DocumentReferenceList if needed
+ if isinstance(documentList, DocumentReferenceList):
+ docRefList = documentList
+ elif isinstance(documentList, list):
+ docRefList = DocumentReferenceList.from_string_list(documentList)
+ elif isinstance(documentList, str):
+ docRefList = DocumentReferenceList.from_string_list([documentList])
+ else:
+ docRefList = DocumentReferenceList(references=[])
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
+
+ # Create AI prompt for email composition
+ # Build document reference list for AI with expanded list contents when possible
+ doc_references = documentList
+ doc_list_text = ""
+ if doc_references:
+ lines = ["Available_Document_References:"]
+ for ref in doc_references:
+ # Each item is a label: resolve to its document list and render contained items
+ list_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([ref])) or []
+ if list_docs:
+ for d in list_docs:
+ doc_ref_label = self.services.chat.getDocumentReferenceFromChatDocument(d)
+ lines.append(f"- {doc_ref_label}")
+ else:
+ lines.append(" - (no documents)")
+ doc_list_text = "\n" + "\n".join(lines)
+ else:
+ doc_list_text = "Available_Document_References: (No documents available for attachment)"
+
+ # Escape quotes and newlines in the user-controlled context before embedding it in the prompt
+ escaped_context = context.replace('"', '\\"').replace('\n', '\\n').replace('\r', '\\r')
+
+ ai_prompt = f"""Compose an email based on this context:
+-------
+{escaped_context}
+-------
+
+Recipients: {to}
+Style: {emailStyle}
+Max length: {maxLength} characters
+{doc_list_text}
+
+Based on the context, decide which documents to attach.
+
+CRITICAL: Use EXACT document references from Available_Document_References above. For individual documents: ALWAYS use docItem:: format (include filename)
+
+Return JSON:
+{{
+ "subject": "subject line",
+ "body": "email body (HTML allowed)",
+ "attachments": ["docItem::"]
+}}
+"""
+
+ # Call AI service to generate email content
+ try:
+ ai_response = await self.services.ai.callAiPlanning(
+ prompt=ai_prompt,
+ placeholders=None,
+ debugType="email_composition"
+ )
+
+ # Parse AI response
+ try:
+ ai_content = ai_response
+ # Extract JSON from AI response
+ if "```json" in ai_content:
+ json_start = ai_content.find("```json") + 7
+ json_end = ai_content.find("```", json_start)
+ json_content = ai_content[json_start:json_end].strip()
+ elif "{" in ai_content and "}" in ai_content:
+ json_start = ai_content.find("{")
+ json_end = ai_content.rfind("}") + 1
+ json_content = ai_content[json_start:json_end]
+ else:
+ json_content = ai_content
+
+ email_data = json.loads(json_content)
+ subject = email_data.get("subject", "")
+ body = email_data.get("body", "")
+ ai_attachments = email_data.get("attachments", [])
+
+ if not subject or not body:
+ return ActionResult.isFailure(error="AI did not generate valid subject and body")
+
+ # Use AI-selected attachments if provided, otherwise use all documents
+ normalized_ai_attachments = []
+ if documentList:
+ try:
+ available_refs = [documentList] if isinstance(documentList, str) else documentList
+ available_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(available_refs)) or []
+ except Exception:
+ available_docs = []
+
+ # Normalize AI attachments to a list of strings
+ if isinstance(ai_attachments, str):
+ ai_attachments = [ai_attachments]
+ elif isinstance(ai_attachments, list):
+ ai_attachments = [a for a in ai_attachments if isinstance(a, str)]
+
+ if ai_attachments:
+ try:
+ ai_refs = [ai_attachments] if isinstance(ai_attachments, str) else ai_attachments
+ ai_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(ai_refs)) or []
+ except Exception:
+ ai_docs = []
+
+ # Intersect by document id
+ available_ids = {getattr(d, 'id', None) for d in available_docs}
+ selected_docs = [d for d in ai_docs if getattr(d, 'id', None) in available_ids]
+
+ if selected_docs:
+ # Map selected ChatDocuments back to docItem references (with full filename)
+ documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in selected_docs]
+ # Normalize ai_attachments to full format for storage
+ normalized_ai_attachments = documentList.copy()
+ logger.info(f"AI selected {len(documentList)} documents for attachment (resolved via ChatDocuments)")
+ else:
+ # No intersection; use all available documents
+ documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
+ normalized_ai_attachments = documentList.copy()
+ logger.warning("AI selected attachments not found in available documents, using all documents")
+ else:
+ # No AI selection; use all available documents
+ documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
+ normalized_ai_attachments = documentList.copy()
+ logger.warning("AI did not specify attachments, using all available documents")
+ else:
+ logger.info("No documents provided in documentList; skipping attachment processing")
+
+ except json.JSONDecodeError as e:
+ logger.error(f"Failed to parse AI response as JSON: {str(e)}")
+ logger.error(f"AI response content: {ai_response}")
+ return ActionResult.isFailure(error="AI response was not valid JSON format")
+
+ except Exception as e:
+ logger.error(f"Error calling AI service: {str(e)}")
+ return ActionResult.isFailure(error=f"Failed to generate email content: {str(e)}")
+
+ # Now create the email with AI-generated content
+ try:
+ graph_url = "https://graph.microsoft.com/v1.0"
+ headers = {
+ "Authorization": f"Bearer {connection['accessToken']}",
+ "Content-Type": "application/json"
+ }
+
+ # Clean and format body content
+ cleaned_body = body.strip()
+
+ # Check if body is already HTML
+ if cleaned_body.startswith('<html') or cleaned_body.startswith('<p>') or '<br' in cleaned_body:
+ html_body = cleaned_body
+ else:
+ # Convert plain text to proper HTML formatting
+ html_body = cleaned_body.replace('\n', '<br>')
+ html_body = f"<html><body>{html_body}</body></html>"
+
+ # Build the email message
+ message = {
+ "subject": subject,
+ "body": {
+ "contentType": "HTML",
+ "content": html_body
+ },
+ "toRecipients": [{"emailAddress": {"address": email}} for email in to],
+ "ccRecipients": [{"emailAddress": {"address": email}} for email in cc] if cc else [],
+ "bccRecipients": [{"emailAddress": {"address": email}} for email in bcc] if bcc else []
+ }
+
+ # Add documents as attachments if provided
+ if documentList:
+ message["attachments"] = []
+ for attachment_ref in documentList:
+ # Get attachment document from service center
+ attachment_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([attachment_ref]))
+ if attachment_docs:
+ for doc in attachment_docs:
+ file_id = getattr(doc, 'fileId', None)
+ if file_id:
+ try:
+ file_content = self.services.chat.getFileData(file_id)
+ if file_content:
+ if isinstance(file_content, bytes):
+ content_bytes = file_content
+ else:
+ content_bytes = str(file_content).encode('utf-8')
+
+ base64_content = base64.b64encode(content_bytes).decode('utf-8')
+
+ attachment = {
+ "@odata.type": "#microsoft.graph.fileAttachment",
+ "name": doc.fileName,
+ "contentType": doc.mimeType or "application/octet-stream",
+ "contentBytes": base64_content
+ }
+ message["attachments"].append(attachment)
+ except Exception as e:
+ logger.error(f"Error reading attachment file {doc.fileName}: {str(e)}")
+
+ # Create the draft message
+ drafts_folder_id = self.folderManagement.getFolderId("Drafts", connection)
+
+ if drafts_folder_id:
+ api_url = f"{graph_url}/me/mailFolders/{drafts_folder_id}/messages"
+ else:
+ api_url = f"{graph_url}/me/messages"
+ logger.warning("Could not find Drafts folder, creating draft in default location")
+
+ response = requests.post(api_url, headers=headers, json=message)
+
+ if response.status_code in [200, 201]:
+ draft_data = response.json()
+ draft_id = draft_data.get("id", "Unknown")
+
+ # Create draft result data with full draft information
+ draftResultData = {
+ "status": "draft",
+ "message": "Email draft created successfully with AI-generated content",
+ "draftId": draft_id,
+ "folder": "Drafts (Entwürfe)",
+ "mailbox": connection.get('userEmail', 'Unknown'),
+ "subject": subject,
+ "body": body,
+ "recipients": to,
+ "cc": cc,
+ "bcc": bcc,
+ "attachments": len(documentList) if documentList else 0,
+ "aiSelectedAttachments": normalized_ai_attachments if normalized_ai_attachments else "all documents",
+ "aiGenerated": True,
+ "context": context,
+ "emailStyle": emailStyle,
+ "timestamp": self.services.utils.timestampGetUtc(),
+ "draftData": draft_data
+ }
+
+ # Extract attachment filenames for validation metadata
+ attachmentFilenames = []
+ attachmentReferences = []
+ if documentList:
+ try:
+ attached_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(documentList)) or []
+ attachmentFilenames = [getattr(doc, 'fileName', '') for doc in attached_docs if getattr(doc, 'fileName', None)]
+ # Store normalized document references (with filenames) - use normalized_ai_attachments if available
+ attachmentReferences = normalized_ai_attachments if normalized_ai_attachments else [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in attached_docs]
+ except Exception:
+ pass
+
+ # Create validation metadata for content validator
+ validationMetadata = {
+ "actionType": "outlook.composeAndDraftEmailWithContext",
+ "emailRecipients": to,
+ "emailCc": cc,
+ "emailBcc": bcc,
+ "emailSubject": subject,
+ "emailAttachments": attachmentFilenames,
+ "emailAttachmentReferences": attachmentReferences,
+ "emailAttachmentCount": len(attachmentFilenames),
+ "emailStyle": emailStyle,
+ "hasAttachments": len(attachmentFilenames) > 0
+ }
+
+ return ActionResult(
+ success=True,
+ documents=[ActionDocument(
+ documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json",
+ documentData=json.dumps(draftResultData, indent=2),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )]
+ )
+ else:
+ logger.error(f"Failed to create draft. Status: {response.status_code}, Response: {response.text}")
+ return ActionResult.isFailure(error=f"Failed to create email draft: {response.status_code} - {response.text}")
+
+ except Exception as e:
+ logger.error(f"Error creating email via Microsoft Graph API: {str(e)}")
+ return ActionResult.isFailure(error=f"Failed to create email: {str(e)}")
+
+ except Exception as e:
+ logger.error(f"Error in composeAndDraftEmailWithContext: {str(e)}")
+ return ActionResult.isFailure(error=str(e))
+
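
The response parsing above tries a fenced ```json block first, then the outermost brace pair, then the raw text. The same extraction order as a standalone sketch (illustrative only, not the project's helper):

    import json
    from typing import Any, Optional

    def extractJson(aiContent: str) -> Optional[Any]:
        """Best-effort JSON extraction mirroring the branch order used above."""
        if "```json" in aiContent:
            start = aiContent.find("```json") + 7
            end = aiContent.find("```", start)
            candidate = aiContent[start:end].strip()
        elif "{" in aiContent and "}" in aiContent:
            # Outermost brace pair; assumes a single top-level JSON object.
            candidate = aiContent[aiContent.find("{"):aiContent.rfind("}") + 1]
        else:
            candidate = aiContent
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            return None

    print(extractJson('Sure! ```json\n{"subject": "Hi"}\n```'))  # -> {'subject': 'Hi'}
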
diff --git a/modules/workflows/methods/methodOutlook/actions/readEmails.py b/modules/workflows/methods/methodOutlook/actions/readEmails.py
new file mode 100644
index 00000000..e698cb9f
--- /dev/null
+++ b/modules/workflows/methods/methodOutlook/actions/readEmails.py
@@ -0,0 +1,245 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Read Emails action for Outlook operations.
+Reads emails and metadata from a mailbox folder.
+"""
+
+import logging
+import time
+import json
+import requests
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def readEmails(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Read emails and metadata from a mailbox folder.
+ - Input requirements: connectionReference (required); optional folder, limit, filter, outputMimeType.
+ - Output format: JSON with emails and metadata.
+
+ Parameters:
+ - connectionReference (str, required): Microsoft connection label.
+ - folder (str, optional): Folder to read from. Default: Inbox.
+ - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
+ - filter (str, optional): Sender, query operators, or subject text.
+ - outputMimeType (str, optional): MIME type for the output file. Options: "application/json" (default), "text/plain", "text/csv".
+ """
+ operationId = None
+ try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"outlook_read_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "Read Emails",
+ "Outlook Email Reading",
+ f"Folder: {parameters.get('folder', 'Inbox')}",
+ parentOperationId=parentOperationId
+ )
+
+ connectionReference = parameters.get("connectionReference")
+ folder = parameters.get("folder", "Inbox")
+ limit = parameters.get("limit", 10)
+ filter = parameters.get("filter")
+ outputMimeType = parameters.get("outputMimeType", "application/json")
+
+ if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Connection reference is required")
+
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Validating parameters")
+
+ # Validate limit parameter
+ if limit <= 0:
+ limit = 1000
+ logger.warning(f"Invalid limit value ({limit}), using default value 1000")
+
+ # Validate filter parameter if provided
+ if filter:
+ # Trim whitespace and cap the length so the filter stays within Graph API limits
+ filter = filter.strip()
+ if len(filter) > 100:
+ logger.warning(f"Filter too long ({len(filter)} chars), truncating to 100 characters")
+ filter = filter[:100]
+
+ # Get Microsoft connection
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection")
+ connection = self.connection.getMicrosoftConnection(connectionReference)
+ if not connection:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # Read emails using Microsoft Graph API
+ self.services.chat.progressLogUpdate(operationId, 0.4, "Reading emails from Microsoft Graph API")
+ try:
+ # Microsoft Graph API endpoint for messages
+ graph_url = "https://graph.microsoft.com/v1.0"
+ headers = {
+ "Authorization": f"Bearer {connection['accessToken']}",
+ "Content-Type": "application/json"
+ }
+
+ # Get the folder ID for the specified folder
+ folder_id = self.folderManagement.getFolderId(folder, connection)
+
+ if folder_id:
+ # Build the API request with folder ID
+ api_url = f"{graph_url}/me/mailFolders/{folder_id}/messages"
+ else:
+ # Fallback: use folder name directly (for well-known folders like "Inbox")
+ api_url = f"{graph_url}/me/mailFolders/{folder}/messages"
+ logger.warning(f"Could not find folder ID for '{folder}', using folder name directly")
+ params = {
+ "$top": limit,
+ "$orderby": "receivedDateTime desc"
+ }
+
+ if filter:
+ # Build proper Graph API filter parameters
+ filter_params = self.emailProcessing.buildGraphFilter(filter)
+ params.update(filter_params)
+
+ # If using $search, remove $orderby as they can't be combined
+ if "$search" in params:
+ params.pop("$orderby", None)
+
+ # If using $filter with contains(), remove $orderby as they can't be combined
+ # Microsoft Graph API doesn't support contains() with orderby
+ if "$filter" in params and "contains(" in params["$filter"].lower():
+ params.pop("$orderby", None)
+
+ # Make the API call
+ response = requests.get(api_url, headers=headers, params=params)
+
+ if response.status_code != 200:
+ logger.error(f"Graph API error: {response.status_code} - {response.text}")
+ logger.error(f"Request URL: {response.url}")
+ logger.error(f"Request headers: {headers}")
+ logger.error(f"Request params: {params}")
+
+ response.raise_for_status()
+
+ self.services.chat.progressLogUpdate(operationId, 0.7, "Processing email data")
+ emails_data = response.json()
+ email_data = {
+ "emails": emails_data.get("value", []),
+ "count": len(emails_data.get("value", [])),
+ "folder": folder,
+ "filter": filter,
+ "apiMetadata": {
+ "@odata.context": emails_data.get("@odata.context"),
+ "@odata.count": emails_data.get("@odata.count"),
+ "@odata.nextLink": emails_data.get("@odata.nextLink")
+ }
+ }
+
+ except ImportError:
+ logger.error("requests module not available")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="requests module not available")
+ except requests.exceptions.HTTPError as e:
+ if e.response.status_code == 400:
+ logger.error(f"Bad Request (400) - Invalid filter or parameter: {e.response.text}")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=f"Invalid filter syntax. Please check your filter parameter. Error: {e.response.text}")
+ elif e.response.status_code == 401:
+ logger.error("Unauthorized (401) - Access token may be expired or invalid")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Authentication failed. Please check your connection and try again.")
+ elif e.response.status_code == 403:
+ logger.error("Forbidden (403) - Insufficient permissions to access emails")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Insufficient permissions to read emails from this folder.")
+ else:
+ logger.error(f"HTTP Error {e.response.status_code}: {e.response.text}")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=f"HTTP Error {e.response.status_code}: {e.response.text}")
+ except Exception as e:
+ logger.error(f"Error reading emails from Microsoft Graph API: {str(e)}")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=f"Failed to read emails: {str(e)}")
+
+ # Determine output format based on MIME type
+ mime_type_mapping = {
+ "application/json": ".json",
+ "text/plain": ".txt",
+ "text/csv": ".csv"
+ }
+ output_extension = mime_type_mapping.get(outputMimeType, ".json")
+ output_mime_type = outputMimeType
+ logger.info(f"Using output format: {output_extension} ({output_mime_type})")
+
+ # Create result data as JSON string
+ result_data = {
+ "connectionReference": connectionReference,
+ "folder": folder,
+ "limit": limit,
+ "filter": filter,
+ "emails": email_data,
+ "connection": {
+ "id": connection["id"],
+ "authority": "microsoft",
+ "reference": connectionReference
+ },
+ "timestamp": self.services.utils.timestampGetUtc()
+ }
+
+ validationMetadata = {
+ "actionType": "outlook.readEmails",
+ "connectionReference": connectionReference,
+ "folder": folder,
+ "limit": limit,
+ "filter": filter,
+ "emailCount": email_data.get("count", 0),
+ "outputMimeType": outputMimeType
+ }
+
+ self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {email_data.get('count', 0)} emails")
+ self.services.chat.progressLogFinish(operationId, True)
+
+ return ActionResult.isSuccess(
+ documents=[ActionDocument(
+ documentName=f"outlook_emails_{self._format_timestamp_for_filename()}.json",
+ documentData=json.dumps(result_data, indent=2),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )]
+ )
+
+ except Exception as e:
+ logger.error(f"Error reading emails: {str(e)}")
+ if operationId:
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+ except:
+ pass # Don't fail on progress logging errors
+ return ActionResult.isFailure(
+ error=str(e)
+ )
+
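
The parameter handling above works around documented Microsoft Graph restrictions: $search cannot be combined with $orderby, and the action also drops $orderby for $filter clauses that use contains(). A minimal sketch of that rule; buildGraphFilter is the project's real helper, while the sender/keyword split below is a simplified hypothetical stand-in:

    from typing import Optional

    def buildMessageParams(limit: int, filterText: Optional[str]) -> dict:
        """Assemble Graph /messages query params, dropping $orderby when incompatible."""
        params = {"$top": limit, "$orderby": "receivedDateTime desc"}
        if filterText:
            if "@" in filterText:
                # simplified stand-in: treat anything containing '@' as a sender filter
                params["$filter"] = f"from/emailAddress/address eq '{filterText}'"
            else:
                params["$search"] = f'"{filterText}"'
            if "$search" in params or "contains(" in params.get("$filter", "").lower():
                params.pop("$orderby", None)
        return params

    print(buildMessageParams(10, "report"))   # $search set, $orderby dropped
    print(buildMessageParams(10, "a@b.com"))  # eq filter keeps $orderby
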
diff --git a/modules/workflows/methods/methodOutlook/actions/searchEmails.py b/modules/workflows/methods/methodOutlook/actions/searchEmails.py
new file mode 100644
index 00000000..72830caf
--- /dev/null
+++ b/modules/workflows/methods/methodOutlook/actions/searchEmails.py
@@ -0,0 +1,257 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Search Emails action for Outlook operations.
+Searches emails by query and returns matching items with metadata.
+"""
+
+import logging
+import json
+import requests
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def searchEmails(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Search emails by query and return matching items with metadata.
+ - Input requirements: connectionReference (required); query (required); optional folder, limit, outputMimeType.
+ - Output format: JSON with search results and metadata.
+
+ Parameters:
+ - connectionReference (str, required): Microsoft connection label.
+ - query (str, required): Search expression.
+ - folder (str, optional): Folder scope or All. Default: All.
+ - limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
+ - outputMimeType (str, optional): MIME type for the output file. Options: "application/json" (default), "text/plain", "text/csv".
+ """
+ try:
+ connectionReference = parameters.get("connectionReference")
+ query = parameters.get("query")
+ folder = parameters.get("folder", "All")
+ limit = parameters.get("limit", 1000)
+ outputMimeType = parameters.get("outputMimeType", "application/json")
+
+ # Validate parameters
+ if not connectionReference:
+ return ActionResult.isFailure(error="Connection reference is required")
+
+ if not query or not query.strip():
+ return ActionResult.isFailure(error="Search query is required and cannot be empty")
+
+ # Check if this is a folder specification query
+ if query.strip().lower().startswith('folder:'):
+ folder_name = query.strip()[7:].strip() # Remove "folder:" prefix
+ if not folder_name:
+ return ActionResult.isFailure(error="Invalid folder specification. Use format 'folder:FolderName'")
+ logger.info(f"Search query is a folder specification: {folder_name}")
+
+ # Validate limit
+ try:
+ limit = int(limit)
+ if limit <= 0:
+ limit = 1000
+ logger.warning(f"Invalid limit value (<=0), using default value 1000")
+ elif limit > 1000: # Microsoft Graph API has limits
+ limit = 1000
+ logger.warning(f"Limit {limit} exceeds maximum (1000), using 1000")
+ except (ValueError, TypeError):
+ limit = 1000
+ logger.warning(f"Invalid limit value, using default value 1000")
+
+ # Get Microsoft connection
+ connection = self.connection.getMicrosoftConnection(connectionReference)
+ if not connection:
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # Search emails using Microsoft Graph API
+ try:
+ # Microsoft Graph API endpoint for searching messages
+ graph_url = "https://graph.microsoft.com/v1.0"
+ headers = {
+ "Authorization": f"Bearer {connection['accessToken']}",
+ "Content-Type": "application/json"
+ }
+
+ # Get the folder ID for the specified folder if needed
+ folder_id = None
+ if folder and folder.lower() != "all":
+ folder_id = self.folderManagement.getFolderId(folder, connection)
+ if folder_id:
+ logger.debug(f"Found folder ID for '{folder}': {folder_id}")
+ else:
+ logger.warning(f"Could not find folder ID for '{folder}', using folder name directly")
+
+ # Build the search API request
+ api_url = f"{graph_url}/me/messages"
+ params = self.emailProcessing.buildSearchParameters(query, folder_id or folder, limit)
+
+ # Log search parameters for debugging
+ logger.debug(f"Search query: '{query}'")
+ logger.debug(f"Search folder: '{folder}'")
+ logger.debug(f"Search parameters: {params}")
+ logger.debug(f"API URL: {api_url}")
+
+ # Make the API call
+ response = requests.get(api_url, headers=headers, params=params)
+
+ # Log response details for debugging
+ if response.status_code != 200:
+ # Log detailed error information
+ try:
+ error_data = response.json()
+ logger.error(f"Microsoft Graph API error: {response.status_code} - {error_data}")
+ except:
+ logger.error(f"Microsoft Graph API error: {response.status_code} - {response.text}")
+
+ # Check for specific error types and provide helpful messages
+ if response.status_code == 400:
+ logger.error("Bad Request (400) - Check search query format and parameters")
+ logger.error(f"Search query: '{query}'")
+ logger.error(f"Search parameters: {params}")
+ logger.error(f"API URL: {api_url}")
+ elif response.status_code == 401:
+ logger.error("Unauthorized (401) - Check access token and permissions")
+ elif response.status_code == 403:
+ logger.error("Forbidden (403) - Check API permissions and scopes")
+ elif response.status_code == 429:
+ logger.error("Too Many Requests (429) - Rate limit exceeded")
+
+ raise Exception(f"Microsoft Graph API returned {response.status_code}: {response.text}")
+
+ search_data = response.json()
+ emails = search_data.get("value", [])
+
+ # Apply folder filtering if needed and we used $search
+ if folder and folder.lower() != "all" and "$search" in params:
+ # Get the actual folder ID for proper filtering
+ folder_id = self.folderManagement.getFolderId(folder, connection)
+
+ if folder_id:
+ # Filter results by folder ID
+ filtered_emails = []
+ for email in emails:
+ if email.get("parentFolderId") == folder_id:
+ filtered_emails.append(email)
+ emails = filtered_emails
+ logger.debug(f"Applied folder filtering: {len(filtered_emails)} emails found in folder {folder}")
+ else:
+ # Fallback: try to filter by folder name (less reliable)
+ filtered_emails = []
+ for email in emails:
+ # Check if the message dict carries folder information (messages are dicts, so test the key directly)
+ if email.get('parentFolderId'):
+ if email.get('parentFolderId') == folder:
+ filtered_emails.append(email)
+ else:
+ # If no folder info, include the email (less strict filtering)
+ filtered_emails.append(email)
+
+ emails = filtered_emails
+ logger.debug(f"Applied fallback folder filtering: {len(filtered_emails)} emails found in folder {folder}")
+
+ # Special handling for folder specification queries
+ if query.strip().lower().startswith('folder:'):
+ folder_name = query.strip()[7:].strip()
+ folder_id = self.folderManagement.getFolderId(folder_name, connection)
+ if folder_id:
+ # Filter results to only include emails from the specified folder
+ filtered_emails = []
+ for email in emails:
+ if email.get("parentFolderId") == folder_id:
+ filtered_emails.append(email)
+ emails = filtered_emails
+ logger.debug(f"Applied folder specification filtering: {len(filtered_emails)} emails found in folder {folder_name}")
+ else:
+ logger.warning(f"Could not find folder ID for folder specification: {folder_name}")
+
+ search_result = {
+ "query": query,
+ "results": emails,
+ "count": len(emails),
+ "folder": folder,
+ "limit": limit,
+ "apiMetadata": {
+ "@odata.context": search_data.get("@odata.context"),
+ "@odata.count": search_data.get("@odata.count"),
+ "@odata.nextLink": search_data.get("@odata.nextLink")
+ },
+ "searchParams": params
+ }
+
+ except ImportError:
+ logger.error("requests module not available")
+ return ActionResult.isFailure(error="requests module not available")
+ except Exception as e:
+ logger.error(f"Error searching emails via Microsoft Graph API: {str(e)}")
+ return ActionResult.isFailure(error=f"Failed to search emails: {str(e)}")
+
+ # Determine output format based on MIME type
+ mime_type_mapping = {
+ "application/json": ".json",
+ "text/plain": ".txt",
+ "text/csv": ".csv"
+ }
+ output_extension = mime_type_mapping.get(outputMimeType, ".json")
+ output_mime_type = outputMimeType
+ logger.info(f"Using output format: {output_extension} ({output_mime_type})")
+
+ result_data = {
+ "connectionReference": connectionReference,
+ "query": query,
+ "folder": folder,
+ "limit": limit,
+ "searchResults": search_result,
+ "connection": {
+ "id": connection["id"],
+ "authority": "microsoft",
+ "reference": connectionReference
+ },
+ "timestamp": self.services.utils.timestampGetUtc()
+ }
+
+ validationMetadata = {
+ "actionType": "outlook.searchEmails",
+ "connectionReference": connectionReference,
+ "query": query,
+ "folder": folder,
+ "limit": limit,
+ "resultCount": search_result.get("count", 0),
+ "outputMimeType": outputMimeType
+ }
+
+ return ActionResult(
+ success=True,
+ documents=[ActionDocument(
+ documentName=f"outlook_email_search_{self._format_timestamp_for_filename()}.json",
+ documentData=json.dumps(result_data, indent=2),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )]
+ )
+
+ except Exception as e:
+ logger.error(f"Error searching emails: {str(e)}")
+ return ActionResult.isFailure(error=str(e))
+
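
When a $search query runs against /me/messages, results can come back from any folder, so the action narrows them afterwards by parentFolderId. The post-filter as a standalone sketch, with hypothetical message dicts:

    from typing import Dict, List

    def filterByFolder(emails: List[Dict], folderId: str) -> List[Dict]:
        """Keep only messages whose parentFolderId matches the resolved folder ID."""
        return [e for e in emails if e.get("parentFolderId") == folderId]

    emails = [
        {"subject": "Invoice", "parentFolderId": "AAMk-folder-1"},
        {"subject": "Newsletter", "parentFolderId": "AAMk-folder-2"},
    ]
    print(filterByFolder(emails, "AAMk-folder-1"))  # -> only the Invoice message
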
diff --git a/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py b/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py
new file mode 100644
index 00000000..ffae4c8d
--- /dev/null
+++ b/modules/workflows/methods/methodOutlook/actions/sendDraftEmail.py
@@ -0,0 +1,312 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Send Draft Email action for Outlook operations.
+Sends draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
+"""
+
+import logging
+import time
+import json
+import requests
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def sendDraftEmail(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Send draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
+ - Input requirements: connectionReference (required); documentList with draft email JSON documents (required).
+ - Output format: JSON confirmation with sent mail metadata for all emails.
+
+ Parameters:
+ - connectionReference (str, required): Microsoft connection label.
+ - documentList (list, required): Document reference(s) to draft emails in JSON format (outputs from outlook.composeAndDraftEmailWithContext function).
+ """
+ operationId = None
+ try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"outlook_send_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "Send Draft Email",
+ "Outlook Email Sending",
+ f"Processing {len(parameters.get('documentList', []))} draft(s)",
+ parentOperationId=parentOperationId
+ )
+
+ connectionReference = parameters.get("connectionReference")
+ documentList = parameters.get("documentList", [])
+
+ if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Connection reference is required")
+
+ if not documentList:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="documentList is required and cannot be empty")
+
+ # Convert single value to list if needed
+ if isinstance(documentList, str):
+ documentList = [documentList]
+
+ # Get Microsoft connection
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
+ connection = self.connection.getMicrosoftConnection(connectionReference)
+ if not connection:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # Check permissions
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Checking permissions")
+ permissions_ok = await self.connection.checkPermissions(connection)
+ if not permissions_ok:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Connection lacks necessary permissions for Outlook operations")
+
+ # Read draft email JSON documents from documentList
+ self.services.chat.progressLogUpdate(operationId, 0.4, "Reading draft email documents")
+ draftEmails = []
+ for docRef in documentList:
+ try:
+ # Get documents from document reference
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([docRef]))
+ if not chatDocuments:
+ logger.warning(f"No documents found for reference: {docRef}")
+ continue
+
+ # Process each document in the reference
+ for doc in chatDocuments:
+ try:
+ # Read file data
+ fileId = getattr(doc, 'fileId', None)
+ if not fileId:
+ logger.warning(f"Document {doc.fileName} has no fileId")
+ continue
+
+ fileData = self.services.chat.getFileData(fileId)
+ if not fileData:
+ logger.warning(f"No file data found for document: {doc.fileName}")
+ continue
+
+ # Parse JSON content
+ if isinstance(fileData, bytes):
+ jsonContent = fileData.decode('utf-8')
+ else:
+ jsonContent = str(fileData)
+
+ # Parse JSON - handle both direct JSON and JSON wrapped in documentData
+ try:
+ draftEmailData = json.loads(jsonContent)
+
+ # If the JSON contains a 'documentData' field, extract it
+ if isinstance(draftEmailData, dict) and 'documentData' in draftEmailData:
+ documentDataStr = draftEmailData['documentData']
+ if isinstance(documentDataStr, str):
+ draftEmailData = json.loads(documentDataStr)
+
+ # Validate draft email structure
+ if not isinstance(draftEmailData, dict):
+ logger.warning(f"Document {doc.fileName} does not contain a valid draft email JSON object")
+ continue
+
+ draftId = draftEmailData.get("draftId")
+ if not draftId:
+ logger.warning(f"Document {doc.fileName} does not contain 'draftId' field")
+ continue
+
+ draftEmails.append({
+ "draftEmailJson": draftEmailData,
+ "draftId": draftId,
+ "sourceDocument": doc.fileName,
+ "sourceReference": docRef
+ })
+
+ except json.JSONDecodeError as e:
+ logger.error(f"Failed to parse JSON from document {doc.fileName}: {str(e)}")
+ continue
+
+ except Exception as e:
+ logger.error(f"Error processing document {doc.fileName}: {str(e)}")
+ continue
+
+ except Exception as e:
+ logger.error(f"Error reading documents from reference {docRef}: {str(e)}")
+ continue
+
+ if not draftEmails:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No valid draft email JSON documents found in documentList")
+
+ self.services.chat.progressLogUpdate(operationId, 0.6, f"Found {len(draftEmails)} draft email(s) to send")
+
+ # Send all draft emails
+ graph_url = "https://graph.microsoft.com/v1.0"
+ headers = {
+ "Authorization": f"Bearer {connection['accessToken']}",
+ "Content-Type": "application/json"
+ }
+
+ sentResults = []
+ failedResults = []
+
+ self.services.chat.progressLogUpdate(operationId, 0.7, "Sending emails")
+ for idx, draftEmail in enumerate(draftEmails):
+ draftEmailJson = draftEmail["draftEmailJson"]
+ draftId = draftEmail["draftId"]
+ sourceDocument = draftEmail["sourceDocument"]
+
+ try:
+ send_url = f"{graph_url}/me/messages/{draftId}/send"
+ sendResponse = requests.post(send_url, headers=headers)
+
+ # Extract email details from draft JSON for confirmation
+ subject = draftEmailJson.get("subject", "Unknown")
+ recipients = draftEmailJson.get("recipients", [])
+ cc = draftEmailJson.get("cc", [])
+ bcc = draftEmailJson.get("bcc", [])
+                    # "attachments" may be stored as a count or as a list; normalize to a count
+                    attachments = draftEmailJson.get("attachments", 0)
+                    attachmentsCount = len(attachments) if isinstance(attachments, list) else attachments
+
+ if sendResponse.status_code in [200, 202, 204]:
+ sentResults.append({
+ "status": "sent",
+ "message": "Email sent successfully",
+ "draftId": draftId,
+ "subject": subject,
+ "recipients": recipients,
+ "cc": cc,
+ "bcc": bcc,
+ "attachments": attachmentsCount,
+ "sentTimestamp": self.services.utils.timestampGetUtc(),
+ "sourceDocument": sourceDocument
+ })
+ logger.info(f"Email sent successfully. Draft ID: {draftId}, Subject: {subject}")
+ self.services.chat.progressLogUpdate(operationId, 0.7 + (idx + 1) * 0.2 / len(draftEmails), f"Sent {idx + 1}/{len(draftEmails)}: {subject}")
+ else:
+ errorResult = {
+ "status": "error",
+ "message": "Failed to send draft email",
+ "draftId": draftId,
+ "subject": subject,
+ "recipients": recipients,
+ "sendError": {
+ "statusCode": sendResponse.status_code,
+ "response": sendResponse.text
+ },
+ "sentTimestamp": self.services.utils.timestampGetUtc(),
+ "sourceDocument": sourceDocument
+ }
+ failedResults.append(errorResult)
+ logger.error(f"Failed to send email. Draft ID: {draftId}, Status: {sendResponse.status_code}, Response: {sendResponse.text}")
+
+ except Exception as e:
+ errorResult = {
+ "status": "error",
+ "message": f"Exception while sending draft email: {str(e)}",
+ "draftId": draftId,
+ "subject": draftEmailJson.get("subject", "Unknown"),
+ "recipients": draftEmailJson.get("recipients", []),
+ "exception": str(e),
+ "sentTimestamp": self.services.utils.timestampGetUtc(),
+ "sourceDocument": sourceDocument
+ }
+ failedResults.append(errorResult)
+ logger.error(f"Error sending draft email {draftId}: {str(e)}")
+
+ # Build result summary
+ totalEmails = len(draftEmails)
+ successfulEmails = len(sentResults)
+ failedEmails = len(failedResults)
+
+ resultData = {
+ "totalEmails": totalEmails,
+ "successfulEmails": successfulEmails,
+ "failedEmails": failedEmails,
+ "sentResults": sentResults,
+ "failedResults": failedResults,
+ "timestamp": self.services.utils.timestampGetUtc()
+ }
+
+            # Determine overall success status
+            self.services.chat.progressLogUpdate(operationId, 0.9, f"Sent {successfulEmails}/{totalEmails} email(s)")
+            if successfulEmails == 0:
+                status = "all_failed"
+            elif failedEmails > 0:
+                status = "partial_success"
+            else:
+                status = "all_successful"
+
+            validationMetadata = {
+                "actionType": "outlook.sendDraftEmail",
+                "connectionReference": connectionReference,
+                "totalEmails": totalEmails,
+                "successfulEmails": successfulEmails,
+                "failedEmails": failedEmails,
+                "status": status
+            }
+            confirmationDocument = ActionDocument(
+                documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
+                documentData=json.dumps(resultData, indent=2),
+                mimeType="application/json",
+                validationMetadata=validationMetadata
+            )
+
+            if status == "all_failed":
+                self.services.chat.progressLogFinish(operationId, False)
+                return ActionResult.isFailure(
+                    error=f"Failed to send all {totalEmails} email(s)",
+                    documents=[confirmationDocument]
+                )
+
+            if status == "partial_success":
+                logger.warning(f"Sent {successfulEmails} out of {totalEmails} emails. {failedEmails} failed.")
+            else:
+                logger.info(f"Successfully sent all {totalEmails} email(s)")
+            self.services.chat.progressLogFinish(operationId, True)
+            return ActionResult(
+                success=True,
+                documents=[confirmationDocument]
+            )
+
+ except ImportError:
+ logger.error("requests module not available")
+ return ActionResult.isFailure(error="requests module not available")
+ except Exception as e:
+ logger.error(f"Error in sendDraftEmail: {str(e)}")
+ return ActionResult.isFailure(error=str(e))
+
diff --git a/modules/workflows/methods/methodOutlook/helpers/__init__.py b/modules/workflows/methods/methodOutlook/helpers/__init__.py
new file mode 100644
index 00000000..45028b5a
--- /dev/null
+++ b/modules/workflows/methods/methodOutlook/helpers/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""Helper modules for Outlook method operations."""
+
diff --git a/modules/workflows/methods/methodOutlook/helpers/connection.py b/modules/workflows/methods/methodOutlook/helpers/connection.py
new file mode 100644
index 00000000..8f3daded
--- /dev/null
+++ b/modules/workflows/methods/methodOutlook/helpers/connection.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Connection helper for Outlook operations.
+Handles Microsoft connection management and permission checking.
+"""
+
+import logging
+import requests
+from typing import Dict, Any, Optional
+
+logger = logging.getLogger(__name__)
+
+class ConnectionHelper:
+ """Helper for Microsoft connection management in Outlook operations"""
+
+ def __init__(self, methodInstance):
+ """
+ Initialize connection helper.
+
+ Args:
+ methodInstance: Instance of MethodOutlook (for access to services)
+ """
+ self.method = methodInstance
+ self.services = methodInstance.services
+
+ def getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
+        """
+        Resolve a connection reference to active Microsoft connection details.
+
+        Returns a dict with the connection id, fresh access/refresh tokens, and the
+        Graph scopes expected for Outlook operations, or None if the connection
+        cannot be resolved, has no fresh token, or is not active.
+        """
+ try:
+ logger.debug(f"Getting Microsoft connection for reference: {connectionReference}")
+
+ # Get the connection from the service
+ userConnection = self.services.chat.getUserConnectionFromConnectionReference(connectionReference)
+ if not userConnection:
+ logger.error(f"Connection not found: {connectionReference}")
+ return None
+
+ logger.debug(f"Found connection: {userConnection.id}, status: {userConnection.status.value}, authority: {userConnection.authority.value}")
+
+ # Get a fresh token for this connection
+ token = self.services.chat.getFreshConnectionToken(userConnection.id)
+ if not token:
+ logger.error(f"Fresh token not found for connection: {userConnection.id}")
+ logger.debug(f"Connection details: {userConnection}")
+ return None
+
+ logger.debug(f"Fresh token retrieved for connection {userConnection.id}")
+
+ # Check if connection is active
+ if userConnection.status.value != "active":
+ logger.error(f"Connection is not active: {userConnection.id}, status: {userConnection.status.value}")
+ return None
+
+ return {
+ "id": userConnection.id,
+ "accessToken": token.tokenAccess,
+ "refreshToken": token.tokenRefresh,
+                "scopes": ["Mail.ReadWrite", "Mail.Send", "Mail.ReadWrite.Shared", "User.Read"]  # Graph scopes expected for Outlook operations (informational; actual grants come from the token)
+ }
+ except Exception as e:
+ logger.error(f"Error getting Microsoft connection: {str(e)}")
+ return None
+
+ async def checkPermissions(self, connection: Dict[str, Any]) -> bool:
+ """
+ Check if the current connection has the necessary permissions for Outlook operations.
+ """
+ try:
+ graph_url = "https://graph.microsoft.com/v1.0"
+ headers = {
+ "Authorization": f"Bearer {connection['accessToken']}",
+ "Content-Type": "application/json"
+ }
+
+ # Test permissions by trying to access the user's mail folder
+ test_url = f"{graph_url}/me/mailFolders"
+            response = requests.get(test_url, headers=headers, timeout=30)
+
+ if response.status_code == 200:
+ return True
+ elif response.status_code == 403:
+ logger.error("Permission denied - connection lacks necessary mail permissions")
+ logger.error("Required scopes: Mail.ReadWrite, Mail.Send, Mail.ReadWrite.Shared")
+ return False
+ else:
+ logger.warning(f"Permission check returned status {response.status_code}")
+ return False
+
+ except Exception as e:
+ logger.error(f"Error checking permissions: {str(e)}")
+ return False
+
diff --git a/modules/workflows/methods/methodOutlook/helpers/emailProcessing.py b/modules/workflows/methods/methodOutlook/helpers/emailProcessing.py
new file mode 100644
index 00000000..88644a33
--- /dev/null
+++ b/modules/workflows/methods/methodOutlook/helpers/emailProcessing.py
@@ -0,0 +1,184 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Email Processing helper for Outlook operations.
+Handles email search query sanitization, search parameter building, and filter construction.
+"""
+
+import logging
+import re
+from typing import Dict, Any
+
+logger = logging.getLogger(__name__)
+
+class EmailProcessingHelper:
+ """Helper for email search and processing operations"""
+
+ def __init__(self, methodInstance):
+ """
+ Initialize email processing helper.
+
+ Args:
+ methodInstance: Instance of MethodOutlook (for access to services)
+ """
+ self.method = methodInstance
+ self.services = methodInstance.services
+
+ def sanitizeSearchQuery(self, query: str) -> str:
+ """
+ Sanitize and validate search query for Microsoft Graph API
+
+ Microsoft Graph API has specific requirements for search queries:
+ - Escape special characters properly
+ - Handle search operators correctly
+ - Ensure query format is valid
+ """
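+        # Illustrative examples:
+        #   folder:Drafts            -> folder:Drafts (folder specification, passed through)
+        #   from:alice@example.com   -> passed through unchanged as an advanced query
+        #   it's "urgent"            -> its urgent (quotes and apostrophes stripped)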
+ if not query:
+ return ""
+
+ # Clean the query
+ clean_query = query.strip()
+
+ # Handle folder specifications first
+ if clean_query.lower().startswith('folder:'):
+ folder_name = clean_query[7:].strip()
+ if folder_name:
+ # Return the folder specification as-is
+ return clean_query
+
+ # Remove any double quotes that might cause issues
+ clean_query = clean_query.replace('"', '')
+
+ # Handle common search operators
+ # Recognize Graph operators including both singular and plural forms for hasAttachments
+ lowered = clean_query.lower()
+ if any(op in lowered for op in ['from:', 'to:', 'subject:', 'received:', 'hasattachment:', 'hasattachments:']):
+ # This is an advanced search query, return as-is
+ return clean_query
+
+        # For basic text search, remove characters that could break
+        # the OData contains() filter syntax
+ safe_query = re.sub(r'[\\\'"]', '', clean_query)
+
+ return safe_query
+
+ def buildSearchParameters(self, query: str, folder: str, limit: int) -> Dict[str, Any]:
+ """
+ Build search parameters for Microsoft Graph API
+
+ This method handles the complexity of building search parameters
+ while avoiding conflicts between $search and $filter parameters.
+ """
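+        # Illustrative examples of the resulting parameter dicts:
+        #   query="", folder="Inbox", limit=50
+        #     -> {"$top": 50, "$filter": "parentFolderId eq 'Inbox'", "$orderby": "receivedDateTime desc"}
+        #   query="from:alice@example.com", folder="All", limit=50
+        #     -> {"$top": 50, "$search": '"from:alice@example.com"'}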
+ params = {
+ "$top": limit
+ }
+
+ if not query or not query.strip():
+ # No query specified, just get emails from folder
+            if folder and folder.lower() != "all":
+                # Well-known folder names (inbox, drafts, sentitems, deleteditems) can be
+                # used directly; custom folder names must first be resolved to a folder ID
+                # by the calling method for this filter to match
+                params["$filter"] = f"parentFolderId eq '{folder}'"
+ # Add orderby for basic queries
+ params["$orderby"] = "receivedDateTime desc"
+ return params
+
+ clean_query = self.sanitizeSearchQuery(query)
+
+ # Check if this is a folder specification (e.g., "folder:Drafts", "folder:Inbox")
+ if clean_query.lower().startswith('folder:'):
+ folder_name = clean_query[7:].strip() # Remove "folder:" prefix
+ if folder_name:
+ # This is a folder specification, not a text search
+ # Just filter by folder and return
+ params["$filter"] = f"parentFolderId eq '{folder_name}'"
+ params["$orderby"] = "receivedDateTime desc"
+ return params
+
+ # Check if this is a complex search query with multiple operators
+ # Recognize Graph operators including both singular and plural forms for hasAttachments
+ lowered = clean_query.lower()
+ if any(op in lowered for op in ['from:', 'to:', 'subject:', 'received:', 'hasattachment:', 'hasattachments:']):
+ # This is an advanced search query, use $search
+ # Microsoft Graph API supports complex search syntax
+ params["$search"] = f'"{clean_query}"'
+
+            # Note: $search cannot be combined with $orderby or a folder $filter,
+            # so folder filtering is applied to the results after the API call
+        else:
+            # Use $filter for basic text search, but keep it simple to avoid
+            # "InefficientFilter" errors; Microsoft Graph has limitations on complex filters
+            if len(clean_query) > 50:
+                # If the query is too long, truncate it to avoid complex filter issues
+                clean_query = clean_query[:50]
+
+            # Use only subject search to keep the filter simple; handle wildcards specially
+            if clean_query == "*" or clean_query == "":
+                # For a wildcard or empty query, skip the contains() filter and
+                # restrict by folder only if one is specified
+                if folder and folder.lower() != "all":
+                    params["$filter"] = f"parentFolderId eq '{folder}'"
+            else:
+                params["$filter"] = f"contains(subject,'{clean_query}')"
+                # Append the folder restriction here (and not unconditionally after both
+                # branches) so the folder predicate is not duplicated for wildcard queries
+                if folder and folder.lower() != "all":
+                    params["$filter"] = f"{params['$filter']} and parentFolderId eq '{folder}'"
+
+            # Add orderby for basic queries
+            params["$orderby"] = "receivedDateTime desc"
+
+ return params
+
+ def buildGraphFilter(self, filter_text: str) -> Dict[str, str]:
+ """
+ Build proper Microsoft Graph API filter parameters based on filter text
+
+ Args:
+ filter_text (str): The filter text to process
+
+ Returns:
+ Dict[str, str]: Dictionary with either $filter or $search parameter
+ """
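+        # Illustrative examples:
+        #   "from:bob@example.com" -> {"$search": '"from:bob@example.com"'}
+        #   "bob@example.com"      -> {"$filter": "from/emailAddress/address eq 'bob@example.com'"}
+        #   "invoice"              -> {"$filter": "contains(subject,'invoice')"}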
+ if not filter_text:
+ return {}
+
+ filter_text = filter_text.strip()
+
+ # Handle folder specifications (e.g., "folder:Drafts", "folder:Inbox")
+ if filter_text.lower().startswith('folder:'):
+ folder_name = filter_text[7:].strip() # Remove "folder:" prefix
+ if folder_name:
+ # This is a folder specification, return empty to let the main method handle it
+ return {}
+
+ # Handle search queries (from:, to:, subject:, etc.) - check this FIRST
+ # Support both singular and plural forms for hasAttachments
+ lt = filter_text.lower()
+ if any(lt.startswith(prefix) for prefix in ['from:', 'to:', 'subject:', 'received:', 'hasattachment:', 'hasattachments:']):
+ return {"$search": f'"{filter_text}"'}
+
+ # Handle email address filters (only if it's NOT a search query)
+ if '@' in filter_text and '.' in filter_text and ' ' not in filter_text and not filter_text.startswith('from:'):
+            return {"$filter": f"from/emailAddress/address eq '{filter_text}'"}
+
+ # Handle OData filter conditions (contains 'eq', 'ne', 'gt', 'lt', etc.)
+ if any(op in filter_text.lower() for op in [' eq ', ' ne ', ' gt ', ' lt ', ' ge ', ' le ', ' and ', ' or ']):
+ return {"$filter": filter_text}
+
+ # Handle text content - search in subject
+ return {"$filter": f"contains(subject,'{filter_text}')"}
+
diff --git a/modules/workflows/methods/methodOutlook/helpers/folderManagement.py b/modules/workflows/methods/methodOutlook/helpers/folderManagement.py
new file mode 100644
index 00000000..1ca7be87
--- /dev/null
+++ b/modules/workflows/methods/methodOutlook/helpers/folderManagement.py
@@ -0,0 +1,110 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Folder Management helper for Outlook operations.
+Handles folder ID resolution and folder name lookups.
+"""
+
+import logging
+import requests
+from typing import Dict, Any, Optional
+
+logger = logging.getLogger(__name__)
+
+class FolderManagementHelper:
+ """Helper for folder management operations"""
+
+ def __init__(self, methodInstance):
+ """
+ Initialize folder management helper.
+
+ Args:
+ methodInstance: Instance of MethodOutlook (for access to services)
+ """
+ self.method = methodInstance
+ self.services = methodInstance.services
+
+ def getFolderId(self, folder_name: str, connection: Dict[str, Any]) -> Optional[str]:
+ """
+ Get the folder ID for a given folder name
+
+ This is needed for proper filtering when using advanced search queries
+ """
+ try:
+ graph_url = "https://graph.microsoft.com/v1.0"
+ headers = {
+ "Authorization": f"Bearer {connection['accessToken']}",
+ "Content-Type": "application/json"
+ }
+
+ # Get mail folders
+ api_url = f"{graph_url}/me/mailFolders"
+            response = requests.get(api_url, headers=headers, timeout=30)
+
+ if response.status_code == 200:
+ folders_data = response.json()
+ all_folders = folders_data.get("value", [])
+
+                # Try exact match first
+                for folder in all_folders:
+                    if folder.get("displayName", "").lower() == folder_name.lower():
+                        return folder.get("id")
+
+                # Try common variations for the Drafts folder (including localized names)
+                if folder_name.lower() == "drafts":
+                    draft_variations = ["drafts", "draft", "entwürfe", "entwurf", "brouillons", "brouillon"]
+                    for folder in all_folders:
+                        folder_display_name = folder.get("displayName", "").lower()
+                        if any(variation in folder_display_name for variation in draft_variations):
+                            return folder.get("id")
+
+                # Try common variations for the Sent Items folder (including localized names)
+                if folder_name.lower() == "sent items":
+                    sent_variations = ["sent items", "sent", "gesendete elemente", "éléments envoyés"]
+                    for folder in all_folders:
+                        folder_display_name = folder.get("displayName", "").lower()
+                        if any(variation in folder_display_name for variation in sent_variations):
+                            return folder.get("id")
+
+ logger.warning(f"Folder '{folder_name}' not found. Available folders: {[f.get('displayName', 'Unknown') for f in all_folders]}")
+ return None
+ else:
+ logger.warning(f"Could not retrieve folders: {response.status_code}")
+ return None
+
+ except Exception as e:
+ logger.warning(f"Error getting folder ID for '{folder_name}': {str(e)}")
+ return None
+
+ def getFolderNameById(self, folder_id: str, connection: Dict[str, Any]) -> str:
+ """
+ Get the folder display name for a given folder ID
+ """
+ try:
+ graph_url = "https://graph.microsoft.com/v1.0"
+ headers = {
+ "Authorization": f"Bearer {connection['accessToken']}",
+ "Content-Type": "application/json"
+ }
+
+ # Get folder by ID
+ api_url = f"{graph_url}/me/mailFolders/{folder_id}"
+            response = requests.get(api_url, headers=headers, timeout=30)
+
+ if response.status_code == 200:
+ folder_data = response.json()
+ return folder_data.get("displayName", folder_id)
+ else:
+ logger.warning(f"Could not retrieve folder name for ID {folder_id}: {response.status_code}")
+ return folder_id
+
+ except Exception as e:
+ logger.warning(f"Error getting folder name for ID '{folder_id}': {str(e)}")
+ return folder_id
+
diff --git a/modules/workflows/methods/methodOutlook/methodOutlook.py b/modules/workflows/methods/methodOutlook/methodOutlook.py
new file mode 100644
index 00000000..31bc7dc3
--- /dev/null
+++ b/modules/workflows/methods/methodOutlook/methodOutlook.py
@@ -0,0 +1,237 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+import logging
+from datetime import datetime, UTC
+from modules.workflows.methods.methodBase import MethodBase
+from modules.datamodels.datamodelWorkflowActions import WorkflowActionDefinition, WorkflowActionParameter
+from modules.shared.frontendTypes import FrontendType
+
+# Import helpers
+from .helpers.connection import ConnectionHelper
+from .helpers.emailProcessing import EmailProcessingHelper
+from .helpers.folderManagement import FolderManagementHelper
+
+# Import actions
+from .actions.readEmails import readEmails
+from .actions.searchEmails import searchEmails
+from .actions.composeAndDraftEmailWithContext import composeAndDraftEmailWithContext
+from .actions.sendDraftEmail import sendDraftEmail
+
+logger = logging.getLogger(__name__)
+
+class MethodOutlook(MethodBase):
+ """Outlook method implementation for email operations"""
+
+ def __init__(self, services):
+ """Initialize the Outlook method"""
+ super().__init__(services)
+ self.name = "outlook"
+ self.description = "Handle Microsoft Outlook email operations"
+
+ # Initialize helper modules
+ self.connection = ConnectionHelper(self)
+ self.emailProcessing = EmailProcessingHelper(self)
+ self.folderManagement = FolderManagementHelper(self)
+
+        # RBAC integration: action definitions keyed by actionId
+ self._actions = {
+ "readEmails": WorkflowActionDefinition(
+ actionId="outlook.readEmails",
+ description="Read emails and metadata from a mailbox folder",
+ parameters={
+ "connectionReference": WorkflowActionParameter(
+ name="connectionReference",
+ type="str",
+ frontendType=FrontendType.USER_CONNECTION,
+ required=True,
+ description="Microsoft connection label"
+ ),
+ "folder": WorkflowActionParameter(
+ name="folder",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions="outlook.folder",
+ required=False,
+ default="Inbox",
+ description="Folder to read from"
+ ),
+ "limit": WorkflowActionParameter(
+ name="limit",
+ type="int",
+ frontendType=FrontendType.NUMBER,
+ required=False,
+ default=1000,
+ description="Maximum items to return",
+ validation={"min": 1, "max": 10000}
+ ),
+ "filter": WorkflowActionParameter(
+ name="filter",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=False,
+ description="Sender, query operators, or subject text"
+ ),
+ "outputMimeType": WorkflowActionParameter(
+ name="outputMimeType",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["application/json", "text/plain", "text/csv"],
+ required=False,
+ default="application/json",
+ description="MIME type for output file"
+ )
+ },
+ execute=readEmails.__get__(self, self.__class__)
+ ),
+ "searchEmails": WorkflowActionDefinition(
+ actionId="outlook.searchEmails",
+ description="Search emails by query and return matching items with metadata",
+ parameters={
+ "connectionReference": WorkflowActionParameter(
+ name="connectionReference",
+ type="str",
+ frontendType=FrontendType.USER_CONNECTION,
+ required=True,
+ description="Microsoft connection label"
+ ),
+ "query": WorkflowActionParameter(
+ name="query",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="Search expression"
+ ),
+ "folder": WorkflowActionParameter(
+ name="folder",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions="outlook.folder",
+ required=False,
+ default="All",
+ description="Folder scope or All"
+ ),
+ "limit": WorkflowActionParameter(
+ name="limit",
+ type="int",
+ frontendType=FrontendType.NUMBER,
+ required=False,
+ default=1000,
+ description="Maximum items to return",
+ validation={"min": 1, "max": 10000}
+ ),
+ "outputMimeType": WorkflowActionParameter(
+ name="outputMimeType",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["application/json", "text/plain", "text/csv"],
+ required=False,
+ default="application/json",
+ description="MIME type for output file"
+ )
+ },
+ execute=searchEmails.__get__(self, self.__class__)
+ ),
+ "composeAndDraftEmailWithContext": WorkflowActionDefinition(
+ actionId="outlook.composeAndDraftEmailWithContext",
+ description="Compose email content using AI from context and optional documents, then create a draft",
+ parameters={
+ "connectionReference": WorkflowActionParameter(
+ name="connectionReference",
+ type="str",
+ frontendType=FrontendType.USER_CONNECTION,
+ required=True,
+ description="Microsoft connection label"
+ ),
+ "to": WorkflowActionParameter(
+ name="to",
+ type="List[str]",
+ frontendType=FrontendType.MULTISELECT,
+ required=True,
+ description="Recipient email addresses"
+ ),
+ "context": WorkflowActionParameter(
+ name="context",
+ type="str",
+ frontendType=FrontendType.TEXTAREA,
+ required=True,
+ description="Detailed context for composing the email"
+ ),
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=False,
+ description="Document references for context/attachments"
+ ),
+ "cc": WorkflowActionParameter(
+ name="cc",
+ type="List[str]",
+ frontendType=FrontendType.MULTISELECT,
+ required=False,
+ description="CC recipients"
+ ),
+ "bcc": WorkflowActionParameter(
+ name="bcc",
+ type="List[str]",
+ frontendType=FrontendType.MULTISELECT,
+ required=False,
+ description="BCC recipients"
+ ),
+ "emailStyle": WorkflowActionParameter(
+ name="emailStyle",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["formal", "casual", "business"],
+ required=False,
+ default="business",
+ description="Email style: formal, casual, or business"
+ ),
+ "maxLength": WorkflowActionParameter(
+ name="maxLength",
+ type="int",
+ frontendType=FrontendType.NUMBER,
+ required=False,
+ default=1000,
+ description="Maximum length for generated content",
+ validation={"min": 100, "max": 10000}
+ )
+ },
+ execute=composeAndDraftEmailWithContext.__get__(self, self.__class__)
+ ),
+ "sendDraftEmail": WorkflowActionDefinition(
+ actionId="outlook.sendDraftEmail",
+ description="Send draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext",
+ parameters={
+ "connectionReference": WorkflowActionParameter(
+ name="connectionReference",
+ type="str",
+ frontendType=FrontendType.USER_CONNECTION,
+ required=True,
+ description="Microsoft connection label"
+ ),
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+                        description="Document reference(s) to draft emails in JSON format (output of the outlook.composeAndDraftEmailWithContext action)"
+ )
+ },
+ execute=sendDraftEmail.__get__(self, self.__class__)
+ )
+ }
+
+ # Validate actions after definition
+ self._validateActions()
+
+        # Register actions as bound methods (optional, for direct access).
+        # functionName.__get__(self, cls) binds a module-level action function
+        # to this instance so it can be called like a regular method.
+ self.readEmails = readEmails.__get__(self, self.__class__)
+ self.searchEmails = searchEmails.__get__(self, self.__class__)
+ self.composeAndDraftEmailWithContext = composeAndDraftEmailWithContext.__get__(self, self.__class__)
+ self.sendDraftEmail = sendDraftEmail.__get__(self, self.__class__)
+
+ def _format_timestamp_for_filename(self) -> str:
+ """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
+ return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+
diff --git a/modules/workflows/methods/methodSharepoint.py b/modules/workflows/methods/methodSharepoint.py.old
similarity index 100%
rename from modules/workflows/methods/methodSharepoint.py
rename to modules/workflows/methods/methodSharepoint.py.old
diff --git a/modules/workflows/methods/methodSharepoint/__init__.py b/modules/workflows/methods/methodSharepoint/__init__.py
new file mode 100644
index 00000000..40c14cf3
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+from .methodSharepoint import MethodSharepoint
+
+__all__ = ['MethodSharepoint']
+
diff --git a/modules/workflows/methods/methodSharepoint/actions/__init__.py b/modules/workflows/methods/methodSharepoint/actions/__init__.py
new file mode 100644
index 00000000..6975f8af
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/actions/__init__.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""Action modules for SharePoint operations."""
+
+# Export all actions
+from .findDocumentPath import findDocumentPath
+from .readDocuments import readDocuments
+from .uploadDocument import uploadDocument
+from .listDocuments import listDocuments
+from .analyzeFolderUsage import analyzeFolderUsage
+from .findSiteByUrl import findSiteByUrl
+from .downloadFileByPath import downloadFileByPath
+from .copyFile import copyFile
+from .uploadFile import uploadFile
+
+__all__ = [
+ 'findDocumentPath',
+ 'readDocuments',
+ 'uploadDocument',
+ 'listDocuments',
+ 'analyzeFolderUsage',
+ 'findSiteByUrl',
+ 'downloadFileByPath',
+ 'copyFile',
+ 'uploadFile',
+]
+
diff --git a/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py b/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py
new file mode 100644
index 00000000..075c8b96
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py
@@ -0,0 +1,337 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Analyze Folder Usage action for SharePoint operations.
+Analyzes usage intensity of folders and files in SharePoint.
+"""
+
+import logging
+import time
+import json
+from datetime import datetime, timezone, timedelta
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+    GENERAL:
+    - Purpose: Analyze usage intensity of folders and files in SharePoint.
+    - Input requirements: connectionReference (required); documentList or pathQuery (one of the two required); optional startDateTime, endDateTime, interval.
+    - Output format: JSON with usage analytics grouped by time intervals.
+
+    Parameters:
+    - connectionReference (str, required): Microsoft connection label.
+    - documentList (list, conditionally required): Document list reference(s) containing a findDocumentPath result. Required if pathQuery is not provided.
+    - pathQuery (str, conditionally required): Folder path to analyze directly. Required if documentList is not provided.
+    - startDateTime (str, optional): Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). Default: 30 days ago.
+    - endDateTime (str, optional): End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). Default: current time.
+    - interval (str, optional): Time interval for grouping activities. Options: "day", "week", "month". Default: "day".
+ """
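+    # Illustrative call parameters:
+    #   {"connectionReference": "ms-connection", "documentList": ["<docref>"],
+    #    "startDateTime": "2025-11-01T00:00:00Z", "interval": "week"}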
+ operationId = None
+ try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"sharepoint_usage_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "Analyze Folder Usage",
+ "SharePoint Analytics",
+ "Processing document list",
+ parentOperationId=parentOperationId
+ )
+
+ connectionReference = parameters.get("connectionReference")
+ documentList = parameters.get("documentList")
+ pathQuery = parameters.get("pathQuery")
+ if isinstance(documentList, str):
+ documentList = [documentList]
+ startDateTime = parameters.get("startDateTime")
+ endDateTime = parameters.get("endDateTime")
+ interval = parameters.get("interval", "day")
+
+ if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Connection reference is required")
+
+ # Require either documentList or pathQuery
+ if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"):
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList or pathQuery is required")
+
+ # Resolve folder/item information from documentList or pathQuery
+ siteId = None
+ driveId = None
+ itemId = None
+ folderPath = None
+ folderName = None
+ foundDocuments = None
+
+ if documentList:
+ foundDocuments, sites, errorMsg = await self.documentParsing.parseDocumentListForFoundDocuments(documentList)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
+
+ if not foundDocuments:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No documents found in documentList")
+
+ # Get siteId from first document (all should be from same site)
+ firstItem = foundDocuments[0]
+ siteId = firstItem.get("siteId")
+ if not siteId:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Site ID missing from documentList")
+
+ # Get drive ID (needed for analytics)
+ driveId = await self.services.sharepoint.getDriveId(siteId)
+ if not driveId:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Could not determine drive ID for the site")
+
+ # If no items from documentList, try pathQuery fallback
+ if not foundDocuments and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
+ sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
+
+ if sites:
+ siteId = sites[0].get("id")
+ # Parse pathQuery to find the folder/item
+ pathQueryParsed, fileQuery, searchType, searchOptions = self.pathProcessing.parseSearchQuery(pathQuery)
+
+ # Extract folder path from pathQuery
+ folderPath = '/'
+ if pathQueryParsed and pathQueryParsed.startswith('/sites/'):
+ parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQueryParsed)
+ if parsedPath:
+ innerPath = parsedPath.get("innerPath", "")
+ folderPath = '/' + innerPath if innerPath else '/'
+ elif pathQueryParsed:
+ folderPath = pathQueryParsed
+
+ # Get drive ID
+ driveId = await self.services.sharepoint.getDriveId(siteId)
+ if not driveId:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Could not determine drive ID for the site")
+
+ # Get folder/item by path
+ folderInfo = await self.services.sharepoint.getFolderByPath(siteId, folderPath.lstrip('/'))
+ if not folderInfo:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=f"Folder or file not found at path: {folderPath}")
+
+ # Add pathQuery item to foundDocuments for processing
+ foundDocuments = [{
+ "id": folderInfo.get("id"),
+ "name": folderInfo.get("name", ""),
+ "type": "folder" if folderInfo.get("folder") else "file",
+ "siteId": siteId,
+ "fullPath": folderPath,
+ "webUrl": folderInfo.get("webUrl", "")
+ }]
+
+ if not siteId or not driveId:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")
+
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
+ # Get Microsoft connection
+ connection = self.connection.getMicrosoftConnection(connectionReference)
+ if not connection:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # Set access token
+ if not self.services.sharepoint.setAccessTokenFromConnection(connection):
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Failed to set SharePoint access token")
+
+ # Process all items from documentList or pathQuery
+ # IMPORTANT: Only analyze FOLDERS, not files (action is "analyzeFolderUsage")
+ itemsToAnalyze = []
+ if foundDocuments:
+ for item in foundDocuments:
+ itemId = item.get("id")
+ itemType = item.get("type", "").lower()
+
+ # Only process folders, skip files and site-level items
+ if itemId and itemType == "folder":
+ itemsToAnalyze.append({
+ "id": itemId,
+ "name": item.get("name", ""),
+ "type": itemType,
+ "path": item.get("fullPath", ""),
+ "webUrl": item.get("webUrl", "")
+ })
+
+ if not itemsToAnalyze:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No valid folders found in documentList to analyze. Note: This action only analyzes folders, not files.")
+
+ self.services.chat.progressLogUpdate(operationId, 0.4, f"Analyzing {len(itemsToAnalyze)} folder(s)")
+
+ # Analyze each item
+ allAnalytics = []
+ totalActivities = 0
+ uniqueUsers = set()
+ activityTypes = {}
+
+ # Compute actual date range values (getFolderUsageAnalytics will set defaults if None)
+ # We need to compute them here to store in output, since getFolderUsageAnalytics modifies them
+ actualStartDateTime = startDateTime
+ actualEndDateTime = endDateTime
+ if not actualEndDateTime:
+ actualEndDateTime = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
+ if not actualStartDateTime:
+ startDate = datetime.now(timezone.utc) - timedelta(days=30)
+ actualStartDateTime = startDate.isoformat().replace('+00:00', 'Z')
+
+ for idx, item in enumerate(itemsToAnalyze):
+ progress = 0.4 + (idx / len(itemsToAnalyze)) * 0.5
+ self.services.chat.progressLogUpdate(operationId, progress, f"Analyzing folder {item['name']} ({idx+1}/{len(itemsToAnalyze)})")
+
+ # Get usage analytics for this folder
+ analyticsResult = await self.services.sharepoint.getFolderUsageAnalytics(
+ siteId=siteId,
+ driveId=driveId,
+ itemId=item["id"],
+ startDateTime=startDateTime,
+ endDateTime=endDateTime,
+ interval=interval
+ )
+
+ if "error" in analyticsResult:
+ logger.warning(f"Failed to get analytics for item {item['name']} ({item['id']}): {analyticsResult['error']}")
+ # Continue with other items even if one fails
+ itemAnalytics = {
+ "itemId": item["id"],
+ "itemName": item["name"],
+ "itemType": item["type"],
+ "itemPath": item["path"],
+ "error": analyticsResult.get("error", "Unknown error")
+ }
+ else:
+ # Process analytics for this item
+ itemActivities = 0
+ itemUsers = set()
+ itemActivityTypes = {}
+
+ if "value" in analyticsResult:
+ for intervalData in analyticsResult["value"]:
+ activities = intervalData.get("activities", [])
+ for activity in activities:
+ itemActivities += 1
+ totalActivities += 1
+
+                            # Use a distinct name to avoid shadowing the @action decorator import
+                            activityAction = activity.get("action", {})
+                            actionType = activityAction.get("verb", "unknown")
+ itemActivityTypes[actionType] = itemActivityTypes.get(actionType, 0) + 1
+ activityTypes[actionType] = activityTypes.get(actionType, 0) + 1
+
+ actor = activity.get("actor", {})
+ userPrincipalName = actor.get("userPrincipalName", "")
+ if userPrincipalName:
+ itemUsers.add(userPrincipalName)
+ uniqueUsers.add(userPrincipalName)
+
+ itemAnalytics = {
+ "itemId": item["id"],
+ "itemName": item["name"],
+ "itemType": item["type"],
+ "itemPath": item["path"],
+ "webUrl": item["webUrl"],
+ "analytics": analyticsResult,
+ "summary": {
+ "totalActivities": itemActivities,
+ "uniqueUsers": len(itemUsers),
+ "activityTypes": itemActivityTypes
+ }
+ }
+
+ # Include note if analytics are not available
+ if "note" in analyticsResult:
+ itemAnalytics["note"] = analyticsResult["note"]
+
+ allAnalytics.append(itemAnalytics)
+
+ self.services.chat.progressLogUpdate(operationId, 0.9, "Processing analytics data")
+
+ # Process and format analytics data
+ resultData = {
+ "siteId": siteId,
+ "driveId": driveId,
+ "startDateTime": actualStartDateTime, # Store computed date range (not None)
+ "endDateTime": actualEndDateTime, # Store computed date range (not None)
+ "interval": interval,
+ "itemsAnalyzed": len(itemsToAnalyze),
+ "foldersAnalyzed": len([item for item in allAnalytics if item.get("itemType") == "folder"]),
+ "items": allAnalytics,
+ "summary": {
+ "totalActivities": totalActivities,
+ "uniqueUsers": len(uniqueUsers),
+ "activityTypes": activityTypes
+ },
+ "note": f"Analyzed {len(itemsToAnalyze)} folder(s) from {actualStartDateTime} to {actualEndDateTime}. " +
+ f"Found {totalActivities} total activities across {len(uniqueUsers)} unique user(s)." +
+ (f" Note: {len([item for item in allAnalytics if 'error' in item])} folder(s) had errors or no analytics data available." if any('error' in item for item in allAnalytics) else ""),
+ "timestamp": self.services.utils.timestampGetUtc()
+ }
+
+ self.services.chat.progressLogUpdate(operationId, 0.95, f"Found {totalActivities} total activities across {len(itemsToAnalyze)} folder(s)")
+
+ validationMetadata = {
+ "actionType": "sharepoint.analyzeFolderUsage",
+ "itemsAnalyzed": len(itemsToAnalyze),
+ "interval": interval,
+ "totalActivities": totalActivities,
+ "uniqueUsers": len(uniqueUsers)
+ }
+
+ self.services.chat.progressLogFinish(operationId, True)
+ return ActionResult(
+ success=True,
+ documents=[
+ ActionDocument(
+ documentName=self._generateMeaningfulFileName("sharepoint_usage_analysis", "json", None, "analyzeFolderUsage"),
+ documentData=json.dumps(resultData, indent=2),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+ ]
+ )
+
+ except Exception as e:
+ logger.error(f"Error analyzing folder usage: {str(e)}")
+ if operationId:
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+            except Exception:
+                # Progress logging failures should not mask the original error
+                pass
+        return ActionResult.isFailure(error=str(e))
+
diff --git a/modules/workflows/methods/methodSharepoint/actions/copyFile.py b/modules/workflows/methods/methodSharepoint/actions/copyFile.py
new file mode 100644
index 00000000..1b6d821d
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/actions/copyFile.py
@@ -0,0 +1,163 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Copy File action for SharePoint operations.
+Copies file within SharePoint.
+"""
+
+import logging
+import json
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def copyFile(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Copy file within SharePoint.
+
+ Parameters:
+ - connectionReference (str, required): Microsoft connection label.
+ - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
+ - sourceFolder (str, required): Source folder path relative to site root
+ - sourceFile (str, required): Source file name
+ - destFolder (str, required): Destination folder path relative to site root
+ - destFile (str, required): Destination file name
+
+ Returns:
+ - ActionResult with ActionDocument containing copy result
+ """
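+    # Illustrative parameters:
+    #   {"connectionReference": "ms-connection", "siteId": "<siteId or docref>",
+    #    "sourceFolder": "/General/Reports", "sourceFile": "Q1.xlsx",
+    #    "destFolder": "/General/Archive", "destFile": "Q1-2025.xlsx"}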
+ try:
+ connectionReference = parameters.get("connectionReference")
+ if not connectionReference:
+ return ActionResult.isFailure(error="connectionReference parameter is required")
+
+ siteIdParam = parameters.get("siteId")
+ if not siteIdParam:
+ return ActionResult.isFailure(error="siteId parameter is required")
+
+ sourceFolder = parameters.get("sourceFolder")
+ if not sourceFolder:
+ return ActionResult.isFailure(error="sourceFolder parameter is required")
+
+ sourceFile = parameters.get("sourceFile")
+ if not sourceFile:
+ return ActionResult.isFailure(error="sourceFile parameter is required")
+
+ destFolder = parameters.get("destFolder")
+ if not destFolder:
+ return ActionResult.isFailure(error="destFolder parameter is required")
+
+ destFile = parameters.get("destFile")
+ if not destFile:
+ return ActionResult.isFailure(error="destFile parameter is required")
+
+ # Extract siteId from document if it's a reference
+ siteId = None
+ if isinstance(siteIdParam, str):
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+ try:
+ docList = DocumentReferenceList.from_string_list([siteIdParam])
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
+ if chatDocuments and len(chatDocuments) > 0:
+ siteInfoJson = json.loads(chatDocuments[0].documentData)
+ siteId = siteInfoJson.get("id")
+            except Exception:
+                # Not a document reference; fall back to treating the parameter as a raw site ID
+                pass
+
+ if not siteId:
+ siteId = siteIdParam
+ else:
+ siteId = siteIdParam
+
+ if not siteId:
+ return ActionResult.isFailure(error="Could not extract siteId from parameter")
+
+ # Get Microsoft connection
+ connection = self.connection.getMicrosoftConnection(connectionReference)
+ if not connection:
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # Copy file
+ await self.services.sharepoint.copyFileAsync(
+ siteId=siteId,
+ sourceFolder=sourceFolder,
+ sourceFile=sourceFile,
+ destFolder=destFolder,
+ destFile=destFile
+ )
+
+ logger.info(f"Copied file in SharePoint: {sourceFolder}/{sourceFile} -> {destFolder}/{destFile}")
+
+ # Generate filename
+ workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+ filename = self._generateMeaningfulFileName(
+ "file_copy_result",
+ "json",
+ workflowContext,
+ "copyFile"
+ )
+
+ result = {
+ "success": True,
+ "siteId": siteId,
+ "sourcePath": f"{sourceFolder}/{sourceFile}",
+ "destPath": f"{destFolder}/{destFile}"
+ }
+
+ validationMetadata = self._createValidationMetadata(
+ "copyFile",
+ siteId=siteId,
+ sourcePath=f"{sourceFolder}/{sourceFile}",
+ destPath=f"{destFolder}/{destFile}"
+ )
+
+ document = ActionDocument(
+ documentName=filename,
+ documentData=json.dumps(result, indent=2),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ # Handle file not found gracefully
+ if "itemNotFound" in str(e) or "404" in str(e):
+ logger.warning(f"File not found for copy: {parameters.get('sourceFolder')}/{parameters.get('sourceFile')}")
+ # Return success with skipped status
+ workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+ filename = self._generateMeaningfulFileName(
+ "file_copy_result",
+ "json",
+ workflowContext,
+ "copyFile"
+ )
+
+ result = {
+ "success": True,
+ "skipped": True,
+ "reason": "File not found (may not exist yet)"
+ }
+
+ validationMetadata = self._createValidationMetadata(
+ "copyFile",
+ skipped=True
+ )
+
+ document = ActionDocument(
+ documentName=filename,
+ documentData=json.dumps(result, indent=2),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ errorMsg = f"Error copying file in SharePoint: {str(e)}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
diff --git a/modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py b/modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py
new file mode 100644
index 00000000..d6e291a8
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/actions/downloadFileByPath.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Download File By Path action for SharePoint operations.
+Downloads file from SharePoint by exact file path.
+"""
+
+import logging
+import json
+import base64
+import os
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def downloadFileByPath(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Download file from SharePoint by exact file path.
+
+ Parameters:
+ - connectionReference (str, required): Microsoft connection label.
+ - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
+ - filePath (str, required): Full file path relative to site root (e.g., "/General/50 Docs hosted by SELISE/file.xlsx")
+
+ Returns:
+ - ActionResult with ActionDocument containing file content as base64-encoded bytes
+ """
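+    # Illustrative parameters:
+    #   {"connectionReference": "ms-connection", "siteId": "<siteId or docref>",
+    #    "filePath": "/General/Reports/Q1.xlsx"}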
+ try:
+ connectionReference = parameters.get("connectionReference")
+ if not connectionReference:
+ return ActionResult.isFailure(error="connectionReference parameter is required")
+
+ siteIdParam = parameters.get("siteId")
+ if not siteIdParam:
+ return ActionResult.isFailure(error="siteId parameter is required")
+
+ filePath = parameters.get("filePath")
+ if not filePath:
+ return ActionResult.isFailure(error="filePath parameter is required")
+
+ # Extract siteId from document if it's a reference
+ siteId = None
+ if isinstance(siteIdParam, str):
+ # Try to parse from document reference
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+ try:
+ docList = DocumentReferenceList.from_string_list([siteIdParam])
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
+ if chatDocuments and len(chatDocuments) > 0:
+ siteInfoJson = json.loads(chatDocuments[0].documentData)
+ siteId = siteInfoJson.get("id")
+            except Exception:
+                # Not a document reference; fall back to treating the parameter as a raw site ID
+                pass
+
+ if not siteId:
+ # Assume it's the site ID directly
+ siteId = siteIdParam
+ else:
+ siteId = siteIdParam
+
+ if not siteId:
+ return ActionResult.isFailure(error="Could not extract siteId from parameter")
+
+ # Get Microsoft connection
+ connection = self.connection.getMicrosoftConnection(connectionReference)
+ if not connection:
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # Download file
+ fileContent = await self.services.sharepoint.downloadFileByPath(
+ siteId=siteId,
+ filePath=filePath
+ )
+
+ if fileContent is None:
+ return ActionResult.isFailure(error=f"File not found or could not be downloaded: {filePath}")
+
+ logger.info(f"Downloaded file from SharePoint: {filePath} ({len(fileContent)} bytes)")
+
+ # Generate filename from filePath
+        fileName = os.path.basename(filePath) or "downloaded_file"
+        baseName, extension = os.path.splitext(fileName)
+        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+        filename = self._generateMeaningfulFileName(
+            baseName or fileName,
+            extension.lstrip('.') or "bin",
+            workflowContext,
+            "downloadFileByPath"
+        )
+
+ # Encode as base64
+ fileBase64 = base64.b64encode(fileContent).decode('utf-8')
+
+ validationMetadata = self._createValidationMetadata(
+ "downloadFileByPath",
+ siteId=siteId,
+ filePath=filePath,
+ fileSize=len(fileContent)
+ )
+
+ document = ActionDocument(
+ documentName=filename,
+ documentData=fileBase64,
+ mimeType="application/octet-stream",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ errorMsg = f"Error downloading file from SharePoint: {str(e)}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
diff --git a/modules/workflows/methods/methodSharepoint/actions/findDocumentPath.py b/modules/workflows/methods/methodSharepoint/actions/findDocumentPath.py
new file mode 100644
index 00000000..01c1baf3
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/actions/findDocumentPath.py
@@ -0,0 +1,497 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Find Document Path action for SharePoint operations.
+Finds documents and folders by name/path across SharePoint sites.
+"""
+
+import logging
+import time
+import json
+import urllib.parse
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Find documents and folders by name/path across sites.
+    - Input requirements: connectionReference (required); searchQuery (defaults to "*" when omitted); optional site, maxResults.
+ - Output format: JSON with found items and paths.
+
+ Parameters:
+ - connectionReference (str, required): Microsoft connection label.
+ - site (str, optional): Site hint.
+    - searchQuery (str): Search terms or path; defaults to "*" (match everything) when omitted.
+ - maxResults (int, optional): Maximum items to return. Default: 1000.
+ """
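+    # Illustrative searchQuery values:
+    #   "/sites/Finance/Shared Documents/Reports"  (Microsoft-standard site path)
+    #   "quarterly report"                         (free-text search terms)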
+ operationId = None
+ try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"sharepoint_find_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "Find Document Path",
+ "SharePoint Search",
+ f"Query: {parameters.get('searchQuery', '*')}",
+ parentOperationId=parentOperationId
+ )
+
+ connectionReference = parameters.get("connectionReference")
+ site = parameters.get("site")
+ searchQuery = parameters.get("searchQuery", "*")
+ maxResults = parameters.get("maxResults", 1000)
+
+ if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Connection reference is required")
+
+ # Parse searchQuery to extract path, search terms, search type, and options
+ pathQuery, fileQuery, searchType, searchOptions = self.pathProcessing.parseSearchQuery(searchQuery)
+ logger.debug(f"Parsed searchQuery '{searchQuery}' -> pathQuery='{pathQuery}', fileQuery='{fileQuery}', searchType='{searchType}'")
+
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
+ connection = self.connection.getMicrosoftConnection(connectionReference)
+ if not connection:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # Extract site name from pathQuery if it contains Microsoft-standard path (/sites/SiteName/...)
+ siteFromPath = None
+ directSite = None
+ if pathQuery and pathQuery.startswith('/sites/'):
+ parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQuery)
+ if parsedPath:
+ siteFromPath = parsedPath.get("siteName")
+ logger.info(f"Extracted site from Microsoft-standard pathQuery '{pathQuery}': '{siteFromPath}'")
+
+ # Try to get site directly by path (optimization - no need to load all 60 sites)
+ directSite = await self.siteDiscovery.getSiteByStandardPath(siteFromPath)
+ if directSite:
+ logger.info(f"Got site directly by standard path - no need to discover all sites")
+ sites = [directSite]
+ else:
+ logger.warning(f"Could not get site directly, falling back to site discovery")
+ directSite = None
+ else:
+ logger.warning(f"Failed to parse site from standard pathQuery '{pathQuery}'")
+
+ # If we didn't get the site directly, use discovery and filtering
+ if not directSite:
+ # Determine which site hint to use (priority: site parameter > site from pathQuery > site_hint from searchOptions)
+ siteHintToUse = site or siteFromPath or searchOptions.get("site_hint")
+
+ # Discover SharePoint sites - use targeted approach when site hint is available
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Discovering SharePoint sites")
+ if siteHintToUse:
+ # When site hint is available, discover all sites first, then filter
+ allSites = await self.siteDiscovery.discoverSharePointSites()
+ if not allSites:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+
+ sites = self.siteDiscovery.filterSitesByHint(allSites, siteHintToUse)
+ logger.info(f"Filtered sites by site hint '{siteHintToUse}' -> {len(sites)} sites")
+ if not sites:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=f"No SharePoint sites found matching '{siteHintToUse}'")
+ else:
+ # No site hint - discover all sites
+ sites = await self.siteDiscovery.discoverSharePointSites()
+ if not sites:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+
+ # Resolve path query into search paths
+ searchPaths = self.pathProcessing.resolvePathQuery(pathQuery)
+
+ self.services.chat.progressLogUpdate(operationId, 0.5, f"Searching across {len(sites)} site(s)")
+
+ try:
+ # Search across all discovered sites
+ foundDocuments = []
+ allSitesSearched = []
+
+ # Handle different search approaches based on search type
+ if searchType == "folders" and fileQuery and fileQuery.strip() != "" and fileQuery.strip() != "*":
+ # Use unified search for folders - this is global and searches all sites
+ try:
+ # Use Microsoft Graph Search API syntax (simple term search only)
+ terms = [t for t in fileQuery.split() if t.strip()]
+
+                        if len(terms) > 1:
+                            # Multiple terms: require ALL terms (KQL AND) for more specific results
+                            queryString = " AND ".join(terms)
+                        else:
+                            # Single term: search for the term as-is
+                            queryString = terms[0] if terms else fileQuery
+
+ payload = {
+ "requests": [
+ {
+ "entityTypes": ["driveItem"],
+ "query": {"queryString": queryString},
+ "from": 0,
+ "size": 50
+ }
+ ]
+ }
+ logger.info(f"Using unified search API for folders with queryString: {queryString}")
+
+ # Use global search endpoint (site-specific search not available)
+ unifiedResult = await self.apiClient.makeGraphApiCall(
+ "search/query",
+ method="POST",
+ data=json.dumps(payload).encode("utf-8")
+ )
+
+ if "error" in unifiedResult:
+ logger.warning(f"Unified search failed: {unifiedResult['error']}")
+ items = []
+ else:
+ # Flatten hits -> driveItem resources
+ items = []
+ for container in (unifiedResult.get("value", []) or []):
+ for hitsContainer in (container.get("hitsContainers", []) or []):
+ for hit in (hitsContainer.get("hits", []) or []):
+ resource = hit.get("resource")
+ if resource:
+ items.append(resource)
+
+ logger.info(f"Unified search returned {len(items)} items (pre-filter)")
+
+ # Keep only folders, using the shared folder-detection logic
+ items = [item for item in items if self.services.sharepoint.detectFolderType(item)]
+ logger.info(f"Filtered to {len(items)} folders using improved detection logic")
+
+ # Process unified search results - extract site information from webUrl
+ for item in items:
+ itemName = item.get("name", "")
+ webUrl = item.get("webUrl", "")
+
+ # Extract site information from webUrl
+ siteName = "Unknown Site"
+ siteId = "unknown"
+
+ if webUrl and '/sites/' in webUrl:
+ try:
+ # Extract site name from URL: https://pcuster.sharepoint.com/sites/SiteName/...
+ urlParts = webUrl.split('/sites/')
+ if len(urlParts) > 1:
+ sitePath = urlParts[1].split('/')[0]
+ # Find matching site from discovered sites
+ # First try to match by site name (URL path)
+ for site in sites:
+ if site.get("name") == sitePath:
+ siteName = site.get("displayName", sitePath)
+ siteId = site.get("id", "unknown")
+ break
+ else:
+ # If no match by name, try to match by displayName
+ for site in sites:
+ if site.get("displayName") == sitePath:
+ siteName = site.get("displayName", sitePath)
+ siteId = site.get("id", "unknown")
+ break
+ else:
+ # If no exact match, use the site path as site name
+ siteName = sitePath
+ # Try to find a site with similar name
+ for site in sites:
+ if sitePath.lower() in site.get("name", "").lower() or sitePath.lower() in site.get("displayName", "").lower():
+ siteName = site.get("displayName", sitePath)
+ siteId = site.get("id", "unknown")
+ break
+ except Exception as e:
+ logger.warning(f"Error extracting site info from URL {webUrl}: {e}")
+
+ # Use improved folder detection logic
+ isFolder = self.services.sharepoint.detectFolderType(item)
+ itemType = "folder" if isFolder else "file"
+ itemPath = item.get("parentReference", {}).get("path", "")
+ logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'")
+
+ # Filter by the requested search type
+ if searchType == "files" and isFolder:
+ continue # Skip folders when searching for files
+ elif searchType == "folders" and not isFolder:
+ continue # Skip files when searching for folders
+
+ # Item matches the requested type - add it to the results
+ logger.debug(f"Item '{itemName}' found - adding to results")
+
+ # Create result with full path information for proper action chaining
+ parentPath = item.get("parentReference", {}).get("path", "")
+
+ # Extract the full SharePoint path from webUrl or parentReference
+ fullPath = ""
+ if webUrl:
+ # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
+ if '/sites/' in webUrl:
+ pathPart = webUrl.split('/sites/')[1]
+ # Decode URL encoding and convert to backslash format
+ decodedPath = urllib.parse.unquote(pathPart)
+ fullPath = "\\" + decodedPath.replace('/', '\\')
+ elif parentPath:
+ # Use parentReference path if available
+ fullPath = parentPath.replace('/', '\\')
+
+ docInfo = {
+ "id": item.get("id"),
+ "name": item.get("name"),
+ "type": "folder" if isFolder else "file",
+ "siteName": siteName,
+ "siteId": siteId,
+ "webUrl": webUrl,
+ "fullPath": fullPath,
+ "parentPath": parentPath
+ }
+
+ foundDocuments.append(docInfo)
+
+ logger.info(f"Found {len(foundDocuments)} documents from unified search")
+
+ except Exception as e:
+ logger.error(f"Error performing unified folder search: {str(e)}")
+ # Fall through to the site-by-site fallback below
+
+ # If no unified search was performed or it failed, fall back to site-by-site search
+ if not foundDocuments:
+ # Search every discovered site (no additional site filtering)
+ siteScopedSites = sites
+
+ for site in siteScopedSites:
+ siteId = site["id"]
+ siteName = site["displayName"]
+ siteUrl = site["webUrl"]
+
+ logger.info(f"Searching in site: {siteName} ({siteUrl})")
+
+ # Check if pathQuery contains a specific folder path (not just /sites/SiteName)
+ folderPath = None
+ if pathQuery and pathQuery.startswith('/sites/'):
+ parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQuery)
+ if parsedPath:
+ innerPath = parsedPath.get("innerPath", "")
+ if innerPath and innerPath.strip():
+ # Remove leading slash if present
+ folderPath = innerPath.lstrip('/')
+
+ # Generic approach: Try to find the folder, if it fails, remove first segment
+ # This works for all languages because we test the actual API response
+ # In SharePoint Graph API, /drive/root already points to the default document library,
+ # so library names in paths should be removed
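+ # e.g. 'Freigegebene Dokumente/General/Projekt' (illustrative) -> test 'General/Projekt' against the API first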
+ pathSegments = [s for s in folderPath.split('/') if s.strip()]
+ if len(pathSegments) > 1:
+ # Try with first segment removed (first segment is likely the document library)
+ testPath = '/'.join(pathSegments[1:])
+ # Quick test: try to get folder info (this is fast and doesn't require full search)
+ testEndpoint = f"sites/{siteId}/drive/root:/{urllib.parse.quote(testPath, safe='')}:"
+ testResult = await self.apiClient.makeGraphApiCall(testEndpoint)
+ if testResult and "error" not in testResult:
+ # Path without first segment works - first segment was likely the document library
+ folderPath = testPath
+ logger.info(f"Removed document library name '{pathSegments[0]}' from folder path (tested via API)")
+ else:
+ # Keep original path - first segment is not a document library
+ logger.info(f"Keeping original folder path '{folderPath}' (first segment is not a document library)")
+ elif len(pathSegments) == 1:
+ # Only one segment - likely the document library itself, use root
+ folderPath = None
+ logger.info(f"Only one segment '{pathSegments[0]}' found, likely document library - using root")
+
+ if folderPath:
+ logger.info(f"Extracted folder path from pathQuery: '{folderPath}'")
+ else:
+ logger.info(f"Folder path resolved to root (only document library in path)")
+
+ # Use Microsoft Graph API for this specific site
+ # Handle empty or wildcard queries
+ if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
+ # For wildcard/empty queries, list all items
+ if folderPath:
+ # List items in specific folder
+ encodedPath = urllib.parse.quote(folderPath, safe='')
+ endpoint = f"sites/{siteId}/drive/root:/{encodedPath}:/children"
+ logger.info(f"Listing items in folder: '{folderPath}'")
+ else:
+ # List all items in the drive root
+ endpoint = f"sites/{siteId}/drive/root/children"
+
+ # Make the API call to list items
+ listResult = await self.apiClient.makeGraphApiCall(endpoint)
+ if "error" in listResult:
+ logger.warning(f"List failed for site {siteName}: {listResult['error']}")
+ continue
+ # Process list results for this site
+ items = listResult.get("value", [])
+ logger.info(f"Retrieved {len(items)} items from site {siteName}")
+ else:
+ # For files, use regular search API
+ # Clean the query: remove path-like syntax and invalid KQL syntax
+ searchQueryCleaned = self.pathProcessing.cleanSearchQuery(fileQuery)
+ # URL-encode the query parameter
+ encodedQuery = urllib.parse.quote(searchQueryCleaned, safe='')
+
+ if folderPath:
+ # Search in specific folder
+ encodedPath = urllib.parse.quote(folderPath, safe='')
+ endpoint = f"sites/{siteId}/drive/root:/{encodedPath}:/search(q='{encodedQuery}')"
+ logger.info(f"Searching in folder '{folderPath}' with query: '{searchQueryCleaned}' (encoded: '{encodedQuery}')")
+ else:
+ # Search in drive root
+ endpoint = f"sites/{siteId}/drive/root/search(q='{encodedQuery}')"
+ logger.info(f"Using search API for files with query: '{searchQueryCleaned}' (encoded: '{encodedQuery}')")
+
+ # Make the search API call (files)
+ searchResult = await self.apiClient.makeGraphApiCall(endpoint)
+ if "error" in searchResult:
+ logger.warning(f"Search failed for site {siteName}: {searchResult['error']}")
+ continue
+ # Process search results for this site (files)
+ items = searchResult.get("value", [])
+ logger.info(f"Retrieved {len(items)} items from site {siteName}")
+
+ siteDocuments = []
+
+ for item in items:
+ itemName = item.get("name", "")
+
+ # Use improved folder detection logic
+ isFolder = self.services.sharepoint.detectFolderType(item)
+
+ itemType = "folder" if isFolder else "file"
+ itemPath = item.get("parentReference", {}).get("path", "")
+ logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'")
+
+ # Filter by the requested search type
+ if searchType == "files" and isFolder:
+ continue # Skip folders when searching for files
+ elif searchType == "folders" and not isFolder:
+ continue # Skip files when searching for folders
+
+ # Item matches the requested type - add it to the results
+ logger.debug(f"Item '{itemName}' found - adding to results")
+
+ # Create result with full path information for proper action chaining
+ webUrl = item.get("webUrl", "")
+ parentPath = item.get("parentReference", {}).get("path", "")
+
+ # Extract the full SharePoint path from webUrl or parentReference
+ fullPath = ""
+ if webUrl:
+ # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
+ if '/sites/' in webUrl:
+ pathPart = webUrl.split('/sites/')[1]
+ # Decode URL encoding and convert to backslash format
+ decodedPath = urllib.parse.unquote(pathPart)
+ fullPath = "\\" + decodedPath.replace('/', '\\')
+ elif parentPath:
+ # Use parentReference path if available
+ fullPath = parentPath.replace('/', '\\')
+
+ docInfo = {
+ "id": item.get("id"),
+ "name": item.get("name"),
+ "type": "folder" if isFolder else "file",
+ "siteName": siteName,
+ "siteId": siteId,
+ "webUrl": webUrl,
+ "fullPath": fullPath,
+ "parentPath": parentPath
+ }
+
+ siteDocuments.append(docInfo)
+
+ foundDocuments.extend(siteDocuments)
+ allSitesSearched.append({
+ "siteName": siteName,
+ "siteUrl": siteUrl,
+ "siteId": siteId,
+ "documentsFound": len(siteDocuments)
+ })
+
+ logger.info(f"Found {len(siteDocuments)} documents in site {siteName}")
+
+ # Limit total results to maxResults
+ if len(foundDocuments) > maxResults:
+ foundDocuments = foundDocuments[:maxResults]
+ logger.info(f"Limited results to {maxResults} items")
+
+ self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {len(foundDocuments)} document(s)")
+
+ resultData = {
+ "searchQuery": searchQuery,
+ "totalResults": len(foundDocuments),
+ "maxResults": maxResults,
+ "foundDocuments": foundDocuments,
+ "timestamp": self.services.utils.timestampGetUtc()
+ }
+
+ except Exception as e:
+ logger.error(f"Error searching SharePoint: {str(e)}")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=str(e))
+
+ # Output is JSON by default
+ outputMimeType = "application/json"
+
+ validationMetadata = {
+ "actionType": "sharepoint.findDocumentPath",
+ "searchQuery": searchQuery,
+ "maxResults": maxResults,
+ "totalResults": len(foundDocuments),
+ "hasResults": len(foundDocuments) > 0
+ }
+
+ self.services.chat.progressLogFinish(operationId, True)
+ return ActionResult(
+ success=True,
+ documents=[
+ ActionDocument(
+ documentName=self._generateMeaningfulFileName("sharepoint_find_path", "json", None, "findDocumentPath"),
+ documentData=json.dumps(resultData, indent=2),
+ mimeType=outputMimeType,
+ validationMetadata=validationMetadata
+ )
+ ]
+ )
+
+ except Exception as e:
+ logger.error(f"Error finding document path: {str(e)}")
+ if operationId:
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+ except Exception:
+ pass # Don't fail on progress logging errors
+ return ActionResult.isFailure(error=str(e))
+
diff --git a/modules/workflows/methods/methodSharepoint/actions/findSiteByUrl.py b/modules/workflows/methods/methodSharepoint/actions/findSiteByUrl.py
new file mode 100644
index 00000000..405b35f2
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/actions/findSiteByUrl.py
@@ -0,0 +1,88 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Find Site By URL action for SharePoint operations.
+Finds SharePoint site by hostname and site path.
+"""
+
+import logging
+import json
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def findSiteByUrl(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Find SharePoint site by hostname and site path.
+
+ Parameters:
+ - connectionReference (str, required): Microsoft connection label.
+ - hostname (str, required): SharePoint hostname (e.g., "example.sharepoint.com")
+ - sitePath (str, required): Site path (e.g., "SteeringBPM" or "/sites/SteeringBPM")
+
+ Returns:
+ - ActionResult with ActionDocument containing site information (id, displayName, name, webUrl)
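+
+ Example (illustrative values only; the connection label is hypothetical):
+ parameters = {
+ "connectionReference": "msgraph-main",
+ "hostname": "example.sharepoint.com",
+ "sitePath": "SteeringBPM"
+ }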
+ """
+ try:
+ connectionReference = parameters.get("connectionReference")
+ if not connectionReference:
+ return ActionResult.isFailure(error="connectionReference parameter is required")
+
+ hostname = parameters.get("hostname")
+ if not hostname:
+ return ActionResult.isFailure(error="hostname parameter is required")
+
+ sitePath = parameters.get("sitePath")
+ if not sitePath:
+ return ActionResult.isFailure(error="sitePath parameter is required")
+
+ # Get Microsoft connection
+ connection = self.connection.getMicrosoftConnection(connectionReference)
+ if not connection:
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # Find site by URL
+ siteInfo = await self.services.sharepoint.findSiteByUrl(
+ hostname=hostname,
+ sitePath=sitePath
+ )
+
+ if not siteInfo:
+ return ActionResult.isFailure(error=f"Site not found: {hostname}:/sites/{sitePath}")
+
+ logger.info(f"Found SharePoint site: {siteInfo.get('displayName')} (ID: {siteInfo.get('id')})")
+
+ # Generate filename
+ workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+ filename = self._generateMeaningfulFileName(
+ "sharepoint_site",
+ "json",
+ workflowContext,
+ "findSiteByUrl"
+ )
+
+ validationMetadata = self._createValidationMetadata(
+ "findSiteByUrl",
+ hostname=hostname,
+ sitePath=sitePath,
+ siteId=siteInfo.get("id")
+ )
+
+ document = ActionDocument(
+ documentName=filename,
+ documentData=json.dumps(siteInfo, indent=2),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ errorMsg = f"Error finding SharePoint site: {str(e)}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
diff --git a/modules/workflows/methods/methodSharepoint/actions/listDocuments.py b/modules/workflows/methods/methodSharepoint/actions/listDocuments.py
new file mode 100644
index 00000000..78aabadc
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/actions/listDocuments.py
@@ -0,0 +1,345 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+List Documents action for SharePoint operations.
+Lists documents and folders in SharePoint paths across sites.
+"""
+
+import logging
+import time
+import json
+import urllib.parse
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: List documents and folders in SharePoint paths across sites.
+ - Input requirements: connectionReference (required); documentList or pathQuery (required); includeSubfolders (optional).
+ - Output format: JSON with folder items and metadata.
+
+ Parameters:
+ - connectionReference (str, required): Microsoft connection label.
+ - documentList (list, optional): Document list reference(s) containing findDocumentPath result.
+ - pathQuery (str, optional): Direct folder path if no documentList (e.g., /sites/SiteName/FolderPath).
+ - includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
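+
+ Example (illustrative values only; the connection label is hypothetical):
+ parameters = {
+ "connectionReference": "msgraph-main",
+ "documentList": ["<findDocumentPath result reference>"],
+ "includeSubfolders": False
+ }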
+ """
+ operationId = None
+ try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"sharepoint_list_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "List Documents",
+ "SharePoint Listing",
+ "Processing document list",
+ parentOperationId=parentOperationId
+ )
+
+ connectionReference = parameters.get("connectionReference")
+ documentList = parameters.get("documentList")
+ pathQuery = parameters.get("pathQuery", "*")
+ if isinstance(documentList, str):
+ documentList = [documentList]
+ includeSubfolders = parameters.get("includeSubfolders", False) # Default to False for better UX
+
+ if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Connection reference is required")
+
+ # Require either documentList or pathQuery
+ if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"):
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList or pathQuery is required")
+
+ # Parse documentList to extract folder path and site information
+ listQuery, sites, _, errorMsg = await self.documentParsing.parseDocumentListForFolder(documentList)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
+
+ # If no folder path found from documentList, use pathQuery if provided
+ if not listQuery and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
+ listQuery = pathQuery
+ logger.info(f"Using pathQuery for list query: {listQuery}")
+ # Resolve sites from pathQuery
+ sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
+
+ # Validate required parameters
+ if not listQuery:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")
+
+ if not sites:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Site information missing. Cannot determine target site for list operation.")
+
+ # Get connection
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
+ connection = self.connection.getMicrosoftConnection(connectionReference)
+ if not connection:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ logger.info(f"Starting SharePoint listDocuments for listQuery: {listQuery}")
+ logger.debug(f"Connection ID: {connection['id']}")
+
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Processing folder path")
+
+ # Parse listQuery to extract path, search terms, search type, and options
+ pathQuery, fileQuery, searchType, searchOptions = self.pathProcessing.parseSearchQuery(listQuery)
+
+ # Check if listQuery is a drive item (folder) ID rather than a path - Graph item IDs start with '01'
+ if listQuery.startswith('01'):
+ # Direct folder ID - use it directly
+ folderPaths = [listQuery]
+ logger.info(f"Using direct folder ID: {listQuery}")
+ else:
+ # Remove site prefix from pathQuery before resolving (it's only for site filtering)
+ pathQueryForResolve = pathQuery
+ # Microsoft-standard path: /sites/SiteName/Path -> /Path
+ if pathQuery.startswith('/sites/'):
+ parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQuery)
+ if parsedPath:
+ innerPath = parsedPath.get("innerPath", "")
+ pathQueryForResolve = '/' + innerPath if innerPath else '/'
+ else:
+ pathQueryForResolve = '/'
+
+ # Remove first path segment if it looks like a document library name
+ # In SharePoint Graph API, /drive/root already points to the default document library,
+ # so library names in paths should be removed
+ # Generic approach: if path has multiple segments, store original for fallback
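+ # e.g. '/Freigegebene Dokumente/Projekte' (illustrative) -> '/Projekte' (library name stripped)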
+ pathSegments = [s for s in pathQueryForResolve.split('/') if s.strip()]
+ if len(pathSegments) > 1:
+ # Path has multiple segments - first might be a library name
+ # Store original for potential fallback
+ originalPath = pathQueryForResolve
+ # Try without first segment (assuming it's a library name)
+ pathQueryForResolve = '/' + '/'.join(pathSegments[1:])
+ logger.info(f"Removed first path segment (potential library name), path changed from '{originalPath}' to '{pathQueryForResolve}'")
+ elif len(pathSegments) == 1:
+ # Only one segment - if it's a common library-like name, use root
+ firstSegmentLower = pathSegments[0].lower()
+ libraryIndicators = ['document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek']
+ if any(indicator in firstSegmentLower for indicator in libraryIndicators):
+ pathQueryForResolve = '/'
+ logger.info(f"First segment '{pathSegments[0]}' appears to be a library name, using root")
+
+ # Resolve path query into folder paths
+ folderPaths = self.pathProcessing.resolvePathQuery(pathQueryForResolve)
+ logger.info(f"Resolved folder paths: {folderPaths}")
+
+ # Process each folder path across all sites
+ listResults = []
+
+ self.services.chat.progressLogUpdate(operationId, 0.5, f"Listing {len(folderPaths)} folder(s) across {len(sites)} site(s)")
+
+ for folderPath in folderPaths:
+ try:
+ folderResults = []
+
+ for site in sites:
+ siteId = site["id"]
+ siteName = site["displayName"]
+ siteUrl = site["webUrl"]
+
+ logger.info(f"Listing folder {folderPath} in site: {siteName}")
+
+ # Determine the endpoint based on folder path
+ if folderPath in ["/", ""] or folderPath == "*":
+ # Root folder
+ endpoint = f"sites/{siteId}/drive/root/children"
+ elif folderPath.startswith('01'):
+ # Direct folder ID (Graph drive item IDs start with '01')
+ endpoint = f"sites/{siteId}/drive/items/{folderPath}/children"
+ else:
+ # Specific folder path - remove leading slash if present and URL encode
+ folderPathClean = folderPath.lstrip('/')
+ # URL encode the path for Graph API (spaces and special characters need encoding)
+ folderPathEncoded = urllib.parse.quote(folderPathClean, safe='/')
+ endpoint = f"sites/{siteId}/drive/root:/{folderPathEncoded}:/children"
+
+ # Make the API call to list folder contents
+ apiResult = await self.apiClient.makeGraphApiCall(endpoint)
+
+ if "error" in apiResult:
+ logger.warning(f"Failed to list folder {folderPath} in site {siteName}: {apiResult['error']}")
+ continue
+
+ # Process the results
+ items = apiResult.get("value", [])
+ processedItems = []
+
+ for item in items:
+ # Use improved folder detection logic
+ isFolder = self.services.sharepoint.detectFolderType(item)
+
+ itemInfo = {
+ "id": item.get("id"),
+ "name": item.get("name"),
+ "size": item.get("size", 0),
+ "createdDateTime": item.get("createdDateTime"),
+ "lastModifiedDateTime": item.get("lastModifiedDateTime"),
+ "webUrl": item.get("webUrl"),
+ "type": "folder" if isFolder else "file",
+ "siteName": siteName,
+ "siteUrl": siteUrl
+ }
+
+ # Add file-specific information
+ if "file" in item:
+ itemInfo.update({
+ "mimeType": item["file"].get("mimeType"),
+ "downloadUrl": item.get("@microsoft.graph.downloadUrl")
+ })
+
+ # Add folder-specific information
+ if "folder" in item:
+ itemInfo.update({
+ "childCount": item["folder"].get("childCount", 0)
+ })
+
+ processedItems.append(itemInfo)
+
+ # If include subfolders is enabled, get ONLY direct subfolder contents (1 level deep only)
+ if includeSubfolders:
+ folderItems = [item for item in processedItems if item['type'] == 'folder']
+ logger.info(f"Including subfolders - processing {len(folderItems)} folders")
+ subfolderCount = 0
+ maxSubfolders = 10 # Bound the number of extra API calls
+
+ for item in processedItems[:]: # Use slice to avoid modifying list during iteration
+ if item["type"] == "folder" and subfolderCount < maxSubfolders:
+ subfolderCount += 1
+ subfolderPath = f"{folderPath.rstrip('/')}/{item['name']}"
+ subfolderEndpoint = f"sites/{siteId}/drive/items/{item['id']}/children"
+
+ logger.debug(f"Getting contents of subfolder: {item['name']}")
+ subfolderResult = await self.apiClient.makeGraphApiCall(subfolderEndpoint)
+ if "error" not in subfolderResult:
+ subfolderItems = subfolderResult.get("value", [])
+ logger.debug(f"Found {len(subfolderItems)} items in subfolder {item['name']}")
+
+ for subfolderItem in subfolderItems:
+ # Use improved folder detection logic for subfolder items
+ subfolderIsFolder = self.services.sharepoint.detectFolderType(subfolderItem)
+
+ # Only add files and direct subfolders, NO RECURSION
+ subfolderItemInfo = {
+ "id": subfolderItem.get("id"),
+ "name": subfolderItem.get("name"),
+ "size": subfolderItem.get("size", 0),
+ "createdDateTime": subfolderItem.get("createdDateTime"),
+ "lastModifiedDateTime": subfolderItem.get("lastModifiedDateTime"),
+ "webUrl": subfolderItem.get("webUrl"),
+ "type": "folder" if subfolderIsFolder else "file",
+ "parentPath": subfolderPath,
+ "siteName": siteName,
+ "siteUrl": siteUrl
+ }
+
+ if "file" in subfolderItem:
+ subfolderItemInfo.update({
+ "mimeType": subfolderItem["file"].get("mimeType"),
+ "downloadUrl": subfolderItem.get("@microsoft.graph.downloadUrl")
+ })
+
+ processedItems.append(subfolderItemInfo)
+ else:
+ logger.warning(f"Failed to get contents of subfolder {item['name']}: {subfolderResult.get('error')}")
+ elif subfolderCount >= maxSubfolders:
+ logger.warning(f"Reached maximum subfolder limit ({maxSubfolders}), skipping remaining folders")
+ break
+
+ logger.info(f"Processed {subfolderCount} subfolders, total items: {len(processedItems)}")
+
+ folderResults.append({
+ "siteName": siteName,
+ "siteUrl": siteUrl,
+ "itemCount": len(processedItems),
+ "items": processedItems
+ })
+
+ listResults.append({
+ "folderPath": folderPath,
+ "sitesProcessed": len(folderResults),
+ "siteResults": folderResults
+ })
+
+ except Exception as e:
+ logger.error(f"Error listing folder {folderPath}: {str(e)}")
+ listResults.append({
+ "folderPath": folderPath,
+ "error": str(e),
+ "sitesProcessed": 0,
+ "siteResults": []
+ })
+
+ # Create result data
+ totalItems = sum(len(siteResult.get("items", [])) for result in listResults for siteResult in result.get("siteResults", []))
+
+ resultData = {
+ "listQuery": listQuery,
+ "pathQuery": pathQuery,
+ "totalItems": totalItems,
+ "foldersProcessed": len(listResults),
+ "listResults": listResults,
+ "includeSubfolders": includeSubfolders,
+ "timestamp": self.services.utils.timestampGetUtc()
+ }
+
+ self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {totalItems} item(s) in {len(listResults)} folder(s)")
+
+ validationMetadata = {
+ "actionType": "sharepoint.listDocuments",
+ "listQuery": listQuery,
+ "totalItems": totalItems,
+ "foldersProcessed": len(listResults),
+ "includeSubfolders": includeSubfolders
+ }
+
+ self.services.chat.progressLogFinish(operationId, True)
+ return ActionResult(
+ success=True,
+ documents=[
+ ActionDocument(
+ documentName=self._generateMeaningfulFileName("sharepoint_list", "json", None, "listDocuments"),
+ documentData=json.dumps(resultData, indent=2),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+ ]
+ )
+
+ except Exception as e:
+ logger.error(f"Error listing SharePoint documents: {str(e)}")
+ if operationId:
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+ except Exception:
+ pass # Don't fail on progress logging errors
+ return ActionResult(
+ success=False,
+ error=str(e)
+ )
+
diff --git a/modules/workflows/methods/methodSharepoint/actions/readDocuments.py b/modules/workflows/methods/methodSharepoint/actions/readDocuments.py
new file mode 100644
index 00000000..2bc2688c
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/actions/readDocuments.py
@@ -0,0 +1,290 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Read Documents action for SharePoint operations.
+Reads documents from SharePoint and extracts content/metadata.
+"""
+
+import logging
+import time
+import json
+import base64
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Read documents from SharePoint and extract content/metadata.
+ - Input requirements: connectionReference (required); documentList or pathQuery (required); includeMetadata (optional).
+ - Output format: Standardized ActionDocument format (documentName, documentData, mimeType).
+ - Binary files (PDFs, etc.) are Base64-encoded in documentData.
+ - Text files are stored as plain text in documentData.
+ - Returns ActionResult with documents list for template processing.
+
+ Parameters:
+ - connectionReference (str, required): Microsoft connection label.
+ - documentList (list, optional): Document list reference(s) containing findDocumentPath result.
+ - pathQuery (str, optional): Direct path query if no documentList (e.g., /sites/SiteName/FolderPath).
+ - includeMetadata (bool, optional): Include metadata. Default: True.
+
+ Returns:
+ - ActionResult with documents: List[ActionDocument] where each ActionDocument contains:
+ - documentName: File name
+ - documentData: Base64-encoded content (binary files) or plain text (text files)
+ - mimeType: MIME type (e.g., application/pdf, text/plain)
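+
+ Example (illustrative values only; the connection label is hypothetical):
+ parameters = {
+ "connectionReference": "msgraph-main",
+ "pathQuery": "/sites/ExampleSite/Shared Documents/Reports"
+ }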
+ """
+ operationId = None
+ try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"sharepoint_read_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "Read Documents",
+ "SharePoint Document Reading",
+ "Processing document list",
+ parentOperationId=parentOperationId
+ )
+
+ documentList = parameters.get("documentList")
+ pathQuery = parameters.get("pathQuery", "*")
+ connectionReference = parameters.get("connectionReference")
+ includeMetadata = parameters.get("includeMetadata", True)
+
+ # Validate connection reference
+ if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Connection reference is required")
+
+ # Require either documentList or pathQuery
+ if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"):
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList or pathQuery is required")
+
+ # Get connection first
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
+ connection = self.connection.getMicrosoftConnection(connectionReference)
+ if not connection:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # Parse documentList to extract foundDocuments and site information
+ sharePointFileIds = None
+ sites = None
+
+ if documentList:
+ foundDocuments, sites, errorMsg = await self.documentParsing.parseDocumentListForFoundDocuments(documentList)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
+
+ if foundDocuments:
+ # Extract SharePoint file IDs from foundDocuments
+ sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"]
+ if not sharePointFileIds:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No files found in documentList from findDocumentPath result")
+ logger.info(f"Extracted {len(sharePointFileIds)} SharePoint file IDs from documentList")
+
+ # If we have SharePoint file IDs from documentList (findDocumentPath result), read them directly
+ if sharePointFileIds and sites:
+ # Read SharePoint files directly using their IDs
+ readResults = []
+ siteId = sites[0]['id']
+
+ self.services.chat.progressLogUpdate(operationId, 0.5, f"Reading {len(sharePointFileIds)} file(s) from SharePoint")
+ for idx, fileId in enumerate(sharePointFileIds):
+ try:
+ self.services.chat.progressLogUpdate(operationId, 0.5 + (idx * 0.3 / len(sharePointFileIds)), f"Reading file {idx + 1}/{len(sharePointFileIds)}")
+ # Get file info from SharePoint
+ endpoint = f"sites/{siteId}/drive/items/{fileId}"
+ fileInfo = await self.apiClient.makeGraphApiCall(endpoint)
+
+ if "error" in fileInfo:
+ logger.warning(f"Failed to get file info for {fileId}: {fileInfo['error']}")
+ continue
+
+ # Get file content using SharePoint service (handles binary data correctly)
+ fileName = fileInfo.get("name", f"file_{fileId}")
+ fileContent = await self.services.sharepoint.downloadFile(siteId, fileId)
+
+ # Create result document
+ resultItem = {
+ "fileId": fileId,
+ "fileName": fileName,
+ "sharepointFileId": fileId,
+ "siteName": sites[0]['displayName'],
+ "siteUrl": sites[0]['webUrl'],
+ "size": fileInfo.get("size", 0),
+ "createdDateTime": fileInfo.get("createdDateTime"),
+ "lastModifiedDateTime": fileInfo.get("lastModifiedDateTime"),
+ "webUrl": fileInfo.get("webUrl")
+ }
+
+ # Add content if available
+ if fileContent:
+ resultItem["content"] = fileContent
+
+ # Add metadata if requested
+ if includeMetadata:
+ resultItem["metadata"] = {
+ "mimeType": fileInfo.get("file", {}).get("mimeType"),
+ "downloadUrl": fileInfo.get("@microsoft.graph.downloadUrl"),
+ "createdBy": fileInfo.get("createdBy", {}),
+ "lastModifiedBy": fileInfo.get("lastModifiedBy", {}),
+ "parentReference": fileInfo.get("parentReference", {})
+ }
+
+ readResults.append(resultItem)
+ except Exception as e:
+ logger.error(f"Error reading file {fileId}: {str(e)}")
+ continue
+
+ if not readResults:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No files could be read from documentList")
+
+ # Convert read results to ActionDocument objects
+ # IMPORTANT: For binary files (PDFs), store Base64-encoded content directly in documentData
+ # The system will create FileData and ChatDocument automatically
+ self.services.chat.progressLogUpdate(operationId, 0.8, f"Processing {len(readResults)} document(s)")
+
+ actionDocuments = []
+ for resultItem in readResults:
+ fileContent = resultItem.get("content")
+ fileName = resultItem.get("fileName", f"file_{resultItem.get('fileId')}")
+
+ # Determine MIME type from metadata or file extension
+ mimeType = "application/octet-stream"
+ if resultItem.get("metadata", {}).get("mimeType"):
+ mimeType = resultItem["metadata"]["mimeType"]
+ elif fileName:
+ if fileName.endswith('.pdf'):
+ mimeType = "application/pdf"
+ elif fileName.endswith('.txt'):
+ mimeType = "text/plain"
+ elif fileName.endswith('.json'):
+ mimeType = "application/json"
+
+ # For binary files (PDFs, etc.), store Base64-encoded content directly
+ # The GenerationService will detect PDF mimeType and handle base64 decoding
+ if fileContent and isinstance(fileContent, bytes):
+ # Encode binary content as Base64 string
+ base64Content = base64.b64encode(fileContent).decode('utf-8')
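+ # e.g. b'%PDF-1.7' -> 'JVBERi0xLjc=' (downstream consumers decode this back to bytes)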
+ validationMetadata = {
+ "actionType": "sharepoint.readDocuments",
+ "fileName": fileName,
+ "sharepointFileId": resultItem.get("sharepointFileId"),
+ "siteName": resultItem.get("siteName"),
+ "mimeType": mimeType,
+ "contentType": "binary",
+ "size": len(fileContent),
+ "includeMetadata": includeMetadata
+ }
+ actionDoc = ActionDocument(
+ documentName=fileName,
+ documentData=base64Content, # Base64 string for binary files
+ mimeType=mimeType,
+ validationMetadata=validationMetadata
+ )
+ actionDocuments.append(actionDoc)
+ logger.info(f"Stored binary file {fileName} ({len(fileContent)} bytes) as Base64 in ActionDocument")
+ elif fileContent:
+ # Text content - store directly in documentData
+ validationMetadata = {
+ "actionType": "sharepoint.readDocuments",
+ "fileName": fileName,
+ "sharepointFileId": resultItem.get("sharepointFileId"),
+ "siteName": resultItem.get("siteName"),
+ "mimeType": mimeType,
+ "contentType": "text",
+ "includeMetadata": includeMetadata
+ }
+ actionDoc = ActionDocument(
+ documentName=fileName,
+ documentData=fileContent if isinstance(fileContent, str) else str(fileContent),
+ mimeType=mimeType,
+ validationMetadata=validationMetadata
+ )
+ actionDocuments.append(actionDoc)
+ else:
+ # No content - store metadata only
+ docData = {
+ "fileName": fileName,
+ "sharepointFileId": resultItem.get("sharepointFileId"),
+ "siteName": resultItem.get("siteName"),
+ "siteUrl": resultItem.get("siteUrl"),
+ "size": resultItem.get("size"),
+ "createdDateTime": resultItem.get("createdDateTime"),
+ "lastModifiedDateTime": resultItem.get("lastModifiedDateTime"),
+ "webUrl": resultItem.get("webUrl")
+ }
+ if resultItem.get("metadata"):
+ docData["metadata"] = resultItem["metadata"]
+
+ validationMetadata = {
+ "actionType": "sharepoint.readDocuments",
+ "fileName": fileName,
+ "sharepointFileId": resultItem.get("sharepointFileId"),
+ "siteName": resultItem.get("siteName"),
+ "mimeType": mimeType,
+ "contentType": "metadata_only",
+ "includeMetadata": includeMetadata
+ }
+ actionDoc = ActionDocument(
+ documentName=fileName,
+ documentData=json.dumps(docData, indent=2),
+ mimeType=mimeType,
+ validationMetadata=validationMetadata
+ )
+ actionDocuments.append(actionDoc)
+
+ # Return success with action documents
+ self.services.chat.progressLogUpdate(operationId, 0.9, f"Read {len(actionDocuments)} document(s)")
+ self.services.chat.progressLogFinish(operationId, True)
+ return ActionResult.isSuccess(documents=actionDocuments)
+
+ # If no sites from documentList, try pathQuery fallback
+ if not sites and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
+ sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
+
+ # If still no sites, return error
+ if not sites:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with file information, or pathQuery must be provided. Use findDocumentPath first to get file paths, or provide pathQuery directly.")
+
+ # Reached when sites could be resolved but no readable file IDs were available:
+ # reading purely from a pathQuery (without a findDocumentPath result) is not implemented
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No readable files found. Use findDocumentPath first and pass its result as documentList.")
+ except Exception as e:
+ logger.error(f"Error reading SharePoint documents: {str(e)}")
+ if operationId:
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+ except Exception:
+ pass # Don't fail on progress logging errors
+ return ActionResult(
+ success=False,
+ error=str(e)
+ )
+
diff --git a/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py b/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
new file mode 100644
index 00000000..82c93434
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
@@ -0,0 +1,278 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Upload Document action for SharePoint operations.
+Uploads documents to SharePoint.
+"""
+
+import logging
+import time
+import json
+import urllib.parse
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Upload documents to SharePoint. Only choose this action when a connectionReference is available.
+ - Input requirements: connectionReference (required); documentList (required); pathQuery (optional).
+ - Output format: JSON with upload status and file info.
+
+ Parameters:
+ - connectionReference (str, required): Microsoft connection label.
+ - documentList (list, required): Document reference(s) to upload. File names are taken from the documents.
+ - pathQuery (str, optional): Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath).
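+
+ Example (illustrative values only; the connection label is hypothetical):
+ parameters = {
+ "connectionReference": "msgraph-main",
+ "documentList": ["<document reference to upload>"],
+ "pathQuery": "/sites/ExampleSite/Shared Documents/Uploads"
+ }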
+ """
+ operationId = None
+ try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"sharepoint_upload_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "Upload Document",
+ "SharePoint Upload",
+ "Processing document list",
+ parentOperationId=parentOperationId
+ )
+
+ connectionReference = parameters.get("connectionReference")
+ documentList = parameters.get("documentList")
+ pathQuery = parameters.get("pathQuery")
+ if isinstance(documentList, str):
+ documentList = [documentList]
+
+ if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Connection reference is required")
+
+ if not documentList:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Document list is required")
+
+ # Parse documentList to extract folder path and site information
+ uploadPath, sites, filesToUpload, errorMsg = await self.documentParsing.parseDocumentListForFolder(documentList)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
+
+ # If no folder path found from documentList, use pathQuery if provided
+ if not uploadPath and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
+ uploadPath = pathQuery
+ logger.info(f"Using pathQuery for upload path: {uploadPath}")
+ # Resolve sites from pathQuery
+ sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
+
+ # Validate required parameters
+ if not uploadPath:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get upload folder, or provide pathQuery directly.")
+
+ if not sites:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Site information missing. Cannot determine target site for upload.")
+
+ if not filesToUpload:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No files to upload found in documentList.")
+
+ # Get connection
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection")
+ connection = self.connection.getMicrosoftConnection(connectionReference)
+ if not connection:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # Process upload paths
+ uploadPaths = []
+ if uploadPath.startswith('01'):
+ # It's a folder ID (Graph drive item IDs start with '01') - use it directly
+ uploadPaths = [uploadPath]
+ logger.info(f"Using folder ID directly for upload: {uploadPath}")
+ else:
+ # It's a path - resolve it normally
+ uploadPaths = self.pathProcessing.resolvePathQuery(uploadPath)
+
+ # Process each document upload
+ uploadResults = []
+
+ # Extract file names from documents
+ fileNames = [doc.fileName for doc in filesToUpload]
+ logger.info(f"Using file names from documentList: {fileNames}")
+
+ self.services.chat.progressLogUpdate(operationId, 0.5, f"Uploading {len(filesToUpload)} document(s)")
+
+ for i, (chatDocument, fileName) in enumerate(zip(filesToUpload, fileNames)):
+ try:
+ fileId = chatDocument.fileId
+ fileData = self.services.chat.getFileData(fileId)
+
+ if not fileData:
+ logger.warning(f"File data not found for fileId: {fileId}")
+ uploadResults.append({
+ "fileName": fileName,
+ "fileId": fileId,
+ "error": "File data not found",
+ "uploadStatus": "failed"
+ })
+ continue
+
+ # Upload to the first available site (or could be made configurable)
+ uploadSuccessful = False
+
+ for site in sites:
+ siteId = site["id"]
+ siteName = site["displayName"]
+ siteUrl = site["webUrl"]
+
+ # Use the first upload path or default to Documents
+ uploadPath = uploadPaths[0] if uploadPaths else "/Documents"
+
+ # Handle wildcard paths - replace with default Documents folder
+ if uploadPath == "*":
+ uploadPath = "/Documents"
+ logger.warning(f"Wildcard path '*' detected, using default '/Documents' folder for upload")
+
+ # Check if uploadPath is a folder ID or a regular path
+ if uploadPath.startswith('01'):
+ # It's a folder ID - use the folder-specific upload endpoint
+ uploadEndpoint = f"sites/{siteId}/drive/items/{uploadPath}:/{fileName}:/content"
+ logger.info(f"Using folder ID upload endpoint: {uploadEndpoint}")
+ else:
+ # It's a regular path - use the root-based upload endpoint
+ uploadPath = uploadPath.rstrip('/') + '/' + fileName
+ uploadPathClean = uploadPath.lstrip('/')
+ uploadEndpoint = f"sites/{siteId}/drive/root:/{uploadPathClean}:/content"
+ logger.info(f"Using path-based upload endpoint: {uploadEndpoint}")
+
+ # Simple (single PUT) upload supports only files below Graph's 4MB limit;
+ # larger files need a resumable upload session (not implemented)
+ if len(fileData) < 4 * 1024 * 1024: # 4MB
+
+ # Upload the file
+ uploadResult = await self.apiClient.makeGraphApiCall(
+ uploadEndpoint,
+ method="PUT",
+ data=fileData
+ )
+
+ if "error" not in uploadResult:
+ uploadResults.append({
+ "fileName": fileName,
+ "fileId": fileId,
+ "uploadStatus": "success",
+ "siteName": siteName,
+ "siteUrl": siteUrl,
+ "uploadPath": uploadPath,
+ "uploadEndpoint": uploadEndpoint,
+ "sharepointFileId": uploadResult.get("id"),
+ "webUrl": uploadResult.get("webUrl"),
+ "size": uploadResult.get("size"),
+ "createdDateTime": uploadResult.get("createdDateTime")
+ })
+ uploadSuccessful = True
+ break
+ else:
+ logger.warning(f"Upload failed to site {siteName}: {uploadResult['error']}")
+ else:
+ # For large files, we would need to implement resumable upload
+ logger.warning(f"File too large ({len(fileData)} bytes) for site {siteName}")
+ continue
+
+ if not uploadSuccessful:
+ uploadResults.append({
+ "fileName": fileName,
+ "fileId": fileId,
+ "error": f"File too large ({len(fileData)} bytes) or upload failed to all sites. Files larger than 4MB require resumable upload (not implemented).",
+ "uploadStatus": "failed"
+ })
+
+ except Exception as e:
+ logger.error(f"Error uploading document {fileName}: {str(e)}")
+ uploadResults.append({
+ "fileName": fileName,
+ "fileId": fileId,
+ "error": str(e),
+ "uploadStatus": "failed"
+ })
+
+ # Update progress for each file
+ self.services.chat.progressLogUpdate(operationId, 0.5 + (i * 0.4 / len(filesToUpload)), f"Uploaded {i + 1}/{len(filesToUpload)} file(s)")
+
+ # Create result data
+ resultData = {
+ "connectionReference": connectionReference,
+ "uploadPath": uploadPath,
+ "documentList": documentList,
+ "fileNames": fileNames,
+ "sitesAvailable": len(sites),
+ "uploadResults": uploadResults,
+ "connection": {
+ "id": connection["id"],
+ "authority": "microsoft",
+ "reference": connectionReference
+ },
+ "timestamp": self.services.utils.timestampGetUtc()
+ }
+
+ # Output is JSON by default
+ outputMimeType = "application/json"
+
+ validationMetadata = {
+ "actionType": "sharepoint.uploadDocument",
+ "connectionReference": connectionReference,
+ "uploadPath": uploadPath,
+ "fileNames": fileNames,
+ "uploadCount": len(uploadResults),
+ "successfulUploads": len([r for r in uploadResults if r.get("uploadStatus") == "success"]),
+ "failedUploads": len([r for r in uploadResults if r.get("uploadStatus") == "failed"])
+ }
+
+ successfulUploads = len([r for r in uploadResults if r.get("uploadStatus") == "success"])
+ self.services.chat.progressLogUpdate(operationId, 0.9, f"Uploaded {successfulUploads}/{len(uploadResults)} file(s)")
+ self.services.chat.progressLogFinish(operationId, successfulUploads > 0)
+
+ return ActionResult(
+ success=True,
+ documents=[
+ ActionDocument(
+ documentName=self._generateMeaningfulFileName("sharepoint_upload", "json", None, "uploadDocument"),
+ documentData=json.dumps(resultData, indent=2),
+ mimeType=outputMimeType,
+ validationMetadata=validationMetadata
+ )
+ ]
+ )
+
+ except Exception as e:
+ logger.error(f"Error uploading to SharePoint: {str(e)}")
+ if operationId:
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+ except Exception:
+ pass # Don't fail on progress logging errors
+ return ActionResult(
+ success=False,
+ error=str(e)
+ )
+
diff --git a/modules/workflows/methods/methodSharepoint/actions/uploadFile.py b/modules/workflows/methods/methodSharepoint/actions/uploadFile.py
new file mode 100644
index 00000000..3d8a9499
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/actions/uploadFile.py
@@ -0,0 +1,145 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Upload File action for SharePoint operations.
+Uploads raw file content (bytes) to SharePoint.
+"""
+
+import logging
+import json
+import base64
+from typing import Dict, Any
+from modules.workflows.methods.methodBase import action
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+
+logger = logging.getLogger(__name__)
+
+@action
+async def uploadFile(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ Upload raw file content (bytes) to SharePoint.
+
+ Parameters:
+ - connectionReference (str, required): Microsoft connection label.
+ - siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
+ - folderPath (str, required): Folder path relative to site root
+ - fileName (str, required): File name
+ - content (str, required): Document reference containing file content as base64-encoded bytes
+
+ Returns:
+ - ActionResult with ActionDocument containing upload result
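+
+ Example (illustrative values only; the connection label is hypothetical):
+ parameters = {
+ "connectionReference": "msgraph-main",
+ "siteId": "<site ID or findSiteByUrl result reference>",
+ "folderPath": "Shared Documents/Reports",
+ "fileName": "report.pdf",
+ "content": "<document reference with base64-encoded bytes>"
+ }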
+ """
+ try:
+ connectionReference = parameters.get("connectionReference")
+ if not connectionReference:
+ return ActionResult.isFailure(error="connectionReference parameter is required")
+
+ siteIdParam = parameters.get("siteId")
+ if not siteIdParam:
+ return ActionResult.isFailure(error="siteId parameter is required")
+
+ folderPath = parameters.get("folderPath")
+ if not folderPath:
+ return ActionResult.isFailure(error="folderPath parameter is required")
+
+ fileName = parameters.get("fileName")
+ if not fileName:
+ return ActionResult.isFailure(error="fileName parameter is required")
+
+ contentParam = parameters.get("content")
+ if not contentParam:
+ return ActionResult.isFailure(error="content parameter is required")
+
+ # Extract siteId from document if it's a reference
+ siteId = None
+ if isinstance(siteIdParam, str):
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+ try:
+ docList = DocumentReferenceList.from_string_list([siteIdParam])
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
+ if chatDocuments and len(chatDocuments) > 0:
+ siteInfoJson = json.loads(chatDocuments[0].documentData)
+ siteId = siteInfoJson.get("id")
+ except Exception:
+ # Best effort: if the reference cannot be parsed, fall back to the raw parameter below
+ pass
+
+ if not siteId:
+ siteId = siteIdParam
+ else:
+ siteId = siteIdParam
+
+ if not siteId:
+ return ActionResult.isFailure(error="Could not extract siteId from parameter")
+
+ # Get file content from document
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+ docList = DocumentReferenceList.from_string_list([contentParam] if isinstance(contentParam, str) else contentParam)
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docList)
+ if not chatDocuments or len(chatDocuments) == 0:
+ return ActionResult.isFailure(error="Could not get file content from document reference")
+
+ fileContentBase64 = chatDocuments[0].documentData
+
+ # Decode base64
+ try:
+ fileContent = base64.b64decode(fileContentBase64)
+ except Exception as e:
+ return ActionResult.isFailure(error=f"Could not decode base64 file content: {str(e)}")
+
+ # Get Microsoft connection
+ connection = self.connection.getMicrosoftConnection(connectionReference)
+ if not connection:
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # Upload file
+ uploadResult = await self.services.sharepoint.uploadFile(
+ siteId=siteId,
+ folderPath=folderPath,
+ fileName=fileName,
+ content=fileContent
+ )
+
+ if "error" in uploadResult:
+ return ActionResult.isFailure(error=f"Upload failed: {uploadResult['error']}")
+
+ logger.info(f"Uploaded file to SharePoint: {folderPath}/{fileName} ({len(fileContent)} bytes)")
+
+ # Generate filename
+ workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
+ filename = self._generateMeaningfulFileName(
+ "file_upload_result",
+ "json",
+ workflowContext,
+ "uploadFile"
+ )
+
+ result = {
+ "success": True,
+ "siteId": siteId,
+ "filePath": f"{folderPath}/{fileName}",
+ "fileSize": len(fileContent),
+ "uploadResult": uploadResult
+ }
+
+ validationMetadata = self._createValidationMetadata(
+ "uploadFile",
+ siteId=siteId,
+ filePath=f"{folderPath}/{fileName}",
+ fileSize=len(fileContent)
+ )
+
+ document = ActionDocument(
+ documentName=filename,
+ documentData=json.dumps(result, indent=2),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+
+ return ActionResult.isSuccess(documents=[document])
+
+ except Exception as e:
+ errorMsg = f"Error uploading file to SharePoint: {str(e)}"
+ logger.error(errorMsg)
+ return ActionResult.isFailure(error=errorMsg)
+
diff --git a/modules/workflows/methods/methodSharepoint/helpers/__init__.py b/modules/workflows/methods/methodSharepoint/helpers/__init__.py
new file mode 100644
index 00000000..cc1293b3
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/helpers/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""Helper modules for SharePoint method operations."""
+
diff --git a/modules/workflows/methods/methodSharepoint/helpers/apiClient.py b/modules/workflows/methods/methodSharepoint/helpers/apiClient.py
new file mode 100644
index 00000000..7cead7ef
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/helpers/apiClient.py
@@ -0,0 +1,102 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+API Client helper for SharePoint operations.
+Handles Microsoft Graph API calls with timeout and error handling.
+"""
+
+import logging
+import aiohttp
+import asyncio
+from typing import Dict, Any
+
+logger = logging.getLogger(__name__)
+
+class ApiClientHelper:
+ """Helper for Microsoft Graph API calls"""
+
+ def __init__(self, methodInstance):
+ """
+ Initialize API client helper.
+
+ Args:
+ methodInstance: Instance of MethodSharepoint (for access to services)
+ """
+ self.method = methodInstance
+ self.services = methodInstance.services
+
+ async def makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
+ """
+ Make a Microsoft Graph API call with timeout and detailed logging.
+
+ Args:
+ endpoint: API endpoint (without base URL)
+ method: HTTP method (GET, POST, PUT)
+ data: Optional request body data (bytes)
+
+ Returns:
+ Dict with API response or error information
+ """
+ try:
+ if not hasattr(self.services, 'sharepoint') or not self.services.sharepoint._target.accessToken:
+ return {"error": "SharePoint service not configured with access token"}
+
+            # Raw bytes for PUT uploads; JSON for everything else
+            contentType = "application/octet-stream" if (data and method == "PUT") else "application/json"
+            headers = {
+                "Authorization": f"Bearer {self.services.sharepoint._target.accessToken}",
+                "Content-Type": contentType
+            }
+
+ url = f"https://graph.microsoft.com/v1.0/{endpoint}"
+ logger.info(f"Making Graph API call: {method} {url}")
+
+ # Set timeout to 30 seconds
+ timeout = aiohttp.ClientTimeout(total=30)
+
+            if method not in ("GET", "PUT", "POST"):
+                return {"error": f"Unsupported HTTP method: {method}"}
+
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                logger.debug(f"Starting {method} request to {url}")
+                async with session.request(method, url, headers=headers, data=data) as response:
+                    logger.info(f"Graph API response: {response.status}")
+                    if response.status in (200, 201):
+                        result = await response.json()
+                        logger.debug(f"Graph API success: {len(str(result))} characters response")
+                        return result
+                    errorText = await response.text()
+                    logger.error(f"Graph API call failed: {response.status} - {errorText}")
+                    return {"error": f"API call failed: {response.status} - {errorText}"}
+
+ except asyncio.TimeoutError:
+ logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
+ return {"error": f"API call timed out after 30 seconds: {endpoint}"}
+ except Exception as e:
+ logger.error(f"Error making Graph API call: {str(e)}")
+ return {"error": f"Error making Graph API call: {str(e)}"}
+
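+    # Illustrative usage sketch (assumes a configured MethodSharepoint
+    # instance with a valid access token; `method` is hypothetical here):
+    #
+    #   client = ApiClientHelper(method)
+    #   site = await client.makeGraphApiCall("sites/root")
+    #   if "error" in site:
+    #       ...  # errors are returned as dicts, never raised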
diff --git a/modules/workflows/methods/methodSharepoint/helpers/connection.py b/modules/workflows/methods/methodSharepoint/helpers/connection.py
new file mode 100644
index 00000000..3c2ce16d
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/helpers/connection.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Connection helper for SharePoint operations.
+Handles Microsoft connection management and SharePoint service configuration.
+"""
+
+import logging
+from typing import Dict, Any, Optional
+
+logger = logging.getLogger(__name__)
+
+class ConnectionHelper:
+ """Helper for Microsoft connection management in SharePoint operations"""
+
+ def __init__(self, methodInstance):
+ """
+ Initialize connection helper.
+
+ Args:
+ methodInstance: Instance of MethodSharepoint (for access to services)
+ """
+ self.method = methodInstance
+ self.services = methodInstance.services
+
+ def getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
+ """
+ Get Microsoft connection from connection reference and configure SharePoint service.
+
+ Args:
+ connectionReference: Connection reference string
+
+ Returns:
+ Dict with connection info or None if failed
+ """
+ try:
+ userConnection = self.services.chat.getUserConnectionFromConnectionReference(connectionReference)
+ if not userConnection:
+ logger.warning(f"No user connection found for reference: {connectionReference}")
+ return None
+
+ if userConnection.authority.value != "msft":
+ logger.warning(f"Connection {userConnection.id} is not Microsoft (authority: {userConnection.authority.value})")
+ return None
+
+ # Check if connection is active or pending (pending means OAuth in progress)
+ if userConnection.status.value not in ["active", "pending"]:
+ logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
+ return None
+
+ # Configure SharePoint service with the UserConnection
+ if not self.services.sharepoint.setAccessTokenFromConnection(userConnection):
+ logger.warning(f"Failed to configure SharePoint service with connection {userConnection.id}")
+ return None
+
+ logger.info(f"Successfully configured SharePoint service with Microsoft connection: {userConnection.id}, status: {userConnection.status.value}, externalId: {userConnection.externalId}")
+
+ return {
+ "id": userConnection.id,
+ "userConnection": userConnection,
+ "scopes": ["Sites.ReadWrite.All", "Files.ReadWrite.All", "User.Read"] # SharePoint scopes
+ }
+ except Exception as e:
+ logger.error(f"Error getting Microsoft connection: {str(e)}")
+ return None
+
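+    # Illustrative usage sketch (`method` and `connectionReference` are
+    # hypothetical; the reference normally comes from the action parameters):
+    #
+    #   helper = ConnectionHelper(method)
+    #   connection = helper.getMicrosoftConnection(connectionReference)
+    #   if connection is None:
+    #       ...  # no active Microsoft connection; fail the action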
diff --git a/modules/workflows/methods/methodSharepoint/helpers/documentParsing.py b/modules/workflows/methods/methodSharepoint/helpers/documentParsing.py
new file mode 100644
index 00000000..138e2ea3
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/helpers/documentParsing.py
@@ -0,0 +1,252 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Document Parsing helper for SharePoint operations.
+Handles parsing of document lists and extracting found documents and site information.
+"""
+
+import logging
+import json
+from typing import Dict, Any, List, Optional
+
+logger = logging.getLogger(__name__)
+
+class DocumentParsingHelper:
+ """Helper for parsing document lists and extracting document information"""
+
+ def __init__(self, methodInstance):
+ """
+ Initialize document parsing helper.
+
+ Args:
+ methodInstance: Instance of MethodSharepoint (for access to services)
+ """
+ self.method = methodInstance
+ self.services = methodInstance.services
+
+ async def parseDocumentListForFoundDocuments(self, documentList: Any) -> tuple[Optional[List[Dict[str, Any]]], Optional[List[Dict[str, Any]]], Optional[str]]:
+ """
+ Parse documentList to extract foundDocuments and site information.
+
+ Parameters:
+ documentList: Document list (can be list, DocumentReferenceList, or string)
+
+ Returns:
+ tuple: (foundDocuments, sites, errorMessage)
+ - foundDocuments: List of found documents from findDocumentPath result
+ - sites: List of site dictionaries with id, displayName, webUrl
+ - errorMessage: Error message if parsing failed, None otherwise
+ """
+ try:
+ if isinstance(documentList, str):
+ documentList = [documentList]
+
+ # Resolve documentList to get actual documents
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+ if isinstance(documentList, DocumentReferenceList):
+ docRefList = documentList
+ elif isinstance(documentList, list):
+ docRefList = DocumentReferenceList.from_string_list(documentList)
+ else:
+ docRefList = DocumentReferenceList(references=[])
+
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
+ if not chatDocuments:
+ return None, None, "No documents found for the provided document list"
+
+ firstDocument = chatDocuments[0]
+ fileData = self.services.chat.getFileData(firstDocument.fileId)
+ if not fileData:
+ return None, None, None # No fileData, but not an error (might be regular file)
+
+ try:
+ resultData = json.loads(fileData)
+ foundDocuments = resultData.get("foundDocuments", [])
+
+ # If no foundDocuments, check if it's a listDocuments result (has listResults)
+ if not foundDocuments and "listResults" in resultData:
+                    logger.info("documentList contains listResults from listDocuments, converting to foundDocuments format")
+ listResults = resultData.get("listResults", [])
+ foundDocuments = []
+ siteIdFromList = None
+ siteNameFromList = None
+
+ for listResult in listResults:
+ siteResults = listResult.get("siteResults", [])
+ for siteResult in siteResults:
+ items = siteResult.get("items", [])
+                            # Capture the site name from the first item seen
+                            if items and not siteNameFromList:
+                                siteNameFromList = items[0].get("siteName")
+
+ for item in items:
+ # Convert listDocuments item format to foundDocuments format
+ if item.get("type") == "file":
+ foundDoc = {
+ "id": item.get("id"),
+ "name": item.get("name"),
+ "type": "file",
+ "siteName": item.get("siteName"),
+ "siteId": None, # Will be determined from site discovery
+ "webUrl": item.get("webUrl"),
+ "fullPath": item.get("webUrl", ""),
+ "parentPath": item.get("parentPath", "")
+ }
+ foundDocuments.append(foundDoc)
+
+ # Discover sites to get siteId if we have siteName
+ if foundDocuments and siteNameFromList and not siteIdFromList:
+ logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'")
+ allSites = await self.method.siteDiscovery.discoverSharePointSites()
+ matchingSites = self.method.siteDiscovery.filterSitesByHint(allSites, siteNameFromList)
+ if matchingSites:
+ siteIdFromList = matchingSites[0].get("id")
+ # Update all foundDocuments with siteId
+ for doc in foundDocuments:
+ doc["siteId"] = siteIdFromList
+ logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'")
+
+ logger.info(f"Converted {len(foundDocuments)} files from listResults format")
+
+ if not foundDocuments:
+ return None, None, None # No foundDocuments, but not an error
+
+ # Extract site information from foundDocuments
+ firstDoc = foundDocuments[0]
+ siteName = firstDoc.get("siteName")
+ siteId = firstDoc.get("siteId")
+
+ # If siteId is missing (from listDocuments conversion), discover sites to find it
+ if siteName and not siteId:
+ logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'")
+ allSites = await self.method.siteDiscovery.discoverSharePointSites()
+ matchingSites = self.method.siteDiscovery.filterSitesByHint(allSites, siteName)
+ if matchingSites:
+ siteId = matchingSites[0].get("id")
+ logger.info(f"Found siteId '{siteId}' for site '{siteName}'")
+
+ sites = None
+ if siteName and siteId:
+ sites = [{
+ "id": siteId,
+ "displayName": siteName,
+ "webUrl": firstDoc.get("webUrl", "")
+ }]
+ logger.info(f"Using specific site from documentList: {siteName} (ID: {siteId})")
+ elif siteName:
+ # Try to get site by name
+ allSites = await self.method.siteDiscovery.discoverSharePointSites()
+ matchingSites = self.method.siteDiscovery.filterSitesByHint(allSites, siteName)
+ if matchingSites:
+ sites = [{
+ "id": matchingSites[0].get("id"),
+ "displayName": siteName,
+ "webUrl": matchingSites[0].get("webUrl", "")
+ }]
+ logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})")
+ else:
+ return None, None, f"Site '{siteName}' not found. Cannot determine target site."
+ else:
+ return None, None, "Site information missing from documentList. Cannot determine target site."
+
+ return foundDocuments, sites, None
+
+ except json.JSONDecodeError as e:
+ return None, None, f"Invalid JSON in documentList: {str(e)}"
+ except Exception as e:
+ return None, None, f"Error processing documentList: {str(e)}"
+
+ except Exception as e:
+ logger.error(f"Error parsing documentList: {str(e)}")
+ return None, None, f"Error parsing documentList: {str(e)}"
+
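+    # For reference, a minimal findDocumentPath payload this parser accepts;
+    # field names follow the parsing logic above, values are illustrative:
+    #
+    #   {
+    #     "foundDocuments": [
+    #       {"id": "...", "name": "report.pdf", "type": "file",
+    #        "siteName": "TeamSite", "siteId": "<graph-site-id>",
+    #        "webUrl": "https://...", "parentPath": "/Documents/Reports"}
+    #     ]
+    #   }
+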
+ async def parseDocumentListForFolder(self, documentList: Any) -> tuple[Optional[str], Optional[List[Dict[str, Any]]], Optional[List], Optional[str]]:
+ """
+ Parse documentList to extract folder path, site information, and files to upload.
+
+ Parameters:
+ documentList: Document list (can be list, DocumentReferenceList, or string)
+
+ Returns:
+ tuple: (folderPath, sites, filesToUpload, errorMessage)
+ - folderPath: Folder path from findDocumentPath result (or None)
+ - sites: List of site dictionaries with id, displayName, webUrl
+ - filesToUpload: List of ChatDocument objects to upload (or None)
+ - errorMessage: Error message if parsing failed, None otherwise
+ """
+ try:
+ if isinstance(documentList, str):
+ documentList = [documentList]
+
+ # Resolve documentList to get actual documents
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+ if isinstance(documentList, DocumentReferenceList):
+ docRefList = documentList
+ elif isinstance(documentList, list):
+ docRefList = DocumentReferenceList.from_string_list(documentList)
+ else:
+ docRefList = DocumentReferenceList(references=[])
+
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
+ if not chatDocuments:
+ return None, None, None, "No documents found for the provided document list"
+
+ # Check if first document is a findDocumentPath result (has foundDocuments)
+ firstDocument = chatDocuments[0]
+ fileData = self.services.chat.getFileData(firstDocument.fileId)
+
+ folderPath = None
+ sites = None
+ filesToUpload = None
+
+ if fileData:
+ try:
+ resultData = json.loads(fileData)
+ foundDocuments = resultData.get("foundDocuments", [])
+
+ if foundDocuments:
+ # Extract folder path from first found document
+ firstDoc = foundDocuments[0]
+ parentPath = firstDoc.get("parentPath", "")
+ if parentPath:
+ folderPath = parentPath
+
+ # Extract site information
+ siteName = firstDoc.get("siteName")
+ siteId = firstDoc.get("siteId")
+
+ if siteName and siteId:
+ sites = [{
+ "id": siteId,
+ "displayName": siteName,
+ "webUrl": firstDoc.get("webUrl", "")
+ }]
+ elif siteName:
+ # Discover sites to find siteId
+ allSites = await self.method.siteDiscovery.discoverSharePointSites()
+ matchingSites = self.method.siteDiscovery.filterSitesByHint(allSites, siteName)
+ if matchingSites:
+ sites = [{
+ "id": matchingSites[0].get("id"),
+ "displayName": siteName,
+ "webUrl": matchingSites[0].get("webUrl", "")
+ }]
+
+ # For uploadDocument: filesToUpload are the chatDocuments themselves
+ # (they contain the files to upload)
+ filesToUpload = chatDocuments
+
+ except json.JSONDecodeError:
+ # Not a findDocumentPath result - treat as regular files to upload
+ filesToUpload = chatDocuments
+ else:
+ # No fileData - treat as regular files to upload
+ filesToUpload = chatDocuments
+
+ return folderPath, sites, filesToUpload, None
+
+ except Exception as e:
+ logger.error(f"Error parsing documentList for folder: {str(e)}")
+ return None, None, None, f"Error parsing documentList for folder: {str(e)}"
+
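+    # Illustrative return values (per the tuple contract above; values hypothetical):
+    #   - documentList wraps a findDocumentPath result:
+    #       ("/Documents/Reports", [{"id": ..., "displayName": ..., "webUrl": ...}], chatDocuments, None)
+    #   - documentList wraps plain files to upload:
+    #       (None, None, chatDocuments, None)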
diff --git a/modules/workflows/methods/methodSharepoint/helpers/pathProcessing.py b/modules/workflows/methods/methodSharepoint/helpers/pathProcessing.py
new file mode 100644
index 00000000..3e1a94f1
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/helpers/pathProcessing.py
@@ -0,0 +1,338 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Path Processing helper for SharePoint operations.
+Handles search query parsing, path resolution, and query cleaning.
+"""
+
+import logging
+import re
+from typing import List, Optional, Dict, Any
+
+logger = logging.getLogger(__name__)
+
+class PathProcessingHelper:
+ """Helper for path and query processing"""
+
+ def __init__(self, methodInstance):
+ """
+ Initialize path processing helper.
+
+ Args:
+ methodInstance: Instance of MethodSharepoint (for access to services)
+ """
+ self.method = methodInstance
+ self.services = methodInstance.services
+
+ def parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
+ """
+ Parse searchQuery to extract path, search terms, search type, and search options.
+
+ CRITICAL: NEVER convert words to paths! Words stay as search terms.
+ - "root document lesson" → fileQuery="root document lesson" (NOT "/root/document/lesson")
+ - "root, gose" → fileQuery="root, gose" (NOT "/root/gose")
+ - "druckersteuerung eskalation logobject" → fileQuery="druckersteuerung eskalation logobject"
+
+ Parameters:
+ searchQuery (str): Enhanced search query with options:
+ - "budget" -> pathQuery="*", fileQuery="budget", searchType="all", options={}
+ - "root document lesson" -> pathQuery="*", fileQuery="root document lesson", searchType="all", options={}
+ - "root, gose" -> pathQuery="*", fileQuery="root, gose", searchType="all", options={}
+ - "/Documents:budget" -> pathQuery="/Documents", fileQuery="budget", searchType="all", options={}
+ - "files:budget" -> pathQuery="*", fileQuery="budget", searchType="files", options={}
+ - "folders:DELTA" -> pathQuery="*", fileQuery="DELTA", searchType="folders", options={}
+ - "exact:\"Operations 2025\"" -> exact phrase matching
+ - "regex:^Operations.*2025$" -> regex pattern matching
+ - "case:DELTA" -> case-sensitive search
+ - "and:DELTA AND 2025 Mars AND Group" -> all AND terms must be present
+
+ Returns:
+ tuple[str, str, str, dict]: (pathQuery, fileQuery, searchType, searchOptions)
+ """
+ try:
+ if not searchQuery or not searchQuery.strip() or searchQuery.strip() == "*":
+ return "*", "*", "all", {}
+
+ searchQuery = searchQuery.strip()
+ searchOptions = {}
+
+ # CRITICAL: Do NOT convert space-separated or comma-separated words to paths!
+ # "root document lesson" should stay as "root document lesson", NOT "/root/document/lesson"
+ # "root, gose" should stay as "root, gose", NOT "/root/gose"
+
+ # Check for search type specification (files:, folders:, all:) FIRST
+ searchType = "all" # Default
+ if searchQuery.startswith(("files:", "folders:", "all:")):
+ typeParts = searchQuery.split(':', 1)
+ searchType = typeParts[0].strip()
+ searchQuery = typeParts[1].strip()
+
+ # Extract optional site hint tokens: support "site=Name" or leading "site:Name"
+ def _extractSiteHint(q: str) -> tuple[str, Optional[str]]:
+ try:
+ qStrip = q.strip()
+ # Leading form: site:KM LayerFinance ...
+ if qStrip.lower().startswith("site:"):
+ after = qStrip[5:].lstrip()
+ # site name until next space or end
+ if ' ' in after:
+ siteName, rest = after.split(' ', 1)
+ else:
+ siteName, rest = after, ''
+ return rest.strip(), siteName.strip()
+ # Inline key=value form anywhere
+ m = re.search(r"\bsite=([^;\s]+)", qStrip, flags=re.IGNORECASE)
+ if m:
+ siteName = m.group(1).strip()
+ # remove the token from query
+ qNew = re.sub(r"\bsite=[^;\s]+;?", "", qStrip, flags=re.IGNORECASE).strip()
+ return qNew, siteName
+ except Exception:
+ pass
+ return q, None
+
+ searchQuery, extractedSite = _extractSiteHint(searchQuery)
+ if extractedSite:
+ searchOptions["site_hint"] = extractedSite
+ logger.info(f"Extracted site hint: '{extractedSite}'")
+
+ # Extract name="..." if present (for quoted multi-word names)
+ nameMatch = re.search(r"name=\"([^\"]+)\"", searchQuery)
+ if nameMatch:
+ searchQuery = nameMatch.group(1)
+ logger.info(f"Extracted name from quotes: '{searchQuery}'")
+
+ # Check for search mode specification (exact:, regex:, case:, and:)
+ if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):
+ modeParts = searchQuery.split(':', 1)
+ mode = modeParts[0].strip()
+ searchQuery = modeParts[1].strip()
+
+ if mode == "exact":
+ searchOptions["exact_match"] = True
+ # Remove quotes if present
+ if searchQuery.startswith('"') and searchQuery.endswith('"'):
+ searchQuery = searchQuery[1:-1]
+ elif mode == "regex":
+ searchOptions["regex_match"] = True
+ elif mode == "case":
+ searchOptions["case_sensitive"] = True
+ elif mode == "and":
+ searchOptions["and_terms"] = True
+
+ # Check if it contains path:search format
+ # Microsoft-standard paths: /sites/SiteName/Path:files:.pdf
+ if ':' in searchQuery:
+ # For Microsoft-standard paths (/sites/...), find the colon that separates path from search
+ if searchQuery.startswith('/sites/'):
+ # Find the colon that separates path from search (after the full path)
+ # Look for pattern: /sites/SiteName/Path/...:files:.pdf
+ # We need to find the colon that's followed by search type or file extension
+                    colonPositions = [i for i, char in enumerate(searchQuery) if char == ':']
+
+ # If we have colons, find the one that's followed by search type or file extension
+ splitPos = None
+ if colonPositions:
+ for pos in colonPositions:
+ afterColon = searchQuery[pos+1:pos+10].strip().lower()
+ # Check if this colon is followed by search type or looks like a file extension
+ if afterColon.startswith(('files:', 'folders:', 'all:', '.')) or afterColon == '':
+ splitPos = pos
+ break
+
+ # If no clear split found, use the last colon
+ if splitPos is None and colonPositions:
+ splitPos = colonPositions[-1]
+
+                    if splitPos is not None:
+ pathPart = searchQuery[:splitPos].strip()
+ searchPart = searchQuery[splitPos+1:].strip()
+ else:
+ # Fallback: split on first colon
+ parts = searchQuery.split(':', 1)
+ pathPart = parts[0].strip()
+ searchPart = parts[1].strip()
+ else:
+ # Regular path:search format - split on first colon
+ parts = searchQuery.split(':', 1)
+ pathPart = parts[0].strip()
+ searchPart = parts[1].strip()
+
+ # Check if searchPart starts with search type (files:, folders:, all:)
+ if searchPart.startswith(("files:", "folders:", "all:")):
+ typeParts = searchPart.split(':', 1)
+ searchType = typeParts[0].strip() # Update searchType
+ searchPart = typeParts[1].strip() if len(typeParts) > 1 else ""
+
+ # Handle path part
+ if not pathPart or pathPart == "*":
+ pathQuery = "*"
+ elif pathPart.startswith('/'):
+ pathQuery = pathPart
+ else:
+ pathQuery = f"/Documents/{pathPart}"
+
+ # Handle search part
+ if not searchPart or searchPart == "*":
+ fileQuery = "*"
+ else:
+ fileQuery = searchPart
+
+ return pathQuery, fileQuery, searchType, searchOptions
+
+ # No colon - check if it looks like a path
+ elif searchQuery.startswith('/'):
+ # It's a path only
+ return searchQuery, "*", searchType, searchOptions
+
+ else:
+ # It's a search term only - keep words as-is, do NOT convert to paths
+ # "root document lesson" stays as "root document lesson"
+ # "root, gose" stays as "root, gose"
+ return "*", searchQuery, searchType, searchOptions
+
+ except Exception as e:
+ logger.error(f"Error parsing searchQuery '{searchQuery}': {str(e)}")
+ raise ValueError(f"Failed to parse searchQuery '{searchQuery}': {str(e)}")
+
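+    # Expected mappings, mirroring the docstring examples above (illustrative):
+    #
+    #   parseSearchQuery("budget")            -> ("*", "budget", "all", {})
+    #   parseSearchQuery("/Documents:budget") -> ("/Documents", "budget", "all", {})
+    #   parseSearchQuery("folders:DELTA")     -> ("*", "DELTA", "folders", {})
+    #   parseSearchQuery('exact:"Operations 2025"')
+    #       -> ("*", "Operations 2025", "all", {"exact_match": True})
+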
+ def resolvePathQuery(self, pathQuery: str) -> List[str]:
+ """
+ Resolve pathQuery into a list of search paths for SharePoint operations.
+
+ Parameters:
+ pathQuery (str): Query string that can contain:
+ - Direct paths (e.g., "/Documents/Project1")
+ - Wildcards (e.g., "/Documents/*")
+ - Multiple paths separated by semicolons (e.g., "/Docs; /Files")
+ - Single word relative paths (e.g., "Project1" -> resolved to default folder)
+ - Empty string or "*" for global search
+ - Space-separated words are treated as search terms, NOT folder paths
+
+ Returns:
+ List[str]: List of resolved paths
+ """
+ try:
+ if not pathQuery or not pathQuery.strip() or pathQuery.strip() == "*":
+ return ["*"] # Global search across all sites
+
+ # Split by semicolon to handle multiple paths
+ rawPaths = [path.strip() for path in pathQuery.split(';') if path.strip()]
+ resolvedPaths = []
+
+ for rawPath in rawPaths:
+ # Handle wildcards - return as-is
+ if '*' in rawPath:
+ resolvedPaths.append(rawPath)
+ # Handle absolute paths
+ elif rawPath.startswith('/'):
+ resolvedPaths.append(rawPath)
+ # Handle single word relative paths - prepend default folder
+ # BUT NOT space-separated words (those are search terms, not paths)
+ elif ' ' not in rawPath:
+ resolvedPaths.append(f"/Documents/{rawPath}")
+ else:
+ # Check if this looks like a path (has path separators) or search terms
+ if '\\' in rawPath or '/' in rawPath:
+ # This looks like a path with spaces in folder names - treat as valid path
+ resolvedPaths.append(rawPath)
+ logger.info(f"Path with spaces '{rawPath}' treated as valid folder path")
+ else:
+ # Space-separated words without path separators are search terms
+ # Return as "*" to search globally
+ logger.info(f"Space-separated words '{rawPath}' treated as search terms, not folder path")
+ resolvedPaths.append("*")
+
+ # Remove duplicates while preserving order
+ seen = set()
+ uniquePaths = []
+ for path in resolvedPaths:
+ if path not in seen:
+ seen.add(path)
+ uniquePaths.append(path)
+
+ logger.info(f"Resolved pathQuery '{pathQuery}' to {len(uniquePaths)} paths: {uniquePaths}")
+ return uniquePaths
+
+ except Exception as e:
+ logger.error(f"Error resolving pathQuery '{pathQuery}': {str(e)}")
+ raise ValueError(f"Failed to resolve pathQuery '{pathQuery}': {str(e)}")
+
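+    # Illustrative resolutions, following the rules above:
+    #
+    #   resolvePathQuery("*")                    -> ["*"]
+    #   resolvePathQuery("Project1")             -> ["/Documents/Project1"]
+    #   resolvePathQuery("/Docs; /Files")        -> ["/Docs", "/Files"]
+    #   resolvePathQuery("root document lesson") -> ["*"]  # search terms, not a path
+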
+ def cleanSearchQuery(self, query: str) -> str:
+ """
+ Clean search query to make it compatible with Graph API KQL syntax.
+ Removes path-like syntax and invalid KQL constructs.
+
+ Parameters:
+ query (str): Raw search query that may contain paths and invalid syntax
+
+ Returns:
+ str: Cleaned query suitable for Graph API search endpoint
+ """
+ if not query or not query.strip():
+ return ""
+
+ query = query.strip()
+
+ # Handle patterns like: "Company Share/Freigegebene Dokumente/.../expenses:files:.pdf"
+ # Extract the search term and file extension
+
+ # First, extract file extension if present (format: :files:.pdf or just .pdf at the end)
+ fileExtension = ""
+ if ':files:' in query.lower() or ':folders:' in query.lower():
+ # Extract extension after the type filter
+ extMatch = re.search(r':(?:files|folders):(\.\w+)', query, re.IGNORECASE)
+ if extMatch:
+ fileExtension = extMatch.group(1)
+ # Remove the type filter part
+ query = re.sub(r':(?:files|folders):\.?\w*', '', query, flags=re.IGNORECASE)
+ elif query.endswith(('.pdf', '.doc', '.docx', '.xls', '.xlsx', '.txt', '.csv', '.ppt', '.pptx')):
+ # Extract extension from end
+ extMatch = re.search(r'(\.\w+)$', query)
+ if extMatch:
+ fileExtension = extMatch.group(1)
+ query = query[:-len(fileExtension)]
+
+ # Extract search term: get the last segment after the last slash (filename part)
+ queryNormalized = query.replace('\\', '/')
+ if '/' in queryNormalized:
+ # Extract the last segment (usually the filename/search term)
+ lastSegment = queryNormalized.split('/')[-1]
+ # Remove any remaining colons or type filters
+ if ':' in lastSegment:
+ lastSegment = lastSegment.split(':')[0]
+ searchTerm = lastSegment.strip()
+ else:
+ # No path separators, use the query as-is but remove type filters
+ if ':' in query:
+ searchTerm = query.split(':')[0].strip()
+ else:
+ searchTerm = query.strip()
+
+ # Remove any remaining type filters or invalid syntax
+ searchTerm = re.sub(r':(?:files|folders|all):?', '', searchTerm, flags=re.IGNORECASE)
+ searchTerm = searchTerm.strip()
+
+ # If we have a file extension, include it in the search term
+ # Note: Graph API search endpoint may not support filetype: syntax
+ # So we include the extension as part of the search term or filter results after
+ if fileExtension:
+ extWithoutDot = fileExtension.lstrip('.')
+ # Try simple approach: add extension as search term
+ # If this doesn't work, we'll filter results after search
+ if searchTerm:
+ # Include extension in search - Graph API will search in filename
+ searchTerm = f"{searchTerm} {extWithoutDot}"
+ else:
+ searchTerm = extWithoutDot
+
+ # Final cleanup: remove any remaining invalid characters for KQL
+ # Keep alphanumeric, spaces, hyphens, underscores, dots, and common search operators
+ searchTerm = re.sub(r'[^\w\s\-\.\*]', ' ', searchTerm)
+ searchTerm = ' '.join(searchTerm.split()) # Normalize whitespace
+
+ return searchTerm if searchTerm else "*"
+
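+    # Illustrative cleanups, derived from the logic above:
+    #
+    #   cleanSearchQuery("Company Share/Freigegebene Dokumente/expenses:files:.pdf")
+    #       -> "expenses pdf"
+    #   cleanSearchQuery("budget") -> "budget"
+    #   cleanSearchQuery("")       -> ""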
diff --git a/modules/workflows/methods/methodSharepoint/helpers/siteDiscovery.py b/modules/workflows/methods/methodSharepoint/helpers/siteDiscovery.py
new file mode 100644
index 00000000..f59de8f7
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/helpers/siteDiscovery.py
@@ -0,0 +1,173 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+Site Discovery helper for SharePoint operations.
+Handles SharePoint site discovery, filtering, and resolution.
+"""
+
+import logging
+import urllib.parse
+from typing import Dict, Any, List, Optional
+
+logger = logging.getLogger(__name__)
+
+class SiteDiscoveryHelper:
+ """Helper for SharePoint site discovery and resolution"""
+
+ def __init__(self, methodInstance):
+ """
+ Initialize site discovery helper.
+
+ Args:
+ methodInstance: Instance of MethodSharepoint (for access to services)
+ """
+ self.method = methodInstance
+ self.services = methodInstance.services
+
+ async def discoverSharePointSites(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
+ """
+ Discover SharePoint sites accessible to the user via Microsoft Graph API.
+
+ Args:
+ limit: Optional limit on number of sites to return
+
+ Returns:
+ List of site information dictionaries
+ """
+ try:
+ # Query Microsoft Graph to get sites the user has access to
+ endpoint = "sites?search=*"
+ if limit:
+ endpoint += f"&$top={limit}"
+
+ result = await self.method.apiClient.makeGraphApiCall(endpoint)
+
+ if "error" in result:
+ logger.error(f"Error discovering SharePoint sites: {result['error']}")
+ return []
+
+ sites = result.get("value", [])
+ if limit:
+ sites = sites[:limit]
+
+ logger.info(f"Discovered {len(sites)} SharePoint sites" + (f" (limited to {limit})" if limit else ""))
+
+ # Process and return site information
+ processedSites = []
+ for site in sites:
+ siteInfo = {
+ "id": site.get("id"),
+ "displayName": site.get("displayName"),
+ "name": site.get("name"),
+ "webUrl": site.get("webUrl"),
+ "description": site.get("description"),
+ "createdDateTime": site.get("createdDateTime"),
+ "lastModifiedDateTime": site.get("lastModifiedDateTime")
+ }
+ processedSites.append(siteInfo)
+ logger.debug(f"Site: {siteInfo['displayName']} - {siteInfo['webUrl']}")
+
+ return processedSites
+
+ except Exception as e:
+ logger.error(f"Error discovering SharePoint sites: {str(e)}")
+ return []
+
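+    # Illustrative usage sketch (assumes an authenticated MethodSharepoint
+    # instance; `helper` and the loop are hypothetical):
+    #
+    #   helper = SiteDiscoveryHelper(method)
+    #   sites = await helper.discoverSharePointSites(limit=10)
+    #   for site in sites:
+    #       print(site["displayName"], site["webUrl"])
+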
+ def extractHostnameFromWebUrl(self, webUrl: str) -> Optional[str]:
+ """Extract hostname from SharePoint webUrl (e.g., https://pcuster.sharepoint.com)"""
+ try:
+ if not webUrl:
+ return None
+ parsed = urllib.parse.urlparse(webUrl)
+ return parsed.hostname
+ except Exception as e:
+ logger.error(f"Error extracting hostname from webUrl '{webUrl}': {str(e)}")
+ return None
+
+ def extractSiteFromStandardPath(self, pathQuery: str) -> Optional[Dict[str, str]]:
+ """
+ Extract site name from Microsoft-standard server-relative path.
+ Delegates to SharePoint service.
+ """
+ return self.services.sharepoint.extractSiteFromStandardPath(pathQuery)
+
+ async def getSiteByStandardPath(self, sitePath: str) -> Optional[Dict[str, Any]]:
+ """
+ Get SharePoint site directly by Microsoft-standard path.
+ Delegates to SharePoint service.
+ """
+ return await self.services.sharepoint.getSiteByStandardPath(sitePath)
+
+ def filterSitesByHint(self, sites: List[Dict[str, Any]], siteHint: str) -> List[Dict[str, Any]]:
+ """
+ Filter discovered sites by a human-entered site hint.
+ Delegates to SharePoint service.
+ """
+ return self.services.sharepoint.filterSitesByHint(sites, siteHint)
+
+ async def getSiteId(self, hostname: str, sitePath: str) -> str:
+ """
+ Get SharePoint site ID from hostname and site path.
+
+ Args:
+ hostname: SharePoint hostname
+ sitePath: Site path
+
+ Returns:
+ Site ID string
+ """
+ try:
+ endpoint = f"sites/{hostname}:/{sitePath}"
+ result = await self.method.apiClient.makeGraphApiCall(endpoint)
+
+ if "error" in result:
+ logger.error(f"Error getting site ID: {result['error']}")
+ return ""
+
+ return result.get("id", "")
+ except Exception as e:
+ logger.error(f"Error getting site ID: {str(e)}")
+ return ""
+
+ async def resolveSitesFromPathQuery(self, pathQuery: str) -> tuple[List[Dict[str, Any]], Optional[str]]:
+ """
+ Resolve sites from pathQuery using SharePoint service helper methods.
+
+ Args:
+ pathQuery: Path query string
+
+ Returns:
+ Tuple of (sites list, error message)
+ """
+ try:
+ # Validate pathQuery format
+ isValid, errorMsg = self.services.sharepoint.validatePathQuery(pathQuery)
+ if not isValid:
+ return [], errorMsg
+
+ # Resolve sites using service helper
+ sites = await self.services.sharepoint.resolveSitesFromPathQuery(pathQuery)
+ if not sites:
+ return [], "No SharePoint sites found or accessible"
+
+ return sites, None
+ except Exception as e:
+ logger.error(f"Error resolving sites from pathQuery '{pathQuery}': {str(e)}")
+ return [], f"Error resolving sites from pathQuery: {str(e)}"
+
+ def parseSiteUrl(self, siteUrl: str) -> Dict[str, str]:
+ """Parse SharePoint site URL to extract hostname and site path"""
+ try:
+ parsed = urllib.parse.urlparse(siteUrl)
+ hostname = parsed.hostname
+ path = parsed.path.strip('/')
+
+ return {
+ "hostname": hostname,
+ "sitePath": path
+ }
+ except Exception as e:
+ logger.error(f"Error parsing site URL {siteUrl}: {str(e)}")
+ return {"hostname": "", "sitePath": ""}
+
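+    # Example (deterministic given urllib.parse; the hostname is hypothetical):
+    #
+    #   parseSiteUrl("https://contoso.sharepoint.com/sites/SteeringBPM")
+    #       -> {"hostname": "contoso.sharepoint.com", "sitePath": "sites/SteeringBPM"}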
diff --git a/modules/workflows/methods/methodSharepoint/methodSharepoint.py b/modules/workflows/methods/methodSharepoint/methodSharepoint.py
new file mode 100644
index 00000000..299d3fed
--- /dev/null
+++ b/modules/workflows/methods/methodSharepoint/methodSharepoint.py
@@ -0,0 +1,387 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+
+"""
+SharePoint operations method module.
+Handles SharePoint document operations using the SharePoint service.
+"""
+
+import logging
+from modules.workflows.methods.methodBase import MethodBase
+from modules.datamodels.datamodelWorkflowActions import WorkflowActionDefinition, WorkflowActionParameter
+from modules.shared.frontendTypes import FrontendType
+
+# Import helpers
+from .helpers.connection import ConnectionHelper
+from .helpers.siteDiscovery import SiteDiscoveryHelper
+from .helpers.documentParsing import DocumentParsingHelper
+from .helpers.pathProcessing import PathProcessingHelper
+from .helpers.apiClient import ApiClientHelper
+
+# Import actions
+from .actions.findDocumentPath import findDocumentPath
+from .actions.readDocuments import readDocuments
+from .actions.uploadDocument import uploadDocument
+from .actions.listDocuments import listDocuments
+from .actions.analyzeFolderUsage import analyzeFolderUsage
+from .actions.findSiteByUrl import findSiteByUrl
+from .actions.downloadFileByPath import downloadFileByPath
+from .actions.copyFile import copyFile
+from .actions.uploadFile import uploadFile
+
+logger = logging.getLogger(__name__)
+
+class MethodSharepoint(MethodBase):
+ """SharePoint operations methods."""
+
+ def __init__(self, services):
+ super().__init__(services)
+ self.name = "sharepoint"
+ self.description = "SharePoint operations methods"
+
+ # Initialize helper modules
+ self.connection = ConnectionHelper(self)
+ self.siteDiscovery = SiteDiscoveryHelper(self)
+ self.documentParsing = DocumentParsingHelper(self)
+ self.pathProcessing = PathProcessingHelper(self)
+ self.apiClient = ApiClientHelper(self)
+
+        # RBAC integration: action definitions keyed by name, each with an actionId
+ self._actions = {
+ "findDocumentPath": WorkflowActionDefinition(
+ actionId="sharepoint.findDocumentPath",
+ description="Find documents and folders by name/path across sites",
+ parameters={
+ "connectionReference": WorkflowActionParameter(
+ name="connectionReference",
+ type="str",
+ frontendType=FrontendType.USER_CONNECTION,
+ required=True,
+ description="Microsoft connection label"
+ ),
+ "site": WorkflowActionParameter(
+ name="site",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=False,
+ description="Site hint"
+ ),
+ "searchQuery": WorkflowActionParameter(
+ name="searchQuery",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="Search terms or path"
+ ),
+ "maxResults": WorkflowActionParameter(
+ name="maxResults",
+ type="int",
+ frontendType=FrontendType.NUMBER,
+ required=False,
+ default=1000,
+ description="Maximum items to return",
+ validation={"min": 1, "max": 10000}
+ )
+ },
+ execute=findDocumentPath.__get__(self, self.__class__)
+ ),
+ "readDocuments": WorkflowActionDefinition(
+ actionId="sharepoint.readDocuments",
+ description="Read documents from SharePoint and extract content/metadata",
+ parameters={
+ "connectionReference": WorkflowActionParameter(
+ name="connectionReference",
+ type="str",
+ frontendType=FrontendType.USER_CONNECTION,
+ required=True,
+ description="Microsoft connection label"
+ ),
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=False,
+ description="Document list reference(s) containing findDocumentPath result"
+ ),
+ "pathQuery": WorkflowActionParameter(
+ name="pathQuery",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=False,
+ description="Direct path query if no documentList (e.g., /sites/SiteName/FolderPath)"
+ ),
+ "includeMetadata": WorkflowActionParameter(
+ name="includeMetadata",
+ type="bool",
+ frontendType=FrontendType.CHECKBOX,
+ required=False,
+ default=True,
+ description="Include metadata"
+ )
+ },
+ execute=readDocuments.__get__(self, self.__class__)
+ ),
+ "uploadDocument": WorkflowActionDefinition(
+ actionId="sharepoint.uploadDocument",
+ description="Upload documents to SharePoint",
+ parameters={
+ "connectionReference": WorkflowActionParameter(
+ name="connectionReference",
+ type="str",
+ frontendType=FrontendType.USER_CONNECTION,
+ required=True,
+ description="Microsoft connection label"
+ ),
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference(s) to upload. File names are taken from the documents"
+ ),
+ "pathQuery": WorkflowActionParameter(
+ name="pathQuery",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=False,
+ description="Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath)"
+ )
+ },
+ execute=uploadDocument.__get__(self, self.__class__)
+ ),
+ "listDocuments": WorkflowActionDefinition(
+ actionId="sharepoint.listDocuments",
+ description="List documents and folders in SharePoint paths across sites",
+ parameters={
+ "connectionReference": WorkflowActionParameter(
+ name="connectionReference",
+ type="str",
+ frontendType=FrontendType.USER_CONNECTION,
+ required=True,
+ description="Microsoft connection label"
+ ),
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document list reference(s) containing findDocumentPath result"
+ ),
+ "includeSubfolders": WorkflowActionParameter(
+ name="includeSubfolders",
+ type="bool",
+ frontendType=FrontendType.CHECKBOX,
+ required=False,
+ default=False,
+ description="Include one level of subfolders"
+ )
+ },
+ execute=listDocuments.__get__(self, self.__class__)
+ ),
+ "analyzeFolderUsage": WorkflowActionDefinition(
+ actionId="sharepoint.analyzeFolderUsage",
+ description="Analyze usage intensity of folders and files in SharePoint",
+ parameters={
+ "connectionReference": WorkflowActionParameter(
+ name="connectionReference",
+ type="str",
+ frontendType=FrontendType.USER_CONNECTION,
+ required=True,
+ description="Microsoft connection label"
+ ),
+ "documentList": WorkflowActionParameter(
+ name="documentList",
+ type="List[str]",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document list reference(s) containing findDocumentPath result"
+ ),
+ "startDateTime": WorkflowActionParameter(
+ name="startDateTime",
+ type="str",
+ frontendType=FrontendType.DATETIME,
+ required=False,
+ description="Start date/time in ISO format (e.g., 2025-11-01T00:00:00Z). Default: 30 days ago"
+ ),
+ "endDateTime": WorkflowActionParameter(
+ name="endDateTime",
+ type="str",
+ frontendType=FrontendType.DATETIME,
+ required=False,
+ description="End date/time in ISO format (e.g., 2025-11-30T23:59:59Z). Default: current time"
+ ),
+ "interval": WorkflowActionParameter(
+ name="interval",
+ type="str",
+ frontendType=FrontendType.SELECT,
+ frontendOptions=["day", "week", "month"],
+ required=False,
+ default="day",
+ description="Time interval for grouping activities"
+ )
+ },
+ execute=analyzeFolderUsage.__get__(self, self.__class__)
+ ),
+ "findSiteByUrl": WorkflowActionDefinition(
+ actionId="sharepoint.findSiteByUrl",
+ description="Find SharePoint site by hostname and site path",
+ parameters={
+ "connectionReference": WorkflowActionParameter(
+ name="connectionReference",
+ type="str",
+ frontendType=FrontendType.USER_CONNECTION,
+ required=True,
+ description="Microsoft connection label"
+ ),
+ "hostname": WorkflowActionParameter(
+ name="hostname",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="SharePoint hostname (e.g., example.sharepoint.com)"
+ ),
+ "sitePath": WorkflowActionParameter(
+ name="sitePath",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="Site path (e.g., SteeringBPM or /sites/SteeringBPM)"
+ )
+ },
+ execute=findSiteByUrl.__get__(self, self.__class__)
+ ),
+ "downloadFileByPath": WorkflowActionDefinition(
+ actionId="sharepoint.downloadFileByPath",
+ description="Download file from SharePoint by exact file path",
+ parameters={
+ "connectionReference": WorkflowActionParameter(
+ name="connectionReference",
+ type="str",
+ frontendType=FrontendType.USER_CONNECTION,
+ required=True,
+ description="Microsoft connection label"
+ ),
+ "siteId": WorkflowActionParameter(
+ name="siteId",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="SharePoint site ID (from findSiteByUrl result) or document reference containing site info"
+ ),
+ "filePath": WorkflowActionParameter(
+ name="filePath",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="Full file path relative to site root (e.g., /General/50 Docs hosted by SELISE/file.xlsx)"
+ )
+ },
+ execute=downloadFileByPath.__get__(self, self.__class__)
+ ),
+ "copyFile": WorkflowActionDefinition(
+ actionId="sharepoint.copyFile",
+ description="Copy file within SharePoint",
+ parameters={
+ "connectionReference": WorkflowActionParameter(
+ name="connectionReference",
+ type="str",
+ frontendType=FrontendType.USER_CONNECTION,
+ required=True,
+ description="Microsoft connection label"
+ ),
+ "siteId": WorkflowActionParameter(
+ name="siteId",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="SharePoint site ID (from findSiteByUrl result) or document reference containing site info"
+ ),
+ "sourceFolder": WorkflowActionParameter(
+ name="sourceFolder",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="Source folder path relative to site root"
+ ),
+ "sourceFile": WorkflowActionParameter(
+ name="sourceFile",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="Source file name"
+ ),
+ "destFolder": WorkflowActionParameter(
+ name="destFolder",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="Destination folder path relative to site root"
+ ),
+ "destFile": WorkflowActionParameter(
+ name="destFile",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="Destination file name"
+ )
+ },
+ execute=copyFile.__get__(self, self.__class__)
+ ),
+ "uploadFile": WorkflowActionDefinition(
+ actionId="sharepoint.uploadFile",
+ description="Upload raw file content (bytes) to SharePoint",
+ parameters={
+ "connectionReference": WorkflowActionParameter(
+ name="connectionReference",
+ type="str",
+ frontendType=FrontendType.USER_CONNECTION,
+ required=True,
+ description="Microsoft connection label"
+ ),
+ "siteId": WorkflowActionParameter(
+ name="siteId",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="SharePoint site ID (from findSiteByUrl result) or document reference containing site info"
+ ),
+ "folderPath": WorkflowActionParameter(
+ name="folderPath",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="Folder path relative to site root"
+ ),
+ "fileName": WorkflowActionParameter(
+ name="fileName",
+ type="str",
+ frontendType=FrontendType.TEXT,
+ required=True,
+ description="File name"
+ ),
+ "content": WorkflowActionParameter(
+ name="content",
+ type="str",
+ frontendType=FrontendType.DOCUMENT_REFERENCE,
+ required=True,
+ description="Document reference containing file content as base64-encoded bytes"
+ )
+ },
+ execute=uploadFile.__get__(self, self.__class__)
+ )
+ }
+
+ # Validate actions after definition
+ self._validateActions()
+
+        # Register actions as bound methods (optional, for direct access)
+ self.findDocumentPath = findDocumentPath.__get__(self, self.__class__)
+ self.readDocuments = readDocuments.__get__(self, self.__class__)
+ self.uploadDocument = uploadDocument.__get__(self, self.__class__)
+ self.listDocuments = listDocuments.__get__(self, self.__class__)
+ self.analyzeFolderUsage = analyzeFolderUsage.__get__(self, self.__class__)
+ self.findSiteByUrl = findSiteByUrl.__get__(self, self.__class__)
+ self.downloadFileByPath = downloadFileByPath.__get__(self, self.__class__)
+ self.copyFile = copyFile.__get__(self, self.__class__)
+ self.uploadFile = uploadFile.__get__(self, self.__class__)
+
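+# Illustrative instantiation (a sketch; assumes a `services` container that
+# provides the chat and sharepoint services used above):
+#
+#   method = MethodSharepoint(services)
+#   sorted(method._actions.keys())
+#   # -> ['analyzeFolderUsage', 'copyFile', 'downloadFileByPath', ...]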
diff --git a/modules/workflows/processing/shared/methodDiscovery.py b/modules/workflows/processing/shared/methodDiscovery.py
index 02c584cc..30708010 100644
--- a/modules/workflows/processing/shared/methodDiscovery.py
+++ b/modules/workflows/processing/shared/methodDiscovery.py
@@ -27,12 +27,16 @@ def discoverMethods(serviceCenter):
# Import the methods package
methodsPackage = importlib.import_module('modules.workflows.methods')
- # Discover all modules in the package
+ # Discover all modules and packages in the methods package
for _, name, isPkg in pkgutil.iter_modules(methodsPackage.__path__):
- if not isPkg and name.startswith('method'):
+ if name.startswith('method'):
try:
- # Import the module
- module = importlib.import_module(f'modules.workflows.methods.{name}')
+                    # Packages (folders) export their Method class via
+                    # __init__.py, so packages and plain modules are
+                    # imported the same way
+                    module = importlib.import_module(f'modules.workflows.methods.{name}')
# Find all classes in the module that inherit from MethodBase
for itemName, item in inspect.getmembers(module):