From 13b7c4fdbe6fe11c40fda7c627bbdbaf81bd1b25 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sun, 7 Dec 2025 08:48:49 +0100
Subject: [PATCH] hot fixes: sharepoint folders and stats
---
modules/routes/routeDataAutomation.py | 3 +-
.../services/serviceChat/mainServiceChat.py | 3 +-
.../mainServiceSharepoint.py | 410 +++-
modules/workflows/methods/methodAi.py | 8 +-
modules/workflows/methods/methodContext.py | 4 +-
modules/workflows/methods/methodOutlook.py | 8 +-
modules/workflows/methods/methodSharepoint.py | 1732 ++++++++---------
.../processing/core/actionExecutor.py | 29 +
8 files changed, 1229 insertions(+), 968 deletions(-)
diff --git a/modules/routes/routeDataAutomation.py b/modules/routes/routeDataAutomation.py
index 903d0d53..ee13915c 100644
--- a/modules/routes/routeDataAutomation.py
+++ b/modules/routes/routeDataAutomation.py
@@ -15,6 +15,7 @@ from modules.security.auth import getCurrentUser, limiter
from modules.datamodels.datamodelChat import AutomationDefinition, ChatWorkflow
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata
from modules.shared.attributeUtils import getModelAttributeDefinitions
+from modules.features.automation import executeAutomation
# Configure logger
logger = logging.getLogger(__name__)
@@ -217,7 +218,7 @@ async def execute_automation(
"""Execute an automation immediately (test mode)"""
try:
chatInterface = getChatInterface(currentUser)
- workflow = await chatInterface.executeAutomation(automationId)
+ workflow = await executeAutomation(automationId, chatInterface)
return workflow
except HTTPException:
raise
diff --git a/modules/services/serviceChat/mainServiceChat.py b/modules/services/serviceChat/mainServiceChat.py
index cb05279f..7848cb29 100644
--- a/modules/services/serviceChat/mainServiceChat.py
+++ b/modules/services/serviceChat/mainServiceChat.py
@@ -1013,7 +1013,8 @@ class ChatService:
return self._progressLogger
def createProgressLogger(self) -> ProgressLogger:
- return ProgressLogger(self.services)
+ """Get or create the progress logger instance (singleton)"""
+ return self._getProgressLogger()
def progressLogStart(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentOperationId: Optional[str] = None):
"""Wrapper for ProgressLogger.startOperation
diff --git a/modules/services/serviceSharepoint/mainServiceSharepoint.py b/modules/services/serviceSharepoint/mainServiceSharepoint.py
index e7f24648..6c6c266e 100644
--- a/modules/services/serviceSharepoint/mainServiceSharepoint.py
+++ b/modules/services/serviceSharepoint/mainServiceSharepoint.py
@@ -287,7 +287,12 @@ class SharepointService:
try:
# Clean the path
cleanPath = folderPath.lstrip('/')
- endpoint = f"sites/{siteId}/drive/root:/{cleanPath}"
+
+ # If path is empty, get root directly
+ if not cleanPath:
+ endpoint = f"sites/{siteId}/drive/root"
+ else:
+ endpoint = f"sites/{siteId}/drive/root:/{cleanPath}"
result = await self._makeGraphApiCall(endpoint)
@@ -499,4 +504,407 @@ class SharepointService:
except Exception as e:
logger.error(f"Error downloading file by path: {str(e)}")
return None
+
+ async def _getItemById(self, siteId: str, driveId: str, itemId: str) -> Optional[Dict[str, Any]]:
+ """Verify that an item exists by getting it by ID.
+
+ Args:
+ siteId: SharePoint site ID
+ driveId: Drive ID (document library)
+ itemId: Item ID to verify
+
+ Returns:
+ Item dictionary if found, None otherwise
+ """
+ try:
+ endpoint = f"sites/{siteId}/drives/{driveId}/items/{itemId}"
+ result = await self._makeGraphApiCall(endpoint)
+
+ if "error" in result:
+ logger.warning(f"Item {itemId} not found: {result['error']}")
+ return None
+
+ return result
+
+ except Exception as e:
+ logger.warning(f"Error verifying item {itemId}: {str(e)}")
+ return None
+
+ async def _findDriveForItem(self, siteId: str, itemId: str) -> Optional[str]:
+ """Find which drive contains a specific item by trying to get it from all drives.
+
+ Args:
+ siteId: SharePoint site ID
+ itemId: Item ID to find
+
+ Returns:
+ Drive ID if found, None otherwise
+ """
+ try:
+ # Get all drives for the site
+ endpoint = f"sites/{siteId}/drives"
+ drivesResult = await self._makeGraphApiCall(endpoint)
+
+ if "error" in drivesResult:
+ logger.warning(f"Could not get drives for site {siteId}: {drivesResult['error']}")
+ return None
+
+ drives = drivesResult.get("value", [])
+ if not drives:
+ logger.warning(f"No drives found for site {siteId}")
+ return None
+
+ # Try to find the item in each drive
+ for drive in drives:
+ driveId = drive.get("id")
+ if not driveId:
+ continue
+
+ itemInfo = await self._getItemById(siteId, driveId, itemId)
+ if itemInfo:
+ logger.info(f"Found item {itemId} in drive {drive.get('name', driveId)}")
+ return driveId
+
+ logger.warning(f"Item {itemId} not found in any drive for site {siteId}")
+ return None
+
+ except Exception as e:
+ logger.warning(f"Error finding drive for item {itemId}: {str(e)}")
+ return None
+
+ async def getFolderUsageAnalytics(self, siteId: str, driveId: str, itemId: str, startDateTime: Optional[str] = None, endDateTime: Optional[str] = None, interval: str = "day") -> Dict[str, Any]:
+ """Get usage analytics for a folder or file.
+
+ Args:
+ siteId: SharePoint site ID
+ driveId: Drive ID (document library)
+ itemId: Folder or file item ID
+ startDateTime: Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). If None, uses 30 days ago.
+ endDateTime: End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). If None, uses current time.
+ interval: Time interval for grouping activities. Options: "day", "week", "month". Default: "day"
+
+ Returns:
+ Dictionary containing analytics data with activities grouped by interval.
+ If analytics are not available (404), returns empty analytics structure instead of error.
+ """
+ try:
+ from datetime import datetime, timedelta, timezone
+
+ # Set default time range if not provided (last 30 days)
+ if not endDateTime:
+ endDateTime = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
+ if not startDateTime:
+ startDate = datetime.now(timezone.utc) - timedelta(days=30)
+ startDateTime = startDate.isoformat().replace('+00:00', 'Z')
+
+ # Build endpoint with query parameters
+ endpoint = f"sites/{siteId}/drives/{driveId}/items/{itemId}/getActivitiesByInterval"
+ endpoint += f"?startDateTime={startDateTime}&endDateTime={endDateTime}&interval={interval}"
+
+ result = await self._makeGraphApiCall(endpoint)
+
+ if "error" in result:
+ errorMsg = result.get('error', '')
+ # Check if it's a 404 error
+ if isinstance(errorMsg, str) and '404' in errorMsg:
+ # Verify if the item exists - first try with current driveId
+ itemInfo = await self._getItemById(siteId, driveId, itemId)
+
+ # If not found, try to find the correct drive for this item
+ if not itemInfo:
+ logger.info(f"Item {itemId} not found in drive {driveId}, searching for correct drive")
+ correctDriveId = await self._findDriveForItem(siteId, itemId)
+ if correctDriveId and correctDriveId != driveId:
+ logger.info(f"Found item in different drive {correctDriveId}, retrying analytics call")
+ # Retry with correct drive
+ endpoint = f"sites/{siteId}/drives/{correctDriveId}/items/{itemId}/getActivitiesByInterval"
+ endpoint += f"?startDateTime={startDateTime}&endDateTime={endDateTime}&interval={interval}"
+ result = await self._makeGraphApiCall(endpoint)
+
+ if "error" not in result:
+ logger.info(f"Successfully retrieved analytics using correct drive {correctDriveId}")
+ return result
+ # If still error, continue with original error handling
+ itemInfo = await self._getItemById(siteId, correctDriveId, itemId)
+
+ if itemInfo:
+ # Item exists but analytics are not available - return empty analytics
+ logger.warning(f"Usage analytics not available for item {itemId} (item exists but has no activity data or analytics not supported)")
+ return {
+ "value": [],
+ "note": "No analytics data available for this item. The item exists but may not have activity data or analytics may not be supported for this item type."
+ }
+ else:
+ # Item doesn't exist
+ logger.error(f"Item {itemId} not found when trying to get usage analytics")
+ return result
+ else:
+ # Other error
+ logger.error(f"Error getting usage analytics: {result['error']}")
+ return result
+
+ logger.info(f"Retrieved usage analytics for item {itemId} with interval {interval}")
+ return result
+
+ except Exception as e:
+ logger.error(f"Error getting folder usage analytics: {str(e)}")
+ return {"error": f"Error getting folder usage analytics: {str(e)}"}
+
+ async def getDriveId(self, siteId: str, driveName: Optional[str] = None) -> Optional[str]:
+ """Get drive ID for a site. If driveName is provided, finds the specific drive, otherwise returns the default drive.
+
+ Args:
+ siteId: SharePoint site ID
+ driveName: Optional drive name (document library name). If None, returns default drive.
+
+ Returns:
+ Drive ID string or None if not found
+ """
+ try:
+ endpoint = f"sites/{siteId}/drives"
+ result = await self._makeGraphApiCall(endpoint)
+
+ if "error" in result:
+ logger.error(f"Error getting drives: {result['error']}")
+ return None
+
+ drives = result.get("value", [])
+
+ if not driveName:
+ # Return default drive (usually the first one or the one named "Documents")
+ for drive in drives:
+ if drive.get("name") == "Documents" or drive.get("name") == "Shared Documents":
+ logger.info(f"Found default drive: {drive.get('name')} (ID: {drive.get('id')})")
+ return drive.get("id")
+ # If no Documents drive found, return first drive
+ if drives:
+ logger.info(f"Using first drive: {drives[0].get('name')} (ID: {drives[0].get('id')})")
+ return drives[0].get("id")
+ return None
+
+ # Find specific drive by name
+ for drive in drives:
+ if drive.get("name", "").lower() == driveName.lower():
+ logger.info(f"Found drive '{driveName}': {drive.get('id')}")
+ return drive.get("id")
+
+ logger.warning(f"Drive '{driveName}' not found")
+ return None
+
+ except Exception as e:
+ logger.error(f"Error getting drive ID: {str(e)}")
+ return None
+
+ def extractSiteFromStandardPath(self, pathQuery: str) -> Optional[Dict[str, str]]:
+ """
+ Extract site name from Microsoft-standard server-relative path:
+ /sites/company-share/Freigegebene Dokumente/...
+
+ Returns dict with keys: siteName, innerPath (no leading slash) on success, else None.
+ """
+ try:
+ if not pathQuery or not pathQuery.startswith('/sites/'):
+ return None
+
+ # Remove leading /sites/ prefix
+ remainder = pathQuery[7:] # len('/sites/') = 7
+
+ # Split on first '/' to get site name
+ if '/' not in remainder:
+ # Only site name, no inner path
+ return {"siteName": remainder, "innerPath": ""}
+
+ siteName, inner = remainder.split('/', 1)
+ siteName = siteName.strip()
+ innerPath = inner.strip()
+
+ if not siteName:
+ return None
+
+ return {"siteName": siteName, "innerPath": innerPath}
+ except Exception as e:
+ logger.error(f"Error extracting site from standard path '{pathQuery}': {str(e)}")
+ return None
+
+ async def getSiteByStandardPath(self, sitePath: str, allSites: Optional[List[Dict[str, Any]]] = None) -> Optional[Dict[str, Any]]:
+ """
+ Get SharePoint site directly by Microsoft-standard path (/sites/SiteName)
+ without loading all sites. Uses hostname from first available site.
+
+ Parameters:
+ sitePath (str): Site path like 'company-share' (without /sites/ prefix)
+ allSites (Optional[List[Dict]]): Pre-discovered sites list (optional, for optimization)
+
+ Returns:
+ Optional[Dict[str, Any]]: Site information if found, None otherwise
+ """
+ try:
+ # Get hostname from first available site (minimal load - only 1 site)
+ if allSites and len(allSites) > 0:
+ from urllib.parse import urlparse
+ webUrl = allSites[0].get("webUrl", "")
+ hostname = urlparse(webUrl).hostname if webUrl else None
+ else:
+ # Discover minimal sites to get hostname
+ minimalSites = await self.discoverSites()
+ if not minimalSites:
+ logger.warning("No sites available to extract hostname")
+ return None
+ from urllib.parse import urlparse
+ hostname = urlparse(minimalSites[0].get("webUrl", "")).hostname
+
+ if not hostname:
+ logger.warning("Could not extract hostname from site")
+ return None
+
+ logger.info(f"Extracted hostname '{hostname}' from first site, now getting site by path: {sitePath}")
+
+ # Get site directly using hostname + path
+ endpoint = f"sites/{hostname}:/sites/{sitePath}"
+ result = await self._makeGraphApiCall(endpoint)
+
+ if "error" in result:
+ logger.warning(f"Could not get site directly by path '{sitePath}': {result['error']}")
+ return None
+
+ siteInfo = {
+ "id": result.get("id"),
+ "displayName": result.get("displayName"),
+ "name": result.get("name"),
+ "webUrl": result.get("webUrl"),
+ "description": result.get("description"),
+ "createdDateTime": result.get("createdDateTime"),
+ "lastModifiedDateTime": result.get("lastModifiedDateTime")
+ }
+
+ logger.info(f"Successfully got site by standard path: {siteInfo['displayName']} (ID: {siteInfo['id']})")
+ return siteInfo
+
+ except Exception as e:
+ logger.error(f"Error getting site by standard path '{sitePath}': {str(e)}")
+ return None
+
+ def filterSitesByHint(self, sites: List[Dict[str, Any]], siteHint: str) -> List[Dict[str, Any]]:
+ """Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
+ try:
+ if not siteHint:
+ return sites
+ hint = siteHint.strip().lower()
+ filtered: List[Dict[str, Any]] = []
+ for site in sites:
+ name = (site.get("displayName") or "").lower()
+ webUrl = (site.get("webUrl") or "").lower()
+ if hint in name or hint in webUrl:
+ filtered.append(site)
+ return filtered if filtered else sites
+ except Exception as e:
+ logger.error(f"Error filtering sites by hint '{siteHint}': {str(e)}")
+ return sites
+
+ async def resolveSitesFromPathQuery(self, pathQuery: str, allSites: Optional[List[Dict[str, Any]]] = None) -> List[Dict[str, Any]]:
+ """
+ Resolve sites from pathQuery. Handles both Microsoft-standard paths (/sites/SiteName/...)
+ and regular paths. Returns list of matching sites.
+
+ Parameters:
+ pathQuery (str): Path query string (e.g., /sites/SiteName/FolderPath)
+ allSites (Optional[List[Dict]]): Pre-discovered sites list (optional, for optimization)
+
+ Returns:
+ List[Dict[str, Any]]: List of matching sites
+ """
+ try:
+ # If pathQuery starts with Microsoft-standard /sites/, try to get site directly
+ if pathQuery.startswith('/sites/'):
+ parsedPath = self.extractSiteFromStandardPath(pathQuery)
+ if parsedPath:
+ siteName = parsedPath.get("siteName")
+ directSite = await self.getSiteByStandardPath(siteName, allSites)
+ if directSite:
+ logger.info(f"Got site directly by standard path - no need to discover all sites")
+ return [directSite]
+ else:
+ logger.warning(f"Could not get site directly, falling back to site discovery")
+
+ # If we didn't get the site directly, use discovery and filtering
+ if not allSites:
+ allSites = await self.discoverSites()
+ if not allSites:
+ logger.warning("No SharePoint sites found or accessible")
+ return []
+
+ # If pathQuery starts with Microsoft-standard /sites/, extract site name and filter
+ if pathQuery.startswith('/sites/'):
+ parsedPath = self.extractSiteFromStandardPath(pathQuery)
+ if parsedPath:
+ siteName = parsedPath.get("siteName")
+ sites = self.filterSitesByHint(allSites, siteName)
+ if not sites:
+ logger.warning(f"No SharePoint site found matching '{siteName}'")
+ return []
+ logger.info(f"Filtered to site(s) matching '{siteName}': {[s['displayName'] for s in sites]}")
+ return sites
+ else:
+ return allSites
+ else:
+ return allSites
+
+ except Exception as e:
+ logger.error(f"Error resolving sites from pathQuery '{pathQuery}': {str(e)}")
+ return []
+
+ def validatePathQuery(self, pathQuery: str) -> tuple[bool, Optional[str]]:
+ """
+ Validate pathQuery format. Returns (isValid, errorMessage).
+
+ Parameters:
+ pathQuery (str): Path query to validate
+
+ Returns:
+ tuple[bool, Optional[str]]: (True, None) if valid, (False, errorMessage) if invalid
+ """
+ try:
+ if not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*":
+ return False, "pathQuery cannot be empty or '*'"
+
+ if not pathQuery.startswith('/'):
+ return False, "pathQuery must start with '/' and include site name with Microsoft-standard syntax /sites//... e.g. /sites/company-share/Freigegebene Dokumente/Work"
+
+ # Check if pathQuery contains search terms (words without proper path structure)
+ validPathPrefixes = ['/sites/', '/Documents', '/documents', '/Shared Documents', '/shared documents']
+ if not any(pathQuery.startswith(prefix) for prefix in validPathPrefixes):
+ return False, f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery."
+
+ return True, None
+ except Exception as e:
+ logger.error(f"Error validating pathQuery '{pathQuery}': {str(e)}")
+ return False, f"Error validating pathQuery: {str(e)}"
+
+ def detectFolderType(self, item: Dict[str, Any]) -> bool:
+ """
+ Detect if an item is a folder using improved detection logic.
+
+ Parameters:
+ item (Dict[str, Any]): Item from SharePoint API response
+
+ Returns:
+ bool: True if item is a folder, False otherwise
+ """
+ try:
+ # Use improved folder detection logic
+ if 'folder' in item:
+ return True
+
+ # Try to detect by URL pattern or other indicators
+ webUrl = item.get('webUrl', '')
+ name = item.get('name', '')
+
+ # Check if URL has no file extension and looks like a folder path
+ if '.' not in name and ('/' in webUrl or '\\' in webUrl):
+ return True
+
+ return False
+ except Exception as e:
+ logger.error(f"Error detecting folder type: {str(e)}")
+ return False
diff --git a/modules/workflows/methods/methodAi.py b/modules/workflows/methods/methodAi.py
index eee848f7..ba6bb9b3 100644
--- a/modules/workflows/methods/methodAi.py
+++ b/modules/workflows/methods/methodAi.py
@@ -49,11 +49,13 @@ class MethodAi(MethodBase):
operationId = f"ai_process_{workflowId}_{int(time.time())}"
# Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart(
operationId,
"Generate",
"AI Processing",
- f"Format: {parameters.get('resultType', 'txt')}"
+ f"Format: {parameters.get('resultType', 'txt')}",
+ parentOperationId=parentOperationId
)
aiPrompt = parameters.get("aiPrompt")
@@ -256,11 +258,13 @@ class MethodAi(MethodBase):
operationId = f"web_research_{workflowId}_{int(time.time())}"
# Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart(
operationId,
"Web Research",
"Searching and Crawling",
- "Extracting URLs and Content"
+ "Extracting URLs and Content",
+ parentOperationId=parentOperationId
)
# Call webcrawl service - service handles all AI intention analysis and processing
diff --git a/modules/workflows/methods/methodContext.py b/modules/workflows/methods/methodContext.py
index 8bd16f9b..20485612 100644
--- a/modules/workflows/methods/methodContext.py
+++ b/modules/workflows/methods/methodContext.py
@@ -250,11 +250,13 @@ class MethodContext(MethodBase):
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
# Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart(
operationId,
"Extracting content from documents",
"Content Extraction",
- f"Documents: {len(documentList.references)}"
+ f"Documents: {len(documentList.references)}",
+ parentOperationId=parentOperationId
)
# Get ChatDocuments from documentList
diff --git a/modules/workflows/methods/methodOutlook.py b/modules/workflows/methods/methodOutlook.py
index 033b5283..16030fcc 100644
--- a/modules/workflows/methods/methodOutlook.py
+++ b/modules/workflows/methods/methodOutlook.py
@@ -334,11 +334,13 @@ class MethodOutlook(MethodBase):
operationId = f"outlook_read_{workflowId}_{int(time.time())}"
# Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart(
operationId,
"Read Emails",
"Outlook Email Reading",
- f"Folder: {parameters.get('folder', 'Inbox')}"
+ f"Folder: {parameters.get('folder', 'Inbox')}",
+ parentOperationId=parentOperationId
)
connectionReference = parameters.get("connectionReference")
@@ -1546,11 +1548,13 @@ Return JSON:
operationId = f"outlook_send_{workflowId}_{int(time.time())}"
# Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart(
operationId,
"Send Draft Email",
"Outlook Email Sending",
- f"Processing {len(parameters.get('documentList', []))} draft(s)"
+ f"Processing {len(parameters.get('documentList', []))} draft(s)",
+ parentOperationId=parentOperationId
)
connectionReference = parameters.get("connectionReference")
diff --git a/modules/workflows/methods/methodSharepoint.py b/modules/workflows/methods/methodSharepoint.py
index da3db26b..d5109251 100644
--- a/modules/workflows/methods/methodSharepoint.py
+++ b/modules/workflows/methods/methodSharepoint.py
@@ -7,7 +7,7 @@ import logging
import re
import json
from typing import Dict, Any, List, Optional
-from datetime import datetime, UTC
+from datetime import datetime, UTC, timedelta, timezone
import urllib
import aiohttp
import asyncio
@@ -122,103 +122,26 @@ class MethodSharepoint(MethodBase):
logger.error(f"Error extracting hostname from webUrl '{webUrl}': {str(e)}")
return None
- async def _getSiteByStandardPath(self, sitePath: str) -> Optional[Dict[str, Any]]:
- """
- Get SharePoint site directly by Microsoft-standard path (/sites/SiteName)
- without loading all sites. Uses hostname from first available site.
-
- Parameters:
- sitePath (str): Site path like 'company-share' (without /sites/ prefix)
-
- Returns:
- Optional[Dict[str, Any]]: Site information if found, None otherwise
- """
- try:
- # Get hostname from first available site (minimal load - only 1 site)
- minimalSites = await self._discoverSharePointSites(limit=1)
- if not minimalSites:
- logger.warning("No sites available to extract hostname")
- return None
-
- hostname = self._extractHostnameFromWebUrl(minimalSites[0].get("webUrl"))
- if not hostname:
- logger.warning("Could not extract hostname from site")
- return None
-
- logger.info(f"Extracted hostname '{hostname}' from first site, now getting site by path: {sitePath}")
-
- # Get site directly using hostname + path
- endpoint = f"sites/{hostname}:/sites/{sitePath}"
- result = await self._makeGraphApiCall(endpoint)
-
- if "error" in result:
- logger.warning(f"Could not get site directly by path '{sitePath}': {result['error']}")
- return None
-
- siteInfo = {
- "id": result.get("id"),
- "displayName": result.get("displayName"),
- "name": result.get("name"),
- "webUrl": result.get("webUrl"),
- "description": result.get("description"),
- "createdDateTime": result.get("createdDateTime"),
- "lastModifiedDateTime": result.get("lastModifiedDateTime")
- }
-
- logger.info(f"Successfully got site by standard path: {siteInfo['displayName']} (ID: {siteInfo['id']})")
- return siteInfo
-
- except Exception as e:
- logger.error(f"Error getting site by standard path '{sitePath}': {str(e)}")
- return None
-
- def _filterSitesByHint(self, sites: List[Dict[str, Any]], siteHint: str) -> List[Dict[str, Any]]:
- """Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
- try:
- if not siteHint:
- return sites
- hint = siteHint.strip().lower()
- filtered: List[Dict[str, Any]] = []
- for site in sites:
- name = (site.get("displayName") or "").lower()
- webUrl = (site.get("webUrl") or "").lower()
- if hint in name or hint in webUrl:
- filtered.append(site)
- return filtered if filtered else sites
- except Exception as e:
- logger.error(f"Error filtering sites by hint '{siteHint}': {str(e)}")
- return sites
-
def _extractSiteFromStandardPath(self, pathQuery: str) -> Optional[Dict[str, str]]:
"""
- Extract site name from Microsoft-standard server-relative path:
- /sites/company-share/Freigegebene Dokumente/...
-
- Returns dict with keys: siteName, innerPath (no leading slash) on success, else None.
+ Extract site name from Microsoft-standard server-relative path.
+ Delegates to SharePoint service.
"""
- try:
- if not pathQuery or not pathQuery.startswith('/sites/'):
- return None
-
- # Remove leading /sites/ prefix
- remainder = pathQuery[7:] # len('/sites/') = 7
-
- # Split on first '/' to get site name
- if '/' not in remainder:
- # Only site name, no inner path
- return {"siteName": remainder, "innerPath": ""}
-
- siteName, inner = remainder.split('/', 1)
- siteName = siteName.strip()
- innerPath = inner.strip()
-
- if not siteName:
- return None
-
- return {"siteName": siteName, "innerPath": innerPath}
- except Exception as e:
- logger.error(f"Error extracting site from standard path '{pathQuery}': {str(e)}")
- return None
+ return self.services.sharepoint.extractSiteFromStandardPath(pathQuery)
+
+ async def _getSiteByStandardPath(self, sitePath: str) -> Optional[Dict[str, Any]]:
+ """
+ Get SharePoint site directly by Microsoft-standard path.
+ Delegates to SharePoint service.
+ """
+ return await self.services.sharepoint.getSiteByStandardPath(sitePath)
+
+ def _filterSitesByHint(self, sites: List[Dict[str, Any]], siteHint: str) -> List[Dict[str, Any]]:
+ """
+ Filter discovered sites by a human-entered site hint.
+ Delegates to SharePoint service.
+ """
+ return self.services.sharepoint.filterSitesByHint(sites, siteHint)
def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
"""
@@ -624,6 +547,170 @@ class MethodSharepoint(MethodBase):
except Exception as e:
logger.error(f"Error getting site ID: {str(e)}")
return ""
+
+ async def _parseDocumentListForFoundDocuments(self, documentList: Any) -> tuple[Optional[List[Dict[str, Any]]], Optional[List[Dict[str, Any]]], Optional[str]]:
+ """
+ Parse documentList to extract foundDocuments and site information.
+
+ Parameters:
+ documentList: Document list (can be list, DocumentReferenceList, or string)
+
+ Returns:
+ tuple: (foundDocuments, sites, errorMessage)
+ - foundDocuments: List of found documents from findDocumentPath result
+ - sites: List of site dictionaries with id, displayName, webUrl
+ - errorMessage: Error message if parsing failed, None otherwise
+ """
+ try:
+ if isinstance(documentList, str):
+ documentList = [documentList]
+
+ # Resolve documentList to get actual documents
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+ if isinstance(documentList, DocumentReferenceList):
+ docRefList = documentList
+ elif isinstance(documentList, list):
+ docRefList = DocumentReferenceList.from_string_list(documentList)
+ else:
+ docRefList = DocumentReferenceList(references=[])
+
+ chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
+ if not chatDocuments:
+ return None, None, "No documents found for the provided document list"
+
+ firstDocument = chatDocuments[0]
+ fileData = self.services.chat.getFileData(firstDocument.fileId)
+ if not fileData:
+ return None, None, None # No fileData, but not an error (might be regular file)
+
+ try:
+ resultData = json.loads(fileData)
+ foundDocuments = resultData.get("foundDocuments", [])
+
+ # If no foundDocuments, check if it's a listDocuments result (has listResults)
+ if not foundDocuments and "listResults" in resultData:
+ logger.info(f"documentList contains listResults from listDocuments, converting to foundDocuments format")
+ listResults = resultData.get("listResults", [])
+ foundDocuments = []
+ siteIdFromList = None
+ siteNameFromList = None
+
+ for listResult in listResults:
+ siteResults = listResult.get("siteResults", [])
+ for siteResult in siteResults:
+ items = siteResult.get("items", [])
+ # Extract site info from first item if available
+ if items and not siteIdFromList:
+ siteNameFromList = items[0].get("siteName")
+
+ for item in items:
+ # Convert listDocuments item format to foundDocuments format
+ if item.get("type") == "file":
+ foundDoc = {
+ "id": item.get("id"),
+ "name": item.get("name"),
+ "type": "file",
+ "siteName": item.get("siteName"),
+ "siteId": None, # Will be determined from site discovery
+ "webUrl": item.get("webUrl"),
+ "fullPath": item.get("webUrl", ""),
+ "parentPath": item.get("parentPath", "")
+ }
+ foundDocuments.append(foundDoc)
+
+ # Discover sites to get siteId if we have siteName
+ if foundDocuments and siteNameFromList and not siteIdFromList:
+ logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'")
+ allSites = await self._discoverSharePointSites()
+ matchingSites = self._filterSitesByHint(allSites, siteNameFromList)
+ if matchingSites:
+ siteIdFromList = matchingSites[0].get("id")
+ # Update all foundDocuments with siteId
+ for doc in foundDocuments:
+ doc["siteId"] = siteIdFromList
+ logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'")
+
+ logger.info(f"Converted {len(foundDocuments)} files from listResults format")
+
+ if not foundDocuments:
+ return None, None, None # No foundDocuments, but not an error
+
+ # Extract site information from foundDocuments
+ firstDoc = foundDocuments[0]
+ siteName = firstDoc.get("siteName")
+ siteId = firstDoc.get("siteId")
+
+ # If siteId is missing (from listDocuments conversion), discover sites to find it
+ if siteName and not siteId:
+ logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'")
+ allSites = await self._discoverSharePointSites()
+ matchingSites = self._filterSitesByHint(allSites, siteName)
+ if matchingSites:
+ siteId = matchingSites[0].get("id")
+ logger.info(f"Found siteId '{siteId}' for site '{siteName}'")
+
+ sites = None
+ if siteName and siteId:
+ sites = [{
+ "id": siteId,
+ "displayName": siteName,
+ "webUrl": firstDoc.get("webUrl", "")
+ }]
+ logger.info(f"Using specific site from documentList: {siteName} (ID: {siteId})")
+ elif siteName:
+ # Try to get site by name
+ allSites = await self._discoverSharePointSites()
+ matchingSites = self._filterSitesByHint(allSites, siteName)
+ if matchingSites:
+ sites = [{
+ "id": matchingSites[0].get("id"),
+ "displayName": siteName,
+ "webUrl": matchingSites[0].get("webUrl", "")
+ }]
+ logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})")
+ else:
+ return None, None, f"Site '{siteName}' not found. Cannot determine target site."
+ else:
+ return None, None, "Site information missing from documentList. Cannot determine target site."
+
+ return foundDocuments, sites, None
+
+ except json.JSONDecodeError as e:
+ return None, None, f"Invalid JSON in documentList: {str(e)}"
+ except Exception as e:
+ return None, None, f"Error processing documentList: {str(e)}"
+
+ except Exception as e:
+ logger.error(f"Error parsing documentList: {str(e)}")
+ return None, None, f"Error parsing documentList: {str(e)}"
+
+ async def _resolveSitesFromPathQuery(self, pathQuery: str) -> tuple[List[Dict[str, Any]], Optional[str]]:
+ """
+ Resolve sites from pathQuery using SharePoint service helper methods.
+
+ Parameters:
+ pathQuery (str): Path query string
+
+ Returns:
+ tuple: (sites, errorMessage)
+ - sites: List of site dictionaries
+ - errorMessage: Error message if resolution failed, None otherwise
+ """
+ try:
+ # Validate pathQuery format
+ isValid, errorMsg = self.services.sharepoint.validatePathQuery(pathQuery)
+ if not isValid:
+ return [], errorMsg
+
+ # Resolve sites using service helper
+ sites = await self.services.sharepoint.resolveSitesFromPathQuery(pathQuery)
+ if not sites:
+ return [], "No SharePoint sites found or accessible"
+
+ return sites, None
+ except Exception as e:
+ logger.error(f"Error resolving sites from pathQuery '{pathQuery}': {str(e)}")
+ return [], f"Error resolving sites from pathQuery: {str(e)}"
@action
@@ -638,23 +725,44 @@ class MethodSharepoint(MethodBase):
- connectionReference (str, required): Microsoft connection label.
- site (str, optional): Site hint.
- searchQuery (str, required): Search terms or path.
- - maxResults (int, optional): Maximum items to return. Default: 100.
+ - maxResults (int, optional): Maximum items to return. Default: 1000.
"""
+ import time
+ operationId = None
try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"sharepoint_find_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "Find Document Path",
+ "SharePoint Search",
+ f"Query: {parameters.get('searchQuery', '*')}",
+ parentOperationId=parentOperationId
+ )
+
connectionReference = parameters.get("connectionReference")
site = parameters.get("site")
searchQuery = parameters.get("searchQuery", "*")
- maxResults = parameters.get("maxResults", 100)
+ maxResults = parameters.get("maxResults", 1000)
if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="Connection reference is required")
# Parse searchQuery to extract path, search terms, search type, and options
pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery)
logger.debug(f"Parsed searchQuery '{searchQuery}' -> pathQuery='{pathQuery}', fileQuery='{fileQuery}', searchType='{searchType}'")
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
connection = self._getMicrosoftConnection(connectionReference)
if not connection:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
# Extract site name from pathQuery if it contains Microsoft-standard path (/sites/SiteName/...)
@@ -683,25 +791,34 @@ class MethodSharepoint(MethodBase):
siteHintToUse = site or siteFromPath or searchOptions.get("site_hint")
# Discover SharePoint sites - use targeted approach when site hint is available
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Discovering SharePoint sites")
if siteHintToUse:
# When site hint is available, discover all sites first, then filter
allSites = await self._discoverSharePointSites()
if not allSites:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No SharePoint sites found or accessible")
sites = self._filterSitesByHint(allSites, siteHintToUse)
logger.info(f"Filtered sites by site hint '{siteHintToUse}' -> {len(sites)} sites")
if not sites:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error=f"No SharePoint sites found matching '{siteHintToUse}'")
else:
# No site hint - discover all sites
sites = await self._discoverSharePointSites()
if not sites:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No SharePoint sites found or accessible")
# Resolve path query into search paths
searchPaths = self._resolvePathQuery(pathQuery)
+ self.services.chat.progressLogUpdate(operationId, 0.5, f"Searching across {len(sites)} site(s)")
+
try:
# Search across all discovered sites
foundDocuments = []
@@ -763,17 +880,7 @@ class MethodSharepoint(MethodBase):
resource = item
# Use the same detection logic as our test
- isFolder = False
- if 'folder' in resource:
- isFolder = True
- else:
- # Try to detect by URL pattern or other indicators
- webUrl = resource.get('webUrl', '')
- name = resource.get('name', '')
-
- # Check if URL has no file extension and looks like a folder path
- if '.' not in name and ('/' in webUrl or '\\' in webUrl):
- isFolder = True
+ isFolder = self.services.sharepoint.detectFolderType(resource)
if isFolder:
folderItems.append(item)
@@ -823,17 +930,7 @@ class MethodSharepoint(MethodBase):
logger.warning(f"Error extracting site info from URL {webUrl}: {e}")
# Use improved folder detection logic
- isFolder = False
- if 'folder' in item:
- isFolder = True
- else:
- # Try to detect by URL pattern or other indicators
- name = item.get('name', '')
-
- # Check if URL has no file extension and looks like a folder path
- if '.' not in name and ('/' in webUrl or '\\' in webUrl):
- isFolder = True
-
+ isFolder = self.services.sharepoint.detectFolderType(item)
itemType = "folder" if isFolder else "file"
itemPath = item.get("parentReference", {}).get("path", "")
logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'")
@@ -986,17 +1083,7 @@ class MethodSharepoint(MethodBase):
itemName = item.get("name", "")
# Use improved folder detection logic
- isFolder = False
- if 'folder' in item:
- isFolder = True
- else:
- # Try to detect by URL pattern or other indicators
- webUrl = item.get('webUrl', '')
- name = item.get('name', '')
-
- # Check if URL has no file extension and looks like a folder path
- if '.' not in name and ('/' in webUrl or '\\' in webUrl):
- isFolder = True
+ isFolder = self.services.sharepoint.detectFolderType(item)
itemType = "folder" if isFolder else "file"
itemPath = item.get("parentReference", {}).get("path", "")
@@ -1056,6 +1143,8 @@ class MethodSharepoint(MethodBase):
foundDocuments = foundDocuments[:maxResults]
logger.info(f"Limited results to {maxResults} items")
+ self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {len(foundDocuments)} document(s)")
+
resultData = {
"searchQuery": searchQuery,
"totalResults": len(foundDocuments),
@@ -1066,6 +1155,8 @@ class MethodSharepoint(MethodBase):
except Exception as e:
logger.error(f"Error searching SharePoint: {str(e)}")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error=str(e))
# Use default JSON format for output
@@ -1080,6 +1171,7 @@ class MethodSharepoint(MethodBase):
"hasResults": len(foundDocuments) > 0
}
+ self.services.chat.progressLogFinish(operationId, True)
return ActionResult(
success=True,
documents=[
@@ -1094,6 +1186,11 @@ class MethodSharepoint(MethodBase):
except Exception as e:
logger.error(f"Error finding document path: {str(e)}")
+ if operationId:
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+ except:
+ pass
return ActionResult.isFailure(error=str(e))
@action
@@ -1101,7 +1198,7 @@ class MethodSharepoint(MethodBase):
"""
GENERAL:
- Purpose: Read documents from SharePoint and extract content/metadata.
- - Input requirements: connectionReference (required); optional documentList, pathObject, or pathQuery; includeMetadata.
+ - Input requirements: connectionReference (required); documentList or pathQuery (required); includeMetadata (optional).
- Output format: Standardized ActionDocument format (documentName, documentData, mimeType).
- Binary files (PDFs, etc.) are Base64-encoded in documentData.
- Text files are stored as plain text in documentData.
@@ -1109,9 +1206,8 @@ class MethodSharepoint(MethodBase):
Parameters:
- connectionReference (str, required): Microsoft connection label.
- - pathObject (str, optional): Reference to a previous path result (from findDocumentPath).
- - documentList (list, optional): Document list reference(s) to read (backward compatibility).
- - pathQuery (str, optional): Path query if no pathObject (backward compatibility).
+ - documentList (list, optional): Document list reference(s) containing findDocumentPath result.
+ - pathQuery (str, optional): Direct path query if no documentList (e.g., /sites/SiteName/FolderPath).
- includeMetadata (bool, optional): Include metadata. Default: True.
Returns:
@@ -1128,19 +1224,18 @@ class MethodSharepoint(MethodBase):
operationId = f"sharepoint_read_{workflowId}_{int(time.time())}"
# Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart(
operationId,
"Read Documents",
"SharePoint Document Reading",
- f"Path: {parameters.get('pathQuery', parameters.get('pathObject', '*'))}"
+ "Processing document list",
+ parentOperationId=parentOperationId
)
documentList = parameters.get("documentList")
- if isinstance(documentList, str):
- documentList = [documentList]
- connectionReference = parameters.get("connectionReference")
pathQuery = parameters.get("pathQuery", "*")
- pathObject = parameters.get("pathObject")
+ connectionReference = parameters.get("connectionReference")
includeMetadata = parameters.get("includeMetadata", True)
# Validate connection reference
@@ -1149,7 +1244,13 @@ class MethodSharepoint(MethodBase):
self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="Connection reference is required")
- # Get connection first - needed for both pathObject and documentList approaches
+ # Require either documentList or pathQuery
+ if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"):
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList or pathQuery is required")
+
+ # Get connection first
self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
connection = self._getMicrosoftConnection(connectionReference)
if not connection:
@@ -1157,132 +1258,27 @@ class MethodSharepoint(MethodBase):
self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
- # If pathObject is provided, extract SharePoint file IDs and read them directly
- # pathObject contains the result from findDocumentPath with foundDocuments array
+ # Parse documentList to extract foundDocuments and site information
sharePointFileIds = None
sites = None
- if pathObject:
- if pathQuery and pathQuery != "*":
- logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
- try:
- # Resolve the reference label to get the actual document list
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- pathObjectDocuments = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([pathObject]))
- if not pathObjectDocuments or len(pathObjectDocuments) == 0:
+
+ if documentList:
+ foundDocuments, sites, errorMsg = await self._parseDocumentListForFoundDocuments(documentList)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
+
+ if foundDocuments:
+ # Extract SharePoint file IDs from foundDocuments
+ sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"]
+ if not sharePointFileIds:
if operationId:
self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
-
- # Get the first document's content (which should be the JSON from findDocumentPath)
- firstDocument = pathObjectDocuments[0]
- fileData = self.services.chat.getFileData(firstDocument.fileId)
- if not fileData:
- return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")
-
- # Parse the JSON content
- resultData = json.loads(fileData)
- foundDocuments = resultData.get("foundDocuments", [])
-
- # If no foundDocuments, check if it's a listDocuments result (has listResults)
- if not foundDocuments and "listResults" in resultData:
- logger.info(f"pathObject contains listResults from listDocuments, converting to foundDocuments format")
- listResults = resultData.get("listResults", [])
- foundDocuments = []
- siteIdFromList = None
- siteNameFromList = None
-
- for listResult in listResults:
- siteResults = listResult.get("siteResults", [])
- for siteResult in siteResults:
- items = siteResult.get("items", [])
- # Extract site info from first item if available
- if items and not siteIdFromList:
- # Try to get site info from the siteResult structure
- # We need to discover sites to get the siteId
- siteNameFromList = items[0].get("siteName")
-
- for item in items:
- # Convert listDocuments item format to foundDocuments format
- if item.get("type") == "file":
- foundDoc = {
- "id": item.get("id"),
- "name": item.get("name"),
- "type": "file",
- "siteName": item.get("siteName"),
- "siteId": None, # Will be determined from site discovery
- "webUrl": item.get("webUrl"),
- "fullPath": item.get("webUrl", ""),
- "parentPath": item.get("parentPath", "")
- }
- foundDocuments.append(foundDoc)
-
- # Discover sites to get siteId if we have siteName
- if foundDocuments and siteNameFromList and not siteIdFromList:
- logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'")
- allSites = await self._discoverSharePointSites()
- matchingSites = self._filterSitesByHint(allSites, siteNameFromList)
- if matchingSites:
- siteIdFromList = matchingSites[0].get("id")
- # Update all foundDocuments with siteId
- for doc in foundDocuments:
- doc["siteId"] = siteIdFromList
- logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'")
-
- logger.info(f"Converted {len(foundDocuments)} files from listResults format")
-
- if foundDocuments:
- # Extract SharePoint file IDs from foundDocuments
- sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"]
- if not sharePointFileIds:
- return ActionResult.isFailure(error=f"No files found in pathObject '{pathObject}'")
- logger.info(f"Extracted {len(sharePointFileIds)} SharePoint file IDs from pathObject '{pathObject}'")
-
- # Extract site information from foundDocuments
- if foundDocuments:
- firstDoc = foundDocuments[0]
- siteName = firstDoc.get("siteName")
- siteId = firstDoc.get("siteId")
-
- # If siteId is missing (from listDocuments conversion), discover sites to find it
- if siteName and not siteId:
- logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'")
- allSites = await self._discoverSharePointSites()
- matchingSites = self._filterSitesByHint(allSites, siteName)
- if matchingSites:
- siteId = matchingSites[0].get("id")
- logger.info(f"Found siteId '{siteId}' for site '{siteName}'")
-
- if siteName and siteId:
- sites = [{
- "id": siteId,
- "displayName": siteName,
- "webUrl": firstDoc.get("webUrl", "")
- }]
- logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})")
- elif siteName:
- # Try to get site by name
- allSites = await self._discoverSharePointSites()
- matchingSites = self._filterSitesByHint(allSites, siteName)
- if matchingSites:
- sites = [{
- "id": matchingSites[0].get("id"),
- "displayName": siteName,
- "webUrl": matchingSites[0].get("webUrl", "")
- }]
- logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})")
- else:
- return ActionResult.isFailure(error=f"Site '{siteName}' not found. Cannot determine target site for read operation.")
- else:
- return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
- else:
- return ActionResult.isFailure(error=f"No documents found in pathObject '{pathObject}'")
-
- except json.JSONDecodeError as e:
- return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
- except Exception as e:
- return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
+ return ActionResult.isFailure(error="No files found in documentList from findDocumentPath result")
+ logger.info(f"Extracted {len(sharePointFileIds)} SharePoint file IDs from documentList")
- # If we have SharePoint file IDs from pathObject, read them directly
+ # If we have SharePoint file IDs from documentList (findDocumentPath result), read them directly
if sharePointFileIds and sites:
# Read SharePoint files directly using their IDs
readResults = []
@@ -1338,7 +1334,7 @@ class MethodSharepoint(MethodBase):
if not readResults:
self.services.chat.progressLogFinish(operationId, False)
- return ActionResult.isFailure(error="No files could be read from pathObject")
+ return ActionResult.isFailure(error="No files could be read from documentList")
# Convert read results to ActionDocument objects
# IMPORTANT: For binary files (PDFs), store Base64-encoded content directly in documentData
@@ -1442,232 +1438,24 @@ class MethodSharepoint(MethodBase):
self.services.chat.progressLogFinish(operationId, True)
return ActionResult.isSuccess(documents=actionDocuments)
- # Fallback: Use documentList parameter (for backward compatibility)
- # Validate documentList
- if not documentList:
- return ActionResult.isFailure(error="Document list reference is required. Either provide documentList parameter or use pathObject that contains files.")
+ # If no sites from documentList, try pathQuery fallback
+ if not sites and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
+ sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
- # Get documents from reference - ensure documentList is a list, not a string
- # documentList is already normalized above
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- # Convert to DocumentReferenceList if needed
- if isinstance(documentList, DocumentReferenceList):
- docRefList = documentList
- elif isinstance(documentList, list):
- docRefList = DocumentReferenceList.from_string_list(documentList)
- elif isinstance(documentList, str):
- docRefList = DocumentReferenceList.from_string_list([documentList])
- else:
- docRefList = DocumentReferenceList(references=[])
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
-
- if not chatDocuments:
- return ActionResult.isFailure(error="No documents found for the provided reference")
-
- # Determine sites to use - strict validation: pathObject → pathQuery → ERROR
+ # If still no sites, return error
if not sites:
- # Step 2: If no pathObject, check pathQuery
- if pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
- # Validate pathQuery format
- if not pathQuery.startswith('/'):
- return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with Microsoft-standard syntax /sites//... e.g. /sites/company-share/Freigegebene Dokumente/Work")
-
- # Check if pathQuery contains search terms (words without proper path structure)
- validPathPrefixes = ['/sites/', '/Documents', '/documents', '/Shared Documents', '/shared documents']
- if not any(pathQuery.startswith(prefix) for prefix in validPathPrefixes):
- return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
-
- # If pathQuery starts with Microsoft-standard /sites/, try to get site directly
- directSite = None
- if pathQuery.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(pathQuery)
- if parsedPath:
- siteName = parsedPath.get("siteName")
- # Try to get site directly by path (optimization - no need to load all 60 sites)
- directSite = await self._getSiteByStandardPath(siteName)
- if directSite:
- logger.info(f"Got site directly by standard path - no need to discover all sites")
- sites = [directSite]
- else:
- logger.warning(f"Could not get site directly, falling back to site discovery")
-
- # If we didn't get the site directly, use discovery and filtering
- if not directSite:
- # For pathQuery, we need to discover sites to find the specific one
- allSites = await self._discoverSharePointSites()
- if not allSites:
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
-
- # If pathQuery starts with Microsoft-standard /sites/, extract site name and filter
- if pathQuery.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(pathQuery)
- if parsedPath:
- siteName = parsedPath.get("siteName")
- # Filter sites by name (case-insensitive substring match)
- sites = self._filterSitesByHint(allSites, siteName)
- if not sites:
- return ActionResult.isFailure(error=f"No SharePoint site found matching '{siteName}'")
- logger.info(f"Filtered to site(s) matching '{siteName}': {[s['displayName'] for s in sites]}")
- else:
- sites = allSites
- else:
- sites = allSites
- else:
- # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
- return ActionResult.isFailure(error="No valid read path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with file information, or pathQuery must be provided. Use findDocumentPath first to get file paths, or provide pathQuery directly.")
- if not sites:
- return ActionResult.isFailure(error="No valid target site determined for read operation")
-
- # Resolve path query into search paths
- searchPaths = self._resolvePathQuery(pathQuery)
-
- # Process each chat document across all sites
- readResults = []
-
- for i, chatDocument in enumerate(chatDocuments):
- try:
- fileId = chatDocument.fileId
- fileName = chatDocument.fileName
-
- # Search for this file across all sites
- fileFound = False
-
- for site in sites:
- siteId = site["id"]
- siteName = site["displayName"]
- siteUrl = site["webUrl"]
-
- # Try to find the file by name in this site
- searchQuery = fileName.replace("'", "''") # Escape single quotes for OData
- endpoint = f"sites/{siteId}/drive/root/search(q='{searchQuery}')"
-
- searchResult = await self._makeGraphApiCall(endpoint)
-
- if "error" in searchResult:
- continue
-
- items = searchResult.get("value", [])
- for item in items:
- if item.get("name") == fileName:
- # Found the file, get its details
- fileId = item.get("id")
- fileEndpoint = f"sites/{siteId}/drive/items/{fileId}"
-
- # Get file metadata
- fileInfoResult = await self._makeGraphApiCall(fileEndpoint)
-
- if "error" in fileInfoResult:
- continue
-
- # Build result with metadata
- resultItem = {
- "fileId": fileId,
- "fileName": fileName,
- "sharepointFileId": fileId,
- "siteName": siteName,
- "siteUrl": siteUrl,
- "size": fileInfoResult.get("size", 0),
- "createdDateTime": fileInfoResult.get("createdDateTime"),
- "lastModifiedDateTime": fileInfoResult.get("lastModifiedDateTime"),
- "webUrl": fileInfoResult.get("webUrl")
- }
-
- # Add metadata if requested
- if includeMetadata:
- resultItem["metadata"] = {
- "mimeType": fileInfoResult.get("file", {}).get("mimeType"),
- "downloadUrl": fileInfoResult.get("@microsoft.graph.downloadUrl"),
- "createdBy": fileInfoResult.get("createdBy", {}),
- "lastModifiedBy": fileInfoResult.get("lastModifiedBy", {}),
- "parentReference": fileInfoResult.get("parentReference", {})
- }
-
- # Get file content if it's a readable format
- mimeType = fileInfoResult.get("file", {}).get("mimeType", "")
- if mimeType.startswith("text/") or mimeType in [
- "application/json", "application/xml", "application/javascript"
- ]:
- # Download the file content
- contentEndpoint = f"sites/{siteId}/drive/items/{fileId}/content"
-
- # For content download, we need to handle binary data
- try:
- async with aiohttp.ClientSession() as session:
- headers = {"Authorization": f"Bearer {self.services.sharepoint._target.accessToken}"}
- async with session.get(f"https://graph.microsoft.com/v1.0/{contentEndpoint}", headers=headers) as response:
- if response.status == 200:
- content = await response.text()
- resultItem["content"] = content
- else:
- resultItem["content"] = f"Could not download content: HTTP {response.status}"
- except Exception as e:
- resultItem["content"] = f"Error downloading content: {str(e)}"
- else:
- resultItem["content"] = f"Binary file type ({mimeType}) - content not retrieved"
-
- readResults.append(resultItem)
- fileFound = True
- break
-
- if fileFound:
- break
-
- if not fileFound:
- readResults.append({
- "fileId": fileId,
- "fileName": fileName,
- "error": "File not found in any accessible SharePoint site",
- "content": None
- })
-
- except Exception as e:
- logger.error(f"Error reading document {chatDocument.fileName}: {str(e)}")
- readResults.append({
- "fileId": chatDocument.fileId,
- "fileName": chatDocument.fileName,
- "error": str(e),
- "content": None
- })
-
- resultData = {
- "connectionReference": connectionReference,
- "pathQuery": pathQuery,
- "documentList": documentList,
- "includeMetadata": includeMetadata,
- "sitesSearched": len(sites),
- "readResults": readResults,
- "connection": {
- "id": connection["id"],
- "authority": "microsoft",
- "reference": connectionReference
- },
- "timestamp": self.services.utils.timestampGetUtc()
- }
-
- # Use default JSON format for output
- outputExtension = ".json" # Default
- outputMimeType = "application/json" # Default
-
- validationMetadata = {
- "actionType": "sharepoint.readDocuments",
- "connectionReference": connectionReference,
- "documentCount": len(readResults),
- "includeMetadata": includeMetadata,
- "sitesSearched": len(sites)
- }
-
- return ActionResult(
- success=True,
- documents=[
- ActionDocument(
- documentName=f"sharepoint_documents_{self._format_timestamp_for_filename()}{outputExtension}",
- documentData=json.dumps(resultData, indent=2),
- mimeType=outputMimeType,
- validationMetadata=validationMetadata
- )
- ]
- )
+ # This should never be reached if logic above is correct
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Unexpected error: could not process documentList or pathQuery")
except Exception as e:
logger.error(f"Error reading SharePoint documents: {str(e)}")
if operationId:
@@ -1685,286 +1473,120 @@ class MethodSharepoint(MethodBase):
"""
GENERAL:
- Purpose: Upload documents to SharePoint. Only to choose this action with a connectionReference
- - Input requirements: connectionReference (required); documentList (required); optional pathObject or pathQuery.
+ - Input requirements: connectionReference (required); documentList (required); pathQuery (optional).
- Output format: JSON with upload status and file info.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- - pathObject (str, optional): Reference to a previous path result.
- - pathQuery (str, optional): Upload target path if no pathObject.
- documentList (list, required): Document reference(s) to upload. File names are taken from the documents.
+ - pathQuery (str, optional): Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath).
"""
+ import time
+ operationId = None
try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"sharepoint_upload_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "Upload Document",
+ "SharePoint Upload",
+ "Processing document list",
+ parentOperationId=parentOperationId
+ )
+
connectionReference = parameters.get("connectionReference")
- pathQuery = parameters.get("pathQuery")
documentList = parameters.get("documentList")
+ pathQuery = parameters.get("pathQuery")
if isinstance(documentList, str):
documentList = [documentList]
- pathObject = parameters.get("pathObject")
- uploadPath = pathQuery
- logger.debug(f"Using pathQuery: {pathQuery}")
+ if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Connection reference is required")
- if not connectionReference or not documentList:
- return ActionResult.isFailure(error="Connection reference and document list are required")
+ if not documentList:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Document list is required")
- # If pathObject is provided, extract folder IDs from it
- if pathObject:
- try:
- # Resolve the reference label to get the actual document list
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- documentList = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([pathObject]))
- if not documentList or len(documentList) == 0:
- return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
-
- # Get the first document's content (which should be the JSON)
- firstDocument = documentList[0]
- fileData = self.services.chat.getFileData(firstDocument.fileId)
- if not fileData:
- return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")
-
- # Parse the JSON content
- resultData = json.loads(fileData)
-
- # Debug: Log the structure of the result document
- logger.info(f"Result document keys: {list(resultData.keys())}")
-
- # Handle different result document formats
- foundDocuments = []
-
- # Check if it's a direct SharePoint result (has foundDocuments)
- if "foundDocuments" in resultData:
- foundDocuments = resultData.get("foundDocuments", [])
- logger.info(f"Found {len(foundDocuments)} documents in foundDocuments array")
- # Check if it's an AI validation result (has result string with validationReport)
- elif "result" in resultData and "validationReport" in resultData["result"]:
- try:
- # Parse the nested JSON in the result field
- nestedResult = json.loads(resultData["result"])
- validationReport = nestedResult.get("validationReport", {})
- documentDetails = validationReport.get("documentDetails", {})
-
- if documentDetails:
- # Convert the single document details to the expected format
- doc = {
- "id": documentDetails.get("id"),
- "name": documentDetails.get("name"),
- "type": documentDetails.get("type", "").lower(), # Convert "Folder" to "folder"
- "siteName": documentDetails.get("siteName"),
- "siteId": documentDetails.get("siteId"),
- "fullPath": documentDetails.get("fullPath"),
- "webUrl": documentDetails.get("webUrl", ""),
- "parentPath": documentDetails.get("parentPath", "")
- }
- foundDocuments = [doc]
- logger.info(f"Extracted 1 document from validation report")
- except json.JSONDecodeError as e:
- logger.error(f"Failed to parse nested JSON in result field: {e}")
- return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}")
-
- # Debug: Log what we found in the result document
- logger.info(f"Result document contains {len(foundDocuments)} documents")
- for i, doc in enumerate(foundDocuments):
- logger.info(f" Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'")
-
- # Extract folder information from the result
- folders = []
- for doc in foundDocuments:
- if doc.get("type") == "folder":
- folders.append(doc)
-
- logger.info(f"Found {len(folders)} folders in result document")
-
- if folders:
- # Use the first folder found - prefer folder ID for direct API calls
- firstFolder = folders[0]
- if firstFolder.get("id"):
- # Use folder ID directly for most reliable API calls
- uploadPath = firstFolder.get("id")
- logger.info(f"Using folder ID from pathObject: {uploadPath}")
- elif firstFolder.get("fullPath"):
- # Extract the correct path portion from fullPath by removing site name
- fullPath = firstFolder.get("fullPath")
- # fullPath format: \\SiteName\\Library\\Folder\\SubFolder
- # We need to remove the first two parts (\\SiteName\\) to get the actual folder path
- pathParts = fullPath.lstrip('\\').split('\\')
- if len(pathParts) > 1:
- # Remove the first part (site name) and reconstruct the path
- actualPath = '\\'.join(pathParts[1:])
- uploadPath = actualPath
- logger.info(f"Extracted path from fullPath: {uploadPath}")
- else:
- uploadPath = fullPath
- logger.info(f"Using full path from pathObject (no site name to remove): {uploadPath}")
- else:
- return ActionResult.isFailure(error="No valid folder information found in pathObject")
- else:
- return ActionResult.isFailure(error="No folders found in pathObject")
-
- except json.JSONDecodeError as e:
- return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
- except Exception as e:
- return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
+ # Parse documentList to extract folder path and site information
+ uploadPath, sites, filesToUpload, errorMsg = await self._parseDocumentListForFolder(documentList)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
- # Get Microsoft connection
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+ # If no folder path found from documentList, use pathQuery if provided
+ if not uploadPath and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
+ uploadPath = pathQuery
+ logger.info(f"Using pathQuery for upload path: {uploadPath}")
+ # Resolve sites from pathQuery
+ sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
- # Get documents from reference - ensure documentList is a list, not a string
- if isinstance(documentList, str):
- documentList = [documentList] # Convert string to list
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- # Convert to DocumentReferenceList if needed
- if isinstance(documentList, DocumentReferenceList):
- docRefList = documentList
- elif isinstance(documentList, list):
- docRefList = DocumentReferenceList.from_string_list(documentList)
- elif isinstance(documentList, str):
- docRefList = DocumentReferenceList.from_string_list([documentList])
- else:
- docRefList = DocumentReferenceList(references=[])
- chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
- if not chatDocuments:
- return ActionResult.isFailure(error="No documents found for the provided reference")
-
- # Determine sites to use based on whether pathObject was provided
- sites = None
- if pathObject:
- # When pathObject is provided, we should have specific site information
- # Extract site information from the pathObject result
- try:
- # Get the site information from the first folder in pathObject
- if 'foundDocuments' in locals() and foundDocuments:
- firstFolder = foundDocuments[0]
- siteName = firstFolder.get("siteName")
- siteId = firstFolder.get("siteId")
-
- if siteName and siteId:
- # Use the specific site from pathObject instead of discovering all sites
- sites = [{
- "id": siteId,
- "displayName": siteName,
- "webUrl": firstFolder.get("webUrl", "")
- }]
- logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})")
- else:
- # Site info missing from pathObject - this is an error, not a fallback
- return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for upload.")
- else:
- # No documents found in pathObject - this is an error
- return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for upload.")
- except Exception as e:
- # Error processing pathObject - this is an error, not a fallback
- return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for upload.")
- else:
- # No pathObject provided - check if pathQuery is valid
- if not uploadPath or uploadPath.strip() == "" or uploadPath.strip() == "*":
- return ActionResult.isFailure(error="No valid upload path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
-
- # Validate pathQuery format
- if not uploadPath.startswith('/'):
- return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with Microsoft-standard syntax /sites//... e.g. /sites/company-share/Freigegebene Dokumente/Work")
-
- # Check if uploadPath contains search terms (words without proper path structure)
- validPathPrefixes = ['/sites/', '/Documents', '/documents', '/Shared Documents', '/shared documents']
- if not any(uploadPath.startswith(prefix) for prefix in validPathPrefixes):
- return ActionResult.isFailure(error=f"Invalid pathQuery '{uploadPath}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
-
- # If uploadPath starts with Microsoft-standard /sites/, try to get site directly
- directSite = None
- if uploadPath.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(uploadPath)
- if parsedPath:
- siteName = parsedPath.get("siteName")
- # Try to get site directly by path (optimization - no need to load all 60 sites)
- directSite = await self._getSiteByStandardPath(siteName)
- if directSite:
- logger.info(f"Got site directly by standard path - no need to discover all sites")
- sites = [directSite]
- else:
- logger.warning(f"Could not get site directly, falling back to site discovery")
-
- # If we didn't get the site directly, use discovery and filtering
- if not directSite:
- # For pathQuery, we need to discover sites to find the specific one
- allSites = await self._discoverSharePointSites()
- if not allSites:
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
-
- # If uploadPath starts with Microsoft-standard /sites/, extract site name and filter
- if uploadPath.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(uploadPath)
- if parsedPath:
- siteName = parsedPath.get("siteName")
- # Filter sites by name (case-insensitive substring match)
- sites = self._filterSitesByHint(allSites, siteName)
- if not sites:
- return ActionResult.isFailure(error=f"No SharePoint site found matching '{siteName}'")
- logger.info(f"Filtered to site(s) matching '{siteName}': {[s['displayName'] for s in sites]}")
- else:
- sites = allSites
- else:
- sites = allSites
+ # Validate required parameters
+ if not uploadPath:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get upload folder, or provide pathQuery directly.")
if not sites:
- return ActionResult.isFailure(error="No valid target site determined for upload")
-
- # Process upload paths based on whether pathObject was provided
- uploadSiteScope = None
- if not pathObject:
- # Parse the validated pathQuery to extract site and path information
- parsed = self._extractSiteFromStandardPath(uploadPath)
-
- if not parsed:
- return ActionResult.isFailure(error="Invalid uploadPath. Use Microsoft-standard /sites//")
-
- # Find matching site (already filtered above, but ensure we have the right one)
- candidateSites = self._filterSitesByHint(sites, parsed["siteName"]) # substring match
- # Choose exact displayName match if available
- exact = [s for s in candidateSites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
- selectedSite = exact[0] if exact else (candidateSites[0] if candidateSites else None)
- if not selectedSite:
- return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible")
-
- uploadSiteScope = selectedSite
- # Use the inner path portion as the actual upload target path
- # Remove document library name from path (same logic as listDocuments)
- innerPath = parsed.get('innerPath', '').lstrip('/')
- pathSegments = [s for s in innerPath.split('/') if s.strip()]
- if len(pathSegments) > 1:
- # Path has multiple segments - first might be a library name
- # Try without first segment (assuming it's a library name)
- innerPath = '/'.join(pathSegments[1:])
- logger.info(f"Removed first path segment (potential library name), path changed from '{parsed['innerPath']}' to '{innerPath}'")
- elif len(pathSegments) == 1:
- # Only one segment - if it's a common library-like name, use empty path (root)
- firstSegmentLower = pathSegments[0].lower()
- libraryIndicators = ['document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek']
- if any(indicator in firstSegmentLower for indicator in libraryIndicators):
- innerPath = ''
- logger.info(f"First segment '{pathSegments[0]}' appears to be a library name, using root")
-
- uploadPaths = [f"/{innerPath}" if innerPath else "/"]
- sites = [selectedSite]
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Site information missing. Cannot determine target site for upload.")
+
+ if not filesToUpload:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No files to upload found in documentList.")
+
+ # Get connection
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection")
+ connection = self._getMicrosoftConnection(connectionReference)
+ if not connection:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # Process upload paths
+ uploadPaths = []
+ if uploadPath.startswith('01PPXICCB') or uploadPath.startswith('01'):
+ # It's a folder ID - use it directly
+ uploadPaths = [uploadPath]
+ logger.info(f"Using folder ID directly for upload: {uploadPath}")
else:
- # When using pathObject, check if uploadPath is a folder ID or a path
- if uploadPath.startswith('01PPXICCB') or uploadPath.startswith('01'):
- # It's a folder ID - use it directly
- uploadPaths = [uploadPath]
- logger.info(f"Using folder ID directly for upload: {uploadPath}")
- else:
- # It's a path - resolve it normally
- uploadPaths = self._resolvePathQuery(uploadPath)
+ # It's a path - resolve it normally
+ uploadPaths = self._resolvePathQuery(uploadPath)
# Process each document upload
uploadResults = []
# Extract file names from documents
- fileNames = [doc.fileName for doc in chatDocuments]
+ fileNames = [doc.fileName for doc in filesToUpload]
logger.info(f"Using file names from documentList: {fileNames}")
- for i, (chatDocument, fileName) in enumerate(zip(chatDocuments, fileNames)):
+ self.services.chat.progressLogUpdate(operationId, 0.5, f"Uploading {len(filesToUpload)} document(s)")
+
+            # NOTE: removed duplicated re-initialization — uploadResults, fileNames
+            # and the initial 0.5 progress update are already set up just above.
+
+
+
+
+
+
+
+
+
+ for i, (chatDocument, fileName) in enumerate(zip(filesToUpload, fileNames)):
try:
fileId = chatDocument.fileId
fileData = self.services.chat.getFileData(fileId)
@@ -2056,11 +1678,14 @@ class MethodSharepoint(MethodBase):
"error": str(e),
"uploadStatus": "failed"
})
+
+ # Update progress for each file
+                self.services.chat.progressLogUpdate(operationId, 0.5 + ((i + 1) * 0.4 / len(filesToUpload)), f"Uploaded {i + 1}/{len(filesToUpload)} file(s)")
# Create result data
resultData = {
"connectionReference": connectionReference,
- "pathQuery": uploadPath,
+ "uploadPath": uploadPath,
"documentList": documentList,
"fileNames": fileNames,
"sitesAvailable": len(sites),
@@ -2087,6 +1712,10 @@ class MethodSharepoint(MethodBase):
"failedUploads": len([r for r in uploadResults if r.get("uploadStatus") == "failed"])
}
+ successfulUploads = len([r for r in uploadResults if r.get("uploadStatus") == "success"])
+ self.services.chat.progressLogUpdate(operationId, 0.9, f"Uploaded {successfulUploads}/{len(uploadResults)} file(s)")
+ self.services.chat.progressLogFinish(operationId, successfulUploads > 0)
+
return ActionResult(
success=True,
documents=[
@@ -2101,6 +1730,11 @@ class MethodSharepoint(MethodBase):
except Exception as e:
logger.error(f"Error uploading to SharePoint: {str(e)}")
+ if operationId:
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+ except:
+ pass
return ActionResult(
success=False,
error=str(e)
@@ -2111,226 +1745,94 @@ class MethodSharepoint(MethodBase):
"""
GENERAL:
- Purpose: List documents and folders in SharePoint paths across sites.
- - Input requirements: connectionReference (required); optional pathObject or pathQuery; includeSubfolders.
+    - Input requirements: connectionReference (required); documentList or pathQuery (one required); includeSubfolders (optional).
- Output format: JSON with folder items and metadata.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- - pathObject (str, optional): Reference to a previous path result.
- - pathQuery (str, optional): Path query if no pathObject.
+    - documentList (list, optional): Document list reference(s) containing findDocumentPath result; required unless pathQuery (a direct SharePoint path, e.g. /sites/SiteName/FolderPath) is provided.
- includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
"""
+ import time
+ operationId = None
try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"sharepoint_list_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "List Documents",
+ "SharePoint Listing",
+ "Processing document list",
+ parentOperationId=parentOperationId
+ )
+
connectionReference = parameters.get("connectionReference")
- pathObject = parameters.get("pathObject")
- pathQuery = parameters.get("pathQuery")
+ documentList = parameters.get("documentList")
+ pathQuery = parameters.get("pathQuery", "*")
+ if isinstance(documentList, str):
+ documentList = [documentList]
includeSubfolders = parameters.get("includeSubfolders", False) # Default to False for better UX
- listQuery = pathQuery
- logger.info(f"Using pathQuery: {pathQuery}")
-
if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="Connection reference is required")
- # If pathObject is provided, resolve the reference and extract folder IDs from it
- # Note: pathObject takes precedence over pathQuery when both are provided
- if pathObject:
- if pathQuery and pathQuery != "*":
- logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
- try:
- # Resolve the reference label to get the actual document list
- from modules.datamodels.datamodelDocref import DocumentReferenceList
- documentList = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([pathObject]))
- if not documentList or len(documentList) == 0:
- return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
-
- # Get the first document's content (which should be the JSON)
- firstDocument = documentList[0]
- logger.info(f"Document fileId: {firstDocument.fileId}, fileName: {firstDocument.fileName}")
- fileData = self.services.chat.getFileData(firstDocument.fileId)
- if not fileData:
- return ActionResult.isFailure(error=f"No file data found for document: {pathObject} (fileId: {firstDocument.fileId})")
- logger.info(f"File data length: {len(fileData) if fileData else 0}")
-
- # Parse the JSON content
- resultData = json.loads(fileData)
-
- # Debug: Log the structure of the result document
- logger.info(f"Result document keys: {list(resultData.keys())}")
-
- # Handle different result document formats
- foundDocuments = []
-
- # Check if it's a direct SharePoint result (has foundDocuments)
- if "foundDocuments" in resultData:
- foundDocuments = resultData.get("foundDocuments", [])
- logger.info(f"Found {len(foundDocuments)} documents in foundDocuments array")
- # Check if it's an AI validation result (has result string with validationReport)
- elif "result" in resultData and "validationReport" in resultData["result"]:
- try:
- # Parse the nested JSON in the result field
- nestedResult = json.loads(resultData["result"])
- validationReport = nestedResult.get("validationReport", {})
- documentDetails = validationReport.get("documentDetails", {})
-
- if documentDetails:
- # Convert the single document details to the expected format
- doc = {
- "id": documentDetails.get("id"),
- "name": documentDetails.get("name"),
- "type": documentDetails.get("type", "").lower(), # Convert "Folder" to "folder"
- "siteName": documentDetails.get("siteName"),
- "siteId": documentDetails.get("siteId"),
- "fullPath": documentDetails.get("fullPath"),
- "webUrl": documentDetails.get("webUrl", ""),
- "parentPath": documentDetails.get("parentPath", "")
- }
- foundDocuments = [doc]
- logger.info(f"Extracted 1 document from validation report")
- except ValueError as e:
- logger.error(f"Failed to parse nested JSON in result field: {e}")
- return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}")
-
- # Debug: Log what we found in the result document
- logger.info(f"Result document contains {len(foundDocuments)} documents")
- for i, doc in enumerate(foundDocuments):
- logger.info(f" Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'")
-
- # Extract folder information from the result
- folders = []
- for doc in foundDocuments:
- if doc.get("type") == "folder":
- folders.append(doc)
-
- logger.info(f"Found {len(folders)} folders in result document")
-
- if folders:
- # Use the first folder found - prefer folder ID for direct API calls
- firstFolder = folders[0]
- if firstFolder.get("id"):
- # Use folder ID directly for most reliable API calls
- listQuery = firstFolder.get("id")
- logger.info(f"Using folder ID from pathObject: {listQuery}")
- elif firstFolder.get("fullPath"):
- # Extract the correct path portion from fullPath by removing site name
- fullPath = firstFolder.get("fullPath")
- # fullPath format: \\SiteName\\Library\\Folder\\SubFolder
- # We need to remove the first two parts (\\SiteName\\) to get the actual folder path
- pathParts = fullPath.lstrip('\\').split('\\')
- if len(pathParts) > 1:
- # Remove the first part (site name) and reconstruct the path
- actualPath = '\\'.join(pathParts[1:])
- listQuery = actualPath
- logger.info(f"Extracted path from fullPath: {listQuery}")
- else:
- listQuery = fullPath
- logger.info(f"Using full path from pathObject (no site name to remove): {listQuery}")
- else:
- return ActionResult.isFailure(error="No valid folder information found in pathObject")
- else:
- return ActionResult.isFailure(error="No folders found in pathObject")
-
- except ValueError as e:
- return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
- except Exception as e:
- return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
+ # Require either documentList or pathQuery
+ if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"):
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList or pathQuery is required")
- # Get Microsoft connection
+ # Parse documentList to extract folder path and site information
+ listQuery, sites, _, errorMsg = await self._parseDocumentListForFolder(documentList)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
+
+ # If no folder path found from documentList, use pathQuery if provided
+ if not listQuery and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
+ listQuery = pathQuery
+ logger.info(f"Using pathQuery for list query: {listQuery}")
+ # Resolve sites from pathQuery
+ sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
+
+ # Validate required parameters
+ if not listQuery:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")
+
+ if not sites:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Site information missing. Cannot determine target site for list operation.")
+
+ # Get connection
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
connection = self._getMicrosoftConnection(connectionReference)
if not connection:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
logger.info(f"Starting SharePoint listDocuments for listQuery: {listQuery}")
logger.debug(f"Connection ID: {connection['id']}")
+ self.services.chat.progressLogUpdate(operationId, 0.3, "Processing folder path")
+
# Parse listQuery to extract path, search terms, search type, and options
pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(listQuery)
- # Determine sites to use - strict validation: pathObject → pathQuery → ERROR
- sites = None
-
- # Step 1: Check pathObject first
- if pathObject:
- # When pathObject is provided, we should have specific site information
- # Extract site information from the pathObject result
- try:
- # Get the site information from the first folder in pathObject
- if 'foundDocuments' in locals() and foundDocuments:
- firstFolder = foundDocuments[0]
- siteName = firstFolder.get("siteName")
- siteId = firstFolder.get("siteId")
-
- if siteName and siteId:
- # Use the specific site from pathObject instead of discovering all sites
- sites = [{
- "id": siteId,
- "displayName": siteName,
- "webUrl": firstFolder.get("webUrl", "")
- }]
- logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})")
- else:
- # Site info missing from pathObject - this is an error
- return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for list operation.")
- else:
- # No documents found in pathObject - this is an error
- return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for list operation.")
- except Exception as e:
- # Error processing pathObject - this is an error
- return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for list operation.")
-
- # Step 2: If no pathObject, check pathQuery
- elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
- # Validate pathQuery format
- if not pathQuery.startswith('/'):
- return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with Microsoft-standard syntax /sites//... e.g. /sites/company-share/Freigegebene Dokumente/Work")
-
- # Check if pathQuery contains search terms (words without proper path structure)
- validPathPrefixes = ['/sites/', '/Documents', '/documents', '/Shared Documents', '/shared documents']
- if not any(pathQuery.startswith(prefix) for prefix in validPathPrefixes):
- return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
-
- # If pathQuery starts with Microsoft-standard /sites/, try to get site directly
- directSite = None
- if pathQuery.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(pathQuery)
- if parsedPath:
- siteName = parsedPath.get("siteName")
- # Try to get site directly by path (optimization - no need to load all 60 sites)
- directSite = await self._getSiteByStandardPath(siteName)
- if directSite:
- logger.info(f"Got site directly by standard path - no need to discover all sites")
- sites = [directSite]
- else:
- logger.warning(f"Could not get site directly, falling back to site discovery")
-
- # If we didn't get the site directly, use discovery and filtering
- if not directSite:
- # For pathQuery, we need to discover sites to find the specific one
- allSites = await self._discoverSharePointSites()
- if not allSites:
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
-
- # If pathQuery starts with Microsoft-standard /sites/, extract site name and filter
- if pathQuery.startswith('/sites/'):
- parsedPath = self._extractSiteFromStandardPath(pathQuery)
- if parsedPath:
- siteName = parsedPath.get("siteName")
- # Filter sites by name (case-insensitive substring match)
- sites = self._filterSitesByHint(allSites, siteName)
- if not sites:
- return ActionResult.isFailure(error=f"No SharePoint site found matching '{siteName}'")
- logger.info(f"Filtered to site(s) matching '{siteName}': {[s['displayName'] for s in sites]}")
- else:
- sites = allSites
- else:
- sites = allSites
- else:
- # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
- return ActionResult.isFailure(error="No valid list path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
-
- if not sites:
- return ActionResult.isFailure(error="No valid target site determined for list operation")
-
# Check if listQuery is a folder ID (starts with 01PPXICCB...)
if listQuery.startswith('01PPXICCB') or listQuery.startswith('01'):
# Direct folder ID - use it directly
@@ -2375,6 +1877,8 @@ class MethodSharepoint(MethodBase):
# Process each folder path across all sites
listResults = []
+ self.services.chat.progressLogUpdate(operationId, 0.5, f"Listing {len(folderPaths)} folder(s) across {len(sites)} site(s)")
+
for folderPath in folderPaths:
try:
folderResults = []
@@ -2413,17 +1917,7 @@ class MethodSharepoint(MethodBase):
for item in items:
# Use improved folder detection logic
- isFolder = False
- if 'folder' in item:
- isFolder = True
- else:
- # Try to detect by URL pattern or other indicators
- webUrl = item.get('webUrl', '')
- name = item.get('name', '')
-
- # Check if URL has no file extension and looks like a folder path
- if '.' not in name and ('/' in webUrl or '\\' in webUrl):
- isFolder = True
+ isFolder = self.services.sharepoint.detectFolderType(item)
itemInfo = {
"id": item.get("id"),
@@ -2473,17 +1967,7 @@ class MethodSharepoint(MethodBase):
for subfolderItem in subfolderItems:
# Use improved folder detection logic for subfolder items
- subfolderIsFolder = False
- if 'folder' in subfolderItem:
- subfolderIsFolder = True
- else:
- # Try to detect by URL pattern or other indicators
- subfolderWebUrl = subfolderItem.get('webUrl', '')
- subfolderName = subfolderItem.get('name', '')
-
- # Check if URL has no file extension and looks like a folder path
- if '.' not in subfolderName and ('/' in subfolderWebUrl or '\\' in subfolderWebUrl):
- subfolderIsFolder = True
+ subfolderIsFolder = self.services.sharepoint.detectFolderType(subfolderItem)
# Only add files and direct subfolders, NO RECURSION
subfolderItemInfo = {
@@ -2535,6 +2019,9 @@ class MethodSharepoint(MethodBase):
"siteResults": []
})
+ totalItems = sum(len(result.get("siteResults", [])) for result in listResults)
+ self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {totalItems} item(s)")
+
# Create result data
resultData = {
"pathQuery": listQuery,
@@ -2554,9 +2041,10 @@ class MethodSharepoint(MethodBase):
"includeSubfolders": includeSubfolders,
"sitesSearched": len(sites),
"folderCount": len(listResults),
- "totalItems": sum(len(result.get("siteResults", [])) for result in listResults)
+ "totalItems": totalItems
}
+ self.services.chat.progressLogFinish(operationId, True)
return ActionResult(
success=True,
documents=[
@@ -2571,7 +2059,331 @@ class MethodSharepoint(MethodBase):
except Exception as e:
logger.error(f"Error listing SharePoint documents: {str(e)}")
+ if operationId:
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+ except:
+ pass
return ActionResult(
success=False,
error=str(e)
- )
\ No newline at end of file
+ )
+
+ @action
+ async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
+ """
+ GENERAL:
+ - Purpose: Analyze usage intensity of folders and files in SharePoint.
+ - Input requirements: connectionReference (required); documentList (required); optional startDateTime, endDateTime, interval.
+ - Output format: JSON with usage analytics grouped by time intervals.
+
+ Parameters:
+ - connectionReference (str, required): Microsoft connection label.
+ - documentList (list, required): Document list reference(s) containing findDocumentPath result.
+ - startDateTime (str, optional): Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). Default: 30 days ago.
+ - endDateTime (str, optional): End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). Default: current time.
+ - interval (str, optional): Time interval for grouping activities. Options: "day", "week", "month". Default: "day".
+ """
+ import time
+ operationId = None
+ try:
+ # Init progress logger
+ workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+ operationId = f"sharepoint_usage_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking
+ parentOperationId = parameters.get('parentOperationId')
+ self.services.chat.progressLogStart(
+ operationId,
+ "Analyze Folder Usage",
+ "SharePoint Analytics",
+ "Processing document list",
+ parentOperationId=parentOperationId
+ )
+
+ connectionReference = parameters.get("connectionReference")
+ documentList = parameters.get("documentList")
+ pathQuery = parameters.get("pathQuery")
+ if isinstance(documentList, str):
+ documentList = [documentList]
+ startDateTime = parameters.get("startDateTime")
+ endDateTime = parameters.get("endDateTime")
+ interval = parameters.get("interval", "day")
+
+ if not connectionReference:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Connection reference is required")
+
+ # Require either documentList or pathQuery
+ if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"):
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList or pathQuery is required")
+
+ # Resolve folder/item information from documentList or pathQuery
+ siteId = None
+ driveId = None
+ itemId = None
+ folderPath = None
+ folderName = None
+
+ if documentList:
+ foundDocuments, sites, errorMsg = await self._parseDocumentListForFoundDocuments(documentList)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
+
+ if not foundDocuments:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No documents found in documentList")
+
+ # Get siteId from first document (all should be from same site)
+ firstItem = foundDocuments[0]
+ siteId = firstItem.get("siteId")
+ if not siteId:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Site ID missing from documentList")
+
+ # Get drive ID (needed for analytics)
+ driveId = await self.services.sharepoint.getDriveId(siteId)
+ if not driveId:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Could not determine drive ID for the site")
+
+ # If no items from documentList, try pathQuery fallback
+ if not foundDocuments and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
+ sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery)
+ if errorMsg:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=errorMsg)
+
+ if sites:
+ siteId = sites[0].get("id")
+ # Parse pathQuery to find the folder/item
+ pathQueryParsed, fileQuery, searchType, searchOptions = self._parseSearchQuery(pathQuery)
+
+ # Extract folder path from pathQuery
+ folderPath = '/'
+ if pathQueryParsed and pathQueryParsed.startswith('/sites/'):
+ parsedPath = self._extractSiteFromStandardPath(pathQueryParsed)
+ if parsedPath:
+ innerPath = parsedPath.get("innerPath", "")
+ folderPath = '/' + innerPath if innerPath else '/'
+ elif pathQueryParsed:
+ folderPath = pathQueryParsed
+
+ # Get drive ID
+ driveId = await self.services.sharepoint.getDriveId(siteId)
+ if not driveId:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Could not determine drive ID for the site")
+
+ # Get folder/item by path
+ folderInfo = await self.services.sharepoint.getFolderByPath(siteId, folderPath.lstrip('/'))
+ if not folderInfo:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error=f"Folder or file not found at path: {folderPath}")
+
+ # Add pathQuery item to foundDocuments for processing
+ foundDocuments = [{
+ "id": folderInfo.get("id"),
+ "name": folderInfo.get("name", ""),
+ "type": "folder" if folderInfo.get("folder") else "file",
+ "siteId": siteId,
+ "fullPath": folderPath,
+ "webUrl": folderInfo.get("webUrl", "")
+ }]
+
+ if not siteId or not driveId:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")
+
+ self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
+ # Get Microsoft connection
+ connection = self._getMicrosoftConnection(connectionReference)
+ if not connection:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # Set access token
+ if not self.services.sharepoint.setAccessTokenFromConnection(connection):
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="Failed to set SharePoint access token")
+
+ # Process all items from documentList or pathQuery
+ # IMPORTANT: Only analyze FOLDERS, not files (action is "analyzeFolderUsage")
+ itemsToAnalyze = []
+ if foundDocuments:
+ for item in foundDocuments:
+ itemId = item.get("id")
+ itemType = item.get("type", "").lower()
+
+ # Only process folders, skip files and site-level items
+ if itemId and itemType == "folder":
+ itemsToAnalyze.append({
+ "id": itemId,
+ "name": item.get("name", ""),
+ "type": itemType,
+ "path": item.get("fullPath", ""),
+ "webUrl": item.get("webUrl", "")
+ })
+
+ if not itemsToAnalyze:
+ if operationId:
+ self.services.chat.progressLogFinish(operationId, False)
+ return ActionResult.isFailure(error="No valid folders found in documentList to analyze. Note: This action only analyzes folders, not files.")
+
+ self.services.chat.progressLogUpdate(operationId, 0.4, f"Analyzing {len(itemsToAnalyze)} folder(s)")
+
+ # Analyze each item
+ allAnalytics = []
+ totalActivities = 0
+ uniqueUsers = set()
+ activityTypes = {}
+
+ # Compute actual date range values (getFolderUsageAnalytics will set defaults if None)
+ # We need to compute them here to store in output, since getFolderUsageAnalytics modifies them
+ actualStartDateTime = startDateTime
+ actualEndDateTime = endDateTime
+ if not actualEndDateTime:
+ actualEndDateTime = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
+ if not actualStartDateTime:
+ startDate = datetime.now(timezone.utc) - timedelta(days=30)
+ actualStartDateTime = startDate.isoformat().replace('+00:00', 'Z')
+
+ for idx, item in enumerate(itemsToAnalyze):
+ progress = 0.4 + (idx / len(itemsToAnalyze)) * 0.5
+ self.services.chat.progressLogUpdate(operationId, progress, f"Analyzing folder {item['name']} ({idx+1}/{len(itemsToAnalyze)})")
+
+ # Get usage analytics for this folder
+ analyticsResult = await self.services.sharepoint.getFolderUsageAnalytics(
+ siteId=siteId,
+ driveId=driveId,
+ itemId=item["id"],
+ startDateTime=startDateTime,
+ endDateTime=endDateTime,
+ interval=interval
+ )
+
+ if "error" in analyticsResult:
+ logger.warning(f"Failed to get analytics for item {item['name']} ({item['id']}): {analyticsResult['error']}")
+ # Continue with other items even if one fails
+ itemAnalytics = {
+ "itemId": item["id"],
+ "itemName": item["name"],
+ "itemType": item["type"],
+ "itemPath": item["path"],
+ "error": analyticsResult.get("error", "Unknown error")
+ }
+ else:
+ # Process analytics for this item
+ itemActivities = 0
+ itemUsers = set()
+ itemActivityTypes = {}
+
+ if "value" in analyticsResult:
+ for intervalData in analyticsResult["value"]:
+ activities = intervalData.get("activities", [])
+ for activity in activities:
+ itemActivities += 1
+ totalActivities += 1
+
+ action = activity.get("action", {})
+ actionType = action.get("verb", "unknown")
+ itemActivityTypes[actionType] = itemActivityTypes.get(actionType, 0) + 1
+ activityTypes[actionType] = activityTypes.get(actionType, 0) + 1
+
+ actor = activity.get("actor", {})
+ userPrincipalName = actor.get("userPrincipalName", "")
+ if userPrincipalName:
+ itemUsers.add(userPrincipalName)
+ uniqueUsers.add(userPrincipalName)
+
+ itemAnalytics = {
+ "itemId": item["id"],
+ "itemName": item["name"],
+ "itemType": item["type"],
+ "itemPath": item["path"],
+ "webUrl": item["webUrl"],
+ "analytics": analyticsResult,
+ "summary": {
+ "totalActivities": itemActivities,
+ "uniqueUsers": len(itemUsers),
+ "activityTypes": itemActivityTypes
+ }
+ }
+
+ # Include note if analytics are not available
+ if "note" in analyticsResult:
+ itemAnalytics["note"] = analyticsResult["note"]
+
+ allAnalytics.append(itemAnalytics)
+
+ self.services.chat.progressLogUpdate(operationId, 0.9, "Processing analytics data")
+
+ # Process and format analytics data
+ resultData = {
+ "siteId": siteId,
+ "driveId": driveId,
+ "startDateTime": actualStartDateTime, # Store computed date range (not None)
+ "endDateTime": actualEndDateTime, # Store computed date range (not None)
+ "interval": interval,
+ "itemsAnalyzed": len(itemsToAnalyze),
+ "foldersAnalyzed": len([item for item in allAnalytics if item.get("itemType") == "folder"]),
+ "items": allAnalytics,
+ "summary": {
+ "totalActivities": totalActivities,
+ "uniqueUsers": len(uniqueUsers),
+ "activityTypes": activityTypes
+ },
+ "note": f"Analyzed {len(itemsToAnalyze)} folder(s) from {actualStartDateTime} to {actualEndDateTime}. " +
+ f"Found {totalActivities} total activities across {len(uniqueUsers)} unique user(s)." +
+ (f" Note: {len([item for item in allAnalytics if 'error' in item])} folder(s) had errors or no analytics data available." if any('error' in item for item in allAnalytics) else ""),
+ "timestamp": self.services.utils.timestampGetUtc()
+ }
+
+ self.services.chat.progressLogUpdate(operationId, 0.95, f"Found {totalActivities} total activities across {len(itemsToAnalyze)} folder(s)")
+
+ validationMetadata = {
+ "actionType": "sharepoint.analyzeFolderUsage",
+ "itemsAnalyzed": len(itemsToAnalyze),
+ "interval": interval,
+ "totalActivities": totalActivities,
+ "uniqueUsers": len(uniqueUsers)
+ }
+
+ self.services.chat.progressLogFinish(operationId, True)
+ return ActionResult(
+ success=True,
+ documents=[
+ ActionDocument(
+ documentName=f"sharepoint_usage_analysis_{self._format_timestamp_for_filename()}.json",
+ documentData=json.dumps(resultData, indent=2),
+ mimeType="application/json",
+ validationMetadata=validationMetadata
+ )
+ ]
+ )
+
+ except Exception as e:
+ logger.error(f"Error analyzing folder usage: {str(e)}")
+ if operationId:
+ try:
+ self.services.chat.progressLogFinish(operationId, False)
+ except:
+ pass
+ return ActionResult(
+ success=False,
+ error=str(e)
+ )
\ No newline at end of file
diff --git a/modules/workflows/processing/core/actionExecutor.py b/modules/workflows/processing/core/actionExecutor.py
index f9af58e7..f183c0e4 100644
--- a/modules/workflows/processing/core/actionExecutor.py
+++ b/modules/workflows/processing/core/actionExecutor.py
@@ -82,6 +82,35 @@ class ActionExecutor:
enhancedParameters['expectedDocumentFormats'] = action.expectedDocumentFormats
logger.info(f"Expected formats: {action.expectedDocumentFormats}")
+ # Get current task execution operationId to pass as parent to action methods
+ # This MUST be the "Service Workflow Execution" operation ID (taskExec_*)
+ parentOperationId = None
+ try:
+ progressLogger = self.services.chat.createProgressLogger()
+ activeOperations = progressLogger.getActiveOperations()
+ logger.debug(f"Looking for parent operation ID. Active operations: {list(activeOperations.keys())}")
+
+ # Look for task execution operation (starts with "taskExec_")
+ # This is the "Service Workflow Execution" level that should be parent of ALL actions
+ for opId in activeOperations.keys():
+ if opId.startswith("taskExec_"):
+ parentOperationId = opId
+ logger.info(f"Found parent operation ID: {parentOperationId} for action {action.execMethod}.{action.execAction}")
+ break
+
+ if not parentOperationId:
+ logger.warning(f"No taskExec_ operation found in active operations. Active operations: {list(activeOperations.keys())}")
+ except Exception as e:
+ logger.error(f"Error getting parent operation ID: {str(e)}")
+
+ # Add parentOperationId to parameters so action methods can use it
+ # This is critical for UI dashboard hierarchical display
+ if parentOperationId:
+ enhancedParameters['parentOperationId'] = parentOperationId
+ logger.info(f"Passing parentOperationId '{parentOperationId}' to action {action.execMethod}.{action.execAction}")
+ else:
+ logger.warning(f"WARNING: No parentOperationId found for action {action.execMethod}.{action.execAction}. Action logs will appear at root level!")
+
# Check workflow status before executing the action
checkWorkflowStopped(self.services)