diff --git a/modules/routes/routeDataAutomation.py b/modules/routes/routeDataAutomation.py index 903d0d53..ee13915c 100644 --- a/modules/routes/routeDataAutomation.py +++ b/modules/routes/routeDataAutomation.py @@ -15,6 +15,7 @@ from modules.security.auth import getCurrentUser, limiter from modules.datamodels.datamodelChat import AutomationDefinition, ChatWorkflow from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata from modules.shared.attributeUtils import getModelAttributeDefinitions +from modules.features.automation import executeAutomation # Configure logger logger = logging.getLogger(__name__) @@ -217,7 +218,7 @@ async def execute_automation( """Execute an automation immediately (test mode)""" try: chatInterface = getChatInterface(currentUser) - workflow = await chatInterface.executeAutomation(automationId) + workflow = await executeAutomation(automationId, chatInterface) return workflow except HTTPException: raise diff --git a/modules/services/serviceChat/mainServiceChat.py b/modules/services/serviceChat/mainServiceChat.py index cb05279f..7848cb29 100644 --- a/modules/services/serviceChat/mainServiceChat.py +++ b/modules/services/serviceChat/mainServiceChat.py @@ -1013,7 +1013,8 @@ class ChatService: return self._progressLogger def createProgressLogger(self) -> ProgressLogger: - return ProgressLogger(self.services) + """Get or create the progress logger instance (singleton)""" + return self._getProgressLogger() def progressLogStart(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentOperationId: Optional[str] = None): """Wrapper for ProgressLogger.startOperation diff --git a/modules/services/serviceSharepoint/mainServiceSharepoint.py b/modules/services/serviceSharepoint/mainServiceSharepoint.py index e7f24648..6c6c266e 100644 --- a/modules/services/serviceSharepoint/mainServiceSharepoint.py +++ b/modules/services/serviceSharepoint/mainServiceSharepoint.py @@ -287,7 +287,12 @@ class SharepointService: try: # Clean the path cleanPath = folderPath.lstrip('/') - endpoint = f"sites/{siteId}/drive/root:/{cleanPath}" + + # If path is empty, get root directly + if not cleanPath: + endpoint = f"sites/{siteId}/drive/root" + else: + endpoint = f"sites/{siteId}/drive/root:/{cleanPath}" result = await self._makeGraphApiCall(endpoint) @@ -499,4 +504,407 @@ class SharepointService: except Exception as e: logger.error(f"Error downloading file by path: {str(e)}") return None + + async def _getItemById(self, siteId: str, driveId: str, itemId: str) -> Optional[Dict[str, Any]]: + """Verify that an item exists by getting it by ID. + + Args: + siteId: SharePoint site ID + driveId: Drive ID (document library) + itemId: Item ID to verify + + Returns: + Item dictionary if found, None otherwise + """ + try: + endpoint = f"sites/{siteId}/drives/{driveId}/items/{itemId}" + result = await self._makeGraphApiCall(endpoint) + + if "error" in result: + logger.warning(f"Item {itemId} not found: {result['error']}") + return None + + return result + + except Exception as e: + logger.warning(f"Error verifying item {itemId}: {str(e)}") + return None + + async def _findDriveForItem(self, siteId: str, itemId: str) -> Optional[str]: + """Find which drive contains a specific item by trying to get it from all drives. + + Args: + siteId: SharePoint site ID + itemId: Item ID to find + + Returns: + Drive ID if found, None otherwise + """ + try: + # Get all drives for the site + endpoint = f"sites/{siteId}/drives" + drivesResult = await self._makeGraphApiCall(endpoint) + + if "error" in drivesResult: + logger.warning(f"Could not get drives for site {siteId}: {drivesResult['error']}") + return None + + drives = drivesResult.get("value", []) + if not drives: + logger.warning(f"No drives found for site {siteId}") + return None + + # Try to find the item in each drive + for drive in drives: + driveId = drive.get("id") + if not driveId: + continue + + itemInfo = await self._getItemById(siteId, driveId, itemId) + if itemInfo: + logger.info(f"Found item {itemId} in drive {drive.get('name', driveId)}") + return driveId + + logger.warning(f"Item {itemId} not found in any drive for site {siteId}") + return None + + except Exception as e: + logger.warning(f"Error finding drive for item {itemId}: {str(e)}") + return None + + async def getFolderUsageAnalytics(self, siteId: str, driveId: str, itemId: str, startDateTime: Optional[str] = None, endDateTime: Optional[str] = None, interval: str = "day") -> Dict[str, Any]: + """Get usage analytics for a folder or file. + + Args: + siteId: SharePoint site ID + driveId: Drive ID (document library) + itemId: Folder or file item ID + startDateTime: Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). If None, uses 30 days ago. + endDateTime: End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). If None, uses current time. + interval: Time interval for grouping activities. Options: "day", "week", "month". Default: "day" + + Returns: + Dictionary containing analytics data with activities grouped by interval. + If analytics are not available (404), returns empty analytics structure instead of error. + """ + try: + from datetime import datetime, timedelta, timezone + + # Set default time range if not provided (last 30 days) + if not endDateTime: + endDateTime = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + if not startDateTime: + startDate = datetime.now(timezone.utc) - timedelta(days=30) + startDateTime = startDate.isoformat().replace('+00:00', 'Z') + + # Build endpoint with query parameters + endpoint = f"sites/{siteId}/drives/{driveId}/items/{itemId}/getActivitiesByInterval" + endpoint += f"?startDateTime={startDateTime}&endDateTime={endDateTime}&interval={interval}" + + result = await self._makeGraphApiCall(endpoint) + + if "error" in result: + errorMsg = result.get('error', '') + # Check if it's a 404 error + if isinstance(errorMsg, str) and '404' in errorMsg: + # Verify if the item exists - first try with current driveId + itemInfo = await self._getItemById(siteId, driveId, itemId) + + # If not found, try to find the correct drive for this item + if not itemInfo: + logger.info(f"Item {itemId} not found in drive {driveId}, searching for correct drive") + correctDriveId = await self._findDriveForItem(siteId, itemId) + if correctDriveId and correctDriveId != driveId: + logger.info(f"Found item in different drive {correctDriveId}, retrying analytics call") + # Retry with correct drive + endpoint = f"sites/{siteId}/drives/{correctDriveId}/items/{itemId}/getActivitiesByInterval" + endpoint += f"?startDateTime={startDateTime}&endDateTime={endDateTime}&interval={interval}" + result = await self._makeGraphApiCall(endpoint) + + if "error" not in result: + logger.info(f"Successfully retrieved analytics using correct drive {correctDriveId}") + return result + # If still error, continue with original error handling + itemInfo = await self._getItemById(siteId, correctDriveId, itemId) + + if itemInfo: + # Item exists but analytics are not available - return empty analytics + logger.warning(f"Usage analytics not available for item {itemId} (item exists but has no activity data or analytics not supported)") + return { + "value": [], + "note": "No analytics data available for this item. The item exists but may not have activity data or analytics may not be supported for this item type." + } + else: + # Item doesn't exist + logger.error(f"Item {itemId} not found when trying to get usage analytics") + return result + else: + # Other error + logger.error(f"Error getting usage analytics: {result['error']}") + return result + + logger.info(f"Retrieved usage analytics for item {itemId} with interval {interval}") + return result + + except Exception as e: + logger.error(f"Error getting folder usage analytics: {str(e)}") + return {"error": f"Error getting folder usage analytics: {str(e)}"} + + async def getDriveId(self, siteId: str, driveName: Optional[str] = None) -> Optional[str]: + """Get drive ID for a site. If driveName is provided, finds the specific drive, otherwise returns the default drive. + + Args: + siteId: SharePoint site ID + driveName: Optional drive name (document library name). If None, returns default drive. + + Returns: + Drive ID string or None if not found + """ + try: + endpoint = f"sites/{siteId}/drives" + result = await self._makeGraphApiCall(endpoint) + + if "error" in result: + logger.error(f"Error getting drives: {result['error']}") + return None + + drives = result.get("value", []) + + if not driveName: + # Return default drive (usually the first one or the one named "Documents") + for drive in drives: + if drive.get("name") == "Documents" or drive.get("name") == "Shared Documents": + logger.info(f"Found default drive: {drive.get('name')} (ID: {drive.get('id')})") + return drive.get("id") + # If no Documents drive found, return first drive + if drives: + logger.info(f"Using first drive: {drives[0].get('name')} (ID: {drives[0].get('id')})") + return drives[0].get("id") + return None + + # Find specific drive by name + for drive in drives: + if drive.get("name", "").lower() == driveName.lower(): + logger.info(f"Found drive '{driveName}': {drive.get('id')}") + return drive.get("id") + + logger.warning(f"Drive '{driveName}' not found") + return None + + except Exception as e: + logger.error(f"Error getting drive ID: {str(e)}") + return None + + def extractSiteFromStandardPath(self, pathQuery: str) -> Optional[Dict[str, str]]: + """ + Extract site name from Microsoft-standard server-relative path: + /sites/company-share/Freigegebene Dokumente/... + + Returns dict with keys: siteName, innerPath (no leading slash) on success, else None. + """ + try: + if not pathQuery or not pathQuery.startswith('/sites/'): + return None + + # Remove leading /sites/ prefix + remainder = pathQuery[7:] # len('/sites/') = 7 + + # Split on first '/' to get site name + if '/' not in remainder: + # Only site name, no inner path + return {"siteName": remainder, "innerPath": ""} + + siteName, inner = remainder.split('/', 1) + siteName = siteName.strip() + innerPath = inner.strip() + + if not siteName: + return None + + return {"siteName": siteName, "innerPath": innerPath} + except Exception as e: + logger.error(f"Error extracting site from standard path '{pathQuery}': {str(e)}") + return None + + async def getSiteByStandardPath(self, sitePath: str, allSites: Optional[List[Dict[str, Any]]] = None) -> Optional[Dict[str, Any]]: + """ + Get SharePoint site directly by Microsoft-standard path (/sites/SiteName) + without loading all sites. Uses hostname from first available site. + + Parameters: + sitePath (str): Site path like 'company-share' (without /sites/ prefix) + allSites (Optional[List[Dict]]): Pre-discovered sites list (optional, for optimization) + + Returns: + Optional[Dict[str, Any]]: Site information if found, None otherwise + """ + try: + # Get hostname from first available site (minimal load - only 1 site) + if allSites and len(allSites) > 0: + from urllib.parse import urlparse + webUrl = allSites[0].get("webUrl", "") + hostname = urlparse(webUrl).hostname if webUrl else None + else: + # Discover minimal sites to get hostname + minimalSites = await self.discoverSites() + if not minimalSites: + logger.warning("No sites available to extract hostname") + return None + from urllib.parse import urlparse + hostname = urlparse(minimalSites[0].get("webUrl", "")).hostname + + if not hostname: + logger.warning("Could not extract hostname from site") + return None + + logger.info(f"Extracted hostname '{hostname}' from first site, now getting site by path: {sitePath}") + + # Get site directly using hostname + path + endpoint = f"sites/{hostname}:/sites/{sitePath}" + result = await self._makeGraphApiCall(endpoint) + + if "error" in result: + logger.warning(f"Could not get site directly by path '{sitePath}': {result['error']}") + return None + + siteInfo = { + "id": result.get("id"), + "displayName": result.get("displayName"), + "name": result.get("name"), + "webUrl": result.get("webUrl"), + "description": result.get("description"), + "createdDateTime": result.get("createdDateTime"), + "lastModifiedDateTime": result.get("lastModifiedDateTime") + } + + logger.info(f"Successfully got site by standard path: {siteInfo['displayName']} (ID: {siteInfo['id']})") + return siteInfo + + except Exception as e: + logger.error(f"Error getting site by standard path '{sitePath}': {str(e)}") + return None + + def filterSitesByHint(self, sites: List[Dict[str, Any]], siteHint: str) -> List[Dict[str, Any]]: + """Filter discovered sites by a human-entered site hint (case-insensitive substring).""" + try: + if not siteHint: + return sites + hint = siteHint.strip().lower() + filtered: List[Dict[str, Any]] = [] + for site in sites: + name = (site.get("displayName") or "").lower() + webUrl = (site.get("webUrl") or "").lower() + if hint in name or hint in webUrl: + filtered.append(site) + return filtered if filtered else sites + except Exception as e: + logger.error(f"Error filtering sites by hint '{siteHint}': {str(e)}") + return sites + + async def resolveSitesFromPathQuery(self, pathQuery: str, allSites: Optional[List[Dict[str, Any]]] = None) -> List[Dict[str, Any]]: + """ + Resolve sites from pathQuery. Handles both Microsoft-standard paths (/sites/SiteName/...) + and regular paths. Returns list of matching sites. + + Parameters: + pathQuery (str): Path query string (e.g., /sites/SiteName/FolderPath) + allSites (Optional[List[Dict]]): Pre-discovered sites list (optional, for optimization) + + Returns: + List[Dict[str, Any]]: List of matching sites + """ + try: + # If pathQuery starts with Microsoft-standard /sites/, try to get site directly + if pathQuery.startswith('/sites/'): + parsedPath = self.extractSiteFromStandardPath(pathQuery) + if parsedPath: + siteName = parsedPath.get("siteName") + directSite = await self.getSiteByStandardPath(siteName, allSites) + if directSite: + logger.info(f"Got site directly by standard path - no need to discover all sites") + return [directSite] + else: + logger.warning(f"Could not get site directly, falling back to site discovery") + + # If we didn't get the site directly, use discovery and filtering + if not allSites: + allSites = await self.discoverSites() + if not allSites: + logger.warning("No SharePoint sites found or accessible") + return [] + + # If pathQuery starts with Microsoft-standard /sites/, extract site name and filter + if pathQuery.startswith('/sites/'): + parsedPath = self.extractSiteFromStandardPath(pathQuery) + if parsedPath: + siteName = parsedPath.get("siteName") + sites = self.filterSitesByHint(allSites, siteName) + if not sites: + logger.warning(f"No SharePoint site found matching '{siteName}'") + return [] + logger.info(f"Filtered to site(s) matching '{siteName}': {[s['displayName'] for s in sites]}") + return sites + else: + return allSites + else: + return allSites + + except Exception as e: + logger.error(f"Error resolving sites from pathQuery '{pathQuery}': {str(e)}") + return [] + + def validatePathQuery(self, pathQuery: str) -> tuple[bool, Optional[str]]: + """ + Validate pathQuery format. Returns (isValid, errorMessage). + + Parameters: + pathQuery (str): Path query to validate + + Returns: + tuple[bool, Optional[str]]: (True, None) if valid, (False, errorMessage) if invalid + """ + try: + if not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*": + return False, "pathQuery cannot be empty or '*'" + + if not pathQuery.startswith('/'): + return False, "pathQuery must start with '/' and include site name with Microsoft-standard syntax /sites//... e.g. /sites/company-share/Freigegebene Dokumente/Work" + + # Check if pathQuery contains search terms (words without proper path structure) + validPathPrefixes = ['/sites/', '/Documents', '/documents', '/Shared Documents', '/shared documents'] + if not any(pathQuery.startswith(prefix) for prefix in validPathPrefixes): + return False, f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery." + + return True, None + except Exception as e: + logger.error(f"Error validating pathQuery '{pathQuery}': {str(e)}") + return False, f"Error validating pathQuery: {str(e)}" + + def detectFolderType(self, item: Dict[str, Any]) -> bool: + """ + Detect if an item is a folder using improved detection logic. + + Parameters: + item (Dict[str, Any]): Item from SharePoint API response + + Returns: + bool: True if item is a folder, False otherwise + """ + try: + # Use improved folder detection logic + if 'folder' in item: + return True + + # Try to detect by URL pattern or other indicators + webUrl = item.get('webUrl', '') + name = item.get('name', '') + + # Check if URL has no file extension and looks like a folder path + if '.' not in name and ('/' in webUrl or '\\' in webUrl): + return True + + return False + except Exception as e: + logger.error(f"Error detecting folder type: {str(e)}") + return False diff --git a/modules/workflows/methods/methodAi.py b/modules/workflows/methods/methodAi.py index eee848f7..ba6bb9b3 100644 --- a/modules/workflows/methods/methodAi.py +++ b/modules/workflows/methods/methodAi.py @@ -49,11 +49,13 @@ class MethodAi(MethodBase): operationId = f"ai_process_{workflowId}_{int(time.time())}" # Start progress tracking + parentOperationId = parameters.get('parentOperationId') self.services.chat.progressLogStart( operationId, "Generate", "AI Processing", - f"Format: {parameters.get('resultType', 'txt')}" + f"Format: {parameters.get('resultType', 'txt')}", + parentOperationId=parentOperationId ) aiPrompt = parameters.get("aiPrompt") @@ -256,11 +258,13 @@ class MethodAi(MethodBase): operationId = f"web_research_{workflowId}_{int(time.time())}" # Start progress tracking + parentOperationId = parameters.get('parentOperationId') self.services.chat.progressLogStart( operationId, "Web Research", "Searching and Crawling", - "Extracting URLs and Content" + "Extracting URLs and Content", + parentOperationId=parentOperationId ) # Call webcrawl service - service handles all AI intention analysis and processing diff --git a/modules/workflows/methods/methodContext.py b/modules/workflows/methods/methodContext.py index 8bd16f9b..20485612 100644 --- a/modules/workflows/methods/methodContext.py +++ b/modules/workflows/methods/methodContext.py @@ -250,11 +250,13 @@ class MethodContext(MethodBase): return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}") # Start progress tracking + parentOperationId = parameters.get('parentOperationId') self.services.chat.progressLogStart( operationId, "Extracting content from documents", "Content Extraction", - f"Documents: {len(documentList.references)}" + f"Documents: {len(documentList.references)}", + parentOperationId=parentOperationId ) # Get ChatDocuments from documentList diff --git a/modules/workflows/methods/methodOutlook.py b/modules/workflows/methods/methodOutlook.py index 033b5283..16030fcc 100644 --- a/modules/workflows/methods/methodOutlook.py +++ b/modules/workflows/methods/methodOutlook.py @@ -334,11 +334,13 @@ class MethodOutlook(MethodBase): operationId = f"outlook_read_{workflowId}_{int(time.time())}" # Start progress tracking + parentOperationId = parameters.get('parentOperationId') self.services.chat.progressLogStart( operationId, "Read Emails", "Outlook Email Reading", - f"Folder: {parameters.get('folder', 'Inbox')}" + f"Folder: {parameters.get('folder', 'Inbox')}", + parentOperationId=parentOperationId ) connectionReference = parameters.get("connectionReference") @@ -1546,11 +1548,13 @@ Return JSON: operationId = f"outlook_send_{workflowId}_{int(time.time())}" # Start progress tracking + parentOperationId = parameters.get('parentOperationId') self.services.chat.progressLogStart( operationId, "Send Draft Email", "Outlook Email Sending", - f"Processing {len(parameters.get('documentList', []))} draft(s)" + f"Processing {len(parameters.get('documentList', []))} draft(s)", + parentOperationId=parentOperationId ) connectionReference = parameters.get("connectionReference") diff --git a/modules/workflows/methods/methodSharepoint.py b/modules/workflows/methods/methodSharepoint.py index da3db26b..d5109251 100644 --- a/modules/workflows/methods/methodSharepoint.py +++ b/modules/workflows/methods/methodSharepoint.py @@ -7,7 +7,7 @@ import logging import re import json from typing import Dict, Any, List, Optional -from datetime import datetime, UTC +from datetime import datetime, UTC, timedelta, timezone import urllib import aiohttp import asyncio @@ -122,103 +122,26 @@ class MethodSharepoint(MethodBase): logger.error(f"Error extracting hostname from webUrl '{webUrl}': {str(e)}") return None - async def _getSiteByStandardPath(self, sitePath: str) -> Optional[Dict[str, Any]]: - """ - Get SharePoint site directly by Microsoft-standard path (/sites/SiteName) - without loading all sites. Uses hostname from first available site. - - Parameters: - sitePath (str): Site path like 'company-share' (without /sites/ prefix) - - Returns: - Optional[Dict[str, Any]]: Site information if found, None otherwise - """ - try: - # Get hostname from first available site (minimal load - only 1 site) - minimalSites = await self._discoverSharePointSites(limit=1) - if not minimalSites: - logger.warning("No sites available to extract hostname") - return None - - hostname = self._extractHostnameFromWebUrl(minimalSites[0].get("webUrl")) - if not hostname: - logger.warning("Could not extract hostname from site") - return None - - logger.info(f"Extracted hostname '{hostname}' from first site, now getting site by path: {sitePath}") - - # Get site directly using hostname + path - endpoint = f"sites/{hostname}:/sites/{sitePath}" - result = await self._makeGraphApiCall(endpoint) - - if "error" in result: - logger.warning(f"Could not get site directly by path '{sitePath}': {result['error']}") - return None - - siteInfo = { - "id": result.get("id"), - "displayName": result.get("displayName"), - "name": result.get("name"), - "webUrl": result.get("webUrl"), - "description": result.get("description"), - "createdDateTime": result.get("createdDateTime"), - "lastModifiedDateTime": result.get("lastModifiedDateTime") - } - - logger.info(f"Successfully got site by standard path: {siteInfo['displayName']} (ID: {siteInfo['id']})") - return siteInfo - - except Exception as e: - logger.error(f"Error getting site by standard path '{sitePath}': {str(e)}") - return None - - def _filterSitesByHint(self, sites: List[Dict[str, Any]], siteHint: str) -> List[Dict[str, Any]]: - """Filter discovered sites by a human-entered site hint (case-insensitive substring).""" - try: - if not siteHint: - return sites - hint = siteHint.strip().lower() - filtered: List[Dict[str, Any]] = [] - for site in sites: - name = (site.get("displayName") or "").lower() - webUrl = (site.get("webUrl") or "").lower() - if hint in name or hint in webUrl: - filtered.append(site) - return filtered if filtered else sites - except Exception as e: - logger.error(f"Error filtering sites by hint '{siteHint}': {str(e)}") - return sites - def _extractSiteFromStandardPath(self, pathQuery: str) -> Optional[Dict[str, str]]: """ - Extract site name from Microsoft-standard server-relative path: - /sites/company-share/Freigegebene Dokumente/... - - Returns dict with keys: siteName, innerPath (no leading slash) on success, else None. + Extract site name from Microsoft-standard server-relative path. + Delegates to SharePoint service. """ - try: - if not pathQuery or not pathQuery.startswith('/sites/'): - return None - - # Remove leading /sites/ prefix - remainder = pathQuery[7:] # len('/sites/') = 7 - - # Split on first '/' to get site name - if '/' not in remainder: - # Only site name, no inner path - return {"siteName": remainder, "innerPath": ""} - - siteName, inner = remainder.split('/', 1) - siteName = siteName.strip() - innerPath = inner.strip() - - if not siteName: - return None - - return {"siteName": siteName, "innerPath": innerPath} - except Exception as e: - logger.error(f"Error extracting site from standard path '{pathQuery}': {str(e)}") - return None + return self.services.sharepoint.extractSiteFromStandardPath(pathQuery) + + async def _getSiteByStandardPath(self, sitePath: str) -> Optional[Dict[str, Any]]: + """ + Get SharePoint site directly by Microsoft-standard path. + Delegates to SharePoint service. + """ + return await self.services.sharepoint.getSiteByStandardPath(sitePath) + + def _filterSitesByHint(self, sites: List[Dict[str, Any]], siteHint: str) -> List[Dict[str, Any]]: + """ + Filter discovered sites by a human-entered site hint. + Delegates to SharePoint service. + """ + return self.services.sharepoint.filterSitesByHint(sites, siteHint) def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]: """ @@ -624,6 +547,170 @@ class MethodSharepoint(MethodBase): except Exception as e: logger.error(f"Error getting site ID: {str(e)}") return "" + + async def _parseDocumentListForFoundDocuments(self, documentList: Any) -> tuple[Optional[List[Dict[str, Any]]], Optional[List[Dict[str, Any]]], Optional[str]]: + """ + Parse documentList to extract foundDocuments and site information. + + Parameters: + documentList: Document list (can be list, DocumentReferenceList, or string) + + Returns: + tuple: (foundDocuments, sites, errorMessage) + - foundDocuments: List of found documents from findDocumentPath result + - sites: List of site dictionaries with id, displayName, webUrl + - errorMessage: Error message if parsing failed, None otherwise + """ + try: + if isinstance(documentList, str): + documentList = [documentList] + + # Resolve documentList to get actual documents + from modules.datamodels.datamodelDocref import DocumentReferenceList + if isinstance(documentList, DocumentReferenceList): + docRefList = documentList + elif isinstance(documentList, list): + docRefList = DocumentReferenceList.from_string_list(documentList) + else: + docRefList = DocumentReferenceList(references=[]) + + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) + if not chatDocuments: + return None, None, "No documents found for the provided document list" + + firstDocument = chatDocuments[0] + fileData = self.services.chat.getFileData(firstDocument.fileId) + if not fileData: + return None, None, None # No fileData, but not an error (might be regular file) + + try: + resultData = json.loads(fileData) + foundDocuments = resultData.get("foundDocuments", []) + + # If no foundDocuments, check if it's a listDocuments result (has listResults) + if not foundDocuments and "listResults" in resultData: + logger.info(f"documentList contains listResults from listDocuments, converting to foundDocuments format") + listResults = resultData.get("listResults", []) + foundDocuments = [] + siteIdFromList = None + siteNameFromList = None + + for listResult in listResults: + siteResults = listResult.get("siteResults", []) + for siteResult in siteResults: + items = siteResult.get("items", []) + # Extract site info from first item if available + if items and not siteIdFromList: + siteNameFromList = items[0].get("siteName") + + for item in items: + # Convert listDocuments item format to foundDocuments format + if item.get("type") == "file": + foundDoc = { + "id": item.get("id"), + "name": item.get("name"), + "type": "file", + "siteName": item.get("siteName"), + "siteId": None, # Will be determined from site discovery + "webUrl": item.get("webUrl"), + "fullPath": item.get("webUrl", ""), + "parentPath": item.get("parentPath", "") + } + foundDocuments.append(foundDoc) + + # Discover sites to get siteId if we have siteName + if foundDocuments and siteNameFromList and not siteIdFromList: + logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'") + allSites = await self._discoverSharePointSites() + matchingSites = self._filterSitesByHint(allSites, siteNameFromList) + if matchingSites: + siteIdFromList = matchingSites[0].get("id") + # Update all foundDocuments with siteId + for doc in foundDocuments: + doc["siteId"] = siteIdFromList + logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'") + + logger.info(f"Converted {len(foundDocuments)} files from listResults format") + + if not foundDocuments: + return None, None, None # No foundDocuments, but not an error + + # Extract site information from foundDocuments + firstDoc = foundDocuments[0] + siteName = firstDoc.get("siteName") + siteId = firstDoc.get("siteId") + + # If siteId is missing (from listDocuments conversion), discover sites to find it + if siteName and not siteId: + logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'") + allSites = await self._discoverSharePointSites() + matchingSites = self._filterSitesByHint(allSites, siteName) + if matchingSites: + siteId = matchingSites[0].get("id") + logger.info(f"Found siteId '{siteId}' for site '{siteName}'") + + sites = None + if siteName and siteId: + sites = [{ + "id": siteId, + "displayName": siteName, + "webUrl": firstDoc.get("webUrl", "") + }] + logger.info(f"Using specific site from documentList: {siteName} (ID: {siteId})") + elif siteName: + # Try to get site by name + allSites = await self._discoverSharePointSites() + matchingSites = self._filterSitesByHint(allSites, siteName) + if matchingSites: + sites = [{ + "id": matchingSites[0].get("id"), + "displayName": siteName, + "webUrl": matchingSites[0].get("webUrl", "") + }] + logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})") + else: + return None, None, f"Site '{siteName}' not found. Cannot determine target site." + else: + return None, None, "Site information missing from documentList. Cannot determine target site." + + return foundDocuments, sites, None + + except json.JSONDecodeError as e: + return None, None, f"Invalid JSON in documentList: {str(e)}" + except Exception as e: + return None, None, f"Error processing documentList: {str(e)}" + + except Exception as e: + logger.error(f"Error parsing documentList: {str(e)}") + return None, None, f"Error parsing documentList: {str(e)}" + + async def _resolveSitesFromPathQuery(self, pathQuery: str) -> tuple[List[Dict[str, Any]], Optional[str]]: + """ + Resolve sites from pathQuery using SharePoint service helper methods. + + Parameters: + pathQuery (str): Path query string + + Returns: + tuple: (sites, errorMessage) + - sites: List of site dictionaries + - errorMessage: Error message if resolution failed, None otherwise + """ + try: + # Validate pathQuery format + isValid, errorMsg = self.services.sharepoint.validatePathQuery(pathQuery) + if not isValid: + return [], errorMsg + + # Resolve sites using service helper + sites = await self.services.sharepoint.resolveSitesFromPathQuery(pathQuery) + if not sites: + return [], "No SharePoint sites found or accessible" + + return sites, None + except Exception as e: + logger.error(f"Error resolving sites from pathQuery '{pathQuery}': {str(e)}") + return [], f"Error resolving sites from pathQuery: {str(e)}" @action @@ -638,23 +725,44 @@ class MethodSharepoint(MethodBase): - connectionReference (str, required): Microsoft connection label. - site (str, optional): Site hint. - searchQuery (str, required): Search terms or path. - - maxResults (int, optional): Maximum items to return. Default: 100. + - maxResults (int, optional): Maximum items to return. Default: 1000. """ + import time + operationId = None try: + # Init progress logger + workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + operationId = f"sharepoint_find_{workflowId}_{int(time.time())}" + + # Start progress tracking + parentOperationId = parameters.get('parentOperationId') + self.services.chat.progressLogStart( + operationId, + "Find Document Path", + "SharePoint Search", + f"Query: {parameters.get('searchQuery', '*')}", + parentOperationId=parentOperationId + ) + connectionReference = parameters.get("connectionReference") site = parameters.get("site") searchQuery = parameters.get("searchQuery", "*") - maxResults = parameters.get("maxResults", 100) + maxResults = parameters.get("maxResults", 1000) if not connectionReference: + if operationId: + self.services.chat.progressLogFinish(operationId, False) return ActionResult.isFailure(error="Connection reference is required") # Parse searchQuery to extract path, search terms, search type, and options pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery) logger.debug(f"Parsed searchQuery '{searchQuery}' -> pathQuery='{pathQuery}', fileQuery='{fileQuery}', searchType='{searchType}'") + self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection") connection = self._getMicrosoftConnection(connectionReference) if not connection: + if operationId: + self.services.chat.progressLogFinish(operationId, False) return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") # Extract site name from pathQuery if it contains Microsoft-standard path (/sites/SiteName/...) @@ -683,25 +791,34 @@ class MethodSharepoint(MethodBase): siteHintToUse = site or siteFromPath or searchOptions.get("site_hint") # Discover SharePoint sites - use targeted approach when site hint is available + self.services.chat.progressLogUpdate(operationId, 0.3, "Discovering SharePoint sites") if siteHintToUse: # When site hint is available, discover all sites first, then filter allSites = await self._discoverSharePointSites() if not allSites: + if operationId: + self.services.chat.progressLogFinish(operationId, False) return ActionResult.isFailure(error="No SharePoint sites found or accessible") sites = self._filterSitesByHint(allSites, siteHintToUse) logger.info(f"Filtered sites by site hint '{siteHintToUse}' -> {len(sites)} sites") if not sites: + if operationId: + self.services.chat.progressLogFinish(operationId, False) return ActionResult.isFailure(error=f"No SharePoint sites found matching '{siteHintToUse}'") else: # No site hint - discover all sites sites = await self._discoverSharePointSites() if not sites: + if operationId: + self.services.chat.progressLogFinish(operationId, False) return ActionResult.isFailure(error="No SharePoint sites found or accessible") # Resolve path query into search paths searchPaths = self._resolvePathQuery(pathQuery) + self.services.chat.progressLogUpdate(operationId, 0.5, f"Searching across {len(sites)} site(s)") + try: # Search across all discovered sites foundDocuments = [] @@ -763,17 +880,7 @@ class MethodSharepoint(MethodBase): resource = item # Use the same detection logic as our test - isFolder = False - if 'folder' in resource: - isFolder = True - else: - # Try to detect by URL pattern or other indicators - webUrl = resource.get('webUrl', '') - name = resource.get('name', '') - - # Check if URL has no file extension and looks like a folder path - if '.' not in name and ('/' in webUrl or '\\' in webUrl): - isFolder = True + isFolder = self.services.sharepoint.detectFolderType(resource) if isFolder: folderItems.append(item) @@ -823,17 +930,7 @@ class MethodSharepoint(MethodBase): logger.warning(f"Error extracting site info from URL {webUrl}: {e}") # Use improved folder detection logic - isFolder = False - if 'folder' in item: - isFolder = True - else: - # Try to detect by URL pattern or other indicators - name = item.get('name', '') - - # Check if URL has no file extension and looks like a folder path - if '.' not in name and ('/' in webUrl or '\\' in webUrl): - isFolder = True - + isFolder = self.services.sharepoint.detectFolderType(item) itemType = "folder" if isFolder else "file" itemPath = item.get("parentReference", {}).get("path", "") logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'") @@ -986,17 +1083,7 @@ class MethodSharepoint(MethodBase): itemName = item.get("name", "") # Use improved folder detection logic - isFolder = False - if 'folder' in item: - isFolder = True - else: - # Try to detect by URL pattern or other indicators - webUrl = item.get('webUrl', '') - name = item.get('name', '') - - # Check if URL has no file extension and looks like a folder path - if '.' not in name and ('/' in webUrl or '\\' in webUrl): - isFolder = True + isFolder = self.services.sharepoint.detectFolderType(item) itemType = "folder" if isFolder else "file" itemPath = item.get("parentReference", {}).get("path", "") @@ -1056,6 +1143,8 @@ class MethodSharepoint(MethodBase): foundDocuments = foundDocuments[:maxResults] logger.info(f"Limited results to {maxResults} items") + self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {len(foundDocuments)} document(s)") + resultData = { "searchQuery": searchQuery, "totalResults": len(foundDocuments), @@ -1066,6 +1155,8 @@ class MethodSharepoint(MethodBase): except Exception as e: logger.error(f"Error searching SharePoint: {str(e)}") + if operationId: + self.services.chat.progressLogFinish(operationId, False) return ActionResult.isFailure(error=str(e)) # Use default JSON format for output @@ -1080,6 +1171,7 @@ class MethodSharepoint(MethodBase): "hasResults": len(foundDocuments) > 0 } + self.services.chat.progressLogFinish(operationId, True) return ActionResult( success=True, documents=[ @@ -1094,6 +1186,11 @@ class MethodSharepoint(MethodBase): except Exception as e: logger.error(f"Error finding document path: {str(e)}") + if operationId: + try: + self.services.chat.progressLogFinish(operationId, False) + except: + pass return ActionResult.isFailure(error=str(e)) @action @@ -1101,7 +1198,7 @@ class MethodSharepoint(MethodBase): """ GENERAL: - Purpose: Read documents from SharePoint and extract content/metadata. - - Input requirements: connectionReference (required); optional documentList, pathObject, or pathQuery; includeMetadata. + - Input requirements: connectionReference (required); documentList or pathQuery (required); includeMetadata (optional). - Output format: Standardized ActionDocument format (documentName, documentData, mimeType). - Binary files (PDFs, etc.) are Base64-encoded in documentData. - Text files are stored as plain text in documentData. @@ -1109,9 +1206,8 @@ class MethodSharepoint(MethodBase): Parameters: - connectionReference (str, required): Microsoft connection label. - - pathObject (str, optional): Reference to a previous path result (from findDocumentPath). - - documentList (list, optional): Document list reference(s) to read (backward compatibility). - - pathQuery (str, optional): Path query if no pathObject (backward compatibility). + - documentList (list, optional): Document list reference(s) containing findDocumentPath result. + - pathQuery (str, optional): Direct path query if no documentList (e.g., /sites/SiteName/FolderPath). - includeMetadata (bool, optional): Include metadata. Default: True. Returns: @@ -1128,19 +1224,18 @@ class MethodSharepoint(MethodBase): operationId = f"sharepoint_read_{workflowId}_{int(time.time())}" # Start progress tracking + parentOperationId = parameters.get('parentOperationId') self.services.chat.progressLogStart( operationId, "Read Documents", "SharePoint Document Reading", - f"Path: {parameters.get('pathQuery', parameters.get('pathObject', '*'))}" + "Processing document list", + parentOperationId=parentOperationId ) documentList = parameters.get("documentList") - if isinstance(documentList, str): - documentList = [documentList] - connectionReference = parameters.get("connectionReference") pathQuery = parameters.get("pathQuery", "*") - pathObject = parameters.get("pathObject") + connectionReference = parameters.get("connectionReference") includeMetadata = parameters.get("includeMetadata", True) # Validate connection reference @@ -1149,7 +1244,13 @@ class MethodSharepoint(MethodBase): self.services.chat.progressLogFinish(operationId, False) return ActionResult.isFailure(error="Connection reference is required") - # Get connection first - needed for both pathObject and documentList approaches + # Require either documentList or pathQuery + if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"): + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Either documentList or pathQuery is required") + + # Get connection first self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection") connection = self._getMicrosoftConnection(connectionReference) if not connection: @@ -1157,132 +1258,27 @@ class MethodSharepoint(MethodBase): self.services.chat.progressLogFinish(operationId, False) return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") - # If pathObject is provided, extract SharePoint file IDs and read them directly - # pathObject contains the result from findDocumentPath with foundDocuments array + # Parse documentList to extract foundDocuments and site information sharePointFileIds = None sites = None - if pathObject: - if pathQuery and pathQuery != "*": - logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)") - try: - # Resolve the reference label to get the actual document list - from modules.datamodels.datamodelDocref import DocumentReferenceList - pathObjectDocuments = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([pathObject])) - if not pathObjectDocuments or len(pathObjectDocuments) == 0: + + if documentList: + foundDocuments, sites, errorMsg = await self._parseDocumentListForFoundDocuments(documentList) + if errorMsg: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=errorMsg) + + if foundDocuments: + # Extract SharePoint file IDs from foundDocuments + sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"] + if not sharePointFileIds: if operationId: self.services.chat.progressLogFinish(operationId, False) - return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}") - - # Get the first document's content (which should be the JSON from findDocumentPath) - firstDocument = pathObjectDocuments[0] - fileData = self.services.chat.getFileData(firstDocument.fileId) - if not fileData: - return ActionResult.isFailure(error=f"No file data found for document: {pathObject}") - - # Parse the JSON content - resultData = json.loads(fileData) - foundDocuments = resultData.get("foundDocuments", []) - - # If no foundDocuments, check if it's a listDocuments result (has listResults) - if not foundDocuments and "listResults" in resultData: - logger.info(f"pathObject contains listResults from listDocuments, converting to foundDocuments format") - listResults = resultData.get("listResults", []) - foundDocuments = [] - siteIdFromList = None - siteNameFromList = None - - for listResult in listResults: - siteResults = listResult.get("siteResults", []) - for siteResult in siteResults: - items = siteResult.get("items", []) - # Extract site info from first item if available - if items and not siteIdFromList: - # Try to get site info from the siteResult structure - # We need to discover sites to get the siteId - siteNameFromList = items[0].get("siteName") - - for item in items: - # Convert listDocuments item format to foundDocuments format - if item.get("type") == "file": - foundDoc = { - "id": item.get("id"), - "name": item.get("name"), - "type": "file", - "siteName": item.get("siteName"), - "siteId": None, # Will be determined from site discovery - "webUrl": item.get("webUrl"), - "fullPath": item.get("webUrl", ""), - "parentPath": item.get("parentPath", "") - } - foundDocuments.append(foundDoc) - - # Discover sites to get siteId if we have siteName - if foundDocuments and siteNameFromList and not siteIdFromList: - logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'") - allSites = await self._discoverSharePointSites() - matchingSites = self._filterSitesByHint(allSites, siteNameFromList) - if matchingSites: - siteIdFromList = matchingSites[0].get("id") - # Update all foundDocuments with siteId - for doc in foundDocuments: - doc["siteId"] = siteIdFromList - logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'") - - logger.info(f"Converted {len(foundDocuments)} files from listResults format") - - if foundDocuments: - # Extract SharePoint file IDs from foundDocuments - sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"] - if not sharePointFileIds: - return ActionResult.isFailure(error=f"No files found in pathObject '{pathObject}'") - logger.info(f"Extracted {len(sharePointFileIds)} SharePoint file IDs from pathObject '{pathObject}'") - - # Extract site information from foundDocuments - if foundDocuments: - firstDoc = foundDocuments[0] - siteName = firstDoc.get("siteName") - siteId = firstDoc.get("siteId") - - # If siteId is missing (from listDocuments conversion), discover sites to find it - if siteName and not siteId: - logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'") - allSites = await self._discoverSharePointSites() - matchingSites = self._filterSitesByHint(allSites, siteName) - if matchingSites: - siteId = matchingSites[0].get("id") - logger.info(f"Found siteId '{siteId}' for site '{siteName}'") - - if siteName and siteId: - sites = [{ - "id": siteId, - "displayName": siteName, - "webUrl": firstDoc.get("webUrl", "") - }] - logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})") - elif siteName: - # Try to get site by name - allSites = await self._discoverSharePointSites() - matchingSites = self._filterSitesByHint(allSites, siteName) - if matchingSites: - sites = [{ - "id": matchingSites[0].get("id"), - "displayName": siteName, - "webUrl": matchingSites[0].get("webUrl", "") - }] - logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})") - else: - return ActionResult.isFailure(error=f"Site '{siteName}' not found. Cannot determine target site for read operation.") - else: - return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.") - else: - return ActionResult.isFailure(error=f"No documents found in pathObject '{pathObject}'") - - except json.JSONDecodeError as e: - return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}") - except Exception as e: - return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}") + return ActionResult.isFailure(error="No files found in documentList from findDocumentPath result") + logger.info(f"Extracted {len(sharePointFileIds)} SharePoint file IDs from documentList") - # If we have SharePoint file IDs from pathObject, read them directly + # If we have SharePoint file IDs from documentList (findDocumentPath result), read them directly if sharePointFileIds and sites: # Read SharePoint files directly using their IDs readResults = [] @@ -1338,7 +1334,7 @@ class MethodSharepoint(MethodBase): if not readResults: self.services.chat.progressLogFinish(operationId, False) - return ActionResult.isFailure(error="No files could be read from pathObject") + return ActionResult.isFailure(error="No files could be read from documentList") # Convert read results to ActionDocument objects # IMPORTANT: For binary files (PDFs), store Base64-encoded content directly in documentData @@ -1442,232 +1438,24 @@ class MethodSharepoint(MethodBase): self.services.chat.progressLogFinish(operationId, True) return ActionResult.isSuccess(documents=actionDocuments) - # Fallback: Use documentList parameter (for backward compatibility) - # Validate documentList - if not documentList: - return ActionResult.isFailure(error="Document list reference is required. Either provide documentList parameter or use pathObject that contains files.") + # If no sites from documentList, try pathQuery fallback + if not sites and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*": + sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery) + if errorMsg: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=errorMsg) - # Get documents from reference - ensure documentList is a list, not a string - # documentList is already normalized above - from modules.datamodels.datamodelDocref import DocumentReferenceList - # Convert to DocumentReferenceList if needed - if isinstance(documentList, DocumentReferenceList): - docRefList = documentList - elif isinstance(documentList, list): - docRefList = DocumentReferenceList.from_string_list(documentList) - elif isinstance(documentList, str): - docRefList = DocumentReferenceList.from_string_list([documentList]) - else: - docRefList = DocumentReferenceList(references=[]) - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) - - if not chatDocuments: - return ActionResult.isFailure(error="No documents found for the provided reference") - - # Determine sites to use - strict validation: pathObject → pathQuery → ERROR + # If still no sites, return error if not sites: - # Step 2: If no pathObject, check pathQuery - if pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*": - # Validate pathQuery format - if not pathQuery.startswith('/'): - return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with Microsoft-standard syntax /sites//... e.g. /sites/company-share/Freigegebene Dokumente/Work") - - # Check if pathQuery contains search terms (words without proper path structure) - validPathPrefixes = ['/sites/', '/Documents', '/documents', '/Shared Documents', '/shared documents'] - if not any(pathQuery.startswith(prefix) for prefix in validPathPrefixes): - return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.") - - # If pathQuery starts with Microsoft-standard /sites/, try to get site directly - directSite = None - if pathQuery.startswith('/sites/'): - parsedPath = self._extractSiteFromStandardPath(pathQuery) - if parsedPath: - siteName = parsedPath.get("siteName") - # Try to get site directly by path (optimization - no need to load all 60 sites) - directSite = await self._getSiteByStandardPath(siteName) - if directSite: - logger.info(f"Got site directly by standard path - no need to discover all sites") - sites = [directSite] - else: - logger.warning(f"Could not get site directly, falling back to site discovery") - - # If we didn't get the site directly, use discovery and filtering - if not directSite: - # For pathQuery, we need to discover sites to find the specific one - allSites = await self._discoverSharePointSites() - if not allSites: - return ActionResult.isFailure(error="No SharePoint sites found or accessible") - - # If pathQuery starts with Microsoft-standard /sites/, extract site name and filter - if pathQuery.startswith('/sites/'): - parsedPath = self._extractSiteFromStandardPath(pathQuery) - if parsedPath: - siteName = parsedPath.get("siteName") - # Filter sites by name (case-insensitive substring match) - sites = self._filterSitesByHint(allSites, siteName) - if not sites: - return ActionResult.isFailure(error=f"No SharePoint site found matching '{siteName}'") - logger.info(f"Filtered to site(s) matching '{siteName}': {[s['displayName'] for s in sites]}") - else: - sites = allSites - else: - sites = allSites - else: - # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK - return ActionResult.isFailure(error="No valid read path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.") + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with file information, or pathQuery must be provided. Use findDocumentPath first to get file paths, or provide pathQuery directly.") - if not sites: - return ActionResult.isFailure(error="No valid target site determined for read operation") - - # Resolve path query into search paths - searchPaths = self._resolvePathQuery(pathQuery) - - # Process each chat document across all sites - readResults = [] - - for i, chatDocument in enumerate(chatDocuments): - try: - fileId = chatDocument.fileId - fileName = chatDocument.fileName - - # Search for this file across all sites - fileFound = False - - for site in sites: - siteId = site["id"] - siteName = site["displayName"] - siteUrl = site["webUrl"] - - # Try to find the file by name in this site - searchQuery = fileName.replace("'", "''") # Escape single quotes for OData - endpoint = f"sites/{siteId}/drive/root/search(q='{searchQuery}')" - - searchResult = await self._makeGraphApiCall(endpoint) - - if "error" in searchResult: - continue - - items = searchResult.get("value", []) - for item in items: - if item.get("name") == fileName: - # Found the file, get its details - fileId = item.get("id") - fileEndpoint = f"sites/{siteId}/drive/items/{fileId}" - - # Get file metadata - fileInfoResult = await self._makeGraphApiCall(fileEndpoint) - - if "error" in fileInfoResult: - continue - - # Build result with metadata - resultItem = { - "fileId": fileId, - "fileName": fileName, - "sharepointFileId": fileId, - "siteName": siteName, - "siteUrl": siteUrl, - "size": fileInfoResult.get("size", 0), - "createdDateTime": fileInfoResult.get("createdDateTime"), - "lastModifiedDateTime": fileInfoResult.get("lastModifiedDateTime"), - "webUrl": fileInfoResult.get("webUrl") - } - - # Add metadata if requested - if includeMetadata: - resultItem["metadata"] = { - "mimeType": fileInfoResult.get("file", {}).get("mimeType"), - "downloadUrl": fileInfoResult.get("@microsoft.graph.downloadUrl"), - "createdBy": fileInfoResult.get("createdBy", {}), - "lastModifiedBy": fileInfoResult.get("lastModifiedBy", {}), - "parentReference": fileInfoResult.get("parentReference", {}) - } - - # Get file content if it's a readable format - mimeType = fileInfoResult.get("file", {}).get("mimeType", "") - if mimeType.startswith("text/") or mimeType in [ - "application/json", "application/xml", "application/javascript" - ]: - # Download the file content - contentEndpoint = f"sites/{siteId}/drive/items/{fileId}/content" - - # For content download, we need to handle binary data - try: - async with aiohttp.ClientSession() as session: - headers = {"Authorization": f"Bearer {self.services.sharepoint._target.accessToken}"} - async with session.get(f"https://graph.microsoft.com/v1.0/{contentEndpoint}", headers=headers) as response: - if response.status == 200: - content = await response.text() - resultItem["content"] = content - else: - resultItem["content"] = f"Could not download content: HTTP {response.status}" - except Exception as e: - resultItem["content"] = f"Error downloading content: {str(e)}" - else: - resultItem["content"] = f"Binary file type ({mimeType}) - content not retrieved" - - readResults.append(resultItem) - fileFound = True - break - - if fileFound: - break - - if not fileFound: - readResults.append({ - "fileId": fileId, - "fileName": fileName, - "error": "File not found in any accessible SharePoint site", - "content": None - }) - - except Exception as e: - logger.error(f"Error reading document {chatDocument.fileName}: {str(e)}") - readResults.append({ - "fileId": chatDocument.fileId, - "fileName": chatDocument.fileName, - "error": str(e), - "content": None - }) - - resultData = { - "connectionReference": connectionReference, - "pathQuery": pathQuery, - "documentList": documentList, - "includeMetadata": includeMetadata, - "sitesSearched": len(sites), - "readResults": readResults, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - }, - "timestamp": self.services.utils.timestampGetUtc() - } - - # Use default JSON format for output - outputExtension = ".json" # Default - outputMimeType = "application/json" # Default - - validationMetadata = { - "actionType": "sharepoint.readDocuments", - "connectionReference": connectionReference, - "documentCount": len(readResults), - "includeMetadata": includeMetadata, - "sitesSearched": len(sites) - } - - return ActionResult( - success=True, - documents=[ - ActionDocument( - documentName=f"sharepoint_documents_{self._format_timestamp_for_filename()}{outputExtension}", - documentData=json.dumps(resultData, indent=2), - mimeType=outputMimeType, - validationMetadata=validationMetadata - ) - ] - ) + # This should never be reached if logic above is correct + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Unexpected error: could not process documentList or pathQuery") except Exception as e: logger.error(f"Error reading SharePoint documents: {str(e)}") if operationId: @@ -1685,286 +1473,120 @@ class MethodSharepoint(MethodBase): """ GENERAL: - Purpose: Upload documents to SharePoint. Only to choose this action with a connectionReference - - Input requirements: connectionReference (required); documentList (required); optional pathObject or pathQuery. + - Input requirements: connectionReference (required); documentList (required); pathQuery (optional). - Output format: JSON with upload status and file info. Parameters: - connectionReference (str, required): Microsoft connection label. - - pathObject (str, optional): Reference to a previous path result. - - pathQuery (str, optional): Upload target path if no pathObject. - documentList (list, required): Document reference(s) to upload. File names are taken from the documents. + - pathQuery (str, optional): Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath). """ + import time + operationId = None try: + # Init progress logger + workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + operationId = f"sharepoint_upload_{workflowId}_{int(time.time())}" + + # Start progress tracking + parentOperationId = parameters.get('parentOperationId') + self.services.chat.progressLogStart( + operationId, + "Upload Document", + "SharePoint Upload", + "Processing document list", + parentOperationId=parentOperationId + ) + connectionReference = parameters.get("connectionReference") - pathQuery = parameters.get("pathQuery") documentList = parameters.get("documentList") + pathQuery = parameters.get("pathQuery") if isinstance(documentList, str): documentList = [documentList] - pathObject = parameters.get("pathObject") - uploadPath = pathQuery - logger.debug(f"Using pathQuery: {pathQuery}") + if not connectionReference: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Connection reference is required") - if not connectionReference or not documentList: - return ActionResult.isFailure(error="Connection reference and document list are required") + if not documentList: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Document list is required") - # If pathObject is provided, extract folder IDs from it - if pathObject: - try: - # Resolve the reference label to get the actual document list - from modules.datamodels.datamodelDocref import DocumentReferenceList - documentList = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([pathObject])) - if not documentList or len(documentList) == 0: - return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}") - - # Get the first document's content (which should be the JSON) - firstDocument = documentList[0] - fileData = self.services.chat.getFileData(firstDocument.fileId) - if not fileData: - return ActionResult.isFailure(error=f"No file data found for document: {pathObject}") - - # Parse the JSON content - resultData = json.loads(fileData) - - # Debug: Log the structure of the result document - logger.info(f"Result document keys: {list(resultData.keys())}") - - # Handle different result document formats - foundDocuments = [] - - # Check if it's a direct SharePoint result (has foundDocuments) - if "foundDocuments" in resultData: - foundDocuments = resultData.get("foundDocuments", []) - logger.info(f"Found {len(foundDocuments)} documents in foundDocuments array") - # Check if it's an AI validation result (has result string with validationReport) - elif "result" in resultData and "validationReport" in resultData["result"]: - try: - # Parse the nested JSON in the result field - nestedResult = json.loads(resultData["result"]) - validationReport = nestedResult.get("validationReport", {}) - documentDetails = validationReport.get("documentDetails", {}) - - if documentDetails: - # Convert the single document details to the expected format - doc = { - "id": documentDetails.get("id"), - "name": documentDetails.get("name"), - "type": documentDetails.get("type", "").lower(), # Convert "Folder" to "folder" - "siteName": documentDetails.get("siteName"), - "siteId": documentDetails.get("siteId"), - "fullPath": documentDetails.get("fullPath"), - "webUrl": documentDetails.get("webUrl", ""), - "parentPath": documentDetails.get("parentPath", "") - } - foundDocuments = [doc] - logger.info(f"Extracted 1 document from validation report") - except json.JSONDecodeError as e: - logger.error(f"Failed to parse nested JSON in result field: {e}") - return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}") - - # Debug: Log what we found in the result document - logger.info(f"Result document contains {len(foundDocuments)} documents") - for i, doc in enumerate(foundDocuments): - logger.info(f" Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'") - - # Extract folder information from the result - folders = [] - for doc in foundDocuments: - if doc.get("type") == "folder": - folders.append(doc) - - logger.info(f"Found {len(folders)} folders in result document") - - if folders: - # Use the first folder found - prefer folder ID for direct API calls - firstFolder = folders[0] - if firstFolder.get("id"): - # Use folder ID directly for most reliable API calls - uploadPath = firstFolder.get("id") - logger.info(f"Using folder ID from pathObject: {uploadPath}") - elif firstFolder.get("fullPath"): - # Extract the correct path portion from fullPath by removing site name - fullPath = firstFolder.get("fullPath") - # fullPath format: \\SiteName\\Library\\Folder\\SubFolder - # We need to remove the first two parts (\\SiteName\\) to get the actual folder path - pathParts = fullPath.lstrip('\\').split('\\') - if len(pathParts) > 1: - # Remove the first part (site name) and reconstruct the path - actualPath = '\\'.join(pathParts[1:]) - uploadPath = actualPath - logger.info(f"Extracted path from fullPath: {uploadPath}") - else: - uploadPath = fullPath - logger.info(f"Using full path from pathObject (no site name to remove): {uploadPath}") - else: - return ActionResult.isFailure(error="No valid folder information found in pathObject") - else: - return ActionResult.isFailure(error="No folders found in pathObject") - - except json.JSONDecodeError as e: - return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}") - except Exception as e: - return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}") + # Parse documentList to extract folder path and site information + uploadPath, sites, filesToUpload, errorMsg = await self._parseDocumentListForFolder(documentList) + if errorMsg: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=errorMsg) - # Get Microsoft connection - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") + # If no folder path found from documentList, use pathQuery if provided + if not uploadPath and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*": + uploadPath = pathQuery + logger.info(f"Using pathQuery for upload path: {uploadPath}") + # Resolve sites from pathQuery + sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery) + if errorMsg: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=errorMsg) - # Get documents from reference - ensure documentList is a list, not a string - if isinstance(documentList, str): - documentList = [documentList] # Convert string to list - from modules.datamodels.datamodelDocref import DocumentReferenceList - # Convert to DocumentReferenceList if needed - if isinstance(documentList, DocumentReferenceList): - docRefList = documentList - elif isinstance(documentList, list): - docRefList = DocumentReferenceList.from_string_list(documentList) - elif isinstance(documentList, str): - docRefList = DocumentReferenceList.from_string_list([documentList]) - else: - docRefList = DocumentReferenceList(references=[]) - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) - if not chatDocuments: - return ActionResult.isFailure(error="No documents found for the provided reference") - - # Determine sites to use based on whether pathObject was provided - sites = None - if pathObject: - # When pathObject is provided, we should have specific site information - # Extract site information from the pathObject result - try: - # Get the site information from the first folder in pathObject - if 'foundDocuments' in locals() and foundDocuments: - firstFolder = foundDocuments[0] - siteName = firstFolder.get("siteName") - siteId = firstFolder.get("siteId") - - if siteName and siteId: - # Use the specific site from pathObject instead of discovering all sites - sites = [{ - "id": siteId, - "displayName": siteName, - "webUrl": firstFolder.get("webUrl", "") - }] - logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})") - else: - # Site info missing from pathObject - this is an error, not a fallback - return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for upload.") - else: - # No documents found in pathObject - this is an error - return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for upload.") - except Exception as e: - # Error processing pathObject - this is an error, not a fallback - return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for upload.") - else: - # No pathObject provided - check if pathQuery is valid - if not uploadPath or uploadPath.strip() == "" or uploadPath.strip() == "*": - return ActionResult.isFailure(error="No valid upload path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.") - - # Validate pathQuery format - if not uploadPath.startswith('/'): - return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with Microsoft-standard syntax /sites//... e.g. /sites/company-share/Freigegebene Dokumente/Work") - - # Check if uploadPath contains search terms (words without proper path structure) - validPathPrefixes = ['/sites/', '/Documents', '/documents', '/Shared Documents', '/shared documents'] - if not any(uploadPath.startswith(prefix) for prefix in validPathPrefixes): - return ActionResult.isFailure(error=f"Invalid pathQuery '{uploadPath}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.") - - # If uploadPath starts with Microsoft-standard /sites/, try to get site directly - directSite = None - if uploadPath.startswith('/sites/'): - parsedPath = self._extractSiteFromStandardPath(uploadPath) - if parsedPath: - siteName = parsedPath.get("siteName") - # Try to get site directly by path (optimization - no need to load all 60 sites) - directSite = await self._getSiteByStandardPath(siteName) - if directSite: - logger.info(f"Got site directly by standard path - no need to discover all sites") - sites = [directSite] - else: - logger.warning(f"Could not get site directly, falling back to site discovery") - - # If we didn't get the site directly, use discovery and filtering - if not directSite: - # For pathQuery, we need to discover sites to find the specific one - allSites = await self._discoverSharePointSites() - if not allSites: - return ActionResult.isFailure(error="No SharePoint sites found or accessible") - - # If uploadPath starts with Microsoft-standard /sites/, extract site name and filter - if uploadPath.startswith('/sites/'): - parsedPath = self._extractSiteFromStandardPath(uploadPath) - if parsedPath: - siteName = parsedPath.get("siteName") - # Filter sites by name (case-insensitive substring match) - sites = self._filterSitesByHint(allSites, siteName) - if not sites: - return ActionResult.isFailure(error=f"No SharePoint site found matching '{siteName}'") - logger.info(f"Filtered to site(s) matching '{siteName}': {[s['displayName'] for s in sites]}") - else: - sites = allSites - else: - sites = allSites + # Validate required parameters + if not uploadPath: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get upload folder, or provide pathQuery directly.") if not sites: - return ActionResult.isFailure(error="No valid target site determined for upload") - - # Process upload paths based on whether pathObject was provided - uploadSiteScope = None - if not pathObject: - # Parse the validated pathQuery to extract site and path information - parsed = self._extractSiteFromStandardPath(uploadPath) - - if not parsed: - return ActionResult.isFailure(error="Invalid uploadPath. Use Microsoft-standard /sites//") - - # Find matching site (already filtered above, but ensure we have the right one) - candidateSites = self._filterSitesByHint(sites, parsed["siteName"]) # substring match - # Choose exact displayName match if available - exact = [s for s in candidateSites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()] - selectedSite = exact[0] if exact else (candidateSites[0] if candidateSites else None) - if not selectedSite: - return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible") - - uploadSiteScope = selectedSite - # Use the inner path portion as the actual upload target path - # Remove document library name from path (same logic as listDocuments) - innerPath = parsed.get('innerPath', '').lstrip('/') - pathSegments = [s for s in innerPath.split('/') if s.strip()] - if len(pathSegments) > 1: - # Path has multiple segments - first might be a library name - # Try without first segment (assuming it's a library name) - innerPath = '/'.join(pathSegments[1:]) - logger.info(f"Removed first path segment (potential library name), path changed from '{parsed['innerPath']}' to '{innerPath}'") - elif len(pathSegments) == 1: - # Only one segment - if it's a common library-like name, use empty path (root) - firstSegmentLower = pathSegments[0].lower() - libraryIndicators = ['document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek'] - if any(indicator in firstSegmentLower for indicator in libraryIndicators): - innerPath = '' - logger.info(f"First segment '{pathSegments[0]}' appears to be a library name, using root") - - uploadPaths = [f"/{innerPath}" if innerPath else "/"] - sites = [selectedSite] + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Site information missing. Cannot determine target site for upload.") + + if not filesToUpload: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No files to upload found in documentList.") + + # Get connection + self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection") + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") + + # Process upload paths + uploadPaths = [] + if uploadPath.startswith('01PPXICCB') or uploadPath.startswith('01'): + # It's a folder ID - use it directly + uploadPaths = [uploadPath] + logger.info(f"Using folder ID directly for upload: {uploadPath}") else: - # When using pathObject, check if uploadPath is a folder ID or a path - if uploadPath.startswith('01PPXICCB') or uploadPath.startswith('01'): - # It's a folder ID - use it directly - uploadPaths = [uploadPath] - logger.info(f"Using folder ID directly for upload: {uploadPath}") - else: - # It's a path - resolve it normally - uploadPaths = self._resolvePathQuery(uploadPath) + # It's a path - resolve it normally + uploadPaths = self._resolvePathQuery(uploadPath) # Process each document upload uploadResults = [] # Extract file names from documents - fileNames = [doc.fileName for doc in chatDocuments] + fileNames = [doc.fileName for doc in filesToUpload] logger.info(f"Using file names from documentList: {fileNames}") - for i, (chatDocument, fileName) in enumerate(zip(chatDocuments, fileNames)): + self.services.chat.progressLogUpdate(operationId, 0.5, f"Uploading {len(filesToUpload)} document(s)") + + # Process upload paths + + # Process each document upload + uploadResults = [] + + # Extract file names from documents + fileNames = [doc.fileName for doc in filesToUpload] + logger.info(f"Using file names from documentList: {fileNames}") + + self.services.chat.progressLogUpdate(operationId, 0.5, f"Uploading {len(filesToUpload)} document(s)") + + for i, (chatDocument, fileName) in enumerate(zip(filesToUpload, fileNames)): try: fileId = chatDocument.fileId fileData = self.services.chat.getFileData(fileId) @@ -2056,11 +1678,14 @@ class MethodSharepoint(MethodBase): "error": str(e), "uploadStatus": "failed" }) + + # Update progress for each file + self.services.chat.progressLogUpdate(operationId, 0.5 + (i * 0.4 / len(filesToUpload)), f"Uploaded {i + 1}/{len(filesToUpload)} file(s)") # Create result data resultData = { "connectionReference": connectionReference, - "pathQuery": uploadPath, + "uploadPath": uploadPath, "documentList": documentList, "fileNames": fileNames, "sitesAvailable": len(sites), @@ -2087,6 +1712,10 @@ class MethodSharepoint(MethodBase): "failedUploads": len([r for r in uploadResults if r.get("uploadStatus") == "failed"]) } + successfulUploads = len([r for r in uploadResults if r.get("uploadStatus") == "success"]) + self.services.chat.progressLogUpdate(operationId, 0.9, f"Uploaded {successfulUploads}/{len(uploadResults)} file(s)") + self.services.chat.progressLogFinish(operationId, successfulUploads > 0) + return ActionResult( success=True, documents=[ @@ -2101,6 +1730,11 @@ class MethodSharepoint(MethodBase): except Exception as e: logger.error(f"Error uploading to SharePoint: {str(e)}") + if operationId: + try: + self.services.chat.progressLogFinish(operationId, False) + except: + pass return ActionResult( success=False, error=str(e) @@ -2111,226 +1745,94 @@ class MethodSharepoint(MethodBase): """ GENERAL: - Purpose: List documents and folders in SharePoint paths across sites. - - Input requirements: connectionReference (required); optional pathObject or pathQuery; includeSubfolders. + - Input requirements: connectionReference (required); documentList (required); includeSubfolders (optional). - Output format: JSON with folder items and metadata. Parameters: - connectionReference (str, required): Microsoft connection label. - - pathObject (str, optional): Reference to a previous path result. - - pathQuery (str, optional): Path query if no pathObject. + - documentList (list, required): Document list reference(s) containing findDocumentPath result. - includeSubfolders (bool, optional): Include one level of subfolders. Default: False. """ + import time + operationId = None try: + # Init progress logger + workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + operationId = f"sharepoint_list_{workflowId}_{int(time.time())}" + + # Start progress tracking + parentOperationId = parameters.get('parentOperationId') + self.services.chat.progressLogStart( + operationId, + "List Documents", + "SharePoint Listing", + "Processing document list", + parentOperationId=parentOperationId + ) + connectionReference = parameters.get("connectionReference") - pathObject = parameters.get("pathObject") - pathQuery = parameters.get("pathQuery") + documentList = parameters.get("documentList") + pathQuery = parameters.get("pathQuery", "*") + if isinstance(documentList, str): + documentList = [documentList] includeSubfolders = parameters.get("includeSubfolders", False) # Default to False for better UX - listQuery = pathQuery - logger.info(f"Using pathQuery: {pathQuery}") - if not connectionReference: + if operationId: + self.services.chat.progressLogFinish(operationId, False) return ActionResult.isFailure(error="Connection reference is required") - # If pathObject is provided, resolve the reference and extract folder IDs from it - # Note: pathObject takes precedence over pathQuery when both are provided - if pathObject: - if pathQuery and pathQuery != "*": - logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)") - try: - # Resolve the reference label to get the actual document list - from modules.datamodels.datamodelDocref import DocumentReferenceList - documentList = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list([pathObject])) - if not documentList or len(documentList) == 0: - return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}") - - # Get the first document's content (which should be the JSON) - firstDocument = documentList[0] - logger.info(f"Document fileId: {firstDocument.fileId}, fileName: {firstDocument.fileName}") - fileData = self.services.chat.getFileData(firstDocument.fileId) - if not fileData: - return ActionResult.isFailure(error=f"No file data found for document: {pathObject} (fileId: {firstDocument.fileId})") - logger.info(f"File data length: {len(fileData) if fileData else 0}") - - # Parse the JSON content - resultData = json.loads(fileData) - - # Debug: Log the structure of the result document - logger.info(f"Result document keys: {list(resultData.keys())}") - - # Handle different result document formats - foundDocuments = [] - - # Check if it's a direct SharePoint result (has foundDocuments) - if "foundDocuments" in resultData: - foundDocuments = resultData.get("foundDocuments", []) - logger.info(f"Found {len(foundDocuments)} documents in foundDocuments array") - # Check if it's an AI validation result (has result string with validationReport) - elif "result" in resultData and "validationReport" in resultData["result"]: - try: - # Parse the nested JSON in the result field - nestedResult = json.loads(resultData["result"]) - validationReport = nestedResult.get("validationReport", {}) - documentDetails = validationReport.get("documentDetails", {}) - - if documentDetails: - # Convert the single document details to the expected format - doc = { - "id": documentDetails.get("id"), - "name": documentDetails.get("name"), - "type": documentDetails.get("type", "").lower(), # Convert "Folder" to "folder" - "siteName": documentDetails.get("siteName"), - "siteId": documentDetails.get("siteId"), - "fullPath": documentDetails.get("fullPath"), - "webUrl": documentDetails.get("webUrl", ""), - "parentPath": documentDetails.get("parentPath", "") - } - foundDocuments = [doc] - logger.info(f"Extracted 1 document from validation report") - except ValueError as e: - logger.error(f"Failed to parse nested JSON in result field: {e}") - return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}") - - # Debug: Log what we found in the result document - logger.info(f"Result document contains {len(foundDocuments)} documents") - for i, doc in enumerate(foundDocuments): - logger.info(f" Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'") - - # Extract folder information from the result - folders = [] - for doc in foundDocuments: - if doc.get("type") == "folder": - folders.append(doc) - - logger.info(f"Found {len(folders)} folders in result document") - - if folders: - # Use the first folder found - prefer folder ID for direct API calls - firstFolder = folders[0] - if firstFolder.get("id"): - # Use folder ID directly for most reliable API calls - listQuery = firstFolder.get("id") - logger.info(f"Using folder ID from pathObject: {listQuery}") - elif firstFolder.get("fullPath"): - # Extract the correct path portion from fullPath by removing site name - fullPath = firstFolder.get("fullPath") - # fullPath format: \\SiteName\\Library\\Folder\\SubFolder - # We need to remove the first two parts (\\SiteName\\) to get the actual folder path - pathParts = fullPath.lstrip('\\').split('\\') - if len(pathParts) > 1: - # Remove the first part (site name) and reconstruct the path - actualPath = '\\'.join(pathParts[1:]) - listQuery = actualPath - logger.info(f"Extracted path from fullPath: {listQuery}") - else: - listQuery = fullPath - logger.info(f"Using full path from pathObject (no site name to remove): {listQuery}") - else: - return ActionResult.isFailure(error="No valid folder information found in pathObject") - else: - return ActionResult.isFailure(error="No folders found in pathObject") - - except ValueError as e: - return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}") - except Exception as e: - return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}") + # Require either documentList or pathQuery + if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"): + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Either documentList or pathQuery is required") - # Get Microsoft connection + # Parse documentList to extract folder path and site information + listQuery, sites, _, errorMsg = await self._parseDocumentListForFolder(documentList) + if errorMsg: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=errorMsg) + + # If no folder path found from documentList, use pathQuery if provided + if not listQuery and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*": + listQuery = pathQuery + logger.info(f"Using pathQuery for list query: {listQuery}") + # Resolve sites from pathQuery + sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery) + if errorMsg: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=errorMsg) + + # Validate required parameters + if not listQuery: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.") + + if not sites: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Site information missing. Cannot determine target site for list operation.") + + # Get connection + self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection") connection = self._getMicrosoftConnection(connectionReference) if not connection: + if operationId: + self.services.chat.progressLogFinish(operationId, False) return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") logger.info(f"Starting SharePoint listDocuments for listQuery: {listQuery}") logger.debug(f"Connection ID: {connection['id']}") + self.services.chat.progressLogUpdate(operationId, 0.3, "Processing folder path") + # Parse listQuery to extract path, search terms, search type, and options pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(listQuery) - # Determine sites to use - strict validation: pathObject → pathQuery → ERROR - sites = None - - # Step 1: Check pathObject first - if pathObject: - # When pathObject is provided, we should have specific site information - # Extract site information from the pathObject result - try: - # Get the site information from the first folder in pathObject - if 'foundDocuments' in locals() and foundDocuments: - firstFolder = foundDocuments[0] - siteName = firstFolder.get("siteName") - siteId = firstFolder.get("siteId") - - if siteName and siteId: - # Use the specific site from pathObject instead of discovering all sites - sites = [{ - "id": siteId, - "displayName": siteName, - "webUrl": firstFolder.get("webUrl", "") - }] - logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})") - else: - # Site info missing from pathObject - this is an error - return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for list operation.") - else: - # No documents found in pathObject - this is an error - return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for list operation.") - except Exception as e: - # Error processing pathObject - this is an error - return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for list operation.") - - # Step 2: If no pathObject, check pathQuery - elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*": - # Validate pathQuery format - if not pathQuery.startswith('/'): - return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with Microsoft-standard syntax /sites//... e.g. /sites/company-share/Freigegebene Dokumente/Work") - - # Check if pathQuery contains search terms (words without proper path structure) - validPathPrefixes = ['/sites/', '/Documents', '/documents', '/Shared Documents', '/shared documents'] - if not any(pathQuery.startswith(prefix) for prefix in validPathPrefixes): - return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.") - - # If pathQuery starts with Microsoft-standard /sites/, try to get site directly - directSite = None - if pathQuery.startswith('/sites/'): - parsedPath = self._extractSiteFromStandardPath(pathQuery) - if parsedPath: - siteName = parsedPath.get("siteName") - # Try to get site directly by path (optimization - no need to load all 60 sites) - directSite = await self._getSiteByStandardPath(siteName) - if directSite: - logger.info(f"Got site directly by standard path - no need to discover all sites") - sites = [directSite] - else: - logger.warning(f"Could not get site directly, falling back to site discovery") - - # If we didn't get the site directly, use discovery and filtering - if not directSite: - # For pathQuery, we need to discover sites to find the specific one - allSites = await self._discoverSharePointSites() - if not allSites: - return ActionResult.isFailure(error="No SharePoint sites found or accessible") - - # If pathQuery starts with Microsoft-standard /sites/, extract site name and filter - if pathQuery.startswith('/sites/'): - parsedPath = self._extractSiteFromStandardPath(pathQuery) - if parsedPath: - siteName = parsedPath.get("siteName") - # Filter sites by name (case-insensitive substring match) - sites = self._filterSitesByHint(allSites, siteName) - if not sites: - return ActionResult.isFailure(error=f"No SharePoint site found matching '{siteName}'") - logger.info(f"Filtered to site(s) matching '{siteName}': {[s['displayName'] for s in sites]}") - else: - sites = allSites - else: - sites = allSites - else: - # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK - return ActionResult.isFailure(error="No valid list path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.") - - if not sites: - return ActionResult.isFailure(error="No valid target site determined for list operation") - # Check if listQuery is a folder ID (starts with 01PPXICCB...) if listQuery.startswith('01PPXICCB') or listQuery.startswith('01'): # Direct folder ID - use it directly @@ -2375,6 +1877,8 @@ class MethodSharepoint(MethodBase): # Process each folder path across all sites listResults = [] + self.services.chat.progressLogUpdate(operationId, 0.5, f"Listing {len(folderPaths)} folder(s) across {len(sites)} site(s)") + for folderPath in folderPaths: try: folderResults = [] @@ -2413,17 +1917,7 @@ class MethodSharepoint(MethodBase): for item in items: # Use improved folder detection logic - isFolder = False - if 'folder' in item: - isFolder = True - else: - # Try to detect by URL pattern or other indicators - webUrl = item.get('webUrl', '') - name = item.get('name', '') - - # Check if URL has no file extension and looks like a folder path - if '.' not in name and ('/' in webUrl or '\\' in webUrl): - isFolder = True + isFolder = self.services.sharepoint.detectFolderType(item) itemInfo = { "id": item.get("id"), @@ -2473,17 +1967,7 @@ class MethodSharepoint(MethodBase): for subfolderItem in subfolderItems: # Use improved folder detection logic for subfolder items - subfolderIsFolder = False - if 'folder' in subfolderItem: - subfolderIsFolder = True - else: - # Try to detect by URL pattern or other indicators - subfolderWebUrl = subfolderItem.get('webUrl', '') - subfolderName = subfolderItem.get('name', '') - - # Check if URL has no file extension and looks like a folder path - if '.' not in subfolderName and ('/' in subfolderWebUrl or '\\' in subfolderWebUrl): - subfolderIsFolder = True + subfolderIsFolder = self.services.sharepoint.detectFolderType(subfolderItem) # Only add files and direct subfolders, NO RECURSION subfolderItemInfo = { @@ -2535,6 +2019,9 @@ class MethodSharepoint(MethodBase): "siteResults": [] }) + totalItems = sum(len(result.get("siteResults", [])) for result in listResults) + self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {totalItems} item(s)") + # Create result data resultData = { "pathQuery": listQuery, @@ -2554,9 +2041,10 @@ class MethodSharepoint(MethodBase): "includeSubfolders": includeSubfolders, "sitesSearched": len(sites), "folderCount": len(listResults), - "totalItems": sum(len(result.get("siteResults", [])) for result in listResults) + "totalItems": totalItems } + self.services.chat.progressLogFinish(operationId, True) return ActionResult( success=True, documents=[ @@ -2571,7 +2059,331 @@ class MethodSharepoint(MethodBase): except Exception as e: logger.error(f"Error listing SharePoint documents: {str(e)}") + if operationId: + try: + self.services.chat.progressLogFinish(operationId, False) + except: + pass return ActionResult( success=False, error=str(e) - ) \ No newline at end of file + ) + + @action + async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult: + """ + GENERAL: + - Purpose: Analyze usage intensity of folders and files in SharePoint. + - Input requirements: connectionReference (required); documentList (required); optional startDateTime, endDateTime, interval. + - Output format: JSON with usage analytics grouped by time intervals. + + Parameters: + - connectionReference (str, required): Microsoft connection label. + - documentList (list, required): Document list reference(s) containing findDocumentPath result. + - startDateTime (str, optional): Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). Default: 30 days ago. + - endDateTime (str, optional): End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). Default: current time. + - interval (str, optional): Time interval for grouping activities. Options: "day", "week", "month". Default: "day". + """ + import time + operationId = None + try: + # Init progress logger + workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + operationId = f"sharepoint_usage_{workflowId}_{int(time.time())}" + + # Start progress tracking + parentOperationId = parameters.get('parentOperationId') + self.services.chat.progressLogStart( + operationId, + "Analyze Folder Usage", + "SharePoint Analytics", + "Processing document list", + parentOperationId=parentOperationId + ) + + connectionReference = parameters.get("connectionReference") + documentList = parameters.get("documentList") + pathQuery = parameters.get("pathQuery") + if isinstance(documentList, str): + documentList = [documentList] + startDateTime = parameters.get("startDateTime") + endDateTime = parameters.get("endDateTime") + interval = parameters.get("interval", "day") + + if not connectionReference: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Connection reference is required") + + # Require either documentList or pathQuery + if not documentList and (not pathQuery or pathQuery.strip() == "" or pathQuery.strip() == "*"): + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Either documentList or pathQuery is required") + + # Resolve folder/item information from documentList or pathQuery + siteId = None + driveId = None + itemId = None + folderPath = None + folderName = None + + if documentList: + foundDocuments, sites, errorMsg = await self._parseDocumentListForFoundDocuments(documentList) + if errorMsg: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=errorMsg) + + if not foundDocuments: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No documents found in documentList") + + # Get siteId from first document (all should be from same site) + firstItem = foundDocuments[0] + siteId = firstItem.get("siteId") + if not siteId: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Site ID missing from documentList") + + # Get drive ID (needed for analytics) + driveId = await self.services.sharepoint.getDriveId(siteId) + if not driveId: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Could not determine drive ID for the site") + + # If no items from documentList, try pathQuery fallback + if not foundDocuments and pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*": + sites, errorMsg = await self._resolveSitesFromPathQuery(pathQuery) + if errorMsg: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=errorMsg) + + if sites: + siteId = sites[0].get("id") + # Parse pathQuery to find the folder/item + pathQueryParsed, fileQuery, searchType, searchOptions = self._parseSearchQuery(pathQuery) + + # Extract folder path from pathQuery + folderPath = '/' + if pathQueryParsed and pathQueryParsed.startswith('/sites/'): + parsedPath = self._extractSiteFromStandardPath(pathQueryParsed) + if parsedPath: + innerPath = parsedPath.get("innerPath", "") + folderPath = '/' + innerPath if innerPath else '/' + elif pathQueryParsed: + folderPath = pathQueryParsed + + # Get drive ID + driveId = await self.services.sharepoint.getDriveId(siteId) + if not driveId: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Could not determine drive ID for the site") + + # Get folder/item by path + folderInfo = await self.services.sharepoint.getFolderByPath(siteId, folderPath.lstrip('/')) + if not folderInfo: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=f"Folder or file not found at path: {folderPath}") + + # Add pathQuery item to foundDocuments for processing + foundDocuments = [{ + "id": folderInfo.get("id"), + "name": folderInfo.get("name", ""), + "type": "folder" if folderInfo.get("folder") else "file", + "siteId": siteId, + "fullPath": folderPath, + "webUrl": folderInfo.get("webUrl", "") + }] + + if not siteId or not driveId: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.") + + self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection") + # Get Microsoft connection + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") + + # Set access token + if not self.services.sharepoint.setAccessTokenFromConnection(connection): + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="Failed to set SharePoint access token") + + # Process all items from documentList or pathQuery + # IMPORTANT: Only analyze FOLDERS, not files (action is "analyzeFolderUsage") + itemsToAnalyze = [] + if foundDocuments: + for item in foundDocuments: + itemId = item.get("id") + itemType = item.get("type", "").lower() + + # Only process folders, skip files and site-level items + if itemId and itemType == "folder": + itemsToAnalyze.append({ + "id": itemId, + "name": item.get("name", ""), + "type": itemType, + "path": item.get("fullPath", ""), + "webUrl": item.get("webUrl", "") + }) + + if not itemsToAnalyze: + if operationId: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error="No valid folders found in documentList to analyze. Note: This action only analyzes folders, not files.") + + self.services.chat.progressLogUpdate(operationId, 0.4, f"Analyzing {len(itemsToAnalyze)} folder(s)") + + # Analyze each item + allAnalytics = [] + totalActivities = 0 + uniqueUsers = set() + activityTypes = {} + + # Compute actual date range values (getFolderUsageAnalytics will set defaults if None) + # We need to compute them here to store in output, since getFolderUsageAnalytics modifies them + actualStartDateTime = startDateTime + actualEndDateTime = endDateTime + if not actualEndDateTime: + actualEndDateTime = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + if not actualStartDateTime: + startDate = datetime.now(timezone.utc) - timedelta(days=30) + actualStartDateTime = startDate.isoformat().replace('+00:00', 'Z') + + for idx, item in enumerate(itemsToAnalyze): + progress = 0.4 + (idx / len(itemsToAnalyze)) * 0.5 + self.services.chat.progressLogUpdate(operationId, progress, f"Analyzing folder {item['name']} ({idx+1}/{len(itemsToAnalyze)})") + + # Get usage analytics for this folder + analyticsResult = await self.services.sharepoint.getFolderUsageAnalytics( + siteId=siteId, + driveId=driveId, + itemId=item["id"], + startDateTime=startDateTime, + endDateTime=endDateTime, + interval=interval + ) + + if "error" in analyticsResult: + logger.warning(f"Failed to get analytics for item {item['name']} ({item['id']}): {analyticsResult['error']}") + # Continue with other items even if one fails + itemAnalytics = { + "itemId": item["id"], + "itemName": item["name"], + "itemType": item["type"], + "itemPath": item["path"], + "error": analyticsResult.get("error", "Unknown error") + } + else: + # Process analytics for this item + itemActivities = 0 + itemUsers = set() + itemActivityTypes = {} + + if "value" in analyticsResult: + for intervalData in analyticsResult["value"]: + activities = intervalData.get("activities", []) + for activity in activities: + itemActivities += 1 + totalActivities += 1 + + action = activity.get("action", {}) + actionType = action.get("verb", "unknown") + itemActivityTypes[actionType] = itemActivityTypes.get(actionType, 0) + 1 + activityTypes[actionType] = activityTypes.get(actionType, 0) + 1 + + actor = activity.get("actor", {}) + userPrincipalName = actor.get("userPrincipalName", "") + if userPrincipalName: + itemUsers.add(userPrincipalName) + uniqueUsers.add(userPrincipalName) + + itemAnalytics = { + "itemId": item["id"], + "itemName": item["name"], + "itemType": item["type"], + "itemPath": item["path"], + "webUrl": item["webUrl"], + "analytics": analyticsResult, + "summary": { + "totalActivities": itemActivities, + "uniqueUsers": len(itemUsers), + "activityTypes": itemActivityTypes + } + } + + # Include note if analytics are not available + if "note" in analyticsResult: + itemAnalytics["note"] = analyticsResult["note"] + + allAnalytics.append(itemAnalytics) + + self.services.chat.progressLogUpdate(operationId, 0.9, "Processing analytics data") + + # Process and format analytics data + resultData = { + "siteId": siteId, + "driveId": driveId, + "startDateTime": actualStartDateTime, # Store computed date range (not None) + "endDateTime": actualEndDateTime, # Store computed date range (not None) + "interval": interval, + "itemsAnalyzed": len(itemsToAnalyze), + "foldersAnalyzed": len([item for item in allAnalytics if item.get("itemType") == "folder"]), + "items": allAnalytics, + "summary": { + "totalActivities": totalActivities, + "uniqueUsers": len(uniqueUsers), + "activityTypes": activityTypes + }, + "note": f"Analyzed {len(itemsToAnalyze)} folder(s) from {actualStartDateTime} to {actualEndDateTime}. " + + f"Found {totalActivities} total activities across {len(uniqueUsers)} unique user(s)." + + (f" Note: {len([item for item in allAnalytics if 'error' in item])} folder(s) had errors or no analytics data available." if any('error' in item for item in allAnalytics) else ""), + "timestamp": self.services.utils.timestampGetUtc() + } + + self.services.chat.progressLogUpdate(operationId, 0.95, f"Found {totalActivities} total activities across {len(itemsToAnalyze)} folder(s)") + + validationMetadata = { + "actionType": "sharepoint.analyzeFolderUsage", + "itemsAnalyzed": len(itemsToAnalyze), + "interval": interval, + "totalActivities": totalActivities, + "uniqueUsers": len(uniqueUsers) + } + + self.services.chat.progressLogFinish(operationId, True) + return ActionResult( + success=True, + documents=[ + ActionDocument( + documentName=f"sharepoint_usage_analysis_{self._format_timestamp_for_filename()}.json", + documentData=json.dumps(resultData, indent=2), + mimeType="application/json", + validationMetadata=validationMetadata + ) + ] + ) + + except Exception as e: + logger.error(f"Error analyzing folder usage: {str(e)}") + if operationId: + try: + self.services.chat.progressLogFinish(operationId, False) + except: + pass + return ActionResult( + success=False, + error=str(e) + ) \ No newline at end of file diff --git a/modules/workflows/processing/core/actionExecutor.py b/modules/workflows/processing/core/actionExecutor.py index f9af58e7..f183c0e4 100644 --- a/modules/workflows/processing/core/actionExecutor.py +++ b/modules/workflows/processing/core/actionExecutor.py @@ -82,6 +82,35 @@ class ActionExecutor: enhancedParameters['expectedDocumentFormats'] = action.expectedDocumentFormats logger.info(f"Expected formats: {action.expectedDocumentFormats}") + # Get current task execution operationId to pass as parent to action methods + # This MUST be the "Service Workflow Execution" operation ID (taskExec_*) + parentOperationId = None + try: + progressLogger = self.services.chat.createProgressLogger() + activeOperations = progressLogger.getActiveOperations() + logger.debug(f"Looking for parent operation ID. Active operations: {list(activeOperations.keys())}") + + # Look for task execution operation (starts with "taskExec_") + # This is the "Service Workflow Execution" level that should be parent of ALL actions + for opId in activeOperations.keys(): + if opId.startswith("taskExec_"): + parentOperationId = opId + logger.info(f"Found parent operation ID: {parentOperationId} for action {action.execMethod}.{action.execAction}") + break + + if not parentOperationId: + logger.warning(f"No taskExec_ operation found in active operations. Active operations: {list(activeOperations.keys())}") + except Exception as e: + logger.error(f"Error getting parent operation ID: {str(e)}") + + # Add parentOperationId to parameters so action methods can use it + # This is critical for UI dashboard hierarchical display + if parentOperationId: + enhancedParameters['parentOperationId'] = parentOperationId + logger.info(f"Passing parentOperationId '{parentOperationId}' to action {action.execMethod}.{action.execAction}") + else: + logger.warning(f"WARNING: No parentOperationId found for action {action.execMethod}.{action.execAction}. Action logs will appear at root level!") + # Check workflow status before executing the action checkWorkflowStopped(self.services)