# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Analyze Folder Usage action for SharePoint operations.
Analyzes usage intensity of folders and files in SharePoint.
"""

import logging
import time
import json
from datetime import datetime, timezone, timedelta
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument

logger = logging.getLogger(__name__)


@action
async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Analyze usage intensity of folders and files in SharePoint.
    - Input requirements: connectionReference (required); documentList (required); optional startDateTime, endDateTime, interval.
    - Output format: JSON with usage analytics grouped by time intervals.

    Parameters:
    - connectionReference (str, required): Microsoft connection label.
    - documentList (list, required): Document list reference(s) containing findDocumentPath result.
    - startDateTime (str, optional): Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). Default: 30 days ago.
    - endDateTime (str, optional): End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). Default: current time.
    - interval (str, optional): Time interval for grouping activities. Options: "day", "week", "month". Default: "day".
    """
    # NOTE(review): the docstring above is kept verbatim because it looks like a
    # structured action description that the @action decorator may consume -
    # confirm before editing it. The code additionally reads two parameters the
    # docstring does not mention:
    #   - pathQuery: used to locate the folder when documentList is empty
    #     (so documentList is not strictly required).
    #   - parentOperationId: forwarded to progressLogStart.
    #
    # Returns an ActionResult. On success it carries one JSON ActionDocument with
    # per-folder analytics and an aggregated summary; every failure path closes
    # the progress log and returns a failure result instead of raising.
    operationId = None

    def _fail(message: str) -> ActionResult:
        # Close the progress log (if one was opened) and build the failure result.
        if operationId:
            self.services.chat.progressLogFinish(operationId, False)
        return ActionResult.isFailure(error=message)

    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_usage_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Analyze Folder Usage",
            "SharePoint Analytics",
            "Processing document list",
            parentOperationId=parentOperationId
        )

        connectionReference = parameters.get("connectionReference")
        documentList = parameters.get("documentList")
        pathQuery = parameters.get("pathQuery")
        if isinstance(documentList, str):
            documentList = [documentList]
        startDateTime = parameters.get("startDateTime")
        endDateTime = parameters.get("endDateTime")
        interval = parameters.get("interval", "day")

        if not connectionReference:
            return _fail("Connection reference is required")

        # Resolve site, drive and the candidate items from documentList or pathQuery.
        # NOTE(review): getDriveId / getFolderByPath below run before
        # setAccessTokenFromConnection is called further down. If these helpers
        # need a fresh token, the connection setup should move up - confirm.
        siteId = None
        driveId = None
        foundDocuments = None

        if documentList:
            foundDocuments, _sites, errorMsg = await self.documentParsing.parseDocumentListForFoundDocuments(documentList)
            if errorMsg:
                return _fail(errorMsg)
            if not foundDocuments:
                return _fail("No documents found in documentList")

            # Get siteId from first document (all should be from same site)
            siteId = foundDocuments[0].get("siteId")
            if not siteId:
                return _fail("Site ID missing from documentList")

            # Get drive ID (needed for analytics)
            driveId = await self.services.sharepoint.getDriveId(siteId)
            if not driveId:
                return _fail("Could not determine drive ID for the site")
        else:
            # Without a documentList a usable pathQuery is mandatory
            # (empty and the "*" wildcard are rejected).
            if not pathQuery or pathQuery.strip() in ("", "*"):
                return _fail("Either documentList or pathQuery is required")

            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return _fail(errorMsg)
            if sites:
                siteId = sites[0].get("id")
            if not siteId:
                # Do not call the drive/folder lookups with a missing site id.
                return _fail(f"No SharePoint site could be resolved from pathQuery: {pathQuery}")

            # Parse pathQuery to find the folder/item; only the path part is used here.
            pathQueryParsed, _fileQuery, _searchType, _searchOptions = self.pathProcessing.parseSearchQuery(pathQuery)

            # Extract folder path from pathQuery (defaults to the drive root)
            folderPath = '/'
            if pathQueryParsed and pathQueryParsed.startswith('/sites/'):
                parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQueryParsed)
                if parsedPath:
                    innerPath = parsedPath.get("innerPath", "")
                    folderPath = '/' + innerPath if innerPath else '/'
            elif pathQueryParsed:
                folderPath = pathQueryParsed

            # Get drive ID
            driveId = await self.services.sharepoint.getDriveId(siteId)
            if not driveId:
                return _fail("Could not determine drive ID for the site")

            # Get folder/item by path
            folderInfo = await self.services.sharepoint.getFolderByPath(siteId, folderPath.lstrip('/'))
            if not folderInfo:
                return _fail(f"Folder or file not found at path: {folderPath}")

            # Wrap the pathQuery item so it is processed like a documentList entry
            foundDocuments = [{
                "id": folderInfo.get("id"),
                "name": folderInfo.get("name", ""),
                "type": "folder" if folderInfo.get("folder") else "file",
                "siteId": siteId,
                "fullPath": folderPath,
                "webUrl": folderInfo.get("webUrl", "")
            }]

        # Defensive safety net: both branches above already guarantee these values.
        if not siteId or not driveId:
            return _fail("Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")

        self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")

        # Get Microsoft connection
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return _fail("No valid Microsoft connection found for the provided connection reference")

        # Set access token
        if not self.services.sharepoint.setAccessTokenFromConnection(connection):
            return _fail("Failed to set SharePoint access token")

        # Process all items from documentList or pathQuery
        # IMPORTANT: Only analyze FOLDERS, not files (action is "analyzeFolderUsage")
        itemsToAnalyze = []
        for item in foundDocuments or []:
            itemId = item.get("id")
            itemType = item.get("type", "").lower()
            # Only process folders, skip files and site-level items
            if itemId and itemType == "folder":
                itemsToAnalyze.append({
                    "id": itemId,
                    "name": item.get("name", ""),
                    "type": itemType,
                    "path": item.get("fullPath", ""),
                    "webUrl": item.get("webUrl", "")
                })

        if not itemsToAnalyze:
            return _fail("No valid folders found in documentList to analyze. Note: This action only analyzes folders, not files.")

        self.services.chat.progressLogUpdate(operationId, 0.4, f"Analyzing {len(itemsToAnalyze)} folder(s)")

        # Aggregates across all analyzed folders
        allAnalytics = []
        totalActivities = 0
        uniqueUsers = set()
        activityTypes = {}

        # Compute the effective date range for the report. The raw (possibly None)
        # values are still what is passed to getFolderUsageAnalytics, which is
        # expected to apply its own defaults, so the reported range is an
        # approximation of the one actually queried when no dates were supplied.
        actualStartDateTime = startDateTime
        actualEndDateTime = endDateTime
        if not actualEndDateTime:
            actualEndDateTime = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
        if not actualStartDateTime:
            startDate = datetime.now(timezone.utc) - timedelta(days=30)
            actualStartDateTime = startDate.isoformat().replace('+00:00', 'Z')

        folderCount = len(itemsToAnalyze)
        for idx, item in enumerate(itemsToAnalyze):
            # Per-folder progress is spread over the 0.4 - 0.9 range
            progress = 0.4 + (idx / folderCount) * 0.5
            self.services.chat.progressLogUpdate(operationId, progress, f"Analyzing folder {item['name']} ({idx+1}/{len(itemsToAnalyze)})")

            # Get usage analytics for this folder
            analyticsResult = await self.services.sharepoint.getFolderUsageAnalytics(
                siteId=siteId,
                driveId=driveId,
                itemId=item["id"],
                startDateTime=startDateTime,
                endDateTime=endDateTime,
                interval=interval
            )

            if "error" in analyticsResult:
                logger.warning(f"Failed to get analytics for item {item['name']} ({item['id']}): {analyticsResult['error']}")
                # Continue with other items even if one fails
                itemAnalytics = {
                    "itemId": item["id"],
                    "itemName": item["name"],
                    "itemType": item["type"],
                    "itemPath": item["path"],
                    "error": analyticsResult.get("error", "Unknown error")
                }
            else:
                # Process analytics for this item
                itemActivities = 0
                itemUsers = set()
                itemActivityTypes = {}

                if "value" in analyticsResult:
                    for intervalData in analyticsResult["value"]:
                        # "or" fallbacks tolerate keys that are present but null
                        for activity in intervalData.get("activities") or []:
                            itemActivities += 1
                            totalActivities += 1

                            actionInfo = activity.get("action") or {}
                            actionType = actionInfo.get("verb", "unknown")
                            itemActivityTypes[actionType] = itemActivityTypes.get(actionType, 0) + 1
                            activityTypes[actionType] = activityTypes.get(actionType, 0) + 1

                            actorInfo = activity.get("actor") or {}
                            userPrincipalName = actorInfo.get("userPrincipalName", "")
                            if userPrincipalName:
                                itemUsers.add(userPrincipalName)
                                uniqueUsers.add(userPrincipalName)

                itemAnalytics = {
                    "itemId": item["id"],
                    "itemName": item["name"],
                    "itemType": item["type"],
                    "itemPath": item["path"],
                    "webUrl": item["webUrl"],
                    "analytics": analyticsResult,
                    "summary": {
                        "totalActivities": itemActivities,
                        "uniqueUsers": len(itemUsers),
                        "activityTypes": itemActivityTypes
                    }
                }

                # Include note if analytics are not available
                if "note" in analyticsResult:
                    itemAnalytics["note"] = analyticsResult["note"]

            allAnalytics.append(itemAnalytics)

        self.services.chat.progressLogUpdate(operationId, 0.9, "Processing analytics data")

        # Process and format analytics data
        failedCount = sum(1 for entry in allAnalytics if 'error' in entry)
        noteText = (
            f"Analyzed {len(itemsToAnalyze)} folder(s) from {actualStartDateTime} to {actualEndDateTime}. "
            + f"Found {totalActivities} total activities across {len(uniqueUsers)} unique user(s)."
            + (f" Note: {failedCount} folder(s) had errors or no analytics data available." if failedCount else "")
        )
        resultData = {
            "siteId": siteId,
            "driveId": driveId,
            "startDateTime": actualStartDateTime,  # Store computed date range (not None)
            "endDateTime": actualEndDateTime,  # Store computed date range (not None)
            "interval": interval,
            "itemsAnalyzed": len(itemsToAnalyze),
            "foldersAnalyzed": sum(1 for entry in allAnalytics if entry.get("itemType") == "folder"),
            "items": allAnalytics,
            "summary": {
                "totalActivities": totalActivities,
                "uniqueUsers": len(uniqueUsers),
                "activityTypes": activityTypes
            },
            "note": noteText,
            "timestamp": self.services.utils.timestampGetUtc()
        }

        self.services.chat.progressLogUpdate(operationId, 0.95, f"Found {totalActivities} total activities across {len(itemsToAnalyze)} folder(s)")

        validationMetadata = {
            "actionType": "sharepoint.analyzeFolderUsage",
            "itemsAnalyzed": len(itemsToAnalyze),
            "interval": interval,
            "totalActivities": totalActivities,
            "uniqueUsers": len(uniqueUsers)
        }

        self.services.chat.progressLogFinish(operationId, True)
        return ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_usage_analysis", "json", None, "analyzeFolderUsage"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType="application/json",
                    validationMetadata=validationMetadata
                )
            ]
        )

    except Exception as e:
        # Top-level boundary: log with traceback and convert to a failure result.
        logger.exception(f"Error analyzing folder usage: {str(e)}")
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                # Best-effort cleanup; never mask the original error, but do not
                # swallow KeyboardInterrupt/SystemExit as a bare except would.
                logger.debug(f"Could not close progress log {operationId}: {finishError}")
        return ActionResult(
            success=False,
            error=str(e)
        )