# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

import logging
import time
import json
from datetime import datetime, timezone, timedelta
from typing import Dict, Any

from modules.datamodels.datamodelChatbot import ActionResult, ActionDocument

logger = logging.getLogger(__name__)


def _utcIsoZ(moment: datetime) -> str:
    """Format a UTC-aware datetime as an ISO-8601 string ending in 'Z'."""
    return moment.isoformat().replace('+00:00', 'Z')


def _summarizeActivities(analyticsResult: Dict[str, Any]):
    """Count activities, distinct users and verbs in one analytics payload.

    Returns a ``(activityCount, userSet, verbCounts)`` tuple. Missing or
    ``None`` sections (``value``, ``activities``, ``action``, ``actor``) are
    treated as empty so a sparse payload cannot abort the whole analysis.
    """
    activityCount = 0
    users = set()
    verbCounts: Dict[Any, int] = {}

    for intervalData in analyticsResult.get("value") or []:
        if not isinstance(intervalData, dict):
            continue
        for activity in intervalData.get("activities") or []:
            if not isinstance(activity, dict):
                continue
            activityCount += 1

            action = activity.get("action") or {}
            verb = action.get("verb", "unknown")
            verbCounts[verb] = verbCounts.get(verb, 0) + 1

            actor = activity.get("actor") or {}
            userPrincipalName = actor.get("userPrincipalName", "")
            if userPrincipalName:
                users.add(userPrincipalName)

    return activityCount, users, verbCounts


async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
    """Collect usage analytics for one or more SharePoint folders.

    Keys read from ``parameters``:
        connectionReference: required; used to obtain the Microsoft connection.
        documentList: a document-list reference or list of them (a single
            string is wrapped into a list).
        pathQuery: used when no documentList is given; must not be empty
            or ``"*"``.
        startDateTime / endDateTime: optional range bounds, forwarded as-is.
        interval: aggregation interval, defaults to ``"day"``.
        parentOperationId: optional parent for progress logging.

    Only items whose ``type`` is ``"folder"`` are analysed; files are skipped.

    Returns:
        A successful ActionResult carrying a single JSON ActionDocument with
        per-folder analytics and an overall summary, or a failure ActionResult
        describing what went wrong. Exceptions are caught and converted into
        a failure result; they are not propagated to the caller.
    """
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_usage_{workflowId}_{int(time.time())}"

        def fail(message: str) -> ActionResult:
            # Single exit path for validation failures: close the progress
            # entry, then hand back a failure result.
            if operationId:
                self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error=message)

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Analyze Folder Usage",
            "SharePoint Analytics",
            "Processing document list",
            parentOperationId=parentOperationId
        )

        connectionReference = parameters.get("connectionReference")
        documentList = parameters.get("documentList")
        pathQuery = parameters.get("pathQuery")
        if isinstance(documentList, str):
            documentList = [documentList]
        startDateTime = parameters.get("startDateTime")
        endDateTime = parameters.get("endDateTime")
        interval = parameters.get("interval", "day")

        if not connectionReference:
            return fail("Connection reference is required")

        # Require either documentList or pathQuery
        if not documentList and (not pathQuery or pathQuery.strip() in ("", "*")):
            return fail("Either documentList or pathQuery is required")

        # Resolve folder/item information from documentList or pathQuery
        siteId = None
        driveId = None
        foundDocuments = None

        if documentList:
            foundDocuments, sites, errorMsg = await self.documentParsing.parseDocumentListForFoundDocuments(documentList)
            if errorMsg:
                return fail(errorMsg)
            if not foundDocuments:
                return fail("No documents found in documentList")

            # Get siteId from first document (all should be from same site)
            siteId = foundDocuments[0].get("siteId")
            if not siteId:
                return fail("Site ID missing from documentList")

            # Get drive ID (needed for analytics)
            driveId = await self.services.sharepoint.getDriveId(siteId)
            if not driveId:
                return fail("Could not determine drive ID for the site")

        # If no items from documentList, try pathQuery fallback
        if not foundDocuments and pathQuery and pathQuery.strip() not in ("", "*"):
            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return fail(errorMsg)

            if sites:
                siteId = sites[0].get("id")

                # Parse pathQuery to find the folder/item; only the path part
                # is needed here.
                pathQueryParsed, _fileQuery, _searchType, _searchOptions = self.pathProcessing.parseSearchQuery(pathQuery)

                # Extract folder path from pathQuery
                folderPath = '/'
                if pathQueryParsed and pathQueryParsed.startswith('/sites/'):
                    parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQueryParsed)
                    if parsedPath:
                        innerPath = parsedPath.get("innerPath", "")
                        folderPath = '/' + innerPath if innerPath else '/'
                elif pathQueryParsed:
                    folderPath = pathQueryParsed

                # Get drive ID
                driveId = await self.services.sharepoint.getDriveId(siteId)
                if not driveId:
                    return fail("Could not determine drive ID for the site")

                # Get folder/item by path
                folderInfo = await self.services.sharepoint.getFolderByPath(siteId, folderPath.lstrip('/'))
                if not folderInfo:
                    return fail(f"Folder or file not found at path: {folderPath}")

                # Add pathQuery item to foundDocuments for processing
                foundDocuments = [{
                    "id": folderInfo.get("id"),
                    "name": folderInfo.get("name", ""),
                    "type": "folder" if folderInfo.get("folder") else "file",
                    "siteId": siteId,
                    "fullPath": folderPath,
                    "webUrl": folderInfo.get("webUrl", "")
                }]

        if not siteId or not driveId:
            return fail("Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")

        self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")

        # NOTE(review): the access token is only set here, after getDriveId /
        # getFolderByPath have already been called above. Presumably a token
        # from an earlier step is still active -- confirm, or move this
        # section ahead of the site/drive resolution.
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return fail("No valid Microsoft connection found for the provided connection reference")

        # Set access token
        if not self.services.sharepoint.setAccessTokenFromConnection(connection):
            return fail("Failed to set SharePoint access token")

        # IMPORTANT: Only analyze FOLDERS, not files (action is "analyzeFolderUsage").
        # A missing or None "type" is treated as "not a folder".
        itemsToAnalyze = [
            {
                "id": item.get("id"),
                "name": item.get("name", ""),
                "type": str(item.get("type") or "").lower(),
                "path": item.get("fullPath", ""),
                "webUrl": item.get("webUrl", "")
            }
            for item in (foundDocuments or [])
            if item.get("id") and str(item.get("type") or "").lower() == "folder"
        ]

        if not itemsToAnalyze:
            return fail("No valid folders found in documentList to analyze. Note: This action only analyzes folders, not files.")

        folderCount = len(itemsToAnalyze)
        self.services.chat.progressLogUpdate(operationId, 0.4, f"Analyzing {folderCount} folder(s)")

        allAnalytics = []
        totalActivities = 0
        uniqueUsers = set()
        activityTypes = {}

        # Compute the effective date range for the report. The raw (possibly
        # None) values are still what gets forwarded to the service below.
        # NOTE(review): assumes the service applies the same defaults
        # (now / now - 30 days) -- confirm against getFolderUsageAnalytics.
        now = datetime.now(timezone.utc)
        actualEndDateTime = endDateTime or _utcIsoZ(now)
        actualStartDateTime = startDateTime or _utcIsoZ(now - timedelta(days=30))

        for idx, item in enumerate(itemsToAnalyze):
            progress = 0.4 + (idx / folderCount) * 0.5
            self.services.chat.progressLogUpdate(operationId, progress, f"Analyzing folder {item['name']} ({idx+1}/{folderCount})")

            # Get usage analytics for this folder
            analyticsResult = await self.services.sharepoint.getFolderUsageAnalytics(
                siteId=siteId,
                driveId=driveId,
                itemId=item["id"],
                startDateTime=startDateTime,
                endDateTime=endDateTime,
                interval=interval
            )

            # A None / non-dict response is recorded as a per-item error so
            # the remaining folders are still analysed.
            if not isinstance(analyticsResult, dict):
                analyticsResult = {"error": "No analytics data returned"}

            if "error" in analyticsResult:
                logger.warning(
                    "Failed to get analytics for item %s (%s): %s",
                    item['name'], item['id'], analyticsResult['error']
                )
                # Continue with other items even if one fails
                itemAnalytics = {
                    "itemId": item["id"],
                    "itemName": item["name"],
                    "itemType": item["type"],
                    "itemPath": item["path"],
                    "webUrl": item["webUrl"],
                    "error": analyticsResult.get("error", "Unknown error")
                }
            else:
                itemActivities, itemUsers, itemActivityTypes = _summarizeActivities(analyticsResult)

                # Fold the per-item numbers into the overall totals.
                totalActivities += itemActivities
                uniqueUsers |= itemUsers
                for verb, count in itemActivityTypes.items():
                    activityTypes[verb] = activityTypes.get(verb, 0) + count

                itemAnalytics = {
                    "itemId": item["id"],
                    "itemName": item["name"],
                    "itemType": item["type"],
                    "itemPath": item["path"],
                    "webUrl": item["webUrl"],
                    "analytics": analyticsResult,
                    "summary": {
                        "totalActivities": itemActivities,
                        "uniqueUsers": len(itemUsers),
                        "activityTypes": itemActivityTypes
                    }
                }

                # Include note if analytics are not available
                if "note" in analyticsResult:
                    itemAnalytics["note"] = analyticsResult["note"]

            allAnalytics.append(itemAnalytics)

        self.services.chat.progressLogUpdate(operationId, 0.9, "Processing analytics data")

        errorCount = sum(1 for entry in allAnalytics if 'error' in entry)
        errorNote = (
            f" Note: {errorCount} folder(s) had errors or no analytics data available."
            if errorCount else ""
        )

        # Process and format analytics data
        resultData = {
            "siteId": siteId,
            "driveId": driveId,
            "startDateTime": actualStartDateTime,  # Store computed date range (not None)
            "endDateTime": actualEndDateTime,  # Store computed date range (not None)
            "interval": interval,
            "itemsAnalyzed": folderCount,
            "foldersAnalyzed": sum(1 for entry in allAnalytics if entry.get("itemType") == "folder"),
            "items": allAnalytics,
            "summary": {
                "totalActivities": totalActivities,
                "uniqueUsers": len(uniqueUsers),
                "activityTypes": activityTypes
            },
            "note": f"Analyzed {folderCount} folder(s) from {actualStartDateTime} to {actualEndDateTime}. "
                    + f"Found {totalActivities} total activities across {len(uniqueUsers)} unique user(s)."
                    + errorNote,
            "timestamp": self.services.utils.timestampGetUtc()
        }

        self.services.chat.progressLogUpdate(operationId, 0.95, f"Found {totalActivities} total activities across {folderCount} folder(s)")

        validationMetadata = {
            "actionType": "sharepoint.analyzeFolderUsage",
            "itemsAnalyzed": folderCount,
            "interval": interval,
            "totalActivities": totalActivities,
            "uniqueUsers": len(uniqueUsers)
        }

        self.services.chat.progressLogFinish(operationId, True)
        return ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_usage_analysis", "json", None, "analyzeFolderUsage"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType="application/json",
                    validationMetadata=validationMetadata
                )
            ]
        )

    except Exception as e:
        # Top-level boundary: log with traceback and convert to a failure result.
        logger.exception("Error analyzing folder usage: %s", e)
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                # Best effort only: a progress-log failure must not mask the
                # original error being reported to the caller.
                logger.debug("Could not close progress log %s", operationId, exc_info=True)
        return ActionResult(
            success=False,
            error=str(e)
        )