gateway/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py
2025-12-17 10:45:09 +01:00

337 lines
16 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Analyze Folder Usage action for SharePoint operations.
Analyzes usage intensity of folders and files in SharePoint.
"""
import logging
import time
import json
from datetime import datetime, timezone, timedelta
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Analyze usage intensity of folders and files in SharePoint.
    - Input requirements: connectionReference (required); documentList (required); optional startDateTime, endDateTime, interval.
    - Output format: JSON with usage analytics grouped by time intervals.
    Parameters:
    - connectionReference (str, required): Microsoft connection label.
    - documentList (list, required): Document list reference(s) containing findDocumentPath result.
    - startDateTime (str, optional): Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). Default: 30 days ago.
    - endDateTime (str, optional): End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). Default: current time.
    - interval (str, optional): Time interval for grouping activities. Options: "day", "week", "month". Default: "day".
    """
    # Developer notes (kept out of the docstring on purpose):
    # - NOTE(review): the docstring above uses the GENERAL/Parameters layout;
    #   presumably it is read by the @action machinery, so its text is left
    #   untouched here - confirm before rewording it.
    # - Besides the documented parameters the code also reads "pathQuery"
    #   (used when documentList is empty) and "parentOperationId" (forwarded
    #   to the progress log).
    # - Returns an ActionResult carrying one JSON ActionDocument on success,
    #   or a failure ActionResult with an error message. Exceptions are caught
    #   and converted into a failure result; nothing is re-raised.
    # - Only items whose type is "folder" are analysed; files are skipped.
    operationId = None

    def fail(message: str) -> ActionResult:
        """Mark the progress log as failed (if started) and build the failure result."""
        if operationId:
            self.services.chat.progressLogFinish(operationId, False)
        return ActionResult.isFailure(error=message)

    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_usage_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Analyze Folder Usage",
            "SharePoint Analytics",
            "Processing document list",
            parentOperationId=parentOperationId
        )

        connectionReference = parameters.get("connectionReference")
        documentList = parameters.get("documentList")
        pathQuery = parameters.get("pathQuery")
        if isinstance(documentList, str):
            documentList = [documentList]
        startDateTime = parameters.get("startDateTime")
        endDateTime = parameters.get("endDateTime")
        interval = parameters.get("interval", "day")

        if not connectionReference:
            return fail("Connection reference is required")

        # A usable pathQuery is a non-blank string other than the "*" wildcard.
        # The isinstance guard keeps a non-string value from raising on .strip().
        hasPathQuery = isinstance(pathQuery, str) and pathQuery.strip() not in ("", "*")

        # Require either documentList or pathQuery
        if not documentList and not hasPathQuery:
            return fail("Either documentList or pathQuery is required")

        # Resolve folder/item information from documentList or pathQuery.
        # NOTE(review): getDriveId/getFolderByPath below run before the access
        # token is set from the connection further down - confirm the
        # SharePoint service already holds a valid token at this point.
        siteId = None
        driveId = None
        foundDocuments = None

        if documentList:
            foundDocuments, _sites, errorMsg = await self.documentParsing.parseDocumentListForFoundDocuments(documentList)
            if errorMsg:
                return fail(errorMsg)
            if not foundDocuments:
                return fail("No documents found in documentList")

            # Get siteId from first document (all should be from same site)
            siteId = foundDocuments[0].get("siteId")
            if not siteId:
                return fail("Site ID missing from documentList")

            # Get drive ID (needed for analytics)
            driveId = await self.services.sharepoint.getDriveId(siteId)
            if not driveId:
                return fail("Could not determine drive ID for the site")

        # If no items from documentList, try pathQuery fallback
        if not foundDocuments and hasPathQuery:
            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return fail(errorMsg)

            if sites:
                siteId = sites[0].get("id")

                # Parse pathQuery to find the folder/item (only the path part is used)
                pathQueryParsed, _fileQuery, _searchType, _searchOptions = self.pathProcessing.parseSearchQuery(pathQuery)

                # Extract folder path from pathQuery
                folderPath = '/'
                if pathQueryParsed and pathQueryParsed.startswith('/sites/'):
                    parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQueryParsed)
                    if parsedPath:
                        innerPath = parsedPath.get("innerPath", "")
                        folderPath = '/' + innerPath if innerPath else '/'
                elif pathQueryParsed:
                    folderPath = pathQueryParsed

                # Get drive ID
                driveId = await self.services.sharepoint.getDriveId(siteId)
                if not driveId:
                    return fail("Could not determine drive ID for the site")

                # Get folder/item by path
                folderInfo = await self.services.sharepoint.getFolderByPath(siteId, folderPath.lstrip('/'))
                if not folderInfo:
                    return fail(f"Folder or file not found at path: {folderPath}")

                # Add pathQuery item to foundDocuments for processing
                foundDocuments = [{
                    "id": folderInfo.get("id"),
                    "name": folderInfo.get("name", ""),
                    "type": "folder" if folderInfo.get("folder") else "file",
                    "siteId": siteId,
                    "fullPath": folderPath,
                    "webUrl": folderInfo.get("webUrl", "")
                }]

        if not siteId or not driveId:
            return fail("Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")

        self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")

        # Get Microsoft connection
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return fail("No valid Microsoft connection found for the provided connection reference")

        # Set access token
        if not self.services.sharepoint.setAccessTokenFromConnection(connection):
            return fail("Failed to set SharePoint access token")

        # Process all items from documentList or pathQuery
        # IMPORTANT: Only analyze FOLDERS, not files (action is "analyzeFolderUsage")
        itemsToAnalyze = []
        for found in foundDocuments or []:
            foundId = found.get("id")
            foundType = found.get("type", "").lower()
            # Only process folders, skip files and site-level items
            if foundId and foundType == "folder":
                itemsToAnalyze.append({
                    "id": foundId,
                    "name": found.get("name", ""),
                    "type": foundType,
                    "path": found.get("fullPath", ""),
                    "webUrl": found.get("webUrl", "")
                })

        if not itemsToAnalyze:
            return fail("No valid folders found in documentList to analyze. Note: This action only analyzes folders, not files.")

        self.services.chat.progressLogUpdate(operationId, 0.4, f"Analyzing {len(itemsToAnalyze)} folder(s)")

        # Analyze each item
        allAnalytics = []
        totalActivities = 0
        uniqueUsers = set()
        activityTypes = {}

        # Compute the date range reported in the output. The caller-supplied
        # values (possibly None) are still what is passed to
        # getFolderUsageAnalytics. A single "now" keeps the default window
        # exactly 30 days wide.
        now = datetime.now(timezone.utc)
        actualEndDateTime = endDateTime or now.isoformat().replace('+00:00', 'Z')
        actualStartDateTime = startDateTime or (now - timedelta(days=30)).isoformat().replace('+00:00', 'Z')

        folderCount = len(itemsToAnalyze)
        for idx, item in enumerate(itemsToAnalyze):
            # Per-folder progress is spread over the 0.4 .. 0.9 range
            progress = 0.4 + (idx / folderCount) * 0.5
            self.services.chat.progressLogUpdate(operationId, progress, f"Analyzing folder {item['name']} ({idx+1}/{len(itemsToAnalyze)})")

            # Get usage analytics for this folder
            analyticsResult = await self.services.sharepoint.getFolderUsageAnalytics(
                siteId=siteId,
                driveId=driveId,
                itemId=item["id"],
                startDateTime=startDateTime,
                endDateTime=endDateTime,
                interval=interval
            )

            if "error" in analyticsResult:
                logger.warning(f"Failed to get analytics for item {item['name']} ({item['id']}): {analyticsResult['error']}")
                # Continue with other items even if one fails
                itemAnalytics = {
                    "itemId": item["id"],
                    "itemName": item["name"],
                    "itemType": item["type"],
                    "itemPath": item["path"],
                    "error": analyticsResult.get("error", "Unknown error")
                }
            else:
                # Process analytics for this item and fold it into the totals
                itemActivities, itemUsers, itemActivityTypes = _summarizeActivities(analyticsResult)
                totalActivities += itemActivities
                uniqueUsers |= itemUsers
                for actionType, count in itemActivityTypes.items():
                    activityTypes[actionType] = activityTypes.get(actionType, 0) + count

                itemAnalytics = {
                    "itemId": item["id"],
                    "itemName": item["name"],
                    "itemType": item["type"],
                    "itemPath": item["path"],
                    "webUrl": item["webUrl"],
                    "analytics": analyticsResult,
                    "summary": {
                        "totalActivities": itemActivities,
                        "uniqueUsers": len(itemUsers),
                        "activityTypes": itemActivityTypes
                    }
                }
                # Include note if analytics are not available
                if "note" in analyticsResult:
                    itemAnalytics["note"] = analyticsResult["note"]

            allAnalytics.append(itemAnalytics)

        self.services.chat.progressLogUpdate(operationId, 0.9, "Processing analytics data")

        # Human-readable summary; the error sentence is appended only when needed
        failedCount = sum(1 for entry in allAnalytics if 'error' in entry)
        note = (
            f"Analyzed {len(itemsToAnalyze)} folder(s) from {actualStartDateTime} to {actualEndDateTime}. "
            f"Found {totalActivities} total activities across {len(uniqueUsers)} unique user(s)."
        )
        if failedCount:
            note += f" Note: {failedCount} folder(s) had errors or no analytics data available."

        # Process and format analytics data
        resultData = {
            "siteId": siteId,
            "driveId": driveId,
            "startDateTime": actualStartDateTime,  # Store computed date range (not None)
            "endDateTime": actualEndDateTime,  # Store computed date range (not None)
            "interval": interval,
            "itemsAnalyzed": len(itemsToAnalyze),
            "foldersAnalyzed": sum(1 for entry in allAnalytics if entry.get("itemType") == "folder"),
            "items": allAnalytics,
            "summary": {
                "totalActivities": totalActivities,
                "uniqueUsers": len(uniqueUsers),
                "activityTypes": activityTypes
            },
            "note": note,
            "timestamp": self.services.utils.timestampGetUtc()
        }

        self.services.chat.progressLogUpdate(operationId, 0.95, f"Found {totalActivities} total activities across {len(itemsToAnalyze)} folder(s)")

        validationMetadata = {
            "actionType": "sharepoint.analyzeFolderUsage",
            "itemsAnalyzed": len(itemsToAnalyze),
            "interval": interval,
            "totalActivities": totalActivities,
            "uniqueUsers": len(uniqueUsers)
        }

        self.services.chat.progressLogFinish(operationId, True)
        return ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_usage_analysis", "json", None, "analyzeFolderUsage"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType="application/json",
                    validationMetadata=validationMetadata
                )
            ]
        )
    except Exception as e:
        # Top-level boundary: log with traceback and report a failure result
        logger.exception(f"Error analyzing folder usage: {str(e)}")
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                # Best effort only: a broken progress log must not mask the original error
                logger.warning(f"Could not close progress log {operationId}: {finishError}")
        return ActionResult(
            success=False,
            error=str(e)
        )


def _summarizeActivities(analyticsResult: Dict[str, Any]):
    """
    Count the activities contained in one analytics payload.

    Walks analyticsResult["value"] (a list of interval entries) and each
    entry's "activities" list. Missing or None containers are treated as empty.

    Returns a tuple (activityCount, users, verbCounts):
    - activityCount: number of activity entries seen
    - users: set of non-empty actor "userPrincipalName" values
    - verbCounts: dict mapping the action "verb" ("unknown" if absent) to its count
    """
    activityCount = 0
    users = set()
    verbCounts: Dict[str, int] = {}
    for intervalData in analyticsResult.get("value") or []:
        for activity in intervalData.get("activities") or []:
            activityCount += 1
            # "or {}" also covers keys that are present but set to None
            actionInfo = activity.get("action") or {}
            verb = actionInfo.get("verb", "unknown")
            verbCounts[verb] = verbCounts.get(verb, 0) + 1
            actor = activity.get("actor") or {}
            userPrincipalName = actor.get("userPrincipalName", "")
            if userPrincipalName:
                users.add(userPrincipalName)
    return activityCount, users, verbCounts