gateway/modules/workflows/methods/methodSharepoint/actions/analyzeFolderUsage.py
2026-01-20 00:55:39 +01:00

317 lines
15 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import logging
import time
import json
from datetime import datetime, timezone, timedelta
from typing import Dict, Any
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
def _summarizeActivities(analyticsResult: Dict[str, Any]) -> tuple:
    """Tally the activities contained in one analytics payload.

    Walks ``analyticsResult["value"]`` (a list of interval entries), and for
    every entry its ``"activities"`` list. Missing or ``None`` containers are
    treated as empty so a single null field does not abort the whole analysis.

    Returns:
        A tuple ``(activityCount, users, verbCounts)`` where ``users`` is the
        set of non-empty ``actor.userPrincipalName`` values and ``verbCounts``
        maps ``action.verb`` (``"unknown"`` when absent) to its occurrence count.
    """
    activityCount = 0
    users = set()
    verbCounts: Dict[str, int] = {}
    for intervalData in analyticsResult.get("value") or []:
        for activity in intervalData.get("activities") or []:
            activityCount += 1
            verb = (activity.get("action") or {}).get("verb", "unknown")
            verbCounts[verb] = verbCounts.get(verb, 0) + 1
            userPrincipalName = (activity.get("actor") or {}).get("userPrincipalName", "")
            if userPrincipalName:
                users.add(userPrincipalName)
    return activityCount, users, verbCounts


async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
    """Collect usage analytics for one or more SharePoint folders.

    Keys read from ``parameters``:
        connectionReference: required; used to obtain the Microsoft connection.
        documentList: a document reference or list of references; resolved via
            ``self.documentParsing.parseDocumentListForFoundDocuments``.
        pathQuery: used when no documentList is given; ignored when it is
            empty, ``"*"`` or not a string.
        startDateTime / endDateTime: optional range, passed through unchanged
            to ``getFolderUsageAnalytics``.
        interval: aggregation interval, defaults to ``"day"``.
        parentOperationId: optional parent for the progress log.

    Only items whose ``type`` is ``"folder"`` are analyzed; files are skipped.
    A folder whose analytics call reports an error is recorded with that error
    and does not stop the remaining folders.

    Returns:
        On success an ``ActionResult`` carrying a single JSON ``ActionDocument``
        with per-folder analytics and an overall summary. On any validation
        problem or exception a failure ``ActionResult``; exceptions are caught
        and reported through the result instead of being raised.
    """
    operationId = None

    def fail(message: str) -> ActionResult:
        """Close the progress log as failed (if it was started) and build the failure result."""
        if operationId:
            self.services.chat.progressLogFinish(operationId, False)
        return ActionResult.isFailure(error=message)

    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_usage_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Analyze Folder Usage",
            "SharePoint Analytics",
            "Processing document list",
            parentOperationId=parentOperationId
        )

        connectionReference = parameters.get("connectionReference")
        documentList = parameters.get("documentList")
        pathQuery = parameters.get("pathQuery")
        if isinstance(documentList, str):
            documentList = [documentList]
        startDateTime = parameters.get("startDateTime")
        endDateTime = parameters.get("endDateTime")
        interval = parameters.get("interval", "day")

        # A path query only counts when it is a real string other than "" or "*".
        hasPathQuery = isinstance(pathQuery, str) and pathQuery.strip() not in ("", "*")

        if not connectionReference:
            return fail("Connection reference is required")

        # Require either documentList or pathQuery
        if not documentList and not hasPathQuery:
            return fail("Either documentList or pathQuery is required")

        # Resolve folder/item information from documentList or pathQuery
        siteId = None
        driveId = None
        foundDocuments = None

        if documentList:
            foundDocuments, _sites, errorMsg = await self.documentParsing.parseDocumentListForFoundDocuments(documentList)
            if errorMsg:
                return fail(errorMsg)
            if not foundDocuments:
                return fail("No documents found in documentList")

            # Get siteId from first document (all should be from same site)
            siteId = foundDocuments[0].get("siteId")
            if not siteId:
                return fail("Site ID missing from documentList")

            # Get drive ID (needed for analytics)
            driveId = await self.services.sharepoint.getDriveId(siteId)
            if not driveId:
                return fail("Could not determine drive ID for the site")

        # No documentList given: resolve the target from pathQuery instead
        if not foundDocuments and hasPathQuery:
            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return fail(errorMsg)

            # When no site is resolved, the siteId/driveId guard below reports the failure.
            if sites:
                siteId = sites[0].get("id")

                # Parse pathQuery to find the folder/item
                pathQueryParsed, _fileQuery, _searchType, _searchOptions = self.pathProcessing.parseSearchQuery(pathQuery)

                # Extract folder path from pathQuery
                folderPath = '/'
                if pathQueryParsed and pathQueryParsed.startswith('/sites/'):
                    parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQueryParsed)
                    if parsedPath:
                        innerPath = parsedPath.get("innerPath", "")
                        folderPath = '/' + innerPath if innerPath else '/'
                elif pathQueryParsed:
                    folderPath = pathQueryParsed

                # Get drive ID
                driveId = await self.services.sharepoint.getDriveId(siteId)
                if not driveId:
                    return fail("Could not determine drive ID for the site")

                # Get folder/item by path
                folderInfo = await self.services.sharepoint.getFolderByPath(siteId, folderPath.lstrip('/'))
                if not folderInfo:
                    return fail(f"Folder or file not found at path: {folderPath}")

                # Add pathQuery item to foundDocuments for processing
                foundDocuments = [{
                    "id": folderInfo.get("id"),
                    "name": folderInfo.get("name", ""),
                    "type": "folder" if folderInfo.get("folder") else "file",
                    "siteId": siteId,
                    "fullPath": folderPath,
                    "webUrl": folderInfo.get("webUrl", "")
                }]

        if not siteId or not driveId:
            return fail("Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")

        self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")

        # Get Microsoft connection
        # NOTE(review): the SharePoint lookups above (getDriveId / getFolderByPath) run
        # before the access token is set here - confirm a token from an earlier step is
        # guaranteed to be present, otherwise this setup should move up.
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return fail("No valid Microsoft connection found for the provided connection reference")

        # Set access token
        if not self.services.sharepoint.setAccessTokenFromConnection(connection):
            return fail("Failed to set SharePoint access token")

        # Process all items from documentList or pathQuery
        # IMPORTANT: Only analyze FOLDERS, not files (action is "analyzeFolderUsage")
        itemsToAnalyze = []
        for item in foundDocuments or []:
            itemId = item.get("id")
            itemType = (item.get("type") or "").lower()
            # Only process folders, skip files and site-level items
            if itemId and itemType == "folder":
                itemsToAnalyze.append({
                    "id": itemId,
                    "name": item.get("name", ""),
                    "type": itemType,
                    "path": item.get("fullPath", ""),
                    "webUrl": item.get("webUrl", "")
                })

        if not itemsToAnalyze:
            return fail("No valid folders found in documentList to analyze. Note: This action only analyzes folders, not files.")

        folderCount = len(itemsToAnalyze)
        self.services.chat.progressLogUpdate(operationId, 0.4, f"Analyzing {folderCount} folder(s)")

        # Date range reported in the output. The raw (possibly None) values are still
        # what is passed to getFolderUsageAnalytics; the defaults here (now / now - 30 days)
        # only make sure the stored range is never None.
        # NOTE(review): presumably mirrors the defaults applied inside
        # getFolderUsageAnalytics - verify against that implementation.
        now = datetime.now(timezone.utc)
        actualEndDateTime = endDateTime or now.isoformat().replace('+00:00', 'Z')
        actualStartDateTime = startDateTime or (now - timedelta(days=30)).isoformat().replace('+00:00', 'Z')

        # Analyze each item
        allAnalytics = []
        totalActivities = 0
        uniqueUsers = set()
        activityTypes = {}

        for idx, item in enumerate(itemsToAnalyze):
            # Folder analysis covers the 0.4 - 0.9 span of the progress bar.
            progress = 0.4 + (idx / folderCount) * 0.5
            self.services.chat.progressLogUpdate(operationId, progress, f"Analyzing folder {item['name']} ({idx+1}/{folderCount})")

            # Get usage analytics for this folder
            analyticsResult = await self.services.sharepoint.getFolderUsageAnalytics(
                siteId=siteId,
                driveId=driveId,
                itemId=item["id"],
                startDateTime=startDateTime,
                endDateTime=endDateTime,
                interval=interval
            )

            if "error" in analyticsResult:
                logger.warning(f"Failed to get analytics for item {item['name']} ({item['id']}): {analyticsResult['error']}")
                # Continue with other items even if one fails
                itemAnalytics = {
                    "itemId": item["id"],
                    "itemName": item["name"],
                    "itemType": item["type"],
                    "itemPath": item["path"],
                    "error": analyticsResult.get("error", "Unknown error")
                }
            else:
                # Process analytics for this item and fold it into the overall totals
                itemActivities, itemUsers, itemActivityTypes = _summarizeActivities(analyticsResult)
                totalActivities += itemActivities
                uniqueUsers |= itemUsers
                for verb, count in itemActivityTypes.items():
                    activityTypes[verb] = activityTypes.get(verb, 0) + count

                itemAnalytics = {
                    "itemId": item["id"],
                    "itemName": item["name"],
                    "itemType": item["type"],
                    "itemPath": item["path"],
                    "webUrl": item["webUrl"],
                    "analytics": analyticsResult,
                    "summary": {
                        "totalActivities": itemActivities,
                        "uniqueUsers": len(itemUsers),
                        "activityTypes": itemActivityTypes
                    }
                }
                # Include note if analytics are not available
                if "note" in analyticsResult:
                    itemAnalytics["note"] = analyticsResult["note"]

            allAnalytics.append(itemAnalytics)

        self.services.chat.progressLogUpdate(operationId, 0.9, "Processing analytics data")

        # Process and format analytics data
        failedCount = sum(1 for entry in allAnalytics if 'error' in entry)
        note = (
            f"Analyzed {folderCount} folder(s) from {actualStartDateTime} to {actualEndDateTime}. "
            f"Found {totalActivities} total activities across {len(uniqueUsers)} unique user(s)."
        )
        if failedCount:
            note += f" Note: {failedCount} folder(s) had errors or no analytics data available."

        resultData = {
            "siteId": siteId,
            "driveId": driveId,
            "startDateTime": actualStartDateTime,  # Store computed date range (not None)
            "endDateTime": actualEndDateTime,  # Store computed date range (not None)
            "interval": interval,
            "itemsAnalyzed": folderCount,
            "foldersAnalyzed": sum(1 for entry in allAnalytics if entry.get("itemType") == "folder"),
            "items": allAnalytics,
            "summary": {
                "totalActivities": totalActivities,
                "uniqueUsers": len(uniqueUsers),
                "activityTypes": activityTypes
            },
            "note": note,
            "timestamp": self.services.utils.timestampGetUtc()
        }

        self.services.chat.progressLogUpdate(operationId, 0.95, f"Found {totalActivities} total activities across {folderCount} folder(s)")

        validationMetadata = {
            "actionType": "sharepoint.analyzeFolderUsage",
            "itemsAnalyzed": folderCount,
            "interval": interval,
            "totalActivities": totalActivities,
            "uniqueUsers": len(uniqueUsers)
        }

        self.services.chat.progressLogFinish(operationId, True)
        return ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_usage_analysis", "json", None, "analyzeFolderUsage"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType="application/json",
                    validationMetadata=validationMetadata
                )
            ]
        )
    except Exception as e:
        # Top-level boundary of the action: report the failure through the result
        # instead of letting it propagate into the workflow engine.
        logger.error(f"Error analyzing folder usage: {str(e)}", exc_info=True)
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                # Best effort only: a failing progress log must not mask the original error.
                logger.warning(f"Could not close progress log {operationId}: {finishError}")
        return ActionResult(
            success=False,
            error=str(e)
        )