317 lines
15 KiB
Python
317 lines
15 KiB
Python
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
|
|
|
|
import logging
|
|
import time
|
|
import json
|
|
from datetime import datetime, timezone, timedelta
|
|
from typing import Dict, Any
|
|
from modules.datamodels.datamodelChatbot import ActionResult, ActionDocument
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
    """Collect usage analytics for SharePoint folders and return them as JSON.

    Folders are taken from ``documentList`` when it is provided; otherwise the
    single folder addressed by ``pathQuery`` is used. Only entries whose type
    is "folder" are analyzed; files and other entries are skipped.

    Args:
        parameters: Action parameters. Keys read here:
            ``connectionReference`` (required),
            ``documentList`` (a string is wrapped into a one-element list),
            ``pathQuery`` (treated as absent when blank or ``"*"``),
            ``startDateTime`` / ``endDateTime`` (passed through unchanged to
            the analytics call), ``interval`` (defaults to ``"day"``) and
            ``parentOperationId`` (forwarded to the progress log).

    Returns:
        On success, an ActionResult carrying one JSON ActionDocument with the
        per-folder analytics and an overall summary. On a validation or lookup
        problem, a failure ActionResult. Exceptions raised inside the action
        are logged and converted into a failure ActionResult as well.
    """
    operationId = None

    def fail(message: str) -> ActionResult:
        # Close the progress entry (when one was opened) and build the failure.
        if operationId:
            self.services.chat.progressLogFinish(operationId, False)
        return ActionResult.isFailure(error=message)

    try:
        # Init progress logger
        workflowId = (
            self.services.workflow.id
            if self.services.workflow
            else f"no-workflow-{int(time.time())}"
        )
        operationId = f"sharepoint_usage_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "Analyze Folder Usage",
            "SharePoint Analytics",
            "Processing document list",
            parentOperationId=parameters.get('parentOperationId')
        )

        connectionReference = parameters.get("connectionReference")
        documentList = parameters.get("documentList")
        pathQuery = parameters.get("pathQuery")
        if isinstance(documentList, str):
            documentList = [documentList]
        startDateTime = parameters.get("startDateTime")
        endDateTime = parameters.get("endDateTime")
        interval = parameters.get("interval", "day")

        if not connectionReference:
            return fail("Connection reference is required")

        # A blank or wildcard-only pathQuery does not identify a folder.
        hasPathQuery = bool(pathQuery) and pathQuery.strip() not in ("", "*")

        # Require either documentList or pathQuery
        if not documentList and not hasPathQuery:
            return fail("Either documentList or pathQuery is required")

        # Authenticate BEFORE resolving anything: getDriveId / getFolderByPath
        # below run on the same sharepoint service the token is set on.
        # NOTE(review): presumably those lookups need the access token; the
        # previous order only set it after they had already been called.
        self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return fail("No valid Microsoft connection found for the provided connection reference")

        if not self.services.sharepoint.setAccessTokenFromConnection(connection):
            return fail("Failed to set SharePoint access token")

        # Resolve folder/item information from documentList or pathQuery
        siteId = None
        driveId = None
        foundDocuments = None

        if documentList:
            foundDocuments, _sites, errorMsg = await self.documentParsing.parseDocumentListForFoundDocuments(documentList)
            if errorMsg:
                return fail(errorMsg)

            if not foundDocuments:
                return fail("No documents found in documentList")

            # Get siteId from first document (all should be from same site)
            siteId = foundDocuments[0].get("siteId")
            if not siteId:
                return fail("Site ID missing from documentList")

            # Get drive ID (needed for analytics)
            driveId = await self.services.sharepoint.getDriveId(siteId)
            if not driveId:
                return fail("Could not determine drive ID for the site")

        # Without a documentList, resolve the single folder named by pathQuery.
        if not foundDocuments and hasPathQuery:
            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return fail(errorMsg)

            if sites:
                siteId = sites[0].get("id")

                # Only the parsed path is needed; the remaining parts of the
                # parse result are not used by this action.
                pathQueryParsed, *_unused = self.pathProcessing.parseSearchQuery(pathQuery)

                # Extract folder path from pathQuery
                folderPath = '/'
                if pathQueryParsed and pathQueryParsed.startswith('/sites/'):
                    parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQueryParsed)
                    if parsedPath:
                        innerPath = parsedPath.get("innerPath", "")
                        folderPath = '/' + innerPath if innerPath else '/'
                elif pathQueryParsed:
                    folderPath = pathQueryParsed

                # Get drive ID
                driveId = await self.services.sharepoint.getDriveId(siteId)
                if not driveId:
                    return fail("Could not determine drive ID for the site")

                # Get folder/item by path
                folderInfo = await self.services.sharepoint.getFolderByPath(siteId, folderPath.lstrip('/'))
                if not folderInfo:
                    return fail(f"Folder or file not found at path: {folderPath}")

                # Feed the resolved item through the same pipeline as documentList entries
                foundDocuments = [{
                    "id": folderInfo.get("id"),
                    "name": folderInfo.get("name", ""),
                    "type": "folder" if folderInfo.get("folder") else "file",
                    "siteId": siteId,
                    "fullPath": folderPath,
                    "webUrl": folderInfo.get("webUrl", "")
                }]

        if not siteId or not driveId:
            return fail("Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")

        # IMPORTANT: Only analyze FOLDERS, not files (action is "analyzeFolderUsage").
        # `or ""` guards against an explicit None stored under "type".
        itemsToAnalyze = [
            {
                "id": doc.get("id"),
                "name": doc.get("name", ""),
                "type": "folder",
                "path": doc.get("fullPath", ""),
                "webUrl": doc.get("webUrl", "")
            }
            for doc in (foundDocuments or [])
            if doc.get("id") and (doc.get("type") or "").lower() == "folder"
        ]

        if not itemsToAnalyze:
            return fail("No valid folders found in documentList to analyze. Note: This action only analyzes folders, not files.")

        folderCount = len(itemsToAnalyze)
        self.services.chat.progressLogUpdate(operationId, 0.4, f"Analyzing {folderCount} folder(s)")

        # Aggregates across all folders
        allAnalytics = []
        totalActivities = 0
        uniqueUsers = set()
        activityTypes = {}

        # The analytics call receives the caller's values unchanged (possibly
        # None); the resolved range is only used for the output document.
        actualStartDateTime, actualEndDateTime = _usageDateRange(startDateTime, endDateTime)

        for idx, item in enumerate(itemsToAnalyze):
            progress = 0.4 + (idx / folderCount) * 0.5
            self.services.chat.progressLogUpdate(operationId, progress, f"Analyzing folder {item['name']} ({idx+1}/{folderCount})")

            # Get usage analytics for this folder
            analyticsResult = await self.services.sharepoint.getFolderUsageAnalytics(
                siteId=siteId,
                driveId=driveId,
                itemId=item["id"],
                startDateTime=startDateTime,
                endDateTime=endDateTime,
                interval=interval
            )

            # A None / non-dict response is recorded as a per-item error so
            # one bad folder cannot abort the whole run.
            if not isinstance(analyticsResult, dict):
                analyticsResult = {
                    "error": f"Unexpected analytics response type: {type(analyticsResult).__name__}"
                }

            if "error" in analyticsResult:
                logger.warning(
                    "Failed to get analytics for item %s (%s): %s",
                    item['name'], item['id'], analyticsResult['error']
                )
                # Continue with other items even if one fails
                itemAnalytics = {
                    "itemId": item["id"],
                    "itemName": item["name"],
                    "itemType": item["type"],
                    "itemPath": item["path"],
                    "error": analyticsResult.get("error", "Unknown error")
                }
            else:
                itemActivities, itemUsers, itemActivityTypes = _tallyUsageActivities(analyticsResult)

                # Fold the per-folder numbers into the overall summary
                totalActivities += itemActivities
                uniqueUsers |= itemUsers
                for verb, count in itemActivityTypes.items():
                    activityTypes[verb] = activityTypes.get(verb, 0) + count

                itemAnalytics = {
                    "itemId": item["id"],
                    "itemName": item["name"],
                    "itemType": item["type"],
                    "itemPath": item["path"],
                    "webUrl": item["webUrl"],
                    "analytics": analyticsResult,
                    "summary": {
                        "totalActivities": itemActivities,
                        "uniqueUsers": len(itemUsers),
                        "activityTypes": itemActivityTypes
                    }
                }

                # Carry over a "note" entry from the analytics response when present
                if "note" in analyticsResult:
                    itemAnalytics["note"] = analyticsResult["note"]

            allAnalytics.append(itemAnalytics)

        self.services.chat.progressLogUpdate(operationId, 0.9, "Processing analytics data")

        failedCount = sum(1 for entry in allAnalytics if 'error' in entry)
        errorNote = (
            f" Note: {failedCount} folder(s) had errors or no analytics data available."
            if failedCount else ""
        )

        # Process and format analytics data
        resultData = {
            "siteId": siteId,
            "driveId": driveId,
            "startDateTime": actualStartDateTime,  # Store computed date range (not None)
            "endDateTime": actualEndDateTime,  # Store computed date range (not None)
            "interval": interval,
            "itemsAnalyzed": folderCount,
            "foldersAnalyzed": sum(1 for entry in allAnalytics if entry.get("itemType") == "folder"),
            "items": allAnalytics,
            "summary": {
                "totalActivities": totalActivities,
                "uniqueUsers": len(uniqueUsers),
                "activityTypes": activityTypes
            },
            "note": f"Analyzed {folderCount} folder(s) from {actualStartDateTime} to {actualEndDateTime}. " +
                    f"Found {totalActivities} total activities across {len(uniqueUsers)} unique user(s)." +
                    errorNote,
            "timestamp": self.services.utils.timestampGetUtc()
        }

        self.services.chat.progressLogUpdate(operationId, 0.95, f"Found {totalActivities} total activities across {folderCount} folder(s)")

        validationMetadata = {
            "actionType": "sharepoint.analyzeFolderUsage",
            "itemsAnalyzed": folderCount,
            "interval": interval,
            "totalActivities": totalActivities,
            "uniqueUsers": len(uniqueUsers)
        }

        self.services.chat.progressLogFinish(operationId, True)
        return ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_usage_analysis", "json", None, "analyzeFolderUsage"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType="application/json",
                    validationMetadata=validationMetadata
                )
            ]
        )

    except Exception as e:
        # logger.exception keeps the traceback, which a plain error() call drops.
        logger.exception("Error analyzing folder usage: %s", e)
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                # Best effort: a failing progress log must not mask the original error.
                logger.debug("Could not finish progress log for %s", operationId, exc_info=True)
        return ActionResult(
            success=False,
            error=str(e)
        )


def _usageDateRange(startDateTime, endDateTime):
    """Return (start, end) for the report, defaulting missing values from one clock reading.

    A missing end becomes the current UTC time and a missing start becomes
    30 days before the current UTC time, both as ISO-8601 strings with a
    trailing "Z". Values supplied by the caller are returned unchanged.
    """
    now = datetime.now(timezone.utc)
    end = endDateTime or now.isoformat().replace('+00:00', 'Z')
    start = startDateTime or (now - timedelta(days=30)).isoformat().replace('+00:00', 'Z')
    return start, end


def _tallyUsageActivities(analyticsResult):
    """Count activities, distinct users and per-verb totals in one analytics response.

    Walks ``analyticsResult["value"]`` and each interval's ``"activities"``
    list. Missing or None containers are treated as empty.

    Returns:
        Tuple ``(activityCount, userPrincipalNames, countsByVerb)``.
    """
    activityCount = 0
    users = set()
    countsByVerb = {}

    for intervalData in analyticsResult.get("value") or []:
        for activity in intervalData.get("activities") or []:
            activityCount += 1

            verb = (activity.get("action") or {}).get("verb", "unknown")
            countsByVerb[verb] = countsByVerb.get(verb, 0) + 1

            userPrincipalName = (activity.get("actor") or {}).get("userPrincipalName", "")
            if userPrincipalName:
                users.add(userPrincipalName)

    return activityCount, users, countsByVerb
|
|
|