327 lines
17 KiB
Python
327 lines
17 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
|
|
import logging
|
|
import time
|
|
import json
|
|
import urllib.parse
|
|
from typing import Dict, Any
|
|
from modules.datamodels.datamodelChatbot import ActionResult, ActionDocument
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Maximum number of direct subfolders whose contents are fetched per listed folder.
_MAX_SUBFOLDERS = 10

# Substrings that mark a single path segment as a document-library name.
_LIBRARY_NAME_HINTS = ('document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek')


def _isDriveItemId(value: str) -> bool:
    """Return True when *value* starts with '01' and is therefore treated as a folder ID.

    The previous inline checks also tested the longer prefix '01PPXICCB'; any
    string with that prefix also starts with '01', so one test is sufficient.
    """
    return value.startswith('01')


def _resolveListingPath(self, pathQuery: str) -> str:
    """Reduce a path query to the path that is handed to ``resolvePathQuery``.

    Steps:
      1. For ``/sites/...`` paths, keep only the ``innerPath`` reported by
         ``siteDiscovery.extractSiteFromStandardPath`` (root when it is missing).
      2. With more than one segment, drop the first one (potential library name).
      3. With exactly one segment that contains a library-like keyword, use root.
    """
    resolved = pathQuery
    if pathQuery.startswith('/sites/'):
        # The site prefix is only used for site filtering, not for the drive path.
        parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQuery)
        innerPath = parsedPath.get("innerPath", "") if parsedPath else ""
        resolved = '/' + innerPath if innerPath else '/'

    # /drive/root already addresses the default document library, so a leading
    # library name in the path has to be removed.
    segments = [segment for segment in resolved.split('/') if segment.strip()]
    if len(segments) > 1:
        stripped = '/' + '/'.join(segments[1:])
        logger.info(f"Removed first path segment (potential library name), path changed from '{resolved}' to '{stripped}'")
        return stripped

    if len(segments) == 1:
        firstSegmentLower = segments[0].lower()
        if any(hint in firstSegmentLower for hint in _LIBRARY_NAME_HINTS):
            logger.info(f"First segment '{segments[0]}' appears to be a library name, using root")
            return '/'

    return resolved


def _buildChildrenEndpoint(siteId: str, folderPath: str) -> str:
    """Build the Graph endpoint that lists the children of *folderPath* in a site drive."""
    if folderPath in ("/", "", "*"):
        # Root folder
        return f"sites/{siteId}/drive/root/children"
    if _isDriveItemId(folderPath):
        # Direct folder ID
        return f"sites/{siteId}/drive/items/{folderPath}/children"
    # Path-based addressing: drop the leading slash and percent-encode
    # everything except the path separators.
    folderPathEncoded = urllib.parse.quote(folderPath.lstrip('/'), safe='/')
    return f"sites/{siteId}/drive/root:/{folderPathEncoded}:/children"


def _describeDriveItem(self, item: Dict[str, Any], siteName: str, siteUrl: str, parentPath=None) -> Dict[str, Any]:
    """Map one item of a Graph ``children`` response to the result entry format.

    ``parentPath`` is only written when given (used for subfolder contents).
    File entries additionally get ``mimeType``/``downloadUrl``; folder entries
    get ``childCount`` (now also for folders found inside subfolders).
    """
    isFolder = self.services.sharepoint.detectFolderType(item)

    itemInfo = {
        "id": item.get("id"),
        "name": item.get("name"),
        "size": item.get("size", 0),
        "createdDateTime": item.get("createdDateTime"),
        "lastModifiedDateTime": item.get("lastModifiedDateTime"),
        "webUrl": item.get("webUrl"),
        "type": "folder" if isFolder else "file",
    }
    if parentPath is not None:
        itemInfo["parentPath"] = parentPath
    itemInfo["siteName"] = siteName
    itemInfo["siteUrl"] = siteUrl

    if "file" in item:
        itemInfo.update({
            "mimeType": item["file"].get("mimeType"),
            "downloadUrl": item.get("@microsoft.graph.downloadUrl")
        })

    if "folder" in item:
        itemInfo.update({
            "childCount": item["folder"].get("childCount", 0)
        })

    return itemInfo


async def _appendSubfolderContents(self, siteId: str, siteName: str, siteUrl: str, folderPath: str, processedItems) -> None:
    """Append the contents of the direct subfolders to *processedItems* (one level, no recursion).

    At most ``_MAX_SUBFOLDERS`` folders are expanded. A subfolder whose request
    returns an error is logged and skipped.
    """
    # Snapshot first: entries appended below must not be expanded themselves.
    folderItems = [entry for entry in processedItems if entry['type'] == 'folder']
    logger.info(f"Including subfolders - processing {len(folderItems)} folders")

    expandedFolders = folderItems[:_MAX_SUBFOLDERS]
    for folderItem in expandedFolders:
        subfolderPath = f"{folderPath.rstrip('/')}/{folderItem['name']}"
        subfolderEndpoint = f"sites/{siteId}/drive/items/{folderItem['id']}/children"

        logger.debug(f"Getting contents of subfolder: {folderItem['name']}")
        subfolderResult = await self.apiClient.makeGraphApiCall(subfolderEndpoint)
        if "error" in subfolderResult:
            logger.warning(f"Failed to get contents of subfolder {folderItem['name']}: {subfolderResult.get('error')}")
            continue

        subfolderItems = subfolderResult.get("value", [])
        logger.debug(f"Found {len(subfolderItems)} items in subfolder {folderItem['name']}")
        processedItems.extend(
            _describeDriveItem(self, subfolderItem, siteName, siteUrl, parentPath=subfolderPath)
            for subfolderItem in subfolderItems
        )

    # Only warn when folders were really left out.
    if len(folderItems) > _MAX_SUBFOLDERS:
        logger.warning(f"Reached maximum subfolder limit ({_MAX_SUBFOLDERS}), skipping remaining folders")

    logger.info(f"Processed {len(expandedFolders)} subfolders, total items: {len(processedItems)}")


async def _listFolderInSite(self, site: Dict[str, Any], folderPath: str, includeSubfolders: bool):
    """List *folderPath* in one site.

    Returns the per-site result dict, or ``None`` when the Graph call reported
    an error (the error is logged and the site is skipped).
    """
    siteId = site["id"]
    siteName = site["displayName"]
    siteUrl = site["webUrl"]

    logger.info(f"Listing folder {folderPath} in site: {siteName}")

    apiResult = await self.apiClient.makeGraphApiCall(_buildChildrenEndpoint(siteId, folderPath))
    if "error" in apiResult:
        logger.warning(f"Failed to list folder {folderPath} in site {siteName}: {apiResult['error']}")
        return None

    # NOTE(review): only the "value" array of this single response is read; no
    # "@odata.nextLink" is followed here - confirm whether makeGraphApiCall pages.
    processedItems = [
        _describeDriveItem(self, item, siteName, siteUrl)
        for item in apiResult.get("value", [])
    ]

    if includeSubfolders:
        await _appendSubfolderContents(self, siteId, siteName, siteUrl, folderPath, processedItems)

    return {
        "siteName": siteName,
        "siteUrl": siteUrl,
        "itemCount": len(processedItems),
        "items": processedItems
    }


async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
    """List the contents of SharePoint folders and return them as a JSON document.

    Keys read from ``parameters``:
        connectionReference: Required. Passed to ``connection.getMicrosoftConnection``.
        documentList: Optional string or list; parsed by
            ``documentParsing.parseDocumentListForFolder`` for folder path and sites.
        pathQuery: Optional path; used when documentList yields no folder path.
            Empty strings and "*" count as "not provided".
        includeSubfolders: Optional flag (default False). When set, the contents
            of up to 10 direct subfolders are added (one level only).
        parentOperationId: Optional, forwarded to the progress log.

    Returns:
        On success an ActionResult with a single JSON ActionDocument holding the
        per-folder / per-site listing. On validation problems or any exception
        an ActionResult carrying the error message.
    """
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_list_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "List Documents",
            "SharePoint Listing",
            "Processing document list",
            parentOperationId=parameters.get('parentOperationId')
        )

        def fail(message: str) -> ActionResult:
            # Close the progress entry as failed and build the failure result.
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error=message)

        connectionReference = parameters.get("connectionReference")
        documentList = parameters.get("documentList")
        pathQuery = parameters.get("pathQuery", "*")
        if isinstance(documentList, str):
            documentList = [documentList]
        includeSubfolders = parameters.get("includeSubfolders", False)  # Default to False for better UX

        # A pathQuery counts only when it is a string other than "" or "*".
        hasPathQuery = isinstance(pathQuery, str) and pathQuery.strip() not in ("", "*")

        if not connectionReference:
            return fail("Connection reference is required")

        # Require either documentList or pathQuery
        if not documentList and not hasPathQuery:
            return fail("Either documentList or pathQuery is required")

        # Parse documentList to extract folder path and site information
        listQuery, sites, _, errorMsg = await self.documentParsing.parseDocumentListForFolder(documentList)
        if errorMsg:
            return fail(errorMsg)

        # If no folder path found from documentList, use pathQuery if provided
        if not listQuery and hasPathQuery:
            listQuery = pathQuery
            logger.info(f"Using pathQuery for list query: {listQuery}")
            # Resolve sites from pathQuery
            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return fail(errorMsg)

        # Validate required parameters
        if not listQuery:
            return fail("Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")

        if not sites:
            return fail("Site information missing. Cannot determine target site for list operation.")

        # Get connection
        self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return fail("No valid Microsoft connection found for the provided connection reference")

        logger.info(f"Starting SharePoint listDocuments for listQuery: {listQuery}")
        logger.debug(f"Connection ID: {connection['id']}")

        self.services.chat.progressLogUpdate(operationId, 0.3, "Processing folder path")

        # Only the path part of the parsed query is needed for listing.
        pathQuery, _fileQuery, _searchType, _searchOptions = self.pathProcessing.parseSearchQuery(listQuery)

        if _isDriveItemId(listQuery):
            # Direct folder ID - use it directly
            folderPaths = [listQuery]
            logger.info(f"Using direct folder ID: {listQuery}")
        else:
            # Resolve path query into folder paths
            folderPaths = self.pathProcessing.resolvePathQuery(_resolveListingPath(self, pathQuery))
            logger.info(f"Resolved folder paths: {folderPaths}")

        # Process each folder path across all sites
        listResults = []

        self.services.chat.progressLogUpdate(operationId, 0.5, f"Listing {len(folderPaths)} folder(s) across {len(sites)} site(s)")

        for folderPath in folderPaths:
            try:
                folderResults = []
                for site in sites:
                    siteResult = await _listFolderInSite(self, site, folderPath, includeSubfolders)
                    if siteResult is not None:
                        folderResults.append(siteResult)

                listResults.append({
                    "folderPath": folderPath,
                    "sitesProcessed": len(folderResults),
                    "siteResults": folderResults
                })

            except Exception as e:
                # One failing folder must not abort the remaining folders.
                logger.error(f"Error listing folder {folderPath}: {str(e)}")
                listResults.append({
                    "folderPath": folderPath,
                    "error": str(e),
                    "sitesProcessed": 0,
                    "siteResults": []
                })

        # Create result data
        totalItems = sum(
            len(siteResult.get("items", []))
            for result in listResults
            for siteResult in result.get("siteResults", [])
        )

        resultData = {
            "listQuery": listQuery,
            "pathQuery": pathQuery,
            "totalItems": totalItems,
            "foldersProcessed": len(listResults),
            "listResults": listResults,
            "includeSubfolders": includeSubfolders,
            "timestamp": self.services.utils.timestampGetUtc()
        }

        self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {totalItems} item(s) in {len(listResults)} folder(s)")

        validationMetadata = {
            "actionType": "sharepoint.listDocuments",
            "listQuery": listQuery,
            "totalItems": totalItems,
            "foldersProcessed": len(listResults),
            "includeSubfolders": includeSubfolders
        }

        # Build the result before closing the progress entry: if serialisation
        # fails, the handler below closes it once as failed instead of the
        # entry being finished twice (first as success, then as failure).
        actionResult = ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_list", "json", None, "listDocuments"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType="application/json",
                    validationMetadata=validationMetadata
                )
            ]
        )

        self.services.chat.progressLogFinish(operationId, True)
        return actionResult

    except Exception as e:
        logger.error(f"Error listing SharePoint documents: {str(e)}", exc_info=True)
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                # Best effort only: the original error is what the caller needs.
                logger.warning(f"Could not finish progress log for {operationId}: {finishError}")
        return ActionResult(
            success=False,
            error=str(e)
        )
|
|
|