# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
List Documents action for SharePoint operations.
Lists documents and folders in SharePoint paths across sites.
"""

import logging
import time
import json
import urllib.parse
from typing import Dict, Any, List, Optional
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument

logger = logging.getLogger(__name__)

# Upper bound on how many subfolders are expanded per listed folder when
# includeSubfolders is enabled; each expansion costs one extra Graph API call.
_MAX_SUBFOLDERS = 10

# Substrings that mark a single path segment as a document library name.
_LIBRARY_INDICATORS = ('document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek')


def _hasExplicitPathQuery(pathQuery: Optional[str]) -> bool:
    """Return True when pathQuery is set to something other than blank or the '*' wildcard."""
    return bool(pathQuery) and pathQuery.strip() not in ("", "*")


def _isDriveItemId(value: str) -> bool:
    """
    Return True when value is treated as a direct drive item (folder) ID.

    The check is a plain '01' prefix test (e.g. IDs of the form 01PPXICCB...).
    NOTE(review): a relative path whose first segment starts with "01" (for
    example "01_Projects/Reports") also matches - confirm whether callers
    always pass paths with a leading slash.
    """
    return value.startswith('01')


def _buildChildrenEndpoint(siteId: str, folderPath: str) -> str:
    """Build the Graph endpoint that lists the children of folderPath in the site's default drive."""
    if folderPath in ("/", "", "*"):
        # Root folder
        return f"sites/{siteId}/drive/root/children"
    if _isDriveItemId(folderPath):
        # Direct folder ID
        return f"sites/{siteId}/drive/items/{folderPath}/children"
    # Specific folder path: drop leading slashes and URL encode everything
    # except the segment separators (spaces and special characters need encoding).
    folderPathEncoded = urllib.parse.quote(folderPath.lstrip('/'), safe='/')
    return f"sites/{siteId}/drive/root:/{folderPathEncoded}:/children"


def _buildItemInfo(
    item: Dict[str, Any],
    isFolder: bool,
    siteName: str,
    siteUrl: str,
    parentPath: Optional[str] = None
) -> Dict[str, Any]:
    """
    Convert one Graph drive item into the flat dict used in the action result.

    parentPath is only emitted when given (items found inside an expanded
    subfolder). File entries get mimeType/downloadUrl, folder entries get
    childCount.
    """
    info: Dict[str, Any] = {
        "id": item.get("id"),
        "name": item.get("name"),
        "size": item.get("size", 0),
        "createdDateTime": item.get("createdDateTime"),
        "lastModifiedDateTime": item.get("lastModifiedDateTime"),
        "webUrl": item.get("webUrl"),
        "type": "folder" if isFolder else "file",
    }
    if parentPath is not None:
        info["parentPath"] = parentPath
    info["siteName"] = siteName
    info["siteUrl"] = siteUrl

    # Add file-specific information
    if "file" in item:
        info["mimeType"] = item["file"].get("mimeType")
        info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")

    # Add folder-specific information
    if "folder" in item:
        info["childCount"] = item["folder"].get("childCount", 0)

    return info


def _toDriveRelativePath(pathQuery: str, siteDiscovery: Any) -> str:
    """
    Reduce a user-facing path query to a path relative to the drive root.

    Steps:
    1. For Microsoft-standard paths (/sites/SiteName/Path) keep only the inner
       path reported by siteDiscovery.extractSiteFromStandardPath.
    2. With more than one remaining segment, drop the first one on the
       assumption that it is a document library name (/drive/root already
       points at the default library).
    3. With exactly one remaining segment, fall back to the root when the
       segment looks like a library name.
    """
    resolved = pathQuery

    # The site prefix is only used for site filtering, not for path resolution.
    if pathQuery.startswith('/sites/'):
        parsedPath = siteDiscovery.extractSiteFromStandardPath(pathQuery)
        innerPath = parsedPath.get("innerPath", "") if parsedPath else ""
        resolved = '/' + innerPath if innerPath else '/'

    pathSegments = [segment for segment in resolved.split('/') if segment.strip()]
    if len(pathSegments) > 1:
        # NOTE(review): the first segment is dropped unconditionally; no
        # fallback to the unshortened path is implemented if this guess is wrong.
        shortened = '/' + '/'.join(pathSegments[1:])
        logger.info(f"Removed first path segment (potential library name), path changed from '{resolved}' to '{shortened}'")
        resolved = shortened
    elif len(pathSegments) == 1:
        firstSegmentLower = pathSegments[0].lower()
        if any(indicator in firstSegmentLower for indicator in _LIBRARY_INDICATORS):
            resolved = '/'
            logger.info(f"First segment '{pathSegments[0]}' appears to be a library name, using root")

    return resolved


@action
async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: List documents and folders in SharePoint paths across sites.
    - Input requirements: connectionReference (required); documentList (required); includeSubfolders (optional).
    - Output format: JSON with folder items and metadata.

    Parameters:
    - connectionReference (str, required): Microsoft connection label.
    - documentList (list, required): Document list reference(s) containing findDocumentPath result.
    - includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
    """
    # NOTE(review): the code below also reads two parameters that the docstring
    # does not list: "pathQuery" (accepted as an alternative to documentList)
    # and "parentOperationId" (forwarded to the progress log). The docstring
    # text is left untouched in case the @action framework consumes it.
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_list_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "List Documents",
            "SharePoint Listing",
            "Processing document list",
            parentOperationId=parameters.get('parentOperationId')
        )

        def fail(message: str) -> ActionResult:
            """Close the progress log as failed and build the failure result."""
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error=message)

        connectionReference = parameters.get("connectionReference")
        documentList = parameters.get("documentList")
        pathQuery = parameters.get("pathQuery", "*")
        if isinstance(documentList, str):
            documentList = [documentList]
        includeSubfolders = parameters.get("includeSubfolders", False)  # Default to False for better UX

        if not connectionReference:
            return fail("Connection reference is required")

        # Require either documentList or pathQuery
        explicitPathQuery = _hasExplicitPathQuery(pathQuery)
        if not documentList and not explicitPathQuery:
            return fail("Either documentList or pathQuery is required")

        # Parse documentList to extract folder path and site information
        listQuery, sites, _, errorMsg = await self.documentParsing.parseDocumentListForFolder(documentList)
        if errorMsg:
            return fail(errorMsg)

        # If no folder path found from documentList, use pathQuery if provided
        if not listQuery and explicitPathQuery:
            listQuery = pathQuery
            logger.info(f"Using pathQuery for list query: {listQuery}")

            # Resolve sites from pathQuery
            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return fail(errorMsg)

        # Validate required parameters
        if not listQuery:
            return fail("Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.")

        if not sites:
            return fail("Site information missing. Cannot determine target site for list operation.")

        # Get connection
        self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return fail("No valid Microsoft connection found for the provided connection reference")

        logger.info(f"Starting SharePoint listDocuments for listQuery: {listQuery}")
        logger.debug(f"Connection ID: {connection['id']}")

        self.services.chat.progressLogUpdate(operationId, 0.3, "Processing folder path")

        # Only the path component of the parsed query is used here; the file
        # query, search type and search options are not needed for listing.
        pathQuery, _, _, _ = self.pathProcessing.parseSearchQuery(listQuery)

        folderPaths: List[str]
        if _isDriveItemId(listQuery):
            # Direct folder ID - use it directly
            folderPaths = [listQuery]
            logger.info(f"Using direct folder ID: {listQuery}")
        else:
            # Resolve path query into folder paths
            pathQueryForResolve = _toDriveRelativePath(pathQuery, self.siteDiscovery)
            folderPaths = self.pathProcessing.resolvePathQuery(pathQueryForResolve)
            logger.info(f"Resolved folder paths: {folderPaths}")

        # Process each folder path across all sites
        listResults = []
        self.services.chat.progressLogUpdate(operationId, 0.5, f"Listing {len(folderPaths)} folder(s) across {len(sites)} site(s)")

        for folderPath in folderPaths:
            try:
                folderResults = []

                for site in sites:
                    siteId = site["id"]
                    siteName = site["displayName"]
                    siteUrl = site["webUrl"]

                    logger.info(f"Listing folder {folderPath} in site: {siteName}")

                    # Make the API call to list folder contents.
                    # NOTE(review): only the "value" array of this single
                    # response is read; if makeGraphApiCall does not follow
                    # @odata.nextLink, large folders are truncated - confirm.
                    apiResult = await self.apiClient.makeGraphApiCall(_buildChildrenEndpoint(siteId, folderPath))

                    if "error" in apiResult:
                        # A failing site is skipped; the remaining sites are still listed.
                        logger.warning(f"Failed to list folder {folderPath} in site {siteName}: {apiResult['error']}")
                        continue

                    # Process the results
                    processedItems = [
                        _buildItemInfo(item, self.services.sharepoint.detectFolderType(item), siteName, siteUrl)
                        for item in apiResult.get("value", [])
                    ]

                    # If include subfolders is enabled, get ONLY direct subfolder contents (1 level deep only)
                    if includeSubfolders:
                        # Snapshot the folders first: subfolder contents are
                        # appended to processedItems below and must not be expanded again.
                        folderEntries = [entry for entry in processedItems if entry["type"] == "folder"]
                        logger.info(f"Including subfolders - processing {len(folderEntries)} folders")

                        foldersToExpand = folderEntries[:_MAX_SUBFOLDERS]
                        for folderEntry in foldersToExpand:
                            subfolderPath = f"{folderPath.rstrip('/')}/{folderEntry['name']}"
                            subfolderEndpoint = f"sites/{siteId}/drive/items/{folderEntry['id']}/children"

                            logger.debug(f"Getting contents of subfolder: {folderEntry['name']}")
                            subfolderResult = await self.apiClient.makeGraphApiCall(subfolderEndpoint)
                            if "error" in subfolderResult:
                                logger.warning(f"Failed to get contents of subfolder {folderEntry['name']}: {subfolderResult.get('error')}")
                                continue

                            subfolderItems = subfolderResult.get("value", [])
                            logger.debug(f"Found {len(subfolderItems)} items in subfolder {folderEntry['name']}")

                            # Only add files and direct subfolders, NO RECURSION
                            for subfolderItem in subfolderItems:
                                processedItems.append(
                                    _buildItemInfo(
                                        subfolderItem,
                                        self.services.sharepoint.detectFolderType(subfolderItem),
                                        siteName,
                                        siteUrl,
                                        parentPath=subfolderPath
                                    )
                                )

                        # Warn only when folders were actually left unexpanded.
                        if len(folderEntries) > _MAX_SUBFOLDERS:
                            logger.warning(f"Reached maximum subfolder limit ({_MAX_SUBFOLDERS}), skipping remaining folders")

                        logger.info(f"Processed {len(foldersToExpand)} subfolders, total items: {len(processedItems)}")

                    folderResults.append({
                        "siteName": siteName,
                        "siteUrl": siteUrl,
                        "itemCount": len(processedItems),
                        "items": processedItems
                    })

                listResults.append({
                    "folderPath": folderPath,
                    "sitesProcessed": len(folderResults),
                    "siteResults": folderResults
                })

            except Exception as e:
                # One failing folder must not abort the whole listing; record
                # the error for this folder and continue with the next one.
                logger.error(f"Error listing folder {folderPath}: {str(e)}")
                listResults.append({
                    "folderPath": folderPath,
                    "error": str(e),
                    "sitesProcessed": 0,
                    "siteResults": []
                })

        # Create result data
        totalItems = sum(
            len(siteResult.get("items", []))
            for result in listResults
            for siteResult in result.get("siteResults", [])
        )

        resultData = {
            "listQuery": listQuery,
            "pathQuery": pathQuery,
            "totalItems": totalItems,
            "foldersProcessed": len(listResults),
            "listResults": listResults,
            "includeSubfolders": includeSubfolders,
            "timestamp": self.services.utils.timestampGetUtc()
        }

        self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {totalItems} item(s) in {len(listResults)} folder(s)")

        validationMetadata = {
            "actionType": "sharepoint.listDocuments",
            "listQuery": listQuery,
            "totalItems": totalItems,
            "foldersProcessed": len(listResults),
            "includeSubfolders": includeSubfolders
        }

        self.services.chat.progressLogFinish(operationId, True)
        return ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_list", "json", None, "listDocuments"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType="application/json",
                    validationMetadata=validationMetadata
                )
            ]
        )

    except Exception as e:
        logger.error(f"Error listing SharePoint documents: {str(e)}")
        if operationId:
            # Best effort: closing the progress log must never mask the original
            # error, but interpreter-level exits (KeyboardInterrupt, SystemExit)
            # are no longer swallowed here.
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                logger.debug(f"Could not finish progress log {operationId}: {finishError}")
        return ActionResult(
            success=False,
            error=str(e)
        )