# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

import logging
import time
import json
import urllib.parse
from typing import Any, Dict, List, Optional

from modules.datamodels.datamodelChatbot import ActionResult, ActionDocument

logger = logging.getLogger(__name__)

# Values starting with this prefix are treated as drive item IDs rather than
# paths. The original code tested for '01PPXICCB' OR '01'; the latter subsumes
# the former, so a single prefix check is behaviorally identical.
# NOTE(review): a folder *path* without a leading slash that starts with "01"
# (e.g. "01_Projects") would also match - confirm callers always pass
# slash-prefixed paths.
_FOLDER_ID_PREFIX = '01'

# Upper bound on the number of subfolders expanded per folder/site; caps the
# number of additional Graph API calls made when includeSubfolders is set.
_MAX_SUBFOLDERS = 10

# Substrings that mark a single path segment as a document library name.
_LIBRARY_INDICATORS = ('document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek')


def _isFolderId(value: str) -> bool:
    """Return True when *value* looks like a direct drive item ID."""
    return value.startswith(_FOLDER_ID_PREFIX)


def _failProgress(self, operationId: Optional[str], error: str) -> ActionResult:
    """Mark the progress operation as failed and build the failure result."""
    if operationId:
        self.services.chat.progressLogFinish(operationId, False)
    return ActionResult.isFailure(error=error)


def _buildItemInfo(
    item: Dict[str, Any],
    isFolder: bool,
    siteName: str,
    siteUrl: str,
    parentPath: Optional[str] = None,
) -> Dict[str, Any]:
    """Convert a raw Graph drive item into the flat dict used in the result.

    Key insertion order matches the original implementation so the serialized
    JSON is unchanged. ``parentPath`` is only emitted when provided (i.e. for
    items found inside an expanded subfolder).
    """
    info: Dict[str, Any] = {
        "id": item.get("id"),
        "name": item.get("name"),
        "size": item.get("size", 0),
        "createdDateTime": item.get("createdDateTime"),
        "lastModifiedDateTime": item.get("lastModifiedDateTime"),
        "webUrl": item.get("webUrl"),
        "type": "folder" if isFolder else "file",
    }
    if parentPath is not None:
        info["parentPath"] = parentPath
    info["siteName"] = siteName
    info["siteUrl"] = siteUrl

    # File-specific information
    if "file" in item:
        info["mimeType"] = item["file"].get("mimeType")
        info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
    return info


def _resolveFolderPaths(self, listQuery: str, pathQuery: str) -> List[str]:
    """Turn the list query into the folder paths (or folder ID) to list.

    A direct folder ID is returned as-is. Otherwise the ``/sites/<name>``
    prefix and a leading library-like segment are stripped from ``pathQuery``
    before it is handed to ``pathProcessing.resolvePathQuery``.
    """
    if _isFolderId(listQuery):
        logger.info(f"Using direct folder ID: {listQuery}")
        return [listQuery]

    # The site prefix is only used for site filtering, not for the drive path.
    pathQueryForResolve = pathQuery
    if pathQuery.startswith('/sites/'):
        # Microsoft-standard path: /sites/SiteName/Path -> /Path
        parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQuery)
        if parsedPath:
            innerPath = parsedPath.get("innerPath", "")
            pathQueryForResolve = '/' + innerPath if innerPath else '/'
        else:
            pathQueryForResolve = '/'

    # /drive/root already points at the default document library, so a leading
    # library name must not be part of the path sent to Graph.
    pathSegments = [s for s in pathQueryForResolve.split('/') if s.strip()]
    if len(pathSegments) > 1:
        # With several segments the first one is assumed to be a library name.
        # NOTE(review): no fallback to the original path is attempted if this
        # assumption is wrong; the original path is only logged.
        originalPath = pathQueryForResolve
        pathQueryForResolve = '/' + '/'.join(pathSegments[1:])
        logger.info(f"Removed first path segment (potential library name), path changed from '{originalPath}' to '{pathQueryForResolve}'")
    elif len(pathSegments) == 1:
        # A single segment is dropped only when it looks like a library name.
        firstSegmentLower = pathSegments[0].lower()
        if any(indicator in firstSegmentLower for indicator in _LIBRARY_INDICATORS):
            pathQueryForResolve = '/'
            logger.info(f"First segment '{pathSegments[0]}' appears to be a library name, using root")

    folderPaths = self.pathProcessing.resolvePathQuery(pathQueryForResolve)
    logger.info(f"Resolved folder paths: {folderPaths}")
    return folderPaths


async def _listSubfolderItems(
    self,
    siteId: str,
    siteName: str,
    siteUrl: str,
    folderPath: str,
    parentItems: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """List the direct children of the folders in *parentItems* (one level only).

    At most ``_MAX_SUBFOLDERS`` folders are expanded. Returns the child item
    dicts in folder order; *parentItems* is not modified.
    """
    folders = [item for item in parentItems if item['type'] == 'folder']
    logger.info(f"Including subfolders - processing {len(folders)} folders")

    childItems: List[Dict[str, Any]] = []
    expanded = folders[:_MAX_SUBFOLDERS]
    for folder in expanded:
        subfolderPath = f"{folderPath.rstrip('/')}/{folder['name']}"
        subfolderEndpoint = f"sites/{siteId}/drive/items/{folder['id']}/children"
        logger.debug(f"Getting contents of subfolder: {folder['name']}")

        subfolderResult = await self.apiClient.makeGraphApiCall(subfolderEndpoint)
        if "error" in subfolderResult:
            logger.warning(f"Failed to get contents of subfolder {folder['name']}: {subfolderResult.get('error')}")
            continue

        subfolderItems = subfolderResult.get("value", [])
        logger.debug(f"Found {len(subfolderItems)} items in subfolder {folder['name']}")
        for subfolderItem in subfolderItems:
            # Files and direct subfolders only - no recursion.
            subfolderIsFolder = self.services.sharepoint.detectFolderType(subfolderItem)
            childItems.append(
                _buildItemInfo(subfolderItem, subfolderIsFolder, siteName, siteUrl, parentPath=subfolderPath)
            )

    # Warn only when folders were actually left unexpanded.
    if len(folders) > _MAX_SUBFOLDERS:
        logger.warning(f"Reached maximum subfolder limit ({_MAX_SUBFOLDERS}), skipping remaining folders")

    logger.info(f"Processed {len(expanded)} subfolders, total items: {len(parentItems) + len(childItems)}")
    return childItems


async def _listFolderInSite(
    self,
    site: Dict[str, Any],
    folderPath: str,
    includeSubfolders: bool,
) -> Optional[Dict[str, Any]]:
    """List one folder in one site.

    Returns the per-site result dict, or ``None`` when the Graph call reported
    an error (the error is logged as a warning).
    """
    siteId = site["id"]
    siteName = site["displayName"]
    siteUrl = site["webUrl"]
    logger.info(f"Listing folder {folderPath} in site: {siteName}")

    if folderPath in ("/", "", "*"):
        # Root folder
        endpoint = f"sites/{siteId}/drive/root/children"
    elif _isFolderId(folderPath):
        # Direct folder ID
        endpoint = f"sites/{siteId}/drive/items/{folderPath}/children"
    else:
        # Path-based addressing: strip the leading slash and percent-encode
        # everything except the path separators.
        folderPathEncoded = urllib.parse.quote(folderPath.lstrip('/'), safe='/')
        endpoint = f"sites/{siteId}/drive/root:/{folderPathEncoded}:/children"

    apiResult = await self.apiClient.makeGraphApiCall(endpoint)
    if "error" in apiResult:
        logger.warning(f"Failed to list folder {folderPath} in site {siteName}: {apiResult['error']}")
        return None

    # NOTE(review): only the "value" array of this response is read. If
    # makeGraphApiCall does not follow @odata.nextLink itself, large folders
    # would be truncated to the first page - confirm against the API client.
    processedItems: List[Dict[str, Any]] = []
    for item in apiResult.get("value", []):
        isFolder = self.services.sharepoint.detectFolderType(item)
        itemInfo = _buildItemInfo(item, isFolder, siteName, siteUrl)
        # Folder-specific information (top-level items only, as before)
        if "folder" in item:
            itemInfo["childCount"] = item["folder"].get("childCount", 0)
        processedItems.append(itemInfo)

    if includeSubfolders:
        processedItems.extend(
            await _listSubfolderItems(self, siteId, siteName, siteUrl, folderPath, processedItems)
        )

    return {
        "siteName": siteName,
        "siteUrl": siteUrl,
        "itemCount": len(processedItems),
        "items": processedItems,
    }


async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
    """List the contents of SharePoint folders and return them as a JSON document.

    Reads the following keys from ``parameters``:

    - ``connectionReference`` (required): reference used to look up the
      Microsoft connection.
    - ``documentList``: a string or list passed to
      ``documentParsing.parseDocumentListForFolder`` to obtain the folder
      query and sites.
    - ``pathQuery``: used as the list query (and to resolve sites) when
      ``documentList`` yields no folder; ``"*"`` and blank count as not set.
    - ``includeSubfolders`` (default ``False``): also list the direct children
      of up to ``_MAX_SUBFOLDERS`` subfolders per folder and site.
    - ``parentOperationId``: forwarded to the progress log.

    Returns:
        A successful ``ActionResult`` carrying one ``application/json``
        ``ActionDocument`` with the per-folder, per-site listing, or a failed
        ``ActionResult`` describing the validation or runtime error. Exceptions
        are caught and converted into a failed result; the progress operation
        is always finished.
    """
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_list_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "List Documents",
            "SharePoint Listing",
            "Processing document list",
            parentOperationId=parentOperationId
        )

        connectionReference = parameters.get("connectionReference")
        documentList = parameters.get("documentList")
        pathQuery = parameters.get("pathQuery", "*")
        if isinstance(documentList, str):
            documentList = [documentList]
        includeSubfolders = parameters.get("includeSubfolders", False)  # Default to False for better UX

        if not connectionReference:
            return _failProgress(self, operationId, "Connection reference is required")

        # A path query of "*" or whitespace is treated the same as no path query.
        hasPathQuery = bool(pathQuery) and pathQuery.strip() not in ("", "*")

        # Require either documentList or pathQuery
        if not documentList and not hasPathQuery:
            return _failProgress(self, operationId, "Either documentList or pathQuery is required")

        # Parse documentList to extract folder path and site information
        listQuery, sites, _, errorMsg = await self.documentParsing.parseDocumentListForFolder(documentList)
        if errorMsg:
            return _failProgress(self, operationId, errorMsg)

        # If no folder path found from documentList, use pathQuery if provided
        if not listQuery and hasPathQuery:
            listQuery = pathQuery
            logger.info(f"Using pathQuery for list query: {listQuery}")

            # Resolve sites from pathQuery
            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return _failProgress(self, operationId, errorMsg)

        # Validate required parameters
        if not listQuery:
            return _failProgress(
                self,
                operationId,
                "Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get folder path, or provide pathQuery directly.",
            )
        if not sites:
            return _failProgress(
                self,
                operationId,
                "Site information missing. Cannot determine target site for list operation.",
            )

        # Get connection
        self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return _failProgress(
                self,
                operationId,
                "No valid Microsoft connection found for the provided connection reference",
            )

        logger.info(f"Starting SharePoint listDocuments for listQuery: {listQuery}")
        logger.debug(f"Connection ID: {connection['id']}")

        self.services.chat.progressLogUpdate(operationId, 0.3, "Processing folder path")

        # Only the path component of the parsed query is needed for listing;
        # note that pathQuery is rebound here and reported in the result.
        pathQuery, _fileQuery, _searchType, _searchOptions = self.pathProcessing.parseSearchQuery(listQuery)

        folderPaths = _resolveFolderPaths(self, listQuery, pathQuery)

        # Process each folder path across all sites
        listResults = []
        self.services.chat.progressLogUpdate(operationId, 0.5, f"Listing {len(folderPaths)} folder(s) across {len(sites)} site(s)")

        for folderPath in folderPaths:
            try:
                folderResults = []
                for site in sites:
                    siteResult = await _listFolderInSite(self, site, folderPath, includeSubfolders)
                    if siteResult is not None:
                        folderResults.append(siteResult)

                listResults.append({
                    "folderPath": folderPath,
                    "sitesProcessed": len(folderResults),
                    "siteResults": folderResults
                })
            except Exception as e:
                # One failing folder must not abort the remaining folders.
                logger.error(f"Error listing folder {folderPath}: {str(e)}")
                listResults.append({
                    "folderPath": folderPath,
                    "error": str(e),
                    "sitesProcessed": 0,
                    "siteResults": []
                })

        # Create result data
        totalItems = sum(
            len(siteResult.get("items", []))
            for result in listResults
            for siteResult in result.get("siteResults", [])
        )

        resultData = {
            "listQuery": listQuery,
            "pathQuery": pathQuery,
            "totalItems": totalItems,
            "foldersProcessed": len(listResults),
            "listResults": listResults,
            "includeSubfolders": includeSubfolders,
            "timestamp": self.services.utils.timestampGetUtc()
        }

        self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {totalItems} item(s) in {len(listResults)} folder(s)")

        validationMetadata = {
            "actionType": "sharepoint.listDocuments",
            "listQuery": listQuery,
            "totalItems": totalItems,
            "foldersProcessed": len(listResults),
            "includeSubfolders": includeSubfolders
        }

        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_list", "json", None, "listDocuments"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType="application/json",
                    validationMetadata=validationMetadata
                )
            ]
        )

    except Exception as e:
        logger.error(f"Error listing SharePoint documents: {str(e)}")
        if operationId:
            # Best effort: a failure to close the progress entry must not mask
            # the original error, but it should not vanish silently either.
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                logger.warning(f"Failed to finish progress log for {operationId}: {finishError}")
        return ActionResult(
            success=False,
            error=str(e)
        )