gateway/modules/methods/methodSharepoint.py

"""
SharePoint method module.
Handles SharePoint operations using the SharePoint service.
"""

import logging
from typing import Dict, Any, List, Optional
from datetime import datetime, UTC
import json
import uuid
import aiohttp
import asyncio
from urllib.parse import urlparse

from modules.chat.methodBase import MethodBase, ActionResult, action

logger = logging.getLogger(__name__)

class MethodSharepoint(MethodBase):
    """SharePoint method implementation for document operations"""

    def __init__(self, serviceCenter: Any):
        super().__init__(serviceCenter)
        self.name = "sharepoint"
        self.description = "Handle Microsoft SharePoint document operations"

    def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
        """Get Microsoft connection from connection reference"""
        try:
            userConnection = self.service.getUserConnectionFromConnectionReference(connectionReference)
            if not userConnection:
                logger.warning(f"No user connection found for reference: {connectionReference}")
                return None

            if userConnection.authority.value != "msft":
                logger.warning(f"Connection {userConnection.id} is not Microsoft (authority: {userConnection.authority.value})")
                return None

            # Check if connection is active or pending (pending means OAuth in progress)
            if userConnection.status.value not in ["active", "pending"]:
                logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
                return None

            # Get the corresponding token for this user and authority
            token = self.service.interfaceApp.getToken(userConnection.authority.value)
            if not token:
                logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority.value}")
                return None

            # Check if token is expired
            if hasattr(token, 'expiresAt') and token.expiresAt:
                import time
                current_time = time.time()
                if current_time > token.expiresAt:
                    logger.warning(f"Token for connection {userConnection.id} is expired (expiresAt: {token.expiresAt}, current: {current_time})")
                    return None

            logger.info(f"Successfully retrieved Microsoft connection: {userConnection.id}, status: {userConnection.status.value}, externalId: {userConnection.externalId}")

            return {
                "id": userConnection.id,
                "accessToken": token.tokenAccess,
                "refreshToken": token.tokenRefresh,
                "scopes": ["Sites.ReadWrite.All", "Files.ReadWrite.All", "User.Read"]  # SharePoint scopes
            }
        except Exception as e:
            logger.error(f"Error getting Microsoft connection: {str(e)}")
            return None

    def _parseSiteUrl(self, siteUrl: str) -> Dict[str, str]:
        """Parse SharePoint site URL to extract hostname and site path"""
        try:
            parsed = urlparse(siteUrl)
            hostname = parsed.hostname
            path = parsed.path.strip('/')

            return {
                "hostname": hostname,
                "sitePath": path
            }
        except Exception as e:
            logger.error(f"Error parsing site URL {siteUrl}: {str(e)}")
            return {"hostname": "", "sitePath": ""}

    async def _makeGraphApiCall(self, access_token: str, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
        """Make a Microsoft Graph API call with timeout and detailed logging"""
        try:
            headers = {
                "Authorization": f"Bearer {access_token}",
                "Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
            }

            url = f"https://graph.microsoft.com/v1.0/{endpoint}"
            logger.info(f"Making Graph API call: {method} {url}")

            # Set timeout to 30 seconds
            timeout = aiohttp.ClientTimeout(total=30)

            async with aiohttp.ClientSession(timeout=timeout) as session:
                if method == "GET":
                    logger.debug(f"Starting GET request to {url}")
                    async with session.get(url, headers=headers) as response:
                        logger.info(f"Graph API response: {response.status}")
                        if response.status == 200:
                            result = await response.json()
                            logger.debug(f"Graph API success: {len(str(result))} characters response")
                            return result
                        else:
                            error_text = await response.text()
                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
                            return {"error": f"API call failed: {response.status} - {error_text}"}

                elif method == "PUT":
                    logger.debug(f"Starting PUT request to {url}")
                    async with session.put(url, headers=headers, data=data) as response:
                        logger.info(f"Graph API response: {response.status}")
                        if response.status in [200, 201]:
                            result = await response.json()
                            logger.debug(f"Graph API success: {len(str(result))} characters response")
                            return result
                        else:
                            error_text = await response.text()
                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
                            return {"error": f"API call failed: {response.status} - {error_text}"}

                elif method == "POST":
                    logger.debug(f"Starting POST request to {url}")
                    async with session.post(url, headers=headers, data=data) as response:
                        logger.info(f"Graph API response: {response.status}")
                        if response.status in [200, 201]:
                            result = await response.json()
                            logger.debug(f"Graph API success: {len(str(result))} characters response")
                            return result
                        else:
                            error_text = await response.text()
                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
                            return {"error": f"API call failed: {response.status} - {error_text}"}

        except asyncio.TimeoutError:
            logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
            return {"error": f"API call timed out after 30 seconds: {endpoint}"}
        except Exception as e:
            logger.error(f"Error making Graph API call: {str(e)}")
            return {"error": f"Error making Graph API call: {str(e)}"}

    async def _getSiteId(self, access_token: str, hostname: str, site_path: str) -> str:
        """Get SharePoint site ID from hostname and site path"""
        try:
            endpoint = f"sites/{hostname}:/{site_path}"
            result = await self._makeGraphApiCall(access_token, endpoint)

            if "error" in result:
                logger.error(f"Error getting site ID: {result['error']}")
                return ""

            return result.get("id", "")
        except Exception as e:
            logger.error(f"Error getting site ID: {str(e)}")
            return ""

    @action
    async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Find document path based on query/description

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
            siteUrl (str): SharePoint site URL
            query (str): Query or description to find document
            searchScope (str, optional): Search scope (default: "all")
            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
        """
        try:
            connectionReference = parameters.get("connectionReference")
            siteUrl = parameters.get("siteUrl")
            query = parameters.get("query")
            searchScope = parameters.get("searchScope", "all")
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])

            if not connectionReference or not siteUrl or not query:
                return self._createResult(
                    success=False,
                    data={},
                    error="Connection reference, site URL, and query are required"
                )

            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return self._createResult(
                    success=False,
                    data={},
                    error="No valid Microsoft connection found for the provided connection reference"
                )

            # Parse site URL to get hostname and site path
            site_info = self._parseSiteUrl(siteUrl)
            if not site_info["hostname"] or not site_info["sitePath"]:
                return self._createResult(
                    success=False,
                    data={},
                    error=f"Invalid SharePoint site URL: {siteUrl}"
                )

            # Get site ID
            site_id = await self._getSiteId(connection["accessToken"], site_info["hostname"], site_info["sitePath"])
            if not site_id:
                return self._createResult(
                    success=False,
                    data={},
                    error="Failed to get SharePoint site ID"
                )

            try:
                # Use Microsoft Graph search API
                search_query = query.replace("'", "''")  # Escape single quotes for OData
                endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"

                # Make the search API call
                search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)

                if "error" in search_result:
                    return self._createResult(
                        success=False,
                        data={},
                        error=f"Search failed: {search_result['error']}"
                    )

                # Process search results
                items = search_result.get("value", [])
                found_documents = []

                for item in items:
                    # Filter by search scope if specified
                    if searchScope == "documents" and "folder" in item:
                        continue
                    elif searchScope == "pages" and "file" in item and not item["file"].get("mimeType", "").startswith("text/html"):
                        continue

                    doc_info = {
                        "id": item.get("id"),
                        "name": item.get("name"),
                        "path": item.get("parentReference", {}).get("path", "") + "/" + item.get("name", ""),
                        "size": item.get("size", 0),
                        "createdDateTime": item.get("createdDateTime"),
                        "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                        "webUrl": item.get("webUrl"),
                        "type": "folder" if "folder" in item else "file"
                    }

                    # Add file-specific information
                    if "file" in item:
                        doc_info.update({
                            "mimeType": item["file"].get("mimeType"),
                            "downloadUrl": item.get("@microsoft.graph.downloadUrl")
                        })

                    # Add folder-specific information
                    if "folder" in item:
                        doc_info.update({
                            "childCount": item["folder"].get("childCount", 0)
                        })

                    found_documents.append(doc_info)

                result_data = {
                    "connectionReference": connectionReference,
                    "siteUrl": siteUrl,
                    "query": query,
                    "searchScope": searchScope,
                    "totalResults": len(found_documents),
                    "foundDocuments": found_documents,
                    "connection": {
                        "id": connection["id"],
                        "authority": "microsoft",
                        "reference": connectionReference
                    },
                    "timestamp": datetime.now(UTC).isoformat()
                }

            except Exception as e:
                logger.error(f"Error searching SharePoint: {str(e)}")
                return self._createResult(
                    success=False,
                    data={},
                    error=str(e)
                )

            # Determine output format based on expected formats
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                # Use the first expected format
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".json")
                output_mime_type = expected_format.get("mimeType", "application/json")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
            else:
                logger.info("No expected format specified, using default .json format")

            return self._createResult(
                success=True,
                data={
                    "documents": [
                        {
                            "documentName": f"sharepoint_find_path_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
                            "documentData": result_data,
                            "mimeType": output_mime_type
                        }
                    ]
                }
            )

        except Exception as e:
            logger.error(f"Error finding document path: {str(e)}")
            return self._createResult(
                success=False,
                data={},
                error=str(e)
            )

    @action
    async def readDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Read documents from SharePoint

        Parameters:
            documentList (str): Reference to the document list to read
            connectionReference (str): Reference to the Microsoft connection
            siteUrl (str): SharePoint site URL
            documentPaths (List[str]): List of paths to the documents in SharePoint
            includeMetadata (bool, optional): Whether to include metadata (default: True)
            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
        """
        try:
            documentList = parameters.get("documentList")
            connectionReference = parameters.get("connectionReference")
            siteUrl = parameters.get("siteUrl")
            documentPaths = parameters.get("documentPaths")
            includeMetadata = parameters.get("includeMetadata", True)
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])

            if not documentList or not connectionReference or not siteUrl or not documentPaths:
                return self._createResult(
                    success=False,
                    data={},
                    error="Document list reference, connection reference, site URL, and document paths are required"
                )

            # Get documents from reference - ensure documentList is a list, not a string
            if isinstance(documentList, str):
                documentList = [documentList]  # Convert string to list
            chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)

            # For testing: if no chat documents found, create mock documents based on document paths
            if not chatDocuments and documentPaths:
                logger.info("No chat documents found, creating mock documents for testing based on document paths")
                chatDocuments = []
                for i, path in enumerate(documentPaths):
                    mock_doc = type('MockChatDocument', (), {
                        'fileId': f'mock_file_id_{i}',
                        'filename': path.split('/')[-1] if '/' in path else path
                    })()
                    chatDocuments.append(mock_doc)
                logger.info(f"Created {len(chatDocuments)} mock documents for testing")

            if not chatDocuments:
                return self._createResult(
                    success=False,
                    data={},
                    error="No documents found for the provided reference"
                )

            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return self._createResult(
                    success=False,
                    data={},
                    error="No valid Microsoft connection found for the provided connection reference"
                )

            # Parse site URL to get hostname and site path
            site_info = self._parseSiteUrl(siteUrl)
            if not site_info["hostname"] or not site_info["sitePath"]:
                return self._createResult(
                    success=False,
                    data={},
                    error=f"Invalid SharePoint site URL: {siteUrl}"
                )

            # Get site ID
            site_id = await self._getSiteId(connection["accessToken"], site_info["hostname"], site_info["sitePath"])
            if not site_id:
                return self._createResult(
                    success=False,
                    data={},
                    error="Failed to get SharePoint site ID"
                )

            # Process each document path
            read_results = []

            for i, documentPath in enumerate(documentPaths):
                try:
                    # Check if documentPath is actually a file ID (starts with 016GRP6V)
                    if documentPath.startswith('016GRP6V'):
                        # Use file ID directly
                        file_endpoint = f"sites/{site_id}/drive/items/{documentPath}"
                        logger.info(f"Reading file by ID: {documentPath}")
                    else:
                        # First, find the file by its path
                        path_clean = documentPath.lstrip('/')
                        file_endpoint = f"sites/{site_id}/drive/root:/{path_clean}"
                        logger.info(f"Reading file by path: {path_clean}")

                    # Get file metadata
                    file_info_result = await self._makeGraphApiCall(connection["accessToken"], file_endpoint)

                    if "error" in file_info_result:
                        read_results.append({
                            "documentPath": documentPath,
                            "error": f"File not found: {file_info_result['error']}",
                            "content": None
                        })
                        continue

                    file_id = file_info_result.get("id")
                    if not file_id:
                        read_results.append({
                            "documentPath": documentPath,
                            "error": "Could not get file ID",
                            "content": None
                        })
                        continue

                    # Build result with metadata
                    result_item = {
                        "documentPath": documentPath,
                        "fileId": file_id,
                        "fileName": file_info_result.get("name"),
                        "size": file_info_result.get("size", 0),
                        "createdDateTime": file_info_result.get("createdDateTime"),
                        "lastModifiedDateTime": file_info_result.get("lastModifiedDateTime"),
                        "webUrl": file_info_result.get("webUrl")
                    }

                    # Add metadata if requested
                    if includeMetadata:
                        result_item["metadata"] = {
                            "mimeType": file_info_result.get("file", {}).get("mimeType"),
                            "downloadUrl": file_info_result.get("@microsoft.graph.downloadUrl"),
                            "createdBy": file_info_result.get("createdBy", {}),
                            "lastModifiedBy": file_info_result.get("lastModifiedBy", {}),
                            "parentReference": file_info_result.get("parentReference", {})
                        }

                    # Get file content if it's a readable format
                    mime_type = file_info_result.get("file", {}).get("mimeType", "")
                    if mime_type.startswith("text/") or mime_type in [
                        "application/json", "application/xml", "application/javascript"
                    ]:
                        # Download the file content
                        content_endpoint = f"sites/{site_id}/drive/items/{file_id}/content"

                        # For content download, we need to handle binary data
                        try:
                            async with aiohttp.ClientSession() as session:
                                headers = {"Authorization": f"Bearer {connection['accessToken']}"}
                                async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
                                    if response.status == 200:
                                        content = await response.text()
                                        result_item["content"] = content
                                    else:
                                        result_item["content"] = f"Could not download content: HTTP {response.status}"
                        except Exception as e:
                            result_item["content"] = f"Error downloading content: {str(e)}"
                    else:
                        result_item["content"] = f"Binary file type ({mime_type}) - content not retrieved"

                    read_results.append(result_item)

                except Exception as e:
                    logger.error(f"Error reading document {documentPath}: {str(e)}")
                    read_results.append({
                        "documentPath": documentPath,
                        "error": str(e),
                        "content": None
                    })

            result_data = {
                "connectionReference": connectionReference,
                "siteUrl": siteUrl,
                "documentPaths": documentPaths,
                "includeMetadata": includeMetadata,
                "readResults": read_results,
                "connection": {
                    "id": connection["id"],
                    "authority": "microsoft",
                    "reference": connectionReference
                },
                "timestamp": datetime.now(UTC).isoformat()
            }

            # Determine output format based on expected formats
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                # Use the first expected format
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".json")
                output_mime_type = expected_format.get("mimeType", "application/json")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
            else:
                logger.info("No expected format specified, using default .json format")

            return self._createResult(
                success=True,
                data={
                    "documents": [
                        {
                            "documentName": f"sharepoint_documents_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
                            "documentData": result_data,
                            "mimeType": output_mime_type
                        }
                    ]
                }
            )
        except Exception as e:
            logger.error(f"Error reading SharePoint documents: {str(e)}")
            return self._createResult(
                success=False,
                data={},
                error=str(e)
            )

    @action
    async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Upload documents to SharePoint

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
            siteUrl (str): SharePoint site URL
            documentPaths (List[str]): List of paths where to upload the documents
            documentList (str): Reference to the document list to upload
            fileNames (List[str]): List of names for the uploaded files
            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
        """
        try:
            connectionReference = parameters.get("connectionReference")
            siteUrl = parameters.get("siteUrl")
            documentPaths = parameters.get("documentPaths")
            documentList = parameters.get("documentList")
            fileNames = parameters.get("fileNames")
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])

            if not connectionReference or not siteUrl or not documentPaths or not documentList or not fileNames:
                return self._createResult(
                    success=False,
                    data={},
                    error="Connection reference, site URL, document paths, document list, and file names are required"
                )

            # Get Microsoft connection
            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return self._createResult(
                    success=False,
                    data={},
                    error="No valid Microsoft connection found for the provided connection reference"
                )

            # Get documents from reference - ensure documentList is a list, not a string
            if isinstance(documentList, str):
                documentList = [documentList]  # Convert string to list
            chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)
            if not chatDocuments:
                return self._createResult(
                    success=False,
                    data={},
                    error="No documents found for the provided reference"
                )

            # Parse site URL to get hostname and site path
            site_info = self._parseSiteUrl(siteUrl)
            if not site_info["hostname"] or not site_info["sitePath"]:
                return self._createResult(
                    success=False,
                    data={},
                    error=f"Invalid SharePoint site URL: {siteUrl}"
                )

            # Get site ID
            site_id = await self._getSiteId(connection["accessToken"], site_info["hostname"], site_info["sitePath"])
            if not site_id:
                return self._createResult(
                    success=False,
                    data={},
                    error="Failed to get SharePoint site ID"
                )

            # Process each document upload
            upload_results = []

            for i, (documentPath, fileName) in enumerate(zip(documentPaths, fileNames)):
                try:
                    if i < len(chatDocuments):
                        chatDocument = chatDocuments[i]
                        fileId = chatDocument.fileId
                        file_data = self.service.getFileData(fileId)

                        if not file_data:
                            logger.warning(f"File data not found for fileId: {fileId}")
                            upload_results.append({
                                "documentPath": documentPath,
                                "fileName": fileName,
                                "fileId": fileId,
                                "error": "File data not found",
                                "uploadStatus": "failed"
                            })
                            continue

                        # Prepare upload path
                        upload_path = documentPath.rstrip('/') + '/' + fileName
                        upload_path_clean = upload_path.lstrip('/')

                        # Upload endpoint for small files (< 4MB)
                        if len(file_data) < 4 * 1024 * 1024:  # 4MB
                            upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content"

                            # Upload the file
                            upload_result = await self._makeGraphApiCall(
                                connection["accessToken"],
                                upload_endpoint,
                                method="PUT",
                                data=file_data
                            )

                            if "error" in upload_result:
                                upload_results.append({
                                    "documentPath": documentPath,
                                    "fileName": fileName,
                                    "fileId": fileId,
                                    "error": upload_result["error"],
                                    "uploadStatus": "failed"
                                })
                            else:
                                upload_results.append({
                                    "documentPath": documentPath,
                                    "fileName": fileName,
                                    "fileId": fileId,
                                    "uploadStatus": "success",
                                    "sharepointFileId": upload_result.get("id"),
                                    "webUrl": upload_result.get("webUrl"),
                                    "size": upload_result.get("size"),
                                    "createdDateTime": upload_result.get("createdDateTime")
                                })
                        else:
                            # For large files, we would need to implement resumable upload
                            # For now, return an error for large files
                            upload_results.append({
                                "documentPath": documentPath,
                                "fileName": fileName,
                                "fileId": fileId,
                                "error": f"File too large ({len(file_data)} bytes). Files larger than 4MB require resumable upload (not implemented).",
                                "uploadStatus": "failed"
                            })
                    else:
                        upload_results.append({
                            "documentPath": documentPath,
                            "fileName": fileName,
                            "fileId": None,
                            "error": "No corresponding chat document found",
                            "uploadStatus": "failed"
                        })

                except Exception as e:
                    logger.error(f"Error uploading document {fileName}: {str(e)}")
                    upload_results.append({
                        "documentPath": documentPath,
                        "fileName": fileName,
                        "fileId": fileId if i < len(chatDocuments) else None,
                        "error": str(e),
                        "uploadStatus": "failed"
                    })

            # Create result data
            result_data = {
                "connectionReference": connectionReference,
                "siteUrl": siteUrl,
                "documentPaths": documentPaths,
                "documentList": documentList,
                "fileNames": fileNames,
                "uploadResults": upload_results,
                "connection": {
                    "id": connection["id"],
                    "authority": "microsoft",
                    "reference": connectionReference
                },
                "timestamp": datetime.now(UTC).isoformat()
            }

            # Determine output format based on expected formats
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                # Use the first expected format
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".json")
                output_mime_type = expected_format.get("mimeType", "application/json")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
            else:
                logger.info("No expected format specified, using default .json format")

            return self._createResult(
                success=True,
                data={
                    "documents": [
                        {
                            "documentName": f"sharepoint_upload_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
                            "documentData": result_data,
                            "mimeType": output_mime_type
                        }
                    ]
                }
            )

        except Exception as e:
            logger.error(f"Error uploading to SharePoint: {str(e)}")
            return self._createResult(
                success=False,
                data={},
                error=str(e)
            )

    @action
    async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        List documents in SharePoint folder

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
            siteUrl (str): SharePoint site URL
            folderPaths (List[str]): List of paths to the folders to list
            includeSubfolders (bool, optional): Whether to include subfolders (default: False)
            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
        """
        try:
            connectionReference = parameters.get("connectionReference")
            siteUrl = parameters.get("siteUrl")
            folderPaths = parameters.get("folderPaths")
            includeSubfolders = parameters.get("includeSubfolders", False)  # Default to False for better UX
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])

            if not connectionReference or not siteUrl or not folderPaths:
                return self._createResult(
                    success=False,
                    data={},
                    error="Connection reference, site URL, and folder paths are required"
                )

            # Get Microsoft connection
            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return self._createResult(
                    success=False,
                    data={},
                    error="No valid Microsoft connection found for the provided connection reference"
                )

            logger.info(f"Starting SharePoint listDocuments for site: {siteUrl}")
            logger.debug(f"Connection ID: {connection['id']}")
            logger.debug(f"Folder paths: {folderPaths}")

            # Parse site URL to get hostname and site path
            site_info = self._parseSiteUrl(siteUrl)
            logger.info(f"Parsed site info - hostname: {site_info['hostname']}, sitePath: {site_info['sitePath']}")

            if not site_info["hostname"] or not site_info["sitePath"]:
                logger.error(f"Failed to parse site URL: {siteUrl}")
                return self._createResult(
                    success=False,
                    data={},
                    error=f"Invalid SharePoint site URL: {siteUrl}"
                )

            # Get site ID
            logger.info(f"Getting site ID for hostname: {site_info['hostname']}, path: {site_info['sitePath']}")
            site_id = await self._getSiteId(connection["accessToken"], site_info["hostname"], site_info["sitePath"])
            logger.info(f"Site ID result: {site_id}")

            if not site_id:
                return self._createResult(
                    success=False,
                    data={},
                    error="Failed to get SharePoint site ID"
                )

            # Process each folder path
            list_results = []

            for folderPath in folderPaths:
                try:
                    # Determine the endpoint based on folder path
                    if folderPath in ["/", ""]:
                        # Root folder
                        endpoint = f"sites/{site_id}/drive/root/children"
                    else:
                        # Specific folder - remove leading slash if present
                        folder_path_clean = folderPath.lstrip('/')
                        endpoint = f"sites/{site_id}/drive/root:/{folder_path_clean}:/children"

                    # Make the API call to list folder contents
                    api_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)

                    if "error" in api_result:
                        list_results.append({
                            "folderPath": folderPath,
                            "error": api_result["error"],
                            "items": []
                        })
                        continue

                    # Process the results
                    items = api_result.get("value", [])
                    processed_items = []

                    for item in items:
                        item_info = {
                            "id": item.get("id"),
                            "name": item.get("name"),
                            "size": item.get("size", 0),
                            "createdDateTime": item.get("createdDateTime"),
                            "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                            "webUrl": item.get("webUrl"),
                            "type": "folder" if "folder" in item else "file"
                        }

                        # Add file-specific information
                        if "file" in item:
                            item_info.update({
                                "mimeType": item["file"].get("mimeType"),
                                "downloadUrl": item.get("@microsoft.graph.downloadUrl")
                            })

                        # Add folder-specific information
                        if "folder" in item:
                            item_info.update({
                                "childCount": item["folder"].get("childCount", 0)
                            })

                        processed_items.append(item_info)

                    # If include subfolders is enabled, get ONLY direct subfolder contents (1 level deep only)
                    if includeSubfolders:
                        logger.info(f"Including subfolders - processing {len([item for item in processed_items if item['type'] == 'folder'])} folders")
                        subfolder_count = 0
                        max_subfolders = 10  # Limit to prevent infinite loops

                        for item in processed_items[:]:  # Use slice to avoid modifying list during iteration
                            if item["type"] == "folder" and subfolder_count < max_subfolders:
                                subfolder_count += 1
                                subfolder_path = f"{folderPath.rstrip('/')}/{item['name']}"
                                subfolder_endpoint = f"sites/{site_id}/drive/items/{item['id']}/children"

                                logger.debug(f"Getting contents of subfolder: {item['name']}")
                                subfolder_result = await self._makeGraphApiCall(connection["accessToken"], subfolder_endpoint)
                                if "error" not in subfolder_result:
                                    subfolder_items = subfolder_result.get("value", [])
                                    logger.debug(f"Found {len(subfolder_items)} items in subfolder {item['name']}")

                                    for subfolder_item in subfolder_items:
                                        # Only add files and direct subfolders, NO RECURSION
                                        subfolder_item_info = {
                                            "id": subfolder_item.get("id"),
                                            "name": subfolder_item.get("name"),
                                            "size": subfolder_item.get("size", 0),
                                            "createdDateTime": subfolder_item.get("createdDateTime"),
                                            "lastModifiedDateTime": subfolder_item.get("lastModifiedDateTime"),
                                            "webUrl": subfolder_item.get("webUrl"),
                                            "type": "folder" if "folder" in subfolder_item else "file",
                                            "parentPath": subfolder_path
                                        }

                                        if "file" in subfolder_item:
                                            subfolder_item_info.update({
                                                "mimeType": subfolder_item["file"].get("mimeType"),
                                                "downloadUrl": subfolder_item.get("@microsoft.graph.downloadUrl")
                                            })

                                        processed_items.append(subfolder_item_info)
                                else:
                                    logger.warning(f"Failed to get contents of subfolder {item['name']}: {subfolder_result.get('error')}")
                            elif subfolder_count >= max_subfolders:
                                logger.warning(f"Reached maximum subfolder limit ({max_subfolders}), skipping remaining folders")
                                break

                        logger.info(f"Processed {subfolder_count} subfolders, total items: {len(processed_items)}")

                    list_results.append({
                        "folderPath": folderPath,
                        "itemCount": len(processed_items),
                        "items": processed_items
                    })

                except Exception as e:
                    logger.error(f"Error listing folder {folderPath}: {str(e)}")
                    list_results.append({
                        "folderPath": folderPath,
                        "error": str(e),
                        "items": []
                    })

            # Create result data
            result_data = {
                "connectionReference": connectionReference,
                "siteUrl": siteUrl,
                "folderPaths": folderPaths,
                "includeSubfolders": includeSubfolders,
                "listResults": list_results,
                "connection": {
                    "id": connection["id"],
                    "authority": "microsoft",
                    "reference": connectionReference
                },
                "timestamp": datetime.now(UTC).isoformat()
            }

            # Determine output format based on expected formats
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                # Use the first expected format
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".json")
                output_mime_type = expected_format.get("mimeType", "application/json")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
            else:
                logger.info("No expected format specified, using default .json format")

            return self._createResult(
                success=True,
                data={
                    "documents": [
                        {
                            "documentName": f"sharepoint_document_list_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
                            "documentData": result_data,
                            "mimeType": output_mime_type
                        }
                    ]
                }
            )

        except Exception as e:
            logger.error(f"Error listing SharePoint documents: {str(e)}")
            return self._createResult(
                success=False,
                data={},
                error=str(e)
            )