gateway/modules/methods/methodSharepoint.py

"""
SharePoint operations method module.
Handles SharePoint document operations using the SharePoint service.
"""

import logging
import json
import re
from typing import Dict, Any, List, Optional
from datetime import datetime, UTC
import base64
from urllib.parse import urlparse
import aiohttp
import asyncio

from modules.chat.methodBase import MethodBase, action
from modules.interfaces.interfaceChatModel import ActionResult
from modules.shared.timezoneUtils import get_utc_timestamp

logger = logging.getLogger(__name__)

class MethodSharepoint(MethodBase):
    """SharePoint operations methods."""

    def __init__(self, service):
        super().__init__(service)
        self.name = "sharepoint"
        self.description = "SharePoint operations methods"

    def _format_timestamp_for_filename(self) -> str:
        """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
        return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")

    def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
        """Get Microsoft connection from connection reference"""
        try:
            userConnection = self.service.getUserConnectionFromConnectionReference(connectionReference)
            if not userConnection:
                logger.warning(f"No user connection found for reference: {connectionReference}")
                return None

            if userConnection.authority.value != "msft":
                logger.warning(f"Connection {userConnection.id} is not Microsoft (authority: {userConnection.authority.value})")
                return None

            # Check if connection is active or pending (pending means OAuth in progress)
            if userConnection.status.value not in ["active", "pending"]:
                logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
                return None

            # Get the token for this specific connection
            token = self.service.interfaceApp.getConnectionToken(userConnection.id)
            if not token:
                logger.warning(f"No token found for connection {userConnection.id}")
                return None

            # Check if token is expired
            if hasattr(token, 'expiresAt') and token.expiresAt:
                current_time = get_utc_timestamp()
                if current_time > token.expiresAt:
                    logger.warning(f"Token for connection {userConnection.id} is expired (expiresAt: {token.expiresAt}, current: {current_time})")
                    return None

            logger.info(f"Successfully retrieved Microsoft connection: {userConnection.id}, status: {userConnection.status.value}, externalId: {userConnection.externalId}")

            return {
                "id": userConnection.id,
                "userConnection": userConnection,
                "accessToken": token.tokenAccess,
                "refreshToken": token.tokenRefresh,
                "scopes": ["Sites.ReadWrite.All", "Files.ReadWrite.All", "User.Read"]  # SharePoint scopes
            }
        except Exception as e:
            logger.error(f"Error getting Microsoft connection: {str(e)}")
            return None

    async def _discoverSharePointSites(self, access_token: str) -> List[Dict[str, Any]]:
        """
        Discover all SharePoint sites accessible to the user via Microsoft Graph API

        Parameters:
            access_token (str): Microsoft Graph access token

        Returns:
            List[Dict[str, Any]]: List of SharePoint site information
        """
        try:
            # Query Microsoft Graph to get all sites the user has access to
            endpoint = "sites?search=*"
            result = await self._makeGraphApiCall(access_token, endpoint)

            if "error" in result:
                logger.error(f"Error discovering SharePoint sites: {result['error']}")
                return []

            sites = result.get("value", [])
            logger.info(f"Discovered {len(sites)} SharePoint sites")

            # Process and return site information
            processed_sites = []
            for site in sites:
                site_info = {
                    "id": site.get("id"),
                    "displayName": site.get("displayName"),
                    "name": site.get("name"),
                    "webUrl": site.get("webUrl"),
                    "description": site.get("description"),
                    "createdDateTime": site.get("createdDateTime"),
                    "lastModifiedDateTime": site.get("lastModifiedDateTime")
                }
                processed_sites.append(site_info)
                logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}")

            return processed_sites

        except Exception as e:
            logger.error(f"Error discovering SharePoint sites: {str(e)}")
            return []

    def _filter_sites_by_hint(self, sites: List[Dict[str, Any]], site_hint: str) -> List[Dict[str, Any]]:
        """Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
        try:
            if not site_hint:
                return sites
            hint = site_hint.strip().lower()
            filtered: List[Dict[str, Any]] = []
            for site in sites:
                name = (site.get("displayName") or "").lower()
                web_url = (site.get("webUrl") or "").lower()
                if hint in name or hint in web_url:
                    filtered.append(site)
            return filtered if filtered else sites
        except Exception as e:
            logger.error(f"Error filtering sites by hint '{site_hint}': {str(e)}")
            return sites


    def _parse_site_scoped_path(self, path_query: str) -> Optional[Dict[str, str]]:
        """
        Parse a site-scoped path of the form:
        /site:KM LayerFinance/Documents/Work or /site:KM LayerFinance/Shared Documents/Work

        Returns dict with keys: siteName, innerPath (no leading slash) on success, else None.
        """
        try:
            if not path_query or not path_query.startswith('/'):
                return None
            # expected syntax prefix
            prefix = '/site:'
            if not path_query.startswith(prefix):
                return None
            remainder = path_query[len(prefix):]
            # split once on the next '/'
            if '/' not in remainder:
                return None
            site_name, inner = remainder.split('/', 1)
            site_name = site_name.strip()
            inner_path = inner.strip()
            if not site_name or not inner_path:
                return None
            return {"siteName": site_name, "innerPath": inner_path}
        except Exception as e:
            logger.error(f"Error parsing site-scoped path '{path_query}': {str(e)}")
            return None

    def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
        """
        Parse searchQuery to extract path, search terms, search type, and search options.

        Parameters:
            searchQuery (str): Enhanced search query with options:
                - "budget" -> pathQuery="*", fileQuery="budget", searchType="all", options={}
                - "/Documents:budget" -> pathQuery="/Documents", fileQuery="budget", searchType="all", options={}
                - "files:budget" -> pathQuery="*", fileQuery="budget", searchType="files", options={}
                - "folders:DELTA" -> pathQuery="*", fileQuery="DELTA", searchType="folders", options={}
                - "exact:\"Operations 2025\"" -> exact phrase matching
                - "regex:^Operations.*2025$" -> regex pattern matching
                - "case:DELTA" -> case-sensitive search
                - "and:DELTA AND 2025 Mars AND Group" -> all AND terms must be present

        Returns:
            tuple[str, str, str, dict]: (pathQuery, fileQuery, searchType, searchOptions)
        """
        try:
            if not searchQuery or not searchQuery.strip() or searchQuery.strip() == "*":
                return "*", "*", "all", {}

            searchQuery = searchQuery.strip()
            searchOptions = {}

            # Check for search type specification (files:, folders:, all:) FIRST
            searchType = "all"  # Default
            if searchQuery.startswith(("files:", "folders:", "all:")):
                type_parts = searchQuery.split(':', 1)
                searchType = type_parts[0].strip()
                searchQuery = type_parts[1].strip()

            # Extract optional site hint tokens: support "site=Name" or leading "site:Name"
            def _extract_site_hint(q: str) -> tuple[str, Optional[str]]:
                try:
                    q_strip = q.strip()
                    # Leading form: site:KM LayerFinance ...
                    if q_strip.lower().startswith("site:"):
                        after = q_strip[5:].lstrip()
                        # site name until next space or end
                        if ' ' in after:
                            site_name, rest = after.split(' ', 1)
                        else:
                            site_name, rest = after, ''
                        return rest.strip(), site_name.strip()
                    # Inline key=value form anywhere
                    m = re.search(r"\bsite=([^;\s]+)", q_strip, flags=re.IGNORECASE)
                    if m:
                        site_name = m.group(1).strip()
                        # remove the token from query
                        q_new = re.sub(r"\bsite=[^;\s]+;?", "", q_strip, flags=re.IGNORECASE).strip()
                        return q_new, site_name
                except Exception:
                    pass
                return q, None

            searchQuery, extracted_site = _extract_site_hint(searchQuery)
            if extracted_site:
                searchOptions["site_hint"] = extracted_site
                logger.info(f"Extracted site hint: '{extracted_site}'")

            # Extract name="..." if present (for quoted multi-word names)
            name_match = re.search(r"name=\"([^\"]+)\"", searchQuery)
            if name_match:
                searchQuery = name_match.group(1)
                logger.info(f"Extracted name from quotes: '{searchQuery}'")

            # Check for search mode specification (exact:, regex:, case:, and:)
            if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):
                mode_parts = searchQuery.split(':', 1)
                mode = mode_parts[0].strip()
                searchQuery = mode_parts[1].strip()

                if mode == "exact":
                    searchOptions["exact_match"] = True
                    # Remove quotes if present
                    if searchQuery.startswith('"') and searchQuery.endswith('"'):
                        searchQuery = searchQuery[1:-1]
                elif mode == "regex":
                    searchOptions["regex_match"] = True
                elif mode == "case":
                    searchOptions["case_sensitive"] = True
                elif mode == "and":
                    searchOptions["and_terms"] = True

            # Check if it contains path:search format
            if ':' in searchQuery:
                parts = searchQuery.split(':', 1)  # Split only on first colon
                path_part = parts[0].strip()
                search_part = parts[1].strip()

                # Handle path part
                if not path_part or path_part == "*":
                    pathQuery = "*"
                elif path_part.startswith('/'):
                    pathQuery = path_part
                else:
                    pathQuery = f"/Documents/{path_part}"

                # Handle search part
                if not search_part or search_part == "*":
                    fileQuery = "*"
                else:
                    fileQuery = search_part

                # Use search_part as fileQuery (name extraction already handled above)
                return pathQuery, fileQuery, searchType, searchOptions

            # No colon - check if it looks like a path
            elif searchQuery.startswith('/'):
                # It's a path only
                return searchQuery, "*", searchType, searchOptions

            else:
                # It's a search term only
                return "*", searchQuery, searchType, searchOptions

        except Exception as e:
            logger.error(f"Error parsing searchQuery '{searchQuery}': {str(e)}")
            return "*", "*", "all", {}

    def _resolvePathQuery(self, pathQuery: str) -> List[str]:
        """
        Resolve pathQuery into a list of search paths for SharePoint operations.

        Parameters:
            pathQuery (str): Query string that can contain:
                - Direct paths (e.g., "/Documents/Project1")
                - Wildcards (e.g., "/Documents/*")
                - Multiple paths separated by semicolons (e.g., "/Docs; /Files")
                - Relative paths (e.g., "Project1" -> resolved to default folder)
                - Empty string or "*" for global search

        Returns:
            List[str]: List of resolved paths
        """
        try:
            if not pathQuery or not pathQuery.strip() or pathQuery.strip() == "*":
                return ["*"]  # Global search across all sites

            # Split by semicolon to handle multiple paths
            raw_paths = [path.strip() for path in pathQuery.split(';') if path.strip()]
            resolved_paths = []

            for raw_path in raw_paths:
                # Handle wildcards - return as-is
                if '*' in raw_path:
                    resolved_paths.append(raw_path)
                # Handle absolute paths
                elif raw_path.startswith('/'):
                    resolved_paths.append(raw_path)
                # Handle relative paths - prepend default folder
                else:
                    resolved_paths.append(f"/Documents/{raw_path}")

            # Remove duplicates while preserving order
            seen = set()
            unique_paths = []
            for path in resolved_paths:
                if path not in seen:
                    seen.add(path)
                    unique_paths.append(path)

            logger.info(f"Resolved pathQuery '{pathQuery}' to {len(unique_paths)} paths: {unique_paths}")
            return unique_paths

        except Exception as e:
            logger.error(f"Error resolving pathQuery '{pathQuery}': {str(e)}")
            return ["*"]  # Fallback to global search

    def _parseSiteUrl(self, siteUrl: str) -> Dict[str, str]:
        """Parse SharePoint site URL to extract hostname and site path"""
        try:
            parsed = urlparse(siteUrl)
            hostname = parsed.hostname
            path = parsed.path.strip('/')

            return {
                "hostname": hostname,
                "sitePath": path
            }
        except Exception as e:
            logger.error(f"Error parsing site URL {siteUrl}: {str(e)}")
            return {"hostname": "", "sitePath": ""}

    async def _makeGraphApiCall(self, access_token: str, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
        """Make a Microsoft Graph API call with timeout and detailed logging"""
        try:
            headers = {
                "Authorization": f"Bearer {access_token}",
                "Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
            }

            url = f"https://graph.microsoft.com/v1.0/{endpoint}"
            logger.info(f"Making Graph API call: {method} {url}")

            # Set timeout to 30 seconds
            timeout = aiohttp.ClientTimeout(total=30)

            async with aiohttp.ClientSession(timeout=timeout) as session:
                if method == "GET":
                    logger.debug(f"Starting GET request to {url}")
                    async with session.get(url, headers=headers) as response:
                        logger.info(f"Graph API response: {response.status}")
                        if response.status == 200:
                            result = await response.json()
                            logger.debug(f"Graph API success: {len(str(result))} characters response")
                            return result
                        else:
                            error_text = await response.text()
                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
                            return {"error": f"API call failed: {response.status} - {error_text}"}

                elif method == "PUT":
                    logger.debug(f"Starting PUT request to {url}")
                    async with session.put(url, headers=headers, data=data) as response:
                        logger.info(f"Graph API response: {response.status}")
                        if response.status in [200, 201]:
                            result = await response.json()
                            logger.debug(f"Graph API success: {len(str(result))} characters response")
                            return result
                        else:
                            error_text = await response.text()
                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
                            return {"error": f"API call failed: {response.status} - {error_text}"}

                elif method == "POST":
                    logger.debug(f"Starting POST request to {url}")
                    async with session.post(url, headers=headers, data=data) as response:
                        logger.info(f"Graph API response: {response.status}")
                        if response.status in [200, 201]:
                            result = await response.json()
                            logger.debug(f"Graph API success: {len(str(result))} characters response")
                            return result
                        else:
                            error_text = await response.text()
                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
                            return {"error": f"API call failed: {response.status} - {error_text}"}

        except asyncio.TimeoutError:
            logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
            return {"error": f"API call timed out after 30 seconds: {endpoint}"}
        except Exception as e:
            logger.error(f"Error making Graph API call: {str(e)}")
            return {"error": f"Error making Graph API call: {str(e)}"}

    async def _getSiteId(self, access_token: str, hostname: str, site_path: str) -> str:
        """Get SharePoint site ID from hostname and site path"""
        try:
            endpoint = f"sites/{hostname}:/{site_path}"
            result = await self._makeGraphApiCall(access_token, endpoint)

            if "error" in result:
                logger.error(f"Error getting site ID: {result['error']}")
                return ""

            return result.get("id", "")
        except Exception as e:
            logger.error(f"Error getting site ID: {str(e)}")
            return ""

    @action
    async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Find documents by searching their content, names, or metadata across all accessible SharePoint sites

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
            site (str, optional): SharePoint site name or hint to search within (e.g., "SSS", "KM XYZ"). If not provided, searches all accessible sites
            searchQuery (str): [path:][type:][mode:]query - Enhanced search syntax:
                - "budget", "/Documents:budget", "files:budget", "folders:DELTA", "*"
                - "exact:\"Operations 2025\"" - exact phrase matching
                - "regex:^Operations.*2025$" - regex pattern matching
                - "case:DELTA" - case-sensitive search
                - "and:DELTA AND 2025 Mars AND Group" - all terms must be present
                - "folders:and:DELTA AND 2025 Mars AND Group" - combined options
                - Site hint support: "folders:site=KM LayerFinance;name=Work" or "folders:site:KM LayerFinance Work"
                - For quoted names: "folders:site=KM;name=\"page staten\""
                - For folder search: words like "part1 part2" will search for folders containing BOTH terms
                Note: For storage locations, use "folders:" prefix. When searching for folders, multiple words are treated as search terms that must all appear in the folder name or path.
                Site hints help narrow search to specific SharePoint sites for better accuracy.
            resultDocument (str, optional): JSON result document from previous findDocumentPath action to refine search
            searchScope (str, optional): Search scope - options: "all" (default), "documents" (files only), "pages" (SharePoint pages only)
            maxResults (int, optional): Maximum number of results to return (default: 100)
            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
        """
        try:
            connectionReference = parameters.get("connectionReference")
            site = parameters.get("site")
            searchQuery = parameters.get("searchQuery", "*")
            resultDocument = parameters.get("resultDocument")
            searchScope = parameters.get("searchScope", "all")
            maxResults = parameters.get("maxResults", 100)
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])

            if not connectionReference:
                return ActionResult.isFailure(error="Connection reference is required")

            # If resultDocument is provided, extract site information to refine search
            if resultDocument:
                try:
                    import json
                    # Resolve the reference label to get the actual document list
                    document_list = self.service.getChatDocumentsFromDocumentList([resultDocument])
                    if not document_list or len(document_list) == 0:
                        return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}")

                    # Get the first document's content (which should be the JSON)
                    first_document = document_list[0]
                    file_data = self.service.getFileData(first_document.fileId)
                    if not file_data:
                        return ActionResult.isFailure(error=f"No file data found for document: {resultDocument}")

                    # Parse the JSON content
                    result_data = json.loads(file_data)
                    found_documents = result_data.get("foundDocuments", [])

                    # Extract site information from the result for context
                    if found_documents:
                        # Use the site information from the previous search to refine current search
                        # This could be used to limit search to specific sites or add context
                        logger.info(f"Refining search using {len(found_documents)} documents from previous result")

                except json.JSONDecodeError as e:
                    return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}")
                except Exception as e:
                    return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}")

            # Parse searchQuery to extract path, search terms, search type, and options
            pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery)

            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            # Discover all SharePoint sites accessible to the user
            sites = await self._discoverSharePointSites(connection["accessToken"])
            if not sites:
                return ActionResult.isFailure(error="No SharePoint sites found or accessible")

            # Filter sites by site parameter if provided
            if site:
                sites = self._filter_sites_by_hint(sites, site)
                logger.info(f"Filtered sites by site parameter: '{site}' -> {len(sites)} sites")
                if not sites:
                    return ActionResult.isFailure(error=f"No SharePoint sites found matching '{site}'")

            # Resolve path query into search paths
            search_paths = self._resolvePathQuery(pathQuery)

            try:
                # Search across all discovered sites
                found_documents = []
                all_sites_searched = []

                # Apply site hint filtering if provided in search options
                site_scoped_sites = sites
                strict_folder_name: Optional[str] = None

                # First check for explicit site hint in search options
                if searchOptions.get("site_hint"):
                    site_scoped_sites = self._filter_sites_by_hint(sites, searchOptions["site_hint"])
                    logger.info(f"Filtered sites by explicit site hint: '{searchOptions['site_hint']}' -> {len(site_scoped_sites)} sites")

                # Heuristic: if user searched for folders with pattern "<siteHint> <folderName>",
                # prefer filtering sites by the first token(s) and match folder name exactly for the last token
                elif searchType == "folders" and fileQuery and ' ' in fileQuery and not searchOptions.get("regex_match"):
                    # treat last token as folder name, preceding tokens combined as site hint
                    tokens = [t for t in fileQuery.split(' ') if t]
                    if len(tokens) >= 2:
                        strict_folder_name = tokens[-1]
                        site_hint = ' '.join(tokens[:-1])
                        site_scoped_sites = self._filter_sites_by_hint(sites, site_hint)
                        logger.info(f"Filtered sites by heuristic site hint: '{site_hint}' -> {len(site_scoped_sites)} sites")

                for site in site_scoped_sites:
                    site_id = site["id"]
                    site_name = site["displayName"]
                    site_url = site["webUrl"]

                    logger.info(f"Searching in site: {site_name} ({site_url})")

                    # Use Microsoft Graph API for this specific site
                    # Handle empty or wildcard queries
                    if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
                        # For wildcard/empty queries, list all items in the drive
                        endpoint = f"sites/{site_id}/drive/root/children"
                    else:
                        # For specific queries, use different approaches based on search type
                        if searchType == "folders":
                            # Use Microsoft Graph unified search endpoint: POST /search/query
                            # Scope by all drives in the site (e.g., Shared Documents, Documents, language variants)
                            try:
                                import json
                                # Discover drives for the site to build precise path scopes
                                drives_resp = await self._makeGraphApiCall(connection["accessToken"], f"sites/{site_id}/drives")
                                path_filters = []
                                if not ("error" in drives_resp):
                                    for drv in (drives_resp.get("value", []) or []):
                                        web_url = (drv.get("webUrl") or "").rstrip('/') + '/'
                                        if web_url:
                                            # path:"<drive webUrl>/"
                                            path_filters.append(f"path:\"{web_url}\"")
                                if not path_filters:
                                    # fallback to site root if no drives found
                                    scoped_path = site_url.rstrip('/') + '/'
                                    path_filters = [f"path:\"{scoped_path}\""]

                                # Use KQL syntax for folder search
                                terms = [t for t in fileQuery.split() if t.strip()]
                                if len(terms) > 1:
                                    # Multiple terms: first search for folders containing ANY of the terms (OR)
                                    # This broadens the search to catch all potential matches
                                    name_terms = " OR ".join([f"foldername:*{t}*" for t in terms])
                                    name_filter = f"({name_terms})"
                                else:
                                    # Single term: search for folders containing the term
                                    single_term = terms[0] if terms else fileQuery
                                    name_filter = f"foldername:*{single_term}*"

                                # Use KQL syntax with isFolder:true
                                query_string = f"isFolder:true AND {name_filter}"
                                logger.info(f"Using KQL query: {query_string}")

                                payload = {
                                    "requests": [
                                        {
                                            "entityTypes": ["driveItem"],
                                            "query": {"queryString": query_string},
                                            "from": 0,
                                            "size": 50
                                        }
                                    ]
                                }
                                logger.info(f"Using unified search API for folders with queryString: {query_string}")
                                logger.info(f"Payload: {json.dumps(payload, indent=2)}")
                                unified_result = await self._makeGraphApiCall(
                                    connection["accessToken"],
                                    "search/query",
                                    method="POST",
                                    data=json.dumps(payload).encode("utf-8")
                                )
                                logger.info(f"Unified search response: {json.dumps(unified_result, indent=2)}")
                                if "error" in unified_result:
                                    logger.warning(f"Unified search failed for site {site_name}: {unified_result['error']}")
                                    items = []
                                else:
                                    # Flatten hits -> driveItem resources
                                    items = []
                                    for container in (unified_result.get("value", []) or []):
                                        for hits_container in (container.get("hitsContainers", []) or []):
                                            for hit in (hits_container.get("hits", []) or []):
                                                resource = hit.get("resource")
                                                if resource:
                                                    items.append(resource)
                                logger.info(f"Unified search returned {len(items)} items (pre-filter)")

                                # Post-filter: For multiple terms, filter results to only include folders that contain ALL terms
                                if len(terms) > 1:
                                    filtered_items = []
                                    for item in items:
                                        folder_name = item.get("name", "").lower()
                                        # Check if folder name contains ALL search terms
                                        if all(term.lower() in folder_name for term in terms):
                                            filtered_items.append(item)
                                    items = filtered_items
                                    logger.info(f"Post-filtered to {len(items)} items containing all terms: {terms}")

                            except Exception as e:
                                logger.error(f"Error performing unified folder search: {str(e)}")
                                items = []
                        else:
                            # For files, use regular search API
                            search_query = fileQuery.replace("'", "''")  # Escape single quotes for OData
                            endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
                            logger.info(f"Using search API for files with query: '{search_query}'")

                            # Make the search API call (files)
                            search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
                            if "error" in search_result:
                                logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
                                continue
                            # Process search results for this site (files)
                            items = search_result.get("value", [])
                            logger.info(f"Retrieved {len(items)} items from site {site_name}")
                    site_documents = []

                    for item in items:
                        item_name = item.get("name", "")
                        item_type = "folder" if "folder" in item else "file"
                        item_path = item.get("parentReference", {}).get("path", "")
                        logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")

                        # Filter by search scope if specified
                        if searchScope == "documents" and "folder" in item:
                            logger.debug(f"Skipping folder '{item_name}' due to documents scope")
                            continue
                        elif searchScope == "pages" and "file" in item and not item["file"].get("mimeType", "").startswith("text/html"):
                            logger.debug(f"Skipping file '{item_name}' due to pages scope")
                            continue

                        # Filter by search type (files, folders, all)
                        if searchType == "files" and "folder" in item:
                            logger.debug(f"Skipping folder '{item_name}' due to files search type")
                            continue
                        elif searchType == "folders" and "file" in item:
                            logger.debug(f"Skipping file '{item_name}' due to folders search type")
                            continue

                        # Enhanced post-filtering based on search options
                        if fileQuery != "*" and fileQuery.strip() and searchType != "folders":
                            # For non-folder searches, apply name filtering
                            # (Folder searches are already filtered by the recursive search)
                            search_target = item_name

                            # Apply different filtering based on search options
                            if searchOptions.get("exact_match"):
                                # Exact phrase matching
                                if searchOptions.get("case_sensitive"):
                                    if fileQuery not in search_target:
                                        continue
                                else:
                                    if fileQuery.lower() not in search_target.lower():
                                        continue
                            elif searchOptions.get("regex_match"):
                                # Regex pattern matching
                                import re
                                flags = 0 if searchOptions.get("case_sensitive") else re.IGNORECASE
                                if not re.search(fileQuery, search_target, flags):
                                    continue
                            elif searchOptions.get("and_terms"):
                                # AND terms mode: Split by " AND " and ensure ALL terms are present
                                search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
                                and_terms = [term.strip() for term in fileQuery.split(" AND ") if term.strip()]
                                and_terms = [term.lower() if not searchOptions.get("case_sensitive") else term for term in and_terms]
                                if not all(term in search_name for term in and_terms):
                                    continue  # Skip this item if not all AND terms match
                            else:
                                # Default: ALL search terms must be present (space-separated)
                                search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
                                search_terms = [term.strip().lower() if not searchOptions.get("case_sensitive") else term.strip()
                                              for term in fileQuery.split() if term.strip()]
                                if not all(term in search_name for term in search_terms):
                                    continue  # Skip this item if not all terms match

                        # If strict folder name requested, enforce exact (case-insensitive) match on folders
                        if strict_folder_name:
                            item_is_folder = "folder" in item
                            item_name_ci = (item.get("name") or "").strip().lower()
                            if item_is_folder and item_name_ci != strict_folder_name.lower():
                                logger.debug(f"Skipping folder '{item_name}' - doesn't match strict name '{strict_folder_name}'")
                                continue

                        logger.debug(f"Item '{item_name}' passed all filters - adding to results")

                        # Create minimal result with only essential reference information
                        doc_info = {
                            "id": item.get("id"),
                            "name": item.get("name"),
                            "type": "folder" if "folder" in item else "file",
                            "siteName": site_name,
                            "siteId": site_id
                        }

                        site_documents.append(doc_info)

                    found_documents.extend(site_documents)
                    all_sites_searched.append({
                        "siteName": site_name,
                        "siteUrl": site_url,
                        "siteId": site_id,
                        "documentsFound": len(site_documents)
                    })

                    logger.info(f"Found {len(site_documents)} documents in site {site_name}")

                # Limit total results to maxResults
                if len(found_documents) > maxResults:
                    found_documents = found_documents[:maxResults]
                    logger.info(f"Limited results to {maxResults} items")

                result_data = {
                    "searchQuery": searchQuery,
                    "totalResults": len(found_documents),
                    "maxResults": maxResults,
                    "foundDocuments": found_documents,
                    "timestamp": get_utc_timestamp()
                }

            except Exception as e:
                logger.error(f"Error searching SharePoint: {str(e)}")
                return ActionResult.isFailure(error=str(e))

            # Determine output format based on expected formats
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                # Use the first expected format
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".json")
                output_mime_type = expected_format.get("mimeType", "application/json")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
            else:
                logger.info("No expected format specified, using default .json format")

            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_find_path_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )

        except Exception as e:
            logger.error(f"Error finding document path: {str(e)}")
            return ActionResult.isFailure(error=str(e))

    @action
    async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Read documents from SharePoint across all accessible sites

        Parameters:
            documentList (str): Reference to the document list to read
            connectionReference (str): Reference to the Microsoft connection
            pathQuery (str): Path query to locate documents (e.g., "/Documents/Project1", "*" for all sites)
            resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to pathQuery)
            includeMetadata (bool, optional): Whether to include metadata (default: True)
            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
        """
        try:
            documentList = parameters.get("documentList")
            connectionReference = parameters.get("connectionReference")
            pathQuery = parameters.get("pathQuery", "*")
            resultDocument = parameters.get("resultDocument")
            includeMetadata = parameters.get("includeMetadata", True)
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])

            if not documentList or not connectionReference:
                return ActionResult.isFailure(error="Document list reference and connection reference are required")

            # If resultDocument is provided, extract folder IDs from it
            if resultDocument:
                try:
                    import json
                    # Resolve the reference label to get the actual document list
                    document_list = self.service.getChatDocumentsFromDocumentList([resultDocument])
                    if not document_list or len(document_list) == 0:
                        return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}")

                    # Get the first document's content (which should be the JSON)
                    first_document = document_list[0]
                    file_data = self.service.getFileData(first_document.fileId)
                    if not file_data:
                        return ActionResult.isFailure(error=f"No file data found for document: {resultDocument}")

                    # Parse the JSON content
                    result_data = json.loads(file_data)
                    found_documents = result_data.get("foundDocuments", [])

                    # Extract folder IDs from the result
                    folder_ids = []
                    for doc in found_documents:
                        if doc.get("type") == "folder":
                            folder_ids.append(doc.get("id"))

                    if folder_ids:
                        # Use the first folder ID found as pathQuery
                        pathQuery = folder_ids[0]
                        logger.info(f"Using folder ID from resultDocument: {pathQuery}")
                    else:
                        return ActionResult.isFailure(error="No folders found in resultDocument")

                except json.JSONDecodeError as e:
                    return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}")
                except Exception as e:
                    return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}")

            # Get documents from reference - ensure documentList is a list, not a string
            if isinstance(documentList, str):
                documentList = [documentList]  # Convert string to list
            chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)

            if not chatDocuments:
                return ActionResult.isFailure(error="No documents found for the provided reference")

            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            # Discover all SharePoint sites accessible to the user
            sites = await self._discoverSharePointSites(connection["accessToken"])
            if not sites:
                return ActionResult.isFailure(error="No SharePoint sites found or accessible")

            # Resolve path query into search paths
            search_paths = self._resolvePathQuery(pathQuery)

            # Process each chat document across all sites
            read_results = []

            for i, chatDocument in enumerate(chatDocuments):
                try:
                    fileId = chatDocument.fileId
                    fileName = chatDocument.fileName

                    # Search for this file across all sites
                    file_found = False

                    for site in sites:
                        site_id = site["id"]
                        site_name = site["displayName"]
                        site_url = site["webUrl"]

                        # Try to find the file by name in this site
                        search_query = fileName.replace("'", "''")  # Escape single quotes for OData
                        endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"

                        search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)

                        if "error" in search_result:
                            continue

                        items = search_result.get("value", [])
                        for item in items:
                            if item.get("name") == fileName:
                                # Found the file, get its details
                                file_id = item.get("id")
                                file_endpoint = f"sites/{site_id}/drive/items/{file_id}"

                                # Get file metadata
                                file_info_result = await self._makeGraphApiCall(connection["accessToken"], file_endpoint)

                                if "error" in file_info_result:
                                    continue

                                # Build result with metadata
                                result_item = {
                                    "fileId": fileId,
                                    "fileName": fileName,
                                    "sharepointFileId": file_id,
                                    "siteName": site_name,
                                    "siteUrl": site_url,
                                    "size": file_info_result.get("size", 0),
                                    "createdDateTime": file_info_result.get("createdDateTime"),
                                    "lastModifiedDateTime": file_info_result.get("lastModifiedDateTime"),
                                    "webUrl": file_info_result.get("webUrl")
                                }

                                # Add metadata if requested
                                if includeMetadata:
                                    result_item["metadata"] = {
                                        "mimeType": file_info_result.get("file", {}).get("mimeType"),
                                        "downloadUrl": file_info_result.get("@microsoft.graph.downloadUrl"),
                                        "createdBy": file_info_result.get("createdBy", {}),
                                        "lastModifiedBy": file_info_result.get("lastModifiedBy", {}),
                                        "parentReference": file_info_result.get("parentReference", {})
                                    }

                                # Get file content if it's a readable format
                                mime_type = file_info_result.get("file", {}).get("mimeType", "")
                                if mime_type.startswith("text/") or mime_type in [
                                    "application/json", "application/xml", "application/javascript"
                                ]:
                                    # Download the file content
                                    content_endpoint = f"sites/{site_id}/drive/items/{file_id}/content"

                                    # For content download, we need to handle binary data
                                    try:
                                        async with aiohttp.ClientSession() as session:
                                            headers = {"Authorization": f"Bearer {connection['accessToken']}"}
                                            async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
                                                if response.status == 200:
                                                    content = await response.text()
                                                    result_item["content"] = content
                                                else:
                                                    result_item["content"] = f"Could not download content: HTTP {response.status}"
                                    except Exception as e:
                                        result_item["content"] = f"Error downloading content: {str(e)}"
                                else:
                                    result_item["content"] = f"Binary file type ({mime_type}) - content not retrieved"

                                read_results.append(result_item)
                                file_found = True
                                break

                        if file_found:
                            break

                    if not file_found:
                        read_results.append({
                            "fileId": fileId,
                            "fileName": fileName,
                            "error": "File not found in any accessible SharePoint site",
                            "content": None
                        })

                except Exception as e:
                    logger.error(f"Error reading document {chatDocument.fileName}: {str(e)}")
                    read_results.append({
                        "fileId": chatDocument.fileId,
                        "fileName": chatDocument.fileName,
                        "error": str(e),
                        "content": None
                    })

            result_data = {
                "connectionReference": connectionReference,
                "pathQuery": pathQuery,
                "documentList": documentList,
                "includeMetadata": includeMetadata,
                "sitesSearched": len(sites),
                "readResults": read_results,
                "connection": {
                    "id": connection["id"],
                    "authority": "microsoft",
                    "reference": connectionReference
                },
                "timestamp": get_utc_timestamp()
            }

            # Determine output format based on expected formats
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                # Use the first expected format
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".json")
                output_mime_type = expected_format.get("mimeType", "application/json")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
            else:
                logger.info("No expected format specified, using default .json format")

            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_documents_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )
        except Exception as e:
            logger.error(f"Error reading SharePoint documents: {str(e)}")
            return ActionResult(
                success=False,
                error=str(e)
            )

    @action
    async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Upload documents to SharePoint across accessible sites

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
            sitePath (str): REQUIRED - Specific SharePoint path where to upload documents. Must be a valid SharePoint path format:
                - For direct upload: "/site:<Site Name>/<Library>/<Folder Path>" (e.g., "/site:KM XYZ/Documents/Work")
                - If user provides words like "word1 word2", the system MUST call findDocumentPath first to locate the actual folder path, the result then to give to this parameter
            documentList (str): Reference to the document list to upload
            fileNames (List[str]): List of names for the uploaded files
            resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to sitePath)
            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
        """
        try:
            connectionReference = parameters.get("connectionReference")
            sitePath = parameters.get("sitePath", "/Documents")
            documentList = parameters.get("documentList")
            fileNames = parameters.get("fileNames")
            resultDocument = parameters.get("resultDocument")
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])

            if not connectionReference or not documentList or not fileNames:
                return ActionResult.isFailure(error="Connection reference, document list, and file names are required")

            # If resultDocument is provided, extract folder IDs from it
            if resultDocument:
                try:
                    import json
                    # Resolve the reference label to get the actual document list
                    document_list = self.service.getChatDocumentsFromDocumentList([resultDocument])
                    if not document_list or len(document_list) == 0:
                        return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}")

                    # Get the first document's content (which should be the JSON)
                    first_document = document_list[0]
                    file_data = self.service.getFileData(first_document.fileId)
                    if not file_data:
                        return ActionResult.isFailure(error=f"No file data found for document: {resultDocument}")

                    # Parse the JSON content
                    result_data = json.loads(file_data)
                    found_documents = result_data.get("foundDocuments", [])

                    # Extract folder IDs from the result
                    folder_ids = []
                    for doc in found_documents:
                        if doc.get("type") == "folder":
                            folder_ids.append(doc.get("id"))

                    if folder_ids:
                        # Use the first folder ID found as sitePath
                        sitePath = folder_ids[0]
                        logger.info(f"Using folder ID from resultDocument: {sitePath}")
                    else:
                        return ActionResult.isFailure(error="No folders found in resultDocument")

                except json.JSONDecodeError as e:
                    return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}")
                except Exception as e:
                    return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}")

            # Get Microsoft connection
            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            # Get documents from reference - ensure documentList is a list, not a string
            if isinstance(documentList, str):
                documentList = [documentList]  # Convert string to list
            chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)
            if not chatDocuments:
                return ActionResult.isFailure(error="No documents found for the provided reference")

            # Discover all SharePoint sites accessible to the user
            sites = await self._discoverSharePointSites(connection["accessToken"])
            if not sites:
                return ActionResult.isFailure(error="No SharePoint sites found or accessible")

            # Enforce site-scoped path usage when using sitePath directly (without resultDocument)
            upload_site_scope = None
            if not resultDocument:
                if not sitePath or not sitePath.startswith('/'):
                    return ActionResult.isFailure(error="sitePath must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")

                # Check if sitePath contains search terms (words without proper path structure)
                if not sitePath.startswith('/site:') and not sitePath.startswith('/Documents') and not sitePath.startswith('/Shared Documents'):
                    # This looks like search terms, not a valid path
                    return ActionResult.isFailure(error=f"Invalid sitePath '{sitePath}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as sitePath.")

                parsed = self._parse_site_scoped_path(sitePath)
                if not parsed:
                    return ActionResult.isFailure(error="Invalid sitePath. Use /site:<Site Display Name>/<Library or Folder Path>")
                # find matching site
                candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"])  # substring match
                # choose exact displayName match if available
                exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
                selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None)
                if not selected_site:
                    return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible")
                upload_site_scope = selected_site
                # Use the inner path portion as the actual upload target path
                upload_paths = [f"/{parsed['innerPath'].lstrip('/')}"]
                sites = [selected_site]
            else:
                # Resolve path query into upload paths (fallback behavior when using resultDocument)
                upload_paths = self._resolvePathQuery(sitePath)

            # Process each document upload
            upload_results = []

            for i, (chatDocument, fileName) in enumerate(zip(chatDocuments, fileNames)):
                try:
                    fileId = chatDocument.fileId
                    file_data = self.service.getFileData(fileId)

                    if not file_data:
                        logger.warning(f"File data not found for fileId: {fileId}")
                        upload_results.append({
                            "fileName": fileName,
                            "fileId": fileId,
                            "error": "File data not found",
                            "uploadStatus": "failed"
                        })
                        continue

                    # Upload to the first available site (or could be made configurable)
                    upload_successful = False

                    for site in sites:
                        site_id = site["id"]
                        site_name = site["displayName"]
                        site_url = site["webUrl"]

                        # Use the first upload path or default to Documents
                        upload_path = upload_paths[0] if upload_paths else "/Documents"
                        upload_path = upload_path.rstrip('/') + '/' + fileName
                        upload_path_clean = upload_path.lstrip('/')

                        # Upload endpoint for small files (< 4MB)
                        if len(file_data) < 4 * 1024 * 1024:  # 4MB
                            upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content"

                            # Upload the file
                            upload_result = await self._makeGraphApiCall(
                                connection["accessToken"],
                                upload_endpoint,
                                method="PUT",
                                data=file_data
                            )

                            if "error" not in upload_result:
                                upload_results.append({
                                    "fileName": fileName,
                                    "fileId": fileId,
                                    "uploadStatus": "success",
                                    "siteName": site_name,
                                    "siteUrl": site_url,
                                    "uploadPath": upload_path,
                                    "sharepointFileId": upload_result.get("id"),
                                    "webUrl": upload_result.get("webUrl"),
                                    "size": upload_result.get("size"),
                                    "createdDateTime": upload_result.get("createdDateTime")
                                })
                                upload_successful = True
                                break
                            else:
                                logger.warning(f"Upload failed to site {site_name}: {upload_result['error']}")
                        else:
                            # For large files, we would need to implement resumable upload
                            logger.warning(f"File too large ({len(file_data)} bytes) for site {site_name}")
                            continue

                    if not upload_successful:
                        upload_results.append({
                            "fileName": fileName,
                            "fileId": fileId,
                            "error": f"File too large ({len(file_data)} bytes) or upload failed to all sites. Files larger than 4MB require resumable upload (not implemented).",
                            "uploadStatus": "failed"
                        })

                except Exception as e:
                    logger.error(f"Error uploading document {fileName}: {str(e)}")
                    upload_results.append({
                        "fileName": fileName,
                        "fileId": fileId,
                        "error": str(e),
                        "uploadStatus": "failed"
                    })

            # Create result data
            result_data = {
                "connectionReference": connectionReference,
                "sitePath": sitePath,
                "documentList": documentList,
                "fileNames": fileNames,
                "sitesAvailable": len(sites),
                "uploadResults": upload_results,
                "connection": {
                    "id": connection["id"],
                    "authority": "microsoft",
                    "reference": connectionReference
                },
                "timestamp": get_utc_timestamp()
            }

            # Determine output format based on expected formats
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                # Use the first expected format
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".json")
                output_mime_type = expected_format.get("mimeType", "application/json")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
            else:
                logger.info("No expected format specified, using default .json format")

            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_upload_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )

        except Exception as e:
            logger.error(f"Error uploading to SharePoint: {str(e)}")
            return ActionResult(
                success=False,
                error=str(e)
            )

    @action
    async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        List documents in SharePoint folders across accessible sites

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
            searchQuery (str): [path:][type:][mode:]query - "Test Plan", "folders:Test Plan", "/Documents", "*"
                Note: Use "folders:Name" to search for folders anywhere, not "path:/Name" which looks only in root
            resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to searchQuery)
            includeSubfolders (bool, optional): Whether to include subfolders (default: False)
            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
        """
        try:
            connectionReference = parameters.get("connectionReference")
            searchQuery = parameters.get("searchQuery", "*")
            resultDocument = parameters.get("resultDocument")
            includeSubfolders = parameters.get("includeSubfolders", False)  # Default to False for better UX
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])

            if not connectionReference:
                return ActionResult.isFailure(error="Connection reference is required")

            # If resultDocument is provided, resolve the reference and extract folder IDs from it
            if resultDocument:
                try:
                    import json
                    # Resolve the reference label to get the actual document list
                    document_list = self.service.getChatDocumentsFromDocumentList([resultDocument])
                    if not document_list or len(document_list) == 0:
                        return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}")

                    # Get the first document's content (which should be the JSON)
                    first_document = document_list[0]
                    logger.info(f"Document fileId: {first_document.fileId}, fileName: {first_document.fileName}")
                    file_data = self.service.getFileData(first_document.fileId)
                    if not file_data:
                        return ActionResult.isFailure(error=f"No file data found for document: {resultDocument} (fileId: {first_document.fileId})")
                    logger.info(f"File data length: {len(file_data) if file_data else 0}")

                    # Parse the JSON content
                    result_data = json.loads(file_data)
                    found_documents = result_data.get("foundDocuments", [])

                    # Extract folder IDs from the result
                    folder_ids = []
                    for doc in found_documents:
                        if doc.get("type") == "folder":
                            folder_ids.append(doc.get("id"))

                    if folder_ids:
                        # Use the first folder ID found
                        searchQuery = folder_ids[0]
                        logger.info(f"Using folder ID from resultDocument: {searchQuery}")
                    else:
                        return ActionResult.isFailure(error="No folders found in resultDocument")

                except json.JSONDecodeError as e:
                    return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}")
                except Exception as e:
                    return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}")

            # Get Microsoft connection
            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            logger.info(f"Starting SharePoint listDocuments for searchQuery: {searchQuery}")
            logger.debug(f"Connection ID: {connection['id']}")

            # Parse searchQuery to extract path, search terms, search type, and options
            pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery)

            # Discover all SharePoint sites accessible to the user
            sites = await self._discoverSharePointSites(connection["accessToken"])
            if not sites:
                return ActionResult.isFailure(error="No SharePoint sites found or accessible")

            # Check if searchQuery is a folder ID (starts with 01PPXICCB...)
            if searchQuery.startswith('01PPXICCB') or searchQuery.startswith('01'):
                # Direct folder ID - use it directly
                folder_paths = [searchQuery]
                logger.info(f"Using direct folder ID: {searchQuery}")
            else:
                # Resolve path query into folder paths
                folder_paths = self._resolvePathQuery(pathQuery)
                logger.info(f"Resolved folder paths: {folder_paths}")

            # Process each folder path across all sites
            list_results = []

            for folderPath in folder_paths:
                try:
                    folder_results = []

                    for site in sites:
                        site_id = site["id"]
                        site_name = site["displayName"]
                        site_url = site["webUrl"]

                        logger.info(f"Listing folder {folderPath} in site: {site_name}")

                        # Determine the endpoint based on folder path
                        if folderPath in ["/", ""] or folderPath == "*":
                            # Root folder
                            endpoint = f"sites/{site_id}/drive/root/children"
                        elif folderPath.startswith('01PPXICCB') or folderPath.startswith('01'):
                            # Direct folder ID
                            endpoint = f"sites/{site_id}/drive/items/{folderPath}/children"
                        else:
                            # Specific folder path - remove leading slash if present
                            folder_path_clean = folderPath.lstrip('/')
                            endpoint = f"sites/{site_id}/drive/root:/{folder_path_clean}:/children"

                        # Make the API call to list folder contents
                        api_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)

                        if "error" in api_result:
                            logger.warning(f"Failed to list folder {folderPath} in site {site_name}: {api_result['error']}")
                            continue

                        # Process the results
                        items = api_result.get("value", [])
                        processed_items = []

                        for item in items:
                            item_info = {
                                "id": item.get("id"),
                                "name": item.get("name"),
                                "size": item.get("size", 0),
                                "createdDateTime": item.get("createdDateTime"),
                                "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                                "webUrl": item.get("webUrl"),
                                "type": "folder" if "folder" in item else "file",
                                "siteName": site_name,
                                "siteUrl": site_url
                            }

                            # Add file-specific information
                            if "file" in item:
                                item_info.update({
                                    "mimeType": item["file"].get("mimeType"),
                                    "downloadUrl": item.get("@microsoft.graph.downloadUrl")
                                })

                            # Add folder-specific information
                            if "folder" in item:
                                item_info.update({
                                    "childCount": item["folder"].get("childCount", 0)
                                })

                            processed_items.append(item_info)

                        # If include subfolders is enabled, get ONLY direct subfolder contents (1 level deep only)
                        if includeSubfolders:
                            logger.info(f"Including subfolders - processing {len([item for item in processed_items if item['type'] == 'folder'])} folders")
                            subfolder_count = 0
                            max_subfolders = 10  # Limit to prevent infinite loops

                            for item in processed_items[:]:  # Use slice to avoid modifying list during iteration
                                if item["type"] == "folder" and subfolder_count < max_subfolders:
                                    subfolder_count += 1
                                    subfolder_path = f"{folderPath.rstrip('/')}/{item['name']}"
                                    subfolder_endpoint = f"sites/{site_id}/drive/items/{item['id']}/children"

                                    logger.debug(f"Getting contents of subfolder: {item['name']}")
                                    subfolder_result = await self._makeGraphApiCall(connection["accessToken"], subfolder_endpoint)
                                    if "error" not in subfolder_result:
                                        subfolder_items = subfolder_result.get("value", [])
                                        logger.debug(f"Found {len(subfolder_items)} items in subfolder {item['name']}")

                                        for subfolder_item in subfolder_items:
                                            # Only add files and direct subfolders, NO RECURSION
                                            subfolder_item_info = {
                                                "id": subfolder_item.get("id"),
                                                "name": subfolder_item.get("name"),
                                                "size": subfolder_item.get("size", 0),
                                                "createdDateTime": subfolder_item.get("createdDateTime"),
                                                "lastModifiedDateTime": subfolder_item.get("lastModifiedDateTime"),
                                                "webUrl": subfolder_item.get("webUrl"),
                                                "type": "folder" if "folder" in subfolder_item else "file",
                                                "parentPath": subfolder_path,
                                                "siteName": site_name,
                                                "siteUrl": site_url
                                            }

                                            if "file" in subfolder_item:
                                                subfolder_item_info.update({
                                                    "mimeType": subfolder_item["file"].get("mimeType"),
                                                    "downloadUrl": subfolder_item.get("@microsoft.graph.downloadUrl")
                                                })

                                            processed_items.append(subfolder_item_info)
                                    else:
                                        logger.warning(f"Failed to get contents of subfolder {item['name']}: {subfolder_result.get('error')}")
                                elif subfolder_count >= max_subfolders:
                                    logger.warning(f"Reached maximum subfolder limit ({max_subfolders}), skipping remaining folders")
                                    break

                            logger.info(f"Processed {subfolder_count} subfolders, total items: {len(processed_items)}")

                        folder_results.append({
                            "siteName": site_name,
                            "siteUrl": site_url,
                            "itemCount": len(processed_items),
                            "items": processed_items
                        })

                    list_results.append({
                        "folderPath": folderPath,
                        "sitesProcessed": len(folder_results),
                        "siteResults": folder_results
                    })

                except Exception as e:
                    logger.error(f"Error listing folder {folderPath}: {str(e)}")
                    list_results.append({
                        "folderPath": folderPath,
                        "error": str(e),
                        "siteResults": []
                    })

            # Create result data
            result_data = {
                "searchQuery": searchQuery,
                "includeSubfolders": includeSubfolders,
                "sitesSearched": len(sites),
                "listResults": list_results,
                "timestamp": get_utc_timestamp()
            }

            # Determine output format based on expected formats
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                # Use the first expected format
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".json")
                output_mime_type = expected_format.get("mimeType", "application/json")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
            else:
                logger.info("No expected format specified, using default .json format")

            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_document_list_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )

        except Exception as e:
            logger.error(f"Error listing SharePoint documents: {str(e)}")
            return ActionResult(
                success=False,
                error=str(e)
            )