gateway/modules/workflows/methods/methodSharepoint.py

"""
SharePoint operations method module.
Handles SharePoint document operations using the SharePoint service.
"""

import logging
import json
import re
from typing import Dict, Any, List, Optional
from datetime import datetime, UTC
import base64
from urllib.parse import urlparse
import aiohttp
import asyncio

from modules.workflows.methods.methodBase import MethodBase, action
from modules.interfaces.interfaceChatModel import ActionResult
from modules.shared.timezoneUtils import get_utc_timestamp

logger = logging.getLogger(__name__)

class MethodSharepoint(MethodBase):
    """SharePoint operations methods."""

    def __init__(self, service):
        super().__init__(service)
        self.name = "sharepoint"
        self.description = "SharePoint operations methods"

    def _format_timestamp_for_filename(self) -> str:
        """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
        return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")

    def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
        """Get Microsoft connection from connection reference"""
        try:
            userConnection = self.service.getUserConnectionFromConnectionReference(connectionReference)
            if not userConnection:
                logger.warning(f"No user connection found for reference: {connectionReference}")
                return None

            if userConnection.authority.value != "msft":
                logger.warning(f"Connection {userConnection.id} is not Microsoft (authority: {userConnection.authority.value})")
                return None

            # Check if connection is active or pending (pending means OAuth in progress)
            if userConnection.status.value not in ["active", "pending"]:
                logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
                return None

            # Get a fresh token for this specific connection
            from modules.security.tokenManager import TokenManager
            token = TokenManager().getFreshToken(self.service.interfaceApp, userConnection.id)
            if not token:
                logger.warning(f"No token found for connection {userConnection.id}")
                return None

            # Check if token is expired
            if hasattr(token, 'expiresAt') and token.expiresAt:
                current_time = get_utc_timestamp()
                if current_time > token.expiresAt:
                    logger.warning(f"Token for connection {userConnection.id} is expired (expiresAt: {token.expiresAt}, current: {current_time})")
                    return None

            logger.info(f"Successfully retrieved Microsoft connection: {userConnection.id}, status: {userConnection.status.value}, externalId: {userConnection.externalId}")

            return {
                "id": userConnection.id,
                "userConnection": userConnection,
                "accessToken": token.tokenAccess,
                "refreshToken": token.tokenRefresh,
                "scopes": ["Sites.ReadWrite.All", "Files.ReadWrite.All", "User.Read"]  # SharePoint scopes
            }
        except Exception as e:
            logger.error(f"Error getting Microsoft connection: {str(e)}")
            return None

    async def _discoverSharePointSites(self, access_token: str) -> List[Dict[str, Any]]:
        """
        Discover all SharePoint sites accessible to the user via Microsoft Graph API

        Parameters:
            access_token (str): Microsoft Graph access token

        Returns:
            List[Dict[str, Any]]: List of SharePoint site information
        """
        try:
            # Query Microsoft Graph to get all sites the user has access to
            endpoint = "sites?search=*"
            result = await self._makeGraphApiCall(access_token, endpoint)

            if "error" in result:
                logger.error(f"Error discovering SharePoint sites: {result['error']}")
                return []

            sites = result.get("value", [])
            logger.info(f"Discovered {len(sites)} SharePoint sites")

            # Process and return site information
            processed_sites = []
            for site in sites:
                site_info = {
                    "id": site.get("id"),
                    "displayName": site.get("displayName"),
                    "name": site.get("name"),
                    "webUrl": site.get("webUrl"),
                    "description": site.get("description"),
                    "createdDateTime": site.get("createdDateTime"),
                    "lastModifiedDateTime": site.get("lastModifiedDateTime")
                }
                processed_sites.append(site_info)
                logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}")

            return processed_sites

        except Exception as e:
            logger.error(f"Error discovering SharePoint sites: {str(e)}")
            return []

    def _filter_sites_by_hint(self, sites: List[Dict[str, Any]], site_hint: str) -> List[Dict[str, Any]]:
        """Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
        try:
            if not site_hint:
                return sites
            hint = site_hint.strip().lower()
            filtered: List[Dict[str, Any]] = []
            for site in sites:
                name = (site.get("displayName") or "").lower()
                web_url = (site.get("webUrl") or "").lower()
                if hint in name or hint in web_url:
                    filtered.append(site)
            return filtered if filtered else sites
        except Exception as e:
            logger.error(f"Error filtering sites by hint '{site_hint}': {str(e)}")
            return sites


    def _parse_site_scoped_path(self, path_query: str) -> Optional[Dict[str, str]]:
        """
        Parse a site-scoped path of the form:
        /site:KM LayerFinance/Documents/Work or /site:KM LayerFinance/Shared Documents/Work

        Returns dict with keys: siteName, innerPath (no leading slash) on success, else None.
        """
        try:
            if not path_query or not path_query.startswith('/'):
                return None
            # expected syntax prefix
            prefix = '/site:'
            if not path_query.startswith(prefix):
                return None
            remainder = path_query[len(prefix):]
            # split once on the next '/'
            if '/' not in remainder:
                return None
            site_name, inner = remainder.split('/', 1)
            site_name = site_name.strip()
            inner_path = inner.strip()
            if not site_name or not inner_path:
                return None
            return {"siteName": site_name, "innerPath": inner_path}
        except Exception as e:
            logger.error(f"Error parsing site-scoped path '{path_query}': {str(e)}")
            return None

    def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
        """
        Parse searchQuery to extract path, search terms, search type, and search options.

        CRITICAL: NEVER convert words to paths! Words stay as search terms.
        - "root document lesson" → fileQuery="root document lesson" (NOT "/root/document/lesson")
        - "root, gose" → fileQuery="root, gose" (NOT "/root/gose")
        - "druckersteuerung eskalation logobject" → fileQuery="druckersteuerung eskalation logobject"

        Parameters:
            searchQuery (str): Enhanced search query with options:
                - "budget" -> pathQuery="*", fileQuery="budget", searchType="all", options={}
                - "root document lesson" -> pathQuery="*", fileQuery="root document lesson", searchType="all", options={}
                - "root, gose" -> pathQuery="*", fileQuery="root, gose", searchType="all", options={}
                - "/Documents:budget" -> pathQuery="/Documents", fileQuery="budget", searchType="all", options={}
                - "files:budget" -> pathQuery="*", fileQuery="budget", searchType="files", options={}
                - "folders:DELTA" -> pathQuery="*", fileQuery="DELTA", searchType="folders", options={}
                - "exact:\"Operations 2025\"" -> exact phrase matching
                - "regex:^Operations.*2025$" -> regex pattern matching
                - "case:DELTA" -> case-sensitive search
                - "and:DELTA AND 2025 Mars AND Group" -> all AND terms must be present

        Returns:
            tuple[str, str, str, dict]: (pathQuery, fileQuery, searchType, searchOptions)
        """
        try:
            if not searchQuery or not searchQuery.strip() or searchQuery.strip() == "*":
                return "*", "*", "all", {}

            searchQuery = searchQuery.strip()
            searchOptions = {}

            # CRITICAL: Do NOT convert space-separated or comma-separated words to paths!
            # "root document lesson" should stay as "root document lesson", NOT "/root/document/lesson"
            # "root, gose" should stay as "root, gose", NOT "/root/gose"

            # Check for search type specification (files:, folders:, all:) FIRST
            searchType = "all"  # Default
            if searchQuery.startswith(("files:", "folders:", "all:")):
                type_parts = searchQuery.split(':', 1)
                searchType = type_parts[0].strip()
                searchQuery = type_parts[1].strip()

            # Extract optional site hint tokens: support "site=Name" or leading "site:Name"
            def _extract_site_hint(q: str) -> tuple[str, Optional[str]]:
                try:
                    q_strip = q.strip()
                    # Leading form: site:KM LayerFinance ...
                    if q_strip.lower().startswith("site:"):
                        after = q_strip[5:].lstrip()
                        # site name until next space or end
                        if ' ' in after:
                            site_name, rest = after.split(' ', 1)
                        else:
                            site_name, rest = after, ''
                        return rest.strip(), site_name.strip()
                    # Inline key=value form anywhere
                    m = re.search(r"\bsite=([^;\s]+)", q_strip, flags=re.IGNORECASE)
                    if m:
                        site_name = m.group(1).strip()
                        # remove the token from query
                        q_new = re.sub(r"\bsite=[^;\s]+;?", "", q_strip, flags=re.IGNORECASE).strip()
                        return q_new, site_name
                except Exception:
                    pass
                return q, None

            searchQuery, extracted_site = _extract_site_hint(searchQuery)
            if extracted_site:
                searchOptions["site_hint"] = extracted_site
                logger.info(f"Extracted site hint: '{extracted_site}'")

            # Extract name="..." if present (for quoted multi-word names)
            name_match = re.search(r"name=\"([^\"]+)\"", searchQuery)
            if name_match:
                searchQuery = name_match.group(1)
                logger.info(f"Extracted name from quotes: '{searchQuery}'")

            # Check for search mode specification (exact:, regex:, case:, and:)
            if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):
                mode_parts = searchQuery.split(':', 1)
                mode = mode_parts[0].strip()
                searchQuery = mode_parts[1].strip()

                if mode == "exact":
                    searchOptions["exact_match"] = True
                    # Remove quotes if present
                    if searchQuery.startswith('"') and searchQuery.endswith('"'):
                        searchQuery = searchQuery[1:-1]
                elif mode == "regex":
                    searchOptions["regex_match"] = True
                elif mode == "case":
                    searchOptions["case_sensitive"] = True
                elif mode == "and":
                    searchOptions["and_terms"] = True

            # Check if it contains path:search format
            if ':' in searchQuery:
                parts = searchQuery.split(':', 1)  # Split only on first colon
                path_part = parts[0].strip()
                search_part = parts[1].strip()

                # Handle path part
                if not path_part or path_part == "*":
                    pathQuery = "*"
                elif path_part.startswith('/'):
                    pathQuery = path_part
                else:
                    pathQuery = f"/Documents/{path_part}"

                # Handle search part
                if not search_part or search_part == "*":
                    fileQuery = "*"
                else:
                    fileQuery = search_part

                # Use search_part as fileQuery (name extraction already handled above)
                return pathQuery, fileQuery, searchType, searchOptions

            # No colon - check if it looks like a path
            elif searchQuery.startswith('/'):
                # It's a path only
                return searchQuery, "*", searchType, searchOptions

            else:
                # It's a search term only - keep words as-is, do NOT convert to paths
                # "root document lesson" stays as "root document lesson"
                # "root, gose" stays as "root, gose"
                return "*", searchQuery, searchType, searchOptions

        except Exception as e:
            logger.error(f"Error parsing searchQuery '{searchQuery}': {str(e)}")
            raise ValueError(f"Failed to parse searchQuery '{searchQuery}': {str(e)}")

    def _resolvePathQuery(self, pathQuery: str) -> List[str]:
        """
        Resolve pathQuery into a list of search paths for SharePoint operations.

        Parameters:
            pathQuery (str): Query string that can contain:
                - Direct paths (e.g., "/Documents/Project1")
                - Wildcards (e.g., "/Documents/*")
                - Multiple paths separated by semicolons (e.g., "/Docs; /Files")
                - Single word relative paths (e.g., "Project1" -> resolved to default folder)
                - Empty string or "*" for global search
                - Space-separated words are treated as search terms, NOT folder paths

        Returns:
            List[str]: List of resolved paths
        """
        try:
            if not pathQuery or not pathQuery.strip() or pathQuery.strip() == "*":
                return ["*"]  # Global search across all sites

            # Split by semicolon to handle multiple paths
            raw_paths = [path.strip() for path in pathQuery.split(';') if path.strip()]
            resolved_paths = []

            for raw_path in raw_paths:
                # Handle wildcards - return as-is
                if '*' in raw_path:
                    resolved_paths.append(raw_path)
                # Handle absolute paths
                elif raw_path.startswith('/'):
                    resolved_paths.append(raw_path)
                # Handle single word relative paths - prepend default folder
                # BUT NOT space-separated words (those are search terms, not paths)
                elif ' ' not in raw_path:
                    resolved_paths.append(f"/Documents/{raw_path}")
                else:
                    # Check if this looks like a path (has path separators) or search terms
                    if '\\' in raw_path or '/' in raw_path:
                        # This looks like a path with spaces in folder names - treat as valid path
                        resolved_paths.append(raw_path)
                        logger.info(f"Path with spaces '{raw_path}' treated as valid folder path")
                    else:
                        # Space-separated words without path separators are search terms
                        # Return as "*" to search globally
                        logger.info(f"Space-separated words '{raw_path}' treated as search terms, not folder path")
                        resolved_paths.append("*")

            # Remove duplicates while preserving order
            seen = set()
            unique_paths = []
            for path in resolved_paths:
                if path not in seen:
                    seen.add(path)
                    unique_paths.append(path)

            logger.info(f"Resolved pathQuery '{pathQuery}' to {len(unique_paths)} paths: {unique_paths}")
            return unique_paths

        except Exception as e:
            logger.error(f"Error resolving pathQuery '{pathQuery}': {str(e)}")
            raise ValueError(f"Failed to resolve pathQuery '{pathQuery}': {str(e)}")

    def _parseSiteUrl(self, siteUrl: str) -> Dict[str, str]:
        """Parse SharePoint site URL to extract hostname and site path"""
        try:
            parsed = urlparse(siteUrl)
            hostname = parsed.hostname
            path = parsed.path.strip('/')

            return {
                "hostname": hostname,
                "sitePath": path
            }
        except Exception as e:
            logger.error(f"Error parsing site URL {siteUrl}: {str(e)}")
            return {"hostname": "", "sitePath": ""}

    async def _makeGraphApiCall(self, access_token: str, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
        """Make a Microsoft Graph API call with timeout and detailed logging"""
        try:
            headers = {
                "Authorization": f"Bearer {access_token}",
                "Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
            }

            url = f"https://graph.microsoft.com/v1.0/{endpoint}"
            logger.info(f"Making Graph API call: {method} {url}")

            # Set timeout to 30 seconds
            timeout = aiohttp.ClientTimeout(total=30)

            async with aiohttp.ClientSession(timeout=timeout) as session:
                if method == "GET":
                    logger.debug(f"Starting GET request to {url}")
                    async with session.get(url, headers=headers) as response:
                        logger.info(f"Graph API response: {response.status}")
                        if response.status == 200:
                            result = await response.json()
                            logger.debug(f"Graph API success: {len(str(result))} characters response")
                            return result
                        else:
                            error_text = await response.text()
                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
                            return {"error": f"API call failed: {response.status} - {error_text}"}

                elif method == "PUT":
                    logger.debug(f"Starting PUT request to {url}")
                    async with session.put(url, headers=headers, data=data) as response:
                        logger.info(f"Graph API response: {response.status}")
                        if response.status in [200, 201]:
                            result = await response.json()
                            logger.debug(f"Graph API success: {len(str(result))} characters response")
                            return result
                        else:
                            error_text = await response.text()
                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
                            return {"error": f"API call failed: {response.status} - {error_text}"}

                elif method == "POST":
                    logger.debug(f"Starting POST request to {url}")
                    async with session.post(url, headers=headers, data=data) as response:
                        logger.info(f"Graph API response: {response.status}")
                        if response.status in [200, 201]:
                            result = await response.json()
                            logger.debug(f"Graph API success: {len(str(result))} characters response")
                            return result
                        else:
                            error_text = await response.text()
                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
                            return {"error": f"API call failed: {response.status} - {error_text}"}

        except asyncio.TimeoutError:
            logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
            return {"error": f"API call timed out after 30 seconds: {endpoint}"}
        except Exception as e:
            logger.error(f"Error making Graph API call: {str(e)}")
            return {"error": f"Error making Graph API call: {str(e)}"}

    async def _getSiteId(self, access_token: str, hostname: str, site_path: str) -> str:
        """Get SharePoint site ID from hostname and site path"""
        try:
            endpoint = f"sites/{hostname}:/{site_path}"
            result = await self._makeGraphApiCall(access_token, endpoint)

            if "error" in result:
                logger.error(f"Error getting site ID: {result['error']}")
                return ""

            return result.get("id", "")
        except Exception as e:
            logger.error(f"Error getting site ID: {str(e)}")
            return ""


    @action
    async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Find documents/folders by searching their NAMES across SharePoint sites.

        Parameters:
            connectionReference (str): Microsoft connection reference
            site (str, optional): Site hint (e.g., "SSS", "KM XYZ")
            searchQuery (str): Search query - "budget", "folders:alpha", "files:budget", "/Documents/Project1", "namepart1 namepart2 namepart3". Use "folders:" prefix when user wants to store files or find folders
            maxResults (int, optional): Max results (default: 100)
        """
        try:
            connectionReference = parameters.get("connectionReference")
            site = parameters.get("site")
            searchQuery = parameters.get("searchQuery", "*")
            maxResults = parameters.get("maxResults", 100)

            if not connectionReference:
                return ActionResult.isFailure(error="Connection reference is required")

            # Parse searchQuery to extract path, search terms, search type, and options
            pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery)

            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            # Discover SharePoint sites - use targeted approach when site parameter is provided
            if site:
                # When site parameter is provided, discover all sites first, then filter
                all_sites = await self._discoverSharePointSites(connection["accessToken"])
                if not all_sites:
                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")

                sites = self._filter_sites_by_hint(all_sites, site)
                logger.info(f"Filtered sites by site parameter: '{site}' -> {len(sites)} sites")
                if not sites:
                    return ActionResult.isFailure(error=f"No SharePoint sites found matching '{site}'")
            else:
                # No site parameter - discover all sites
                sites = await self._discoverSharePointSites(connection["accessToken"])
                if not sites:
                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")

            # Resolve path query into search paths
            search_paths = self._resolvePathQuery(pathQuery)

            try:
                # Search across all discovered sites
                found_documents = []
                all_sites_searched = []

                # Handle different search approaches based on search type
                if searchType == "folders" and fileQuery and fileQuery.strip() != "" and fileQuery.strip() != "*":
                    # Use unified search for folders - this is global and searches all sites
                    try:
                        import json

                        # Use Microsoft Graph Search API syntax (simple term search only)
                        terms = [t for t in fileQuery.split() if t.strip()]

                        if len(terms) > 1:
                            # Multiple terms: search for ALL terms (AND) - more specific results
                            query_string = " AND ".join(terms)
                        else:
                            # Single term: search for the term
                            query_string = terms[0] if terms else fileQuery
                        logger.info(f"Using unified search for folders: {query_string}")

                        payload = {
                            "requests": [
                                {
                                    "entityTypes": ["driveItem"],
                                    "query": {"queryString": query_string},
                                    "from": 0,
                                    "size": 50
                                }
                            ]
                        }
                        logger.info(f"Using unified search API for folders with queryString: {query_string}")

                        # Use global search endpoint (site-specific search not available)
                        unified_result = await self._makeGraphApiCall(
                            connection["accessToken"],
                            "search/query",
                            method="POST",
                            data=json.dumps(payload).encode("utf-8")
                        )

                        if "error" in unified_result:
                            logger.warning(f"Unified search failed: {unified_result['error']}")
                            items = []
                        else:
                            # Flatten hits -> driveItem resources
                            items = []
                            for container in (unified_result.get("value", []) or []):
                                for hits_container in (container.get("hitsContainers", []) or []):
                                    for hit in (hits_container.get("hits", []) or []):
                                        resource = hit.get("resource")
                                        if resource:
                                            items.append(resource)

                            logger.info(f"Unified search returned {len(items)} items (pre-filter)")

                            # Apply our improved folder detection logic
                            folder_items = []
                            for item in items:
                                resource = item

                                # Use the same detection logic as our test
                                is_folder = False
                                if 'folder' in resource:
                                    is_folder = True
                                else:
                                    # Try to detect by URL pattern or other indicators
                                    web_url = resource.get('webUrl', '')
                                    name = resource.get('name', '')

                                    # Check if URL has no file extension and looks like a folder path
                                    if '.' not in name and ('/' in web_url or '\\' in web_url):
                                        is_folder = True

                                if is_folder:
                                    folder_items.append(item)

                            items = folder_items
                            logger.info(f"Filtered to {len(items)} folders using improved detection logic")

                            # Process unified search results - extract site information from webUrl
                            for item in items:
                                item_name = item.get("name", "")
                                web_url = item.get("webUrl", "")

                                # Extract site information from webUrl
                                site_name = "Unknown Site"
                                site_id = "unknown"

                                if web_url and '/sites/' in web_url:
                                    try:
                                        # Extract site name from URL: https://pcuster.sharepoint.com/sites/SiteName/...
                                        url_parts = web_url.split('/sites/')
                                        if len(url_parts) > 1:
                                            site_path = url_parts[1].split('/')[0]
                                            # Find matching site from discovered sites
                                            # First try to match by site name (URL path)
                                            for site in sites:
                                                if site.get("name") == site_path:
                                                    site_name = site.get("displayName", site_path)
                                                    site_id = site.get("id", "unknown")
                                                    break
                                            else:
                                                # If no match by name, try to match by displayName
                                                for site in sites:
                                                    if site.get("displayName") == site_path:
                                                        site_name = site.get("displayName", site_path)
                                                        site_id = site.get("id", "unknown")
                                                        break
                                                else:
                                                    # If no exact match, use the site path as site name
                                                    site_name = site_path
                                                    # Try to find a site with similar name
                                                    for site in sites:
                                                        if site_path.lower() in site.get("name", "").lower() or site_path.lower() in site.get("displayName", "").lower():
                                                            site_name = site.get("displayName", site_path)
                                                            site_id = site.get("id", "unknown")
                                                            break
                                    except Exception as e:
                                        logger.warning(f"Error extracting site info from URL {web_url}: {e}")

                                # Use improved folder detection logic
                                is_folder = False
                                if 'folder' in item:
                                    is_folder = True
                                else:
                                    # Try to detect by URL pattern or other indicators
                                    name = item.get('name', '')

                                    # Check if URL has no file extension and looks like a folder path
                                    if '.' not in name and ('/' in web_url or '\\' in web_url):
                                        is_folder = True

                                item_type = "folder" if is_folder else "file"
                                item_path = item.get("parentReference", {}).get("path", "")
                                logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")

                                # Simple filtering like test file - just check search type
                                if searchType == "files" and is_folder:
                                    continue  # Skip folders when searching for files
                                elif searchType == "folders" and not is_folder:
                                    continue  # Skip files when searching for folders

                                # Simple approach like test file - no complex filtering
                                logger.debug(f"Item '{item_name}' found - adding to results")

                                # Create result with full path information for proper action chaining
                                parent_path = item.get("parentReference", {}).get("path", "")

                                # Extract the full SharePoint path from webUrl or parentReference
                                full_path = ""
                                if web_url:
                                    # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
                                    if '/sites/' in web_url:
                                        path_part = web_url.split('/sites/')[1]
                                        # Decode URL encoding and convert to backslash format
                                        import urllib.parse
                                        decoded_path = urllib.parse.unquote(path_part)
                                        full_path = "\\" + decoded_path.replace('/', '\\')
                                elif parent_path:
                                    # Use parentReference path if available
                                    full_path = parent_path.replace('/', '\\')

                                doc_info = {
                                    "id": item.get("id"),
                                    "name": item.get("name"),
                                    "type": "folder" if is_folder else "file",
                                    "siteName": site_name,
                                    "siteId": site_id,
                                    "webUrl": web_url,
                                    "fullPath": full_path,
                                    "parentPath": parent_path
                                }

                                found_documents.append(doc_info)

                            logger.info(f"Found {len(found_documents)} documents from unified search")

                    except Exception as e:
                        logger.error(f"Error performing unified folder search: {str(e)}")
                        # Fallback to site-by-site search
                        pass

                # If no unified search was performed or it failed, fall back to site-by-site search
                if not found_documents:
                    # Use simple approach like test file - no complex filtering
                    site_scoped_sites = sites

                    for site in site_scoped_sites:
                        site_id = site["id"]
                        site_name = site["displayName"]
                        site_url = site["webUrl"]

                        logger.info(f"Searching in site: {site_name} ({site_url})")

                        # Use Microsoft Graph API for this specific site
                        # Handle empty or wildcard queries
                        if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
                            # For wildcard/empty queries, list all items in the drive
                            endpoint = f"sites/{site_id}/drive/root/children"
                        else:
                            # For files, use regular search API
                            search_query = fileQuery.replace("'", "''")  # Escape single quotes for OData
                            endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
                            logger.info(f"Using search API for files with query: '{search_query}'")

                            # Make the search API call (files)
                            search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
                            if "error" in search_result:
                                logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
                                continue
                            # Process search results for this site (files)
                            items = search_result.get("value", [])
                            logger.info(f"Retrieved {len(items)} items from site {site_name}")

                        site_documents = []

                        for item in items:
                            item_name = item.get("name", "")

                            # Use improved folder detection logic
                            is_folder = False
                            if 'folder' in item:
                                is_folder = True
                            else:
                                # Try to detect by URL pattern or other indicators
                                web_url = item.get('webUrl', '')
                                name = item.get('name', '')

                                # Check if URL has no file extension and looks like a folder path
                                if '.' not in name and ('/' in web_url or '\\' in web_url):
                                    is_folder = True

                            item_type = "folder" if is_folder else "file"
                            item_path = item.get("parentReference", {}).get("path", "")
                            logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")

                            # Simple filtering like test file - just check search type
                            if searchType == "files" and is_folder:
                                continue  # Skip folders when searching for files
                            elif searchType == "folders" and not is_folder:
                                continue  # Skip files when searching for folders

                            # Simple approach like test file - no complex filtering
                            logger.debug(f"Item '{item_name}' found - adding to results")

                            # Create result with full path information for proper action chaining
                            web_url = item.get("webUrl", "")
                            parent_path = item.get("parentReference", {}).get("path", "")

                            # Extract the full SharePoint path from webUrl or parentReference
                            full_path = ""
                            if web_url:
                                # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
                                if '/sites/' in web_url:
                                    path_part = web_url.split('/sites/')[1]
                                    # Decode URL encoding and convert to backslash format
                                    import urllib.parse
                                    decoded_path = urllib.parse.unquote(path_part)
                                    full_path = "\\" + decoded_path.replace('/', '\\')
                            elif parent_path:
                                # Use parentReference path if available
                                full_path = parent_path.replace('/', '\\')

                            doc_info = {
                                "id": item.get("id"),
                                "name": item.get("name"),
                                "type": "folder" if is_folder else "file",
                                "siteName": site_name,
                                "siteId": site_id,
                                "webUrl": web_url,
                                "fullPath": full_path,
                                "parentPath": parent_path
                            }

                            site_documents.append(doc_info)

                        found_documents.extend(site_documents)
                        all_sites_searched.append({
                            "siteName": site_name,
                            "siteUrl": site_url,
                            "siteId": site_id,
                            "documentsFound": len(site_documents)
                        })

                        logger.info(f"Found {len(site_documents)} documents in site {site_name}")

                # Limit total results to maxResults
                if len(found_documents) > maxResults:
                    found_documents = found_documents[:maxResults]
                    logger.info(f"Limited results to {maxResults} items")

                result_data = {
                    "searchQuery": searchQuery,
                    "totalResults": len(found_documents),
                    "maxResults": maxResults,
                    "foundDocuments": found_documents,
                    "timestamp": get_utc_timestamp()
                }

            except Exception as e:
                logger.error(f"Error searching SharePoint: {str(e)}")
                return ActionResult.isFailure(error=str(e))

            # Use default JSON format for output
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default


            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_find_path_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )

        except Exception as e:
            logger.error(f"Error finding document path: {str(e)}")
            return ActionResult.isFailure(error=str(e))

    @action
    async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Read documents from SharePoint across all accessible sites

        Parameters:
            documentList (list): Reference(s) to the document list to read
            connectionReference (str): Reference to the Microsoft connection
            pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
            pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
            includeMetadata (bool, optional): Whether to include metadata (default: True)
        """
        try:
            documentList = parameters.get("documentList")
            if isinstance(documentList, str):
                documentList = [documentList]
            connectionReference = parameters.get("connectionReference")
            pathQuery = parameters.get("pathQuery", "*")
            pathObject = parameters.get("pathObject")
            includeMetadata = parameters.get("includeMetadata", True)

            if not documentList or not connectionReference:
                return ActionResult.isFailure(error="Document list reference and connection reference are required")

            # If pathObject is provided, extract folder IDs from it
            # Note: pathObject takes precedence over pathQuery when both are provided
            if pathObject:
                if pathQuery and pathQuery != "*":
                    logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
                try:
                    import json
                    # Resolve the reference label to get the actual document list
                    document_list = self.service.getChatDocumentsFromDocumentList([pathObject])
                    if not document_list or len(document_list) == 0:
                        return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")

                    # Get the first document's content (which should be the JSON)
                    first_document = document_list[0]
                    file_data = self.service.getFileData(first_document.fileId)
                    if not file_data:
                        return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")

                    # Parse the JSON content
                    result_data = json.loads(file_data)
                    found_documents = result_data.get("foundDocuments", [])

                    # Extract folder IDs from the result
                    folder_ids = []
                    for doc in found_documents:
                        if doc.get("type") == "folder":
                            folder_ids.append(doc.get("id"))

                    if folder_ids:
                        # Use the first folder ID found as pathQuery
                        pathQuery = folder_ids[0]
                        logger.info(f"Using folder ID from pathObject: {pathQuery}")
                    else:
                        return ActionResult.isFailure(error="No folders found in pathObject")

                except json.JSONDecodeError as e:
                    return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
                except Exception as e:
                    return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")

            # Get documents from reference - ensure documentList is a list, not a string
            # documentList is already normalized above
            chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)

            if not chatDocuments:
                return ActionResult.isFailure(error="No documents found for the provided reference")

            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            # Determine sites to use - strict validation: pathObject → pathQuery → ERROR
            sites = None

            # Step 1: Check pathObject first
            if pathObject:
                # When pathObject is provided, we should have specific site information
                # Extract site information from the pathObject result
                try:
                    # Get the site information from the first folder in pathObject
                    if 'found_documents' in locals() and found_documents:
                        first_folder = found_documents[0]
                        site_name = first_folder.get("siteName")
                        site_id = first_folder.get("siteId")

                        if site_name and site_id:
                            # Use the specific site from pathObject instead of discovering all sites
                            sites = [{
                                "id": site_id,
                                "displayName": site_name,
                                "webUrl": first_folder.get("webUrl", "")
                            }]
                            logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
                        else:
                            # Site info missing from pathObject - this is an error
                            return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
                    else:
                        # No documents found in pathObject - this is an error
                        return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for read operation.")
                except Exception as e:
                    # Error processing pathObject - this is an error
                    return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for read operation.")

            # Step 2: If no pathObject, check pathQuery
            elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
                # Validate pathQuery format
                if not pathQuery.startswith('/'):
                    return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")

                # Check if pathQuery contains search terms (words without proper path structure)
                if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'):
                    return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")

                # For pathQuery, we need to discover sites to find the specific one
                sites = await self._discoverSharePointSites(connection["accessToken"])
                if not sites:
                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")
            else:
                # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
                return ActionResult.isFailure(error="No valid upload path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")

            if not sites:
                return ActionResult.isFailure(error="No valid target site determined for read operation")

            # Resolve path query into search paths
            search_paths = self._resolvePathQuery(pathQuery)

            # Process each chat document across all sites
            read_results = []

            for i, chatDocument in enumerate(chatDocuments):
                try:
                    fileId = chatDocument.fileId
                    fileName = chatDocument.fileName

                    # Search for this file across all sites
                    file_found = False

                    for site in sites:
                        site_id = site["id"]
                        site_name = site["displayName"]
                        site_url = site["webUrl"]

                        # Try to find the file by name in this site
                        search_query = fileName.replace("'", "''")  # Escape single quotes for OData
                        endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"

                        search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)

                        if "error" in search_result:
                            continue

                        items = search_result.get("value", [])
                        for item in items:
                            if item.get("name") == fileName:
                                # Found the file, get its details
                                file_id = item.get("id")
                                file_endpoint = f"sites/{site_id}/drive/items/{file_id}"

                                # Get file metadata
                                file_info_result = await self._makeGraphApiCall(connection["accessToken"], file_endpoint)

                                if "error" in file_info_result:
                                    continue

                                # Build result with metadata
                                result_item = {
                                    "fileId": fileId,
                                    "fileName": fileName,
                                    "sharepointFileId": file_id,
                                    "siteName": site_name,
                                    "siteUrl": site_url,
                                    "size": file_info_result.get("size", 0),
                                    "createdDateTime": file_info_result.get("createdDateTime"),
                                    "lastModifiedDateTime": file_info_result.get("lastModifiedDateTime"),
                                    "webUrl": file_info_result.get("webUrl")
                                }

                                # Add metadata if requested
                                if includeMetadata:
                                    result_item["metadata"] = {
                                        "mimeType": file_info_result.get("file", {}).get("mimeType"),
                                        "downloadUrl": file_info_result.get("@microsoft.graph.downloadUrl"),
                                        "createdBy": file_info_result.get("createdBy", {}),
                                        "lastModifiedBy": file_info_result.get("lastModifiedBy", {}),
                                        "parentReference": file_info_result.get("parentReference", {})
                                    }

                                # Get file content if it's a readable format
                                mime_type = file_info_result.get("file", {}).get("mimeType", "")
                                if mime_type.startswith("text/") or mime_type in [
                                    "application/json", "application/xml", "application/javascript"
                                ]:
                                    # Download the file content
                                    content_endpoint = f"sites/{site_id}/drive/items/{file_id}/content"

                                    # For content download, we need to handle binary data
                                    try:
                                        async with aiohttp.ClientSession() as session:
                                            headers = {"Authorization": f"Bearer {connection['accessToken']}"}
                                            async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
                                                if response.status == 200:
                                                    content = await response.text()
                                                    result_item["content"] = content
                                                else:
                                                    result_item["content"] = f"Could not download content: HTTP {response.status}"
                                    except Exception as e:
                                        result_item["content"] = f"Error downloading content: {str(e)}"
                                else:
                                    result_item["content"] = f"Binary file type ({mime_type}) - content not retrieved"

                                read_results.append(result_item)
                                file_found = True
                                break

                        if file_found:
                            break

                    if not file_found:
                        read_results.append({
                            "fileId": fileId,
                            "fileName": fileName,
                            "error": "File not found in any accessible SharePoint site",
                            "content": None
                        })

                except Exception as e:
                    logger.error(f"Error reading document {chatDocument.fileName}: {str(e)}")
                    read_results.append({
                        "fileId": chatDocument.fileId,
                        "fileName": chatDocument.fileName,
                        "error": str(e),
                        "content": None
                    })

            result_data = {
                "connectionReference": connectionReference,
                "pathQuery": pathQuery,
                "documentList": documentList,
                "includeMetadata": includeMetadata,
                "sitesSearched": len(sites),
                "readResults": read_results,
                "connection": {
                    "id": connection["id"],
                    "authority": "microsoft",
                    "reference": connectionReference
                },
                "timestamp": get_utc_timestamp()
            }

            # Use default JSON format for output
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default


            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_documents_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )
        except Exception as e:
            logger.error(f"Error reading SharePoint documents: {str(e)}")
            return ActionResult(
                success=False,
                error=str(e)
            )

    @action
    async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Upload documents to SharePoint across accessible sites

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
            pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
            pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
            documentList (list): Reference(s) to the document list to upload
            fileNames (List[str]): List of names for the uploaded files
        """
        try:
            connectionReference = parameters.get("connectionReference")
            pathQuery = parameters.get("pathQuery")
            documentList = parameters.get("documentList")
            if isinstance(documentList, str):
                documentList = [documentList]
            fileNames = parameters.get("fileNames")
            pathObject = parameters.get("pathObject")

            upload_path = pathQuery
            logger.debug(f"Using pathQuery: {pathQuery}")

            if not connectionReference or not documentList or not fileNames:
                return ActionResult.isFailure(error="Connection reference, document list, and file names are required")

            # If pathObject is provided, extract folder IDs from it
            if pathObject:
                try:
                    import json
                    # Resolve the reference label to get the actual document list
                    document_list = self.service.getChatDocumentsFromDocumentList([pathObject])
                    if not document_list or len(document_list) == 0:
                        return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")

                    # Get the first document's content (which should be the JSON)
                    first_document = document_list[0]
                    file_data = self.service.getFileData(first_document.fileId)
                    if not file_data:
                        return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")

                    # Parse the JSON content
                    result_data = json.loads(file_data)

                    # Debug: Log the structure of the result document
                    logger.info(f"Result document keys: {list(result_data.keys())}")

                    # Handle different result document formats
                    found_documents = []

                    # Check if it's a direct SharePoint result (has foundDocuments)
                    if "foundDocuments" in result_data:
                        found_documents = result_data.get("foundDocuments", [])
                        logger.info(f"Found {len(found_documents)} documents in foundDocuments array")
                    # Check if it's an AI validation result (has result string with validationReport)
                    elif "result" in result_data and "validationReport" in result_data["result"]:
                        try:
                            # Parse the nested JSON in the result field
                            nested_result = json.loads(result_data["result"])
                            validation_report = nested_result.get("validationReport", {})
                            document_details = validation_report.get("documentDetails", {})

                            if document_details:
                                # Convert the single document details to the expected format
                                doc = {
                                    "id": document_details.get("id"),
                                    "name": document_details.get("name"),
                                    "type": document_details.get("type", "").lower(),  # Convert "Folder" to "folder"
                                    "siteName": document_details.get("siteName"),
                                    "siteId": document_details.get("siteId"),
                                    "fullPath": document_details.get("fullPath"),
                                    "webUrl": document_details.get("webUrl", ""),
                                    "parentPath": document_details.get("parentPath", "")
                                }
                                found_documents = [doc]
                                logger.info(f"Extracted 1 document from validation report")
                        except json.JSONDecodeError as e:
                            logger.error(f"Failed to parse nested JSON in result field: {e}")
                            return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}")

                    # Debug: Log what we found in the result document
                    logger.info(f"Result document contains {len(found_documents)} documents")
                    for i, doc in enumerate(found_documents):
                        logger.info(f"  Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'")

                    # Extract folder information from the result
                    folders = []
                    for doc in found_documents:
                        if doc.get("type") == "folder":
                            folders.append(doc)

                    logger.info(f"Found {len(folders)} folders in result document")

                    if folders:
                        # Use the first folder found - prefer folder ID for direct API calls
                        first_folder = folders[0]
                        if first_folder.get("id"):
                            # Use folder ID directly for most reliable API calls
                            upload_path = first_folder.get("id")
                            logger.info(f"Using folder ID from pathObject: {upload_path}")
                        elif first_folder.get("fullPath"):
                            # Extract the correct path portion from fullPath by removing site name
                            full_path = first_folder.get("fullPath")
                            # fullPath format: \\SiteName\\Library\\Folder\\SubFolder
                            # We need to remove the first two parts (\\SiteName\\) to get the actual folder path
                            path_parts = full_path.lstrip('\\').split('\\')
                            if len(path_parts) > 1:
                                # Remove the first part (site name) and reconstruct the path
                                actual_path = '\\'.join(path_parts[1:])
                                upload_path = actual_path
                                logger.info(f"Extracted path from fullPath: {upload_path}")
                            else:
                                upload_path = full_path
                                logger.info(f"Using full path from pathObject (no site name to remove): {upload_path}")
                        else:
                            return ActionResult.isFailure(error="No valid folder information found in pathObject")
                    else:
                        return ActionResult.isFailure(error="No folders found in pathObject")

                except json.JSONDecodeError as e:
                    return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
                except Exception as e:
                    return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")

            # Get Microsoft connection
            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            # Get documents from reference - ensure documentList is a list, not a string
            if isinstance(documentList, str):
                documentList = [documentList]  # Convert string to list
            chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)
            if not chatDocuments:
                return ActionResult.isFailure(error="No documents found for the provided reference")

            # Determine sites to use based on whether pathObject was provided
            sites = None
            if pathObject:
                # When pathObject is provided, we should have specific site information
                # Extract site information from the pathObject result
                try:
                    # Get the site information from the first folder in pathObject
                    if 'found_documents' in locals() and found_documents:
                        first_folder = found_documents[0]
                        site_name = first_folder.get("siteName")
                        site_id = first_folder.get("siteId")

                        if site_name and site_id:
                            # Use the specific site from pathObject instead of discovering all sites
                            sites = [{
                                "id": site_id,
                                "displayName": site_name,
                                "webUrl": first_folder.get("webUrl", "")
                            }]
                            logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
                        else:
                            # Site info missing from pathObject - this is an error, not a fallback
                            return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for upload.")
                    else:
                        # No documents found in pathObject - this is an error
                        return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for upload.")
                except Exception as e:
                    # Error processing pathObject - this is an error, not a fallback
                    return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for upload.")
            else:
                # No pathObject provided - check if pathQuery is valid
                if not upload_path or upload_path.strip() == "" or upload_path.strip() == "*":
                    return ActionResult.isFailure(error="No valid upload path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")

                # Validate pathQuery format
                if not upload_path.startswith('/'):
                    return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")

                # Check if upload_path contains search terms (words without proper path structure)
                if not upload_path.startswith('/site:') and not upload_path.startswith('/Documents') and not upload_path.startswith('/Shared Documents'):
                    return ActionResult.isFailure(error=f"Invalid pathQuery '{upload_path}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")

                # For pathQuery, we need to discover sites to find the specific one
                sites = await self._discoverSharePointSites(connection["accessToken"])
                if not sites:
                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")

            if not sites:
                return ActionResult.isFailure(error="No valid target site determined for upload")

            # Process upload paths based on whether pathObject was provided
            upload_site_scope = None
            if not pathObject:
                # Parse the validated pathQuery to extract site and path information
                parsed = self._parse_site_scoped_path(upload_path)
                if not parsed:
                    return ActionResult.isFailure(error="Invalid upload_path. Use /site:<Site Display Name>/<Library or Folder Path>")

                # Find matching site
                candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"])  # substring match
                # Choose exact displayName match if available
                exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
                selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None)
                if not selected_site:
                    return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible")

                upload_site_scope = selected_site
                # Use the inner path portion as the actual upload target path
                upload_paths = [f"/{parsed['innerPath'].lstrip('/')}"]
                sites = [selected_site]
            else:
                # When using pathObject, check if upload_path is a folder ID or a path
                if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'):
                    # It's a folder ID - use it directly
                    upload_paths = [upload_path]
                    logger.info(f"Using folder ID directly for upload: {upload_path}")
                else:
                    # It's a path - resolve it normally
                    upload_paths = self._resolvePathQuery(upload_path)

            # Process each document upload
            upload_results = []

            for i, (chatDocument, fileName) in enumerate(zip(chatDocuments, fileNames)):
                try:
                    fileId = chatDocument.fileId
                    file_data = self.service.getFileData(fileId)

                    if not file_data:
                        logger.warning(f"File data not found for fileId: {fileId}")
                        upload_results.append({
                            "fileName": fileName,
                            "fileId": fileId,
                            "error": "File data not found",
                            "uploadStatus": "failed"
                        })
                        continue

                    # Upload to the first available site (or could be made configurable)
                    upload_successful = False

                    for site in sites:
                        site_id = site["id"]
                        site_name = site["displayName"]
                        site_url = site["webUrl"]

                        # Use the first upload path or default to Documents
                        upload_path = upload_paths[0] if upload_paths else "/Documents"

                        # Handle wildcard paths - replace with default Documents folder
                        if upload_path == "*":
                            upload_path = "/Documents"
                            logger.warning(f"Wildcard path '*' detected, using default '/Documents' folder for upload")

                        # Check if upload_path is a folder ID or a regular path
                        if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'):
                            # It's a folder ID - use the folder-specific upload endpoint
                            upload_endpoint = f"sites/{site_id}/drive/items/{upload_path}:/{fileName}:/content"
                            logger.info(f"Using folder ID upload endpoint: {upload_endpoint}")
                        else:
                            # It's a regular path - use the root-based upload endpoint
                            upload_path = upload_path.rstrip('/') + '/' + fileName
                            upload_path_clean = upload_path.lstrip('/')
                            upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content"
                            logger.info(f"Using path-based upload endpoint: {upload_endpoint}")

                        # Upload endpoint for small files (< 4MB)
                        if len(file_data) < 4 * 1024 * 1024:  # 4MB

                            # Upload the file
                            upload_result = await self._makeGraphApiCall(
                                connection["accessToken"],
                                upload_endpoint,
                                method="PUT",
                                data=file_data
                            )

                            if "error" not in upload_result:
                                upload_results.append({
                                    "fileName": fileName,
                                    "fileId": fileId,
                                    "uploadStatus": "success",
                                    "siteName": site_name,
                                    "siteUrl": site_url,
                                    "uploadPath": upload_path,
                                    "uploadEndpoint": upload_endpoint,
                                    "sharepointFileId": upload_result.get("id"),
                                    "webUrl": upload_result.get("webUrl"),
                                    "size": upload_result.get("size"),
                                    "createdDateTime": upload_result.get("createdDateTime")
                                })
                                upload_successful = True
                                break
                            else:
                                logger.warning(f"Upload failed to site {site_name}: {upload_result['error']}")
                        else:
                            # For large files, we would need to implement resumable upload
                            logger.warning(f"File too large ({len(file_data)} bytes) for site {site_name}")
                            continue

                    if not upload_successful:
                        upload_results.append({
                            "fileName": fileName,
                            "fileId": fileId,
                            "error": f"File too large ({len(file_data)} bytes) or upload failed to all sites. Files larger than 4MB require resumable upload (not implemented).",
                            "uploadStatus": "failed"
                        })

                except Exception as e:
                    logger.error(f"Error uploading document {fileName}: {str(e)}")
                    upload_results.append({
                        "fileName": fileName,
                        "fileId": fileId,
                        "error": str(e),
                        "uploadStatus": "failed"
                    })

            # Create result data
            result_data = {
                "connectionReference": connectionReference,
                "pathQuery": upload_path,
                "documentList": documentList,
                "fileNames": fileNames,
                "sitesAvailable": len(sites),
                "uploadResults": upload_results,
                "connection": {
                    "id": connection["id"],
                    "authority": "microsoft",
                    "reference": connectionReference
                },
                "timestamp": get_utc_timestamp()
            }

            # Use default JSON format for output
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default


            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_upload_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )

        except Exception as e:
            logger.error(f"Error uploading to SharePoint: {str(e)}")
            return ActionResult(
                success=False,
                error=str(e)
            )

    @action
    async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        List documents in SharePoint folders across accessible sites

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
            pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
            pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
            includeSubfolders (bool, optional): Whether to include subfolders (default: False)
        """
        try:
            connectionReference = parameters.get("connectionReference")
            pathObject = parameters.get("pathObject")
            pathQuery = parameters.get("pathQuery")
            includeSubfolders = parameters.get("includeSubfolders", False)  # Default to False for better UX

            list_query = pathQuery
            logger.info(f"Using pathQuery: {pathQuery}")

            if not connectionReference:
                return ActionResult.isFailure(error="Connection reference is required")

            # If pathObject is provided, resolve the reference and extract folder IDs from it
            # Note: pathObject takes precedence over pathQuery when both are provided
            if pathObject:
                if pathQuery and pathQuery != "*":
                    logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
                try:
                    import json
                    # Resolve the reference label to get the actual document list
                    document_list = self.service.getChatDocumentsFromDocumentList([pathObject])
                    if not document_list or len(document_list) == 0:
                        return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")

                    # Get the first document's content (which should be the JSON)
                    first_document = document_list[0]
                    logger.info(f"Document fileId: {first_document.fileId}, fileName: {first_document.fileName}")
                    file_data = self.service.getFileData(first_document.fileId)
                    if not file_data:
                        return ActionResult.isFailure(error=f"No file data found for document: {pathObject} (fileId: {first_document.fileId})")
                    logger.info(f"File data length: {len(file_data) if file_data else 0}")

                    # Parse the JSON content
                    result_data = json.loads(file_data)

                    # Debug: Log the structure of the result document
                    logger.info(f"Result document keys: {list(result_data.keys())}")

                    # Handle different result document formats
                    found_documents = []

                    # Check if it's a direct SharePoint result (has foundDocuments)
                    if "foundDocuments" in result_data:
                        found_documents = result_data.get("foundDocuments", [])
                        logger.info(f"Found {len(found_documents)} documents in foundDocuments array")
                    # Check if it's an AI validation result (has result string with validationReport)
                    elif "result" in result_data and "validationReport" in result_data["result"]:
                        try:
                            # Parse the nested JSON in the result field
                            nested_result = json.loads(result_data["result"])
                            validation_report = nested_result.get("validationReport", {})
                            document_details = validation_report.get("documentDetails", {})

                            if document_details:
                                # Convert the single document details to the expected format
                                doc = {
                                    "id": document_details.get("id"),
                                    "name": document_details.get("name"),
                                    "type": document_details.get("type", "").lower(),  # Convert "Folder" to "folder"
                                    "siteName": document_details.get("siteName"),
                                    "siteId": document_details.get("siteId"),
                                    "fullPath": document_details.get("fullPath"),
                                    "webUrl": document_details.get("webUrl", ""),
                                    "parentPath": document_details.get("parentPath", "")
                                }
                                found_documents = [doc]
                                logger.info(f"Extracted 1 document from validation report")
                        except json.JSONDecodeError as e:
                            logger.error(f"Failed to parse nested JSON in result field: {e}")
                            return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}")

                    # Debug: Log what we found in the result document
                    logger.info(f"Result document contains {len(found_documents)} documents")
                    for i, doc in enumerate(found_documents):
                        logger.info(f"  Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'")

                    # Extract folder information from the result
                    folders = []
                    for doc in found_documents:
                        if doc.get("type") == "folder":
                            folders.append(doc)

                    logger.info(f"Found {len(folders)} folders in result document")

                    if folders:
                        # Use the first folder found - prefer folder ID for direct API calls
                        first_folder = folders[0]
                        if first_folder.get("id"):
                            # Use folder ID directly for most reliable API calls
                            list_query = first_folder.get("id")
                            logger.info(f"Using folder ID from pathObject: {list_query}")
                        elif first_folder.get("fullPath"):
                            # Extract the correct path portion from fullPath by removing site name
                            full_path = first_folder.get("fullPath")
                            # fullPath format: \\SiteName\\Library\\Folder\\SubFolder
                            # We need to remove the first two parts (\\SiteName\\) to get the actual folder path
                            path_parts = full_path.lstrip('\\').split('\\')
                            if len(path_parts) > 1:
                                # Remove the first part (site name) and reconstruct the path
                                actual_path = '\\'.join(path_parts[1:])
                                list_query = actual_path
                                logger.info(f"Extracted path from fullPath: {list_query}")
                            else:
                                list_query = full_path
                                logger.info(f"Using full path from pathObject (no site name to remove): {list_query}")
                        else:
                            return ActionResult.isFailure(error="No valid folder information found in pathObject")
                    else:
                        return ActionResult.isFailure(error="No folders found in pathObject")

                except json.JSONDecodeError as e:
                    return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
                except Exception as e:
                    return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")

            # Get Microsoft connection
            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            logger.info(f"Starting SharePoint listDocuments for list_query: {list_query}")
            logger.debug(f"Connection ID: {connection['id']}")

            # Parse list_query to extract path, search terms, search type, and options
            pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(list_query)

            # Determine sites to use - strict validation: pathObject → pathQuery → ERROR
            sites = None

            # Step 1: Check pathObject first
            if pathObject:
                # When pathObject is provided, we should have specific site information
                # Extract site information from the pathObject result
                try:
                    # Get the site information from the first folder in pathObject
                    if 'found_documents' in locals() and found_documents:
                        first_folder = found_documents[0]
                        site_name = first_folder.get("siteName")
                        site_id = first_folder.get("siteId")

                        if site_name and site_id:
                            # Use the specific site from pathObject instead of discovering all sites
                            sites = [{
                                "id": site_id,
                                "displayName": site_name,
                                "webUrl": first_folder.get("webUrl", "")
                            }]
                            logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
                        else:
                            # Site info missing from pathObject - this is an error
                            return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for list operation.")
                    else:
                        # No documents found in pathObject - this is an error
                        return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for list operation.")
                except Exception as e:
                    # Error processing pathObject - this is an error
                    return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for list operation.")

            # Step 2: If no pathObject, check pathQuery
            elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
                # Validate pathQuery format
                if not pathQuery.startswith('/'):
                    return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")

                # Check if pathQuery contains search terms (words without proper path structure)
                if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'):
                    return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")

                # For pathQuery, we need to discover sites to find the specific one
                sites = await self._discoverSharePointSites(connection["accessToken"])
                if not sites:
                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")
            else:
                # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
                return ActionResult.isFailure(error="No valid list path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")

            if not sites:
                return ActionResult.isFailure(error="No valid target site determined for list operation")

            # Check if list_query is a folder ID (starts with 01PPXICCB...)
            if list_query.startswith('01PPXICCB') or list_query.startswith('01'):
                # Direct folder ID - use it directly
                folder_paths = [list_query]
                logger.info(f"Using direct folder ID: {list_query}")
            else:
                # Resolve path query into folder paths
                folder_paths = self._resolvePathQuery(pathQuery)
                logger.info(f"Resolved folder paths: {folder_paths}")

            # Process each folder path across all sites
            list_results = []

            for folderPath in folder_paths:
                try:
                    folder_results = []

                    for site in sites:
                        site_id = site["id"]
                        site_name = site["displayName"]
                        site_url = site["webUrl"]

                        logger.info(f"Listing folder {folderPath} in site: {site_name}")

                        # Determine the endpoint based on folder path
                        if folderPath in ["/", ""] or folderPath == "*":
                            # Root folder
                            endpoint = f"sites/{site_id}/drive/root/children"
                        elif folderPath.startswith('01PPXICCB') or folderPath.startswith('01'):
                            # Direct folder ID
                            endpoint = f"sites/{site_id}/drive/items/{folderPath}/children"
                        else:
                            # Specific folder path - remove leading slash if present
                            folder_path_clean = folderPath.lstrip('/')
                            endpoint = f"sites/{site_id}/drive/root:/{folder_path_clean}:/children"

                        # Make the API call to list folder contents
                        api_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)

                        if "error" in api_result:
                            logger.warning(f"Failed to list folder {folderPath} in site {site_name}: {api_result['error']}")
                            continue

                        # Process the results
                        items = api_result.get("value", [])
                        processed_items = []

                        for item in items:
                            # Use improved folder detection logic
                            is_folder = False
                            if 'folder' in item:
                                is_folder = True
                            else:
                                # Try to detect by URL pattern or other indicators
                                web_url = item.get('webUrl', '')
                                name = item.get('name', '')

                                # Check if URL has no file extension and looks like a folder path
                                if '.' not in name and ('/' in web_url or '\\' in web_url):
                                    is_folder = True

                            item_info = {
                                "id": item.get("id"),
                                "name": item.get("name"),
                                "size": item.get("size", 0),
                                "createdDateTime": item.get("createdDateTime"),
                                "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                                "webUrl": item.get("webUrl"),
                                "type": "folder" if is_folder else "file",
                                "siteName": site_name,
                                "siteUrl": site_url
                            }

                            # Add file-specific information
                            if "file" in item:
                                item_info.update({
                                    "mimeType": item["file"].get("mimeType"),
                                    "downloadUrl": item.get("@microsoft.graph.downloadUrl")
                                })

                            # Add folder-specific information
                            if "folder" in item:
                                item_info.update({
                                    "childCount": item["folder"].get("childCount", 0)
                                })

                            processed_items.append(item_info)

                        # If include subfolders is enabled, get ONLY direct subfolder contents (1 level deep only)
                        if includeSubfolders:
                            folder_items = [item for item in processed_items if item['type'] == 'folder']
                            logger.info(f"Including subfolders - processing {len(folder_items)} folders")
                            subfolder_count = 0
                            max_subfolders = 10  # Limit to prevent infinite loops

                            for item in processed_items[:]:  # Use slice to avoid modifying list during iteration
                                if item["type"] == "folder" and subfolder_count < max_subfolders:
                                    subfolder_count += 1
                                    subfolder_path = f"{folderPath.rstrip('/')}/{item['name']}"
                                    subfolder_endpoint = f"sites/{site_id}/drive/items/{item['id']}/children"

                                    logger.debug(f"Getting contents of subfolder: {item['name']}")
                                    subfolder_result = await self._makeGraphApiCall(connection["accessToken"], subfolder_endpoint)
                                    if "error" not in subfolder_result:
                                        subfolder_items = subfolder_result.get("value", [])
                                        logger.debug(f"Found {len(subfolder_items)} items in subfolder {item['name']}")

                                        for subfolder_item in subfolder_items:
                                            # Use improved folder detection logic for subfolder items
                                            subfolder_is_folder = False
                                            if 'folder' in subfolder_item:
                                                subfolder_is_folder = True
                                            else:
                                                # Try to detect by URL pattern or other indicators
                                                subfolder_web_url = subfolder_item.get('webUrl', '')
                                                subfolder_name = subfolder_item.get('name', '')

                                                # Check if URL has no file extension and looks like a folder path
                                                if '.' not in subfolder_name and ('/' in subfolder_web_url or '\\' in subfolder_web_url):
                                                    subfolder_is_folder = True

                                            # Only add files and direct subfolders, NO RECURSION
                                            subfolder_item_info = {
                                                "id": subfolder_item.get("id"),
                                                "name": subfolder_item.get("name"),
                                                "size": subfolder_item.get("size", 0),
                                                "createdDateTime": subfolder_item.get("createdDateTime"),
                                                "lastModifiedDateTime": subfolder_item.get("lastModifiedDateTime"),
                                                "webUrl": subfolder_item.get("webUrl"),
                                                "type": "folder" if subfolder_is_folder else "file",
                                                "parentPath": subfolder_path,
                                                "siteName": site_name,
                                                "siteUrl": site_url
                                            }

                                            if "file" in subfolder_item:
                                                subfolder_item_info.update({
                                                    "mimeType": subfolder_item["file"].get("mimeType"),
                                                    "downloadUrl": subfolder_item.get("@microsoft.graph.downloadUrl")
                                                })

                                            processed_items.append(subfolder_item_info)
                                    else:
                                        logger.warning(f"Failed to get contents of subfolder {item['name']}: {subfolder_result.get('error')}")
                                elif subfolder_count >= max_subfolders:
                                    logger.warning(f"Reached maximum subfolder limit ({max_subfolders}), skipping remaining folders")
                                    break

                            logger.info(f"Processed {subfolder_count} subfolders, total items: {len(processed_items)}")

                        folder_results.append({
                            "siteName": site_name,
                            "siteUrl": site_url,
                            "itemCount": len(processed_items),
                            "items": processed_items
                        })

                    list_results.append({
                        "folderPath": folderPath,
                        "sitesProcessed": len(folder_results),
                        "siteResults": folder_results
                    })

                except Exception as e:
                    logger.error(f"Error listing folder {folderPath}: {str(e)}")
                    list_results.append({
                        "folderPath": folderPath,
                        "error": str(e),
                        "siteResults": []
                    })

            # Create result data
            result_data = {
                "pathQuery": list_query,
                "includeSubfolders": includeSubfolders,
                "sitesSearched": len(sites),
                "listResults": list_results,
                "timestamp": get_utc_timestamp()
            }

            # Use default JSON format for output
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default


            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_document_list_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )

        except Exception as e:
            logger.error(f"Error listing SharePoint documents: {str(e)}")
            return ActionResult(
                success=False,
                error=str(e)
            )