"""
|
|
SharePoint operations method module.
|
|
Handles SharePoint document operations using the SharePoint service.
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
import re
|
|
from typing import Dict, Any, List, Optional
|
|
from datetime import datetime, UTC
|
|
import base64
|
|
from urllib.parse import urlparse
|
|
import aiohttp
|
|
import asyncio
|
|
|
|
from modules.workflows.methods.methodBase import MethodBase, action
|
|
from modules.interfaces.interfaceChatModel import ActionResult
|
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
|
|
|
logger = logging.getLogger(__name__)


class MethodSharepoint(MethodBase):
    """SharePoint operations methods."""

    def __init__(self, service):
        super().__init__(service)
        self.name = "sharepoint"
        self.description = "SharePoint operations methods"

    def _format_timestamp_for_filename(self) -> str:
        """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
        return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")

    def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
        """Get Microsoft connection from connection reference"""
        try:
            userConnection = self.service.getUserConnectionFromConnectionReference(connectionReference)
            if not userConnection:
                logger.warning(f"No user connection found for reference: {connectionReference}")
                return None

            if userConnection.authority.value != "msft":
                logger.warning(f"Connection {userConnection.id} is not Microsoft (authority: {userConnection.authority.value})")
                return None

            # Check if connection is active or pending (pending means OAuth in progress)
            if userConnection.status.value not in ["active", "pending"]:
                logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
                return None

            # Get a fresh token for this specific connection
            from modules.security.tokenManager import TokenManager
            token = TokenManager().getFreshToken(self.service.interfaceApp, userConnection.id)
            if not token:
                logger.warning(f"No token found for connection {userConnection.id}")
                return None

            # Check if token is expired
            if hasattr(token, 'expiresAt') and token.expiresAt:
                current_time = get_utc_timestamp()
                if current_time > token.expiresAt:
                    logger.warning(f"Token for connection {userConnection.id} is expired (expiresAt: {token.expiresAt}, current: {current_time})")
                    return None

            logger.info(f"Successfully retrieved Microsoft connection: {userConnection.id}, status: {userConnection.status.value}, externalId: {userConnection.externalId}")

            return {
                "id": userConnection.id,
                "userConnection": userConnection,
                "accessToken": token.tokenAccess,
                "refreshToken": token.tokenRefresh,
                "scopes": ["Sites.ReadWrite.All", "Files.ReadWrite.All", "User.Read"]  # SharePoint scopes
            }
        except Exception as e:
            logger.error(f"Error getting Microsoft connection: {str(e)}")
            return None
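
    # Shape of the dict returned by _getMicrosoftConnection (a sketch for reference;
    # the field names mirror the code above, and the scope list is the fixed set
    # requested for SharePoint operations):
    #   {
    #       "id": <connection id>,
    #       "userConnection": <user connection object>,
    #       "accessToken": <bearer token for Microsoft Graph>,
    #       "refreshToken": <refresh token>,
    #       "scopes": ["Sites.ReadWrite.All", "Files.ReadWrite.All", "User.Read"],
    #   }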

    async def _discoverSharePointSites(self, access_token: str) -> List[Dict[str, Any]]:
        """
        Discover all SharePoint sites accessible to the user via Microsoft Graph API

        Parameters:
            access_token (str): Microsoft Graph access token

        Returns:
            List[Dict[str, Any]]: List of SharePoint site information
        """
        try:
            # Query Microsoft Graph to get all sites the user has access to
            endpoint = "sites?search=*"
            result = await self._makeGraphApiCall(access_token, endpoint)

            if "error" in result:
                logger.error(f"Error discovering SharePoint sites: {result['error']}")
                return []

            sites = result.get("value", [])
            logger.info(f"Discovered {len(sites)} SharePoint sites")

            # Process and return site information
            processed_sites = []
            for site in sites:
                site_info = {
                    "id": site.get("id"),
                    "displayName": site.get("displayName"),
                    "name": site.get("name"),
                    "webUrl": site.get("webUrl"),
                    "description": site.get("description"),
                    "createdDateTime": site.get("createdDateTime"),
                    "lastModifiedDateTime": site.get("lastModifiedDateTime")
                }
                processed_sites.append(site_info)
                logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}")

            return processed_sites

        except Exception as e:
            logger.error(f"Error discovering SharePoint sites: {str(e)}")
            return []

    def _filter_sites_by_hint(self, sites: List[Dict[str, Any]], site_hint: str) -> List[Dict[str, Any]]:
        """Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
        try:
            if not site_hint:
                return sites
            hint = site_hint.strip().lower()
            filtered: List[Dict[str, Any]] = []
            for site in sites:
                name = (site.get("displayName") or "").lower()
                web_url = (site.get("webUrl") or "").lower()
                if hint in name or hint in web_url:
                    filtered.append(site)
            return filtered if filtered else sites
        except Exception as e:
            logger.error(f"Error filtering sites by hint '{site_hint}': {str(e)}")
            return sites
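
    # Illustrative behavior of _filter_sites_by_hint (hypothetical site data):
    # a hint of "finance" matches a site with displayName "KM LayerFinance" or any
    # webUrl containing "finance"; when nothing matches, the unfiltered list is
    # returned unchanged so a bad hint never hides every site.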

    def _parse_site_scoped_path(self, path_query: str) -> Optional[Dict[str, str]]:
        """
        Parse a site-scoped path of the form:
            /site:KM LayerFinance/Documents/Work or /site:KM LayerFinance/Shared Documents/Work

        Returns a dict with keys siteName and innerPath (no leading slash) on success, else None.
        """
        try:
            if not path_query or not path_query.startswith('/'):
                return None
            # Expected syntax prefix
            prefix = '/site:'
            if not path_query.startswith(prefix):
                return None
            remainder = path_query[len(prefix):]
            # Split once on the next '/'
            if '/' not in remainder:
                return None
            site_name, inner = remainder.split('/', 1)
            site_name = site_name.strip()
            inner_path = inner.strip()
            if not site_name or not inner_path:
                return None
            return {"siteName": site_name, "innerPath": inner_path}
        except Exception as e:
            logger.error(f"Error parsing site-scoped path '{path_query}': {str(e)}")
            return None
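
    # Illustrative results for _parse_site_scoped_path (assumed behavior):
    #   "/site:KM LayerFinance/Documents/Work"
    #       -> {"siteName": "KM LayerFinance", "innerPath": "Documents/Work"}
    #   "/Documents/Work"        -> None   (missing the "/site:" prefix)
    #   "/site:KM LayerFinance"  -> None   (no inner path after the site name)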

    def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
        """
        Parse searchQuery to extract path, search terms, search type, and search options.

        CRITICAL: NEVER convert words to paths! Words stay as search terms.
        - "root document lesson" → fileQuery="root document lesson" (NOT "/root/document/lesson")
        - "root, gose" → fileQuery="root, gose" (NOT "/root/gose")
        - "druckersteuerung eskalation logobject" → fileQuery="druckersteuerung eskalation logobject"

        Parameters:
            searchQuery (str): Enhanced search query with options:
                - "budget" -> pathQuery="*", fileQuery="budget", searchType="all", options={}
                - "root document lesson" -> pathQuery="*", fileQuery="root document lesson", searchType="all", options={}
                - "root, gose" -> pathQuery="*", fileQuery="root, gose", searchType="all", options={}
                - "/Documents:budget" -> pathQuery="/Documents", fileQuery="budget", searchType="all", options={}
                - "files:budget" -> pathQuery="*", fileQuery="budget", searchType="files", options={}
                - "folders:DELTA" -> pathQuery="*", fileQuery="DELTA", searchType="folders", options={}
                - "exact:\"Operations 2025\"" -> exact phrase matching
                - "regex:^Operations.*2025$" -> regex pattern matching
                - "case:DELTA" -> case-sensitive search
                - "and:DELTA AND 2025 Mars AND Group" -> all AND terms must be present

        Returns:
            tuple[str, str, str, dict]: (pathQuery, fileQuery, searchType, searchOptions)
        """
        try:
            if not searchQuery or not searchQuery.strip() or searchQuery.strip() == "*":
                return "*", "*", "all", {}

            searchQuery = searchQuery.strip()
            searchOptions = {}

            # CRITICAL: Do NOT convert space-separated or comma-separated words to paths!
            # "root document lesson" should stay as "root document lesson", NOT "/root/document/lesson"
            # "root, gose" should stay as "root, gose", NOT "/root/gose"

            # Check for search type specification (files:, folders:, all:) FIRST
            searchType = "all"  # Default
            if searchQuery.startswith(("files:", "folders:", "all:")):
                type_parts = searchQuery.split(':', 1)
                searchType = type_parts[0].strip()
                searchQuery = type_parts[1].strip()

            # Extract optional site hint tokens: support "site=Name" or leading "site:Name"
            def _extract_site_hint(q: str) -> tuple[str, Optional[str]]:
                try:
                    q_strip = q.strip()
                    # Leading form: site:KM LayerFinance ...
                    if q_strip.lower().startswith("site:"):
                        after = q_strip[5:].lstrip()
                        # Site name runs until the next space or the end of the query
                        if ' ' in after:
                            site_name, rest = after.split(' ', 1)
                        else:
                            site_name, rest = after, ''
                        return rest.strip(), site_name.strip()
                    # Inline key=value form anywhere
                    m = re.search(r"\bsite=([^;\s]+)", q_strip, flags=re.IGNORECASE)
                    if m:
                        site_name = m.group(1).strip()
                        # Remove the token from the query
                        q_new = re.sub(r"\bsite=[^;\s]+;?", "", q_strip, flags=re.IGNORECASE).strip()
                        return q_new, site_name
                except Exception:
                    pass
                return q, None

            searchQuery, extracted_site = _extract_site_hint(searchQuery)
            if extracted_site:
                searchOptions["site_hint"] = extracted_site
                logger.info(f"Extracted site hint: '{extracted_site}'")

            # Extract name="..." if present (for quoted multi-word names)
            name_match = re.search(r"name=\"([^\"]+)\"", searchQuery)
            if name_match:
                searchQuery = name_match.group(1)
                logger.info(f"Extracted name from quotes: '{searchQuery}'")

            # Check for search mode specification (exact:, regex:, case:, and:)
            if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):
                mode_parts = searchQuery.split(':', 1)
                mode = mode_parts[0].strip()
                searchQuery = mode_parts[1].strip()

                if mode == "exact":
                    searchOptions["exact_match"] = True
                    # Remove quotes if present
                    if searchQuery.startswith('"') and searchQuery.endswith('"'):
                        searchQuery = searchQuery[1:-1]
                elif mode == "regex":
                    searchOptions["regex_match"] = True
                elif mode == "case":
                    searchOptions["case_sensitive"] = True
                elif mode == "and":
                    searchOptions["and_terms"] = True

            # Check if it contains path:search format.
            # Skip this for regex mode, where a colon is part of the pattern, not a path separator.
            if ':' in searchQuery and not searchOptions.get("regex_match"):
                parts = searchQuery.split(':', 1)  # Split only on first colon
                path_part = parts[0].strip()
                search_part = parts[1].strip()

                # Handle path part
                if not path_part or path_part == "*":
                    pathQuery = "*"
                elif path_part.startswith('/'):
                    pathQuery = path_part
                else:
                    pathQuery = f"/Documents/{path_part}"

                # Handle search part
                if not search_part or search_part == "*":
                    fileQuery = "*"
                else:
                    fileQuery = search_part

                # Use search_part as fileQuery (name extraction already handled above)
                return pathQuery, fileQuery, searchType, searchOptions

            # No colon - check if it looks like a path
            elif searchQuery.startswith('/'):
                # It's a path only
                return searchQuery, "*", searchType, searchOptions

            else:
                # It's a search term only - keep words as-is, do NOT convert to paths
                # "root document lesson" stays as "root document lesson"
                # "root, gose" stays as "root, gose"
                return "*", searchQuery, searchType, searchOptions

        except Exception as e:
            logger.error(f"Error parsing searchQuery '{searchQuery}': {str(e)}")
            raise ValueError(f"Failed to parse searchQuery '{searchQuery}': {str(e)}")
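
    # Illustrative parse results for _parseSearchQuery (a sketch of the intended
    # contract; the "/Documents" prefix for bare path segments follows the code above):
    #   "budget"                  -> ("*", "budget", "all", {})
    #   "folders:DELTA"           -> ("*", "DELTA", "folders", {})
    #   "/Documents:budget"       -> ("/Documents", "budget", "all", {})
    #   "Project1:budget"         -> ("/Documents/Project1", "budget", "all", {})
    #   "site:Finance budget"     -> ("*", "budget", "all", {"site_hint": "Finance"})
    #   'exact:"Operations 2025"' -> ("*", "Operations 2025", "all", {"exact_match": True})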

    def _resolvePathQuery(self, pathQuery: str) -> List[str]:
        """
        Resolve pathQuery into a list of search paths for SharePoint operations.

        Parameters:
            pathQuery (str): Query string that can contain:
                - Direct paths (e.g., "/Documents/Project1")
                - Wildcards (e.g., "/Documents/*")
                - Multiple paths separated by semicolons (e.g., "/Docs; /Files")
                - Single-word relative paths (e.g., "Project1" -> resolved to the default folder)
                - Empty string or "*" for global search
                - Space-separated words are treated as search terms, NOT folder paths

        Returns:
            List[str]: List of resolved paths
        """
        try:
            if not pathQuery or not pathQuery.strip() or pathQuery.strip() == "*":
                return ["*"]  # Global search across all sites

            # Split by semicolon to handle multiple paths
            raw_paths = [path.strip() for path in pathQuery.split(';') if path.strip()]
            resolved_paths = []

            for raw_path in raw_paths:
                # Handle wildcards - return as-is
                if '*' in raw_path:
                    resolved_paths.append(raw_path)
                # Handle absolute paths
                elif raw_path.startswith('/'):
                    resolved_paths.append(raw_path)
                # Handle single-word relative paths - prepend the default folder
                # BUT NOT space-separated words (those are search terms, not paths)
                elif ' ' not in raw_path:
                    resolved_paths.append(f"/Documents/{raw_path}")
                else:
                    # Check if this looks like a path (has path separators) or search terms
                    if '\\' in raw_path or '/' in raw_path:
                        # This looks like a path with spaces in folder names - treat as a valid path
                        resolved_paths.append(raw_path)
                        logger.info(f"Path with spaces '{raw_path}' treated as valid folder path")
                    else:
                        # Space-separated words without path separators are search terms
                        # Return as "*" to search globally
                        logger.info(f"Space-separated words '{raw_path}' treated as search terms, not folder path")
                        resolved_paths.append("*")

            # Remove duplicates while preserving order
            seen = set()
            unique_paths = []
            for path in resolved_paths:
                if path not in seen:
                    seen.add(path)
                    unique_paths.append(path)

            logger.info(f"Resolved pathQuery '{pathQuery}' to {len(unique_paths)} paths: {unique_paths}")
            return unique_paths

        except Exception as e:
            logger.error(f"Error resolving pathQuery '{pathQuery}': {str(e)}")
            raise ValueError(f"Failed to resolve pathQuery '{pathQuery}': {str(e)}")
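
    # Illustrative resolutions for _resolvePathQuery (assumed behavior):
    #   "*"                       -> ["*"]
    #   "/Docs; /Files"           -> ["/Docs", "/Files"]
    #   "Project1"                -> ["/Documents/Project1"]
    #   "Quarterly Reports/2025"  -> ["Quarterly Reports/2025"]   (spaces plus a separator)
    #   "root document lesson"    -> ["*"]   (search terms, not a folder path)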

    def _parseSiteUrl(self, siteUrl: str) -> Dict[str, str]:
        """Parse SharePoint site URL to extract hostname and site path"""
        try:
            parsed = urlparse(siteUrl)
            hostname = parsed.hostname
            path = parsed.path.strip('/')

            return {
                "hostname": hostname,
                "sitePath": path
            }
        except Exception as e:
            logger.error(f"Error parsing site URL {siteUrl}: {str(e)}")
            return {"hostname": "", "sitePath": ""}

    async def _makeGraphApiCall(self, access_token: str, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
        """Make a Microsoft Graph API call with timeout and detailed logging"""
        try:
            if method not in ("GET", "PUT", "POST"):
                return {"error": f"Unsupported HTTP method: {method}"}

            headers = {
                "Authorization": f"Bearer {access_token}",
                # Raw uploads (PUT with a body) are sent as octet-stream; everything else as JSON
                "Content-Type": "application/octet-stream" if data and method == "PUT" else "application/json"
            }

            url = f"https://graph.microsoft.com/v1.0/{endpoint}"
            logger.info(f"Making Graph API call: {method} {url}")

            # Set timeout to 30 seconds
            timeout = aiohttp.ClientTimeout(total=30)

            async with aiohttp.ClientSession(timeout=timeout) as session:
                logger.debug(f"Starting {method} request to {url}")
                async with session.request(method, url, headers=headers, data=data) as response:
                    logger.info(f"Graph API response: {response.status}")
                    if response.status in (200, 201):
                        result = await response.json()
                        logger.debug(f"Graph API success: {len(str(result))} characters response")
                        return result
                    error_text = await response.text()
                    logger.error(f"Graph API call failed: {response.status} - {error_text}")
                    return {"error": f"API call failed: {response.status} - {error_text}"}

        except asyncio.TimeoutError:
            logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
            return {"error": f"API call timed out after 30 seconds: {endpoint}"}
        except Exception as e:
            logger.error(f"Error making Graph API call: {str(e)}")
            return {"error": f"Error making Graph API call: {str(e)}"}

    async def _getSiteId(self, access_token: str, hostname: str, site_path: str) -> str:
        """Get SharePoint site ID from hostname and site path"""
        try:
            endpoint = f"sites/{hostname}:/{site_path}"
            result = await self._makeGraphApiCall(access_token, endpoint)

            if "error" in result:
                logger.error(f"Error getting site ID: {result['error']}")
                return ""

            return result.get("id", "")
        except Exception as e:
            logger.error(f"Error getting site ID: {str(e)}")
            return ""

    @action
    async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Find documents/folders by searching their NAMES across SharePoint sites.

        Parameters:
            connectionReference (str): Microsoft connection reference
            site (str, optional): Site hint (e.g., "SSS", "KM XYZ")
            searchQuery (str): Search query - "budget", "folders:alpha", "files:budget", "/Documents/Project1", "namepart1 namepart2 namepart3". Use "folders:" prefix when user wants to store files or find folders
            maxResults (int, optional): Max results (default: 100)
        """
        try:
            connectionReference = parameters.get("connectionReference")
            site = parameters.get("site")
            searchQuery = parameters.get("searchQuery", "*")
            maxResults = parameters.get("maxResults", 100)

            if not connectionReference:
                return ActionResult.isFailure(error="Connection reference is required")

            # Parse searchQuery to extract path, search terms, search type, and options
            pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery)

            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            # Discover SharePoint sites - use a targeted approach when the site parameter is provided
            if site:
                # When the site parameter is provided, discover all sites first, then filter
                all_sites = await self._discoverSharePointSites(connection["accessToken"])
                if not all_sites:
                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")

                sites = self._filter_sites_by_hint(all_sites, site)
                logger.info(f"Filtered sites by site parameter: '{site}' -> {len(sites)} sites")
                if not sites:
                    return ActionResult.isFailure(error=f"No SharePoint sites found matching '{site}'")
            else:
                # No site parameter - discover all sites
                sites = await self._discoverSharePointSites(connection["accessToken"])
                if not sites:
                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")

            # Resolve path query into search paths
            search_paths = self._resolvePathQuery(pathQuery)

            try:
                # Search across all discovered sites
                found_documents = []
                all_sites_searched = []

                # Handle different search approaches based on search type
                if searchType == "folders" and fileQuery and fileQuery.strip() != "" and fileQuery.strip() != "*":
                    # Use unified search for folders - this is global and searches all sites
                    try:
                        # Use Microsoft Graph Search API syntax (simple term search only)
                        terms = [t for t in fileQuery.split() if t.strip()]

                        if len(terms) > 1:
                            # Multiple terms: require ALL terms (AND) for more specific results
                            query_string = " AND ".join(terms)
                        else:
                            # Single term: search for the term
                            query_string = terms[0] if terms else fileQuery
                        logger.info(f"Using unified search for folders: {query_string}")

                        payload = {
                            "requests": [
                                {
                                    "entityTypes": ["driveItem"],
                                    "query": {"queryString": query_string},
                                    "from": 0,
                                    "size": 50
                                }
                            ]
                        }
                        logger.info(f"Using unified search API for folders with queryString: {query_string}")

                        # Use the global search endpoint (site-specific search not available)
                        unified_result = await self._makeGraphApiCall(
                            connection["accessToken"],
                            "search/query",
                            method="POST",
                            data=json.dumps(payload).encode("utf-8")
                        )

                        if "error" in unified_result:
                            logger.warning(f"Unified search failed: {unified_result['error']}")
                            items = []
                        else:
                            # Flatten hits -> driveItem resources
                            items = []
                            for container in (unified_result.get("value", []) or []):
                                for hits_container in (container.get("hitsContainers", []) or []):
                                    for hit in (hits_container.get("hits", []) or []):
                                        resource = hit.get("resource")
                                        if resource:
                                            items.append(resource)

                            logger.info(f"Unified search returned {len(items)} items (pre-filter)")

                            # Apply the improved folder detection logic
                            folder_items = []
                            for item in items:
                                is_folder = False
                                if 'folder' in item:
                                    is_folder = True
                                else:
                                    # Try to detect by URL pattern or other indicators
                                    web_url = item.get('webUrl', '')
                                    name = item.get('name', '')

                                    # Check if the name has no file extension and the URL looks like a folder path
                                    if '.' not in name and ('/' in web_url or '\\' in web_url):
                                        is_folder = True

                                if is_folder:
                                    folder_items.append(item)

                            items = folder_items
                            logger.info(f"Filtered to {len(items)} folders using improved detection logic")

                        # Process unified search results - extract site information from webUrl
                        for item in items:
                            item_name = item.get("name", "")
                            web_url = item.get("webUrl", "")

                            # Extract site information from webUrl
                            site_name = "Unknown Site"
                            site_id = "unknown"

                            if web_url and '/sites/' in web_url:
                                try:
                                    # Extract site name from URL: https://pcuster.sharepoint.com/sites/SiteName/...
                                    url_parts = web_url.split('/sites/')
                                    if len(url_parts) > 1:
                                        site_path = url_parts[1].split('/')[0]
                                        # Find the matching site from the discovered sites.
                                        # The loop variable is candidate_site to avoid shadowing
                                        # the "site" parameter of this action.
                                        # First try to match by site name (URL path)
                                        for candidate_site in sites:
                                            if candidate_site.get("name") == site_path:
                                                site_name = candidate_site.get("displayName", site_path)
                                                site_id = candidate_site.get("id", "unknown")
                                                break
                                        else:
                                            # If no match by name, try to match by displayName
                                            for candidate_site in sites:
                                                if candidate_site.get("displayName") == site_path:
                                                    site_name = candidate_site.get("displayName", site_path)
                                                    site_id = candidate_site.get("id", "unknown")
                                                    break
                                            else:
                                                # If no exact match, use the site path as the site name
                                                site_name = site_path
                                                # Try to find a site with a similar name
                                                for candidate_site in sites:
                                                    if site_path.lower() in candidate_site.get("name", "").lower() or site_path.lower() in candidate_site.get("displayName", "").lower():
                                                        site_name = candidate_site.get("displayName", site_path)
                                                        site_id = candidate_site.get("id", "unknown")
                                                        break
                                except Exception as e:
                                    logger.warning(f"Error extracting site info from URL {web_url}: {e}")

                            # Use the improved folder detection logic
                            is_folder = False
                            if 'folder' in item:
                                is_folder = True
                            else:
                                # Try to detect by URL pattern or other indicators
                                name = item.get('name', '')

                                # Check if the name has no file extension and the URL looks like a folder path
                                if '.' not in name and ('/' in web_url or '\\' in web_url):
                                    is_folder = True

                            item_type = "folder" if is_folder else "file"
                            item_path = item.get("parentReference", {}).get("path", "")
                            logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")

                            # Simple filtering - just check the search type
                            if searchType == "files" and is_folder:
                                continue  # Skip folders when searching for files
                            elif searchType == "folders" and not is_folder:
                                continue  # Skip files when searching for folders

                            logger.debug(f"Item '{item_name}' found - adding to results")

                            # Create a result with full path information for proper action chaining
                            parent_path = item.get("parentReference", {}).get("path", "")

                            # Extract the full SharePoint path from webUrl or parentReference
                            full_path = ""
                            if web_url:
                                # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
                                if '/sites/' in web_url:
                                    path_part = web_url.split('/sites/')[1]
                                    # Decode URL encoding and convert to backslash format
                                    decoded_path = unquote(path_part)
                                    full_path = "\\" + decoded_path.replace('/', '\\')
                            elif parent_path:
                                # Use the parentReference path if available
                                full_path = parent_path.replace('/', '\\')

                            doc_info = {
                                "id": item.get("id"),
                                "name": item.get("name"),
                                "type": "folder" if is_folder else "file",
                                "siteName": site_name,
                                "siteId": site_id,
                                "webUrl": web_url,
                                "fullPath": full_path,
                                "parentPath": parent_path
                            }

                            found_documents.append(doc_info)

                        logger.info(f"Found {len(found_documents)} documents from unified search")

                    except Exception as e:
                        logger.error(f"Error performing unified folder search: {str(e)}")
                        # Fall back to the site-by-site search below
                        pass

                # If no unified search was performed or it failed, fall back to site-by-site search
                if not found_documents:
                    site_scoped_sites = sites

                    for current_site in site_scoped_sites:
                        site_id = current_site["id"]
                        site_name = current_site["displayName"]
                        site_url = current_site["webUrl"]

                        logger.info(f"Searching in site: {site_name} ({site_url})")

                        # Use the Microsoft Graph API for this specific site.
                        # Handle empty or wildcard queries.
                        if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
                            # For wildcard/empty queries, list all items in the drive
                            endpoint = f"sites/{site_id}/drive/root/children"
                        else:
                            # Otherwise, use the regular search API
                            search_query = fileQuery.replace("'", "''")  # Escape single quotes for OData
                            endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
                            logger.info(f"Using search API for files with query: '{search_query}'")

                        # Make the search API call
                        search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
                        if "error" in search_result:
                            logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
                            continue
                        # Process search results for this site
                        items = search_result.get("value", [])
                        logger.info(f"Retrieved {len(items)} items from site {site_name}")

                        site_documents = []

                        for item in items:
                            item_name = item.get("name", "")

                            # Use the improved folder detection logic
                            is_folder = False
                            if 'folder' in item:
                                is_folder = True
                            else:
                                # Try to detect by URL pattern or other indicators
                                web_url = item.get('webUrl', '')
                                name = item.get('name', '')

                                # Check if the name has no file extension and the URL looks like a folder path
                                if '.' not in name and ('/' in web_url or '\\' in web_url):
                                    is_folder = True

                            item_type = "folder" if is_folder else "file"
                            item_path = item.get("parentReference", {}).get("path", "")
                            logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")

                            # Simple filtering - just check the search type
                            if searchType == "files" and is_folder:
                                continue  # Skip folders when searching for files
                            elif searchType == "folders" and not is_folder:
                                continue  # Skip files when searching for folders

                            logger.debug(f"Item '{item_name}' found - adding to results")

                            # Create a result with full path information for proper action chaining
                            web_url = item.get("webUrl", "")
                            parent_path = item.get("parentReference", {}).get("path", "")

                            # Extract the full SharePoint path from webUrl or parentReference
                            full_path = ""
                            if web_url:
                                # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
                                if '/sites/' in web_url:
                                    path_part = web_url.split('/sites/')[1]
                                    # Decode URL encoding and convert to backslash format
                                    decoded_path = unquote(path_part)
                                    full_path = "\\" + decoded_path.replace('/', '\\')
                            elif parent_path:
                                # Use the parentReference path if available
                                full_path = parent_path.replace('/', '\\')

                            doc_info = {
                                "id": item.get("id"),
                                "name": item.get("name"),
                                "type": "folder" if is_folder else "file",
                                "siteName": site_name,
                                "siteId": site_id,
                                "webUrl": web_url,
                                "fullPath": full_path,
                                "parentPath": parent_path
                            }

                            site_documents.append(doc_info)

                        found_documents.extend(site_documents)
                        all_sites_searched.append({
                            "siteName": site_name,
                            "siteUrl": site_url,
                            "siteId": site_id,
                            "documentsFound": len(site_documents)
                        })

                        logger.info(f"Found {len(site_documents)} documents in site {site_name}")

                # Limit total results to maxResults
                if len(found_documents) > maxResults:
                    found_documents = found_documents[:maxResults]
                    logger.info(f"Limited results to {maxResults} items")

                result_data = {
                    "searchQuery": searchQuery,
                    "totalResults": len(found_documents),
                    "maxResults": maxResults,
                    "foundDocuments": found_documents,
                    "timestamp": get_utc_timestamp()
                }

            except Exception as e:
                logger.error(f"Error searching SharePoint: {str(e)}")
                return ActionResult.isFailure(error=str(e))

            # Use default JSON format for output
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_find_path_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )

        except Exception as e:
            logger.error(f"Error finding document path: {str(e)}")
            return ActionResult.isFailure(error=str(e))
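
    # Illustrative action chaining (all reference values are hypothetical):
    #   find = await method.findDocumentPath({
    #       "connectionReference": "conn-123",
    #       "searchQuery": "folders:Invoices",
    #   })
    #   # The emitted JSON document (foundDocuments with ids, siteId/siteName and
    #   # fullPath) can then be referenced as "pathObject" by readDocuments,
    #   # uploadDocument, or listDocuments to target a discovered folder.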

    @action
    async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Read documents from SharePoint across all accessible sites

        Parameters:
            documentList (list): Reference(s) to the document list to read
            connectionReference (str): Reference to the Microsoft connection
            pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
            pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
            includeMetadata (bool, optional): Whether to include metadata (default: True)
        """
        try:
            documentList = parameters.get("documentList")
            if isinstance(documentList, str):
                documentList = [documentList]
            connectionReference = parameters.get("connectionReference")
            pathQuery = parameters.get("pathQuery", "*")
            pathObject = parameters.get("pathObject")
            includeMetadata = parameters.get("includeMetadata", True)

            if not documentList or not connectionReference:
                return ActionResult.isFailure(error="Document list reference and connection reference are required")

            # If pathObject is provided, extract folder IDs from it.
            # Note: pathObject takes precedence over pathQuery when both are provided.
            if pathObject:
                if pathQuery and pathQuery != "*":
                    logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
                try:
                    # Resolve the reference label to get the actual document list
                    document_list = self.service.getChatDocumentsFromDocumentList([pathObject])
                    if not document_list or len(document_list) == 0:
                        return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")

                    # Get the first document's content (which should be the JSON)
                    first_document = document_list[0]
                    file_data = self.service.getFileData(first_document.fileId)
                    if not file_data:
                        return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")

                    # Parse the JSON content
                    result_data = json.loads(file_data)
                    found_documents = result_data.get("foundDocuments", [])

                    # Extract folder IDs from the result
                    folder_ids = []
                    for doc in found_documents:
                        if doc.get("type") == "folder":
                            folder_ids.append(doc.get("id"))

                    if folder_ids:
                        # Use the first folder ID found as pathQuery
                        pathQuery = folder_ids[0]
                        logger.info(f"Using folder ID from pathObject: {pathQuery}")
                    else:
                        return ActionResult.isFailure(error="No folders found in pathObject")

                except json.JSONDecodeError as e:
                    return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
                except Exception as e:
                    return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")

            # Get documents from the reference (documentList was normalized to a list above)
            chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)

            if not chatDocuments:
                return ActionResult.isFailure(error="No documents found for the provided reference")

            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            # Determine sites to use - strict validation: pathObject → pathQuery → ERROR
            sites = None

            # Step 1: Check pathObject first
            if pathObject:
                # When pathObject is provided, we should have specific site information.
                # Extract site information from the pathObject result.
                try:
                    # Get the site information from the first folder in pathObject
                    if 'found_documents' in locals() and found_documents:
                        first_folder = found_documents[0]
                        site_name = first_folder.get("siteName")
                        site_id = first_folder.get("siteId")

                        if site_name and site_id:
                            # Use the specific site from pathObject instead of discovering all sites
                            sites = [{
                                "id": site_id,
                                "displayName": site_name,
                                "webUrl": first_folder.get("webUrl", "")
                            }]
                            logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
                        else:
                            # Site info missing from pathObject - this is an error
                            return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
                    else:
                        # No documents found in pathObject - this is an error
                        return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for read operation.")
                except Exception as e:
                    # Error processing pathObject - this is an error
                    return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for read operation.")

            # Step 2: If no pathObject, check pathQuery
            elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
                # Validate pathQuery format
                if not pathQuery.startswith('/'):
                    return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")

                # Check if pathQuery contains search terms (words without proper path structure)
                if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'):
                    return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")

                # For pathQuery, we need to discover sites to find the specific one
                sites = await self._discoverSharePointSites(connection["accessToken"])
                if not sites:
                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")
            else:
                # Step 3: Neither pathObject nor a usable pathQuery was provided - ERROR, NO FALLBACK
                return ActionResult.isFailure(error="No valid read path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")

            if not sites:
                return ActionResult.isFailure(error="No valid target site determined for read operation")

            # Resolve path query into search paths
            search_paths = self._resolvePathQuery(pathQuery)

            # Process each chat document across all sites
            read_results = []

            for i, chatDocument in enumerate(chatDocuments):
                try:
                    fileId = chatDocument.fileId
                    fileName = chatDocument.fileName

                    # Search for this file across all sites
                    file_found = False

                    for site in sites:
                        site_id = site["id"]
                        site_name = site["displayName"]
                        site_url = site["webUrl"]

                        # Try to find the file by name in this site
                        search_query = fileName.replace("'", "''")  # Escape single quotes for OData
                        endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"

                        search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)

                        if "error" in search_result:
                            continue

                        items = search_result.get("value", [])
                        for item in items:
                            if item.get("name") == fileName:
                                # Found the file, get its details
                                file_id = item.get("id")
                                file_endpoint = f"sites/{site_id}/drive/items/{file_id}"

                                # Get file metadata
                                file_info_result = await self._makeGraphApiCall(connection["accessToken"], file_endpoint)

                                if "error" in file_info_result:
                                    continue

                                # Build the result with metadata
                                result_item = {
                                    "fileId": fileId,
                                    "fileName": fileName,
                                    "sharepointFileId": file_id,
                                    "siteName": site_name,
                                    "siteUrl": site_url,
                                    "size": file_info_result.get("size", 0),
                                    "createdDateTime": file_info_result.get("createdDateTime"),
                                    "lastModifiedDateTime": file_info_result.get("lastModifiedDateTime"),
                                    "webUrl": file_info_result.get("webUrl")
                                }

                                # Add metadata if requested
                                if includeMetadata:
                                    result_item["metadata"] = {
                                        "mimeType": file_info_result.get("file", {}).get("mimeType"),
                                        "downloadUrl": file_info_result.get("@microsoft.graph.downloadUrl"),
                                        "createdBy": file_info_result.get("createdBy", {}),
                                        "lastModifiedBy": file_info_result.get("lastModifiedBy", {}),
                                        "parentReference": file_info_result.get("parentReference", {})
                                    }

                                # Get the file content if it's a readable format
                                mime_type = file_info_result.get("file", {}).get("mimeType", "")
                                if mime_type.startswith("text/") or mime_type in [
                                    "application/json", "application/xml", "application/javascript"
                                ]:
                                    # Download the file content
                                    content_endpoint = f"sites/{site_id}/drive/items/{file_id}/content"

                                    # For content download, handle the response body ourselves
                                    try:
                                        timeout = aiohttp.ClientTimeout(total=30)
                                        async with aiohttp.ClientSession(timeout=timeout) as session:
                                            headers = {"Authorization": f"Bearer {connection['accessToken']}"}
                                            async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
                                                if response.status == 200:
                                                    content = await response.text()
                                                    result_item["content"] = content
                                                else:
                                                    result_item["content"] = f"Could not download content: HTTP {response.status}"
                                    except Exception as e:
                                        result_item["content"] = f"Error downloading content: {str(e)}"
                                else:
                                    result_item["content"] = f"Binary file type ({mime_type}) - content not retrieved"

                                read_results.append(result_item)
                                file_found = True
                                break

                        if file_found:
                            break

                    if not file_found:
                        read_results.append({
                            "fileId": fileId,
                            "fileName": fileName,
                            "error": "File not found in any accessible SharePoint site",
                            "content": None
                        })

                except Exception as e:
                    logger.error(f"Error reading document {chatDocument.fileName}: {str(e)}")
                    read_results.append({
                        "fileId": chatDocument.fileId,
                        "fileName": chatDocument.fileName,
                        "error": str(e),
                        "content": None
                    })

            result_data = {
                "connectionReference": connectionReference,
                "pathQuery": pathQuery,
                "documentList": documentList,
                "includeMetadata": includeMetadata,
                "sitesSearched": len(sites),
                "readResults": read_results,
                "connection": {
                    "id": connection["id"],
                    "authority": "microsoft",
                    "reference": connectionReference
                },
                "timestamp": get_utc_timestamp()
            }

            # Use default JSON format for output
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_documents_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )
        except Exception as e:
            logger.error(f"Error reading SharePoint documents: {str(e)}")
            return ActionResult(
                success=False,
                error=str(e)
            )
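
    # Illustrative read call (hypothetical references): the chat documents named in
    # "documentList" are looked up by file name in the site resolved from pathObject:
    #   await method.readDocuments({
    #       "connectionReference": "conn-123",
    #       "documentList": ["doc-ref"],
    #       "pathObject": "find-result-ref",
    #       "includeMetadata": True,
    #   })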
|
|
|
|
@action
|
|
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|
"""
|
|
Upload documents to SharePoint across accessible sites
|
|
|
|
Parameters:
|
|
connectionReference (str): Reference to the Microsoft connection
|
|
pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
|
|
pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
|
|
documentList (list): Reference(s) to the document list to upload
|
|
fileNames (List[str]): List of names for the uploaded files
|
|
"""
|
|
try:
|
|
connectionReference = parameters.get("connectionReference")
|
|
pathQuery = parameters.get("pathQuery")
|
|
documentList = parameters.get("documentList")
|
|
if isinstance(documentList, str):
|
|
documentList = [documentList]
|
|
fileNames = parameters.get("fileNames")
|
|
pathObject = parameters.get("pathObject")
|
|
|
|
upload_path = pathQuery
|
|
logger.debug(f"Using pathQuery: {pathQuery}")
|
|
|
|
if not connectionReference or not documentList or not fileNames:
|
|
return ActionResult.isFailure(error="Connection reference, document list, and file names are required")
|
|
|
|
# If pathObject is provided, extract folder IDs from it
|
|
if pathObject:
|
|
try:
|
|
import json
|
|
# Resolve the reference label to get the actual document list
|
|
document_list = self.service.getChatDocumentsFromDocumentList([pathObject])
|
|
if not document_list or len(document_list) == 0:
|
|
return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
|
|
|
|
# Get the first document's content (which should be the JSON)
|
|
first_document = document_list[0]
|
|
file_data = self.service.getFileData(first_document.fileId)
|
|
if not file_data:
|
|
return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")
|
|
|
|
# Parse the JSON content
|
|
result_data = json.loads(file_data)
|
|
|
|
# Debug: Log the structure of the result document
|
|
logger.info(f"Result document keys: {list(result_data.keys())}")
|
|
|
|
# Handle different result document formats
|
|
found_documents = []
|
|
|
|
# Check if it's a direct SharePoint result (has foundDocuments)
|
|
if "foundDocuments" in result_data:
|
|
found_documents = result_data.get("foundDocuments", [])
|
|
logger.info(f"Found {len(found_documents)} documents in foundDocuments array")
|
|
# Check if it's an AI validation result (has result string with validationReport)
|
|
elif "result" in result_data and "validationReport" in result_data["result"]:
|
|
try:
|
|
# Parse the nested JSON in the result field
|
|
nested_result = json.loads(result_data["result"])
|
|
validation_report = nested_result.get("validationReport", {})
|
|
document_details = validation_report.get("documentDetails", {})
|
|
|
|
if document_details:
|
|
# Convert the single document details to the expected format
|
|
doc = {
|
|
"id": document_details.get("id"),
|
|
"name": document_details.get("name"),
|
|
"type": document_details.get("type", "").lower(), # Convert "Folder" to "folder"
|
|
"siteName": document_details.get("siteName"),
|
|
"siteId": document_details.get("siteId"),
|
|
"fullPath": document_details.get("fullPath"),
|
|
"webUrl": document_details.get("webUrl", ""),
|
|
"parentPath": document_details.get("parentPath", "")
|
|
}
|
|
found_documents = [doc]
|
|
logger.info(f"Extracted 1 document from validation report")
|
|
except json.JSONDecodeError as e:
|
|
logger.error(f"Failed to parse nested JSON in result field: {e}")
|
|
return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}")
|
|
|
|
# Debug: Log what we found in the result document
|
|
logger.info(f"Result document contains {len(found_documents)} documents")
|
|
for i, doc in enumerate(found_documents):
|
|
logger.info(f" Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'")
|
|
|
|
# Extract folder information from the result
|
|
folders = []
|
|
for doc in found_documents:
|
|
if doc.get("type") == "folder":
|
|
folders.append(doc)
|
|
|
|
logger.info(f"Found {len(folders)} folders in result document")
|
|
|
|
if folders:
|
|
# Use the first folder found - prefer folder ID for direct API calls
|
|
first_folder = folders[0]
|
|
if first_folder.get("id"):
|
|
# Use folder ID directly for most reliable API calls
|
|
upload_path = first_folder.get("id")
|
|
logger.info(f"Using folder ID from pathObject: {upload_path}")
|
|
elif first_folder.get("fullPath"):
|
|
# Extract the correct path portion from fullPath by removing site name
|
|
full_path = first_folder.get("fullPath")
|
|
# fullPath format: \\SiteName\\Library\\Folder\\SubFolder
|
|
# We need to remove the first two parts (\\SiteName\\) to get the actual folder path
|
|
path_parts = full_path.lstrip('\\').split('\\')
|
|
if len(path_parts) > 1:
|
|
# Remove the first part (site name) and reconstruct the path
|
|
actual_path = '\\'.join(path_parts[1:])
|
|
upload_path = actual_path
|
|
logger.info(f"Extracted path from fullPath: {upload_path}")
|
|
else:
|
|
upload_path = full_path
|
|
logger.info(f"Using full path from pathObject (no site name to remove): {upload_path}")
|
|
else:
|
|
return ActionResult.isFailure(error="No valid folder information found in pathObject")
|
|
else:
|
|
return ActionResult.isFailure(error="No folders found in pathObject")
|
|
|
|
except json.JSONDecodeError as e:
|
|
return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
|
|
except Exception as e:
|
|
return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
|
|
|
|
# Get Microsoft connection
|
|
connection = self._getMicrosoftConnection(connectionReference)
|
|
if not connection:
|
|
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
|
|
|
|
# Get documents from reference - ensure documentList is a list, not a string
|
|
if isinstance(documentList, str):
|
|
documentList = [documentList] # Convert string to list
|
|
chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)
|
|
if not chatDocuments:
|
|
return ActionResult.isFailure(error="No documents found for the provided reference")
|
|
|
|
# Determine sites to use based on whether pathObject was provided
|
|
sites = None
|
|
if pathObject:
|
|
# When pathObject is provided, we should have specific site information
|
|
# Extract site information from the pathObject result
|
|
try:
|
|
# Get the site information from the first folder in pathObject
|
|
if 'found_documents' in locals() and found_documents:
|
|
first_folder = found_documents[0]
|
|
site_name = first_folder.get("siteName")
|
|
site_id = first_folder.get("siteId")
|
|
|
|
if site_name and site_id:
|
|
# Use the specific site from pathObject instead of discovering all sites
|
|
sites = [{
|
|
"id": site_id,
|
|
"displayName": site_name,
|
|
"webUrl": first_folder.get("webUrl", "")
|
|
}]
|
|
logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
|
|
else:
|
|
# Site info missing from pathObject - this is an error, not a fallback
|
|
return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for upload.")
|
|
else:
|
|
# No documents found in pathObject - this is an error
|
|
return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for upload.")
|
|
except Exception as e:
|
|
# Error processing pathObject - this is an error, not a fallback
|
|
return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for upload.")
|
|
else:
|
|
# No pathObject provided - check if pathQuery is valid
|
|
if not upload_path or upload_path.strip() == "" or upload_path.strip() == "*":
|
|
return ActionResult.isFailure(error="No valid upload path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
|
|
|
|
# Validate pathQuery format
|
|
if not upload_path.startswith('/'):
|
|
return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")
|
|
|
|
# Check if upload_path contains search terms (words without proper path structure)
|
|
if not upload_path.startswith('/site:') and not upload_path.startswith('/Documents') and not upload_path.startswith('/Shared Documents'):
|
|
return ActionResult.isFailure(error=f"Invalid pathQuery '{upload_path}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
|
|
|
|
# For pathQuery, we need to discover sites to find the specific one
|
|
sites = await self._discoverSharePointSites(connection["accessToken"])
|
|
if not sites:
|
|
return ActionResult.isFailure(error="No SharePoint sites found or accessible")
|
|
|
|
if not sites:
|
|
return ActionResult.isFailure(error="No valid target site determined for upload")
|
|
|
|
# Process upload paths based on whether pathObject was provided
|
|
upload_site_scope = None
|
|
if not pathObject:
|
|
# Parse the validated pathQuery to extract site and path information
|
|
parsed = self._parse_site_scoped_path(upload_path)
|
|
if not parsed:
|
|
return ActionResult.isFailure(error="Invalid upload_path. Use /site:<Site Display Name>/<Library or Folder Path>")
|
|
|
|
# Find matching site
|
|
candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"]) # substring match
|
|
# Choose exact displayName match if available
|
|
exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
|
|
selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None)
|
|
if not selected_site:
|
|
return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible")
|
|
|
|
upload_site_scope = selected_site
|
|
# Use the inner path portion as the actual upload target path
|
|
upload_paths = [f"/{parsed['innerPath'].lstrip('/')}"]
|
|
sites = [selected_site]
|
|
else:
|
|
# When using pathObject, check if upload_path is a folder ID or a path
|
|
if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'):
|
|
# It's a folder ID - use it directly
|
|
upload_paths = [upload_path]
|
|
logger.info(f"Using folder ID directly for upload: {upload_path}")
|
|
else:
|
|
# It's a path - resolve it normally
|
|
upload_paths = self._resolvePathQuery(upload_path)
|
|
|
|
# Process each document upload
|
|
upload_results = []
|
|
|
|
for i, (chatDocument, fileName) in enumerate(zip(chatDocuments, fileNames)):
|
|
try:
|
|
fileId = chatDocument.fileId
|
|
file_data = self.service.getFileData(fileId)
|
|
|
|
if not file_data:
|
|
logger.warning(f"File data not found for fileId: {fileId}")
|
|
upload_results.append({
|
|
"fileName": fileName,
|
|
"fileId": fileId,
|
|
"error": "File data not found",
|
|
"uploadStatus": "failed"
|
|
})
|
|
continue
|
|
|
|
# Upload to the first available site (or could be made configurable)
|
|
upload_successful = False
|
|
|
|
for site in sites:
|
|
site_id = site["id"]
|
|
site_name = site["displayName"]
|
|
site_url = site["webUrl"]
|
|
|
|
# Use the first upload path or default to Documents
|
|
upload_path = upload_paths[0] if upload_paths else "/Documents"
|
|
|
|
# Handle wildcard paths - replace with default Documents folder
|
|
if upload_path == "*":
|
|
upload_path = "/Documents"
|
|
logger.warning(f"Wildcard path '*' detected, using default '/Documents' folder for upload")
|
|
|
|
# Check if upload_path is a folder ID or a regular path
|
|
if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'):
|
|
# It's a folder ID - use the folder-specific upload endpoint
|
|
upload_endpoint = f"sites/{site_id}/drive/items/{upload_path}:/{fileName}:/content"
|
|
logger.info(f"Using folder ID upload endpoint: {upload_endpoint}")
|
|
else:
|
|
# It's a regular path - use the root-based upload endpoint
|
|
upload_path = upload_path.rstrip('/') + '/' + fileName
|
|
upload_path_clean = upload_path.lstrip('/')
|
|
upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content"
|
|
logger.info(f"Using path-based upload endpoint: {upload_endpoint}")
|
|
|
|
# Upload endpoint for small files (< 4MB)
|
|
if len(file_data) < 4 * 1024 * 1024: # 4MB
|
|
|
|
# Upload the file
|
|
upload_result = await self._makeGraphApiCall(
|
|
connection["accessToken"],
|
|
upload_endpoint,
|
|
method="PUT",
|
|
data=file_data
|
|
)
|
|
|
|
if "error" not in upload_result:
|
|
upload_results.append({
|
|
"fileName": fileName,
|
|
"fileId": fileId,
|
|
"uploadStatus": "success",
|
|
"siteName": site_name,
|
|
"siteUrl": site_url,
|
|
"uploadPath": upload_path,
|
|
"uploadEndpoint": upload_endpoint,
|
|
"sharepointFileId": upload_result.get("id"),
|
|
"webUrl": upload_result.get("webUrl"),
|
|
"size": upload_result.get("size"),
|
|
"createdDateTime": upload_result.get("createdDateTime")
|
|
})
|
|
upload_successful = True
|
|
break
|
|
else:
|
|
logger.warning(f"Upload failed to site {site_name}: {upload_result['error']}")
|
|
else:
|
|
# For large files, we would need to implement resumable upload
|
|
logger.warning(f"File too large ({len(file_data)} bytes) for site {site_name}")
|
|
continue
|
|
|
|
if not upload_successful:
|
|
upload_results.append({
|
|
"fileName": fileName,
|
|
"fileId": fileId,
|
|
"error": f"File too large ({len(file_data)} bytes) or upload failed to all sites. Files larger than 4MB require resumable upload (not implemented).",
|
|
"uploadStatus": "failed"
|
|
})
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error uploading document {fileName}: {str(e)}")
|
|
upload_results.append({
|
|
"fileName": fileName,
|
|
"fileId": fileId,
|
|
"error": str(e),
|
|
"uploadStatus": "failed"
|
|
})
|
|

            # Create result data
            # Note: report the requested upload path rather than the loop-local
            # upload_path, which is mutated per file and may be unbound when no
            # documents were processed.
            result_data = {
                "connectionReference": connectionReference,
                "pathQuery": upload_paths[0] if upload_paths else "/Documents",
                "documentList": documentList,
                "fileNames": fileNames,
                "sitesAvailable": len(sites),
                "uploadResults": upload_results,
                "connection": {
                    "id": connection["id"],
                    "authority": "microsoft",
                    "reference": connectionReference
                },
                "timestamp": get_utc_timestamp()
            }

            # Use default JSON format for output
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_upload_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )

        except Exception as e:
            logger.error(f"Error uploading to SharePoint: {str(e)}")
            return ActionResult(
                success=False,
                error=str(e)
            )
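
    # Hedged sketch, not called anywhere yet: Microsoft Graph requires an upload
    # session for files >= 4MB. The flow is POST .../createUploadSession, then
    # sequential PUTs of byte ranges to the returned uploadUrl; chunk sizes must
    # be multiples of 320 KiB, and the final chunk returns the created driveItem.
    # The method name, chunk size, and conflict behavior are illustrative assumptions.
    async def _uploadLargeFileSketch(self, access_token: str, site_id: str,
                                     upload_path_clean: str, file_data: bytes) -> Dict[str, Any]:
        """Minimal resumable-upload sketch; assumes file_data fits in memory."""
        session_endpoint = (
            f"https://graph.microsoft.com/v1.0/sites/{site_id}"
            f"/drive/root:/{upload_path_clean}:/createUploadSession"
        )
        chunk_size = 10 * 320 * 1024  # 3.2 MiB - a multiple of 320 KiB as Graph requires
        async with aiohttp.ClientSession() as session:
            # 1) Create the upload session and capture the temporary uploadUrl
            async with session.post(
                session_endpoint,
                headers={"Authorization": f"Bearer {access_token}"},
                json={"item": {"@microsoft.graph.conflictBehavior": "replace"}},
            ) as resp:
                if resp.status >= 400:
                    return {"error": f"createUploadSession failed with status {resp.status}"}
                upload_url = (await resp.json())["uploadUrl"]

            # 2) PUT the file in sequential chunks; the uploadUrl is pre-authenticated,
            #    and aiohttp sets Content-Length from the chunk body automatically
            total = len(file_data)
            for start in range(0, total, chunk_size):
                end = min(start + chunk_size, total) - 1
                chunk = file_data[start:end + 1]
                headers = {"Content-Range": f"bytes {start}-{end}/{total}"}
                async with session.put(upload_url, headers=headers, data=chunk) as put_resp:
                    if put_resp.status >= 400:
                        return {"error": f"Chunk upload failed with status {put_resp.status}"}
                    if put_resp.status in (200, 201):
                        # Final chunk: Graph returns the completed driveItem
                        return await put_resp.json()
        return {"error": "Upload ended without a completed driveItem"}
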
    @action
    async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        List documents in SharePoint folders across accessible sites

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
            pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
            pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
            includeSubfolders (bool, optional): Whether to include subfolders (default: False)
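
        Example parameters (illustrative values):
            {"connectionReference": "conn-ref-1",
             "pathQuery": "/site:KM LayerFinance/Documents/Work",
             "includeSubfolders": False}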
        """
        try:
            connectionReference = parameters.get("connectionReference")
            pathObject = parameters.get("pathObject")
            pathQuery = parameters.get("pathQuery")
            includeSubfolders = parameters.get("includeSubfolders", False)  # Default to False for better UX

            list_query = pathQuery
            logger.info(f"Using pathQuery: {pathQuery}")

            if not connectionReference:
                return ActionResult.isFailure(error="Connection reference is required")

            # If pathObject is provided, resolve the reference and extract folder IDs from it
            # Note: pathObject takes precedence over pathQuery when both are provided
            if pathObject:
                if pathQuery and pathQuery != "*":
                    logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
                try:
                    # json is already imported at module level
                    # Resolve the reference label to get the actual document list
                    document_list = self.service.getChatDocumentsFromDocumentList([pathObject])
                    if not document_list or len(document_list) == 0:
                        return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")

                    # Get the first document's content (which should be the JSON)
                    first_document = document_list[0]
                    logger.info(f"Document fileId: {first_document.fileId}, fileName: {first_document.fileName}")
                    file_data = self.service.getFileData(first_document.fileId)
                    if not file_data:
                        return ActionResult.isFailure(error=f"No file data found for document: {pathObject} (fileId: {first_document.fileId})")
                    logger.info(f"File data length: {len(file_data)}")

                    # Parse the JSON content
                    result_data = json.loads(file_data)

                    # Debug: Log the structure of the result document
                    logger.info(f"Result document keys: {list(result_data.keys())}")

                    # Handle different result document formats
                    found_documents = []
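
                    # Two result shapes are handled below (illustrative):
                    #   A) findDocumentPath output:
                    #      {"foundDocuments": [{"id": "...", "name": "Work", "type": "folder", ...}]}
                    #   B) AI validation output, with nested JSON in "result":
                    #      {"result": "{\"validationReport\": {\"documentDetails\": {...}}}"}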

                    # Check if it's a direct SharePoint result (has foundDocuments)
                    if "foundDocuments" in result_data:
                        found_documents = result_data.get("foundDocuments", [])
                        logger.info(f"Found {len(found_documents)} documents in foundDocuments array")
                    # Check if it's an AI validation result (has a result string containing a validationReport)
                    elif "result" in result_data and "validationReport" in result_data["result"]:
                        try:
                            # Parse the nested JSON in the result field
                            nested_result = json.loads(result_data["result"])
                            validation_report = nested_result.get("validationReport", {})
                            document_details = validation_report.get("documentDetails", {})

                            if document_details:
                                # Convert the single document details to the expected format
                                doc = {
                                    "id": document_details.get("id"),
                                    "name": document_details.get("name"),
                                    "type": document_details.get("type", "").lower(),  # Convert "Folder" to "folder"
                                    "siteName": document_details.get("siteName"),
                                    "siteId": document_details.get("siteId"),
                                    "fullPath": document_details.get("fullPath"),
                                    "webUrl": document_details.get("webUrl", ""),
                                    "parentPath": document_details.get("parentPath", "")
                                }
                                found_documents = [doc]
                                logger.info("Extracted 1 document from validation report")
                        except json.JSONDecodeError as e:
                            logger.error(f"Failed to parse nested JSON in result field: {e}")
                            return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}")

                    # Debug: Log what we found in the result document
                    logger.info(f"Result document contains {len(found_documents)} documents")
                    for i, doc in enumerate(found_documents):
                        logger.info(f"  Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'")

                    # Extract folder information from the result
                    folders = []
                    for doc in found_documents:
                        if doc.get("type") == "folder":
                            folders.append(doc)

                    logger.info(f"Found {len(folders)} folders in result document")

                    if folders:
                        # Use the first folder found - prefer the folder ID for direct API calls
                        first_folder = folders[0]
                        if first_folder.get("id"):
                            # Use the folder ID directly for the most reliable API calls
                            list_query = first_folder.get("id")
                            logger.info(f"Using folder ID from pathObject: {list_query}")
elif first_folder.get("fullPath"):
|
|
# Extract the correct path portion from fullPath by removing site name
|
|
full_path = first_folder.get("fullPath")
|
|
# fullPath format: \\SiteName\\Library\\Folder\\SubFolder
|
|
# We need to remove the first two parts (\\SiteName\\) to get the actual folder path
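                            # e.g. fullPath "\\KM LayerFinance\\Documents\\Work"
                            #      -> actual_path "Documents\\Work" (illustrative site name)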
                            path_parts = full_path.lstrip('\\').split('\\')
                            if len(path_parts) > 1:
                                # Remove the first part (site name) and reconstruct the path
                                actual_path = '\\'.join(path_parts[1:])
                                list_query = actual_path
                                logger.info(f"Extracted path from fullPath: {list_query}")
                            else:
                                list_query = full_path
                                logger.info(f"Using full path from pathObject (no site name to remove): {list_query}")
                        else:
                            return ActionResult.isFailure(error="No valid folder information found in pathObject")
                    else:
                        return ActionResult.isFailure(error="No folders found in pathObject")

                except json.JSONDecodeError as e:
                    return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
                except Exception as e:
                    return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")

            # Get Microsoft connection
            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            logger.info(f"Starting SharePoint listDocuments for list_query: {list_query}")
            logger.debug(f"Connection ID: {connection['id']}")

            # Parse list_query to extract path, search terms, search type, and options.
            # Note: this deliberately rebinds pathQuery to the parsed path component.
            pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(list_query)

            # Determine sites to use - strict validation: pathObject → pathQuery → ERROR
            sites = None

            # Step 1: Check pathObject first
            if pathObject:
                # When pathObject is provided, we should have specific site information
                # Extract site information from the pathObject result
                try:
                    # Get the site information from the first folder in pathObject
                    if 'found_documents' in locals() and found_documents:
                        first_folder = found_documents[0]
                        site_name = first_folder.get("siteName")
                        site_id = first_folder.get("siteId")

                        if site_name and site_id:
                            # Use the specific site from pathObject instead of discovering all sites
                            sites = [{
                                "id": site_id,
                                "displayName": site_name,
                                "webUrl": first_folder.get("webUrl", "")
                            }]
                            logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
                        else:
                            # Site info missing from pathObject - this is an error
                            return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for list operation.")
                    else:
                        # No documents found in pathObject - this is an error
                        return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for list operation.")
                except Exception as e:
                    # Error processing pathObject - this is an error
                    return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for list operation.")

            # Step 2: If no pathObject, check pathQuery
            elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
                # Validate pathQuery format
                if not pathQuery.startswith('/'):
                    return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")

                # Check if pathQuery contains search terms (words without proper path structure)
                if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'):
                    return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")

                # For pathQuery, we need to discover sites to find the specific one
                sites = await self._discoverSharePointSites(connection["accessToken"])
                if not sites:
                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")
            else:
                # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
                return ActionResult.isFailure(error="No valid list path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")

            if not sites:
                return ActionResult.isFailure(error="No valid target site determined for list operation")

            # Check if list_query is a driveItem folder ID (these IDs begin with '01')
            if list_query.startswith('01'):
                # Direct folder ID - use it directly
                folder_paths = [list_query]
                logger.info(f"Using direct folder ID: {list_query}")
            else:
                # Resolve path query into folder paths
                folder_paths = self._resolvePathQuery(pathQuery)
                logger.info(f"Resolved folder paths: {folder_paths}")

            # Process each folder path across all sites
            list_results = []

            for folderPath in folder_paths:
                try:
                    folder_results = []

                    for site in sites:
                        site_id = site["id"]
                        site_name = site["displayName"]
                        site_url = site["webUrl"]

                        logger.info(f"Listing folder {folderPath} in site: {site_name}")

                        # Determine the endpoint based on the folder path
                        if folderPath in ["/", ""] or folderPath == "*":
                            # Root folder
                            endpoint = f"sites/{site_id}/drive/root/children"
                        elif folderPath.startswith('01'):
                            # Direct driveItem folder ID
                            endpoint = f"sites/{site_id}/drive/items/{folderPath}/children"
                        else:
                            # Specific folder path - remove leading slash if present
                            folder_path_clean = folderPath.lstrip('/')
                            endpoint = f"sites/{site_id}/drive/root:/{folder_path_clean}:/children"

                        # Make the API call to list folder contents
                        api_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)

                        if "error" in api_result:
                            logger.warning(f"Failed to list folder {folderPath} in site {site_name}: {api_result['error']}")
                            continue

                        # Process the results
                        items = api_result.get("value", [])
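                        # Note: Graph pages children listings via @odata.nextLink;
                        # only the first page is consumed here.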
                        processed_items = []

                        for item in items:
                            # Use improved folder detection logic
                            is_folder = False
                            if 'folder' in item:
                                is_folder = True
                            else:
                                # Heuristic fallback: treat extension-less names as folders.
                                # (This can misclassify files without extensions.)
                                web_url = item.get('webUrl', '')
                                name = item.get('name', '')

                                if '.' not in name and ('/' in web_url or '\\' in web_url):
                                    is_folder = True

                            item_info = {
                                "id": item.get("id"),
                                "name": item.get("name"),
                                "size": item.get("size", 0),
                                "createdDateTime": item.get("createdDateTime"),
                                "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                                "webUrl": item.get("webUrl"),
                                "type": "folder" if is_folder else "file",
                                "siteName": site_name,
                                "siteUrl": site_url
                            }

                            # Add file-specific information
                            if "file" in item:
                                item_info.update({
                                    "mimeType": item["file"].get("mimeType"),
                                    "downloadUrl": item.get("@microsoft.graph.downloadUrl")
                                })

                            # Add folder-specific information
                            if "folder" in item:
                                item_info.update({
                                    "childCount": item["folder"].get("childCount", 0)
                                })

                            processed_items.append(item_info)

                        # If includeSubfolders is enabled, get ONLY direct subfolder contents (1 level deep)
                        if includeSubfolders:
                            folder_items = [item for item in processed_items if item['type'] == 'folder']
                            logger.info(f"Including subfolders - processing {len(folder_items)} folders")
                            subfolder_count = 0
                            max_subfolders = 10  # Cap the number of subfolder API calls

                            for item in processed_items[:]:  # iterate a copy; processed_items is appended to below
                                if item["type"] == "folder" and subfolder_count < max_subfolders:
                                    subfolder_count += 1
                                    subfolder_path = f"{folderPath.rstrip('/')}/{item['name']}"
                                    subfolder_endpoint = f"sites/{site_id}/drive/items/{item['id']}/children"

                                    logger.debug(f"Getting contents of subfolder: {item['name']}")
                                    subfolder_result = await self._makeGraphApiCall(connection["accessToken"], subfolder_endpoint)
                                    if "error" not in subfolder_result:
                                        subfolder_items = subfolder_result.get("value", [])
                                        logger.debug(f"Found {len(subfolder_items)} items in subfolder {item['name']}")

                                        for subfolder_item in subfolder_items:
                                            # Use the same folder detection logic for subfolder items
                                            subfolder_is_folder = False
                                            if 'folder' in subfolder_item:
                                                subfolder_is_folder = True
                                            else:
                                                # Heuristic fallback: treat extension-less names as folders
                                                subfolder_web_url = subfolder_item.get('webUrl', '')
                                                subfolder_name = subfolder_item.get('name', '')

                                                if '.' not in subfolder_name and ('/' in subfolder_web_url or '\\' in subfolder_web_url):
                                                    subfolder_is_folder = True

                                            # Only add files and direct subfolders, NO RECURSION
                                            subfolder_item_info = {
                                                "id": subfolder_item.get("id"),
                                                "name": subfolder_item.get("name"),
                                                "size": subfolder_item.get("size", 0),
                                                "createdDateTime": subfolder_item.get("createdDateTime"),
                                                "lastModifiedDateTime": subfolder_item.get("lastModifiedDateTime"),
                                                "webUrl": subfolder_item.get("webUrl"),
                                                "type": "folder" if subfolder_is_folder else "file",
                                                "parentPath": subfolder_path,
                                                "siteName": site_name,
                                                "siteUrl": site_url
                                            }

                                            if "file" in subfolder_item:
                                                subfolder_item_info.update({
                                                    "mimeType": subfolder_item["file"].get("mimeType"),
                                                    "downloadUrl": subfolder_item.get("@microsoft.graph.downloadUrl")
                                                })

                                            processed_items.append(subfolder_item_info)
                                    else:
                                        logger.warning(f"Failed to get contents of subfolder {item['name']}: {subfolder_result.get('error')}")
                                elif subfolder_count >= max_subfolders:
                                    logger.warning(f"Reached maximum subfolder limit ({max_subfolders}), skipping remaining folders")
                                    break

                            logger.info(f"Processed {subfolder_count} subfolders, total items: {len(processed_items)}")

                        folder_results.append({
                            "siteName": site_name,
                            "siteUrl": site_url,
                            "itemCount": len(processed_items),
                            "items": processed_items
                        })

                    list_results.append({
                        "folderPath": folderPath,
                        "sitesProcessed": len(folder_results),
                        "siteResults": folder_results
                    })

                except Exception as e:
                    logger.error(f"Error listing folder {folderPath}: {str(e)}")
                    list_results.append({
                        "folderPath": folderPath,
                        "error": str(e),
                        "siteResults": []
                    })

            # Create result data
            result_data = {
                "pathQuery": list_query,
                "includeSubfolders": includeSubfolders,
                "sitesSearched": len(sites),
                "listResults": list_results,
                "timestamp": get_utc_timestamp()
            }

            # Use default JSON format for output
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_document_list_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )

        except Exception as e:
            logger.error(f"Error listing SharePoint documents: {str(e)}")
            return ActionResult(
                success=False,
                error=str(e)
            )