1934 lines
No EOL
109 KiB
Python
1934 lines
No EOL
109 KiB
Python
"""
|
|
SharePoint operations method module.
|
|
Handles SharePoint document operations using the SharePoint service.
|
|
"""
|
|
|
|
import asyncio
import json
import logging
import re
import urllib
import urllib.parse
from datetime import datetime, UTC
from typing import Dict, Any, List, Optional

import aiohttp
|
|
|
|
from modules.workflows.methods.methodBase import MethodBase, action
|
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class MethodSharepoint(MethodBase):
|
|
"""SharePoint operations methods."""
|
|
|
|
def __init__(self, services):
    """Initialize the SharePoint method module.

    Parameters:
        services: Service container exposing the chat, sharepoint and utils
            services used by the action methods below.
    """
    super().__init__(services)
    # Registry identity used by the workflow framework to route actions.
    self.name = "sharepoint"
    self.description = "SharePoint operations methods"
|
|
|
|
def _format_timestamp_for_filename(self) -> str:
|
|
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
|
return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
|
|
|
def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
    """Resolve a connection reference and configure the SharePoint service.

    Parameters:
        connectionReference (str): Label resolving to a stored user connection.

    Returns:
        Optional[Dict[str, Any]]: Dict with the connection id, the
        UserConnection object and the SharePoint scopes, or None when the
        connection is missing, not Microsoft, not active/pending, or the
        SharePoint service could not be configured with it.
    """
    try:
        userConnection = self.services.chat.getUserConnectionFromConnectionReference(connectionReference)
        if not userConnection:
            logger.warning(f"No user connection found for reference: {connectionReference}")
            return None

        # Only Microsoft ("msft") connections can drive the SharePoint service.
        if userConnection.authority.value != "msft":
            logger.warning(f"Connection {userConnection.id} is not Microsoft (authority: {userConnection.authority.value})")
            return None

        # Check if connection is active or pending (pending means OAuth in progress)
        if userConnection.status.value not in ["active", "pending"]:
            logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
            return None

        # Configure SharePoint service with the UserConnection
        if not self.services.sharepoint.setAccessTokenFromConnection(userConnection):
            logger.warning(f"Failed to configure SharePoint service with connection {userConnection.id}")
            return None

        logger.info(f"Successfully configured SharePoint service with Microsoft connection: {userConnection.id}, status: {userConnection.status.value}, externalId: {userConnection.externalId}")

        return {
            "id": userConnection.id,
            "userConnection": userConnection,
            "scopes": ["Sites.ReadWrite.All", "Files.ReadWrite.All", "User.Read"]  # SharePoint scopes
        }
    except Exception as e:
        logger.error(f"Error getting Microsoft connection: {str(e)}")
        return None
|
|
|
|
async def _discoverSharePointSites(self) -> List[Dict[str, Any]]:
    """
    Discover all SharePoint sites accessible to the user via Microsoft Graph API.

    Returns:
        List[Dict[str, Any]]: One dict per site with id, displayName, name,
        webUrl, description and created/modified timestamps. Empty list on
        any error (API failure or exception).
    """
    try:
        # Query Microsoft Graph to get all sites the user has access to
        endpoint = "sites?search=*"
        result = await self._makeGraphApiCall(endpoint)

        if "error" in result:
            logger.error(f"Error discovering SharePoint sites: {result['error']}")
            return []

        sites = result.get("value", [])
        logger.info(f"Discovered {len(sites)} SharePoint sites")

        # Process and return site information, keeping only the fields
        # the downstream search/filter code relies on.
        processed_sites = []
        for site in sites:
            site_info = {
                "id": site.get("id"),
                "displayName": site.get("displayName"),
                "name": site.get("name"),
                "webUrl": site.get("webUrl"),
                "description": site.get("description"),
                "createdDateTime": site.get("createdDateTime"),
                "lastModifiedDateTime": site.get("lastModifiedDateTime")
            }
            processed_sites.append(site_info)
            logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}")

        return processed_sites

    except Exception as e:
        logger.error(f"Error discovering SharePoint sites: {str(e)}")
        return []
|
|
|
|
def _filter_sites_by_hint(self, sites: List[Dict[str, Any]], site_hint: str) -> List[Dict[str, Any]]:
|
|
"""Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
|
|
try:
|
|
if not site_hint:
|
|
return sites
|
|
hint = site_hint.strip().lower()
|
|
filtered: List[Dict[str, Any]] = []
|
|
for site in sites:
|
|
name = (site.get("displayName") or "").lower()
|
|
web_url = (site.get("webUrl") or "").lower()
|
|
if hint in name or hint in web_url:
|
|
filtered.append(site)
|
|
return filtered if filtered else sites
|
|
except Exception as e:
|
|
logger.error(f"Error filtering sites by hint '{site_hint}': {str(e)}")
|
|
return sites
|
|
|
|
|
|
def _parse_site_scoped_path(self, path_query: str) -> Optional[Dict[str, str]]:
|
|
"""
|
|
Parse a site-scoped path of the form:
|
|
/site:KM LayerFinance/Documents/Work or /site:KM LayerFinance/Shared Documents/Work
|
|
|
|
Returns dict with keys: siteName, innerPath (no leading slash) on success, else None.
|
|
"""
|
|
try:
|
|
if not path_query or not path_query.startswith('/'):
|
|
return None
|
|
# expected syntax prefix
|
|
prefix = '/site:'
|
|
if not path_query.startswith(prefix):
|
|
return None
|
|
remainder = path_query[len(prefix):]
|
|
# split once on the next '/'
|
|
if '/' not in remainder:
|
|
return None
|
|
site_name, inner = remainder.split('/', 1)
|
|
site_name = site_name.strip()
|
|
inner_path = inner.strip()
|
|
if not site_name or not inner_path:
|
|
return None
|
|
return {"siteName": site_name, "innerPath": inner_path}
|
|
except Exception as e:
|
|
logger.error(f"Error parsing site-scoped path '{path_query}': {str(e)}")
|
|
return None
|
|
|
|
def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
    """
    Parse searchQuery to extract path, search terms, search type, and search options.

    CRITICAL: NEVER convert words to paths! Words stay as search terms.
    - "root document lesson" → fileQuery="root document lesson" (NOT "/root/document/lesson")
    - "root, gose" → fileQuery="root, gose" (NOT "/root/gose")
    - "druckersteuerung eskalation logobject" → fileQuery="druckersteuerung eskalation logobject"

    Parameters:
        searchQuery (str): Enhanced search query with options:
            - "budget" -> pathQuery="*", fileQuery="budget", searchType="all", options={}
            - "root document lesson" -> pathQuery="*", fileQuery="root document lesson", searchType="all", options={}
            - "root, gose" -> pathQuery="*", fileQuery="root, gose", searchType="all", options={}
            - "/Documents:budget" -> pathQuery="/Documents", fileQuery="budget", searchType="all", options={}
            - "files:budget" -> pathQuery="*", fileQuery="budget", searchType="files", options={}
            - "folders:DELTA" -> pathQuery="*", fileQuery="DELTA", searchType="folders", options={}
            - "exact:\"Operations 2025\"" -> exact phrase matching
            - "regex:^Operations.*2025$" -> regex pattern matching
            - "case:DELTA" -> case-sensitive search
            - "and:DELTA AND 2025 Mars AND Group" -> all AND terms must be present

    Returns:
        tuple[str, str, str, dict]: (pathQuery, fileQuery, searchType, searchOptions)

    Raises:
        ValueError: If parsing fails unexpectedly.
    """
    try:
        if not searchQuery or not searchQuery.strip() or searchQuery.strip() == "*":
            # Empty/wildcard query: search everything, everywhere.
            return "*", "*", "all", {}

        searchQuery = searchQuery.strip()
        searchOptions = {}

        # CRITICAL: Do NOT convert space-separated or comma-separated words to paths!
        # "root document lesson" should stay as "root document lesson", NOT "/root/document/lesson"
        # "root, gose" should stay as "root, gose", NOT "/root/gose"

        # Check for search type specification (files:, folders:, all:) FIRST
        # — it must be stripped before the generic path:search colon handling below.
        searchType = "all"  # Default
        if searchQuery.startswith(("files:", "folders:", "all:")):
            type_parts = searchQuery.split(':', 1)
            searchType = type_parts[0].strip()
            searchQuery = type_parts[1].strip()

        # Extract optional site hint tokens: support "site=Name" or leading "site:Name"
        def _extract_site_hint(q: str) -> tuple[str, Optional[str]]:
            try:
                q_strip = q.strip()
                # Leading form: site:KM LayerFinance ...
                if q_strip.lower().startswith("site:"):
                    after = q_strip[5:].lstrip()
                    # site name until next space or end
                    if ' ' in after:
                        site_name, rest = after.split(' ', 1)
                    else:
                        site_name, rest = after, ''
                    return rest.strip(), site_name.strip()
                # Inline key=value form anywhere
                m = re.search(r"\bsite=([^;\s]+)", q_strip, flags=re.IGNORECASE)
                if m:
                    site_name = m.group(1).strip()
                    # remove the token from query
                    q_new = re.sub(r"\bsite=[^;\s]+;?", "", q_strip, flags=re.IGNORECASE).strip()
                    return q_new, site_name
            except Exception:
                pass
            # No hint found (or extraction failed): return the query untouched.
            return q, None

        searchQuery, extracted_site = _extract_site_hint(searchQuery)
        if extracted_site:
            searchOptions["site_hint"] = extracted_site
            logger.info(f"Extracted site hint: '{extracted_site}'")

        # Extract name="..." if present (for quoted multi-word names)
        name_match = re.search(r"name=\"([^\"]+)\"", searchQuery)
        if name_match:
            searchQuery = name_match.group(1)
            logger.info(f"Extracted name from quotes: '{searchQuery}'")

        # Check for search mode specification (exact:, regex:, case:, and:)
        if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):
            mode_parts = searchQuery.split(':', 1)
            mode = mode_parts[0].strip()
            searchQuery = mode_parts[1].strip()

            if mode == "exact":
                searchOptions["exact_match"] = True
                # Remove quotes if present
                if searchQuery.startswith('"') and searchQuery.endswith('"'):
                    searchQuery = searchQuery[1:-1]
            elif mode == "regex":
                searchOptions["regex_match"] = True
            elif mode == "case":
                searchOptions["case_sensitive"] = True
            elif mode == "and":
                searchOptions["and_terms"] = True

        # Check if it contains path:search format
        if ':' in searchQuery:
            parts = searchQuery.split(':', 1)  # Split only on first colon
            path_part = parts[0].strip()
            search_part = parts[1].strip()

            # Handle path part
            if not path_part or path_part == "*":
                pathQuery = "*"
            elif path_part.startswith('/'):
                pathQuery = path_part
            else:
                # Relative path segment is anchored under the default library.
                pathQuery = f"/Documents/{path_part}"

            # Handle search part
            if not search_part or search_part == "*":
                fileQuery = "*"
            else:
                fileQuery = search_part

            # Use search_part as fileQuery (name extraction already handled above)
            return pathQuery, fileQuery, searchType, searchOptions

        # No colon - check if it looks like a path
        elif searchQuery.startswith('/'):
            # It's a path only
            return searchQuery, "*", searchType, searchOptions

        else:
            # It's a search term only - keep words as-is, do NOT convert to paths
            # "root document lesson" stays as "root document lesson"
            # "root, gose" stays as "root, gose"
            return "*", searchQuery, searchType, searchOptions

    except Exception as e:
        logger.error(f"Error parsing searchQuery '{searchQuery}': {str(e)}")
        raise ValueError(f"Failed to parse searchQuery '{searchQuery}': {str(e)}")
|
|
|
|
def _resolvePathQuery(self, pathQuery: str) -> List[str]:
    """Resolve pathQuery into a list of search paths for SharePoint operations.

    Accepts direct paths ("/Documents/Project1"), wildcards ("/Documents/*"),
    semicolon-separated lists ("/Docs; /Files"), and single-word relative
    paths ("Project1" → "/Documents/Project1"). Empty input or "*" yields a
    global search. Space-separated words without path separators are treated
    as search terms and mapped to "*".

    Returns:
        List[str]: Resolved paths, de-duplicated with order preserved.

    Raises:
        ValueError: If resolution fails unexpectedly.
    """
    try:
        if not pathQuery or not pathQuery.strip() or pathQuery.strip() == "*":
            # Global search across all sites.
            return ["*"]

        resolved = []
        # Multiple paths may be supplied separated by semicolons.
        for candidate in (part.strip() for part in pathQuery.split(';')):
            if not candidate:
                continue
            if '*' in candidate:
                # Wildcard patterns pass through untouched.
                resolved.append(candidate)
            elif candidate.startswith('/'):
                # Absolute path - keep as-is.
                resolved.append(candidate)
            elif ' ' not in candidate:
                # Bare single word: anchor it under the default document library.
                resolved.append(f"/Documents/{candidate}")
            elif '\\' in candidate or '/' in candidate:
                # Spaces plus separators: folder names containing spaces.
                resolved.append(candidate)
                logger.info(f"Path with spaces '{candidate}' treated as valid folder path")
            else:
                # Space-separated words with no separators are search terms.
                logger.info(f"Space-separated words '{candidate}' treated as search terms, not folder path")
                resolved.append("*")

        # De-duplicate while keeping first-seen order.
        unique_paths = list(dict.fromkeys(resolved))

        logger.info(f"Resolved pathQuery '{pathQuery}' to {len(unique_paths)} paths: {unique_paths}")
        return unique_paths

    except Exception as e:
        logger.error(f"Error resolving pathQuery '{pathQuery}': {str(e)}")
        raise ValueError(f"Failed to resolve pathQuery '{pathQuery}': {str(e)}")
|
|
|
|
def _parseSiteUrl(self, siteUrl: str) -> Dict[str, str]:
|
|
"""Parse SharePoint site URL to extract hostname and site path"""
|
|
try:
|
|
parsed = urllib.parse.urlparse(siteUrl)
|
|
hostname = parsed.hostname
|
|
path = parsed.path.strip('/')
|
|
|
|
return {
|
|
"hostname": hostname,
|
|
"sitePath": path
|
|
}
|
|
except Exception as e:
|
|
logger.error(f"Error parsing site URL {siteUrl}: {str(e)}")
|
|
return {"hostname": "", "sitePath": ""}
|
|
|
|
async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
    """Make a Microsoft Graph API call with a 30s timeout and detailed logging.

    Parameters:
        endpoint (str): Graph path relative to ``https://graph.microsoft.com/v1.0/``.
        method (str): HTTP verb - "GET", "PUT" or "POST".
        data (bytes): Optional request body for PUT/POST.

    Returns:
        Dict[str, Any]: Parsed JSON response on success, otherwise a dict of
        the form ``{"error": "..."}`` (never raises to the caller).
    """
    try:
        if not hasattr(self.services, 'sharepoint') or not self.services.sharepoint._target.accessToken:
            return {"error": "SharePoint service not configured with access token"}

        headers = {
            "Authorization": f"Bearer {self.services.sharepoint._target.accessToken}",
            # PUT bodies are raw uploads (octet-stream); everything else is JSON.
            "Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
        }

        url = f"https://graph.microsoft.com/v1.0/{endpoint}"
        logger.info(f"Making Graph API call: {method} {url}")

        # GET treats only 200 as success; PUT/POST also accept 201 (created).
        success_statuses = {"GET": (200,), "PUT": (200, 201), "POST": (200, 201)}
        if method not in success_statuses:
            # Previously an unsupported verb fell through the if/elif chain
            # and implicitly returned None; fail explicitly instead.
            logger.error(f"Unsupported HTTP method for Graph API call: {method}")
            return {"error": f"Unsupported HTTP method: {method}"}

        # Set timeout to 30 seconds
        timeout = aiohttp.ClientTimeout(total=30)

        async with aiohttp.ClientSession(timeout=timeout) as session:
            logger.debug(f"Starting {method} request to {url}")
            request_kwargs = {"headers": headers}
            if method != "GET":
                request_kwargs["data"] = data
            async with session.request(method, url, **request_kwargs) as response:
                logger.info(f"Graph API response: {response.status}")
                if response.status in success_statuses[method]:
                    result = await response.json()
                    logger.debug(f"Graph API success: {len(str(result))} characters response")
                    return result
                error_text = await response.text()
                logger.error(f"Graph API call failed: {response.status} - {error_text}")
                return {"error": f"API call failed: {response.status} - {error_text}"}

    except asyncio.TimeoutError:
        logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
        return {"error": f"API call timed out after 30 seconds: {endpoint}"}
    except Exception as e:
        logger.error(f"Error making Graph API call: {str(e)}")
        return {"error": f"Error making Graph API call: {str(e)}"}
|
|
|
|
async def _getSiteId(self, hostname: str, site_path: str) -> str:
    """Resolve a SharePoint site ID from its hostname and server-relative path.

    Returns:
        str: The Graph site ID, or "" when the lookup fails.
    """
    try:
        lookup = await self._makeGraphApiCall(f"sites/{hostname}:/{site_path}")

        if "error" in lookup:
            logger.error(f"Error getting site ID: {lookup['error']}")
            return ""

        return lookup.get("id", "")
    except Exception as e:
        logger.error(f"Error getting site ID: {str(e)}")
        return ""
|
|
|
|
|
|
@action
async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Find documents and folders by name/path across sites.
    - Input requirements: connectionReference (required); searchQuery (required); optional site, maxResults.
    - Output format: JSON with found items and paths.

    Parameters:
    - connectionReference (str, required): Microsoft connection label.
    - site (str, optional): Site hint.
    - searchQuery (str, required): Search terms or path.
    - maxResults (int, optional): Maximum items to return. Default: 100.
    """
    try:
        connectionReference = parameters.get("connectionReference")
        site = parameters.get("site")
        searchQuery = parameters.get("searchQuery", "*")
        maxResults = parameters.get("maxResults", 100)

        if not connectionReference:
            return ActionResult.isFailure(error="Connection reference is required")

        # Parse searchQuery to extract path, search terms, search type, and options
        pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery)

        connection = self._getMicrosoftConnection(connectionReference)
        if not connection:
            return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

        # Discover SharePoint sites - use targeted approach when site parameter is provided
        if site:
            # When site parameter is provided, discover all sites first, then filter
            all_sites = await self._discoverSharePointSites()
            if not all_sites:
                return ActionResult.isFailure(error="No SharePoint sites found or accessible")

            sites = self._filter_sites_by_hint(all_sites, site)
            logger.info(f"Filtered sites by site parameter: '{site}' -> {len(sites)} sites")
            if not sites:
                return ActionResult.isFailure(error=f"No SharePoint sites found matching '{site}'")
        else:
            # No site parameter - discover all sites
            sites = await self._discoverSharePointSites()
            if not sites:
                return ActionResult.isFailure(error="No SharePoint sites found or accessible")

        # Resolve path query into search paths
        # NOTE(review): search_paths is not consumed anywhere below — confirm
        # whether path scoping was intended to constrain the searches.
        search_paths = self._resolvePathQuery(pathQuery)

        try:
            # Search across all discovered sites
            found_documents = []
            all_sites_searched = []

            # Handle different search approaches based on search type
            if searchType == "folders" and fileQuery and fileQuery.strip() != "" and fileQuery.strip() != "*":
                # Use unified search for folders - this is global and searches all sites
                try:

                    # Use Microsoft Graph Search API syntax (simple term search only)
                    terms = [t for t in fileQuery.split() if t.strip()]

                    if len(terms) > 1:
                        # Multiple terms: search for ALL terms (AND) - more specific results
                        query_string = " AND ".join(terms)
                    else:
                        # Single term: search for the term
                        query_string = terms[0] if terms else fileQuery
                    logger.info(f"Using unified search for folders: {query_string}")

                    payload = {
                        "requests": [
                            {
                                "entityTypes": ["driveItem"],
                                "query": {"queryString": query_string},
                                "from": 0,
                                "size": 50
                            }
                        ]
                    }
                    logger.info(f"Using unified search API for folders with queryString: {query_string}")

                    # Use global search endpoint (site-specific search not available)
                    unified_result = await self._makeGraphApiCall(
                        "search/query",
                        method="POST",
                        data=json.dumps(payload).encode("utf-8")
                    )

                    if "error" in unified_result:
                        logger.warning(f"Unified search failed: {unified_result['error']}")
                        items = []
                    else:
                        # Flatten hits -> driveItem resources
                        items = []
                        for container in (unified_result.get("value", []) or []):
                            for hits_container in (container.get("hitsContainers", []) or []):
                                for hit in (hits_container.get("hits", []) or []):
                                    resource = hit.get("resource")
                                    if resource:
                                        items.append(resource)

                        logger.info(f"Unified search returned {len(items)} items (pre-filter)")

                        # Apply our improved folder detection logic
                        folder_items = []
                        for item in items:
                            resource = item

                            # Use the same detection logic as our test
                            is_folder = False
                            if 'folder' in resource:
                                is_folder = True
                            else:
                                # Try to detect by URL pattern or other indicators
                                web_url = resource.get('webUrl', '')
                                name = resource.get('name', '')

                                # Check if URL has no file extension and looks like a folder path
                                if '.' not in name and ('/' in web_url or '\\' in web_url):
                                    is_folder = True

                            if is_folder:
                                folder_items.append(item)

                        items = folder_items
                        logger.info(f"Filtered to {len(items)} folders using improved detection logic")

                    # Process unified search results - extract site information from webUrl
                    for item in items:
                        item_name = item.get("name", "")
                        web_url = item.get("webUrl", "")

                        # Extract site information from webUrl
                        site_name = "Unknown Site"
                        site_id = "unknown"

                        if web_url and '/sites/' in web_url:
                            try:
                                # Extract site name from URL: https://pcuster.sharepoint.com/sites/SiteName/...
                                url_parts = web_url.split('/sites/')
                                if len(url_parts) > 1:
                                    site_path = url_parts[1].split('/')[0]
                                    # Find matching site from discovered sites
                                    # First try to match by site name (URL path)
                                    # NOTE(review): the loop variable ``site`` below shadows the
                                    # ``site`` hint parameter read at the top of this action —
                                    # confirm the parameter is not needed after this point.
                                    for site in sites:
                                        if site.get("name") == site_path:
                                            site_name = site.get("displayName", site_path)
                                            site_id = site.get("id", "unknown")
                                            break
                                    else:
                                        # If no match by name, try to match by displayName
                                        for site in sites:
                                            if site.get("displayName") == site_path:
                                                site_name = site.get("displayName", site_path)
                                                site_id = site.get("id", "unknown")
                                                break
                                        else:
                                            # If no exact match, use the site path as site name
                                            site_name = site_path
                                            # Try to find a site with similar name
                                            for site in sites:
                                                if site_path.lower() in site.get("name", "").lower() or site_path.lower() in site.get("displayName", "").lower():
                                                    site_name = site.get("displayName", site_path)
                                                    site_id = site.get("id", "unknown")
                                                    break
                            except Exception as e:
                                logger.warning(f"Error extracting site info from URL {web_url}: {e}")

                        # Use improved folder detection logic
                        is_folder = False
                        if 'folder' in item:
                            is_folder = True
                        else:
                            # Try to detect by URL pattern or other indicators
                            name = item.get('name', '')

                            # Check if URL has no file extension and looks like a folder path
                            if '.' not in name and ('/' in web_url or '\\' in web_url):
                                is_folder = True

                        item_type = "folder" if is_folder else "file"
                        item_path = item.get("parentReference", {}).get("path", "")
                        logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")

                        # Simple filtering like test file - just check search type
                        if searchType == "files" and is_folder:
                            continue  # Skip folders when searching for files
                        elif searchType == "folders" and not is_folder:
                            continue  # Skip files when searching for folders

                        # Simple approach like test file - no complex filtering
                        logger.debug(f"Item '{item_name}' found - adding to results")

                        # Create result with full path information for proper action chaining
                        parent_path = item.get("parentReference", {}).get("path", "")

                        # Extract the full SharePoint path from webUrl or parentReference
                        full_path = ""
                        if web_url:
                            # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
                            if '/sites/' in web_url:
                                path_part = web_url.split('/sites/')[1]
                                # Decode URL encoding and convert to backslash format
                                decoded_path = urllib.parse.unquote(path_part)
                                full_path = "\\" + decoded_path.replace('/', '\\')
                        elif parent_path:
                            # Use parentReference path if available
                            full_path = parent_path.replace('/', '\\')

                        doc_info = {
                            "id": item.get("id"),
                            "name": item.get("name"),
                            "type": "folder" if is_folder else "file",
                            "siteName": site_name,
                            "siteId": site_id,
                            "webUrl": web_url,
                            "fullPath": full_path,
                            "parentPath": parent_path
                        }

                        found_documents.append(doc_info)

                    logger.info(f"Found {len(found_documents)} documents from unified search")

                except Exception as e:
                    logger.error(f"Error performing unified folder search: {str(e)}")
                    # Fallback to site-by-site search
                    pass

            # If no unified search was performed or it failed, fall back to site-by-site search
            if not found_documents:
                # Use simple approach like test file - no complex filtering
                site_scoped_sites = sites

                for site in site_scoped_sites:
                    site_id = site["id"]
                    site_name = site["displayName"]
                    site_url = site["webUrl"]

                    logger.info(f"Searching in site: {site_name} ({site_url})")

                    # Use Microsoft Graph API for this specific site
                    # Handle empty or wildcard queries
                    if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
                        # For wildcard/empty queries, list all items in the drive
                        endpoint = f"sites/{site_id}/drive/root/children"
                    else:
                        # For files, use regular search API
                        search_query = fileQuery.replace("'", "''")  # Escape single quotes for OData
                        endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
                        logger.info(f"Using search API for files with query: '{search_query}'")

                    # Make the search API call (files)
                    search_result = await self._makeGraphApiCall(endpoint)
                    if "error" in search_result:
                        logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
                        continue
                    # Process search results for this site (files)
                    items = search_result.get("value", [])
                    logger.info(f"Retrieved {len(items)} items from site {site_name}")

                    site_documents = []

                    for item in items:
                        item_name = item.get("name", "")

                        # Use improved folder detection logic
                        is_folder = False
                        if 'folder' in item:
                            is_folder = True
                        else:
                            # Try to detect by URL pattern or other indicators
                            web_url = item.get('webUrl', '')
                            name = item.get('name', '')

                            # Check if URL has no file extension and looks like a folder path
                            if '.' not in name and ('/' in web_url or '\\' in web_url):
                                is_folder = True

                        item_type = "folder" if is_folder else "file"
                        item_path = item.get("parentReference", {}).get("path", "")
                        logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")

                        # Simple filtering like test file - just check search type
                        if searchType == "files" and is_folder:
                            continue  # Skip folders when searching for files
                        elif searchType == "folders" and not is_folder:
                            continue  # Skip files when searching for folders

                        # Simple approach like test file - no complex filtering
                        logger.debug(f"Item '{item_name}' found - adding to results")

                        # Create result with full path information for proper action chaining
                        web_url = item.get("webUrl", "")
                        parent_path = item.get("parentReference", {}).get("path", "")

                        # Extract the full SharePoint path from webUrl or parentReference
                        full_path = ""
                        if web_url:
                            # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
                            if '/sites/' in web_url:
                                path_part = web_url.split('/sites/')[1]
                                # Decode URL encoding and convert to backslash format
                                decoded_path = urllib.parse.unquote(path_part)
                                full_path = "\\" + decoded_path.replace('/', '\\')
                        elif parent_path:
                            # Use parentReference path if available
                            full_path = parent_path.replace('/', '\\')

                        doc_info = {
                            "id": item.get("id"),
                            "name": item.get("name"),
                            "type": "folder" if is_folder else "file",
                            "siteName": site_name,
                            "siteId": site_id,
                            "webUrl": web_url,
                            "fullPath": full_path,
                            "parentPath": parent_path
                        }

                        site_documents.append(doc_info)

                    found_documents.extend(site_documents)
                    all_sites_searched.append({
                        "siteName": site_name,
                        "siteUrl": site_url,
                        "siteId": site_id,
                        "documentsFound": len(site_documents)
                    })

                    logger.info(f"Found {len(site_documents)} documents in site {site_name}")

            # Limit total results to maxResults
            if len(found_documents) > maxResults:
                found_documents = found_documents[:maxResults]
                logger.info(f"Limited results to {maxResults} items")

            result_data = {
                "searchQuery": searchQuery,
                "totalResults": len(found_documents),
                "maxResults": maxResults,
                "foundDocuments": found_documents,
                "timestamp": self.services.utils.timestampGetUtc()
            }

        except Exception as e:
            logger.error(f"Error searching SharePoint: {str(e)}")
            return ActionResult.isFailure(error=str(e))

        # Use default JSON format for output
        output_extension = ".json"  # Default
        output_mime_type = "application/json"  # Default

        return ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=f"sharepoint_find_path_{self._format_timestamp_for_filename()}{output_extension}",
                    documentData=json.dumps(result_data, indent=2),
                    mimeType=output_mime_type
                )
            ]
        )

    except Exception as e:
        logger.error(f"Error finding document path: {str(e)}")
        return ActionResult.isFailure(error=str(e))
|
|
|
|
@action
|
|
async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
|
|
"""
|
|
GENERAL:
|
|
- Purpose: Read documents from SharePoint and extract content/metadata.
|
|
- Input requirements: connectionReference (required); documentList (required); optional pathObject or pathQuery; includeMetadata.
|
|
- Output format: JSON with read results per document.
|
|
|
|
Parameters:
|
|
- documentList (list, required): Document list reference(s) to read.
|
|
- connectionReference (str, required): Microsoft connection label.
|
|
- pathObject (str, optional): Reference to a previous path result.
|
|
- pathQuery (str, optional): Path query if no pathObject.
|
|
- includeMetadata (bool, optional): Include metadata. Default: True.
|
|
"""
|
|
try:
|
|
documentList = parameters.get("documentList")
|
|
if isinstance(documentList, str):
|
|
documentList = [documentList]
|
|
connectionReference = parameters.get("connectionReference")
|
|
pathQuery = parameters.get("pathQuery", "*")
|
|
pathObject = parameters.get("pathObject")
|
|
includeMetadata = parameters.get("includeMetadata", True)
|
|
|
|
if not documentList or not connectionReference:
|
|
return ActionResult.isFailure(error="Document list reference and connection reference are required")
|
|
|
|
# If pathObject is provided, extract folder IDs from it
|
|
# Note: pathObject takes precedence over pathQuery when both are provided
|
|
if pathObject:
|
|
if pathQuery and pathQuery != "*":
|
|
logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
|
|
try:
|
|
# Resolve the reference label to get the actual document list
|
|
document_list = self.services.chat.getChatDocumentsFromDocumentList([pathObject])
|
|
if not document_list or len(document_list) == 0:
|
|
return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
|
|
|
|
# Get the first document's content (which should be the JSON)
|
|
first_document = document_list[0]
|
|
file_data = self.services.chat.getFileData(first_document.fileId)
|
|
if not file_data:
|
|
return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")
|
|
|
|
# Parse the JSON content
|
|
result_data = json.loads(file_data)
|
|
found_documents = result_data.get("foundDocuments", [])
|
|
|
|
# Extract folder IDs from the result
|
|
folder_ids = []
|
|
for doc in found_documents:
|
|
if doc.get("type") == "folder":
|
|
folder_ids.append(doc.get("id"))
|
|
|
|
if folder_ids:
|
|
# Use the first folder ID found as pathQuery
|
|
pathQuery = folder_ids[0]
|
|
logger.info(f"Using folder ID from pathObject: {pathQuery}")
|
|
else:
|
|
return ActionResult.isFailure(error="No folders found in pathObject")
|
|
|
|
except json.JSONDecodeError as e:
|
|
return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
|
|
except Exception as e:
|
|
return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
|
|
|
|
# Get documents from reference - ensure documentList is a list, not a string
|
|
# documentList is already normalized above
|
|
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
|
|
|
if not chatDocuments:
|
|
return ActionResult.isFailure(error="No documents found for the provided reference")
|
|
|
|
connection = self._getMicrosoftConnection(connectionReference)
|
|
if not connection:
|
|
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
|
|
|
|
# Determine sites to use - strict validation: pathObject → pathQuery → ERROR
|
|
sites = None
|
|
|
|
# Step 1: Check pathObject first
|
|
if pathObject:
|
|
# When pathObject is provided, we should have specific site information
|
|
# Extract site information from the pathObject result
|
|
try:
|
|
# Get the site information from the first folder in pathObject
|
|
if 'found_documents' in locals() and found_documents:
|
|
first_folder = found_documents[0]
|
|
site_name = first_folder.get("siteName")
|
|
site_id = first_folder.get("siteId")
|
|
|
|
if site_name and site_id:
|
|
# Use the specific site from pathObject instead of discovering all sites
|
|
sites = [{
|
|
"id": site_id,
|
|
"displayName": site_name,
|
|
"webUrl": first_folder.get("webUrl", "")
|
|
}]
|
|
logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
|
|
else:
|
|
# Site info missing from pathObject - this is an error
|
|
return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
|
|
else:
|
|
# No documents found in pathObject - this is an error
|
|
return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for read operation.")
|
|
except Exception as e:
|
|
# Error processing pathObject - this is an error
|
|
return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for read operation.")
|
|
|
|
# Step 2: If no pathObject, check pathQuery
|
|
elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
|
|
# Validate pathQuery format
|
|
if not pathQuery.startswith('/'):
|
|
return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")
|
|
|
|
# Check if pathQuery contains search terms (words without proper path structure)
|
|
valid_path_prefixes = ['/site:', '/Documents', '/documents', '/Shared Documents', '/shared documents']
|
|
if not any(pathQuery.startswith(prefix) for prefix in valid_path_prefixes):
|
|
return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
|
|
|
|
# For pathQuery, we need to discover sites to find the specific one
|
|
all_sites = await self._discoverSharePointSites()
|
|
if not all_sites:
|
|
return ActionResult.isFailure(error="No SharePoint sites found or accessible")
|
|
|
|
# If pathQuery starts with /site:, extract site name and filter
|
|
if pathQuery.startswith('/site:'):
|
|
# Extract site name from /site:Company Share/... format
|
|
site_path_part = pathQuery[6:] # Remove '/site:'
|
|
if '/' in site_path_part:
|
|
site_name = site_path_part.split('/', 1)[0]
|
|
else:
|
|
site_name = site_path_part
|
|
|
|
# Filter sites by name (case-insensitive substring match)
|
|
sites = self._filter_sites_by_hint(all_sites, site_name)
|
|
if not sites:
|
|
return ActionResult.isFailure(error=f"No SharePoint site found matching '{site_name}'")
|
|
logger.info(f"Filtered to site(s) matching '{site_name}': {[s['displayName'] for s in sites]}")
|
|
else:
|
|
sites = all_sites
|
|
else:
|
|
# Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
|
|
return ActionResult.isFailure(error="No valid upload path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
|
|
|
|
if not sites:
|
|
return ActionResult.isFailure(error="No valid target site determined for read operation")
|
|
|
|
# Resolve path query into search paths
|
|
search_paths = self._resolvePathQuery(pathQuery)
|
|
|
|
# Process each chat document across all sites
|
|
read_results = []
|
|
|
|
for i, chatDocument in enumerate(chatDocuments):
|
|
try:
|
|
fileId = chatDocument.fileId
|
|
fileName = chatDocument.fileName
|
|
|
|
# Search for this file across all sites
|
|
file_found = False
|
|
|
|
for site in sites:
|
|
site_id = site["id"]
|
|
site_name = site["displayName"]
|
|
site_url = site["webUrl"]
|
|
|
|
# Try to find the file by name in this site
|
|
search_query = fileName.replace("'", "''") # Escape single quotes for OData
|
|
endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
|
|
|
|
search_result = await self._makeGraphApiCall(endpoint)
|
|
|
|
if "error" in search_result:
|
|
continue
|
|
|
|
items = search_result.get("value", [])
|
|
for item in items:
|
|
if item.get("name") == fileName:
|
|
# Found the file, get its details
|
|
file_id = item.get("id")
|
|
file_endpoint = f"sites/{site_id}/drive/items/{file_id}"
|
|
|
|
# Get file metadata
|
|
file_info_result = await self._makeGraphApiCall(file_endpoint)
|
|
|
|
if "error" in file_info_result:
|
|
continue
|
|
|
|
# Build result with metadata
|
|
result_item = {
|
|
"fileId": fileId,
|
|
"fileName": fileName,
|
|
"sharepointFileId": file_id,
|
|
"siteName": site_name,
|
|
"siteUrl": site_url,
|
|
"size": file_info_result.get("size", 0),
|
|
"createdDateTime": file_info_result.get("createdDateTime"),
|
|
"lastModifiedDateTime": file_info_result.get("lastModifiedDateTime"),
|
|
"webUrl": file_info_result.get("webUrl")
|
|
}
|
|
|
|
# Add metadata if requested
|
|
if includeMetadata:
|
|
result_item["metadata"] = {
|
|
"mimeType": file_info_result.get("file", {}).get("mimeType"),
|
|
"downloadUrl": file_info_result.get("@microsoft.graph.downloadUrl"),
|
|
"createdBy": file_info_result.get("createdBy", {}),
|
|
"lastModifiedBy": file_info_result.get("lastModifiedBy", {}),
|
|
"parentReference": file_info_result.get("parentReference", {})
|
|
}
|
|
|
|
# Get file content if it's a readable format
|
|
mime_type = file_info_result.get("file", {}).get("mimeType", "")
|
|
if mime_type.startswith("text/") or mime_type in [
|
|
"application/json", "application/xml", "application/javascript"
|
|
]:
|
|
# Download the file content
|
|
content_endpoint = f"sites/{site_id}/drive/items/{file_id}/content"
|
|
|
|
# For content download, we need to handle binary data
|
|
try:
|
|
async with aiohttp.ClientSession() as session:
|
|
headers = {"Authorization": f"Bearer {self.services.sharepoint._target.accessToken}"}
|
|
async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
|
|
if response.status == 200:
|
|
content = await response.text()
|
|
result_item["content"] = content
|
|
else:
|
|
result_item["content"] = f"Could not download content: HTTP {response.status}"
|
|
except Exception as e:
|
|
result_item["content"] = f"Error downloading content: {str(e)}"
|
|
else:
|
|
result_item["content"] = f"Binary file type ({mime_type}) - content not retrieved"
|
|
|
|
read_results.append(result_item)
|
|
file_found = True
|
|
break
|
|
|
|
if file_found:
|
|
break
|
|
|
|
if not file_found:
|
|
read_results.append({
|
|
"fileId": fileId,
|
|
"fileName": fileName,
|
|
"error": "File not found in any accessible SharePoint site",
|
|
"content": None
|
|
})
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error reading document {chatDocument.fileName}: {str(e)}")
|
|
read_results.append({
|
|
"fileId": chatDocument.fileId,
|
|
"fileName": chatDocument.fileName,
|
|
"error": str(e),
|
|
"content": None
|
|
})
|
|
|
|
result_data = {
|
|
"connectionReference": connectionReference,
|
|
"pathQuery": pathQuery,
|
|
"documentList": documentList,
|
|
"includeMetadata": includeMetadata,
|
|
"sitesSearched": len(sites),
|
|
"readResults": read_results,
|
|
"connection": {
|
|
"id": connection["id"],
|
|
"authority": "microsoft",
|
|
"reference": connectionReference
|
|
},
|
|
"timestamp": self.services.utils.timestampGetUtc()
|
|
}
|
|
|
|
# Use default JSON format for output
|
|
output_extension = ".json" # Default
|
|
output_mime_type = "application/json" # Default
|
|
|
|
|
|
return ActionResult(
|
|
success=True,
|
|
documents=[
|
|
ActionDocument(
|
|
documentName=f"sharepoint_documents_{self._format_timestamp_for_filename()}{output_extension}",
|
|
documentData=json.dumps(result_data, indent=2),
|
|
mimeType=output_mime_type
|
|
)
|
|
]
|
|
)
|
|
except Exception as e:
|
|
logger.error(f"Error reading SharePoint documents: {str(e)}")
|
|
return ActionResult(
|
|
success=False,
|
|
error=str(e)
|
|
)
|
|
|
|
@action
|
|
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|
"""
|
|
GENERAL:
|
|
- Purpose: Upload documents to SharePoint. Only to choose this action with a connectionReference
|
|
- Input requirements: connectionReference (required); documentList (required); optional pathObject or pathQuery.
|
|
- Output format: JSON with upload status and file info.
|
|
|
|
Parameters:
|
|
- connectionReference (str, required): Microsoft connection label.
|
|
- pathObject (str, optional): Reference to a previous path result.
|
|
- pathQuery (str, optional): Upload target path if no pathObject.
|
|
- documentList (list, required): Document reference(s) to upload. File names are taken from the documents.
|
|
"""
|
|
try:
|
|
connectionReference = parameters.get("connectionReference")
|
|
pathQuery = parameters.get("pathQuery")
|
|
documentList = parameters.get("documentList")
|
|
if isinstance(documentList, str):
|
|
documentList = [documentList]
|
|
pathObject = parameters.get("pathObject")
|
|
|
|
upload_path = pathQuery
|
|
logger.debug(f"Using pathQuery: {pathQuery}")
|
|
|
|
if not connectionReference or not documentList:
|
|
return ActionResult.isFailure(error="Connection reference and document list are required")
|
|
|
|
# If pathObject is provided, extract folder IDs from it
|
|
if pathObject:
|
|
try:
|
|
# Resolve the reference label to get the actual document list
|
|
document_list = self.services.chat.getChatDocumentsFromDocumentList([pathObject])
|
|
if not document_list or len(document_list) == 0:
|
|
return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
|
|
|
|
# Get the first document's content (which should be the JSON)
|
|
first_document = document_list[0]
|
|
file_data = self.services.chat.getFileData(first_document.fileId)
|
|
if not file_data:
|
|
return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")
|
|
|
|
# Parse the JSON content
|
|
result_data = json.loads(file_data)
|
|
|
|
# Debug: Log the structure of the result document
|
|
logger.info(f"Result document keys: {list(result_data.keys())}")
|
|
|
|
# Handle different result document formats
|
|
found_documents = []
|
|
|
|
# Check if it's a direct SharePoint result (has foundDocuments)
|
|
if "foundDocuments" in result_data:
|
|
found_documents = result_data.get("foundDocuments", [])
|
|
logger.info(f"Found {len(found_documents)} documents in foundDocuments array")
|
|
# Check if it's an AI validation result (has result string with validationReport)
|
|
elif "result" in result_data and "validationReport" in result_data["result"]:
|
|
try:
|
|
# Parse the nested JSON in the result field
|
|
nested_result = json.loads(result_data["result"])
|
|
validation_report = nested_result.get("validationReport", {})
|
|
document_details = validation_report.get("documentDetails", {})
|
|
|
|
if document_details:
|
|
# Convert the single document details to the expected format
|
|
doc = {
|
|
"id": document_details.get("id"),
|
|
"name": document_details.get("name"),
|
|
"type": document_details.get("type", "").lower(), # Convert "Folder" to "folder"
|
|
"siteName": document_details.get("siteName"),
|
|
"siteId": document_details.get("siteId"),
|
|
"fullPath": document_details.get("fullPath"),
|
|
"webUrl": document_details.get("webUrl", ""),
|
|
"parentPath": document_details.get("parentPath", "")
|
|
}
|
|
found_documents = [doc]
|
|
logger.info(f"Extracted 1 document from validation report")
|
|
except json.JSONDecodeError as e:
|
|
logger.error(f"Failed to parse nested JSON in result field: {e}")
|
|
return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}")
|
|
|
|
# Debug: Log what we found in the result document
|
|
logger.info(f"Result document contains {len(found_documents)} documents")
|
|
for i, doc in enumerate(found_documents):
|
|
logger.info(f" Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'")
|
|
|
|
# Extract folder information from the result
|
|
folders = []
|
|
for doc in found_documents:
|
|
if doc.get("type") == "folder":
|
|
folders.append(doc)
|
|
|
|
logger.info(f"Found {len(folders)} folders in result document")
|
|
|
|
if folders:
|
|
# Use the first folder found - prefer folder ID for direct API calls
|
|
first_folder = folders[0]
|
|
if first_folder.get("id"):
|
|
# Use folder ID directly for most reliable API calls
|
|
upload_path = first_folder.get("id")
|
|
logger.info(f"Using folder ID from pathObject: {upload_path}")
|
|
elif first_folder.get("fullPath"):
|
|
# Extract the correct path portion from fullPath by removing site name
|
|
full_path = first_folder.get("fullPath")
|
|
# fullPath format: \\SiteName\\Library\\Folder\\SubFolder
|
|
# We need to remove the first two parts (\\SiteName\\) to get the actual folder path
|
|
path_parts = full_path.lstrip('\\').split('\\')
|
|
if len(path_parts) > 1:
|
|
# Remove the first part (site name) and reconstruct the path
|
|
actual_path = '\\'.join(path_parts[1:])
|
|
upload_path = actual_path
|
|
logger.info(f"Extracted path from fullPath: {upload_path}")
|
|
else:
|
|
upload_path = full_path
|
|
logger.info(f"Using full path from pathObject (no site name to remove): {upload_path}")
|
|
else:
|
|
return ActionResult.isFailure(error="No valid folder information found in pathObject")
|
|
else:
|
|
return ActionResult.isFailure(error="No folders found in pathObject")
|
|
|
|
except json.JSONDecodeError as e:
|
|
return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
|
|
except Exception as e:
|
|
return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
|
|
|
|
# Get Microsoft connection
|
|
connection = self._getMicrosoftConnection(connectionReference)
|
|
if not connection:
|
|
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
|
|
|
|
# Get documents from reference - ensure documentList is a list, not a string
|
|
if isinstance(documentList, str):
|
|
documentList = [documentList] # Convert string to list
|
|
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
|
if not chatDocuments:
|
|
return ActionResult.isFailure(error="No documents found for the provided reference")
|
|
|
|
# Determine sites to use based on whether pathObject was provided
|
|
sites = None
|
|
if pathObject:
|
|
# When pathObject is provided, we should have specific site information
|
|
# Extract site information from the pathObject result
|
|
try:
|
|
# Get the site information from the first folder in pathObject
|
|
if 'found_documents' in locals() and found_documents:
|
|
first_folder = found_documents[0]
|
|
site_name = first_folder.get("siteName")
|
|
site_id = first_folder.get("siteId")
|
|
|
|
if site_name and site_id:
|
|
# Use the specific site from pathObject instead of discovering all sites
|
|
sites = [{
|
|
"id": site_id,
|
|
"displayName": site_name,
|
|
"webUrl": first_folder.get("webUrl", "")
|
|
}]
|
|
logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
|
|
else:
|
|
# Site info missing from pathObject - this is an error, not a fallback
|
|
return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for upload.")
|
|
else:
|
|
# No documents found in pathObject - this is an error
|
|
return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for upload.")
|
|
except Exception as e:
|
|
# Error processing pathObject - this is an error, not a fallback
|
|
return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for upload.")
|
|
else:
|
|
# No pathObject provided - check if pathQuery is valid
|
|
if not upload_path or upload_path.strip() == "" or upload_path.strip() == "*":
|
|
return ActionResult.isFailure(error="No valid upload path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
|
|
|
|
# Validate pathQuery format
|
|
if not upload_path.startswith('/'):
|
|
return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")
|
|
|
|
# Check if upload_path contains search terms (words without proper path structure)
|
|
if not upload_path.startswith('/site:') and not upload_path.startswith('/Documents') and not upload_path.startswith('/Shared Documents'):
|
|
return ActionResult.isFailure(error=f"Invalid pathQuery '{upload_path}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
|
|
|
|
# For pathQuery, we need to discover sites to find the specific one
|
|
sites = await self._discoverSharePointSites()
|
|
if not sites:
|
|
return ActionResult.isFailure(error="No SharePoint sites found or accessible")
|
|
|
|
if not sites:
|
|
return ActionResult.isFailure(error="No valid target site determined for upload")
|
|
|
|
# Process upload paths based on whether pathObject was provided
|
|
upload_site_scope = None
|
|
if not pathObject:
|
|
# Parse the validated pathQuery to extract site and path information
|
|
parsed = self._parse_site_scoped_path(upload_path)
|
|
if not parsed:
|
|
return ActionResult.isFailure(error="Invalid upload_path. Use /site:<Site Display Name>/<Library or Folder Path>")
|
|
|
|
# Find matching site
|
|
candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"]) # substring match
|
|
# Choose exact displayName match if available
|
|
exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
|
|
selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None)
|
|
if not selected_site:
|
|
return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible")
|
|
|
|
upload_site_scope = selected_site
|
|
# Use the inner path portion as the actual upload target path
|
|
# Remove document library name from path (same logic as listDocuments)
|
|
inner_path = parsed['innerPath'].lstrip('/')
|
|
path_segments = [s for s in inner_path.split('/') if s.strip()]
|
|
if len(path_segments) > 1:
|
|
# Path has multiple segments - first might be a library name
|
|
# Try without first segment (assuming it's a library name)
|
|
inner_path = '/'.join(path_segments[1:])
|
|
logger.info(f"Removed first path segment (potential library name), path changed from '{parsed['innerPath']}' to '{inner_path}'")
|
|
elif len(path_segments) == 1:
|
|
# Only one segment - if it's a common library-like name, use empty path (root)
|
|
first_segment_lower = path_segments[0].lower()
|
|
library_indicators = ['document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek']
|
|
if any(indicator in first_segment_lower for indicator in library_indicators):
|
|
inner_path = ''
|
|
logger.info(f"First segment '{path_segments[0]}' appears to be a library name, using root")
|
|
|
|
upload_paths = [f"/{inner_path}" if inner_path else "/"]
|
|
sites = [selected_site]
|
|
else:
|
|
# When using pathObject, check if upload_path is a folder ID or a path
|
|
if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'):
|
|
# It's a folder ID - use it directly
|
|
upload_paths = [upload_path]
|
|
logger.info(f"Using folder ID directly for upload: {upload_path}")
|
|
else:
|
|
# It's a path - resolve it normally
|
|
upload_paths = self._resolvePathQuery(upload_path)
|
|
|
|
# Process each document upload
|
|
upload_results = []
|
|
|
|
# Extract file names from documents
|
|
fileNames = [doc.fileName for doc in chatDocuments]
|
|
logger.info(f"Using file names from documentList: {fileNames}")
|
|
|
|
for i, (chatDocument, fileName) in enumerate(zip(chatDocuments, fileNames)):
|
|
try:
|
|
fileId = chatDocument.fileId
|
|
file_data = self.services.chat.getFileData(fileId)
|
|
|
|
if not file_data:
|
|
logger.warning(f"File data not found for fileId: {fileId}")
|
|
upload_results.append({
|
|
"fileName": fileName,
|
|
"fileId": fileId,
|
|
"error": "File data not found",
|
|
"uploadStatus": "failed"
|
|
})
|
|
continue
|
|
|
|
# Upload to the first available site (or could be made configurable)
|
|
upload_successful = False
|
|
|
|
for site in sites:
|
|
site_id = site["id"]
|
|
site_name = site["displayName"]
|
|
site_url = site["webUrl"]
|
|
|
|
# Use the first upload path or default to Documents
|
|
upload_path = upload_paths[0] if upload_paths else "/Documents"
|
|
|
|
# Handle wildcard paths - replace with default Documents folder
|
|
if upload_path == "*":
|
|
upload_path = "/Documents"
|
|
logger.warning(f"Wildcard path '*' detected, using default '/Documents' folder for upload")
|
|
|
|
# Check if upload_path is a folder ID or a regular path
|
|
if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'):
|
|
# It's a folder ID - use the folder-specific upload endpoint
|
|
upload_endpoint = f"sites/{site_id}/drive/items/{upload_path}:/{fileName}:/content"
|
|
logger.info(f"Using folder ID upload endpoint: {upload_endpoint}")
|
|
else:
|
|
# It's a regular path - use the root-based upload endpoint
|
|
upload_path = upload_path.rstrip('/') + '/' + fileName
|
|
upload_path_clean = upload_path.lstrip('/')
|
|
upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content"
|
|
logger.info(f"Using path-based upload endpoint: {upload_endpoint}")
|
|
|
|
# Upload endpoint for small files (< 4MB)
|
|
if len(file_data) < 4 * 1024 * 1024: # 4MB
|
|
|
|
# Upload the file
|
|
upload_result = await self._makeGraphApiCall(
|
|
upload_endpoint,
|
|
method="PUT",
|
|
data=file_data
|
|
)
|
|
|
|
if "error" not in upload_result:
|
|
upload_results.append({
|
|
"fileName": fileName,
|
|
"fileId": fileId,
|
|
"uploadStatus": "success",
|
|
"siteName": site_name,
|
|
"siteUrl": site_url,
|
|
"uploadPath": upload_path,
|
|
"uploadEndpoint": upload_endpoint,
|
|
"sharepointFileId": upload_result.get("id"),
|
|
"webUrl": upload_result.get("webUrl"),
|
|
"size": upload_result.get("size"),
|
|
"createdDateTime": upload_result.get("createdDateTime")
|
|
})
|
|
upload_successful = True
|
|
break
|
|
else:
|
|
logger.warning(f"Upload failed to site {site_name}: {upload_result['error']}")
|
|
else:
|
|
# For large files, we would need to implement resumable upload
|
|
logger.warning(f"File too large ({len(file_data)} bytes) for site {site_name}")
|
|
continue
|
|
|
|
if not upload_successful:
|
|
upload_results.append({
|
|
"fileName": fileName,
|
|
"fileId": fileId,
|
|
"error": f"File too large ({len(file_data)} bytes) or upload failed to all sites. Files larger than 4MB require resumable upload (not implemented).",
|
|
"uploadStatus": "failed"
|
|
})
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error uploading document {fileName}: {str(e)}")
|
|
upload_results.append({
|
|
"fileName": fileName,
|
|
"fileId": fileId,
|
|
"error": str(e),
|
|
"uploadStatus": "failed"
|
|
})
|
|
|
|
# Create result data
|
|
result_data = {
|
|
"connectionReference": connectionReference,
|
|
"pathQuery": upload_path,
|
|
"documentList": documentList,
|
|
"fileNames": fileNames,
|
|
"sitesAvailable": len(sites),
|
|
"uploadResults": upload_results,
|
|
"connection": {
|
|
"id": connection["id"],
|
|
"authority": "microsoft",
|
|
"reference": connectionReference
|
|
},
|
|
"timestamp": self.services.utils.timestampGetUtc()
|
|
}
|
|
|
|
# Use default JSON format for output
|
|
output_extension = ".json" # Default
|
|
output_mime_type = "application/json" # Default
|
|
|
|
|
|
return ActionResult(
|
|
success=True,
|
|
documents=[
|
|
ActionDocument(
|
|
documentName=f"sharepoint_upload_{self._format_timestamp_for_filename()}{output_extension}",
|
|
documentData=json.dumps(result_data, indent=2),
|
|
mimeType=output_mime_type
|
|
)
|
|
]
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error uploading to SharePoint: {str(e)}")
|
|
return ActionResult(
|
|
success=False,
|
|
error=str(e)
|
|
)
|
|
|
|
@action
async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: List documents and folders in SharePoint paths across sites.
    - Input requirements: connectionReference (required); optional pathObject or pathQuery; includeSubfolders.
    - Output format: JSON with folder items and metadata.

    Parameters:
    - connectionReference (str, required): Microsoft connection label.
    - pathObject (str, optional): Reference to a previous path result.
    - pathQuery (str, optional): Path query if no pathObject.
    - includeSubfolders (bool, optional): Include one level of subfolders. Default: False.

    Returns:
    - ActionResult with a single JSON ActionDocument containing "listResults"
      (one entry per resolved folder path, each with per-site item lists),
      or a failed ActionResult with an error message on any validation/API failure.

    Notes:
    - pathObject takes precedence over pathQuery when both are provided.
    - Site determination is strict: pathObject site info, else a /site:-qualified
      pathQuery, else the action fails (no fallback to all sites).
    """
    try:
        connectionReference = parameters.get("connectionReference")
        pathObject = parameters.get("pathObject")
        pathQuery = parameters.get("pathQuery")
        includeSubfolders = parameters.get("includeSubfolders", False)  # Default to False for better UX

        # list_query starts as the raw pathQuery; it may be replaced below by a
        # folder ID or path extracted from pathObject.
        list_query = pathQuery
        logger.info(f"Using pathQuery: {pathQuery}")

        if not connectionReference:
            return ActionResult.isFailure(error="Connection reference is required")

        # If pathObject is provided, resolve the reference and extract folder IDs from it
        # Note: pathObject takes precedence over pathQuery when both are provided
        if pathObject:
            if pathQuery and pathQuery != "*":
                logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
            try:
                # Resolve the reference label to get the actual document list
                document_list = self.services.chat.getChatDocumentsFromDocumentList([pathObject])
                if not document_list or len(document_list) == 0:
                    return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")

                # Get the first document's content (which should be the JSON)
                first_document = document_list[0]
                logger.info(f"Document fileId: {first_document.fileId}, fileName: {first_document.fileName}")
                file_data = self.services.chat.getFileData(first_document.fileId)
                if not file_data:
                    return ActionResult.isFailure(error=f"No file data found for document: {pathObject} (fileId: {first_document.fileId})")
                logger.info(f"File data length: {len(file_data) if file_data else 0}")

                # Parse the JSON content
                # (json.JSONDecodeError subclasses ValueError, so the ValueError
                # handler below catches parse failures here)
                result_data = json.loads(file_data)

                # Debug: Log the structure of the result document
                logger.info(f"Result document keys: {list(result_data.keys())}")

                # Handle different result document formats
                found_documents = []

                # Check if it's a direct SharePoint result (has foundDocuments)
                if "foundDocuments" in result_data:
                    found_documents = result_data.get("foundDocuments", [])
                    logger.info(f"Found {len(found_documents)} documents in foundDocuments array")
                # Check if it's an AI validation result (has result string with validationReport)
                elif "result" in result_data and "validationReport" in result_data["result"]:
                    try:
                        # Parse the nested JSON in the result field
                        nested_result = json.loads(result_data["result"])
                        validation_report = nested_result.get("validationReport", {})
                        document_details = validation_report.get("documentDetails", {})

                        if document_details:
                            # Convert the single document details to the expected format
                            doc = {
                                "id": document_details.get("id"),
                                "name": document_details.get("name"),
                                "type": document_details.get("type", "").lower(),  # Convert "Folder" to "folder"
                                "siteName": document_details.get("siteName"),
                                "siteId": document_details.get("siteId"),
                                "fullPath": document_details.get("fullPath"),
                                "webUrl": document_details.get("webUrl", ""),
                                "parentPath": document_details.get("parentPath", "")
                            }
                            found_documents = [doc]
                            logger.info(f"Extracted 1 document from validation report")
                    except ValueError as e:
                        logger.error(f"Failed to parse nested JSON in result field: {e}")
                        return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}")

                # Debug: Log what we found in the result document
                logger.info(f"Result document contains {len(found_documents)} documents")
                for i, doc in enumerate(found_documents):
                    logger.info(f"  Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'")

                # Extract folder information from the result
                folders = []
                for doc in found_documents:
                    if doc.get("type") == "folder":
                        folders.append(doc)

                logger.info(f"Found {len(folders)} folders in result document")

                if folders:
                    # Use the first folder found - prefer folder ID for direct API calls
                    first_folder = folders[0]
                    if first_folder.get("id"):
                        # Use folder ID directly for most reliable API calls
                        list_query = first_folder.get("id")
                        logger.info(f"Using folder ID from pathObject: {list_query}")
                    elif first_folder.get("fullPath"):
                        # Extract the correct path portion from fullPath by removing site name
                        full_path = first_folder.get("fullPath")
                        # fullPath format: \\SiteName\\Library\\Folder\\SubFolder
                        # We need to remove the first two parts (\\SiteName\\) to get the actual folder path
                        path_parts = full_path.lstrip('\\').split('\\')
                        if len(path_parts) > 1:
                            # Remove the first part (site name) and reconstruct the path
                            actual_path = '\\'.join(path_parts[1:])
                            list_query = actual_path
                            logger.info(f"Extracted path from fullPath: {list_query}")
                        else:
                            list_query = full_path
                            logger.info(f"Using full path from pathObject (no site name to remove): {list_query}")
                    else:
                        return ActionResult.isFailure(error="No valid folder information found in pathObject")
                else:
                    return ActionResult.isFailure(error="No folders found in pathObject")

            except ValueError as e:
                return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
            except Exception as e:
                return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")

        # Get Microsoft connection (also configures the SharePoint service's
        # access token as a side effect — see _getMicrosoftConnection)
        connection = self._getMicrosoftConnection(connectionReference)
        if not connection:
            return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

        logger.info(f"Starting SharePoint listDocuments for list_query: {list_query}")
        logger.debug(f"Connection ID: {connection['id']}")

        # For listDocuments, if pathQuery starts with /site:, use it directly without parsing
        # (parsing would split on the colon and break the site name)
        if list_query and list_query.strip().startswith('/site:'):
            pathQuery = list_query.strip()
            fileQuery = "*"
            searchType = "all"
            searchOptions = {}
        else:
            # Parse list_query to extract path, search terms, search type, and options
            # NOTE(review): fileQuery/searchType/searchOptions are assigned here but
            # never read later in this method — only pathQuery is used. Confirm whether
            # they were intended to filter the listing results.
            pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(list_query)

        # Determine sites to use - strict validation: pathObject → pathQuery → ERROR
        sites = None

        # Step 1: Check pathObject first
        if pathObject:
            # When pathObject is provided, we should have specific site information
            # Extract site information from the pathObject result
            try:
                # Get the site information from the first folder in pathObject
                # NOTE(review): the 'found_documents' in locals() guard is defensive;
                # found_documents is always bound when pathObject resolution above
                # succeeded, so this mainly protects against future refactors.
                if 'found_documents' in locals() and found_documents:
                    first_folder = found_documents[0]
                    site_name = first_folder.get("siteName")
                    site_id = first_folder.get("siteId")

                    if site_name and site_id:
                        # Use the specific site from pathObject instead of discovering all sites
                        sites = [{
                            "id": site_id,
                            "displayName": site_name,
                            "webUrl": first_folder.get("webUrl", "")
                        }]
                        logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
                    else:
                        # Site info missing from pathObject - this is an error
                        return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for list operation.")
                else:
                    # No documents found in pathObject - this is an error
                    return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for list operation.")
            except Exception as e:
                # Error processing pathObject - this is an error
                return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for list operation.")

        # Step 2: If no pathObject, check pathQuery
        elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
            # Validate pathQuery format
            if not pathQuery.startswith('/'):
                return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")

            # Check if pathQuery contains search terms (words without proper path structure)
            valid_path_prefixes = ['/site:', '/Documents', '/documents', '/Shared Documents', '/shared documents']
            if not any(pathQuery.startswith(prefix) for prefix in valid_path_prefixes):
                return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")

            # For pathQuery, we need to discover sites to find the specific one
            all_sites = await self._discoverSharePointSites()
            if not all_sites:
                return ActionResult.isFailure(error="No SharePoint sites found or accessible")

            # If pathQuery starts with /site:, extract site name and filter
            if pathQuery.startswith('/site:'):
                # Extract site name from /site:Company Share/... format
                site_path_part = pathQuery[6:]  # Remove '/site:'
                if '/' in site_path_part:
                    site_name = site_path_part.split('/', 1)[0]
                else:
                    site_name = site_path_part

                # Filter sites by name (case-insensitive substring match)
                sites = self._filter_sites_by_hint(all_sites, site_name)
                if not sites:
                    return ActionResult.isFailure(error=f"No SharePoint site found matching '{site_name}'")
                logger.info(f"Filtered to site(s) matching '{site_name}': {[s['displayName'] for s in sites]}")
            else:
                sites = all_sites
        else:
            # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
            return ActionResult.isFailure(error="No valid list path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")

        if not sites:
            return ActionResult.isFailure(error="No valid target site determined for list operation")

        # Check if list_query is a folder ID (starts with 01PPXICCB...)
        # NOTE(review): the '01' prefix is a heuristic for Graph driveItem IDs and
        # subsumes the tenant-specific-looking '01PPXICCB' check; a path can never
        # match here because valid paths start with '/'. Confirm the heuristic
        # holds for all tenants before relying on it.
        if list_query.startswith('01PPXICCB') or list_query.startswith('01'):
            # Direct folder ID - use it directly
            folder_paths = [list_query]
            logger.info(f"Using direct folder ID: {list_query}")
        else:
            # Remove /site:SiteName prefix from pathQuery before resolving (it's only for site filtering)
            pathQueryForResolve = pathQuery
            if pathQuery.startswith('/site:'):
                # Remove /site:SiteName/ and keep the rest
                site_path_part = pathQuery[6:]  # Remove '/site:'
                if '/' in site_path_part:
                    # Remove the site name part, keep the folder path
                    pathQueryForResolve = '/' + site_path_part.split('/', 1)[1]
                else:
                    # Only site name, no path - use root
                    pathQueryForResolve = '/'

            # Remove first path segment if it looks like a document library name
            # In SharePoint Graph API, /drive/root already points to the default document library,
            # so library names in paths should be removed
            # Generic approach: if path has multiple segments, store original for fallback
            path_segments = [s for s in pathQueryForResolve.split('/') if s.strip()]
            if len(path_segments) > 1:
                # Path has multiple segments - first might be a library name
                # Store original for potential fallback
                # NOTE(review): original_path is only logged; no fallback retry with
                # the unstripped path is actually performed below.
                original_path = pathQueryForResolve
                # Try without first segment (assuming it's a library name)
                pathQueryForResolve = '/' + '/'.join(path_segments[1:])
                logger.info(f"Removed first path segment (potential library name), path changed from '{original_path}' to '{pathQueryForResolve}'")
            elif len(path_segments) == 1:
                # Only one segment - if it's a common library-like name, use root
                # (indicator list covers English and German library names)
                first_segment_lower = path_segments[0].lower()
                library_indicators = ['document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek']
                if any(indicator in first_segment_lower for indicator in library_indicators):
                    pathQueryForResolve = '/'
                    logger.info(f"First segment '{path_segments[0]}' appears to be a library name, using root")

            # Resolve path query into folder paths
            folder_paths = self._resolvePathQuery(pathQueryForResolve)
            logger.info(f"Resolved folder paths: {folder_paths}")

        # Process each folder path across all sites
        list_results = []

        for folderPath in folder_paths:
            try:
                folder_results = []

                for site in sites:
                    site_id = site["id"]
                    site_name = site["displayName"]
                    site_url = site["webUrl"]

                    logger.info(f"Listing folder {folderPath} in site: {site_name}")

                    # Determine the endpoint based on folder path:
                    # root listing, direct driveItem ID, or path-addressed folder.
                    if folderPath in ["/", ""] or folderPath == "*":
                        # Root folder
                        endpoint = f"sites/{site_id}/drive/root/children"
                    elif folderPath.startswith('01PPXICCB') or folderPath.startswith('01'):
                        # Direct folder ID (same driveItem-ID heuristic as above)
                        endpoint = f"sites/{site_id}/drive/items/{folderPath}/children"
                    else:
                        # Specific folder path - remove leading slash if present and URL encode
                        folder_path_clean = folderPath.lstrip('/')
                        # URL encode the path for Graph API (spaces and special characters need encoding)
                        folder_path_encoded = urllib.parse.quote(folder_path_clean, safe='/')
                        endpoint = f"sites/{site_id}/drive/root:/{folder_path_encoded}:/children"

                    # Make the API call to list folder contents
                    api_result = await self._makeGraphApiCall(endpoint)

                    if "error" in api_result:
                        # Per-site failures are non-fatal: skip this site, keep going
                        logger.warning(f"Failed to list folder {folderPath} in site {site_name}: {api_result['error']}")
                        continue

                    # Process the results
                    items = api_result.get("value", [])
                    processed_items = []

                    for item in items:
                        # Use improved folder detection logic: the Graph 'folder'
                        # facet is authoritative; otherwise fall back to a
                        # no-extension/URL-shape heuristic.
                        is_folder = False
                        if 'folder' in item:
                            is_folder = True
                        else:
                            # Try to detect by URL pattern or other indicators
                            web_url = item.get('webUrl', '')
                            name = item.get('name', '')

                            # Check if URL has no file extension and looks like a folder path
                            if '.' not in name and ('/' in web_url or '\\' in web_url):
                                is_folder = True

                        item_info = {
                            "id": item.get("id"),
                            "name": item.get("name"),
                            "size": item.get("size", 0),
                            "createdDateTime": item.get("createdDateTime"),
                            "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                            "webUrl": item.get("webUrl"),
                            "type": "folder" if is_folder else "file",
                            "siteName": site_name,
                            "siteUrl": site_url
                        }

                        # Add file-specific information
                        if "file" in item:
                            item_info.update({
                                "mimeType": item["file"].get("mimeType"),
                                "downloadUrl": item.get("@microsoft.graph.downloadUrl")
                            })

                        # Add folder-specific information
                        if "folder" in item:
                            item_info.update({
                                "childCount": item["folder"].get("childCount", 0)
                            })

                        processed_items.append(item_info)

                    # If include subfolders is enabled, get ONLY direct subfolder contents (1 level deep only)
                    if includeSubfolders:
                        folder_items = [item for item in processed_items if item['type'] == 'folder']
                        logger.info(f"Including subfolders - processing {len(folder_items)} folders")
                        subfolder_count = 0
                        max_subfolders = 10  # Limit to prevent infinite loops

                        for item in processed_items[:]:  # Use slice to avoid modifying list during iteration
                            if item["type"] == "folder" and subfolder_count < max_subfolders:
                                subfolder_count += 1
                                subfolder_path = f"{folderPath.rstrip('/')}/{item['name']}"
                                subfolder_endpoint = f"sites/{site_id}/drive/items/{item['id']}/children"

                                logger.debug(f"Getting contents of subfolder: {item['name']}")
                                subfolder_result = await self._makeGraphApiCall(subfolder_endpoint)
                                if "error" not in subfolder_result:
                                    subfolder_items = subfolder_result.get("value", [])
                                    logger.debug(f"Found {len(subfolder_items)} items in subfolder {item['name']}")

                                    for subfolder_item in subfolder_items:
                                        # Use improved folder detection logic for subfolder items
                                        subfolder_is_folder = False
                                        if 'folder' in subfolder_item:
                                            subfolder_is_folder = True
                                        else:
                                            # Try to detect by URL pattern or other indicators
                                            subfolder_web_url = subfolder_item.get('webUrl', '')
                                            subfolder_name = subfolder_item.get('name', '')

                                            # Check if URL has no file extension and looks like a folder path
                                            if '.' not in subfolder_name and ('/' in subfolder_web_url or '\\' in subfolder_web_url):
                                                subfolder_is_folder = True

                                        # Only add files and direct subfolders, NO RECURSION
                                        # (appended items carry parentPath, but since they
                                        # are added after the slice copy was taken, they are
                                        # never themselves expanded — guaranteeing one level)
                                        subfolder_item_info = {
                                            "id": subfolder_item.get("id"),
                                            "name": subfolder_item.get("name"),
                                            "size": subfolder_item.get("size", 0),
                                            "createdDateTime": subfolder_item.get("createdDateTime"),
                                            "lastModifiedDateTime": subfolder_item.get("lastModifiedDateTime"),
                                            "webUrl": subfolder_item.get("webUrl"),
                                            "type": "folder" if subfolder_is_folder else "file",
                                            "parentPath": subfolder_path,
                                            "siteName": site_name,
                                            "siteUrl": site_url
                                        }

                                        if "file" in subfolder_item:
                                            subfolder_item_info.update({
                                                "mimeType": subfolder_item["file"].get("mimeType"),
                                                "downloadUrl": subfolder_item.get("@microsoft.graph.downloadUrl")
                                            })

                                        processed_items.append(subfolder_item_info)
                                else:
                                    logger.warning(f"Failed to get contents of subfolder {item['name']}: {subfolder_result.get('error')}")
                            elif subfolder_count >= max_subfolders:
                                logger.warning(f"Reached maximum subfolder limit ({max_subfolders}), skipping remaining folders")
                                break

                        logger.info(f"Processed {subfolder_count} subfolders, total items: {len(processed_items)}")

                    folder_results.append({
                        "siteName": site_name,
                        "siteUrl": site_url,
                        "itemCount": len(processed_items),
                        "items": processed_items
                    })

                list_results.append({
                    "folderPath": folderPath,
                    "sitesProcessed": len(folder_results),
                    "siteResults": folder_results
                })

            except Exception as e:
                # A failure for one folder path is recorded and the loop continues
                logger.error(f"Error listing folder {folderPath}: {str(e)}")
                list_results.append({
                    "folderPath": folderPath,
                    "error": str(e),
                    "siteResults": []
                })

        # Create result data
        result_data = {
            "pathQuery": list_query,
            "includeSubfolders": includeSubfolders,
            "sitesSearched": len(sites),
            "listResults": list_results,
            "timestamp": self.services.utils.timestampGetUtc()
        }

        # Use default JSON format for output
        output_extension = ".json"  # Default
        output_mime_type = "application/json"  # Default

        return ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=f"sharepoint_document_list_{self._format_timestamp_for_filename()}{output_extension}",
                    documentData=json.dumps(result_data, indent=2),
                    mimeType=output_mime_type
                )
            ]
        )

    except Exception as e:
        # Top-level boundary: any unexpected error becomes a failed ActionResult
        logger.error(f"Error listing SharePoint documents: {str(e)}")
        return ActionResult(
            success=False,
            error=str(e)
        )