"""
SharePoint operations method module.

Handles SharePoint document operations using the SharePoint service.
"""
|
|
|
|
import logging
|
|
import json
|
|
import re
|
|
from typing import Dict, Any, List, Optional
|
|
from datetime import datetime, UTC
|
|
import base64
|
|
from urllib.parse import urlparse
|
|
import aiohttp
|
|
import asyncio
|
|
|
|
from modules.chat.methodBase import MethodBase, action
|
|
from modules.interfaces.interfaceChatModel import ActionResult
|
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class MethodSharepoint(MethodBase):
|
|
"""SharePoint operations methods."""
|
|
|
|
def __init__(self, service):
|
|
super().__init__(service)
|
|
self.name = "sharepoint"
|
|
self.description = "SharePoint operations methods"
|
|
|
|
def _format_timestamp_for_filename(self) -> str:
|
|
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
|
return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
|
|
|
def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
|
|
"""Get Microsoft connection from connection reference"""
|
|
try:
|
|
userConnection = self.service.getUserConnectionFromConnectionReference(connectionReference)
|
|
if not userConnection:
|
|
logger.warning(f"No user connection found for reference: {connectionReference}")
|
|
return None
|
|
|
|
if userConnection.authority.value != "msft":
|
|
logger.warning(f"Connection {userConnection.id} is not Microsoft (authority: {userConnection.authority.value})")
|
|
return None
|
|
|
|
# Check if connection is active or pending (pending means OAuth in progress)
|
|
if userConnection.status.value not in ["active", "pending"]:
|
|
logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
|
|
return None
|
|
|
|
# Get the token for this specific connection
|
|
token = self.service.interfaceApp.getConnectionToken(userConnection.id)
|
|
if not token:
|
|
logger.warning(f"No token found for connection {userConnection.id}")
|
|
return None
|
|
|
|
# Check if token is expired
|
|
if hasattr(token, 'expiresAt') and token.expiresAt:
|
|
current_time = get_utc_timestamp()
|
|
if current_time > token.expiresAt:
|
|
logger.warning(f"Token for connection {userConnection.id} is expired (expiresAt: {token.expiresAt}, current: {current_time})")
|
|
return None
|
|
|
|
logger.info(f"Successfully retrieved Microsoft connection: {userConnection.id}, status: {userConnection.status.value}, externalId: {userConnection.externalId}")
|
|
|
|
return {
|
|
"id": userConnection.id,
|
|
"accessToken": token.tokenAccess,
|
|
"refreshToken": token.tokenRefresh,
|
|
"scopes": ["Sites.ReadWrite.All", "Files.ReadWrite.All", "User.Read"] # SharePoint scopes
|
|
}
|
|
except Exception as e:
|
|
logger.error(f"Error getting Microsoft connection: {str(e)}")
|
|
return None
|
|
|
|
def _parseSiteUrl(self, siteUrl: str) -> Dict[str, str]:
|
|
"""Parse SharePoint site URL to extract hostname and site path"""
|
|
try:
|
|
parsed = urlparse(siteUrl)
|
|
hostname = parsed.hostname
|
|
path = parsed.path.strip('/')
|
|
|
|
return {
|
|
"hostname": hostname,
|
|
"sitePath": path
|
|
}
|
|
except Exception as e:
|
|
logger.error(f"Error parsing site URL {siteUrl}: {str(e)}")
|
|
return {"hostname": "", "sitePath": ""}
|
|
|
|
async def _makeGraphApiCall(self, access_token: str, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
|
|
"""Make a Microsoft Graph API call with timeout and detailed logging"""
|
|
try:
|
|
headers = {
|
|
"Authorization": f"Bearer {access_token}",
|
|
"Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
|
|
}
|
|
|
|
url = f"https://graph.microsoft.com/v1.0/{endpoint}"
|
|
logger.info(f"Making Graph API call: {method} {url}")
|
|
|
|
# Set timeout to 30 seconds
|
|
timeout = aiohttp.ClientTimeout(total=30)
|
|
|
|
async with aiohttp.ClientSession(timeout=timeout) as session:
|
|
if method == "GET":
|
|
logger.debug(f"Starting GET request to {url}")
|
|
async with session.get(url, headers=headers) as response:
|
|
logger.info(f"Graph API response: {response.status}")
|
|
if response.status == 200:
|
|
result = await response.json()
|
|
logger.debug(f"Graph API success: {len(str(result))} characters response")
|
|
return result
|
|
else:
|
|
error_text = await response.text()
|
|
logger.error(f"Graph API call failed: {response.status} - {error_text}")
|
|
return {"error": f"API call failed: {response.status} - {error_text}"}
|
|
|
|
elif method == "PUT":
|
|
logger.debug(f"Starting PUT request to {url}")
|
|
async with session.put(url, headers=headers, data=data) as response:
|
|
logger.info(f"Graph API response: {response.status}")
|
|
if response.status in [200, 201]:
|
|
result = await response.json()
|
|
logger.debug(f"Graph API success: {len(str(result))} characters response")
|
|
return result
|
|
else:
|
|
error_text = await response.text()
|
|
logger.error(f"Graph API call failed: {response.status} - {error_text}")
|
|
return {"error": f"API call failed: {response.status} - {error_text}"}
|
|
|
|
elif method == "POST":
|
|
logger.debug(f"Starting POST request to {url}")
|
|
async with session.post(url, headers=headers, data=data) as response:
|
|
logger.info(f"Graph API response: {response.status}")
|
|
if response.status in [200, 201]:
|
|
result = await response.json()
|
|
logger.debug(f"Graph API success: {len(str(result))} characters response")
|
|
return result
|
|
else:
|
|
error_text = await response.text()
|
|
logger.error(f"Graph API call failed: {response.status} - {error_text}")
|
|
return {"error": f"API call failed: {response.status} - {error_text}"}
|
|
|
|
except asyncio.TimeoutError:
|
|
logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
|
|
return {"error": f"API call timed out after 30 seconds: {endpoint}"}
|
|
except Exception as e:
|
|
logger.error(f"Error making Graph API call: {str(e)}")
|
|
return {"error": f"Error making Graph API call: {str(e)}"}
|
|
|
|
async def _getSiteId(self, access_token: str, hostname: str, site_path: str) -> str:
|
|
"""Get SharePoint site ID from hostname and site path"""
|
|
try:
|
|
endpoint = f"sites/{hostname}:/{site_path}"
|
|
result = await self._makeGraphApiCall(access_token, endpoint)
|
|
|
|
if "error" in result:
|
|
logger.error(f"Error getting site ID: {result['error']}")
|
|
return ""
|
|
|
|
return result.get("id", "")
|
|
except Exception as e:
|
|
logger.error(f"Error getting site ID: {str(e)}")
|
|
return ""
|
|
|
|
    @action
    async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Find document path based on query/description.

        Searches the site's default drive via the Microsoft Graph search API and
        returns the matches wrapped in a single result document.

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
            siteUrl (str): SharePoint site URL
            query (str): Query or description to find document
            searchScope (str, optional): Search scope (default: "all"); "documents"
                skips folders, "pages" keeps only text/html files
            expectedDocumentFormats (list, optional): Expected document formats with
                extension, mimeType, description; the first entry controls the name
                and mimeType of the emitted result document
        """
        try:
            connectionReference = parameters.get("connectionReference")
            siteUrl = parameters.get("siteUrl")
            query = parameters.get("query")
            searchScope = parameters.get("searchScope", "all")
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])

            if not connectionReference or not siteUrl or not query:
                return ActionResult.isFailure(error="Connection reference, site URL, and query are required")

            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            # Parse site URL to get hostname and site path
            site_info = self._parseSiteUrl(siteUrl)
            if not site_info["hostname"] or not site_info["sitePath"]:
                return ActionResult.isFailure(error=f"Invalid SharePoint site URL: {siteUrl}")

            # Get site ID
            site_id = await self._getSiteId(connection["accessToken"], site_info["hostname"], site_info["sitePath"])
            if not site_id:
                return ActionResult.isFailure(error="Failed to get SharePoint site ID")

            try:
                # Use Microsoft Graph search API
                search_query = query.replace("'", "''")  # Escape single quotes for OData
                endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"

                # Make the search API call
                search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)

                if "error" in search_result:
                    return ActionResult.isFailure(error=f"Search failed: {search_result['error']}")

                # Process search results
                items = search_result.get("value", [])
                found_documents = []

                for item in items:
                    # Filter by search scope if specified
                    if searchScope == "documents" and "folder" in item:
                        continue
                    elif searchScope == "pages" and "file" in item and not item["file"].get("mimeType", "").startswith("text/html"):
                        continue

                    doc_info = {
                        "id": item.get("id"),
                        "name": item.get("name"),
                        # Full path = parent folder path + "/" + item name.
                        "path": item.get("parentReference", {}).get("path", "") + "/" + item.get("name", ""),
                        "size": item.get("size", 0),
                        "createdDateTime": item.get("createdDateTime"),
                        "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                        "webUrl": item.get("webUrl"),
                        # Graph marks folders with a "folder" facet on the item.
                        "type": "folder" if "folder" in item else "file"
                    }

                    # Add file-specific information
                    if "file" in item:
                        doc_info.update({
                            "mimeType": item["file"].get("mimeType"),
                            "downloadUrl": item.get("@microsoft.graph.downloadUrl")
                        })

                    # Add folder-specific information
                    if "folder" in item:
                        doc_info.update({
                            "childCount": item["folder"].get("childCount", 0)
                        })

                    found_documents.append(doc_info)

                result_data = {
                    "connectionReference": connectionReference,
                    "siteUrl": siteUrl,
                    "query": query,
                    "searchScope": searchScope,
                    "totalResults": len(found_documents),
                    "foundDocuments": found_documents,
                    "connection": {
                        "id": connection["id"],
                        "authority": "microsoft",
                        "reference": connectionReference
                    },
                    "timestamp": get_utc_timestamp()
                }

            except Exception as e:
                logger.error(f"Error searching SharePoint: {str(e)}")
                return ActionResult.isFailure(error=str(e))

            # Determine output format based on expected formats
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                # Use the first expected format
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".json")
                output_mime_type = expected_format.get("mimeType", "application/json")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
            else:
                logger.info("No expected format specified, using default .json format")

            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_find_path_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )

        except Exception as e:
            logger.error(f"Error finding document path: {str(e)}")
            return ActionResult.isFailure(error=str(e))
|
|
|
|
    @action
    async def readDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Read documents from SharePoint.

        Resolves each path (or drive-item ID) to file metadata and, for textual
        mime types, downloads the file content inline.

        Parameters:
            documentList (str): Reference to the document list to read
            connectionReference (str): Reference to the Microsoft connection
            siteUrl (str): SharePoint site URL
            documentPaths (List[str]): List of paths to the documents in SharePoint
            includeMetadata (bool, optional): Whether to include metadata (default: True)
            expectedDocumentFormats (list, optional): Expected document formats with
                extension, mimeType, description
        """
        try:
            documentList = parameters.get("documentList")
            connectionReference = parameters.get("connectionReference")
            siteUrl = parameters.get("siteUrl")
            documentPaths = parameters.get("documentPaths")
            includeMetadata = parameters.get("includeMetadata", True)
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])

            if not documentList or not connectionReference or not siteUrl or not documentPaths:
                return ActionResult.isFailure(error="Document list reference, connection reference, site URL, and document paths are required")

            # Get documents from reference - ensure documentList is a list, not a string
            if isinstance(documentList, str):
                documentList = [documentList]  # Convert string to list
            chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)

            # For testing: if no chat documents found, create mock documents based on document paths
            # NOTE(review): this mock fallback looks like test scaffolding left in
            # production code — confirm whether it should remain.
            if not chatDocuments and documentPaths:
                logger.info("No chat documents found, creating mock documents for testing based on document paths")
                chatDocuments = []
                for i, path in enumerate(documentPaths):
                    # Dynamically-built stand-in with the two attributes callers use.
                    mock_doc = type('MockChatDocument', (), {
                        'fileId': f'mock_file_id_{i}',
                        'fileName': path.split('/')[-1] if '/' in path else path
                    })()
                    chatDocuments.append(mock_doc)
                logger.info(f"Created {len(chatDocuments)} mock documents for testing")

            # NOTE(review): chatDocuments is only checked for existence — the read
            # loop below works purely from documentPaths. Verify the list reference
            # is actually needed by this action.
            if not chatDocuments:
                return ActionResult.isFailure(error="No documents found for the provided reference")

            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            # Parse site URL to get hostname and site path
            site_info = self._parseSiteUrl(siteUrl)
            if not site_info["hostname"] or not site_info["sitePath"]:
                return ActionResult.isFailure(error=f"Invalid SharePoint site URL: {siteUrl}")

            # Get site ID
            site_id = await self._getSiteId(connection["accessToken"], site_info["hostname"], site_info["sitePath"])
            if not site_id:
                return ActionResult.isFailure(error="Failed to get SharePoint site ID")

            # Process each document path
            read_results = []

            for i, documentPath in enumerate(documentPaths):
                try:
                    # Check if documentPath is actually a file ID (starts with 016GRP6V)
                    # NOTE(review): this prefix appears tenant-specific — confirm it
                    # generalizes beyond the current environment.
                    if documentPath.startswith('016GRP6V'):
                        # Use file ID directly
                        file_endpoint = f"sites/{site_id}/drive/items/{documentPath}"
                        logger.info(f"Reading file by ID: {documentPath}")
                    else:
                        # First, find the file by its path
                        path_clean = documentPath.lstrip('/')
                        file_endpoint = f"sites/{site_id}/drive/root:/{path_clean}"
                        logger.info(f"Reading file by path: {path_clean}")

                    # Get file metadata
                    file_info_result = await self._makeGraphApiCall(connection["accessToken"], file_endpoint)

                    if "error" in file_info_result:
                        read_results.append({
                            "documentPath": documentPath,
                            "error": f"File not found: {file_info_result['error']}",
                            "content": None
                        })
                        continue

                    file_id = file_info_result.get("id")
                    if not file_id:
                        read_results.append({
                            "documentPath": documentPath,
                            "error": "Could not get file ID",
                            "content": None
                        })
                        continue

                    # Build result with metadata
                    result_item = {
                        "documentPath": documentPath,
                        "fileId": file_id,
                        "fileName": file_info_result.get("name"),
                        "size": file_info_result.get("size", 0),
                        "createdDateTime": file_info_result.get("createdDateTime"),
                        "lastModifiedDateTime": file_info_result.get("lastModifiedDateTime"),
                        "webUrl": file_info_result.get("webUrl")
                    }

                    # Add metadata if requested
                    if includeMetadata:
                        result_item["metadata"] = {
                            "mimeType": file_info_result.get("file", {}).get("mimeType"),
                            "downloadUrl": file_info_result.get("@microsoft.graph.downloadUrl"),
                            "createdBy": file_info_result.get("createdBy", {}),
                            "lastModifiedBy": file_info_result.get("lastModifiedBy", {}),
                            "parentReference": file_info_result.get("parentReference", {})
                        }

                    # Get file content if it's a readable format
                    mime_type = file_info_result.get("file", {}).get("mimeType", "")
                    if mime_type.startswith("text/") or mime_type in [
                        "application/json", "application/xml", "application/javascript"
                    ]:
                        # Download the file content
                        content_endpoint = f"sites/{site_id}/drive/items/{file_id}/content"

                        # For content download, we need to handle binary data
                        # NOTE(review): this download has no explicit timeout, unlike
                        # _makeGraphApiCall's 30s — confirm whether that is intended.
                        try:
                            async with aiohttp.ClientSession() as session:
                                headers = {"Authorization": f"Bearer {connection['accessToken']}"}
                                async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
                                    if response.status == 200:
                                        content = await response.text()
                                        result_item["content"] = content
                                    else:
                                        result_item["content"] = f"Could not download content: HTTP {response.status}"
                        except Exception as e:
                            result_item["content"] = f"Error downloading content: {str(e)}"
                    else:
                        result_item["content"] = f"Binary file type ({mime_type}) - content not retrieved"

                    read_results.append(result_item)

                except Exception as e:
                    logger.error(f"Error reading document {documentPath}: {str(e)}")
                    read_results.append({
                        "documentPath": documentPath,
                        "error": str(e),
                        "content": None
                    })

            result_data = {
                "connectionReference": connectionReference,
                "siteUrl": siteUrl,
                "documentPaths": documentPaths,
                "includeMetadata": includeMetadata,
                "readResults": read_results,
                "connection": {
                    "id": connection["id"],
                    "authority": "microsoft",
                    "reference": connectionReference
                },
                "timestamp": get_utc_timestamp()
            }

            # Determine output format based on expected formats
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                # Use the first expected format
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".json")
                output_mime_type = expected_format.get("mimeType", "application/json")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
            else:
                logger.info("No expected format specified, using default .json format")

            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_documents_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )
        except Exception as e:
            logger.error(f"Error reading SharePoint documents: {str(e)}")
            return ActionResult(
                success=False,
                error=str(e)
            )
|
|
|
|
@action
|
|
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|
"""
|
|
Upload documents to SharePoint
|
|
|
|
Parameters:
|
|
connectionReference (str): Reference to the Microsoft connection
|
|
siteUrl (str): SharePoint site URL
|
|
documentPaths (List[str]): List of paths where to upload the documents
|
|
documentList (str): Reference to the document list to upload
|
|
fileNames (List[str]): List of names for the uploaded files
|
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
|
"""
|
|
try:
|
|
connectionReference = parameters.get("connectionReference")
|
|
siteUrl = parameters.get("siteUrl")
|
|
documentPaths = parameters.get("documentPaths")
|
|
documentList = parameters.get("documentList")
|
|
fileNames = parameters.get("fileNames")
|
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
|
|
|
if not connectionReference or not siteUrl or not documentPaths or not documentList or not fileNames:
|
|
return ActionResult.isFailure(error="Connection reference, site URL, document paths, document list, and file names are required")
|
|
|
|
# Get Microsoft connection
|
|
connection = self._getMicrosoftConnection(connectionReference)
|
|
if not connection:
|
|
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
|
|
|
|
# Get documents from reference - ensure documentList is a list, not a string
|
|
if isinstance(documentList, str):
|
|
documentList = [documentList] # Convert string to list
|
|
chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)
|
|
if not chatDocuments:
|
|
return ActionResult.isFailure(error="No documents found for the provided reference")
|
|
|
|
# Parse site URL to get hostname and site path
|
|
site_info = self._parseSiteUrl(siteUrl)
|
|
if not site_info["hostname"] or not site_info["sitePath"]:
|
|
return ActionResult.isFailure(error=f"Invalid SharePoint site URL: {siteUrl}")
|
|
|
|
# Get site ID
|
|
site_id = await self._getSiteId(connection["accessToken"], site_info["hostname"], site_info["sitePath"])
|
|
if not site_id:
|
|
return ActionResult.isFailure(error="Failed to get SharePoint site ID")
|
|
|
|
# Process each document upload
|
|
upload_results = []
|
|
|
|
for i, (documentPath, fileName) in enumerate(zip(documentPaths, fileNames)):
|
|
try:
|
|
if i < len(chatDocuments):
|
|
chatDocument = chatDocuments[i]
|
|
fileId = chatDocument.fileId
|
|
file_data = self.service.getFileData(fileId)
|
|
|
|
if not file_data:
|
|
logger.warning(f"File data not found for fileId: {fileId}")
|
|
upload_results.append({
|
|
"documentPath": documentPath,
|
|
"fileName": fileName,
|
|
"fileId": fileId,
|
|
"error": "File data not found",
|
|
"uploadStatus": "failed"
|
|
})
|
|
continue
|
|
|
|
# Prepare upload path
|
|
upload_path = documentPath.rstrip('/') + '/' + fileName
|
|
upload_path_clean = upload_path.lstrip('/')
|
|
|
|
# Upload endpoint for small files (< 4MB)
|
|
if len(file_data) < 4 * 1024 * 1024: # 4MB
|
|
upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content"
|
|
|
|
# Upload the file
|
|
upload_result = await self._makeGraphApiCall(
|
|
connection["accessToken"],
|
|
upload_endpoint,
|
|
method="PUT",
|
|
data=file_data
|
|
)
|
|
|
|
if "error" in upload_result:
|
|
upload_results.append({
|
|
"documentPath": documentPath,
|
|
"fileName": fileName,
|
|
"fileId": fileId,
|
|
"error": upload_result["error"],
|
|
"uploadStatus": "failed"
|
|
})
|
|
else:
|
|
upload_results.append({
|
|
"documentPath": documentPath,
|
|
"fileName": fileName,
|
|
"fileId": fileId,
|
|
"uploadStatus": "success",
|
|
"sharepointFileId": upload_result.get("id"),
|
|
"webUrl": upload_result.get("webUrl"),
|
|
"size": upload_result.get("size"),
|
|
"createdDateTime": upload_result.get("createdDateTime")
|
|
})
|
|
else:
|
|
# For large files, we would need to implement resumable upload
|
|
# For now, return an error for large files
|
|
upload_results.append({
|
|
"documentPath": documentPath,
|
|
"fileName": fileName,
|
|
"fileId": fileId,
|
|
"error": f"File too large ({len(file_data)} bytes). Files larger than 4MB require resumable upload (not implemented).",
|
|
"uploadStatus": "failed"
|
|
})
|
|
else:
|
|
upload_results.append({
|
|
"documentPath": documentPath,
|
|
"fileName": fileName,
|
|
"fileId": None,
|
|
"error": "No corresponding chat document found",
|
|
"uploadStatus": "failed"
|
|
})
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error uploading document {fileName}: {str(e)}")
|
|
upload_results.append({
|
|
"documentPath": documentPath,
|
|
"fileName": fileName,
|
|
"fileId": fileId if i < len(chatDocuments) else None,
|
|
"error": str(e),
|
|
"uploadStatus": "failed"
|
|
})
|
|
|
|
# Create result data
|
|
result_data = {
|
|
"connectionReference": connectionReference,
|
|
"siteUrl": siteUrl,
|
|
"documentPaths": documentPaths,
|
|
"documentList": documentList,
|
|
"fileNames": fileNames,
|
|
"uploadResults": upload_results,
|
|
"connection": {
|
|
"id": connection["id"],
|
|
"authority": "microsoft",
|
|
"reference": connectionReference
|
|
},
|
|
"timestamp": get_utc_timestamp()
|
|
}
|
|
|
|
# Determine output format based on expected formats
|
|
output_extension = ".json" # Default
|
|
output_mime_type = "application/json" # Default
|
|
|
|
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
|
# Use the first expected format
|
|
expected_format = expectedDocumentFormats[0]
|
|
output_extension = expected_format.get("extension", ".json")
|
|
output_mime_type = expected_format.get("mimeType", "application/json")
|
|
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
|
else:
|
|
logger.info("No expected format specified, using default .json format")
|
|
|
|
return ActionResult(
|
|
success=True,
|
|
documents=[
|
|
{
|
|
"documentName": f"sharepoint_upload_{self._format_timestamp_for_filename()}{output_extension}",
|
|
"documentData": result_data,
|
|
"mimeType": output_mime_type
|
|
}
|
|
]
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error uploading to SharePoint: {str(e)}")
|
|
return ActionResult(
|
|
success=False,
|
|
error=str(e)
|
|
)
|
|
|
|
    @action
    async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        List documents in SharePoint folder(s).

        Lists each folder's direct children; with includeSubfolders it also
        expands direct subfolders exactly one level deep (no recursion), capped
        at 10 subfolders per listed folder.

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
            siteUrl (str): SharePoint site URL
            folderPaths (List[str]): List of paths to the folders to list
            includeSubfolders (bool, optional): Whether to include subfolders (default: False)
            expectedDocumentFormats (list, optional): Expected document formats with
                extension, mimeType, description
        """
        try:
            connectionReference = parameters.get("connectionReference")
            siteUrl = parameters.get("siteUrl")
            folderPaths = parameters.get("folderPaths")
            includeSubfolders = parameters.get("includeSubfolders", False)  # Default to False for better UX
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])

            if not connectionReference or not siteUrl or not folderPaths:
                return ActionResult.isFailure(error="Connection reference, site URL, and folder paths are required")

            # Get Microsoft connection
            connection = self._getMicrosoftConnection(connectionReference)
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

            logger.info(f"Starting SharePoint listDocuments for site: {siteUrl}")
            logger.debug(f"Connection ID: {connection['id']}")
            logger.debug(f"Folder paths: {folderPaths}")

            # Parse site URL to get hostname and site path
            site_info = self._parseSiteUrl(siteUrl)
            logger.info(f"Parsed site info - hostname: {site_info['hostname']}, sitePath: {site_info['sitePath']}")

            if not site_info["hostname"] or not site_info["sitePath"]:
                logger.error(f"Failed to parse site URL: {siteUrl}")
                return ActionResult.isFailure(error=f"Invalid SharePoint site URL: {siteUrl}")

            # Get site ID
            logger.info(f"Getting site ID for hostname: {site_info['hostname']}, path: {site_info['sitePath']}")
            site_id = await self._getSiteId(connection["accessToken"], site_info["hostname"], site_info["sitePath"])
            logger.info(f"Site ID result: {site_id}")

            if not site_id:
                return ActionResult.isFailure(error="Failed to get SharePoint site ID")

            # Process each folder path
            list_results = []

            for folderPath in folderPaths:
                try:
                    # Determine the endpoint based on folder path
                    if folderPath in ["/", ""]:
                        # Root folder
                        endpoint = f"sites/{site_id}/drive/root/children"
                    else:
                        # Specific folder - remove leading slash if present
                        folder_path_clean = folderPath.lstrip('/')
                        endpoint = f"sites/{site_id}/drive/root:/{folder_path_clean}:/children"

                    # Make the API call to list folder contents
                    api_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)

                    if "error" in api_result:
                        list_results.append({
                            "folderPath": folderPath,
                            "error": api_result["error"],
                            "items": []
                        })
                        continue

                    # Process the results
                    items = api_result.get("value", [])
                    processed_items = []

                    for item in items:
                        item_info = {
                            "id": item.get("id"),
                            "name": item.get("name"),
                            "size": item.get("size", 0),
                            "createdDateTime": item.get("createdDateTime"),
                            "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                            "webUrl": item.get("webUrl"),
                            # Graph marks folders with a "folder" facet on the item.
                            "type": "folder" if "folder" in item else "file"
                        }

                        # Add file-specific information
                        if "file" in item:
                            item_info.update({
                                "mimeType": item["file"].get("mimeType"),
                                "downloadUrl": item.get("@microsoft.graph.downloadUrl")
                            })

                        # Add folder-specific information
                        if "folder" in item:
                            item_info.update({
                                "childCount": item["folder"].get("childCount", 0)
                            })

                        processed_items.append(item_info)

                    # If include subfolders is enabled, get ONLY direct subfolder contents (1 level deep only)
                    if includeSubfolders:
                        logger.info(f"Including subfolders - processing {len([item for item in processed_items if item['type'] == 'folder'])} folders")
                        subfolder_count = 0
                        max_subfolders = 10  # Limit to prevent infinite loops

                        for item in processed_items[:]:  # Use slice to avoid modifying list during iteration
                            if item["type"] == "folder" and subfolder_count < max_subfolders:
                                subfolder_count += 1
                                subfolder_path = f"{folderPath.rstrip('/')}/{item['name']}"
                                # Children are fetched by item id, not by path.
                                subfolder_endpoint = f"sites/{site_id}/drive/items/{item['id']}/children"

                                logger.debug(f"Getting contents of subfolder: {item['name']}")
                                subfolder_result = await self._makeGraphApiCall(connection["accessToken"], subfolder_endpoint)
                                if "error" not in subfolder_result:
                                    subfolder_items = subfolder_result.get("value", [])
                                    logger.debug(f"Found {len(subfolder_items)} items in subfolder {item['name']}")

                                    for subfolder_item in subfolder_items:
                                        # Only add files and direct subfolders, NO RECURSION
                                        subfolder_item_info = {
                                            "id": subfolder_item.get("id"),
                                            "name": subfolder_item.get("name"),
                                            "size": subfolder_item.get("size", 0),
                                            "createdDateTime": subfolder_item.get("createdDateTime"),
                                            "lastModifiedDateTime": subfolder_item.get("lastModifiedDateTime"),
                                            "webUrl": subfolder_item.get("webUrl"),
                                            "type": "folder" if "folder" in subfolder_item else "file",
                                            # parentPath distinguishes these from top-level entries.
                                            "parentPath": subfolder_path
                                        }

                                        if "file" in subfolder_item:
                                            subfolder_item_info.update({
                                                "mimeType": subfolder_item["file"].get("mimeType"),
                                                "downloadUrl": subfolder_item.get("@microsoft.graph.downloadUrl")
                                            })

                                        processed_items.append(subfolder_item_info)
                                else:
                                    logger.warning(f"Failed to get contents of subfolder {item['name']}: {subfolder_result.get('error')}")
                            elif subfolder_count >= max_subfolders:
                                logger.warning(f"Reached maximum subfolder limit ({max_subfolders}), skipping remaining folders")
                                break

                        logger.info(f"Processed {subfolder_count} subfolders, total items: {len(processed_items)}")

                    list_results.append({
                        "folderPath": folderPath,
                        "itemCount": len(processed_items),
                        "items": processed_items
                    })

                except Exception as e:
                    logger.error(f"Error listing folder {folderPath}: {str(e)}")
                    list_results.append({
                        "folderPath": folderPath,
                        "error": str(e),
                        "items": []
                    })

            # Create result data
            result_data = {
                "connectionReference": connectionReference,
                "siteUrl": siteUrl,
                "folderPaths": folderPaths,
                "includeSubfolders": includeSubfolders,
                "listResults": list_results,
                "connection": {
                    "id": connection["id"],
                    "authority": "microsoft",
                    "reference": connectionReference
                },
                "timestamp": get_utc_timestamp()
            }

            # Determine output format based on expected formats
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default

            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                # Use the first expected format
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".json")
                output_mime_type = expected_format.get("mimeType", "application/json")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
            else:
                logger.info("No expected format specified, using default .json format")

            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_document_list_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                ]
            )

        except Exception as e:
            logger.error(f"Error listing SharePoint documents: {str(e)}")
            return ActionResult(
                success=False,
                error=str(e)
            )