gateway/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
2025-12-17 10:45:09 +01:00

278 lines
13 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Upload Document action for SharePoint operations.
Uploads documents to SharePoint.
"""
import logging
import time
import json
import urllib.parse
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Upload documents to SharePoint. Only to choose this action with a connectionReference
    - Input requirements: connectionReference (required); documentList (required); pathQuery (optional).
    - Output format: JSON with upload status and file info.
    Parameters:
    - connectionReference (str, required): Microsoft connection label.
    - documentList (list, required): Document reference(s) to upload. File names are taken from the documents.
    - pathQuery (str, optional): Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath).
    """
    # NOTE(review): the docstring above is kept verbatim; its GENERAL/Parameters
    # layout is presumably consumed by the @action decorator - confirm before rewording.
    #
    # Flow:
    #   1. Start a progress log entry and validate the parameters.
    #   2. Derive target folder, candidate sites and files from documentList
    #      (falling back to pathQuery for folder and sites).
    #   3. PUT each file to the first site that accepts it.
    #   4. Return one JSON document describing every per-file outcome.
    #
    # Returns:
    #   - ActionResult.isFailure(...) when validation or lookup fails.
    #   - ActionResult(success=True, documents=[...]) once the upload loop ran,
    #     even if individual (or all) files failed; per-file status is in the
    #     "uploadResults" list of the JSON document.
    #   - ActionResult(success=False, error=...) on an unexpected exception.

    # Files at or above this size are not uploaded (resumable upload is not implemented).
    maxSimpleUploadBytes = 4 * 1024 * 1024  # 4MB

    operationId = None

    def _fail(message: str) -> ActionResult:
        """Mark the progress log entry as failed and build the failure result."""
        if operationId:
            self.services.chat.progressLogFinish(operationId, False)
        return ActionResult.isFailure(error=message)

    def _isFolderId(path: str) -> bool:
        """Heuristic used by this action: values starting with '01' are drive item IDs."""
        return path.startswith('01')

    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_upload_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Upload Document",
            "SharePoint Upload",
            "Processing document list",
            parentOperationId=parentOperationId
        )

        connectionReference = parameters.get("connectionReference")
        documentList = parameters.get("documentList")
        pathQuery = parameters.get("pathQuery")

        # A single reference may arrive as a bare string
        if isinstance(documentList, str):
            documentList = [documentList]

        if not connectionReference:
            return _fail("Connection reference is required")
        if not documentList:
            return _fail("Document list is required")

        # Parse documentList to extract folder path and site information
        uploadPath, sites, filesToUpload, errorMsg = await self.documentParsing.parseDocumentListForFolder(documentList)
        if errorMsg:
            return _fail(errorMsg)

        # If no folder path found from documentList, use pathQuery if provided
        if not uploadPath and pathQuery and pathQuery.strip() not in ("", "*"):
            uploadPath = pathQuery
            logger.info(f"Using pathQuery for upload path: {uploadPath}")
            # Resolve sites from pathQuery
            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return _fail(errorMsg)

        # Validate required parameters
        if not uploadPath:
            return _fail("Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get upload folder, or provide pathQuery directly.")
        if not sites:
            return _fail("Site information missing. Cannot determine target site for upload.")
        if not filesToUpload:
            return _fail("No files to upload found in documentList.")

        # Get connection
        self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection")
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return _fail("No valid Microsoft connection found for the provided connection reference")

        # Process upload paths
        if _isFolderId(uploadPath):
            # It's a folder ID - use it directly
            uploadPaths = [uploadPath]
            logger.info(f"Using folder ID directly for upload: {uploadPath}")
        else:
            # It's a path - resolve it normally
            uploadPaths = self.pathProcessing.resolvePathQuery(uploadPath)

        # The target folder is the same for every file and site, so resolve it
        # once. A separate name is used on purpose: `uploadPath` must keep the
        # requested folder for the summary instead of being overwritten with
        # the last file's full path.
        targetFolder = uploadPaths[0] if uploadPaths else "/Documents"
        # Handle wildcard paths - replace with default Documents folder
        if targetFolder == "*":
            targetFolder = "/Documents"
            logger.warning("Wildcard path '*' detected, using default '/Documents' folder for upload")
        targetIsFolderId = _isFolderId(targetFolder)

        # Process each document upload
        uploadResults = []

        # Extract file names from documents
        fileNames = [doc.fileName for doc in filesToUpload]
        logger.info(f"Using file names from documentList: {fileNames}")
        totalFiles = len(filesToUpload)
        self.services.chat.progressLogUpdate(operationId, 0.5, f"Uploading {totalFiles} document(s)")

        for i, (chatDocument, fileName) in enumerate(zip(filesToUpload, fileNames)):
            # Bound before the try so the except handler can always report it
            fileId = None
            try:
                fileId = chatDocument.fileId
                fileData = self.services.chat.getFileData(fileId)
                if not fileData:
                    logger.warning(f"File data not found for fileId: {fileId}")
                    uploadResults.append({
                        "fileName": fileName,
                        "fileId": fileId,
                        "error": "File data not found",
                        "uploadStatus": "failed"
                    })
                else:
                    # Upload to the first available site (or could be made configurable)
                    uploadSuccessful = False
                    for site in sites:
                        siteId = site["id"]
                        siteName = site["displayName"]
                        siteUrl = site["webUrl"]

                        # NOTE(review): fileName is interpolated into the endpoint
                        # unescaped; whether makeGraphApiCall URL-encodes it is not
                        # visible here - verify for names containing '#', '%' or '?'.
                        if targetIsFolderId:
                            # It's a folder ID - use the folder-specific upload endpoint
                            fileUploadPath = targetFolder
                            uploadEndpoint = f"sites/{siteId}/drive/items/{targetFolder}:/{fileName}:/content"
                            logger.info(f"Using folder ID upload endpoint: {uploadEndpoint}")
                        else:
                            # It's a regular path - use the root-based upload endpoint
                            fileUploadPath = targetFolder.rstrip('/') + '/' + fileName
                            uploadPathClean = fileUploadPath.lstrip('/')
                            uploadEndpoint = f"sites/{siteId}/drive/root:/{uploadPathClean}:/content"
                            logger.info(f"Using path-based upload endpoint: {uploadEndpoint}")

                        if len(fileData) >= maxSimpleUploadBytes:
                            # For large files, we would need to implement resumable upload
                            logger.warning(f"File too large ({len(fileData)} bytes) for site {siteName}")
                            continue

                        # Upload the file
                        uploadResult = await self.apiClient.makeGraphApiCall(
                            uploadEndpoint,
                            method="PUT",
                            data=fileData
                        )
                        if "error" in uploadResult:
                            logger.warning(f"Upload failed to site {siteName}: {uploadResult['error']}")
                            continue

                        uploadResults.append({
                            "fileName": fileName,
                            "fileId": fileId,
                            "uploadStatus": "success",
                            "siteName": siteName,
                            "siteUrl": siteUrl,
                            "uploadPath": fileUploadPath,
                            "uploadEndpoint": uploadEndpoint,
                            "sharepointFileId": uploadResult.get("id"),
                            "webUrl": uploadResult.get("webUrl"),
                            "size": uploadResult.get("size"),
                            "createdDateTime": uploadResult.get("createdDateTime")
                        })
                        uploadSuccessful = True
                        break

                    if not uploadSuccessful:
                        uploadResults.append({
                            "fileName": fileName,
                            "fileId": fileId,
                            "error": f"File too large ({len(fileData)} bytes) or upload failed to all sites. Files larger than 4MB require resumable upload (not implemented).",
                            "uploadStatus": "failed"
                        })
            except Exception as e:
                # One broken document must not abort the remaining uploads
                logger.error(f"Error uploading document {fileName}: {str(e)}")
                uploadResults.append({
                    "fileName": fileName,
                    "fileId": fileId,
                    "error": str(e),
                    "uploadStatus": "failed"
                })

            # Update progress for each file; (i + 1) keeps the fraction in step
            # with the "i + 1 of N" message and ends at 0.9 after the last file.
            self.services.chat.progressLogUpdate(
                operationId,
                0.5 + ((i + 1) * 0.4 / totalFiles),
                f"Uploaded {i + 1}/{totalFiles} file(s)"
            )

        successfulUploads = sum(1 for r in uploadResults if r.get("uploadStatus") == "success")
        failedUploads = sum(1 for r in uploadResults if r.get("uploadStatus") == "failed")

        # Create result data
        resultData = {
            "connectionReference": connectionReference,
            "uploadPath": uploadPath,
            "documentList": documentList,
            "fileNames": fileNames,
            "sitesAvailable": len(sites),
            "uploadResults": uploadResults,
            "connection": {
                "id": connection["id"],
                "authority": "microsoft",
                "reference": connectionReference
            },
            "timestamp": self.services.utils.timestampGetUtc()
        }

        # Use default JSON format for output
        outputMimeType = "application/json"
        validationMetadata = {
            "actionType": "sharepoint.uploadDocument",
            "connectionReference": connectionReference,
            "uploadPath": uploadPath,
            "fileNames": fileNames,
            "uploadCount": len(uploadResults),
            "successfulUploads": successfulUploads,
            "failedUploads": failedUploads
        }

        self.services.chat.progressLogUpdate(operationId, 0.9, f"Uploaded {successfulUploads}/{len(uploadResults)} file(s)")
        self.services.chat.progressLogFinish(operationId, successfulUploads > 0)

        # NOTE(review): the action reports success=True even when every upload
        # failed (only the progress log reflects it); kept for caller compatibility.
        return ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_upload", "json", None, "uploadDocument"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType=outputMimeType,
                    validationMetadata=validationMetadata
                )
            ]
        )
    except Exception as e:
        logger.error(f"Error uploading to SharePoint: {str(e)}")
        if operationId:
            # Best effort: closing the progress entry must not mask the original error
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                logger.warning(f"Could not finish progress log {operationId}: {finishError}")
        return ActionResult(
            success=False,
            error=str(e)
        )