# gateway/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
# 2026-03-03 18:57:20 +01:00
#
# 270 lines
# 13 KiB
# Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import logging
import time
import json
import urllib.parse
from typing import Dict, Any
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
# Size threshold used by this action: payloads at or above it are rejected
# because resumable (chunked) upload is not implemented here.
_SIMPLE_UPLOAD_LIMIT_BYTES = 4 * 1024 * 1024  # 4MB


def _isFolderId(path: str) -> bool:
    """Return True when *path* looks like a drive item ID rather than a folder path.

    The heuristic is the one this action has always used (value starts with
    '01'), tightened so that anything containing '/' is treated as a path:
    such a value cannot be spliced into ``drive/items/{id}`` anyway.
    """
    return path.startswith('01') and '/' not in path


def _buildUploadEndpoint(siteId: str, uploadPath: str, fileName: str) -> str:
    """Build the Graph endpoint used to PUT *fileName* into *uploadPath* on site *siteId*."""
    # NOTE(review): uploadPath and fileName are interpolated unescaped. Names
    # containing '#', '?' or '%' may need urllib.parse.quote unless
    # makeGraphApiCall already encodes the endpoint - confirm before changing.
    if _isFolderId(uploadPath):
        # It's a folder ID - use the folder-specific upload endpoint
        uploadEndpoint = f"sites/{siteId}/drive/items/{uploadPath}:/{fileName}:/content"
        logger.info(f"Using folder ID upload endpoint: {uploadEndpoint}")
        return uploadEndpoint

    # It's a regular path - use the root-based upload endpoint.
    # Strip the /sites/{siteName} prefix if present (the path is relative to the site's drive).
    uploadPathForApi = uploadPath
    if uploadPathForApi.startswith('/sites/'):
        parts = uploadPathForApi.split('/', 3)  # ['', 'sites', 'sitename', 'rest/of/path']
        if len(parts) >= 3:
            # '/sites/sitename' with nothing after it maps to the drive root.
            remainder = parts[3] if len(parts) >= 4 else ''
            uploadPathForApi = '/' + remainder
            logger.info(f"Stripped site prefix from upload path: '{uploadPath}' -> '{uploadPathForApi}'")
    uploadPathForApi = uploadPathForApi.rstrip('/') + '/' + fileName
    uploadPathClean = uploadPathForApi.lstrip('/')
    uploadEndpoint = f"sites/{siteId}/drive/root:/{uploadPathClean}:/content"
    logger.info(f"Using path-based upload endpoint: {uploadEndpoint}")
    return uploadEndpoint


async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """Upload the files referenced by a document list to a SharePoint folder.

    Reads these keys from ``parameters``:
        connectionReference: reference used to look up the Microsoft connection (required).
        documentList: document reference(s); a single string is wrapped in a list (required).
        pathQuery: fallback upload path, used only when documentList yields no
            folder path and the value is neither empty nor "*".
        parentOperationId: optional parent operation for progress logging.

    Each file is tried against the resolved sites in order until one upload
    succeeds. Files at or above 4MB are reported as failed because resumable
    upload is not implemented.

    Returns:
        An ActionResult. On validation or connection problems it is a failure
        carrying an error message. Otherwise it carries one JSON ActionDocument
        with per-file results and ``success`` is True when at least one file
        was uploaded. Unexpected exceptions are caught and returned as a
        failed ActionResult rather than raised.
    """
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_upload_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Upload Document",
            "SharePoint Upload",
            "Processing document list",
            parentOperationId=parentOperationId
        )

        def fail(message: str) -> ActionResult:
            # Close the progress entry before reporting a failure.
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error=message)

        connectionReference = parameters.get("connectionReference")
        documentList = parameters.get("documentList")
        pathQuery = parameters.get("pathQuery")
        if isinstance(documentList, str):
            documentList = [documentList]

        if not connectionReference:
            return fail("Connection reference is required")
        if not documentList:
            return fail("Document list is required")

        # Parse documentList to extract folder path and site information
        uploadPath, sites, filesToUpload, errorMsg = await self.documentParsing.parseDocumentListForFolder(documentList)
        if errorMsg:
            return fail(errorMsg)

        # If no folder path found from documentList, use pathQuery if provided
        if not uploadPath and pathQuery and pathQuery.strip() not in ("", "*"):
            uploadPath = pathQuery
            logger.info(f"Using pathQuery for upload path: {uploadPath}")
            # Resolve sites from pathQuery
            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return fail(errorMsg)

        # Validate required parameters
        if not uploadPath:
            return fail("Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get upload folder, or provide pathQuery directly.")
        if not sites:
            return fail("Site information missing. Cannot determine target site for upload.")
        if not filesToUpload:
            return fail("No files to upload found in documentList.")

        # Get connection
        self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection")
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return fail("No valid Microsoft connection found for the provided connection reference")

        # Process upload paths
        if _isFolderId(uploadPath):
            # It's a folder ID - use it directly
            uploadPaths = [uploadPath]
            logger.info(f"Using folder ID directly for upload: {uploadPath}")
        else:
            # It's a path - resolve it normally
            uploadPaths = self.pathProcessing.resolvePathQuery(uploadPath)

        # The target folder is the same for every file and site, so resolve it once:
        # first resolved path, or the default Documents folder.
        targetPath = uploadPaths[0] if uploadPaths else "/Documents"
        if targetPath == "*":
            targetPath = "/Documents"
            logger.warning("Wildcard path '*' detected, using default '/Documents' folder for upload")

        # Extract file names from documents
        fileNames = [doc.fileName for doc in filesToUpload]
        logger.info(f"Using file names from documentList: {fileNames}")

        totalFiles = len(filesToUpload)
        uploadResults = []
        self.services.chat.progressLogUpdate(operationId, 0.5, f"Uploading {totalFiles} document(s)")

        for i, (chatDocument, fileName) in enumerate(zip(filesToUpload, fileNames)):
            # Bound before the try so the except handler can always reference it.
            fileId = None
            try:
                fileId = chatDocument.fileId
                fileData = self.services.chat.getFileData(fileId)
                if not fileData:
                    logger.warning(f"File data not found for fileId: {fileId}")
                    uploadResults.append({
                        "fileName": fileName,
                        "fileId": fileId,
                        "error": "File data not found",
                        "uploadStatus": "failed"
                    })
                elif len(fileData) >= _SIMPLE_UPLOAD_LIMIT_BYTES:
                    # Size does not depend on the site, so there is no point trying each one.
                    logger.warning(f"File too large ({len(fileData)} bytes) for upload: {fileName}")
                    uploadResults.append({
                        "fileName": fileName,
                        "fileId": fileId,
                        "error": f"File too large ({len(fileData)} bytes). Files larger than 4MB require resumable upload (not implemented).",
                        "uploadStatus": "failed"
                    })
                else:
                    # Upload to the first site that accepts the file.
                    lastError = None
                    for site in sites:
                        siteId = site["id"]
                        siteName = site["displayName"]
                        siteUrl = site["webUrl"]
                        uploadEndpoint = _buildUploadEndpoint(siteId, targetPath, fileName)
                        uploadResult = await self.apiClient.makeGraphApiCall(
                            uploadEndpoint,
                            method="PUT",
                            data=fileData
                        )
                        if "error" not in uploadResult:
                            uploadResults.append({
                                "fileName": fileName,
                                "fileId": fileId,
                                "uploadStatus": "success",
                                "siteName": siteName,
                                "siteUrl": siteUrl,
                                "uploadPath": targetPath,
                                "uploadEndpoint": uploadEndpoint,
                                "sharepointFileId": uploadResult.get("id"),
                                "webUrl": uploadResult.get("webUrl"),
                                "size": uploadResult.get("size"),
                                "createdDateTime": uploadResult.get("createdDateTime")
                            })
                            break
                        lastError = uploadResult['error']
                        logger.warning(f"Upload failed to site {siteName}: {lastError}")
                    else:
                        # No site accepted the file; surface the most recent error.
                        uploadResults.append({
                            "fileName": fileName,
                            "fileId": fileId,
                            "error": f"Upload failed to all sites. Last error: {lastError}",
                            "uploadStatus": "failed"
                        })
            except Exception as e:
                # One broken document must not abort the remaining uploads.
                logger.error(f"Error uploading document {fileName}: {str(e)}")
                uploadResults.append({
                    "fileName": fileName,
                    "fileId": fileId,
                    "error": str(e),
                    "uploadStatus": "failed"
                })

            # Update progress for each processed file (including failed/skipped ones)
            self.services.chat.progressLogUpdate(
                operationId,
                0.5 + ((i + 1) * 0.4 / totalFiles),
                f"Uploaded {i + 1}/{totalFiles} file(s)"
            )

        successfulUploads = sum(1 for r in uploadResults if r.get("uploadStatus") == "success")
        failedUploads = sum(1 for r in uploadResults if r.get("uploadStatus") == "failed")

        # Create result data
        resultData = {
            "connectionReference": connectionReference,
            "uploadPath": targetPath,
            "documentList": documentList,
            "fileNames": fileNames,
            "sitesAvailable": len(sites),
            "uploadResults": uploadResults,
            "connection": {
                "id": connection["id"],
                "authority": "microsoft",
                "reference": connectionReference
            },
            "timestamp": self.services.utils.timestampGetUtc()
        }

        # Use default JSON format for output
        outputMimeType = "application/json"
        validationMetadata = {
            "actionType": "sharepoint.uploadDocument",
            "connectionReference": connectionReference,
            "uploadPath": targetPath,
            "fileNames": fileNames,
            "uploadCount": len(uploadResults),
            "successfulUploads": successfulUploads,
            "failedUploads": failedUploads
        }

        # The action counts as successful as soon as one file made it to SharePoint.
        overallSuccess = successfulUploads > 0
        self.services.chat.progressLogUpdate(operationId, 0.9, f"Uploaded {successfulUploads}/{len(uploadResults)} file(s)")
        self.services.chat.progressLogFinish(operationId, overallSuccess)

        return ActionResult(
            success=overallSuccess,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_upload", "json", None, "uploadDocument"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType=outputMimeType,
                    validationMetadata=validationMetadata
                )
            ]
        )
    except Exception as e:
        logger.exception(f"Error uploading to SharePoint: {str(e)}")
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                # Best effort: the original error is what gets reported to the caller.
                logger.debug("Could not finish progress log for %s", operationId, exc_info=True)
        return ActionResult(
            success=False,
            error=str(e)
        )