# gateway/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
Upload Document action for SharePoint operations.
Uploads documents to SharePoint.
"""

import logging
import time
import json
import urllib.parse
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument

logger = logging.getLogger(__name__)

@action
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Upload documents to SharePoint. Only to choose this action with a connectionReference
    - Input requirements: connectionReference (required); documentList (required); pathQuery (optional).
    - Output format: JSON with upload status and file info.

    Parameters:
    - connectionReference (str, required): Microsoft connection label.
    - documentList (list, required): Document reference(s) to upload. File names are taken from the documents.
    - pathQuery (str, optional): Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath).
    """
    # NOTE(review): the docstring above is kept verbatim because its GENERAL/Parameters
    # layout looks like it is consumed by the action framework - confirm before rewording.
    #
    # Flow:
    #   1. Start a progress log entry and validate the parameters.
    #   2. Derive the target folder and candidate sites from documentList
    #      (falling back to pathQuery).
    #   3. PUT every file to the first site that accepts it.
    #   4. Return one JSON document describing the outcome of every file.
    #
    # Returns ActionResult.isFailure(...) for validation problems,
    # ActionResult(success=False, ...) for unexpected exceptions, and otherwise
    # ActionResult(success=True, ...) carrying the JSON report. Per-file failures
    # are reported inside that JSON and do not flip the overall success flag.
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_upload_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Upload Document",
            "SharePoint Upload",
            "Processing document list",
            parentOperationId=parentOperationId
        )

        def _fail(message: str) -> ActionResult:
            # Close the progress entry as failed and build the failure result.
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error=message)

        connectionReference = parameters.get("connectionReference")
        documentList = parameters.get("documentList")
        pathQuery = parameters.get("pathQuery")
        if isinstance(documentList, str):
            documentList = [documentList]

        if not connectionReference:
            return _fail("Connection reference is required")

        if not documentList:
            return _fail("Document list is required")

        # Parse documentList to extract folder path and site information
        uploadPath, sites, filesToUpload, errorMsg = await self.documentParsing.parseDocumentListForFolder(documentList)
        if errorMsg:
            return _fail(errorMsg)

        # If no folder path found from documentList, use pathQuery if provided
        if not uploadPath and pathQuery and pathQuery.strip() not in ("", "*"):
            uploadPath = pathQuery
            logger.info(f"Using pathQuery for upload path: {uploadPath}")
            # Resolve sites from pathQuery
            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return _fail(errorMsg)

        # Validate required parameters
        if not uploadPath:
            return _fail("Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get upload folder, or provide pathQuery directly.")

        if not sites:
            return _fail("Site information missing. Cannot determine target site for upload.")

        if not filesToUpload:
            return _fail("No files to upload found in documentList.")

        # Get connection
        self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection")
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return _fail("No valid Microsoft connection found for the provided connection reference")

        # Resolve the target folder once: it does not depend on the file or the site.
        # Values starting with '01' are treated as drive item (folder) IDs.
        if uploadPath.startswith('01'):
            resolvedPaths = [uploadPath]
            logger.info(f"Using folder ID directly for upload: {uploadPath}")
        else:
            resolvedPaths = self.pathProcessing.resolvePathQuery(uploadPath)

        # Use the first upload path or default to Documents
        targetFolder = resolvedPaths[0] if resolvedPaths else "/Documents"
        if targetFolder == "*":
            targetFolder = "/Documents"
            logger.warning("Wildcard path '*' detected, using default '/Documents' folder for upload")
        isFolderId = targetFolder.startswith('01')

        # Files at or above this size are rejected (resumable upload is not implemented).
        maxSimpleUploadBytes = 4 * 1024 * 1024  # 4MB

        uploadResults = []

        # Extract file names from documents
        fileNames = [doc.fileName for doc in filesToUpload]
        logger.info(f"Using file names from documentList: {fileNames}")

        totalFiles = len(filesToUpload)
        self.services.chat.progressLogUpdate(operationId, 0.5, f"Uploading {totalFiles} document(s)")

        for index, (chatDocument, fileName) in enumerate(zip(filesToUpload, fileNames)):
            # Pre-bind so the except handler never reports a stale or unbound id.
            fileId = None
            try:
                fileId = chatDocument.fileId
                fileData = self.services.chat.getFileData(fileId)

                if not fileData:
                    logger.warning(f"File data not found for fileId: {fileId}")
                    uploadResults.append({
                        "fileName": fileName,
                        "fileId": fileId,
                        "error": "File data not found",
                        "uploadStatus": "failed"
                    })
                elif len(fileData) >= maxSimpleUploadBytes:
                    # Size is site-independent, so reject once instead of once per site.
                    logger.warning(f"File too large ({len(fileData)} bytes) for simple upload: {fileName}")
                    uploadResults.append({
                        "fileName": fileName,
                        "fileId": fileId,
                        "error": f"File too large ({len(fileData)} bytes). Files larger than 4MB require resumable upload (not implemented).",
                        "uploadStatus": "failed"
                    })
                else:
                    # Percent-encode the file name so characters such as '#', '?' or '%'
                    # cannot corrupt the request URL.
                    # NOTE(review): the folder path is passed through unchanged because it is
                    # not visible here whether resolvePathQuery already encodes it - confirm.
                    encodedFileName = urllib.parse.quote(fileName)
                    if isFolderId:
                        reportedPath = targetFolder
                    else:
                        reportedPath = targetFolder.rstrip('/') + '/' + fileName

                    uploadSuccessful = False
                    lastError = None

                    # Upload to the first site that accepts the file
                    for site in sites:
                        siteId = site["id"]
                        siteName = site["displayName"]
                        siteUrl = site["webUrl"]

                        if isFolderId:
                            # Folder ID - use the item-relative upload endpoint
                            uploadEndpoint = f"sites/{siteId}/drive/items/{targetFolder}:/{encodedFileName}:/content"
                            logger.info(f"Using folder ID upload endpoint: {uploadEndpoint}")
                        else:
                            # Regular path - use the root-relative upload endpoint
                            folderClean = targetFolder.rstrip('/').lstrip('/')
                            pathClean = f"{folderClean}/{encodedFileName}" if folderClean else encodedFileName
                            uploadEndpoint = f"sites/{siteId}/drive/root:/{pathClean}:/content"
                            logger.info(f"Using path-based upload endpoint: {uploadEndpoint}")

                        uploadResult = await self.apiClient.makeGraphApiCall(
                            uploadEndpoint,
                            method="PUT",
                            data=fileData
                        )

                        if "error" not in uploadResult:
                            uploadResults.append({
                                "fileName": fileName,
                                "fileId": fileId,
                                "uploadStatus": "success",
                                "siteName": siteName,
                                "siteUrl": siteUrl,
                                "uploadPath": reportedPath,
                                "uploadEndpoint": uploadEndpoint,
                                "sharepointFileId": uploadResult.get("id"),
                                "webUrl": uploadResult.get("webUrl"),
                                "size": uploadResult.get("size"),
                                "createdDateTime": uploadResult.get("createdDateTime")
                            })
                            uploadSuccessful = True
                            break

                        lastError = uploadResult['error']
                        logger.warning(f"Upload failed to site {siteName}: {lastError}")

                    if not uploadSuccessful:
                        uploadResults.append({
                            "fileName": fileName,
                            "fileId": fileId,
                            "error": f"Upload failed to all sites. Last error: {lastError}",
                            "uploadStatus": "failed"
                        })

            except Exception as e:
                logger.error(f"Error uploading document {fileName}: {str(e)}")
                uploadResults.append({
                    "fileName": fileName,
                    "fileId": fileId,
                    "error": str(e),
                    "uploadStatus": "failed"
                })

            # Update progress after every file (including failed ones); (index + 1) keeps
            # the fraction in step with the "n/total" message and ends at 0.9.
            self.services.chat.progressLogUpdate(
                operationId,
                0.5 + ((index + 1) * 0.4 / totalFiles),
                f"Uploaded {index + 1}/{totalFiles} file(s)"
            )

        successfulUploads = sum(1 for r in uploadResults if r.get("uploadStatus") == "success")
        failedUploads = sum(1 for r in uploadResults if r.get("uploadStatus") == "failed")

        # Create result data; uploadPath is the requested target folder (per-file
        # paths live in uploadResults).
        resultData = {
            "connectionReference": connectionReference,
            "uploadPath": uploadPath,
            "documentList": documentList,
            "fileNames": fileNames,
            "sitesAvailable": len(sites),
            "uploadResults": uploadResults,
            "connection": {
                "id": connection["id"],
                "authority": "microsoft",
                "reference": connectionReference
            },
            "timestamp": self.services.utils.timestampGetUtc()
        }

        # Use default JSON format for output
        outputMimeType = "application/json"

        validationMetadata = {
            "actionType": "sharepoint.uploadDocument",
            "connectionReference": connectionReference,
            "uploadPath": uploadPath,
            "fileNames": fileNames,
            "uploadCount": len(uploadResults),
            "successfulUploads": successfulUploads,
            "failedUploads": failedUploads
        }

        self.services.chat.progressLogUpdate(operationId, 0.9, f"Uploaded {successfulUploads}/{len(uploadResults)} file(s)")
        self.services.chat.progressLogFinish(operationId, successfulUploads > 0)

        # NOTE(review): success stays True even when every upload failed (the progress
        # entry is then finished as failed) - confirm this is what callers expect.
        return ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_upload", "json", None, "uploadDocument"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType=outputMimeType,
                    validationMetadata=validationMetadata
                )
            ]
        )

    except Exception as e:
        logger.error(f"Error uploading to SharePoint: {str(e)}")
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                # Best effort: closing the progress entry must not mask the original error.
                logger.warning(f"Could not finish progress log {operationId}: {str(finishError)}")
        return ActionResult(
            success=False,
            error=str(e)
        )