# gateway/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

import logging
import time
import json
import urllib.parse
from typing import Dict, Any
from modules.datamodels.datamodelChat import ActionResult, ActionDocument

logger = logging.getLogger(__name__)

# Size cut-off used by this action for a single-request PUT upload; larger
# files would need a resumable upload, which is not implemented here.
_SIMPLE_UPLOAD_MAX_BYTES = 4 * 1024 * 1024


def _isFolderId(path: str) -> bool:
    """Return True when *path* looks like a drive item ID rather than a folder path.

    The heuristic is the "01" prefix this action has always used; a value
    containing '/' is treated as a path, so folders such as "01_Archive/Reports"
    are not mistaken for item IDs.
    """
    return path.startswith("01") and "/" not in path


def _buildUploadEndpoint(siteId: str, targetPath: str, fileName: str) -> str:
    """Build the Graph endpoint used to PUT *fileName* into *targetPath* of a site drive."""
    # Percent-encode the file name so characters such as '#', '?' or '%' cannot
    # truncate or alter the request URL.
    encodedName = urllib.parse.quote(fileName)

    if _isFolderId(targetPath):
        # Item-based addressing: the folder is referenced by its ID.
        return f"sites/{siteId}/drive/items/{targetPath}:/{encodedName}:/content"

    # Path-based addressing is relative to the site's drive root, so a leading
    # "/sites/{siteName}/" prefix is dropped when present.
    relativePath = targetPath
    if relativePath.startswith("/sites/"):
        parts = relativePath.split("/", 3)  # ['', 'sites', 'sitename', 'rest/of/path']
        if len(parts) >= 4:
            relativePath = "/" + parts[3]
            logger.info(f"Stripped site prefix from upload path: '{targetPath}' -> '{relativePath}'")

    # NOTE(review): the folder part is passed through unchanged because it may
    # already be percent-encoded by whatever produced it - confirm upstream.
    folder = relativePath.strip("/")
    fullPath = f"{folder}/{encodedName}" if folder else encodedName
    return f"sites/{siteId}/drive/root:/{fullPath}:/content"


async def _uploadFileToSites(self, sites: list, targetPath: str, fileName: str, fileId: Any) -> Dict[str, Any]:
    """Upload one file to the first site that accepts it and return its result entry.

    The returned dict always carries "fileName", "fileId" and "uploadStatus"
    ("success" or "failed"); failed entries also carry "error".
    """
    fileData = self.services.chat.getFileData(fileId)
    if not fileData:
        logger.warning(f"File data not found for fileId: {fileId}")
        return {
            "fileName": fileName,
            "fileId": fileId,
            "error": "File data not found",
            "uploadStatus": "failed"
        }

    # The size limit does not depend on the site, so check it once up front
    # instead of once per site.
    if len(fileData) >= _SIMPLE_UPLOAD_MAX_BYTES:
        logger.warning(f"File too large ({len(fileData)} bytes): {fileName}")
        limitMb = _SIMPLE_UPLOAD_MAX_BYTES // (1024 * 1024)
        return {
            "fileName": fileName,
            "fileId": fileId,
            "error": f"File too large ({len(fileData)} bytes). Files larger than {limitMb}MB require resumable upload (not implemented).",
            "uploadStatus": "failed"
        }

    lastError = None
    for site in sites:
        siteId = site["id"]
        siteName = site.get("displayName")
        siteUrl = site.get("webUrl")

        uploadEndpoint = _buildUploadEndpoint(siteId, targetPath, fileName)
        logger.info(f"Using upload endpoint: {uploadEndpoint}")

        uploadResult = await self.apiClient.makeGraphApiCall(
            uploadEndpoint,
            method="PUT",
            data=fileData
        )

        if "error" not in uploadResult:
            return {
                "fileName": fileName,
                "fileId": fileId,
                "uploadStatus": "success",
                "siteName": siteName,
                "siteUrl": siteUrl,
                "uploadPath": targetPath,
                "uploadEndpoint": uploadEndpoint,
                "sharepointFileId": uploadResult.get("id"),
                "webUrl": uploadResult.get("webUrl"),
                "size": uploadResult.get("size"),
                "createdDateTime": uploadResult.get("createdDateTime")
            }

        # Remember why this site refused the file and try the next one.
        lastError = uploadResult["error"]
        logger.warning(f"Upload failed to site {siteName}: {lastError}")

    return {
        "fileName": fileName,
        "fileId": fileId,
        "error": f"Upload failed to all sites. Last error: {lastError}",
        "uploadStatus": "failed"
    }


async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """Upload the files referenced by a document list to a SharePoint folder.

    Parameters read from *parameters*:
        connectionReference: reference used to look up the Microsoft connection (required).
        documentList: a single reference or a list of references (required); it is
            parsed for the target folder, the candidate sites and the files to upload.
        pathQuery: fallback upload path, used only when documentList yields no folder
            and the value is neither empty nor "*".
        parentOperationId: optional parent operation for progress logging.

    Returns:
        An ActionResult. On validation or connection problems it is a failure carrying
        an error message. Otherwise it contains one JSON document with per-file upload
        results, and ``success`` is True when at least one file was uploaded.
    """
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_upload_{workflowId}_{int(time.time())}"

        self.services.chat.progressLogStart(
            operationId,
            "Upload Document",
            "SharePoint Upload",
            "Processing document list",
            parentOperationId=parameters.get('parentOperationId')
        )

        def fail(message: str) -> ActionResult:
            """Close the progress entry as failed and build the failure result."""
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error=message)

        connectionReference = parameters.get("connectionReference")
        documentList = parameters.get("documentList")
        pathQuery = parameters.get("pathQuery")
        if isinstance(documentList, str):
            documentList = [documentList]

        if not connectionReference:
            return fail("Connection reference is required")
        if not documentList:
            return fail("Document list is required")

        # Parse documentList to extract folder path and site information
        uploadPath, sites, filesToUpload, errorMsg = await self.documentParsing.parseDocumentListForFolder(documentList)
        if errorMsg:
            return fail(errorMsg)

        # If no folder path found from documentList, use pathQuery if provided
        if not uploadPath and pathQuery and pathQuery.strip() not in ("", "*"):
            uploadPath = pathQuery
            logger.info(f"Using pathQuery for upload path: {uploadPath}")
            sites, errorMsg = await self.siteDiscovery.resolveSitesFromPathQuery(pathQuery)
            if errorMsg:
                return fail(errorMsg)

        # Validate required parameters
        if not uploadPath:
            return fail("Either documentList must contain findDocumentPath result with folder information, or pathQuery must be provided. Use findDocumentPath first to get upload folder, or provide pathQuery directly.")
        if not sites:
            return fail("Site information missing. Cannot determine target site for upload.")
        if not filesToUpload:
            return fail("No files to upload found in documentList.")

        # Get connection
        self.services.chat.progressLogUpdate(operationId, 0.3, "Getting Microsoft connection")
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            return fail("No valid Microsoft connection found for the provided connection reference")

        # Resolve the target folder once; it is the same for every file and site.
        if _isFolderId(uploadPath):
            uploadPaths = [uploadPath]
            logger.info(f"Using folder ID directly for upload: {uploadPath}")
        else:
            uploadPaths = self.pathProcessing.resolvePathQuery(uploadPath)

        # Use the first resolved path, falling back to the default Documents folder.
        targetPath = uploadPaths[0] if uploadPaths else "/Documents"
        if targetPath == "*":
            targetPath = "/Documents"
            logger.warning("Wildcard path '*' detected, using default '/Documents' folder for upload")

        fileNames = [doc.fileName for doc in filesToUpload]
        logger.info(f"Using file names from documentList: {fileNames}")

        totalFiles = len(filesToUpload)
        self.services.chat.progressLogUpdate(operationId, 0.5, f"Uploading {totalFiles} document(s)")

        uploadResults = []
        for position, (chatDocument, fileName) in enumerate(zip(filesToUpload, fileNames), start=1):
            # Bound before the try so the error entry below can always report it.
            fileId = None
            try:
                fileId = chatDocument.fileId
                uploadResults.append(await _uploadFileToSites(self, sites, targetPath, fileName, fileId))
            except Exception as e:
                # One broken file must not abort the remaining uploads.
                logger.error(f"Error uploading document {fileName}: {str(e)}")
                uploadResults.append({
                    "fileName": fileName,
                    "fileId": fileId,
                    "error": str(e),
                    "uploadStatus": "failed"
                })

            # Progress runs from 0.5 to 0.9 and is reported for every file,
            # including the ones that failed.
            self.services.chat.progressLogUpdate(
                operationId,
                0.5 + (position * 0.4 / totalFiles),
                f"Uploaded {position}/{totalFiles} file(s)"
            )

        successfulUploads = sum(1 for r in uploadResults if r.get("uploadStatus") == "success")
        failedUploads = sum(1 for r in uploadResults if r.get("uploadStatus") == "failed")

        # Create result data
        resultData = {
            "connectionReference": connectionReference,
            "uploadPath": targetPath,
            "documentList": documentList,
            "fileNames": fileNames,
            "sitesAvailable": len(sites),
            "uploadResults": uploadResults,
            "connection": {
                "id": connection["id"],
                "authority": "microsoft",
                "reference": connectionReference
            },
            "timestamp": self.services.utils.timestampGetUtc()
        }

        validationMetadata = {
            "actionType": "sharepoint.uploadDocument",
            "connectionReference": connectionReference,
            "uploadPath": targetPath,
            "fileNames": fileNames,
            "uploadCount": len(uploadResults),
            "successfulUploads": successfulUploads,
            "failedUploads": failedUploads
        }

        # The action counts as successful as soon as one file was uploaded.
        overallSuccess = successfulUploads > 0
        self.services.chat.progressLogUpdate(operationId, 0.9, f"Uploaded {successfulUploads}/{len(uploadResults)} file(s)")
        self.services.chat.progressLogFinish(operationId, overallSuccess)

        return ActionResult(
            success=overallSuccess,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_upload", "json", None, "uploadDocument"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType="application/json",
                    validationMetadata=validationMetadata
                )
            ]
        )

    except Exception as e:
        logger.error(f"Error uploading to SharePoint: {str(e)}")
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                # Best effort only: closing the progress entry must not mask the original error.
                logger.warning(f"Could not finish progress log {operationId}: {str(finishError)}")
        return ActionResult(
            success=False,
            error=str(e)
        )