# gateway/modules/agents/agentSharepoint.py

"""
SharePoint Agent Module.
Handles SharePoint document search and data extraction using Microsoft Graph API.
"""

import logging
import json
from typing import Dict, Any, List, Optional
from modules.workflow.agentBase import AgentBase

logger = logging.getLogger(__name__)

class AgentSharepoint(AgentBase):
    """Agent for handling SharePoint document operations."""

    def __init__(self):
        """Initialize the SharePoint agent."""
        super().__init__()
        self.name = "sharepoint"
        self.label = "SharePoint Agent"
        self.description = "Searches and extracts data from SharePoint documents using Microsoft Graph API"
        self.capabilities = [
            "document_search",
            "content_extraction",
            "metadata_analysis",
            "document_processing"
        ]

    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a SharePoint-related task.

        Args:
            task: Task object containing:
                - prompt: Instructions for the agent
                - inputDocuments: List of documents to process
                - outputSpecifications: List of required output documents
                - context: Additional context including workflow info

        Returns:
            Dictionary containing:
                - feedback: Text response explaining what was done
                - documents: List of created documents
        """
        try:
            # Extract task information
            prompt = task.get("prompt", "")
            inputDocuments = task.get("inputDocuments", [])
            outputSpecs = task.get("outputSpecifications", [])

            # Check AI service
            if not self.service.base:
                return {
                    "feedback": "The SharePoint agent requires an AI service to function.",
                    "documents": []
                }

            # Check if Microsoft connector is available
            if not hasattr(self.service, 'msft'):
                return {
                    "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.",
                    "documents": []
                }

            # Get Microsoft token
            token_data = self.service.msft.getMsftToken()
            if not token_data:
                # Create authentication trigger document
                auth_doc = self._createFrontendAuthTriggerDocument()
                return {
                    "feedback": "Microsoft authentication required. Please authenticate to continue.",
                    "documents": [auth_doc]
                }

            # Parse the search query from the prompt
            searchQuery = await self._parseSearchQuery(prompt)

            # Search SharePoint documents
            searchResults = await self._searchSharePointDocuments(searchQuery)

            # Process search results
            documents = []
            for spec in outputSpecs:
                label = spec.get("label", "")
                description = spec.get("description", "")

                if label.endswith(".json"):
                    # Create JSON summary of search results
                    summaryDoc = self._createSearchSummaryJson(searchResults, description)
                    documents.append(summaryDoc)
                elif label.endswith(".csv"):
                    # Create CSV summary of search results
                    summaryDoc = self._createSearchSummaryCsv(searchResults, description)
                    documents.append(summaryDoc)
                else:
                    # Create text summary of search results
                    summaryDoc = self._createSearchSummaryText(searchResults, description)
                    documents.append(summaryDoc)

            # Prepare feedback message
            feedback = f"Found {len(searchResults)} documents matching your search criteria. "
            if searchResults:
                feedback += "The results have been saved as documents."
            else:
                feedback += "No matching documents were found."

            return {
                "feedback": feedback,
                "documents": documents
            }

        except Exception as e:
            logger.error(f"Error in SharePoint agent: {str(e)}")
            return {
                "feedback": f"Error processing SharePoint task: {str(e)}",
                "documents": []
            }

    def _createFrontendAuthTriggerDocument(self) -> Dict[str, Any]:
        """Create a document that triggers Microsoft authentication in the frontend."""
        return self.formatAgentDocumentOutput(
            "microsoft_auth.html",
            """
            <div>
                <h2>Microsoft Authentication Required</h2>
                <p>Please click the button below to authenticate with Microsoft:</p>
                <button onclick="window.location.href='/api/auth/microsoft'">Authenticate with Microsoft</button>
            </div>
            """,
            "text/html"
        )

    async def _parseSearchQuery(self, prompt: str) -> Dict[str, Any]:
        """
        Parse the search query from the prompt using AI.

        Args:
            prompt: The task prompt

        Returns:
            Dictionary containing search parameters
        """
        try:
            # Use AI to parse the search query
            response = await self.service.base.callAi([
                {"role": "system", "content": "You are a SharePoint search query parser. Extract search parameters from the user's request."},
                {"role": "user", "content": f"""
                Parse the following SharePoint search request into structured parameters:

                {prompt}

                Return a JSON object with these fields:
                - query: The main search query
                - site: Optional SharePoint site name
                - folder: Optional folder path
                - fileTypes: List of file types to search for
                - dateRange: Optional date range for filtering
                - maxResults: Maximum number of results to return

                Only return valid JSON. No preamble or explanations.
                """}
            ])

            # Extract JSON from response
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1

            if jsonStart >= 0 and jsonEnd > jsonStart:
                return json.loads(response[jsonStart:jsonEnd])
            else:
                # Fallback to simple query
                return {
                    "query": prompt,
                    "maxResults": 10
                }

        except Exception as e:
            logger.warning(f"Error parsing search query: {str(e)}")
            return {
                "query": prompt,
                "maxResults": 10
            }

    async def _searchSharePointDocuments(self, searchParams: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Search SharePoint documents using Microsoft Graph API.

        Args:
            searchParams: Search parameters

        Returns:
            List of search results
        """
        try:
            # Get Microsoft token
            token = self.service.msft.getMsftToken()
            if not token:
                return []

            # Prepare search query
            query = searchParams.get("query", "")
            site = searchParams.get("site", "")
            folder = searchParams.get("folder", "")
            fileTypes = searchParams.get("fileTypes", [])
            maxResults = searchParams.get("maxResults", 10)

            # Build search URL
            searchUrl = "https://graph.microsoft.com/v1.0/sites/root/drives"
            if site:
                searchUrl = f"https://graph.microsoft.com/v1.0/sites/{site}/drives"

            # Get drives (document libraries)
            response = self.service.msft.makeGraphRequest("GET", searchUrl)
            if not response or "value" not in response:
                return []

            results = []
            for drive in response["value"]:
                # Search in each drive
                driveId = drive["id"]
                searchEndpoint = f"https://graph.microsoft.com/v1.0/drives/{driveId}/root/search(q='{query}')"

                # Add file type filters if specified
                if fileTypes:
                    typeFilter = " or ".join([f"fileType eq '{ft}'" for ft in fileTypes])
                    searchEndpoint += f"&filter={typeFilter}"

                # Add folder filter if specified
                if folder:
                    searchEndpoint += f"&filter=parentReference/path eq '/{folder}'"

                # Add result limit
                searchEndpoint += f"&top={maxResults}"

                # Make the search request
                searchResponse = self.service.msft.makeGraphRequest("GET", searchEndpoint)
                if searchResponse and "value" in searchResponse:
                    for item in searchResponse["value"]:
                        # Get file content
                        fileContent = await self._getFileContent(driveId, item["id"])

                        results.append({
                            "name": item["name"],
                            "id": item["id"],
                            "driveId": driveId,
                            "webUrl": item["webUrl"],
                            "lastModified": item["lastModifiedDateTime"],
                            "size": item["size"],
                            "content": fileContent
                        })

            return results

        except Exception as e:
            logger.error(f"Error searching SharePoint: {str(e)}")
            return []

    async def _getFileContent(self, driveId: str, fileId: str) -> str:
        """
        Get file content from SharePoint.

        Args:
            driveId: Drive ID
            fileId: File ID

        Returns:
            File content as string
        """
        try:
            # Get file content URL
            contentUrl = f"https://graph.microsoft.com/v1.0/drives/{driveId}/items/{fileId}/content"

            # Download file content
            response = self.service.msft.makeGraphRequest("GET", contentUrl, raw=True)
            if response:
                return response.decode('utf-8')
            return ""

        except Exception as e:
            logger.error(f"Error getting file content: {str(e)}")
            return ""

    def _createSearchSummaryJson(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a JSON summary of search results."""
        summary = {
            "description": description,
            "totalResults": len(results),
            "results": []
        }

        for result in results:
            summary["results"].append({
                "name": result["name"],
                "url": result["webUrl"],
                "lastModified": result["lastModified"],
                "size": result["size"]
            })

        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.json",
            json.dumps(summary, indent=2),
            "application/json"
        )

    def _createSearchSummaryCsv(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a CSV summary of search results."""
        csvLines = ["Name,URL,Last Modified,Size (bytes)"]

        for result in results:
            name = result["name"].replace('"', '""')
            url = result["webUrl"].replace('"', '""')
            lastModified = result["lastModified"].replace('"', '""')
            size = str(result["size"])

            csvLines.append(f'"{name}","{url}","{lastModified}",{size}')

        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.csv",
            "\n".join(csvLines),
            "text/csv"
        )

    def _createSearchSummaryText(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a text summary of search results."""
        textLines = [
            f"SharePoint Search Results",
            f"Description: {description}",
            f"Total Results: {len(results)}",
            "\nResults:"
        ]

        for result in results:
            textLines.extend([
                f"\nName: {result['name']}",
                f"URL: {result['webUrl']}",
                f"Last Modified: {result['lastModified']}",
                f"Size: {result['size']} bytes"
            ])

        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.txt",
            "\n".join(textLines),
            "text/plain"
        )

def getAgentSharepoint() -> AgentSharepoint:
    """Factory function: build a new AgentSharepoint and return it."""
    agent = AgentSharepoint()
    return agent