"""
SharePoint Agent Module.
Handles SharePoint document search and data extraction using Microsoft Graph API.
"""

import logging
import json
from typing import Dict, Any, List, Optional
from urllib.parse import quote, unquote

from modules.workflow.agentBase import AgentBase

logger = logging.getLogger(__name__)

# Base URL of the Microsoft Graph REST API used for every request in this module.
_GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"

# Result limit used when the parsed search parameters carry no usable value.
_DEFAULT_MAX_RESULTS = 10


class AgentSharepoint(AgentBase):
    """Agent for handling SharePoint document operations."""

    def __init__(self):
        """Initialize the SharePoint agent."""
        super().__init__()
        self.name = "sharepoint"
        self.label = "SharePoint Agent"
        self.description = "Searches and extracts data from SharePoint documents using Microsoft Graph API"
        self.capabilities = [
            "document_search",
            "content_extraction",
            "metadata_analysis",
            "document_processing",
        ]

    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a SharePoint-related task.

        Args:
            task: Task object containing:
                - prompt: Instructions for the agent
                - outputSpecifications: List of required output documents

        Returns:
            Dictionary containing:
                - feedback: Text response explaining what was done
                - documents: List of created documents
        """
        try:
            prompt = task.get("prompt", "")
            # "or []" also covers an explicit None so the loop below cannot fail.
            outputSpecs = task.get("outputSpecifications") or []

            # Check AI service
            if not self.service.base:
                return {
                    "feedback": "The SharePoint agent requires an AI service to function.",
                    "documents": [],
                }

            # Check if Microsoft connector is available
            if not hasattr(self.service, "msft"):
                return {
                    "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.",
                    "documents": [],
                }

            # Without a token the frontend must run the authentication flow first.
            if not self.service.msft.getMsftToken():
                authDoc = self._createFrontendAuthTriggerDocument()
                return {
                    "feedback": "Microsoft authentication required. Please authenticate to continue.",
                    "documents": [authDoc],
                }

            searchQuery = await self._parseSearchQuery(prompt)
            searchResults = await self._searchSharePointDocuments(searchQuery)

            # One summary document per requested output specification.
            documents = [
                self._createSummaryDocument(spec, searchResults)
                for spec in outputSpecs
            ]

            feedback = f"Found {len(searchResults)} documents matching your search criteria. "
            if searchResults:
                feedback += "The results have been saved as documents."
            else:
                feedback += "No matching documents were found."

            return {"feedback": feedback, "documents": documents}

        except Exception as e:
            logger.error("Error in SharePoint agent: %s", e)
            return {
                "feedback": f"Error processing SharePoint task: {str(e)}",
                "documents": [],
            }

    def _createSummaryDocument(self, spec: Dict[str, Any], results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Build the summary document whose format matches the extension of the spec's label."""
        label = spec.get("label", "")
        description = spec.get("description", "")
        if label.endswith(".json"):
            return self._createSearchSummaryJson(results, description)
        if label.endswith(".csv"):
            return self._createSearchSummaryCsv(results, description)
        # Any other label falls back to a plain-text summary.
        return self._createSearchSummaryText(results, description)

    def _createFrontendAuthTriggerDocument(self) -> Dict[str, Any]:
        """Create a document that triggers Microsoft authentication in the frontend."""
        return self.formatAgentDocumentOutput(
            "microsoft_auth.html",
            "\n\nMicrosoft Authentication Required\n"
            "\nPlease click the button below to authenticate with Microsoft:\n\n",
            "text/html",
        )

    async def _parseSearchQuery(self, prompt: str) -> Dict[str, Any]:
        """
        Parse the search query from the prompt using AI.

        Args:
            prompt: The task prompt

        Returns:
            Dictionary containing search parameters. When the AI answer holds
            no JSON object, or parsing fails, the raw prompt is used as the query.
        """
        fallback = {"query": prompt, "maxResults": _DEFAULT_MAX_RESULTS}
        userMessage = (
            " Parse the following SharePoint search request into structured parameters: "
            f"{prompt} "
            "Return a JSON object with these fields: "
            "- query: The main search query "
            "- site: Optional SharePoint site name "
            "- folder: Optional folder path "
            "- fileTypes: List of file types to search for "
            "- dateRange: Optional date range for filtering "
            "- maxResults: Maximum number of results to return "
            "Only return valid JSON. No preamble or explanations. "
        )
        try:
            response = await self.service.base.callAi([
                {
                    "role": "system",
                    "content": "You are a SharePoint search query parser. Extract search parameters from the user's request.",
                },
                {"role": "user", "content": userMessage},
            ])

            # The answer may wrap the JSON in extra text; keep only the
            # outermost {...} span.
            jsonStart = response.find("{")
            jsonEnd = response.rfind("}") + 1
            if jsonStart < 0 or jsonEnd <= jsonStart:
                return fallback

            parsed = json.loads(response[jsonStart:jsonEnd])
            # A parsed object without a usable query would search for an empty
            # string; fall back to the prompt in that case.
            if not parsed.get("query"):
                parsed["query"] = prompt
            return parsed

        except Exception as e:
            logger.warning("Error parsing search query: %s", e)
            return fallback

    @staticmethod
    def _coercePositiveInt(value: Any, default: int) -> int:
        """Return value as a positive int, or default when it is missing or invalid."""
        try:
            number = int(value)
        except (TypeError, ValueError):
            return default
        return number if number > 0 else default

    @staticmethod
    def _normalizeExtensions(fileTypes: Any) -> tuple:
        """Turn file types such as "pdf" or ".PDF" into a tuple of lowercase ".ext" suffixes."""
        if isinstance(fileTypes, str):
            # A bare string would otherwise be iterated character by character.
            fileTypes = [fileTypes]
        suffixes = []
        for fileType in fileTypes or []:
            bare = str(fileType).strip().lstrip(".").lower()
            if bare:
                suffixes.append("." + bare)
        return tuple(suffixes)

    @staticmethod
    def _matchesFilters(item: Dict[str, Any], extensions: tuple, folder: str) -> bool:
        """
        Apply the file-type and folder filters to one search hit.

        Args:
            item: Item returned by the search request
            extensions: Lowercase ".ext" suffixes to accept; empty means any
            folder: Slash-stripped, casefolded folder path; empty means any

        Returns:
            True when the item passes every active filter
        """
        if extensions:
            name = str(item.get("name") or "").lower()
            if not name.endswith(extensions):
                return False

        if folder:
            parentPath = (item.get("parentReference") or {}).get("path") or ""
            # NOTE(review): assumes the parent path has the form
            # ".../root:/Some/Folder" - confirm against real responses. Items
            # without a path are kept because the filter cannot be checked.
            if parentPath:
                relative = unquote(parentPath.partition("root:")[2])
                if relative.strip("/").casefold() != folder:
                    return False

        return True

    async def _searchSharePointDocuments(self, searchParams: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Search SharePoint documents using Microsoft Graph API.

        Lists the drives of the requested site (or the root site), runs the
        search in every drive and downloads the content of each hit. File-type
        and folder restrictions are applied to the returned items rather than
        sent as query options, so a drive may contribute fewer than
        maxResults entries.

        Args:
            searchParams: Search parameters (query, site, folder, fileTypes,
                maxResults)

        Returns:
            List of search results; empty on missing token or any error
        """
        try:
            if not self.service.msft.getMsftToken():
                return []

            # The parameters come from AI output, so every value may be
            # missing, None or of an unexpected type.
            query = str(searchParams.get("query") or "")
            site = searchParams.get("site") or ""
            folder = unquote(str(searchParams.get("folder") or "")).strip("/").casefold()
            extensions = self._normalizeExtensions(searchParams.get("fileTypes"))
            maxResults = self._coercePositiveInt(
                searchParams.get("maxResults"), _DEFAULT_MAX_RESULTS
            )

            # Get drives (document libraries)
            # NOTE(review): "site" is interpolated as-is; Graph presumably
            # expects a site id here rather than a display name - confirm.
            drivesUrl = f"{_GRAPH_BASE_URL}/sites/{site or 'root'}/drives"
            drivesResponse = self.service.msft.makeGraphRequest("GET", drivesUrl)
            if not drivesResponse or "value" not in drivesResponse:
                return []

            # The search text sits inside a single-quoted literal in the URL:
            # double embedded quotes, then percent-encode everything.
            escapedQuery = quote(query.replace("'", "''"), safe="")

            results = []
            for drive in drivesResponse["value"]:
                driveId = drive["id"]
                searchEndpoint = (
                    f"{_GRAPH_BASE_URL}/drives/{driveId}/root/"
                    f"search(q='{escapedQuery}')?$top={maxResults}"
                )
                searchResponse = self.service.msft.makeGraphRequest("GET", searchEndpoint)
                if not searchResponse or "value" not in searchResponse:
                    continue

                for item in searchResponse["value"]:
                    itemId = item.get("id")
                    if not itemId or not self._matchesFilters(item, extensions, folder):
                        continue

                    # Folders have no downloadable content.
                    if "folder" in item:
                        fileContent = ""
                    else:
                        fileContent = await self._getFileContent(driveId, itemId)

                    results.append({
                        "name": item.get("name") or "",
                        "id": itemId,
                        "driveId": driveId,
                        "webUrl": item.get("webUrl") or "",
                        "lastModified": item.get("lastModifiedDateTime") or "",
                        "size": item.get("size") or 0,
                        "content": fileContent,
                    })

            return results

        except Exception as e:
            logger.error("Error searching SharePoint: %s", e)
            return []

    async def _getFileContent(self, driveId: str, fileId: str) -> str:
        """
        Get file content from SharePoint.

        Args:
            driveId: Drive ID
            fileId: File ID

        Returns:
            File content as string; empty when the download yields nothing,
            the content is not valid UTF-8 text, or the request fails
        """
        contentUrl = f"{_GRAPH_BASE_URL}/drives/{driveId}/items/{fileId}/content"
        try:
            response = self.service.msft.makeGraphRequest("GET", contentUrl, raw=True)
            if not response:
                return ""
            if isinstance(response, str):
                return response
            return response.decode("utf-8")
        except UnicodeDecodeError:
            # Binary formats are expected here and are not an error condition.
            logger.debug("File %s in drive %s is not UTF-8 text", fileId, driveId)
            return ""
        except Exception as e:
            logger.error("Error getting file content: %s", e)
            return ""

    def _createSearchSummaryJson(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a JSON summary of search results."""
        summary = {
            "description": description,
            "totalResults": len(results),
            "results": [
                {
                    "name": result["name"],
                    "url": result["webUrl"],
                    "lastModified": result["lastModified"],
                    "size": result["size"],
                }
                for result in results
            ],
        }
        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.json",
            json.dumps(summary, indent=2),
            "application/json",
        )

    def _createSearchSummaryCsv(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a CSV summary of search results (description is not part of the CSV)."""
        csvLines = ["Name,URL,Last Modified,Size (bytes)"]
        for result in results:
            # Text fields are always quoted; embedded quotes are doubled.
            name = str(result["name"]).replace('"', '""')
            url = str(result["webUrl"]).replace('"', '""')
            lastModified = str(result["lastModified"]).replace('"', '""')
            size = str(result["size"])
            csvLines.append(f'"{name}","{url}","{lastModified}",{size}')

        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.csv",
            "\n".join(csvLines),
            "text/csv",
        )

    def _createSearchSummaryText(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a text summary of search results."""
        textLines = [
            "SharePoint Search Results",
            f"Description: {description}",
            f"Total Results: {len(results)}",
            "\nResults:",
        ]
        for result in results:
            textLines.extend([
                f"\nName: {result['name']}",
                f"URL: {result['webUrl']}",
                f"Last Modified: {result['lastModified']}",
                f"Size: {result['size']} bytes",
            ])

        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.txt",
            "\n".join(textLines),
            "text/plain",
        )


def getAgentSharepoint() -> AgentSharepoint:
    """Factory function to create and return a SharePointAgent instance."""
    return AgentSharepoint()