348 lines
No EOL
13 KiB
Python
348 lines
No EOL
13 KiB
Python
"""
|
|
SharePoint Agent Module.
|
|
Handles SharePoint document search and data extraction using Microsoft Graph API.
|
|
"""
|
|
|
|
import logging
import json
from typing import Dict, Any, List, Optional
from urllib.parse import quote, unquote

from modules.workflow.agentBase import AgentBase
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
class AgentSharepoint(AgentBase):
    """Agent for handling SharePoint document operations.

    The agent asks the AI service to turn a free-text prompt into search
    parameters, searches the drives of a SharePoint site through the
    Microsoft connector (``self.service.msft``) and renders the hits as JSON,
    CSV or plain-text summary documents.
    """

    # Base URL of the Microsoft Graph v1.0 REST API used for every request.
    _GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"
    # Result limit used when the search parameters carry no usable one.
    _DEFAULT_MAX_RESULTS = 10

    def __init__(self):
        """Initialize the SharePoint agent and its descriptive metadata."""
        super().__init__()
        self.name = "sharepoint"
        self.label = "SharePoint Agent"
        self.description = "Searches and extracts data from SharePoint documents using Microsoft Graph API"
        self.capabilities = [
            "document_search",
            "content_extraction",
            "metadata_analysis",
            "document_processing",
        ]

    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a SharePoint-related task.

        Args:
            task: Task object containing:
                - prompt: Instructions for the agent
                - inputDocuments: List of documents to process (not used here)
                - outputSpecifications: List of required output documents
                - context: Additional context including workflow info

        Returns:
            Dictionary containing:
                - feedback: Text response explaining what was done
                - documents: List of created documents

        Any exception is caught and reported through ``feedback`` with an
        empty ``documents`` list, so this method does not raise.
        """
        try:
            prompt = task.get("prompt", "")
            # ``or []`` also covers an explicit ``None`` value.
            outputSpecs = task.get("outputSpecifications") or []

            # The AI service is needed to parse the prompt.
            if not self.service.base:
                return {
                    "feedback": "The SharePoint agent requires an AI service to function.",
                    "documents": [],
                }

            # A connector attribute that exists but is None is as unusable as
            # a missing one, so both cases get the same message.
            if getattr(self.service, "msft", None) is None:
                return {
                    "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.",
                    "documents": [],
                }

            # Without a token, hand the frontend a document that starts login.
            if not self.service.msft.getMsftToken():
                return {
                    "feedback": "Microsoft authentication required. Please authenticate to continue.",
                    "documents": [self._createFrontendAuthTriggerDocument()],
                }

            searchQuery = await self._parseSearchQuery(prompt)
            searchResults = await self._searchSharePointDocuments(searchQuery)

            documents = [
                self._createSummaryDocument(spec, searchResults)
                for spec in outputSpecs
            ]

            feedback = f"Found {len(searchResults)} documents matching your search criteria. "
            if not searchResults:
                feedback += "No matching documents were found."
            elif documents:
                feedback += "The results have been saved as documents."
            else:
                # Do not claim anything was saved when nothing was requested.
                feedback += "No output documents were requested, so no documents were created."

            return {
                "feedback": feedback,
                "documents": documents,
            }

        except Exception as e:
            # Top-level boundary: keep the traceback in the log and report the
            # failure to the caller instead of raising.
            logger.exception(f"Error in SharePoint agent: {str(e)}")
            return {
                "feedback": f"Error processing SharePoint task: {str(e)}",
                "documents": [],
            }

    def _createSummaryDocument(self, spec: Dict[str, Any], results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Render ``results`` in the format implied by the spec's label suffix."""
        # NOTE(review): the requested label only selects the format; the
        # produced documents keep fixed file names — confirm this is intended.
        label = spec.get("label") or ""
        description = spec.get("description", "")

        if label.endswith(".json"):
            return self._createSearchSummaryJson(results, description)
        if label.endswith(".csv"):
            return self._createSearchSummaryCsv(results, description)
        return self._createSearchSummaryText(results, description)

    def _createFrontendAuthTriggerDocument(self) -> Dict[str, Any]:
        """Create a document that triggers Microsoft authentication in the frontend."""
        return self.formatAgentDocumentOutput(
            "microsoft_auth.html",
            """
            <div>
                <h2>Microsoft Authentication Required</h2>
                <p>Please click the button below to authenticate with Microsoft:</p>
                <button onclick="window.location.href='/api/auth/microsoft'">Authenticate with Microsoft</button>
            </div>
            """,
            "text/html"
        )

    async def _parseSearchQuery(self, prompt: str) -> Dict[str, Any]:
        """
        Parse the search query from the prompt using AI.

        Args:
            prompt: The task prompt

        Returns:
            Dictionary containing search parameters. It always carries a
            non-empty ``query`` unless ``prompt`` itself is empty; when the AI
            call fails or returns no JSON object, the prompt is used verbatim
            as the query with ``maxResults`` set to 10.
        """
        try:
            response = await self.service.base.callAi([
                {"role": "system", "content": "You are a SharePoint search query parser. Extract search parameters from the user's request."},
                {"role": "user", "content": f"""
                Parse the following SharePoint search request into structured parameters:

                {prompt}

                Return a JSON object with these fields:
                - query: The main search query
                - site: Optional SharePoint site name
                - folder: Optional folder path
                - fileTypes: List of file types to search for
                - dateRange: Optional date range for filtering
                - maxResults: Maximum number of results to return

                Only return valid JSON. No preamble or explanations.
                """}
            ])

            # The model may wrap the JSON in prose; keep only the outermost
            # brace-delimited span.
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1
            if jsonStart < 0 or jsonEnd <= jsonStart:
                return {"query": prompt, "maxResults": self._DEFAULT_MAX_RESULTS}

            searchParams = json.loads(response[jsonStart:jsonEnd])
            # A missing, null or empty query would search for nothing useful;
            # fall back to the raw prompt.
            if not searchParams.get("query"):
                searchParams["query"] = prompt
            return searchParams

        except Exception as e:
            # Best-effort: any AI or parsing failure degrades to a plain query.
            logger.warning(f"Error parsing search query: {str(e)}")
            return {"query": prompt, "maxResults": self._DEFAULT_MAX_RESULTS}

    @classmethod
    def _coerceMaxResults(cls, value: Any) -> int:
        """Return ``value`` as a positive int, or the default when unusable."""
        try:
            limit = int(value)
        except (TypeError, ValueError):
            return cls._DEFAULT_MAX_RESULTS
        return limit if limit > 0 else cls._DEFAULT_MAX_RESULTS

    @staticmethod
    def _normalizeFileTypes(fileTypes: Any) -> List[str]:
        """Return file types as lowercase extensions without ``*`` or ``.`` prefix."""
        if not fileTypes:
            return []
        if isinstance(fileTypes, str):
            fileTypes = [fileTypes]
        cleaned = (str(ft).strip().lower().lstrip("*").lstrip(".") for ft in fileTypes if ft)
        return [ft for ft in cleaned if ft]

    @staticmethod
    def _matchesFilters(item: Dict[str, Any], fileTypes: List[str], folder: str) -> bool:
        """Return True when a search hit passes the file-type and folder filters."""
        if fileTypes:
            name = str(item.get("name") or "")
            _, dot, extension = name.rpartition(".")
            if not dot or extension.lower() not in fileTypes:
                return False

        if folder:
            parentPath = unquote(str((item.get("parentReference") or {}).get("path") or ""))
            # presumably the path looks like ".../root:/Folder/Sub" — TODO confirm
            # against real responses. If no path is present the hit is kept
            # rather than dropped, because the filter cannot be evaluated.
            _, marker, relative = parentPath.partition("root:")
            if marker:
                relative = relative.strip("/").lower()
                wanted = folder.lower()
                if relative != wanted and not relative.startswith(wanted + "/"):
                    return False

        return True

    async def _searchSharePointDocuments(self, searchParams: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Search SharePoint documents using Microsoft Graph API.

        Every drive of the selected site (``sites/root`` when no site is
        given) is searched; ``maxResults`` limits the hits requested per
        drive. File-type and folder filters are applied to the returned
        items in this method.

        Args:
            searchParams: Search parameters (``query``, ``site``, ``folder``,
                ``fileTypes``, ``maxResults``); missing or malformed values
                fall back to defaults.

        Returns:
            List of search results, each with ``name``, ``id``, ``driveId``,
            ``webUrl``, ``lastModified``, ``size`` and ``content``. An empty
            list is returned when no token is available or any error occurs.
        """
        try:
            if not self.service.msft.getMsftToken():
                return []

            # The parameters come from AI-generated JSON, so coerce them.
            query = str(searchParams.get("query") or "")
            site = searchParams.get("site") or "root"
            folder = str(searchParams.get("folder") or "").strip("/")
            fileTypes = self._normalizeFileTypes(searchParams.get("fileTypes"))
            maxResults = self._coerceMaxResults(searchParams.get("maxResults"))

            # Drives are the site's document libraries.
            drivesResponse = self.service.msft.makeGraphRequest(
                "GET", f"{self._GRAPH_BASE_URL}/sites/{site}/drives"
            )
            if not drivesResponse or "value" not in drivesResponse:
                return []

            # Inside the quoted search literal a single quote is escaped by
            # doubling it; the result is then percent-encoded for the URL.
            encodedQuery = quote(query.replace("'", "''"), safe="")

            results = []
            for drive in drivesResponse["value"]:
                driveId = drive["id"]
                # Only $top is sent: the drive search endpoint is documented
                # to accept $expand/$orderby/$select/$skipToken/$top, so the
                # type and folder filters are evaluated locally below.
                searchEndpoint = (
                    f"{self._GRAPH_BASE_URL}/drives/{driveId}/root/"
                    f"search(q='{encodedQuery}')?$top={maxResults}"
                )

                searchResponse = self.service.msft.makeGraphRequest("GET", searchEndpoint)
                if not searchResponse or "value" not in searchResponse:
                    continue

                for item in searchResponse["value"]:
                    if not self._matchesFilters(item, fileTypes, folder):
                        continue

                    # Folders have no downloadable content stream.
                    if "folder" in item:
                        fileContent = ""
                    else:
                        fileContent = await self._getFileContent(driveId, item["id"])

                    # .get() keeps one incomplete hit from discarding all others.
                    results.append({
                        "name": item.get("name", ""),
                        "id": item["id"],
                        "driveId": driveId,
                        "webUrl": item.get("webUrl", ""),
                        "lastModified": item.get("lastModifiedDateTime", ""),
                        "size": item.get("size", 0),
                        "content": fileContent,
                    })

            return results

        except Exception as e:
            logger.error(f"Error searching SharePoint: {str(e)}")
            return []

    async def _getFileContent(self, driveId: str, fileId: str) -> str:
        """
        Get file content from SharePoint.

        Args:
            driveId: Drive ID
            fileId: File ID

        Returns:
            File content as string; an empty string when the download yields
            nothing, the payload is not UTF-8 text, or the request fails.
        """
        try:
            contentUrl = f"{self._GRAPH_BASE_URL}/drives/{driveId}/items/{fileId}/content"
            response = self.service.msft.makeGraphRequest("GET", contentUrl, raw=True)
            if not response:
                return ""
            if isinstance(response, str):
                # Already decoded by the connector; nothing left to do.
                return response
            return response.decode('utf-8')

        except UnicodeDecodeError:
            # Binary formats are expected here, so this is not an error.
            logger.warning(f"File {fileId} is not UTF-8 text; content skipped")
            return ""
        except Exception as e:
            logger.error(f"Error getting file content: {str(e)}")
            return ""

    def _createSearchSummaryJson(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a JSON summary of search results (file content is omitted)."""
        summary = {
            "description": description,
            "totalResults": len(results),
            "results": [
                {
                    "name": result["name"],
                    "url": result["webUrl"],
                    "lastModified": result["lastModified"],
                    "size": result["size"],
                }
                for result in results
            ],
        }

        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.json",
            json.dumps(summary, indent=2),
            "application/json"
        )

    @staticmethod
    def _csvQuote(value: Any) -> str:
        """Return ``value`` as a double-quoted CSV field with quotes doubled."""
        text = "" if value is None else str(value)
        return '"' + text.replace('"', '""') + '"'

    def _createSearchSummaryCsv(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a CSV summary of search results.

        ``description`` is accepted for signature parity with the other
        summary builders; it is not written to the CSV.
        """
        csvLines = ["Name,URL,Last Modified,Size (bytes)"]
        for result in results:
            # Text fields are always quoted; the size is written bare.
            csvLines.append(",".join([
                self._csvQuote(result["name"]),
                self._csvQuote(result["webUrl"]),
                self._csvQuote(result["lastModified"]),
                str(result["size"]),
            ]))

        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.csv",
            "\n".join(csvLines),
            "text/csv"
        )

    def _createSearchSummaryText(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a text summary of search results."""
        textLines = [
            "SharePoint Search Results",
            f"Description: {description}",
            f"Total Results: {len(results)}",
            "\nResults:",
        ]

        for result in results:
            # The leading newline separates consecutive entries with a blank line.
            textLines.extend([
                f"\nName: {result['name']}",
                f"URL: {result['webUrl']}",
                f"Last Modified: {result['lastModified']}",
                f"Size: {result['size']} bytes",
            ])

        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.txt",
            "\n".join(textLines),
            "text/plain"
        )
|
|
|
|
def getAgentSharepoint() -> AgentSharepoint:
    """Build a new AgentSharepoint instance and hand it to the caller."""
    agent = AgentSharepoint()
    return agent