# gateway/modules/agents/agentSharepoint.py
# ValueOn AG cf94b1115b ref
# 2025-05-26 07:04:30 +02:00

# 348 lines
# No EOL
# 13 KiB
# Python

"""
SharePoint Agent Module.
Handles SharePoint document search and data extraction using Microsoft Graph API.
"""
import json
import logging
from typing import Dict, Any, List, Optional
from urllib.parse import quote

from modules.workflow.agentBase import AgentBase
# Module-level logger named after this module; the agent's methods log through it.
logger = logging.getLogger(__name__)
class AgentSharepoint(AgentBase):
    """Agent for handling SharePoint document operations.

    The agent asks the AI service to turn a free-text prompt into search
    parameters, searches the drives of a SharePoint site through the
    Microsoft Graph API, and renders the hits as JSON, CSV or plain-text
    summary documents.
    """

    # Base URL shared by every Graph request issued by this agent.
    _GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"
    # Result limit used when the parsed query carries no usable ``maxResults``.
    _DEFAULT_MAX_RESULTS = 10

    def __init__(self):
        """Initialize the SharePoint agent and its descriptive metadata."""
        super().__init__()
        self.name = "sharepoint"
        self.label = "SharePoint Agent"
        self.description = "Searches and extracts data from SharePoint documents using Microsoft Graph API"
        self.capabilities = [
            "document_search",
            "content_extraction",
            "metadata_analysis",
            "document_processing",
        ]

    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a SharePoint-related task.

        Args:
            task: Task object containing:
                - prompt: Instructions for the agent
                - inputDocuments: List of documents to process (not used by
                  this agent at present)
                - outputSpecifications: List of required output documents
                - context: Additional context including workflow info

        Returns:
            Dictionary containing:
                - feedback: Text response explaining what was done
                - documents: List of created documents

        Any exception is logged and reported through ``feedback`` rather than
        propagated to the caller.
        """
        try:
            prompt = task.get("prompt", "")
            # Tolerate an explicit ``None`` as well as a missing key.
            outputSpecs = task.get("outputSpecifications") or []

            if not self.service.base:
                return {
                    "feedback": "The SharePoint agent requires an AI service to function.",
                    "documents": [],
                }

            # ``hasattr`` alone would accept an attribute that is set to None
            # and then fail on the token lookup below.
            if getattr(self.service, "msft", None) is None:
                return {
                    "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.",
                    "documents": [],
                }

            if not self.service.msft.getMsftToken():
                # No token: hand the frontend a document that starts the login.
                return {
                    "feedback": "Microsoft authentication required. Please authenticate to continue.",
                    "documents": [self._createFrontendAuthTriggerDocument()],
                }

            searchQuery = await self._parseSearchQuery(prompt)
            searchResults = await self._searchSharePointDocuments(searchQuery)

            documents = [
                self._createSummaryDocument(spec, searchResults)
                for spec in outputSpecs
            ]

            feedback = f"Found {len(searchResults)} documents matching your search criteria. "
            if not searchResults:
                feedback += "No matching documents were found."
            elif documents:
                feedback += "The results have been saved as documents."
            else:
                # Do not claim documents were saved when none were requested.
                feedback += "No output documents were requested, so nothing was saved."

            return {
                "feedback": feedback,
                "documents": documents,
            }
        except Exception as e:
            logger.exception("Error in SharePoint agent: %s", e)
            return {
                "feedback": f"Error processing SharePoint task: {str(e)}",
                "documents": [],
            }

    def _createSummaryDocument(self, spec: Dict[str, Any], results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Render the results in the format implied by the spec label's extension."""
        # NOTE(review): the builders emit fixed file names and ignore the
        # requested label itself — confirm whether the workflow matches
        # output documents by label.
        label = str(spec.get("label") or "").lower()
        description = spec.get("description") or ""
        if label.endswith(".json"):
            return self._createSearchSummaryJson(results, description)
        if label.endswith(".csv"):
            return self._createSearchSummaryCsv(results, description)
        return self._createSearchSummaryText(results, description)

    def _createFrontendAuthTriggerDocument(self) -> Dict[str, Any]:
        """Create a document that triggers Microsoft authentication in the frontend."""
        return self.formatAgentDocumentOutput(
            "microsoft_auth.html",
            """
            <div>
            <h2>Microsoft Authentication Required</h2>
            <p>Please click the button below to authenticate with Microsoft:</p>
            <button onclick="window.location.href='/api/auth/microsoft'">Authenticate with Microsoft</button>
            </div>
            """,
            "text/html",
        )

    async def _parseSearchQuery(self, prompt: str) -> Dict[str, Any]:
        """
        Parse the search query from the prompt using AI.

        Args:
            prompt: The task prompt

        Returns:
            Dictionary containing search parameters. When the AI call fails or
            its answer holds no parsable JSON object, the prompt itself is
            used as the query with the default result limit.
        """
        fallback = {
            "query": prompt,
            "maxResults": self._DEFAULT_MAX_RESULTS,
        }
        userMessage = (
            "\n"
            "Parse the following SharePoint search request into structured parameters:\n"
            f"{prompt}\n"
            "Return a JSON object with these fields:\n"
            "- query: The main search query\n"
            "- site: Optional SharePoint site name\n"
            "- folder: Optional folder path\n"
            "- fileTypes: List of file types to search for\n"
            "- dateRange: Optional date range for filtering\n"
            "- maxResults: Maximum number of results to return\n"
            "Only return valid JSON. No preamble or explanations.\n"
        )
        try:
            response = await self.service.base.callAi([
                {"role": "system", "content": "You are a SharePoint search query parser. Extract search parameters from the user's request."},
                {"role": "user", "content": userMessage},
            ])
            # The model may wrap the JSON in extra text; keep the outermost
            # brace-delimited span only.
            jsonStart = response.find("{")
            jsonEnd = response.rfind("}") + 1
            if jsonStart < 0 or jsonEnd <= jsonStart:
                return fallback
            parsed = json.loads(response[jsonStart:jsonEnd])
        except Exception as e:
            # Best effort: any failure degrades to searching for the raw prompt.
            logger.warning("Error parsing search query: %s", e)
            return fallback

        # An answer without a usable query would otherwise search for "".
        if not parsed.get("query"):
            parsed["query"] = prompt
        return parsed

    @classmethod
    def _coerceMaxResults(cls, value: Any) -> int:
        """Return ``value`` as a positive int, or the default limit if it is not one."""
        try:
            limit = int(value)
        except (TypeError, ValueError):
            return cls._DEFAULT_MAX_RESULTS
        return limit if limit > 0 else cls._DEFAULT_MAX_RESULTS

    @staticmethod
    def _extensionSuffixes(fileTypes: Any) -> tuple:
        """Normalize requested file types to lower-case ``.ext`` suffixes."""
        if not fileTypes:
            return ()
        if isinstance(fileTypes, str):
            fileTypes = [fileTypes]
        cleaned = (str(ft).strip().lower().lstrip("*.") for ft in fileTypes if ft)
        return tuple(f".{ext}" for ext in cleaned if ext)

    def _buildSearchEndpoint(self, driveId: str, query: str, folder: str, maxResults: int) -> str:
        """Build the Graph search URL for one drive, optionally scoped to a folder."""
        # OData string literals escape a single quote by doubling it; the
        # result is percent-encoded so '&', '#', '?' etc. cannot leak into
        # the URL structure.
        searchText = quote(query.replace("'", "''"), safe="")
        folderPath = folder.strip("/")
        if folderPath:
            # NOTE(review): relies on Graph path-based addressing
            # (root:/path:) to scope the search to a folder relative to the
            # drive root — confirm against the target tenant.
            rootSegment = f"root:/{quote(folderPath)}:"
        else:
            rootSegment = "root"
        # Query options must follow a '?' and carry the OData '$' prefix.
        return (
            f"{self._GRAPH_BASE_URL}/drives/{driveId}/{rootSegment}"
            f"/search(q='{searchText}')?$top={maxResults}"
        )

    async def _searchSharePointDocuments(self, searchParams: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Search SharePoint documents using Microsoft Graph API.

        Every drive of the requested site (or of the root site) is searched
        in turn until ``maxResults`` files have been collected overall.

        Args:
            searchParams: Search parameters; the keys read are ``query``,
                ``site``, ``folder``, ``fileTypes`` and ``maxResults``.

        Returns:
            List of search results, each with ``name``, ``id``, ``driveId``,
            ``webUrl``, ``lastModified``, ``size`` and ``content``. Empty when
            no token is available or the search fails.
        """
        try:
            msft = self.service.msft
            if not msft.getMsftToken():
                return []

            query = str(searchParams.get("query") or "")
            site = searchParams.get("site") or ""
            folder = str(searchParams.get("folder") or "")
            suffixes = self._extensionSuffixes(searchParams.get("fileTypes"))
            maxResults = self._coerceMaxResults(searchParams.get("maxResults"))

            # NOTE(review): ``site`` is passed through unchanged; Graph
            # expects a site id or hostname:/path form here — confirm what
            # the query parser actually yields.
            siteSegment = site if site else "root"
            drivesUrl = f"{self._GRAPH_BASE_URL}/sites/{siteSegment}/drives"

            # Each drive corresponds to one document library.
            response = msft.makeGraphRequest("GET", drivesUrl)
            if not response or "value" not in response:
                return []

            results: List[Dict[str, Any]] = []
            for drive in response["value"]:
                if len(results) >= maxResults:
                    break
                driveId = drive.get("id")
                if not driveId:
                    continue

                searchEndpoint = self._buildSearchEndpoint(driveId, query, folder, maxResults)
                try:
                    searchResponse = msft.makeGraphRequest("GET", searchEndpoint)
                except Exception as e:
                    # One failing drive (e.g. the folder does not exist
                    # there) must not discard hits from the other drives.
                    logger.warning("Search failed for drive %s: %s", driveId, e)
                    continue
                if not searchResponse or "value" not in searchResponse:
                    continue

                for item in searchResponse["value"]:
                    if len(results) >= maxResults:
                        break
                    # Folders carry a ``folder`` facet and have no content
                    # stream to download.
                    if "folder" in item:
                        continue
                    name = item.get("name", "")
                    # NOTE(review): file types are matched client-side by
                    # extension because the driveItem search documentation
                    # lists only $expand/$select/$skipToken/$top/$orderby as
                    # supported options (no $filter) — confirm.
                    if suffixes and not str(name).lower().endswith(suffixes):
                        continue

                    fileContent = await self._getFileContent(driveId, item["id"])
                    results.append({
                        "name": name,
                        "id": item["id"],
                        "driveId": driveId,
                        "webUrl": item.get("webUrl", ""),
                        "lastModified": item.get("lastModifiedDateTime", ""),
                        "size": item.get("size", 0),
                        "content": fileContent,
                    })
            return results
        except Exception as e:
            logger.exception("Error searching SharePoint: %s", e)
            return []

    async def _getFileContent(self, driveId: str, fileId: str) -> str:
        """
        Get file content from SharePoint.

        Args:
            driveId: Drive ID
            fileId: File ID

        Returns:
            File content as string; empty when the download yields nothing,
            the payload is not valid UTF-8 text, or the request fails.
        """
        try:
            contentUrl = f"{self._GRAPH_BASE_URL}/drives/{driveId}/items/{fileId}/content"
            response = self.service.msft.makeGraphRequest("GET", contentUrl, raw=True)
            if not response:
                return ""
            if isinstance(response, str):
                return response
            try:
                return response.decode("utf-8")
            except UnicodeDecodeError:
                # Binary formats are expected here; not an error condition.
                logger.debug("File %s in drive %s is not UTF-8 text; content skipped", fileId, driveId)
                return ""
        except Exception as e:
            logger.error("Error getting file content: %s", e)
            return ""

    def _createSearchSummaryJson(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a JSON summary (name, url, lastModified, size) of search results."""
        summary = {
            "description": description,
            "totalResults": len(results),
            "results": [
                {
                    "name": result["name"],
                    "url": result["webUrl"],
                    "lastModified": result["lastModified"],
                    "size": result["size"],
                }
                for result in results
            ],
        }
        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.json",
            json.dumps(summary, indent=2),
            "application/json",
        )

    @staticmethod
    def _csvQuote(value: Any) -> str:
        """Return ``value`` as a double-quoted CSV field with embedded quotes doubled."""
        text = "" if value is None else str(value)
        return '"' + text.replace('"', '""') + '"'

    def _createSearchSummaryCsv(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a CSV summary of search results.

        ``description`` is accepted for signature parity with the other
        summary builders; the CSV output does not include it.
        """
        csvLines = ["Name,URL,Last Modified,Size (bytes)"]
        for result in results:
            fields = [
                self._csvQuote(result["name"]),
                self._csvQuote(result["webUrl"]),
                self._csvQuote(result["lastModified"]),
                str(result["size"]),
            ]
            csvLines.append(",".join(fields))
        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.csv",
            "\n".join(csvLines),
            "text/csv",
        )

    def _createSearchSummaryText(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a plain-text summary of search results."""
        textLines = [
            "SharePoint Search Results",
            f"Description: {description}",
            f"Total Results: {len(results)}",
            "\nResults:",
        ]
        for result in results:
            textLines.extend([
                f"\nName: {result['name']}",
                f"URL: {result['webUrl']}",
                f"Last Modified: {result['lastModified']}",
                f"Size: {result['size']} bytes",
            ])
        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.txt",
            "\n".join(textLines),
            "text/plain",
        )
def getAgentSharepoint() -> AgentSharepoint:
    """Build a fresh SharePoint agent.

    Returns:
        A newly constructed ``AgentSharepoint`` instance.
    """
    agent = AgentSharepoint()
    return agent