469 lines
18 KiB
Python
469 lines
18 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""Microsoft ProviderConnector -- one MSFT connection serves SharePoint, Outlook, Teams, OneDrive.
|
|
|
|
All ServiceAdapters share the same OAuth access token obtained from the
|
|
UserConnection (authority=msft).
|
|
"""
|
|
|
|
import asyncio
import json
import logging
from typing import Dict, Any, List, Optional
from urllib.parse import quote

import aiohttp

from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter, DownloadResult
from modules.datamodels.datamodelDataSource import ExternalEntry
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
_GRAPH_BASE = "https://graph.microsoft.com/v1.0"
|
|
|
|
|
|
class _GraphApiMixin:
    """Mixin bundling the Microsoft Graph request helpers used by the MSFT adapters.

    Stores the OAuth access token and forwards each HTTP verb to
    ``_makeGraphCall``; raw binary downloads are performed directly here.
    """

    def __init__(self, accessToken: str):
        # Bearer token attached to every Graph request issued through the mixin.
        self._accessToken = accessToken

    async def _graphGet(self, endpoint: str) -> Dict[str, Any]:
        """GET ``endpoint`` and return the result dict from ``_makeGraphCall``."""
        response = await _makeGraphCall(self._accessToken, endpoint, "GET")
        return response

    async def _graphPost(self, endpoint: str, data: Any = None) -> Dict[str, Any]:
        """POST ``data`` to ``endpoint`` and return the result dict."""
        response = await _makeGraphCall(self._accessToken, endpoint, "POST", data)
        return response

    async def _graphPut(self, endpoint: str, data: bytes = None) -> Dict[str, Any]:
        """PUT ``data`` to ``endpoint`` and return the result dict."""
        response = await _makeGraphCall(self._accessToken, endpoint, "PUT", data)
        return response

    async def _graphDelete(self, endpoint: str) -> Dict[str, Any]:
        """DELETE ``endpoint`` and return the result dict."""
        response = await _makeGraphCall(self._accessToken, endpoint, "DELETE")
        return response

    async def _graphDownload(self, endpoint: str) -> Optional[bytes]:
        """Fetch the raw body of a Graph endpoint.

        Returns the response bytes on HTTP 200. Any other status, and any
        exception raised while talking to Graph, is logged and yields ``None``.
        """
        targetUrl = f"{_GRAPH_BASE}/{endpoint.lstrip('/')}"
        authHeaders = {"Authorization": f"Bearer {self._accessToken}"}
        # Downloads get a 60 second overall budget.
        downloadTimeout = aiohttp.ClientTimeout(total=60)
        try:
            async with aiohttp.ClientSession(timeout=downloadTimeout) as session:
                async with session.get(targetUrl, headers=authHeaders) as resp:
                    if resp.status != 200:
                        logger.error(f"Download failed {resp.status}: {await resp.text()}")
                        return None
                    return await resp.read()
        except Exception as e:
            logger.error(f"Graph download error: {e}")
            return None
|
|
|
|
|
|
async def _makeGraphCall(
    token: str, endpoint: str, method: str = "GET", data: Any = None
) -> Dict[str, Any]:
    """Execute a single Microsoft Graph API call.

    Args:
        token: OAuth bearer token placed in the ``Authorization`` header.
        endpoint: Path relative to the Graph base URL; leading slashes are ignored.
        method: One of ``GET``, ``POST``, ``PUT`` or ``DELETE`` (case-sensitive).
        data: Optional request body. ``dict`` and ``list`` values are serialized
            to JSON; anything else (bytes, str) is sent exactly as given.

    Returns:
        The parsed response on success (``{}`` for a successful DELETE), or a
        dict with a single ``"error"`` key describing the failure. Timeouts and
        transport errors are reported that way instead of being raised.
    """
    # Reject unknown verbs before any network resources are allocated.
    if method not in ("GET", "POST", "PUT", "DELETE"):
        return {"error": f"Unsupported method: {method}"}

    url = f"{_GRAPH_BASE}/{endpoint.lstrip('/')}"

    body = data
    contentType = "application/json"
    if isinstance(data, (dict, list)):
        # aiohttp would form-encode a dict passed as ``data``; Graph expects
        # the JSON document announced by the Content-Type header.
        body = json.dumps(data).encode("utf-8")
    elif method == "PUT" and isinstance(data, bytes):
        contentType = "application/octet-stream"

    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": contentType,
    }
    timeout = aiohttp.ClientTimeout(total=30)

    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.request(method, url, headers=headers, data=body) as resp:
                # A successful DELETE carries no payload worth parsing.
                if method == "DELETE" and resp.status in (200, 204):
                    return {}
                return await _handleResponse(resp)
    except asyncio.TimeoutError:
        logger.error(f"Graph API timeout: {method} {endpoint}")
        return {"error": f"Graph API timeout: {endpoint}"}
    except Exception as e:
        logger.error(f"Graph API error on {method} {endpoint}: {e}")
        return {"error": f"Graph API error: {e}"}
|
|
|
|
|
|
async def _handleResponse(resp: aiohttp.ClientResponse) -> Dict[str, Any]:
    """Translate a Graph HTTP response into the result dict used by the adapters.

    Args:
        resp: The open aiohttp response to interpret.

    Returns:
        * the decoded JSON body for 200/201 (``{}`` when the body is empty),
        * ``{}`` for 202 Accepted and 204 No Content, which carry no body
          (Graph answers ``sendMail`` with 202, for example),
        * ``{"error": "<status>: <body text>"}`` for every other status.
    """
    status = resp.status

    # Success without a payload: there is nothing to decode.
    if status in (202, 204):
        return {}

    if status in (200, 201):
        payload = await resp.json()
        # An empty body decodes to None; callers test `"error" in result`,
        # so they always need a dict.
        return payload if payload is not None else {}

    errorText = await resp.text()
    logger.error(f"Graph API {status}: {errorText}")
    return {"error": f"{status}: {errorText}"}
|
|
|
|
|
|
def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry:
    """Convert a Graph driveItem dict into an ``ExternalEntry``.

    Args:
        item: One element of a Graph ``value`` list describing a drive item.
        basePath: Path of the folder the item was listed from. When empty the
            entry path is just the item name.

    Returns:
        An ``ExternalEntry`` whose ``isFolder`` flag reflects the presence of
        the ``folder`` key in ``item``.
    """
    name = item.get("name") or ""
    isFolder = "folder" in item
    # Tolerate explicit nulls for the facets instead of failing on `.get`.
    folderFacet = item.get("folder") or {}
    fileFacet = item.get("file") or {}

    # Strip trailing slashes so "/Docs/" or "/" never yields a "//name" path.
    entryPath = f"{basePath.rstrip('/')}/{name}" if basePath else name

    return ExternalEntry(
        name=name,
        path=entryPath,
        isFolder=isFolder,
        size=item.get("size"),
        mimeType=None if isFolder else fileFacet.get("mimeType"),
        # NOTE(review): the item's modification timestamp is not mapped here;
        # the type ExternalEntry.lastModified expects is not visible in this file.
        lastModified=None,
        metadata={
            "id": item.get("id"),
            "webUrl": item.get("webUrl"),
            "childCount": folderFacet.get("childCount") if isFolder else None,
        },
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# SharePoint Adapter
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
    """ServiceAdapter for SharePoint (files, sites) via Microsoft Graph.

    Paths have the form ``/sites/<siteId>/<folder or file path>``. Every
    caller-supplied path and search term is percent-encoded before it is placed
    in a Graph URL, so names containing ``#``, ``?``, ``%`` or ``&`` address
    the intended item instead of being read as URL syntax.
    """

    @staticmethod
    def _driveItemEndpoint(siteId: str, itemPath: str, suffix: str) -> str:
        """Build the path-addressed endpoint ``sites/<id>/drive/root:/<path>:/<suffix>``."""
        # Commas are kept verbatim in the site id; "/" stays a separator in the item path.
        return f"sites/{quote(siteId, safe=',')}/drive/root:/{quote(itemPath)}:/{suffix}"

    async def browse(self, path: str, filter: Optional[str] = None) -> List[ExternalEntry]:
        """List items in a SharePoint folder.

        Path format: /sites/<SiteName>/<FolderPath>
        Root "/" (or a path without a site) lists available sites via discovery.

        Args:
            path: Folder to list.
            filter: Optional glob pattern matched against entry names.

        Returns:
            The entries of the first result page Graph returns; an empty list
            when the request fails (the failure is logged).
        """
        if not path or path == "/":
            return await self._discoverSites()

        siteId, folderPath = _parseSharepointPath(path)
        if not siteId:
            return await self._discoverSites()

        cleanPath = (folderPath or "").strip("/")
        if cleanPath:
            endpoint = self._driveItemEndpoint(siteId, cleanPath, "children")
        else:
            endpoint = f"sites/{quote(siteId, safe=',')}/drive/root/children"

        result = await self._graphGet(endpoint)
        if "error" in result:
            logger.warning(f"SharePoint browse failed: {result['error']}")
            return []

        # A trailing slash on the requested path must not leak into child paths.
        basePath = path.rstrip("/")
        entries = [_graphItemToExternalEntry(item, basePath) for item in result.get("value", [])]
        if filter:
            entries = [e for e in entries if _matchFilter(e, filter)]
        return entries

    async def _discoverSites(self) -> List[ExternalEntry]:
        """Discover accessible SharePoint sites (at most 50, via ``sites?search=*``).

        Sites without a display name are skipped.
        """
        result = await self._graphGet("sites?search=*&$top=50")
        if "error" in result:
            logger.warning(f"SharePoint site discovery failed: {result['error']}")
            return []

        entries: List[ExternalEntry] = []
        for site in result.get("value", []):
            displayName = site.get("displayName")
            if not displayName:
                continue
            entries.append(
                ExternalEntry(
                    name=displayName,
                    path=f"/sites/{site.get('id', '')}",
                    isFolder=True,
                    metadata={
                        "id": site.get("id"),
                        "webUrl": site.get("webUrl"),
                        "description": site.get("description", ""),
                    },
                )
            )
        return entries

    async def download(self, path: str) -> bytes:
        """Download a file addressed as ``/sites/<siteId>/<filePath>``.

        Returns:
            The file content, or ``b""`` when the path names no file or the
            download fails.
        """
        siteId, filePath = _parseSharepointPath(path)
        cleanPath = (filePath or "").strip("/")
        if not siteId or not cleanPath:
            return b""
        data = await self._graphDownload(self._driveItemEndpoint(siteId, cleanPath, "content"))
        return data or b""

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        """Upload ``data`` as ``fileName`` into the folder given by ``path``.

        Uses a single PUT to the item's ``content`` endpoint.

        Returns:
            The Graph response dict, or a dict with an ``"error"`` key.
        """
        siteId, folderPath = _parseSharepointPath(path)
        if not siteId:
            return {"error": "Invalid SharePoint path"}
        if not fileName:
            return {"error": "Missing file name"}

        cleanFolder = (folderPath or "").strip("/")
        uploadPath = f"{cleanFolder}/{fileName}" if cleanFolder else fileName
        return await self._graphPut(self._driveItemEndpoint(siteId, uploadPath, "content"), data)

    async def search(self, query: str, path: Optional[str] = None) -> List[ExternalEntry]:
        """Search the drive of the site named in ``path`` for ``query``.

        Returns:
            Matching entries (their paths are the bare item names), or an empty
            list when no site or query is given or the request fails.
        """
        siteId, _ = _parseSharepointPath(path or "")
        if not siteId or not query:
            return []

        # Double apostrophes for the OData string literal, then percent-encode
        # so "&", "#", "?" and "/" in the term cannot alter the URL.
        term = quote(query.replace("'", "''"), safe="")
        endpoint = f"sites/{quote(siteId, safe=',')}/drive/root/search(q='{term}')"

        result = await self._graphGet(endpoint)
        if "error" in result:
            logger.warning(f"SharePoint search failed: {result['error']}")
            return []
        return [_graphItemToExternalEntry(item) for item in result.get("value", [])]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Outlook Adapter
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class OutlookAdapter(_GraphApiMixin, ServiceAdapter):
    """ServiceAdapter for Outlook mail via Microsoft Graph.

    Folders are addressed as ``/<folderId>`` and messages as
    ``/<folderId>/<messageId>``; only the last path segment is used to
    identify a message.
    """

    @staticmethod
    def _messageEntry(message: Dict[str, Any], entryPath: str, withAttachmentFlag: bool) -> ExternalEntry:
        """Map one Graph message dict to a non-folder ``ExternalEntry``."""
        # Graph may send explicit nulls for "from" (e.g. drafts) and "subject".
        sender = (message.get("from") or {}).get("emailAddress") or {}
        metadata: Dict[str, Any] = {
            "id": message.get("id"),
            "from": sender.get("address"),
            "receivedDateTime": message.get("receivedDateTime"),
        }
        if withAttachmentFlag:
            metadata["hasAttachments"] = message.get("hasAttachments", False)
        return ExternalEntry(
            name=message.get("subject") or "(no subject)",
            path=entryPath,
            isFolder=False,
            metadata=metadata,
        )

    async def browse(self, path: str, filter: Optional[str] = None) -> List[ExternalEntry]:
        """List mail folders or messages.

        path = "" or "/"    -> list mail folders
        path = "/Inbox"     -> list the 25 most recent messages in Inbox

        Args:
            path: Folder to list, or empty for the folder overview.
            filter: Optional glob pattern matched against entry names.

        Returns:
            The matching entries; an empty list when the request fails.
        """
        folderId = (path or "").strip("/")

        if not folderId:
            # Ask for a larger first page so mailboxes with many folders are
            # not truncated to Graph's default page size.
            result = await self._graphGet("me/mailFolders?$top=100")
            if "error" in result:
                logger.warning(f"Outlook folder listing failed: {result['error']}")
                return []
            entries = [
                ExternalEntry(
                    name=f.get("displayName") or "",
                    path=f"/{f.get('id', '')}",
                    isFolder=True,
                    metadata={"id": f.get("id"), "totalItemCount": f.get("totalItemCount")},
                )
                for f in result.get("value", [])
            ]
        else:
            endpoint = (
                f"me/mailFolders/{quote(folderId, safe='=')}/messages"
                "?$top=25&$orderby=receivedDateTime desc"
            )
            result = await self._graphGet(endpoint)
            if "error" in result:
                logger.warning(f"Outlook message listing failed: {result['error']}")
                return []
            basePath = path.rstrip("/")
            entries = [
                self._messageEntry(m, f"{basePath}/{m.get('id', '')}", True)
                for m in result.get("value", [])
            ]

        # Honour the filter the same way the drive-based adapters do.
        if filter:
            entries = [e for e in entries if _matchFilter(e, filter)]
        return entries

    async def download(self, path: str) -> DownloadResult:
        """Download a mail message as RFC 822 EML via Graph API $value endpoint.

        The file name is derived from the message subject (sanitized and
        truncated to 80 characters), falling back to ``email``.

        Returns:
            A populated ``DownloadResult``, or an empty one when the path holds
            no message id or the download fails.
        """
        import re

        messageId = (path or "").strip("/").split("/")[-1]
        if not messageId:
            return DownloadResult()
        encodedId = quote(messageId, safe="=")

        meta = await self._graphGet(f"me/messages/{encodedId}?$select=subject")
        subject = messageId if "error" in meta else meta.get("subject", messageId)
        # `subject or ""` covers a null subject returned by Graph.
        safeName = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", subject or "")[:80].strip(". ") or "email"

        emlBytes = await self._graphDownload(f"me/messages/{encodedId}/$value")
        if not emlBytes:
            return DownloadResult()

        return DownloadResult(
            data=emlBytes,
            fileName=f"{safeName}.eml",
            mimeType="message/rfc822",
        )

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        """Not applicable for Outlook in the file sense."""
        return {"error": "Upload not supported for Outlook"}

    async def search(self, query: str, path: Optional[str] = None) -> List[ExternalEntry]:
        """Search the mailbox for ``query`` and return up to 25 messages.

        ``path`` is accepted for interface compatibility and not used.
        """
        if not query:
            return []

        # $search takes a double-quoted phrase: escape backslashes and quotes
        # inside it, then percent-encode so "&" or "#" cannot split the URL.
        phrase = query.replace("\\", "\\\\").replace('"', '\\"')
        searchExpr = quote('"' + phrase + '"', safe="")
        result = await self._graphGet(f"me/messages?$search={searchExpr}&$top=25")
        if "error" in result:
            logger.warning(f"Outlook search failed: {result['error']}")
            return []
        return [
            self._messageEntry(m, f"/search/{m.get('id', '')}", False)
            for m in result.get("value", [])
        ]

    async def sendMail(
        self, to: List[str], subject: str, body: str,
        cc: Optional[List[str]] = None, attachments: Optional[List[Dict]] = None
    ) -> Dict[str, Any]:
        """Send a plain-text email via Microsoft Graph.

        Args:
            to: Recipient addresses.
            subject: Subject line.
            body: Plain-text body.
            cc: Optional carbon-copy addresses.
            attachments: Optional attachment dicts. Entries providing ``name``
                and ``contentBytes`` are sent as Graph file attachments (raw
                bytes are base64-encoded); other entries are skipped with a
                warning. NOTE(review): the dict shape callers use is not
                visible in this file -- confirm against the call sites.

        Returns:
            ``{"success": True}`` on success, otherwise the error dict from
            the Graph call.
        """
        import base64
        import json

        message: Dict[str, Any] = {
            "subject": subject,
            "body": {"contentType": "Text", "content": body},
            "toRecipients": [{"emailAddress": {"address": addr}} for addr in to],
        }
        if cc:
            message["ccRecipients"] = [{"emailAddress": {"address": addr}} for addr in cc]

        graphAttachments = []
        for attachment in attachments or []:
            name = attachment.get("name")
            content = attachment.get("contentBytes")
            if not name or content is None:
                logger.warning("Outlook sendMail: skipping attachment without 'name'/'contentBytes'")
                continue
            if isinstance(content, (bytes, bytearray)):
                content = base64.b64encode(content).decode("ascii")
            graphAttachments.append(
                {"@odata.type": "#microsoft.graph.fileAttachment", **attachment, "contentBytes": content}
            )
        if graphAttachments:
            message["attachments"] = graphAttachments

        payload = json.dumps({"message": message, "saveToSentItems": True}).encode("utf-8")
        result = await self._graphPost("me/sendMail", payload)
        if "error" in result:
            return result
        return {"success": True}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Teams Adapter (browse only -- download, upload and search are stubs)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TeamsAdapter(_GraphApiMixin, ServiceAdapter):
    """Microsoft Teams adapter: lists joined teams and the channels of a team.

    Download, upload and search are placeholders returning empty or error results.
    """

    async def browse(self, path: str, filter: Optional[str] = None) -> list:
        """List teams (empty path) or the channels of ``/<teamId>``.

        Any deeper path yields an empty list. ``filter`` is accepted but not applied.
        """
        trimmed = (path or "").strip("/")
        if not trimmed:
            return await self._listJoinedTeams()

        teamId, separator, _ = trimmed.partition("/")
        if separator:
            # Nothing below channel level is browsable.
            return []
        return await self._listChannels(teamId)

    async def _listJoinedTeams(self) -> list:
        """Return one folder entry per team the signed-in user has joined."""
        response = await self._graphGet("me/joinedTeams")
        if "error" in response:
            logger.warning(f"Teams browse failed: {response['error']}")
            return []

        teams = []
        for team in response.get("value", []):
            teams.append(
                ExternalEntry(
                    name=team.get("displayName", ""),
                    path=f"/{team.get('id', '')}",
                    isFolder=True,
                    metadata={"id": team.get("id"), "description": team.get("description", "")},
                )
            )
        return teams

    async def _listChannels(self, teamId: str) -> list:
        """Return one folder entry per channel of the given team."""
        response = await self._graphGet(f"teams/{teamId}/channels")
        if "error" in response:
            return []

        channels = []
        for channel in response.get("value", []):
            channels.append(
                ExternalEntry(
                    name=channel.get("displayName", ""),
                    path=f"/{teamId}/{channel.get('id', '')}",
                    isFolder=True,
                    metadata={"id": channel.get("id"), "membershipType": channel.get("membershipType", "")},
                )
            )
        return channels

    async def download(self, path: str) -> bytes:
        """Placeholder: always returns empty bytes."""
        return b""

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        """Placeholder: always reports that upload is not implemented."""
        return {"error": "Teams upload not implemented"}

    async def search(self, query: str, path: Optional[str] = None) -> list:
        """Placeholder: always returns no results."""
        return []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# OneDrive Adapter (personal drive -- mirrors the SharePoint adapter)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class OneDriveAdapter(_GraphApiMixin, ServiceAdapter):
    """ServiceAdapter for the signed-in user's OneDrive (``me/drive``).

    Paths are relative to the drive root. Caller-supplied paths, file names
    and search terms are percent-encoded before they are placed in a Graph
    URL, so names containing ``#``, ``?``, ``%`` or ``&`` resolve correctly.
    """

    @staticmethod
    def _itemEndpoint(itemPath: str, suffix: str) -> str:
        """Build the path-addressed endpoint ``me/drive/root:/<path>:/<suffix>``."""
        # quote() leaves "/" untouched, so folder separators survive.
        return f"me/drive/root:/{quote(itemPath)}:/{suffix}"

    async def browse(self, path: str, filter: Optional[str] = None) -> List[ExternalEntry]:
        """List the children of ``path`` (the drive root when empty or "/").

        Args:
            path: Folder to list.
            filter: Optional glob pattern matched against entry names.

        Returns:
            The entries of the first result page Graph returns; an empty list
            when the request fails.
        """
        cleanPath = (path or "").strip("/")
        if cleanPath:
            endpoint = self._itemEndpoint(cleanPath, "children")
        else:
            endpoint = "me/drive/root/children"

        result = await self._graphGet(endpoint)
        if "error" in result:
            logger.warning(f"OneDrive browse failed: {result['error']}")
            return []

        # Drop trailing slashes so "/" or "/Docs/" never produce "//name" paths.
        basePath = (path or "").rstrip("/")
        entries = [_graphItemToExternalEntry(item, basePath) for item in result.get("value", [])]
        if filter:
            entries = [e for e in entries if _matchFilter(e, filter)]
        return entries

    async def download(self, path: str) -> bytes:
        """Download the file at ``path``; returns ``b""`` for an empty path or on failure."""
        cleanPath = (path or "").strip("/")
        if not cleanPath:
            return b""
        data = await self._graphDownload(self._itemEndpoint(cleanPath, "content"))
        return data or b""

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        """Upload ``data`` as ``fileName`` into the folder ``path`` with a single PUT.

        Returns:
            The Graph response dict, or a dict with an ``"error"`` key.
        """
        if not fileName:
            return {"error": "Missing file name"}
        cleanPath = (path or "").strip("/")
        uploadPath = f"{cleanPath}/{fileName}" if cleanPath else fileName
        return await self._graphPut(self._itemEndpoint(uploadPath, "content"), data)

    async def search(self, query: str, path: Optional[str] = None) -> List[ExternalEntry]:
        """Search the whole drive for ``query``.

        ``path`` is accepted for interface compatibility and not used.
        Returned entry paths are the bare item names.
        """
        if not query:
            return []

        # Double apostrophes for the OData string literal, then percent-encode
        # so "&", "#", "?" and "/" in the term cannot alter the URL.
        term = quote(query.replace("'", "''"), safe="")
        result = await self._graphGet(f"me/drive/root/search(q='{term}')")
        if "error" in result:
            logger.warning(f"OneDrive search failed: {result['error']}")
            return []
        return [_graphItemToExternalEntry(item) for item in result.get("value", [])]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# MsftConnector (1:n)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class MsftConnector(ProviderConnector):
    """Microsoft ProviderConnector: a single connection fans out to several services."""

    # Service key -> adapter class constructed with the connector's access token.
    _SERVICE_MAP = {
        "sharepoint": SharepointAdapter,
        "outlook": OutlookAdapter,
        "teams": TeamsAdapter,
        "onedrive": OneDriveAdapter,
    }

    def getAvailableServices(self) -> List[str]:
        """Return the service keys this connector can create adapters for."""
        return [serviceName for serviceName in self._SERVICE_MAP]

    def getServiceAdapter(self, service: str) -> ServiceAdapter:
        """Create the adapter registered under ``service``.

        Raises:
            ValueError: If ``service`` is not a key of ``_SERVICE_MAP``.
        """
        if service not in self._SERVICE_MAP:
            raise ValueError(f"Unknown MSFT service: {service}. Available: {list(self._SERVICE_MAP.keys())}")
        adapterFactory = self._SERVICE_MAP[service]
        return adapterFactory(self.accessToken)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _parseSharepointPath(path: str) -> tuple:
    """Parse a SharePoint path into (siteId, innerPath).

    Expected format: /sites/<siteId>/<innerPath>
    Also accepts a bare ``<siteId>/<innerPath>`` without the /sites/ prefix.

    Args:
        path: The path to split; leading and trailing slashes are ignored.

    Returns:
        A ``(siteId, innerPath)`` tuple. Both parts are empty strings when the
        path is empty or names only the ``sites`` collection; ``innerPath`` is
        empty when nothing follows the site id.
    """
    if not path:
        return ("", "")

    clean = path.strip("/")

    # "/sites" or "/sites/" names the collection itself, not a site called "sites".
    if clean == "sites":
        return ("", "")

    if clean.startswith("sites/"):
        # Drop the "sites" prefix, then split the site id from the rest.
        _, _, clean = clean.partition("/")

    siteId, _, innerPath = clean.partition("/")
    return (siteId, innerPath)
|
|
|
|
|
|
def _matchFilter(entry: ExternalEntry, pattern: str) -> bool:
    """Simple glob-like filter (supports * wildcard), compared case-insensitively.

    Args:
        entry: The entry whose ``name`` is tested; a missing or ``None`` name
            is treated as an empty string.
        pattern: Glob pattern in ``fnmatch`` syntax.

    Returns:
        ``True`` when the lower-cased name matches the lower-cased pattern.
    """
    import fnmatch

    name = getattr(entry, "name", None) or ""
    # Both sides are lower-cased explicitly, so use the variant that adds no
    # platform-dependent case or separator normalization of its own.
    return fnmatch.fnmatchcase(name.lower(), pattern.lower())
|