gateway/modules/connectors/providerMsft/connectorMsft.py
2026-04-21 00:50:36 +02:00

624 lines
24 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Microsoft ProviderConnector -- one MSFT connection serves SharePoint, Outlook, Teams, OneDrive.
All ServiceAdapters share the same OAuth access token obtained from the
UserConnection (authority=msft).
"""
import logging
import aiohttp
import asyncio
from typing import Dict, Any, List, Optional
from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter, DownloadResult
from modules.datamodels.datamodelDataSource import ExternalEntry
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Root URL of the Microsoft Graph v1.0 REST API; relative endpoints are appended to it.
_GRAPH_BASE = "https://graph.microsoft.com/v1.0"
class _GraphApiMixin:
    """Mixin that stores the OAuth access token and offers thin Graph helpers.

    The JSON helpers delegate to ``_makeGraphCall`` and therefore return either
    the parsed response or a dict containing an ``"error"`` key.
    """

    def __init__(self, accessToken: str):
        self._accessToken = accessToken

    async def _graphGet(self, endpoint: str) -> Dict[str, Any]:
        """GET ``endpoint`` (relative to the Graph base URL)."""
        return await _makeGraphCall(self._accessToken, endpoint, "GET")

    async def _graphPost(self, endpoint: str, data: Any = None) -> Dict[str, Any]:
        """POST ``data`` to ``endpoint``."""
        return await _makeGraphCall(self._accessToken, endpoint, "POST", data)

    async def _graphPut(self, endpoint: str, data: bytes = None) -> Dict[str, Any]:
        """PUT ``data`` to ``endpoint``."""
        return await _makeGraphCall(self._accessToken, endpoint, "PUT", data)

    async def _graphDelete(self, endpoint: str) -> Dict[str, Any]:
        """DELETE ``endpoint``."""
        return await _makeGraphCall(self._accessToken, endpoint, "DELETE")

    async def _graphDownload(self, endpoint: str) -> Optional[bytes]:
        """Fetch the raw body of ``endpoint``.

        Returns the response bytes on HTTP 200. Any other status, and any
        exception raised while talking to Graph, is logged and yields ``None``.
        """
        target = f"{_GRAPH_BASE}/{endpoint.lstrip('/')}"
        authHeader = {"Authorization": f"Bearer {self._accessToken}"}
        # Downloads get a 60 second overall budget.
        budget = aiohttp.ClientTimeout(total=60)
        try:
            async with aiohttp.ClientSession(timeout=budget) as session:
                async with session.get(target, headers=authHeader) as response:
                    if response.status != 200:
                        logger.error(f"Download failed {response.status}: {await response.text()}")
                        return None
                    return await response.read()
        except Exception as e:
            logger.error(f"Graph download error: {e}")
            return None
async def _makeGraphCall(
    token: str, endpoint: str, method: str = "GET", data: Any = None
) -> Dict[str, Any]:
    """Perform one Microsoft Graph request and return a dict.

    Args:
        token: OAuth bearer token placed in the Authorization header.
        endpoint: Path relative to the Graph base URL; leading slashes are ignored.
        method: One of "GET", "POST", "PUT", "DELETE".
        data: Optional request body, handed to aiohttp unchanged.

    Returns:
        The result of ``_handleResponse``; ``{}`` for a DELETE answered with
        200 or 204; or a dict with an ``"error"`` key on timeout, on any other
        exception, or when ``method`` is not supported. Never raises.
    """
    url = f"{_GRAPH_BASE}/{endpoint.lstrip('/')}"
    # Raw bytes sent via PUT are announced as a binary stream; everything else as JSON.
    isBinaryPut = method == "PUT" and isinstance(data, bytes)
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/octet-stream" if isBinaryPut else "application/json",
    }
    timeout = aiohttp.ClientTimeout(total=30)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            requestArgs: Dict[str, Any] = {"headers": headers}
            if data is not None:
                requestArgs["data"] = data
            senders = {
                "GET": session.get,
                "POST": session.post,
                "PUT": session.put,
                "DELETE": session.delete,
            }
            send = senders.get(method)
            if send is not None:
                async with send(url, **requestArgs) as resp:
                    # A successful DELETE has nothing to parse.
                    if method == "DELETE" and resp.status in (200, 204):
                        return {}
                    return await _handleResponse(resp)
    except asyncio.TimeoutError:
        return {"error": f"Graph API timeout: {endpoint}"}
    except Exception as e:
        return {"error": f"Graph API error: {e}"}
    return {"error": f"Unsupported method: {method}"}
async def _handleResponse(resp: "aiohttp.ClientResponse") -> Dict[str, Any]:
    """Translate a Graph HTTP response into a result dict.

    Args:
        resp: The open aiohttp response (anything exposing ``status``,
            ``json()`` and ``text()`` works).

    Returns:
        * ``{}`` for 202 Accepted and 204 No Content. These are success
          statuses without a JSON payload (Graph answers ``me/sendMail`` with
          202), so they must not be reported as errors.
        * The parsed JSON body for 200 and 201.
        * ``{"error": "<status>: <body>"}`` for every other status; the
          failure is also logged.
    """
    status = resp.status
    if status in (202, 204):
        return {}
    if status in (200, 201):
        return await resp.json()
    errorText = await resp.text()
    logger.error(f"Graph API {status}: {errorText}")
    return {"error": f"{status}: {errorText}"}
def _stripGraphBase(url: str) -> str:
    """Turn an absolute Graph URL (such as an ``@odata.nextLink``) into the
    relative endpoint form that ``_makeGraphCall`` expects.

    A falsy ``url`` yields ``""``; a URL that does not start with the Graph
    base is returned unchanged.
    """
    if url and url.startswith(_GRAPH_BASE):
        remainder = url[len(_GRAPH_BASE):]
        return remainder.lstrip("/")
    return url or ""
def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry:
    """Convert a Graph drive item dict into an ``ExternalEntry``.

    Args:
        item: One element of a Graph ``value`` list. An item counts as a
            folder when it has a ``"folder"`` key.
        basePath: Path of the containing folder. Trailing slashes are dropped
            before the item name is appended, so ``"/"`` or ``"/sites/x/"``
            no longer produce ``"//name"``. When empty, the entry path is
            just the item name.

    Returns:
        The entry with name, path, folder flag, size, MIME type (files only)
        and a metadata dict holding ``id``, ``webUrl`` and ``childCount``
        (folders only).
    """
    name = item.get("name", "")
    isFolder = "folder" in item
    # `or {}` guards against a facet that is present but null.
    fileFacet = item.get("file") or {}
    folderFacet = item.get("folder") or {}
    entryPath = f"{basePath.rstrip('/')}/{name}" if basePath else name
    return ExternalEntry(
        name=name,
        path=entryPath,
        isFolder=isFolder,
        size=item.get("size"),
        mimeType=None if isFolder else fileFacet.get("mimeType"),
        # NOTE(review): left unset as before; the expected type of
        # ExternalEntry.lastModified is not visible from this module.
        lastModified=None,
        metadata={
            "id": item.get("id"),
            "webUrl": item.get("webUrl"),
            "childCount": folderFacet.get("childCount") if isFolder else None,
        },
    )
# ---------------------------------------------------------------------------
# SharePoint Adapter
# ---------------------------------------------------------------------------
class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
    """Graph-backed ServiceAdapter for SharePoint sites and the files in their drive."""

    async def browse(
        self,
        path: str,
        filter: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[ExternalEntry]:
        """List the children of a SharePoint folder.

        ``path`` has the form ``/sites/<siteId>/<folderPath>``. An empty path,
        ``"/"``, or a path without a site id lists the discoverable sites
        instead. ``filter`` is a glob applied to entry names; ``limit`` caps
        the result (at least one entry is kept). A Graph error is logged and
        yields an empty list.
        """
        isRoot = not path or path == "/"
        if isRoot:
            return await self._discoverSites()

        siteId, folderPath = _parseSharepointPath(path)
        if not siteId:
            return await self._discoverSites()

        if folderPath and folderPath != "/":
            endpoint = f"sites/{siteId}/drive/root:/{folderPath.lstrip('/')}:/children"
        else:
            endpoint = f"sites/{siteId}/drive/root/children"

        response = await self._graphGet(endpoint)
        if "error" in response:
            logger.warning(f"SharePoint browse failed: {response['error']}")
            return []

        entries = [_graphItemToExternalEntry(item, path) for item in response.get("value", [])]
        if filter:
            entries = [entry for entry in entries if _matchFilter(entry, filter)]
        if limit is not None:
            cap = max(1, int(limit))
            entries = entries[:cap]
        return entries

    async def _discoverSites(self) -> List[ExternalEntry]:
        """Return the sites found by a wildcard site search as folder entries.

        Sites without a ``displayName`` are skipped. A Graph error is logged
        and yields an empty list.
        """
        response = await self._graphGet("sites?search=*&$top=50")
        if "error" in response:
            logger.warning(f"SharePoint site discovery failed: {response['error']}")
            return []

        sites: List[ExternalEntry] = []
        for site in response.get("value", []):
            if not site.get("displayName"):
                continue
            sites.append(
                ExternalEntry(
                    name=site.get("displayName") or site.get("name", ""),
                    path=f"/sites/{site.get('id', '')}",
                    isFolder=True,
                    metadata={
                        "id": site.get("id"),
                        "webUrl": site.get("webUrl"),
                        "description": site.get("description", ""),
                    },
                )
            )
        return sites

    async def download(self, path: str) -> bytes:
        """Return the content of the file at ``path``, or ``b""`` when the path
        lacks a site id or file path, or when the download fails."""
        siteId, filePath = _parseSharepointPath(path)
        if not (siteId and filePath):
            return b""
        relative = filePath.strip("/")
        content = await self._graphDownload(f"sites/{siteId}/drive/root:/{relative}:/content")
        return content or b""

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        """Upload ``data`` as ``fileName`` into the folder named by ``path``.

        Returns the Graph result dict, or an error dict when ``path`` contains
        no site id.
        """
        siteId, folderPath = _parseSharepointPath(path)
        if not siteId:
            return {"error": "Invalid SharePoint path"}
        folder = (folderPath or "").strip("/")
        target = f"{folder}/{fileName}" if folder else fileName
        return await self._graphPut(f"sites/{siteId}/drive/root:/{target}:/content", data)

    async def search(
        self,
        query: str,
        path: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[ExternalEntry]:
        """Search the drive of the site named in ``path`` for ``query``.

        Returns an empty list when no site id can be derived from ``path`` or
        when Graph reports an error. ``limit`` caps the result (at least one
        entry is kept).
        """
        siteId, _ = _parseSharepointPath(path or "")
        if not siteId:
            return []
        # Single quotes are doubled because the query sits inside q='...'.
        quoted = query.replace("'", "''")
        response = await self._graphGet(f"sites/{siteId}/drive/root/search(q='{quoted}')")
        if "error" in response:
            return []
        hits = [_graphItemToExternalEntry(item) for item in response.get("value", [])]
        if limit is not None:
            cap = max(1, int(limit))
            hits = hits[:cap]
        return hits
# ---------------------------------------------------------------------------
# Outlook Adapter
# ---------------------------------------------------------------------------
class OutlookAdapter(_GraphApiMixin, ServiceAdapter):
    """ServiceAdapter for Outlook mail via Microsoft Graph.

    Path scheme used by this adapter: ``""`` or ``"/"`` is the list of mail
    folders, ``"/<folderId>"`` is the message list of one folder, and message
    entries are addressed as ``"<folderPath>/<messageId>"``.
    """

    # Default upper bound for messages returned from a single browse() call.
    # Graph allows $top up to 1000 per page; we keep the default modest so
    # accidental "browse all" calls don't blow up the LLM context. Callers
    # (e.g. the agent's browseDataSource tool) can override via ``limit``.
    _DEFAULT_MESSAGE_LIMIT = 100
    _MAX_MESSAGE_LIMIT = 1000
    _PAGE_SIZE = 100

    def _clampLimit(self, limit: Optional[int]) -> int:
        """Resolve ``limit`` to a value in ``1.._MAX_MESSAGE_LIMIT`` (default when None)."""
        if limit is None:
            return self._DEFAULT_MESSAGE_LIMIT
        return max(1, min(int(limit), self._MAX_MESSAGE_LIMIT))

    @staticmethod
    def _subjectOf(message: Dict[str, Any]) -> str:
        """Return the subject, falling back when it is missing, null or empty."""
        return message.get("subject") or "(no subject)"

    @staticmethod
    def _senderOf(message: Dict[str, Any]) -> Optional[str]:
        """Return the sender address, tolerating a missing or null ``from`` block."""
        sender = message.get("from") or {}
        return (sender.get("emailAddress") or {}).get("address")

    async def _listFolders(self) -> List[ExternalEntry]:
        """List all top-level mail folders, following ``@odata.nextLink`` pages."""
        # Graph default page size for /me/mailFolders is 10. Mailboxes with
        # localized + many system folders (Posteingang, Gesendet, Archiv, ...)
        # often exceed that, so the Inbox can fall off the first page. We page
        # through all results AND fall back to the well-known shortcut
        # /me/mailFolders/inbox so the default folder is always visible.
        folders: List[Dict[str, Any]] = []
        seenIds: set = set()
        endpoint: Optional[str] = "me/mailFolders?$top=100"
        while endpoint:
            result = await self._graphGet(endpoint)
            if "error" in result:
                # Keep what was collected so far, but do not hide the failure.
                logger.warning(f"Outlook folder listing failed: {result['error']}")
                break
            for folder in result.get("value", []):
                folderId = folder.get("id")
                if folderId and folderId not in seenIds:
                    seenIds.add(folderId)
                    folders.append(folder)
            nextLink = result.get("@odata.nextLink")
            endpoint = _stripGraphBase(nextLink) if nextLink else None

        # Only ask for the well-known inbox when no folder carries a known
        # Inbox display name; it is inserted only if its id was not listed yet.
        hasInbox = any(
            (folder.get("displayName") or "").lower() in ("inbox", "posteingang")
            for folder in folders
        )
        if not hasInbox:
            inbox = await self._graphGet("me/mailFolders/inbox")
            if "error" not in inbox and inbox.get("id") and inbox.get("id") not in seenIds:
                folders.insert(0, inbox)

        return [
            ExternalEntry(
                name=folder.get("displayName") or "",
                path=f"/{folder.get('id', '')}",
                isFolder=True,
                metadata={
                    "id": folder.get("id"),
                    "totalItemCount": folder.get("totalItemCount"),
                    "unreadItemCount": folder.get("unreadItemCount"),
                    "childFolderCount": folder.get("childFolderCount"),
                },
            )
            for folder in folders
        ]

    async def browse(
        self,
        path: str,
        filter: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[ExternalEntry]:
        """List mail folders or messages.

        path = "" or "/"     -> list ALL top-level mail folders (paginated)
        path = "/<folderId>" -> list messages in that folder, newest first
                                (paginated, up to ``limit``)

        ``filter`` is accepted for interface compatibility but is not applied
        here. ``limit`` is clamped to ``1.._MAX_MESSAGE_LIMIT`` and defaults to
        ``_DEFAULT_MESSAGE_LIMIT``. When a page request fails, the messages
        collected so far are returned and the error is logged.
        """
        folderId = (path or "").strip("/")
        if not folderId:
            return await self._listFolders()

        effectiveLimit = self._clampLimit(limit)
        pageSize = min(self._PAGE_SIZE, effectiveLimit)
        endpoint: Optional[str] = (
            f"me/mailFolders/{folderId}/messages"
            f"?$top={pageSize}&$orderby=receivedDateTime desc"
        )
        messages: List[Dict[str, Any]] = []
        while endpoint and len(messages) < effectiveLimit:
            result = await self._graphGet(endpoint)
            if "error" in result:
                logger.warning(f"Outlook message listing failed: {result['error']}")
                break
            remaining = effectiveLimit - len(messages)
            messages.extend(result.get("value", [])[:remaining])
            nextLink = result.get("@odata.nextLink")
            endpoint = _stripGraphBase(nextLink) if nextLink else None

        return [
            ExternalEntry(
                name=self._subjectOf(m),
                path=f"{path}/{m.get('id', '')}",
                isFolder=False,
                metadata={
                    "id": m.get("id"),
                    "from": self._senderOf(m),
                    "receivedDateTime": m.get("receivedDateTime"),
                    "hasAttachments": m.get("hasAttachments", False),
                },
            )
            for m in messages
        ]

    async def download(self, path: str) -> DownloadResult:
        """Download a mail message as RFC 822 EML via the Graph ``$value`` endpoint.

        The last segment of ``path`` is taken as the message id. The file name
        is derived from the subject (sanitised, at most 80 characters); when
        the subject cannot be fetched or is empty, the message id is used.
        Returns an empty ``DownloadResult`` when the path holds no message id
        or the content cannot be fetched.
        """
        import re

        messageId = path.strip("/").split("/")[-1]
        if not messageId:
            return DownloadResult()

        meta = await self._graphGet(f"me/messages/{messageId}?$select=subject")
        subject = messageId
        if "error" not in meta:
            # The subject may be null or empty; re.sub() needs a string.
            subject = meta.get("subject") or messageId
        safeName = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", subject)[:80].strip(". ") or "email"

        emlBytes = await self._graphDownload(f"me/messages/{messageId}/$value")
        if not emlBytes:
            return DownloadResult()
        return DownloadResult(
            data=emlBytes,
            fileName=f"{safeName}.eml",
            mimeType="message/rfc822",
        )

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        """Not applicable for Outlook in the file sense."""
        return {"error": "Upload not supported for Outlook"}

    async def search(
        self,
        query: str,
        path: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[ExternalEntry]:
        """Search the user's messages for ``query``.

        ``path`` is accepted for interface compatibility but is not used.
        ``limit`` is clamped like in ``browse``. Returns an empty list when
        Graph reports an error.
        """
        from urllib.parse import quote

        effectiveLimit = self._clampLimit(limit)
        # The $search value is a double-quoted phrase: backslashes and double
        # quotes inside it are backslash-escaped. Single quotes need no
        # escaping here, and doubling them would alter the search text.
        escaped = query.replace("\\", "\\\\").replace('"', '\\"')
        phrase = '"' + escaped + '"'
        # Percent-encode the phrase so characters such as & or # cannot be
        # mistaken for URL syntax.
        # NOTE: Graph $search does not support $orderby and may return a single
        # page (no @odata.nextLink). We still pass $top to lift the implicit 25.
        endpoint = f"me/messages?$search={quote(phrase, safe='')}&$top={effectiveLimit}"
        result = await self._graphGet(endpoint)
        if "error" in result:
            return []
        return [
            ExternalEntry(
                name=self._subjectOf(m),
                path=f"/search/{m.get('id', '')}",
                isFolder=False,
                metadata={
                    "id": m.get("id"),
                    "from": self._senderOf(m),
                    "receivedDateTime": m.get("receivedDateTime"),
                },
            )
            for m in result.get("value", [])
        ]

    def _buildMessage(
        self, to: List[str], subject: str, body: str,
        bodyType: str = "Text",
        cc: Optional[List[str]] = None,
        attachments: Optional[List[Dict]] = None,
    ) -> Dict[str, Any]:
        """Build a Graph API message object.

        attachments: list of {"name": str, "contentBytes": str (base64), "contentType": str};
        ``contentType`` defaults to ``application/octet-stream``.
        """
        message: Dict[str, Any] = {
            "subject": subject,
            "body": {"contentType": bodyType, "content": body},
            "toRecipients": [{"emailAddress": {"address": addr}} for addr in to],
        }
        if cc:
            message["ccRecipients"] = [{"emailAddress": {"address": addr}} for addr in cc]
        if attachments:
            message["attachments"] = [
                {
                    "@odata.type": "#microsoft.graph.fileAttachment",
                    "name": att["name"],
                    "contentBytes": att["contentBytes"],
                    "contentType": att.get("contentType", "application/octet-stream"),
                }
                for att in attachments
            ]
        return message

    async def sendMail(
        self, to: List[str], subject: str, body: str,
        bodyType: str = "Text",
        cc: Optional[List[str]] = None,
        attachments: Optional[List[Dict]] = None,
    ) -> Dict[str, Any]:
        """Send an email via Microsoft Graph. bodyType: 'Text' or 'HTML'.

        Returns ``{"success": True}``, or the error dict produced by the
        underlying Graph call.
        """
        import json

        message = self._buildMessage(to, subject, body, bodyType, cc, attachments)
        payload = json.dumps({"message": message, "saveToSentItems": True}).encode("utf-8")
        result = await self._graphPost("me/sendMail", payload)
        if "error" in result:
            return result
        return {"success": True}

    async def createDraft(
        self, to: List[str], subject: str, body: str,
        bodyType: str = "Text",
        cc: Optional[List[str]] = None,
        attachments: Optional[List[Dict]] = None,
    ) -> Dict[str, Any]:
        """Create a draft email by posting the message to ``me/messages``.

        Returns ``{"success": True, "draft": True, "messageId": <id>}``, or the
        error dict produced by the underlying Graph call.
        """
        import json

        message = self._buildMessage(to, subject, body, bodyType, cc, attachments)
        payload = json.dumps(message).encode("utf-8")
        result = await self._graphPost("me/messages", payload)
        if "error" in result:
            return result
        return {"success": True, "draft": True, "messageId": result.get("id", "")}
# ---------------------------------------------------------------------------
# Teams Adapter (Stub)
# ---------------------------------------------------------------------------
class TeamsAdapter(_GraphApiMixin, ServiceAdapter):
    """ServiceAdapter for Microsoft Teams: browses joined teams and their channels.

    Download, upload and search are placeholders that return empty results.
    """

    async def _listJoinedTeams(self) -> list:
        """Return the user's joined teams as folder entries (empty list on error)."""
        response = await self._graphGet("me/joinedTeams")
        if "error" in response:
            logger.warning(f"Teams browse failed: {response['error']}")
            return []
        teams = []
        for team in response.get("value", []):
            teams.append(
                ExternalEntry(
                    name=team.get("displayName", ""),
                    path=f"/{team.get('id', '')}",
                    isFolder=True,
                    metadata={"id": team.get("id"), "description": team.get("description", "")},
                )
            )
        return teams

    async def _listChannels(self, teamId: str) -> list:
        """Return the channels of ``teamId`` as folder entries (empty list on error)."""
        response = await self._graphGet(f"teams/{teamId}/channels")
        if "error" in response:
            return []
        channels = []
        for channel in response.get("value", []):
            channels.append(
                ExternalEntry(
                    name=channel.get("displayName", ""),
                    path=f"/{teamId}/{channel.get('id', '')}",
                    isFolder=True,
                    metadata={"id": channel.get("id"), "membershipType": channel.get("membershipType", "")},
                )
            )
        return channels

    async def browse(
        self,
        path: str,
        filter: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> list:
        """Browse teams and channels.

        An empty path lists the joined teams and ``/<teamId>`` lists that
        team's channels. Any deeper path returns an empty list. ``filter``
        and ``limit`` are not applied.
        """
        trimmed = (path or "").strip("/")
        if not trimmed:
            return await self._listJoinedTeams()
        teamId, separator, _ = trimmed.partition("/")
        if separator:
            # Channel contents are not browsable.
            return []
        return await self._listChannels(teamId)

    async def download(self, path: str) -> bytes:
        """Placeholder: always returns empty bytes."""
        return b""

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        """Placeholder: always reports that upload is not implemented."""
        return {"error": "Teams upload not implemented"}

    async def search(
        self,
        query: str,
        path: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> list:
        """Placeholder: always returns an empty list."""
        return []
# ---------------------------------------------------------------------------
# OneDrive Adapter (similar to SharePoint, but for the user's personal drive)
# ---------------------------------------------------------------------------
class OneDriveAdapter(_GraphApiMixin, ServiceAdapter):
    """ServiceAdapter for the signed-in user's OneDrive (``me/drive``) via Microsoft Graph."""

    async def browse(
        self,
        path: str,
        filter: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[ExternalEntry]:
        """List the children of ``path`` (drive root when the path is empty).

        ``filter`` is a glob applied to entry names; ``limit`` caps the result
        (at least one entry is kept). A Graph error yields an empty list.
        """
        relative = (path or "").strip("/")
        if relative:
            endpoint = f"me/drive/root:/{relative}:/children"
        else:
            endpoint = "me/drive/root/children"

        listing = await self._graphGet(endpoint)
        if "error" in listing:
            return []

        entries = [_graphItemToExternalEntry(item, path) for item in listing.get("value", [])]
        if filter:
            entries = [entry for entry in entries if _matchFilter(entry, filter)]
        if limit is not None:
            cap = max(1, int(limit))
            entries = entries[:cap]
        return entries

    async def download(self, path: str) -> bytes:
        """Return the content of the file at ``path``, or ``b""`` when the path
        is empty or the download fails."""
        relative = (path or "").strip("/")
        if not relative:
            return b""
        content = await self._graphDownload(f"me/drive/root:/{relative}:/content")
        return content or b""

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        """Upload ``data`` as ``fileName`` into the folder ``path`` and return
        the Graph result dict."""
        folder = (path or "").strip("/")
        target = f"{folder}/{fileName}" if folder else fileName
        return await self._graphPut(f"me/drive/root:/{target}:/content", data)

    async def search(
        self,
        query: str,
        path: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[ExternalEntry]:
        """Search the whole drive for ``query``; ``path`` is not used.

        A Graph error yields an empty list. ``limit`` caps the result (at
        least one entry is kept).
        """
        # Single quotes are doubled because the query sits inside q='...'.
        quoted = query.replace("'", "''")
        response = await self._graphGet(f"me/drive/root/search(q='{quoted}')")
        if "error" in response:
            return []
        hits = [_graphItemToExternalEntry(item) for item in response.get("value", [])]
        if limit is not None:
            cap = max(1, int(limit))
            hits = hits[:cap]
        return hits
# ---------------------------------------------------------------------------
# MsftConnector (1:n)
# ---------------------------------------------------------------------------
class MsftConnector(ProviderConnector):
    """Microsoft ProviderConnector: one connection fans out to several service adapters."""

    # Service key -> adapter class instantiated for that service.
    _SERVICE_MAP = {
        "sharepoint": SharepointAdapter,
        "outlook": OutlookAdapter,
        "teams": TeamsAdapter,
        "onedrive": OneDriveAdapter,
    }

    def getAvailableServices(self) -> List[str]:
        """Return the service keys this connector can serve."""
        return [serviceName for serviceName in self._SERVICE_MAP]

    def getServiceAdapter(self, service: str) -> ServiceAdapter:
        """Create the adapter for ``service``, handing it this connector's access token.

        Raises:
            ValueError: if ``service`` is not one of the known service keys.
        """
        adapterClass = self._SERVICE_MAP.get(service)
        if adapterClass:
            return adapterClass(self.accessToken)
        raise ValueError(f"Unknown MSFT service: {service}. Available: {list(self._SERVICE_MAP.keys())}")
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _parseSharepointPath(path: str) -> tuple:
    """Parse a SharePoint path into ``(siteId, innerPath)``.

    Expected format: ``/sites/<siteId>/<innerPath>``. A path without the
    ``sites/`` prefix is read as ``<siteId>/<innerPath>``. Leading and
    trailing slashes are ignored.

    Returns ``("", "")`` for an empty path and for the bare prefix
    (``"/sites"`` or ``"/sites/"``), so callers treat it as "no site
    selected" instead of looking up a site literally named ``sites``.
    """
    if not path:
        return ("", "")
    clean = path.strip("/")
    if clean == "sites":
        # Only the prefix was given; there is no site id to extract.
        return ("", "")
    if clean.startswith("sites/"):
        # Drop the "sites" segment, then split the site id from the rest.
        clean = clean.partition("/")[2]
    siteId, _, innerPath = clean.partition("/")
    return (siteId, innerPath)
def _matchFilter(entry: ExternalEntry, pattern: str) -> bool:
    """Report whether the entry's name matches ``pattern``, ignoring case.

    Matching is done by ``fnmatch.fnmatch`` on the lower-cased name and
    pattern.
    """
    from fnmatch import fnmatch

    candidate = entry.name.lower()
    loweredPattern = pattern.lower()
    return fnmatch(candidate, loweredPattern)