gateway/modules/connectors/providerGoogle/connectorGoogle.py

265 lines
11 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Google ProviderConnector -- Drive and Gmail via Google OAuth."""
import logging
from typing import Any, Dict, List, Optional
import aiohttp
from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter, DownloadResult
from modules.datamodels.datamodelDataSource import ExternalEntry
logger = logging.getLogger(__name__)
_DRIVE_BASE = "https://www.googleapis.com/drive/v3"
_GMAIL_BASE = "https://gmail.googleapis.com/gmail/v1"
async def _googleGet(token: str, url: str) -> Dict[str, Any]:
headers = {"Authorization": f"Bearer {token}"}
timeout = aiohttp.ClientTimeout(total=20)
try:
async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.get(url, headers=headers) as resp:
if resp.status in (200, 201):
return await resp.json()
errorText = await resp.text()
logger.warning(f"Google API {resp.status}: {errorText[:300]}")
return {"error": f"{resp.status}: {errorText[:200]}"}
except Exception as e:
return {"error": str(e)}
class DriveAdapter(ServiceAdapter):
"""Google Drive ServiceAdapter -- browse files and folders."""
def __init__(self, accessToken: str):
self._token = accessToken
async def browse(self, path: str, filter: Optional[str] = None) -> List[ExternalEntry]:
folderId = (path or "").strip("/") or "root"
query = f"'{folderId}' in parents and trashed=false"
fields = "files(id,name,mimeType,size,modifiedTime,parents)"
url = f"{_DRIVE_BASE}/files?q={query}&fields={fields}&pageSize=100&orderBy=folder,name"
result = await _googleGet(self._token, url)
if "error" in result:
logger.warning(f"Google Drive browse failed: {result['error']}")
return []
entries = []
for f in result.get("files", []):
isFolder = f.get("mimeType") == "application/vnd.google-apps.folder"
entries.append(ExternalEntry(
name=f.get("name", ""),
path=f"/{f.get('id', '')}",
isFolder=isFolder,
size=int(f.get("size", 0)) if f.get("size") else None,
mimeType=f.get("mimeType") if not isFolder else None,
metadata={"id": f.get("id"), "modifiedTime": f.get("modifiedTime")},
))
return entries
_EXPORT_MIME_MAP = {
"application/vnd.google-apps.document": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.google-apps.spreadsheet": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.google-apps.presentation": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/vnd.google-apps.drawing": "application/pdf",
}
async def download(self, path: str) -> bytes:
fileId = (path or "").strip("/")
if not fileId:
return b""
headers = {"Authorization": f"Bearer {self._token}"}
timeout = aiohttp.ClientTimeout(total=60)
try:
async with aiohttp.ClientSession(timeout=timeout) as session:
# Try direct download first
url = f"{_DRIVE_BASE}/files/{fileId}?alt=media"
async with session.get(url, headers=headers) as resp:
if resp.status == 200:
return await resp.read()
logger.debug(f"Google Drive direct download returned {resp.status} for {fileId}")
# If 403/404, check if it's a native Google file that needs export
metaUrl = f"{_DRIVE_BASE}/files/{fileId}?fields=mimeType,name"
async with session.get(metaUrl, headers=headers) as metaResp:
if metaResp.status != 200:
logger.warning(f"Google Drive metadata fetch failed ({metaResp.status}) for {fileId}")
return b""
meta = await metaResp.json()
fileMime = meta.get("mimeType", "")
fileName = meta.get("name", fileId)
exportMime = self._EXPORT_MIME_MAP.get(fileMime)
if not exportMime:
logger.warning(f"Google Drive: unsupported mimeType '{fileMime}' for file '{fileName}' ({fileId})")
return b""
exportUrl = f"{_DRIVE_BASE}/files/{fileId}/export?mimeType={exportMime}"
logger.info(f"Google Drive: exporting '{fileName}' as {exportMime}")
async with session.get(exportUrl, headers=headers) as exportResp:
if exportResp.status == 200:
return await exportResp.read()
logger.warning(f"Google Drive export failed ({exportResp.status}) for '{fileName}'")
except Exception as e:
logger.error(f"Google Drive download failed for {fileId}: {e}")
return b""
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
return {"error": "Google Drive upload not yet implemented"}
async def search(self, query: str, path: Optional[str] = None) -> List[ExternalEntry]:
safeQuery = query.replace("'", "\\'")
folderId = (path or "").strip("/")
qParts = [f"name contains '{safeQuery}'", "trashed=false"]
if folderId:
qParts.append(f"'{folderId}' in parents")
qStr = " and ".join(qParts)
url = f"{_DRIVE_BASE}/files?q={qStr}&fields=files(id,name,mimeType,size)&pageSize=25"
logger.debug(f"Google Drive search: q={qStr}")
result = await _googleGet(self._token, url)
if "error" in result:
return []
return [
ExternalEntry(
name=f.get("name", ""),
path=f"/{f.get('id', '')}",
isFolder=f.get("mimeType") == "application/vnd.google-apps.folder",
size=int(f.get("size", 0)) if f.get("size") else None,
)
for f in result.get("files", [])
]
class GmailAdapter(ServiceAdapter):
"""Gmail ServiceAdapter -- browse labels and messages."""
def __init__(self, accessToken: str):
self._token = accessToken
async def browse(self, path: str, filter: Optional[str] = None) -> list:
cleanPath = (path or "").strip("/")
if not cleanPath:
url = f"{_GMAIL_BASE}/users/me/labels"
result = await _googleGet(self._token, url)
if "error" in result:
logger.warning(f"Gmail labels failed: {result['error']}")
return []
_SYSTEM_LABELS = {"INBOX", "SENT", "DRAFT", "TRASH", "SPAM", "STARRED", "IMPORTANT"}
labels = []
for lbl in result.get("labels", []):
labelId = lbl.get("id", "")
labelName = lbl.get("name", labelId)
if lbl.get("type") == "system" and labelId not in _SYSTEM_LABELS:
continue
labels.append(ExternalEntry(
name=labelName,
path=f"/{labelId}",
isFolder=True,
metadata={"id": labelId, "type": lbl.get("type", "")},
))
labels.sort(key=lambda e: (0 if e.metadata.get("type") == "system" else 1, e.name))
return labels
url = f"{_GMAIL_BASE}/users/me/messages?labelIds={cleanPath}&maxResults=25"
result = await _googleGet(self._token, url)
if "error" in result:
return []
entries = []
for msg in result.get("messages", [])[:25]:
msgId = msg.get("id", "")
detailUrl = f"{_GMAIL_BASE}/users/me/messages/{msgId}?format=metadata&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=Date"
detail = await _googleGet(self._token, detailUrl)
if "error" in detail:
entries.append(ExternalEntry(name=f"Message {msgId}", path=f"/{cleanPath}/{msgId}", isFolder=False))
continue
headers = {h.get("name", ""): h.get("value", "") for h in detail.get("payload", {}).get("headers", [])}
entries.append(ExternalEntry(
name=headers.get("Subject", "(no subject)"),
path=f"/{cleanPath}/{msgId}",
isFolder=False,
metadata={
"id": msgId,
"from": headers.get("From", ""),
"date": headers.get("Date", ""),
"snippet": detail.get("snippet", ""),
},
))
return entries
async def download(self, path: str) -> DownloadResult:
"""Download a Gmail message as RFC 822 EML via format=raw."""
import base64
import re
cleanPath = (path or "").strip("/")
msgId = cleanPath.split("/")[-1] if cleanPath else ""
if not msgId:
return DownloadResult()
url = f"{_GMAIL_BASE}/users/me/messages/{msgId}?format=raw"
result = await _googleGet(self._token, url)
if "error" in result:
return DownloadResult()
rawB64 = result.get("raw", "")
if not rawB64:
return DownloadResult()
emlBytes = base64.urlsafe_b64decode(rawB64)
metaUrl = f"{_GMAIL_BASE}/users/me/messages/{msgId}?format=metadata&metadataHeaders=Subject"
meta = await _googleGet(self._token, metaUrl)
subject = msgId
if "error" not in meta:
for h in meta.get("payload", {}).get("headers", []):
if h.get("name", "").lower() == "subject":
subject = h.get("value", msgId)
break
safeName = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", subject)[:80].strip(". ") or "email"
return DownloadResult(
data=emlBytes,
fileName=f"{safeName}.eml",
mimeType="message/rfc822",
)
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
return {"error": "Gmail upload not applicable"}
async def search(self, query: str, path: Optional[str] = None) -> list:
url = f"{_GMAIL_BASE}/users/me/messages?q={query}&maxResults=10"
result = await _googleGet(self._token, url)
if "error" in result:
return []
return [
ExternalEntry(
name=f"Message {m.get('id', '')}",
path=f"/{m.get('id', '')}",
isFolder=False,
metadata={"id": m.get("id")},
)
for m in result.get("messages", [])
]
class GoogleConnector(ProviderConnector):
"""Google ProviderConnector -- 1 connection -> Drive + Gmail."""
_SERVICE_MAP = {
"drive": DriveAdapter,
"gmail": GmailAdapter,
}
def getAvailableServices(self) -> List[str]:
return list(self._SERVICE_MAP.keys())
def getServiceAdapter(self, service: str) -> ServiceAdapter:
adapterClass = self._SERVICE_MAP.get(service)
if not adapterClass:
raise ValueError(f"Unknown Google service: {service}. Available: {list(self._SERVICE_MAP.keys())}")
return adapterClass(self.accessToken)