# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Google ProviderConnector -- Drive and Gmail via Google OAuth."""

import base64
import logging
import re
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple, Union
from urllib.parse import quote

import aiohttp

from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter, DownloadResult
from modules.datamodels.datamodelDataSource import ExternalEntry

logger = logging.getLogger(__name__)

_DRIVE_BASE = "https://www.googleapis.com/drive/v3"
_GMAIL_BASE = "https://gmail.googleapis.com/gmail/v1"

# MIME type Drive reports for folders.
_DRIVE_FOLDER_MIME = "application/vnd.google-apps.folder"

# Characters replaced by "_" when a mail subject is turned into a file name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')

# Query parameters accepted by _googleGet: a mapping, or a sequence of
# (key, value) pairs when the same key has to be sent more than once.
_QueryParams = Union[Mapping[str, str], Sequence[Tuple[str, str]]]


def _escapeDriveQueryValue(value: str) -> str:
    """Escape *value* for use inside a single-quoted literal of a Drive ``q`` expression.

    Backslashes are escaped first so that the backslash added in front of a
    quote is not escaped a second time.
    """
    return value.replace("\\", "\\\\").replace("'", "\\'")


def _pathSegment(value: str) -> str:
    """Percent-encode *value* so it is safe as a single URL path segment."""
    return quote(value, safe="")


async def _googleGet(token: str, url: str, params: Optional[_QueryParams] = None) -> Dict[str, Any]:
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        token: OAuth access token, sent as a Bearer Authorization header.
        url: Request URL.
        params: Optional query parameters. They are handed to aiohttp, which
            encodes them, so values may contain characters such as ``&``,
            ``#`` or spaces.

    Returns:
        The parsed JSON response for status 200/201. For any other status, or
        when the request raises, a dict with a single ``"error"`` key; this
        function does not raise.
    """
    headers = {"Authorization": f"Bearer {token}"}
    timeout = aiohttp.ClientTimeout(total=20)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url, headers=headers, params=params) as resp:
                if resp.status in (200, 201):
                    return await resp.json()
                errorText = await resp.text()
                logger.warning(f"Google API {resp.status}: {errorText[:300]}")
                return {"error": f"{resp.status}: {errorText[:200]}"}
    except Exception as e:
        # Best-effort boundary: callers only test for the "error" key.
        # Some exceptions (e.g. timeouts) have an empty message, so fall back
        # to the exception type name to keep the error informative.
        message = str(e) or type(e).__name__
        logger.warning(f"Google API request failed: {message}")
        return {"error": message}


class DriveAdapter(ServiceAdapter):
    """Google Drive ServiceAdapter -- browse files and folders."""

    # Native Google file types that cannot be downloaded directly, mapped to
    # the MIME type requested from the export endpoint.
    _EXPORT_MIME_MAP = {
        "application/vnd.google-apps.document": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.google-apps.spreadsheet": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.google-apps.presentation": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/vnd.google-apps.drawing": "application/pdf",
    }

    def __init__(self, accessToken: str):
        self._token = accessToken

    async def browse(self, path: str, filter: Optional[str] = None) -> List[ExternalEntry]:
        """List the non-trashed children of a Drive folder.

        Args:
            path: Folder id, optionally wrapped in slashes. Empty or ``None``
                selects the ``root`` folder.
            filter: Accepted for interface compatibility; not used here.

        Returns:
            Up to 100 entries (a single page, folders first, then by name),
            or an empty list when the request fails.
        """
        folderId = (path or "").strip("/") or "root"
        params = {
            "q": f"'{_escapeDriveQueryValue(folderId)}' in parents and trashed=false",
            "fields": "files(id,name,mimeType,size,modifiedTime,parents)",
            "pageSize": "100",
            "orderBy": "folder,name",
        }
        result = await _googleGet(self._token, f"{_DRIVE_BASE}/files", params)
        if "error" in result:
            logger.warning(f"Google Drive browse failed: {result['error']}")
            return []

        entries = []
        for f in result.get("files", []):
            isFolder = f.get("mimeType") == _DRIVE_FOLDER_MIME
            entries.append(ExternalEntry(
                name=f.get("name", ""),
                path=f"/{f.get('id', '')}",
                isFolder=isFolder,
                # Drive sends the size as a string and omits it for some items.
                size=int(f.get("size", 0)) if f.get("size") else None,
                mimeType=f.get("mimeType") if not isFolder else None,
                metadata={"id": f.get("id"), "modifiedTime": f.get("modifiedTime")},
            ))
        return entries

    async def download(self, path: str) -> bytes:
        """Download a Drive file, exporting native Google formats when needed.

        A direct ``alt=media`` download is tried first. If that does not
        return 200, the file's MIME type is looked up and, when it appears in
        ``_EXPORT_MIME_MAP``, the file is exported in the mapped format.

        Args:
            path: File id, optionally wrapped in slashes.

        Returns:
            The file content, or ``b""`` when the id is empty, the type is
            not exportable, or any request fails.
        """
        fileId = (path or "").strip("/")
        if not fileId:
            return b""

        headers = {"Authorization": f"Bearer {self._token}"}
        timeout = aiohttp.ClientTimeout(total=60)
        fileUrl = f"{_DRIVE_BASE}/files/{_pathSegment(fileId)}"
        try:
            async with aiohttp.ClientSession(timeout=timeout) as session:
                # Try direct download first
                async with session.get(fileUrl, headers=headers, params={"alt": "media"}) as resp:
                    if resp.status == 200:
                        return await resp.read()
                    logger.debug(f"Google Drive direct download returned {resp.status} for {fileId}")

                # Direct download failed: check whether it is a native Google
                # file that needs to be exported instead.
                async with session.get(fileUrl, headers=headers, params={"fields": "mimeType,name"}) as metaResp:
                    if metaResp.status != 200:
                        logger.warning(f"Google Drive metadata fetch failed ({metaResp.status}) for {fileId}")
                        return b""
                    meta = await metaResp.json()

                fileMime = meta.get("mimeType", "")
                fileName = meta.get("name", fileId)
                exportMime = self._EXPORT_MIME_MAP.get(fileMime)
                if not exportMime:
                    logger.warning(f"Google Drive: unsupported mimeType '{fileMime}' for file '{fileName}' ({fileId})")
                    return b""

                logger.info(f"Google Drive: exporting '{fileName}' as {exportMime}")
                async with session.get(f"{fileUrl}/export", headers=headers, params={"mimeType": exportMime}) as exportResp:
                    if exportResp.status == 200:
                        return await exportResp.read()
                    logger.warning(f"Google Drive export failed ({exportResp.status}) for '{fileName}'")
        except Exception as e:
            logger.error(f"Google Drive download failed for {fileId}: {e}")
        return b""

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        """Not implemented; always returns an error dict."""
        return {"error": "Google Drive upload not yet implemented"}

    async def search(self, query: str, path: Optional[str] = None) -> List[ExternalEntry]:
        """Find non-trashed files whose name contains *query*.

        Args:
            query: Text to look for in file names.
            path: Optional folder id; when given, only direct children of
                that folder are searched.

        Returns:
            Up to 25 entries, or an empty list when the request fails.
        """
        folderId = (path or "").strip("/")
        qParts = [f"name contains '{_escapeDriveQueryValue(query)}'", "trashed=false"]
        if folderId:
            qParts.append(f"'{_escapeDriveQueryValue(folderId)}' in parents")
        qStr = " and ".join(qParts)
        logger.debug(f"Google Drive search: q={qStr}")

        params = {
            "q": qStr,
            "fields": "files(id,name,mimeType,size)",
            "pageSize": "25",
        }
        result = await _googleGet(self._token, f"{_DRIVE_BASE}/files", params)
        if "error" in result:
            return []
        return [
            ExternalEntry(
                name=f.get("name", ""),
                path=f"/{f.get('id', '')}",
                isFolder=f.get("mimeType") == _DRIVE_FOLDER_MIME,
                size=int(f.get("size", 0)) if f.get("size") else None,
            )
            for f in result.get("files", [])
        ]


class GmailAdapter(ServiceAdapter):
    """Gmail ServiceAdapter -- browse labels and messages."""

    # System labels that are listed when browsing; other system labels are hidden.
    _SYSTEM_LABELS = frozenset({"INBOX", "SENT", "DRAFT", "TRASH", "SPAM", "STARRED", "IMPORTANT"})

    def __init__(self, accessToken: str):
        self._token = accessToken

    async def browse(self, path: str, filter: Optional[str] = None) -> list:
        """List labels (empty path) or the messages carrying a label.

        Args:
            path: Empty/``None`` to list labels; otherwise a label id,
                optionally wrapped in slashes.
            filter: Accepted for interface compatibility; not used here.

        Returns:
            For an empty path: one folder entry per label, system labels
            first, each group sorted by name. Otherwise up to 25 message
            entries whose path is ``/<label>/<messageId>``. An empty list
            when the listing request fails.
        """
        cleanPath = (path or "").strip("/")
        if not cleanPath:
            return await self._browseLabels()

        listUrl = f"{_GMAIL_BASE}/users/me/messages"
        result = await _googleGet(self._token, listUrl, {"labelIds": cleanPath, "maxResults": "25"})
        if "error" in result:
            logger.warning(f"Gmail message list failed: {result['error']}")
            return []

        # The list endpoint only yields ids, so each message needs its own
        # metadata request to obtain subject, sender and date.
        detailParams = [
            ("format", "metadata"),
            ("metadataHeaders", "Subject"),
            ("metadataHeaders", "From"),
            ("metadataHeaders", "Date"),
        ]
        entries = []
        for msg in result.get("messages", [])[:25]:
            msgId = msg.get("id", "")
            entryPath = f"/{cleanPath}/{msgId}"
            detailUrl = f"{_GMAIL_BASE}/users/me/messages/{_pathSegment(msgId)}"
            detail = await _googleGet(self._token, detailUrl, detailParams)
            if "error" in detail:
                # Keep the message visible even if its headers could not be read.
                entries.append(ExternalEntry(name=f"Message {msgId}", path=entryPath, isFolder=False))
                continue
            headers = {h.get("name", ""): h.get("value", "") for h in detail.get("payload", {}).get("headers", [])}
            entries.append(ExternalEntry(
                name=headers.get("Subject", "(no subject)"),
                path=entryPath,
                isFolder=False,
                metadata={
                    "id": msgId,
                    "from": headers.get("From", ""),
                    "date": headers.get("Date", ""),
                    "snippet": detail.get("snippet", ""),
                },
            ))
        return entries

    async def _browseLabels(self) -> list:
        """Return the user's labels as folder entries, system labels first."""
        result = await _googleGet(self._token, f"{_GMAIL_BASE}/users/me/labels")
        if "error" in result:
            logger.warning(f"Gmail labels failed: {result['error']}")
            return []

        labels = []
        for lbl in result.get("labels", []):
            labelId = lbl.get("id", "")
            labelName = lbl.get("name", labelId)
            if lbl.get("type") == "system" and labelId not in self._SYSTEM_LABELS:
                continue
            labels.append(ExternalEntry(
                name=labelName,
                path=f"/{labelId}",
                isFolder=True,
                metadata={"id": labelId, "type": lbl.get("type", "")},
            ))
        labels.sort(key=lambda e: (0 if e.metadata.get("type") == "system" else 1, e.name))
        return labels

    async def download(self, path: str) -> DownloadResult:
        """Download a Gmail message as RFC 822 EML via format=raw.

        Args:
            path: Path whose last segment is the message id.

        Returns:
            A DownloadResult with the message bytes, a ``.eml`` file name
            derived from the subject, and MIME type ``message/rfc822``; an
            empty ``DownloadResult()`` when the id is missing, the request
            fails, or the payload cannot be decoded.
        """
        cleanPath = (path or "").strip("/")
        msgId = cleanPath.split("/")[-1] if cleanPath else ""
        if not msgId:
            return DownloadResult()

        msgUrl = f"{_GMAIL_BASE}/users/me/messages/{_pathSegment(msgId)}"
        result = await _googleGet(self._token, msgUrl, {"format": "raw"})
        if "error" in result:
            return DownloadResult()

        rawB64 = result.get("raw", "")
        if not rawB64:
            return DownloadResult()

        try:
            # base64url payloads may come without "=" padding, which the
            # decoder rejects; restore it before decoding.
            emlBytes = base64.urlsafe_b64decode(rawB64 + "=" * (-len(rawB64) % 4))
        except ValueError as e:  # binascii.Error is a ValueError subclass
            logger.warning(f"Gmail raw payload could not be decoded for {msgId}: {e}")
            return DownloadResult()

        # Second request only to obtain a readable file name; on failure the
        # message id is used instead.
        meta = await _googleGet(self._token, msgUrl, [("format", "metadata"), ("metadataHeaders", "Subject")])
        subject = msgId
        if "error" not in meta:
            for h in meta.get("payload", {}).get("headers", []):
                if h.get("name", "").lower() == "subject":
                    subject = h.get("value", msgId)
                    break

        safeName = _UNSAFE_FILENAME_CHARS.sub("_", subject)[:80].strip(". ") or "email"
        return DownloadResult(
            data=emlBytes,
            fileName=f"{safeName}.eml",
            mimeType="message/rfc822",
        )

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        """Not applicable to Gmail; always returns an error dict."""
        return {"error": "Gmail upload not applicable"}

    async def search(self, query: str, path: Optional[str] = None) -> list:
        """Search messages with a Gmail query string.

        Args:
            query: Gmail search expression, sent as the ``q`` parameter.
            path: Accepted for interface compatibility; not used here.

        Returns:
            Up to 10 entries named ``Message <id>`` (only ids are returned by
            the list endpoint), or an empty list when the request fails.
        """
        result = await _googleGet(
            self._token,
            f"{_GMAIL_BASE}/users/me/messages",
            {"q": query, "maxResults": "10"},
        )
        if "error" in result:
            return []
        return [
            ExternalEntry(
                name=f"Message {m.get('id', '')}",
                path=f"/{m.get('id', '')}",
                isFolder=False,
                metadata={"id": m.get("id")},
            )
            for m in result.get("messages", [])
        ]


class GoogleConnector(ProviderConnector):
    """Google ProviderConnector -- 1 connection -> Drive + Gmail."""

    # Service name -> adapter class instantiated by getServiceAdapter.
    _SERVICE_MAP = {
        "drive": DriveAdapter,
        "gmail": GmailAdapter,
    }

    def getAvailableServices(self) -> List[str]:
        """Return the names of the services this connector can provide."""
        return list(self._SERVICE_MAP.keys())

    def getServiceAdapter(self, service: str) -> ServiceAdapter:
        """Create the adapter for *service* using this connector's access token.

        Raises:
            ValueError: If *service* is not a key of ``_SERVICE_MAP``.
        """
        adapterClass = self._SERVICE_MAP.get(service)
        if not adapterClass:
            raise ValueError(f"Unknown Google service: {service}. Available: {list(self._SERVICE_MAP.keys())}")
        return adapterClass(self.accessToken)