232 lines
9.9 KiB
Python
232 lines
9.9 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""Google ProviderConnector -- Drive and Gmail via Google OAuth."""
|
|
|
|
import logging
from typing import Any, Dict, List, Optional
from urllib.parse import quote, urlencode

import aiohttp

from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter
from modules.datamodels.datamodelDataSource import ExternalEntry
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Base URL of the Google Drive v3 REST API; Drive endpoints are appended to it.
_DRIVE_BASE = "https://www.googleapis.com/drive/v3"
# Base URL of the Gmail v1 REST API; Gmail endpoints are appended to it.
_GMAIL_BASE = "https://gmail.googleapis.com/gmail/v1"
|
|
|
|
|
|
async def _googleGet(token: str, url: str) -> Dict[str, Any]:
    """Perform an authenticated GET request and return the decoded JSON body.

    Args:
        token: OAuth access token, sent as a ``Bearer`` credential.
        url: Complete request URL including any query string. The caller is
            responsible for percent-encoding query values.

    Returns:
        The parsed JSON body when the response status is 200 or 201.
        Otherwise a dict holding a single ``"error"`` key with a short
        description. HTTP and transport failures are never raised; callers
        detect them with ``"error" in result``.
    """
    headers = {"Authorization": f"Bearer {token}"}
    timeout = aiohttp.ClientTimeout(total=20)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url, headers=headers) as resp:
                if resp.status in (200, 201):
                    return await resp.json()
                errorText = await resp.text()
                logger.warning(f"Google API {resp.status}: {errorText[:300]}")
                return {"error": f"{resp.status}: {errorText[:200]}"}
    except Exception as e:
        # Some exceptions (notably asyncio.TimeoutError) stringify to "",
        # which would give callers an empty, undiagnosable error message.
        # Fall back to the exception class name in that case.
        return {"error": str(e) or type(e).__name__}
|
|
|
|
|
|
class DriveAdapter(ServiceAdapter):
    """Google Drive ServiceAdapter -- browse files and folders.

    Paths handled by this adapter are Drive file/folder IDs with an optional
    leading slash (``"/<id>"``); an empty path means the Drive root folder.
    """

    # MIME type Drive reports for folders.
    _FOLDER_MIME = "application/vnd.google-apps.folder"

    # Native Google document types cannot be fetched with ``alt=media``;
    # they are exported to the format listed here instead.
    _EXPORT_MIME_MAP = {
        "application/vnd.google-apps.document": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.google-apps.spreadsheet": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.google-apps.presentation": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/vnd.google-apps.drawing": "application/pdf",
    }

    def __init__(self, accessToken: str):
        """Store the OAuth access token used for every Drive request."""
        self._token = accessToken

    @staticmethod
    def _quoteLiteral(value: str) -> str:
        """Escape ``value`` for use inside a single-quoted Drive query literal.

        Backslashes are escaped first so that the backslash added in front of
        each quote is not itself doubled.
        """
        return value.replace("\\", "\\\\").replace("'", "\\'")

    @staticmethod
    def _filesUrl(params: Dict[str, Any]) -> str:
        """Build a ``files.list`` URL with percent-encoded query parameters."""
        return f"{_DRIVE_BASE}/files?{urlencode(params, quote_via=quote)}"

    @classmethod
    def _entryFromFile(cls, f: Dict[str, Any]) -> ExternalEntry:
        """Convert one Drive ``files`` resource dict into an ExternalEntry."""
        isFolder = f.get("mimeType") == cls._FOLDER_MIME
        return ExternalEntry(
            name=f.get("name", ""),
            path=f"/{f.get('id', '')}",
            isFolder=isFolder,
            # Drive omits ``size`` for folders and native Google documents.
            size=int(f.get("size", 0)) if f.get("size") else None,
            mimeType=f.get("mimeType") if not isFolder else None,
            metadata={"id": f.get("id"), "modifiedTime": f.get("modifiedTime")},
        )

    async def browse(self, path: str, filter: Optional[str] = None) -> List[ExternalEntry]:
        """List the non-trashed children of a Drive folder.

        Args:
            path: Folder ID, optionally wrapped in slashes. Empty or ``None``
                selects the ``root`` folder.
            filter: Accepted for interface compatibility; currently unused.

        Returns:
            Up to 100 entries ordered folders-first, then by name. An empty
            list is returned (and a warning logged) when the request fails.
        """
        folderId = (path or "").strip("/") or "root"
        url = self._filesUrl({
            "q": f"'{self._quoteLiteral(folderId)}' in parents and trashed=false",
            "fields": "files(id,name,mimeType,size,modifiedTime,parents)",
            "pageSize": 100,
            "orderBy": "folder,name",
        })

        result = await _googleGet(self._token, url)
        if "error" in result:
            logger.warning(f"Google Drive browse failed: {result['error']}")
            return []

        return [self._entryFromFile(f) for f in result.get("files", [])]

    async def download(self, path: str) -> bytes:
        """Download a Drive file, exporting native Google documents.

        A direct ``alt=media`` download is attempted first. If that does not
        return 200, the file's MIME type is looked up and, when it appears in
        ``_EXPORT_MIME_MAP``, the file is exported in the mapped format.

        Args:
            path: File ID, optionally wrapped in slashes.

        Returns:
            The file content, or ``b""`` when the path is empty, the file
            type is unsupported, or any request fails (failures are logged,
            not raised).
        """
        fileId = (path or "").strip("/")
        if not fileId:
            return b""

        # Encode the ID as a single path segment so characters such as
        # "/", "?" or "#" cannot alter the request URL.
        fileUrl = f"{_DRIVE_BASE}/files/{quote(fileId, safe='')}"
        headers = {"Authorization": f"Bearer {self._token}"}
        timeout = aiohttp.ClientTimeout(total=60)
        try:
            async with aiohttp.ClientSession(timeout=timeout) as session:
                # Try direct download first
                async with session.get(f"{fileUrl}?alt=media", headers=headers) as resp:
                    if resp.status == 200:
                        return await resp.read()
                    logger.debug(f"Google Drive direct download returned {resp.status} for {fileId}")

                # Direct download failed: check whether it is a native Google
                # file that needs to be exported instead.
                async with session.get(f"{fileUrl}?fields=mimeType,name", headers=headers) as metaResp:
                    if metaResp.status != 200:
                        logger.warning(f"Google Drive metadata fetch failed ({metaResp.status}) for {fileId}")
                        return b""
                    meta = await metaResp.json()
                    fileMime = meta.get("mimeType", "")
                    fileName = meta.get("name", fileId)

                exportMime = self._EXPORT_MIME_MAP.get(fileMime)
                if not exportMime:
                    logger.warning(f"Google Drive: unsupported mimeType '{fileMime}' for file '{fileName}' ({fileId})")
                    return b""

                exportUrl = f"{fileUrl}/export?{urlencode({'mimeType': exportMime}, quote_via=quote)}"
                logger.info(f"Google Drive: exporting '{fileName}' as {exportMime}")
                async with session.get(exportUrl, headers=headers) as exportResp:
                    if exportResp.status == 200:
                        return await exportResp.read()
                    logger.warning(f"Google Drive export failed ({exportResp.status}) for '{fileName}'")
        except Exception as e:
            # Best-effort download: report the failure and fall through to b"".
            logger.error(f"Google Drive download failed for {fileId}: {e}")
        return b""

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        """Placeholder; always returns an ``error`` dict (not implemented)."""
        return {"error": "Google Drive upload not yet implemented"}

    async def search(self, query: str, path: Optional[str] = None) -> List[ExternalEntry]:
        """Find non-trashed files whose name contains ``query``.

        Args:
            query: Text to look for in file names.
            path: Optional folder ID; when given, only direct children of
                that folder are searched.

        Returns:
            Up to 25 matching entries, or an empty list when the request
            fails.
        """
        folderId = (path or "").strip("/")
        qParts = [f"name contains '{self._quoteLiteral(query)}'", "trashed=false"]
        if folderId:
            qParts.append(f"'{self._quoteLiteral(folderId)}' in parents")
        qStr = " and ".join(qParts)
        url = self._filesUrl({
            "q": qStr,
            "fields": "files(id,name,mimeType,size,modifiedTime)",
            "pageSize": 25,
        })
        logger.debug(f"Google Drive search: q={qStr}")

        result = await _googleGet(self._token, url)
        if "error" in result:
            return []
        return [self._entryFromFile(f) for f in result.get("files", [])]
|
|
|
|
|
|
class GmailAdapter(ServiceAdapter):
    """Gmail ServiceAdapter -- browse labels and messages.

    The root path lists labels as folders; a label path (``"/<labelId>"``)
    lists messages carrying that label.
    """

    # System labels shown when browsing; other system labels are hidden.
    _SYSTEM_LABELS = frozenset({"INBOX", "SENT", "DRAFT", "TRASH", "SPAM", "STARRED", "IMPORTANT"})

    # Query parameters for the per-message metadata request.
    _DETAIL_PARAMS = (
        ("format", "metadata"),
        ("metadataHeaders", "Subject"),
        ("metadataHeaders", "From"),
        ("metadataHeaders", "Date"),
    )

    def __init__(self, accessToken: str):
        """Store the OAuth access token used for every Gmail request."""
        self._token = accessToken

    async def browse(self, path: str, filter: Optional[str] = None) -> List[ExternalEntry]:
        """List labels (empty path) or the messages under a label.

        Args:
            path: Empty/``None`` for the label list, otherwise a label ID
                optionally wrapped in slashes.
            filter: Accepted for interface compatibility; currently unused.

        Returns:
            Label entries (as folders) or up to 25 message entries. An empty
            list is returned when the listing request fails.
        """
        cleanPath = (path or "").strip("/")
        if not cleanPath:
            return await self._browseLabels()
        return await self._browseMessages(cleanPath)

    async def _browseLabels(self) -> List[ExternalEntry]:
        """Return the user's labels as folder entries, system labels first."""
        result = await _googleGet(self._token, f"{_GMAIL_BASE}/users/me/labels")
        if "error" in result:
            logger.warning(f"Gmail labels failed: {result['error']}")
            return []

        labels = []
        for lbl in result.get("labels", []):
            labelId = lbl.get("id", "")
            # Hide system labels that are not in the allow-list.
            if lbl.get("type") == "system" and labelId not in self._SYSTEM_LABELS:
                continue
            labels.append(ExternalEntry(
                name=lbl.get("name", labelId),
                path=f"/{labelId}",
                isFolder=True,
                metadata={"id": labelId, "type": lbl.get("type", "")},
            ))
        labels.sort(key=lambda e: (0 if e.metadata.get("type") == "system" else 1, e.name))
        return labels

    async def _browseMessages(self, cleanPath: str) -> List[ExternalEntry]:
        """Return up to 25 messages carrying the label ``cleanPath``."""
        listParams = urlencode({"labelIds": cleanPath, "maxResults": 25}, quote_via=quote)
        result = await _googleGet(self._token, f"{_GMAIL_BASE}/users/me/messages?{listParams}")
        if "error" in result:
            return []

        detailParams = urlencode(self._DETAIL_PARAMS, quote_via=quote)
        entries = []
        for msg in result.get("messages", [])[:25]:
            msgId = msg.get("id", "")
            detailUrl = f"{_GMAIL_BASE}/users/me/messages/{quote(msgId, safe='')}?{detailParams}"
            detail = await _googleGet(self._token, detailUrl)
            if "error" in detail:
                # Keep the message visible even if its metadata is unavailable.
                entries.append(ExternalEntry(name=f"Message {msgId}", path=f"/{cleanPath}/{msgId}", isFolder=False))
                continue
            # Mail header names are case-insensitive, so index them lowercased.
            headers = {
                h.get("name", "").lower(): h.get("value", "")
                for h in detail.get("payload", {}).get("headers", [])
            }
            entries.append(ExternalEntry(
                # ``or`` also covers a Subject header that is present but empty.
                name=headers.get("subject") or "(no subject)",
                path=f"/{cleanPath}/{msgId}",
                isFolder=False,
                metadata={
                    "id": msgId,
                    "from": headers.get("from", ""),
                    "date": headers.get("date", ""),
                    "snippet": detail.get("snippet", ""),
                },
            ))
        return entries

    async def download(self, path: str) -> bytes:
        """Placeholder; message download is not implemented and yields ``b""``."""
        return b""

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        """Uploading is not applicable to Gmail; always returns an ``error`` dict."""
        return {"error": "Gmail upload not applicable"}

    async def search(self, query: str, path: Optional[str] = None) -> List[ExternalEntry]:
        """Search messages using a Gmail query string.

        Args:
            query: Gmail search expression, passed as the ``q`` parameter.
            path: Accepted for interface compatibility; currently unused.

        Returns:
            Up to 10 entries named ``"Message <id>"``, or an empty list when
            the request fails.
        """
        # Percent-encode the query so characters such as "&", "#" or "+"
        # reach Gmail intact instead of splitting or truncating the URL.
        params = urlencode({"q": query, "maxResults": 10}, quote_via=quote)
        result = await _googleGet(self._token, f"{_GMAIL_BASE}/users/me/messages?{params}")
        if "error" in result:
            return []
        return [
            ExternalEntry(
                name=f"Message {m.get('id', '')}",
                path=f"/{m.get('id', '')}",
                isFolder=False,
                metadata={"id": m.get("id")},
            )
            for m in result.get("messages", [])
        ]
|
|
|
|
|
|
class GoogleConnector(ProviderConnector):
    """Google ProviderConnector -- 1 connection -> Drive + Gmail.

    Maps a service name to the adapter class that implements it.
    """

    # Service name -> adapter class, instantiated on demand.
    _SERVICE_MAP = {"drive": DriveAdapter, "gmail": GmailAdapter}

    def getAvailableServices(self) -> List[str]:
        """Return the names of the services this connector can provide."""
        return [*self._SERVICE_MAP]

    def getServiceAdapter(self, service: str) -> ServiceAdapter:
        """Create the adapter for ``service``.

        The adapter is constructed with ``self.accessToken`` (presumably set
        by the ProviderConnector base class -- confirm against the base).

        Raises:
            ValueError: If ``service`` is not a key of ``_SERVICE_MAP``.
        """
        adapterFactory = self._SERVICE_MAP.get(service)
        if adapterFactory:
            return adapterFactory(self.accessToken)
        knownServices = list(self._SERVICE_MAP.keys())
        raise ValueError(f"Unknown Google service: {service}. Available: {knownServices}")
|