241 lines
10 KiB
Python
241 lines
10 KiB
Python
# Copyright (c) 2026 PowerOn AG
|
|
# All rights reserved.
|
|
"""Shared HTTP resilience helpers for provider connectors.
|
|
|
|
Provides a reusable session pool with concurrency limiter and retry-with-backoff
|
|
so that Google, MSFT and Infomaniak connectors do not each re-implement
|
|
per-request sessions, unbounded parallelism, and missing retry logic.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import time
|
|
from typing import Any, Dict, Optional, Union
|
|
|
|
import aiohttp
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
_DEFAULT_MAX_CONCURRENT = 8
|
|
_DEFAULT_MAX_RETRIES = 3
|
|
_DEFAULT_TIMEOUT_S = 30
|
|
_RETRYABLE_STATUS = {429, 502, 503, 504}
|
|
|
|
|
|
_instances: list["ResilientHttp"] = []
|
|
|
|
|
|
class ResilientHttp:
|
|
"""Managed aiohttp.ClientSession with semaphore + retry.
|
|
|
|
Typical usage inside a connector module-level function::
|
|
|
|
_http = ResilientHttp("Google", maxConcurrent=8)
|
|
|
|
async def _googleGet(token, url):
|
|
return await _http.getJson(url, headers={"Authorization": f"Bearer {token}"})
|
|
|
|
The session is created lazily on first call, reused across requests,
|
|
and closed via ``closeAllResilientHttp()`` at app shutdown.
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
providerLabel: str = "HTTP",
|
|
maxConcurrent: int = _DEFAULT_MAX_CONCURRENT,
|
|
maxRetries: int = _DEFAULT_MAX_RETRIES,
|
|
defaultTimeoutS: float = _DEFAULT_TIMEOUT_S,
|
|
):
|
|
self._label = providerLabel
|
|
self._maxConcurrent = maxConcurrent
|
|
self._maxRetries = maxRetries
|
|
self._defaultTimeout = aiohttp.ClientTimeout(total=defaultTimeoutS)
|
|
self._semaphore: Optional[asyncio.Semaphore] = None
|
|
self._session: Optional[aiohttp.ClientSession] = None
|
|
_instances.append(self)
|
|
|
|
def _ensureReady(self) -> aiohttp.ClientSession:
|
|
if self._semaphore is None:
|
|
self._semaphore = asyncio.Semaphore(self._maxConcurrent)
|
|
if self._session is None or self._session.closed:
|
|
self._session = aiohttp.ClientSession(timeout=self._defaultTimeout)
|
|
return self._session
|
|
|
|
async def close(self):
|
|
if self._session and not self._session.closed:
|
|
await self._session.close()
|
|
await asyncio.sleep(0.25)
|
|
self._session = None
|
|
|
|
async def getJson(
|
|
self,
|
|
url: str,
|
|
headers: Dict[str, str],
|
|
timeout: Optional[aiohttp.ClientTimeout] = None,
|
|
allowRedirects: bool = True,
|
|
) -> Dict[str, Any]:
|
|
"""GET request returning parsed JSON with retry + throttle."""
|
|
session = self._ensureReady()
|
|
assert self._semaphore is not None
|
|
lastError: Optional[str] = None
|
|
for attempt in range(1, self._maxRetries + 1):
|
|
async with self._semaphore:
|
|
try:
|
|
async with session.get(
|
|
url,
|
|
headers=headers,
|
|
timeout=timeout or self._defaultTimeout,
|
|
allow_redirects=allowRedirects,
|
|
) as resp:
|
|
if resp.status in (200, 201):
|
|
return await resp.json()
|
|
if resp.status in _RETRYABLE_STATUS:
|
|
retryAfter = _parseRetryAfter(resp.headers.get("Retry-After"))
|
|
waitS = retryAfter if retryAfter > 0 else min(2 ** attempt, 30)
|
|
logger.warning(
|
|
f"{self._label} GET {resp.status} (attempt {attempt}/{self._maxRetries}), "
|
|
f"retry in {waitS:.1f}s: {url[:120]}"
|
|
)
|
|
await asyncio.sleep(waitS)
|
|
continue
|
|
errorText = await resp.text()
|
|
lastError = f"{resp.status}: {errorText[:200]}"
|
|
logger.warning(f"{self._label} GET {url[:120]} -> {lastError[:300]}")
|
|
return {"error": lastError}
|
|
except asyncio.TimeoutError:
|
|
lastError = f"timeout after {self._defaultTimeout.total}s"
|
|
if attempt < self._maxRetries:
|
|
logger.warning(f"{self._label} GET timeout (attempt {attempt}): {url[:120]}")
|
|
await asyncio.sleep(min(2 ** attempt, 10))
|
|
continue
|
|
except aiohttp.ClientError as e:
|
|
lastError = str(e)
|
|
if attempt < self._maxRetries:
|
|
logger.warning(f"{self._label} GET client error (attempt {attempt}): {e}")
|
|
await asyncio.sleep(min(2 ** attempt, 10))
|
|
continue
|
|
return {"error": lastError or "unknown error"}
|
|
|
|
async def getBytes(
|
|
self,
|
|
url: str,
|
|
headers: Dict[str, str],
|
|
timeout: Optional[aiohttp.ClientTimeout] = None,
|
|
allowRedirects: bool = True,
|
|
) -> Optional[bytes]:
|
|
"""GET request returning raw bytes (for file downloads)."""
|
|
session = self._ensureReady()
|
|
assert self._semaphore is not None
|
|
for attempt in range(1, self._maxRetries + 1):
|
|
async with self._semaphore:
|
|
try:
|
|
async with session.get(
|
|
url,
|
|
headers=headers,
|
|
timeout=timeout or self._defaultTimeout,
|
|
allow_redirects=allowRedirects,
|
|
) as resp:
|
|
if resp.status == 200:
|
|
return await resp.read()
|
|
if resp.status in _RETRYABLE_STATUS:
|
|
retryAfter = _parseRetryAfter(resp.headers.get("Retry-After"))
|
|
waitS = retryAfter if retryAfter > 0 else min(2 ** attempt, 30)
|
|
logger.warning(
|
|
f"{self._label} download {resp.status} (attempt {attempt}), "
|
|
f"retry in {waitS:.1f}s: {url[:120]}"
|
|
)
|
|
await asyncio.sleep(waitS)
|
|
continue
|
|
errorText = await resp.text()
|
|
logger.warning(f"{self._label} download {url[:120]} -> {resp.status}: {errorText[:200]}")
|
|
return None
|
|
except asyncio.TimeoutError:
|
|
if attempt < self._maxRetries:
|
|
logger.warning(f"{self._label} download timeout (attempt {attempt}): {url[:120]}")
|
|
await asyncio.sleep(min(2 ** attempt, 10))
|
|
continue
|
|
except aiohttp.ClientError as e:
|
|
if attempt < self._maxRetries:
|
|
logger.warning(f"{self._label} download client error (attempt {attempt}): {e}")
|
|
await asyncio.sleep(min(2 ** attempt, 10))
|
|
continue
|
|
return None
|
|
|
|
async def request(
|
|
self,
|
|
method: str,
|
|
url: str,
|
|
headers: Dict[str, str],
|
|
data: Any = None,
|
|
timeout: Optional[aiohttp.ClientTimeout] = None,
|
|
) -> Dict[str, Any]:
|
|
"""Generic HTTP request with retry for retryable status codes."""
|
|
session = self._ensureReady()
|
|
assert self._semaphore is not None
|
|
lastError: Optional[str] = None
|
|
for attempt in range(1, self._maxRetries + 1):
|
|
async with self._semaphore:
|
|
try:
|
|
kwargs: Dict[str, Any] = {"headers": headers}
|
|
if data is not None:
|
|
kwargs["data"] = data
|
|
async with session.request(
|
|
method, url,
|
|
timeout=timeout or self._defaultTimeout,
|
|
**kwargs,
|
|
) as resp:
|
|
if resp.status in (200, 201, 202, 204):
|
|
if resp.status == 204:
|
|
return {}
|
|
return await resp.json()
|
|
if resp.status in _RETRYABLE_STATUS:
|
|
retryAfter = _parseRetryAfter(resp.headers.get("Retry-After"))
|
|
waitS = retryAfter if retryAfter > 0 else min(2 ** attempt, 30)
|
|
logger.warning(
|
|
f"{self._label} {method} {resp.status} (attempt {attempt}), "
|
|
f"retry in {waitS:.1f}s: {url[:120]}"
|
|
)
|
|
await asyncio.sleep(waitS)
|
|
continue
|
|
errorText = await resp.text()
|
|
lastError = f"{resp.status}: {errorText[:200]}"
|
|
logger.warning(f"{self._label} {method} {url[:120]} -> {lastError[:300]}")
|
|
return {"error": lastError}
|
|
except asyncio.TimeoutError:
|
|
lastError = f"timeout after {(timeout or self._defaultTimeout).total}s"
|
|
if attempt < self._maxRetries:
|
|
await asyncio.sleep(min(2 ** attempt, 10))
|
|
continue
|
|
except aiohttp.ClientError as e:
|
|
lastError = str(e)
|
|
if attempt < self._maxRetries:
|
|
await asyncio.sleep(min(2 ** attempt, 10))
|
|
continue
|
|
return {"error": lastError or "unknown error"}
|
|
|
|
|
|
async def closeAllResilientHttp() -> None:
|
|
"""Close all ResilientHttp sessions. Call at app shutdown."""
|
|
for inst in _instances:
|
|
try:
|
|
await inst.close()
|
|
except Exception as e:
|
|
logger.debug(f"Error closing {inst._label} session: {e}")
|
|
logger.info(f"Closed {len(_instances)} ResilientHttp session(s)")
|
|
|
|
|
|
def _parseRetryAfter(value: Optional[str]) -> float:
|
|
"""Parse Retry-After header (seconds or HTTP-date). Returns 0 if absent/unparseable."""
|
|
if not value:
|
|
return 0.0
|
|
try:
|
|
return float(value)
|
|
except ValueError:
|
|
pass
|
|
try:
|
|
from email.utils import parsedate_to_datetime
|
|
dt = parsedate_to_datetime(value)
|
|
delta = (dt.timestamp() - time.time())
|
|
return max(delta, 0.5)
|
|
except Exception:
|
|
return 0.0
|