gateway/modules/routes/routeDataConnections.py
Ida 6a5ff1ff7c feat(rag): P1 user-connection hooks + retrieval threshold fix
- connection.established/revoked callbacks from OAuth routes and
  connection management endpoints
- KnowledgeIngestionConsumer dispatches bootstrap job (established)
  and synchronous purge (revoked)
- FileContentIndex: add connectionId + sourceKind columns
- SharePoint bootstrap with @odata.nextLink pagination and eTag-based
  idempotency
- Outlook bootstrap treats messages as virtual documents with
  cleanEmailBody for HTML/quote/signature stripping
- fix(rag): lower buildAgentContext minScore thresholds from
  0.55/0.65/0.70 to 0.35 — previous values blocked all real matches
  from text-embedding-3-small
- 24 new unit tests covering purge, consumer dispatch, email cleaning
  and both bootstrap paths
2026-04-29 14:39:40 +02:00

685 lines
No EOL
28 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Connection routes for the backend API.
Implements the endpoints for connection management.
SECURITY NOTE:
- Regular connections endpoint (/api/connections/) only returns connections for the current user
- Admin endpoint (/api/connections/admin/all) provides access to all connections for management purposes
- This prevents security vulnerabilities where admin users could see other users' connections
"""
from fastapi import APIRouter, HTTPException, Depends, Body, Path, Request, Response, Query
from typing import List, Dict, Any, Optional
from fastapi import status
import logging
import json
import math
from urllib.parse import quote
from modules.datamodels.datamodelUam import User, UserConnection, AuthAuthority, ConnectionStatus
from modules.datamodels.datamodelSecurity import Token
from modules.auth import getCurrentUser, limiter
from modules.auth.tokenRefreshService import token_refresh_service
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata, normalize_pagination_dict
from modules.interfaces.interfaceDbApp import getInterface
from modules.shared.timeUtils import getUtcTimestamp, parseTimestamp
from modules.interfaces.interfaceDbManagement import ComponentObjects
from modules.shared.i18nRegistry import apiRouteContext
# NOTE(review): presumably returns a callable that resolves/localizes
# user-facing API messages for this route module (it is invoked below as
# routeApiMsg("...") to build HTTPException details) — confirm against
# modules.shared.i18nRegistry.
routeApiMsg = apiRouteContext("routeDataConnections")
# Module-level logger, named after this module's import path
logger = logging.getLogger(__name__)
def getTokenStatusForConnection(interface, connectionId: str) -> tuple[str, Optional[float]]:
    """
    Report whether the latest token stored for a connection is still usable.

    Args:
        interface: The database interface (must expose getConnectionToken)
        connectionId: The connection ID whose token is inspected

    Returns:
        tuple: (tokenStatus, tokenExpiresAt)
            - tokenStatus: 'active', 'expired', or 'none'
            - tokenExpiresAt: parsed expiry timestamp, or None when status is 'none'

    Never raises: any exception during the lookup is logged and reported
    as ("none", None).
    """
    # Tokens expiring within this many seconds are still "active" but get a
    # debug log entry so a proactive refresh can be spotted.
    refreshWindowSeconds = 5 * 60
    try:
        token = interface.getConnectionToken(connectionId)
        if not token:
            return "none", None

        expiry = parseTimestamp(token.expiresAt)
        if not expiry:
            return "none", None

        now = getUtcTimestamp()
        if expiry <= now:
            return "expired", expiry

        if expiry <= (now + refreshWindowSeconds):
            logger.debug(f"Token for connection {connectionId} expires soon (in {expiry - now} seconds)")
        return "active", expiry
    except Exception as e:
        logger.error(f"Error getting token status for connection {connectionId}: {str(e)}")
        return "none", None
# Router for the connection-management endpoints defined in this module.
# Every route registered on it is served under the /api/connections prefix and
# tagged "Manage Connections"; a 404 response is declared for all of them.
router = APIRouter(
prefix="/api/connections",
tags=["Manage Connections"],
responses={404: {"description": "Not found"}}
)
# ============================================================================
# OPTIONS ENDPOINTS (for dropdowns)
# ============================================================================
@router.get("/statuses/options", response_model=List[Dict[str, Any]])
@limiter.limit("60/minute")
def get_connection_status_options(
    request: Request,
    currentUser: User = Depends(getCurrentUser)
) -> List[Dict[str, Any]]:
    """
    List every ConnectionStatus member as a dropdown option.

    Returns the standardized option format [{ value, label }], where the
    label is the enum value with its first letter capitalized.
    """
    options: List[Dict[str, Any]] = []
    for member in ConnectionStatus:
        rawValue = member.value
        options.append({"value": rawValue, "label": rawValue.capitalize()})
    return options
@router.get("/authorities/options", response_model=List[Dict[str, Any]])
@limiter.limit("60/minute")
def get_auth_authority_options(
    request: Request,
    currentUser: User = Depends(getCurrentUser)
) -> List[Dict[str, Any]]:
    """
    List every AuthAuthority member as a dropdown option.

    Returns the standardized option format [{ value, label }]. Authorities
    without a display name in the lookup table fall back to their raw value.
    """
    # Human-readable names keyed by the enum's raw value
    displayNames = {
        "local": "Local",
        "google": "Google",
        "msft": "Microsoft",
        "clickup": "ClickUp",
        "infomaniak": "Infomaniak",
    }
    options: List[Dict[str, Any]] = []
    for authority in AuthAuthority:
        rawValue = authority.value
        options.append({"value": rawValue, "label": displayNames.get(rawValue, rawValue)})
    return options
# ============================================================================
# CRUD ENDPOINTS
# ============================================================================
@router.get("/")
@limiter.limit("30/minute")
async def get_connections(
    request: Request,
    pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
    mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
    column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
    currentUser: User = Depends(getCurrentUser)
):
    """Get connections for the current user with optional pagination, sorting, and filtering.

    SECURITY: only the rows returned by getUserConnections(currentUser.id) are
    ever exposed, for admins as well as regular users.

    In the default (list) mode expired OAuth tokens are refreshed inline
    (awaited) before the rows are built; if the refresh fails the error is
    logged and the un-refreshed connections are returned. The filterValues
    and ids modes do not trigger a refresh.

    Query Parameters:
    - pagination: JSON-encoded PaginationParams object, or None for no pagination
    - mode: 'filterValues' (distinct values of `column`) or 'ids' (all filtered IDs)
    - column: column key, required when mode=filterValues

    Examples:
    - GET /api/connections/ (no pagination - returns all items)
    - GET /api/connections/?pagination={"page":1,"pageSize":10,"sort":[]}
    - GET /api/connections/?pagination={"page":1,"pageSize":10,"filters":{"status":"active"}}
    - GET /api/connections/?mode=filterValues&column=status
    - GET /api/connections/?mode=ids

    Returns:
        List mode: {"items": [...], "pagination": <metadata dict or None>}.
        Other modes: whatever the corresponding routeHelpers handler returns.

    Raises:
        HTTPException 400: missing `column` for mode=filterValues, or an
            unparsable / non-object `pagination` value.
        HTTPException 500: any other failure.
    """
    from modules.routes.routeHelpers import handleFilterValuesInMemory, handleIdsInMemory, enrichRowsWithFkLabels

    def _buildEnhancedItems(interface=None, connections=None):
        # Flatten the user's connections into plain dict rows (needed for
        # in-memory filtering/sorting) and attach the computed token status.
        # Both arguments default to a fresh lookup for the current user.
        if interface is None:
            interface = getInterface(currentUser)
        if connections is None:
            connections = interface.getUserConnections(currentUser.id)
        items = []
        for connection in connections:
            tokenStatus, tokenExpiresAt = getTokenStatusForConnection(interface, connection.id)
            items.append({
                "id": connection.id,
                "userId": connection.userId,
                "authority": connection.authority.value if hasattr(connection.authority, 'value') else str(connection.authority),
                "externalId": connection.externalId,
                "externalUsername": connection.externalUsername or "",
                "externalEmail": connection.externalEmail,  # Keep None instead of converting to empty string
                "status": connection.status.value if hasattr(connection.status, 'value') else str(connection.status),
                "connectedAt": connection.connectedAt,
                "lastChecked": connection.lastChecked,
                "expiresAt": connection.expiresAt,
                "tokenStatus": tokenStatus,
                "tokenExpiresAt": tokenExpiresAt
            })
        return items

    if mode == "filterValues":
        if not column:
            raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
        try:
            items = _buildEnhancedItems()
            enrichRowsWithFkLabels(items, UserConnection)
            return handleFilterValuesInMemory(items, column, pagination)
        except HTTPException:
            # Let deliberate HTTP errors from the helpers through instead of
            # masking them as a 500 (same policy as the list path below).
            raise
        except Exception as e:
            logger.error(f"Error getting filter values for connections: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))

    if mode == "ids":
        try:
            return handleIdsInMemory(_buildEnhancedItems(), pagination)
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"Error getting IDs for connections: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))

    try:
        interface = getInterface(currentUser)

        # NOTE: Cannot use db.getRecordsetPaginated() here because each connection
        # is enriched with computed tokenStatus/tokenExpiresAt (requires per-row DB lookup).
        # Token refresh also may trigger re-fetch. Connections per user are typically < 10,
        # so in-memory pagination is acceptable.

        # Parse pagination parameter
        paginationParams = None
        if pagination:
            try:
                paginationDict = json.loads(pagination)
                if paginationDict:
                    # Valid JSON that is not an object (e.g. "[1]") cannot be
                    # splatted into PaginationParams; report it as a client
                    # error rather than letting it surface as a 500.
                    if not isinstance(paginationDict, dict):
                        raise ValueError("expected a JSON object")
                    # Normalize pagination dict (handles top-level "search" field)
                    paginationDict = normalize_pagination_dict(paginationDict)
                    paginationParams = PaginationParams(**paginationDict)
            except (json.JSONDecodeError, ValueError) as e:
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid pagination parameter: {str(e)}"
                )

        # SECURITY FIX: All users (including admins) can only see their own connections
        # This prevents admin from seeing other users' connections and causing confusion
        connections = interface.getUserConnections(currentUser.id)

        # Perform silent token refresh for expired OAuth connections
        try:
            refresh_result = await token_refresh_service.refresh_expired_tokens(currentUser.id)
            if refresh_result.get("refreshed", 0) > 0:
                logger.info(f"Silently refreshed {refresh_result['refreshed']} tokens for user {currentUser.id}")
                # Re-fetch connections to get updated token status
                connections = interface.getUserConnections(currentUser.id)
        except Exception as e:
            logger.warning(f"Silent token refresh failed for user {currentUser.id}: {str(e)}")
            # Continue with original connections even if refresh fails

        # Enhance each connection with token status information and convert to dict
        enhanced_connections_dict = _buildEnhancedItems(interface, connections)
        enrichRowsWithFkLabels(enhanced_connections_dict, UserConnection)

        if paginationParams is None:
            return {
                "items": enhanced_connections_dict,
                "pagination": None,
            }

        # Filtering and sorting share one ComponentObjects helper; it is only
        # created when at least one of the two is requested.
        if paginationParams.filters or paginationParams.sort:
            component_interface = ComponentObjects()
            component_interface.setUserContext(currentUser)
            if paginationParams.filters:
                enhanced_connections_dict = component_interface._applyFilters(
                    enhanced_connections_dict,
                    paginationParams.filters
                )
            if paginationParams.sort:
                enhanced_connections_dict = component_interface._applySorting(
                    enhanced_connections_dict,
                    paginationParams.sort
                )

        # In-memory pagination over the filtered/sorted rows
        totalItems = len(enhanced_connections_dict)
        totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
        startIdx = (paginationParams.page - 1) * paginationParams.pageSize
        endIdx = startIdx + paginationParams.pageSize
        paged_connections = enhanced_connections_dict[startIdx:endIdx]

        return {
            "items": paged_connections,
            "pagination": PaginationMetadata(
                currentPage=paginationParams.page,
                pageSize=paginationParams.pageSize,
                totalItems=totalItems,
                totalPages=totalPages,
                sort=paginationParams.sort,
                filters=paginationParams.filters
            ).model_dump(),
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting connections: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get connections: {str(e)}"
        )
@router.post("/", response_model=UserConnection)
@limiter.limit("10/minute")
def create_connection(
    request: Request,
    connection_data: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> UserConnection:
    """Create a new PENDING connection owned by the current user.

    The request body's 'type' key selects the authority (msft, google,
    clickup or infomaniak); any other value is rejected with HTTP 400.

    SECURITY: the connection is always created with userId=currentUser.id, so
    this endpoint cannot be used to create connections for other users.

    Raises:
        HTTPException 400: unsupported or missing connection type.
        HTTPException 500: any other failure.
    """
    try:
        interface = getInterface(currentUser)

        # Resolve the requested type to its authority enum member
        requestedType = connection_data.get('type')
        supportedAuthorities = {
            'msft': AuthAuthority.MSFT,
            'google': AuthAuthority.GOOGLE,
            'clickup': AuthAuthority.CLICKUP,
            'infomaniak': AuthAuthority.INFOMANIAK,
        }
        authority = supportedAuthorities.get(requestedType)
        if not authority:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Unsupported connection type: {requestedType}"
            )

        # currentUser was already authenticated by the getCurrentUser
        # dependency, so no additional user lookup is done here.
        # External identity fields stay empty until the OAuth flow fills them.
        newConnectionArgs = dict(
            userId=currentUser.id,
            authority=authority,
            externalId="",
            externalUsername="",
            status=ConnectionStatus.PENDING,
        )
        connection = interface.addUserConnection(**newConnectionArgs)

        # Persist the record; the model takes care of timestamp serialization
        interface.db.recordModify(UserConnection, connection.id, connection.model_dump())
        return connection
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error creating connection: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to create connection: {str(e)}"
        )
@router.put("/{connectionId}", response_model=UserConnection)
@limiter.limit("10/minute")
def update_connection(
    request: Request,
    connectionId: str = Path(..., description="The ID of the connection to update"),
    connection_data: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> UserConnection:
    """Update an existing connection for the current user.

    Keys of the request body that match UserConnection fields are merged into
    the stored record and re-validated; unknown keys are ignored, and so are
    the ownership keys `id` and `userId`. `lastChecked` is always set to the
    current timestamp.

    If the connection is remote (authority other than LOCAL) and any of
    externalUsername / externalEmail / externalId / authority changed, the
    connection is reset to PENDING, its expiresAt is cleared and its tokens are
    revoked (best effort) so the user has to reconnect.

    SECURITY: users can only update their own connections, and a connection
    cannot be re-assigned to another user or given a different ID.

    Returns:
        The updated connection including tokenStatus / tokenExpiresAt.

    Raises:
        HTTPException 404: the connection does not belong to the current user.
        HTTPException 400: the merged data fails model validation.
        HTTPException 500: any other failure.
    """
    try:
        interface = getInterface(currentUser)

        # SECURITY FIX: All users (including admins) can only update their own connections
        # This prevents admin from updating other users' connections and causing confusion
        connection = None
        for conn in interface.getUserConnections(currentUser.id):
            if conn.id == connectionId:
                connection = conn
                break
        if not connection:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=routeApiMsg("Connection not found")
            )

        # Merge incoming changes into a dict and re-validate via pydantic.
        # Direct setattr() bypasses type coercion (PowerOnModel doesn't enable
        # validate_assignment), which leaves enum fields as raw strings and
        # later breaks .value access. Unknown keys are dropped.
        # `id` and `userId` are never client-writable: accepting them would let
        # the request body re-key the record or hand it to another user
        # (mass assignment).
        protectedFields = {"id", "userId"}
        writableFields = set(UserConnection.model_fields.keys()) - protectedFields
        previous = connection.model_dump()
        merged = dict(previous)
        for field, value in connection_data.items():
            if field in writableFields:
                merged[field] = value
        merged["lastChecked"] = getUtcTimestamp()

        try:
            connection = UserConnection.model_validate(merged)
        except ValueError as e:
            # pydantic's ValidationError derives from ValueError; bad client
            # input is a 400, not an internal server error.
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Invalid connection data: {str(e)}"
            )

        # If this is a remote (non-local) connection and any identity-bearing
        # field changed, the stored OAuth tokens no longer match the account.
        # Force the user to reconnect: mark PENDING and revoke existing tokens.
        identityFields = ("externalUsername", "externalEmail", "externalId", "authority")
        authorityValue = (
            connection.authority.value
            if hasattr(connection.authority, "value")
            else str(connection.authority)
        )
        isRemote = authorityValue != AuthAuthority.LOCAL.value
        # Compare the two *validated* dumps. Comparing against the raw request
        # values could report a change merely because the client sent the
        # string form of an enum (e.g. "msft") that the stored dump holds as an
        # enum member, which would revoke tokens for a no-op update.
        current = connection.model_dump()
        identityChanged = any(
            previous.get(field) != current.get(field) for field in identityFields
        )
        if isRemote and identityChanged:
            connection.status = ConnectionStatus.PENDING
            connection.expiresAt = None
            try:
                existingTokens = interface.db.getRecordset(
                    Token, recordFilter={"connectionId": connectionId}
                )
                for token in existingTokens:
                    interface.revokeTokenById(
                        token["id"],
                        revokedBy=currentUser.id,
                        reason="connection identity changed",
                    )
                logger.info(
                    f"Revoked {len(existingTokens)} token(s) for connection "
                    f"{connectionId} after identity change; reconnect required."
                )
            except Exception as e:
                # Best effort: the connection is still reset to PENDING below
                logger.warning(
                    f"Failed to revoke tokens for connection {connectionId}: {str(e)}"
                )

        # Update connection - models now handle timestamp serialization automatically
        interface.db.recordModify(UserConnection, connectionId, connection.model_dump())

        # Get token status for the updated connection
        tokenStatus, tokenExpiresAt = getTokenStatusForConnection(interface, connectionId)

        # Create enhanced connection with token status
        enhanced_connection = UserConnection(
            id=connection.id,
            userId=connection.userId,
            authority=connection.authority,
            externalId=connection.externalId,
            externalUsername=connection.externalUsername,
            externalEmail=connection.externalEmail,
            status=connection.status,
            connectedAt=connection.connectedAt,
            lastChecked=connection.lastChecked,
            expiresAt=connection.expiresAt,
            tokenStatus=tokenStatus,
            tokenExpiresAt=tokenExpiresAt
        )
        return enhanced_connection
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating connection: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to update connection: {str(e)}"
        )
@router.post("/{connectionId}/connect")
@limiter.limit("10/minute")
def connect_service(
    request: Request,
    connectionId: str = Path(..., description="The ID of the connection to connect"),
    body: Optional[Dict[str, Any]] = Body(default=None),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Return the OAuth start URL for one of the current user's connections.

    Optional body: ``{"reauth": true}`` -- appends ``&reauth=1`` to the URL so
    the OAuth provider re-shows the consent screen. This is required when new
    scopes have been added (e.g. Calendar + Contacts after the connection was
    first created); without it the provider silently re-uses the previous
    consent and never grants the new scopes, leaving the connection degraded.

    SECURITY: users can only connect their own connections.

    Returns:
        {"authUrl": <relative URL of the provider's /auth/connect route>}

    Raises:
        HTTPException 404: the connection does not belong to the current user.
        HTTPException 400: Infomaniak (token-based, no OAuth) or an
            unsupported authority.
        HTTPException 500: any other failure.
    """
    try:
        interface = getInterface(currentUser)

        # SECURITY FIX: All users (including admins) can only connect their own connections
        # This prevents admin from connecting other users' connections and causing confusion
        connection = next(
            (conn for conn in interface.getUserConnections(currentUser.id) if conn.id == connectionId),
            None,
        )
        if not connection:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=routeApiMsg("Connection not found")
            )

        wantsReauth = isinstance(body, dict) and bool(body.get("reauth"))
        reauthSuffix = "&reauth=1" if wantsReauth else ""

        # Data-app OAuth (JWT state issued server-side in /auth/connect).
        # Ordered (authority, route) pairs; matched with == like an if/elif chain.
        oauthConnectRoutes = (
            (AuthAuthority.MSFT, "/api/msft/auth/connect"),
            (AuthAuthority.GOOGLE, "/api/google/auth/connect"),
            (AuthAuthority.CLICKUP, "/api/clickup/auth/connect"),
        )
        for oauthAuthority, connectRoute in oauthConnectRoutes:
            if connection.authority == oauthAuthority:
                auth_url = f"{connectRoute}?connectionId={quote(connectionId, safe='')}{reauthSuffix}"
                return {"authUrl": auth_url}

        if connection.authority == AuthAuthority.INFOMANIAK:
            # Infomaniak does not use OAuth for data access; the frontend posts a
            # Personal Access Token directly to /api/infomaniak/connections/{id}/token.
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=routeApiMsg(
                    "Infomaniak uses a Personal Access Token instead of OAuth. "
                    "Submit the token via POST /api/infomaniak/connections/{connectionId}/token."
                ),
            )

        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Unsupported authority: {connection.authority}"
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error connecting service: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to connect service: {str(e)}"
        )
@router.post("/{connectionId}/disconnect")
@limiter.limit("10/minute")
def disconnect_service(
    request: Request,
    connectionId: str = Path(..., description="The ID of the connection to disconnect"),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Mark one of the current user's connections as REVOKED.

    A "connection.revoked" callback (reason "disconnected") is triggered
    first; a failure of that callback is logged and does not abort the
    disconnect. The record itself is kept with status REVOKED.

    SECURITY: users can only disconnect their own connections.

    Raises:
        HTTPException 404: the connection does not belong to the current user.
        HTTPException 500: any other failure.
    """
    try:
        interface = getInterface(currentUser)

        # SECURITY FIX: All users (including admins) can only disconnect their own connections
        # This prevents admin from disconnecting other users' connections and causing confusion
        connection = next(
            (conn for conn in interface.getUserConnections(currentUser.id) if conn.id == connectionId),
            None,
        )
        if not connection:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=routeApiMsg("Connection not found")
            )

        # Fire revoked event BEFORE DB status change so knowledge purge and
        # status mutation form one logical step; subscribers see the
        # connection as it was. INACTIVE does not exist on the enum — REVOKED
        # is the correct terminal-but-retained state (deleted rows are
        # handled in DELETE /{id}).
        try:
            from modules.shared.callbackRegistry import callbackRegistry
            revokedPayload = {
                "connectionId": connectionId,
                "authority": str(getattr(connection.authority, "value", connection.authority) or ""),
                "userId": str(currentUser.id),
                "reason": "disconnected",
            }
            callbackRegistry.trigger("connection.revoked", **revokedPayload)
        except Exception as _cbErr:
            logger.warning("connection.revoked callback failed for %s: %s", connectionId, _cbErr)

        connection.status = ConnectionStatus.REVOKED
        connection.lastChecked = getUtcTimestamp()

        # Persist the change; the model takes care of timestamp serialization
        interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
        return {"message": "Service disconnected successfully"}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error disconnecting service: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to disconnect service: {str(e)}"
        )
@router.delete("/{connectionId}")
@limiter.limit("10/minute")
def delete_connection(
    request: Request,
    connectionId: str = Path(..., description="The ID of the connection to delete"),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Delete one of the current user's connections.

    A "connection.revoked" callback (reason "deleted") is triggered before the
    record is removed; a failure of that callback is logged and does not abort
    the deletion.

    SECURITY: users can only delete their own connections.

    Raises:
        HTTPException 404: the connection does not belong to the current user.
        HTTPException 500: any other failure.
    """
    try:
        interface = getInterface(currentUser)

        # SECURITY FIX: All users (including admins) can only delete their own connections
        # This prevents admin from deleting other users' connections and causing confusion
        connection = next(
            (conn for conn in interface.getUserConnections(currentUser.id) if conn.id == connectionId),
            None,
        )
        if not connection:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=routeApiMsg("Connection not found")
            )

        # Fire revoked event BEFORE the row disappears so consumers still
        # have authority/connection context for observability; purge itself
        # targets FileContentIndex rows by connectionId which are unaffected
        # by the UserConnection delete.
        try:
            from modules.shared.callbackRegistry import callbackRegistry
            revokedPayload = {
                "connectionId": connectionId,
                "authority": str(getattr(connection.authority, "value", connection.authority) or ""),
                "userId": str(currentUser.id),
                "reason": "deleted",
            }
            callbackRegistry.trigger("connection.revoked", **revokedPayload)
        except Exception as _cbErr:
            logger.warning("connection.revoked callback failed for %s: %s", connectionId, _cbErr)

        # Ownership was verified above, so the ID alone identifies the row to remove
        interface.removeUserConnection(connectionId)
        return {"message": "Connection deleted successfully"}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error deleting connection: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to delete connection: {str(e)}"
        )