# gateway/scripts/migrate_async_to_sync.py  (374 lines, 14 KiB, Python)
#!/usr/bin/env python3
"""
Migration Script: Convert async def → def for route handlers that don't need async.
This fixes the event-loop blocking issue where synchronous psycopg2 DB operations
inside async def routes block the entire uvicorn event loop, preventing concurrent
request handling.
FastAPI behavior:
- `async def` routes → run directly on the event loop (blocks if sync code inside)
- `def` routes → run in a thread pool automatically (non-blocking)
Usage:
python scripts/migrate_async_to_sync.py --dry-run # Preview changes
python scripts/migrate_async_to_sync.py # Apply changes
Author: Auto-generated migration script
"""
import argparse
import os
import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Set, Tuple, Union
# Base directories, resolved relative to this script's location:
# <gateway>/scripts/<this file> -> parent is scripts/, parent.parent is gateway/.
GATEWAY_DIR = Path(__file__).parent.parent
ROUTES_DIR = GATEWAY_DIR / "modules" / "routes"      # standard route modules
FEATURES_DIR = GATEWAY_DIR / "modules" / "features"  # per-feature route modules
# NOTE(review): AUTH_DIR is defined but never referenced below -- confirm
# whether auth modules were also meant to be scanned for migration.
AUTH_DIR = GATEWAY_DIR / "modules" / "auth"
# =============================================================================
# Configuration: Functions that MUST stay async
# =============================================================================
# Key: relative file path from gateway dir (forward slashes).
# Value: either a set of function names that must remain `async def`, or the
# sentinel string "__ALL__" meaning every route in that file stays async
# (see _shouldSkipFile / _mustStayAsync for how the sentinel is interpreted).
_MUST_STAY_ASYNC: Dict[str, Union[Set[str], str]] = {
    # --- routes/ ---
    "modules/routes/routeAdminAutomationEvents.py": {
        "sync_all_automation_events",  # await syncAutomationEvents(...)
    },
    "modules/routes/routeAdminRbacExport.py": {
        "import_global_rbac",  # await file.read()
        "import_mandate_rbac",  # await file.read()
    },
    "modules/routes/routeDataConnections.py": {
        "get_connections",  # await token_refresh_service.refresh_expired_tokens(...)
    },
    "modules/routes/routeDataFiles.py": {
        "upload_file",  # await file.read()
    },
    # These files have many genuinely async routes (httpx, external APIs) -- keep ALL async:
    "modules/routes/routeRealEstate.py": "__ALL__",
    "modules/routes/routeSharepoint.py": "__ALL__",
    "modules/routes/routeVoiceGoogle.py": "__ALL__",
    # Partial keeps in security routes (httpx.AsyncClient, request.json()):
    "modules/routes/routeSecurityGoogle.py": {
        "verify_google_token",  # await client.get(...)
        "auth_callback",  # await verify_google_token(...), await client.get(...)
        "verify_token",  # await verify_google_token(...)
        "refresh_token",  # await request.json()
    },
    "modules/routes/routeSecurityMsft.py": {
        "auth_callback",  # await client.get(...)
        "refresh_token",  # await request.json()
    },
    # --- features/ ---
    "modules/features/automation/routeFeatureAutomation.py": {
        "execute_automation_route",  # await executeAutomation(...)
    },
    "modules/features/chatbot/routeFeatureChatbot.py": {
        "stream_chatbot_start",  # await chatProcess(...), contains async event_stream generator
        "event_stream",  # await request.is_disconnected(), await asyncio.wait_for(...)
        "stop_chatbot",  # await event_manager.emit_event(...)
    },
    "modules/features/chatplayground/routeFeatureChatplayground.py": {
        "start_workflow",  # await chatStart(...)
        "stop_workflow",  # await chatStop(...)
    },
    "modules/features/neutralization/routeFeatureNeutralizer.py": {
        "process_sharepoint_files",  # await service.processSharepointFiles(...)
    },
    "modules/features/realestate/routeFeatureRealEstate.py": {
        "process_command",  # await processNaturalLanguageCommand(...)
        "create_table_record",  # await create_project_with_parcel_data(...)
        "search_parcel",  # await connector.search_parcel(...), connector._query_building_layer(...)
        "add_parcel_to_project",  # await connector.search_parcel(...)
    },
    "modules/features/trustee/routeFeatureTrustee.py": {
        "create_document",  # await request.json()
        "upload_document",  # await file.read()
    },
}
# Files to skip entirely (all routes must stay async).
# NOTE(review): these duplicate the "__ALL__" entries in _MUST_STAY_ASYNC;
# _shouldSkipFile() honors either marker, so keep the two lists in sync.
_SKIP_FILES: Set[str] = {
    "modules/routes/routeRealEstate.py",
    "modules/routes/routeSharepoint.py",
    "modules/routes/routeVoiceGoogle.py",
}
# Helper functions that are fake-async (declared `async def` but contain no
# `await`). These are converted from async def -> def like the routes, even
# though they are internal helpers rather than route handlers.
_FAKE_ASYNC_HELPERS: Dict[str, Set[str]] = {
    "modules/features/chatplayground/routeFeatureChatplayground.py": {"_validateInstanceAccess"},
    "modules/features/trustee/routeFeatureTrustee.py": {"_validateInstanceAccess", "_validateInstanceAdmin"},
    "modules/features/realestate/routeFeatureRealEstate.py": {"_validateInstanceAccess"},
    "modules/features/chatbot/routeFeatureChatbot.py": {"_validateInstanceAccess"},
    "modules/routes/routeNotifications.py": {"_handleInvitationAction"},
}
# Call sites of these helpers get their `await` keyword stripped once the
# helper itself becomes synchronous (Step 2 of _processFile).
_REMOVE_AWAIT_CALLS: Set[str] = {
    "_validateInstanceAccess",
    "_validateInstanceAdmin",
    "_handleInvitationAction",
}
# =============================================================================
# Migration Logic
# =============================================================================
def _getRelativePath(filePath: Path) -> str:
    """Return *filePath* relative to GATEWAY_DIR using forward slashes.

    Paths outside the gateway tree are returned unchanged (as absolute
    strings), so lookups against the config dicts simply miss.
    """
    try:
        rel = filePath.relative_to(GATEWAY_DIR)
    except ValueError:
        # Not under the gateway directory -- fall back to the raw path.
        return str(filePath)
    # Normalize Windows separators so keys match the config dicts above.
    return str(rel).replace("\\", "/")
def _shouldSkipFile(relPath: str) -> bool:
    """Return True when the whole file must keep its async routes untouched."""
    if relPath in _SKIP_FILES:
        return True
    # A file marked "__ALL__" in the keep-async config is equivalent to a skip.
    return _MUST_STAY_ASYNC.get(relPath) == "__ALL__"
def _mustStayAsync(relPath: str, funcName: str) -> bool:
    """Return True when *funcName* in *relPath* must remain `async def`."""
    entry = _MUST_STAY_ASYNC.get(relPath)
    if entry is None:
        # File not listed -> nothing in it is forced to stay async.
        return False
    # Either the whole file is pinned async, or the name is in its keep-set.
    return entry == "__ALL__" or funcName in entry
def _isFakeAsyncHelper(relPath: str, funcName: str) -> bool:
    """Return True when *funcName* is a configured fake-async helper of *relPath*."""
    return funcName in _FAKE_ASYNC_HELPERS.get(relPath, set())
def _processFile(filePath: Path, dryRun: bool = True) -> Dict[str, Any]:
    """Rewrite one route module, converting eligible ``async def`` to ``def``.

    Three passes over the file text:
      1. find every ``async def`` and convert those not pinned by config;
      2. strip ``await`` from call sites of helpers that became synchronous;
      3. scan converted functions for leftover ``await`` (reported as warnings,
         since a leftover await in a sync function is a syntax/runtime error).

    Args:
        filePath: absolute path of the module to process.
        dryRun: when True, compute the report but never write the file.

    Returns:
        Report dict: either ``{"file", "skipped", "reason"}`` for skipped
        files, or ``{"file", "skipped", "convertedCount",
        "convertedFunctions", "changes", "warnings", "modified", "written"}``.
    """
    relPath = _getRelativePath(filePath)
    if _shouldSkipFile(relPath):
        return {"file": relPath, "skipped": True, "reason": "all routes must stay async"}
    with open(filePath, "r", encoding="utf-8") as f:
        originalContent = f.read()
    content = originalContent
    changes: List[str] = []
    # Step 1: Find all async def functions and convert eligible ones
    # Pattern matches: async def function_name(
    asyncDefPattern = re.compile(r'^(\s*)async def (\w+)\s*\(', re.MULTILINE)
    convertedFunctions: Set[str] = set()
    for match in asyncDefPattern.finditer(originalContent):
        indent = match.group(1)  # NOTE(review): captured but currently unused
        funcName = match.group(2)
        # Check if this function must stay async
        if _mustStayAsync(relPath, funcName):
            changes.append(f" KEEP async: {funcName} (must stay async)")
            continue
        # Convert async def → def
        convertedFunctions.add(funcName)
        changes.append(f" CONVERT: async def {funcName} -> def {funcName}")
    # Apply conversions
    # NOTE(review): substitution is keyed on the function *name*, so two
    # same-named defs in one file would both be rewritten -- confirm no
    # duplicate names exist in the processed modules.
    for funcName in convertedFunctions:
        # Replace "async def funcName(" with "def funcName("
        # Be careful to match the exact function definition
        pattern = re.compile(
            r'^(\s*)async def ' + re.escape(funcName) + r'\s*\(',
            re.MULTILINE
        )
        # Late-binding of funcName in the lambda is safe: sub() consumes it
        # immediately within this loop iteration.
        content = pattern.sub(
            lambda m: f'{m.group(1)}def {funcName}(',
            content
        )
    # Step 2: Remove 'await' from calls to converted functions
    # This handles: await _validateInstanceAccess(...) → _validateInstanceAccess(...)
    # And also: result = await someConvertedFunc(...) → result = someConvertedFunc(...)
    for funcName in _REMOVE_AWAIT_CALLS:
        if funcName in convertedFunctions or _isFakeAsyncHelper(relPath, funcName):
            # NOTE(review): the greedy `(.*)` keeps everything before the last
            # `await <name>(` on the line; with at most one such call per line
            # (the case here) this is equivalent to removing the keyword.
            awaitPattern = re.compile(
                r'(\s*)(.*)await\s+' + re.escape(funcName) + r'\s*\(',
                re.MULTILINE
            )
            newContent = awaitPattern.sub(
                lambda m: f'{m.group(1)}{m.group(2)}{funcName}(',
                content
            )
            if newContent != content:
                changes.append(f" REMOVE await: await {funcName}(...) -> {funcName}(...)")
                content = newContent
    # Step 3: Check for any remaining 'await' in converted functions
    # This catches cases where a converted function still has await calls
    remainingAwaits: List[str] = []
    lines = content.split('\n')
    currentFunc = None  # name of the converted function we are currently inside
    funcIndent = 0  # NOTE(review): tracked but never read -- dedent detection below uses column 0 only
    for i, line in enumerate(lines):
        # Track current function
        defMatch = re.match(r'^(\s*)def (\w+)\s*\(', line)
        asyncDefMatch = re.match(r'^(\s*)async def (\w+)\s*\(', line)
        if defMatch and defMatch.group(2) in convertedFunctions:
            currentFunc = defMatch.group(2)
            funcIndent = len(defMatch.group(1))
        elif defMatch or asyncDefMatch:
            # Entered a different (non-converted) function.
            currentFunc = None
        elif currentFunc and line.strip() and not line[0].isspace():
            # Non-blank line at column 0 -> we left the function body.
            currentFunc = None
        # Check for remaining awaits in converted functions
        if currentFunc and 'await ' in line:
            remainingAwaits.append(f" WARNING: Remaining 'await' in {currentFunc} at line {i+1}: {line.strip()}")
    # Build result
    result = {
        "file": relPath,
        "skipped": False,
        "convertedCount": len(convertedFunctions),
        "convertedFunctions": sorted(convertedFunctions),
        "changes": changes,
        "warnings": remainingAwaits,
        "modified": content != originalContent,
    }
    # Write file if not dry run and content changed
    if not dryRun and content != originalContent:
        with open(filePath, "w", encoding="utf-8") as f:
            f.write(content)
        result["written"] = True
    else:
        result["written"] = False
    return result
def _discoverRouteFiles() -> List[Path]:
    """Collect all route modules to analyze, in deterministic (sorted) order."""
    discovered: List[Path] = []
    # Standard route modules: gateway/modules/routes/route*.py
    if ROUTES_DIR.exists():
        discovered.extend(sorted(ROUTES_DIR.glob("route*.py")))
    # Feature route modules: gateway/modules/features/*/routeFeature*.py
    if FEATURES_DIR.exists():
        discovered.extend(sorted(FEATURES_DIR.glob("*/routeFeature*.py")))
    return discovered
def _main() -> int:
    """CLI entry point.

    Parses arguments, runs the migration over the discovered route files (or
    a single ``--file``), prints a per-file report plus a summary, and returns
    the process exit code: 1 when warnings need review, 0 otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Migrate async def → def for FastAPI routes with sync DB operations"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        default=False,
        help="Preview changes without writing files (default: apply changes)"
    )
    parser.add_argument(
        "--file",
        type=str,
        default=None,
        help="Process only a specific file (relative to gateway dir)"
    )
    args = parser.parse_args()
    dryRun = args.dry_run
    print("=" * 70)
    # Idiom fix: these banner strings had f-prefixes with no placeholders (F541).
    print(" FastAPI Route Migration: async def -> def")
    print(f" Mode: {'DRY RUN (preview only)' if dryRun else 'APPLY CHANGES'}")
    print("=" * 70)
    print()
    # Discover files: a single explicitly requested file, or all route modules.
    if args.file:
        targetFile = GATEWAY_DIR / args.file.replace("/", os.sep)
        if not targetFile.exists():
            print(f"ERROR: File not found: {targetFile}")
            sys.exit(1)
        files = [targetFile]
    else:
        files = _discoverRouteFiles()
    print(f"Found {len(files)} route files to analyze\n")
    totalConverted = 0
    totalWarnings = 0
    totalModified = 0
    for filePath in files:
        result = _processFile(filePath, dryRun=dryRun)
        if result.get("skipped"):
            print(f"[SKIP] {result['file']} - SKIPPED ({result.get('reason', '')})")
            continue
        converted = result.get("convertedCount", 0)
        warnings = result.get("warnings", [])
        modified = result.get("modified", False)
        # BUGFIX: previously a file with converted == 0 and no warnings was
        # skipped here even when 'modified' was True (await-only rewrites of
        # already-sync helpers), so a file that was actually WRITTEN went
        # unreported and uncounted in totalModified.
        if converted == 0 and not warnings and not modified:
            continue
        totalConverted += converted
        totalWarnings += len(warnings)
        if modified:
            totalModified += 1
        status = "[DONE] WRITTEN" if result.get("written") else ("[PLAN] WOULD WRITE" if modified else "---")
        print(f"{status} {result['file']} ({converted} functions)")
        for change in result.get("changes", []):
            print(f" {change}")
        for warning in warnings:
            print(f" [WARN] {warning}")
        print()
    # Summary
    print("=" * 70)
    print(" SUMMARY")
    print(f" Files analyzed: {len(files)}")
    print(f" Files modified: {totalModified}")
    print(f" Functions converted: {totalConverted}")
    print(f" Warnings: {totalWarnings}")
    if dryRun:
        print("\n This was a DRY RUN. Run without --dry-run to apply changes.")
    else:
        print("\n Changes applied. Restart the server to take effect.")
    print("=" * 70)
    # Exit non-zero when any converted function still contains 'await',
    # since that would be a syntax/runtime error in the rewritten module.
    if totalWarnings > 0:
        print(f"\n[WARN] There are {totalWarnings} warnings - review before deploying!")
        return 1
    return 0
# Script entry point: propagate _main()'s return value as the process exit
# code (1 when warnings were emitted, 0 otherwise).
if __name__ == "__main__":
    sys.exit(_main())