gateway/modules/connectors/connectorPreprocessor.py
2026-01-05 06:40:09 +01:00

169 lines
5.7 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Preprocessor connector for executing SQL queries via HTTP API.
Connects to remote preprocessing service that hosts the SQLite database.
"""
import logging
import re
from typing import Optional

import httpx

from modules.shared.configuration import APP_CONFIG

logger = logging.getLogger(__name__)
class PreprocessorConnector:
    """
    Connector for executing SQL queries via the preprocessing API.

    Makes HTTP POST requests to the remote preprocessing service. Only
    read-only SELECT queries pass validation. ``executeQuery`` reports every
    failure (validation, configuration, HTTP, network) as a returned message
    string instead of raising.
    """

    # Maximum number of result rows included in the formatted output.
    MAX_DISPLAY_ROWS = 50

    # Keywords that must not appear as standalone tokens in a query.
    # The order defines which keyword is reported when several are present.
    FORBIDDEN_KEYWORDS = (
        "DROP", "CREATE", "ALTER", "INSERT", "UPDATE",
        "DELETE", "PRAGMA", "ATTACH", "DETACH", "TRUNCATE",
    )

    # "SELECT" must be a complete token (rejects e.g. "SELECTED ...").
    _SELECT_PREFIX = re.compile(r"SELECT\b")

    # Whole-word match so identifiers such as "created_at", "updated_at" or
    # "deleted" are not mistaken for the keywords they happen to contain.
    _FORBIDDEN_PATTERN = re.compile(
        r"\b(" + "|".join(FORBIDDEN_KEYWORDS) + r")\b"
    )

    def __init__(self):
        """Initialize the preprocessor connector from APP_CONFIG."""
        self.api_key = APP_CONFIG.get("PP_QUERY_API_KEY")
        self.base_url = APP_CONFIG.get("PP_QUERY_BASE_URL")

        # Missing settings are only warned about here; executeQuery returns
        # an error message when it is actually called without them.
        if not self.api_key:
            logger.warning("PP_QUERY_API_KEY not found in configuration")
        if not self.base_url:
            logger.warning("PP_QUERY_BASE_URL not found in configuration")

        # HTTP client with timeout
        self.http_client = httpx.AsyncClient(
            timeout=30.0,
            headers={
                "Content-Type": "application/json"
            }
        )
        logger.info("PreprocessorConnector initialized")

    async def executeQuery(self, sql_query: str) -> str:
        """
        Execute a SQL query via the preprocessing API.

        Args:
            sql_query: SQL SELECT query to execute

        Returns:
            Formatted result string with the query results (at most
            MAX_DISPLAY_ROWS rows), or an error message string when the
            query is rejected by validation, the connector is not
            configured, or the API request fails. Errors are returned,
            not raised.
        """
        validation_error = self._validateQuery(sql_query)
        if validation_error:
            return validation_error

        # Check configuration
        if not self.api_key:
            error_msg = "Error: PP_QUERY_API_KEY not configured"
            logger.error(error_msg)
            return error_msg
        if not self.base_url:
            error_msg = "Error: PP_QUERY_BASE_URL not configured"
            logger.error(error_msg)
            return error_msg

        try:
            # Only the first 100 characters of the query are logged.
            logger.info(
                "Executing SQL query via preprocessing API: %s (query: %s...)",
                self.base_url,
                sql_query[:100],
            )
            response = await self.http_client.post(
                self.base_url,
                json={"query": sql_query},
                headers={
                    "X-DB-API-Key": self.api_key
                }
            )
            response.raise_for_status()
            return self._formatResult(response.json())
        except httpx.HTTPStatusError as e:
            error_msg = f"API error: HTTP {e.response.status_code}"
            try:
                error_msg += f" - {e.response.text}"
            except Exception:
                # Best effort: if the body cannot be read, report the
                # status code alone.
                logger.debug("Could not read error response body", exc_info=True)
            logger.error(f"Preprocessing API HTTP error: {error_msg}")
            return error_msg
        except httpx.RequestError as e:
            error_msg = f"Network error: {str(e)}"
            logger.error(f"Preprocessing API network error: {error_msg}")
            return error_msg
        except Exception as e:
            # Boundary catch-all (e.g. malformed JSON or unexpected payload
            # shape); logged with traceback, reported as a string.
            error_msg = f"Error executing query: {str(e)}"
            logger.exception(f"Preprocessing API error: {error_msg}")
            return error_msg

    def _formatResult(self, result: dict) -> str:
        """
        Turn the decoded API response into the user-facing result string.

        Args:
            result: Decoded JSON object; the keys read are "success",
                "message", "data" and "row_count".

        Returns:
            A failure message when "success" is falsy, otherwise a summary
            line followed by up to MAX_DISPLAY_ROWS rows, one per line.
        """
        if not result.get("success"):
            error_message = result.get("message", "Unknown error")
            return f"Query failed: {error_message}"

        # A JSON null for "data" is treated like an empty result set.
        data = result.get("data") or []
        row_count = result.get("row_count")
        if row_count is None:
            # Fall back to what was actually received.
            row_count = len(data)

        display_data = data[:self.MAX_DISPLAY_ROWS]
        if not display_data:
            return f"Query executed successfully. Returned {row_count} rows (no data)."

        # Report the number of rows actually shown, not one derived from
        # the server-provided row_count.
        header = (
            f"Query executed successfully. Returned {row_count} rows "
            f"(showing first {len(display_data)}):\n"
        )
        return header + "\n".join(str(row) for row in display_data)

    def _validateQuery(self, sql_query: str) -> Optional[str]:
        """
        Validate SQL query to ensure only SELECT queries are allowed.

        The check is case-insensitive. Forbidden keywords are matched as
        whole words anywhere in the query text, including inside string
        literals and comments.

        Args:
            sql_query: SQL query to validate

        Returns:
            Error message if validation fails, None if valid
        """
        if not sql_query or not isinstance(sql_query, str):
            return "Error: SQL query must be a non-empty string"

        query_upper = sql_query.strip().upper()

        # Check if query starts with SELECT
        if not self._SELECT_PREFIX.match(query_upper):
            return "Error: Only SELECT queries are allowed. Query must start with SELECT."

        # Check for forbidden keywords; report the first one in list order.
        found = set(self._FORBIDDEN_PATTERN.findall(query_upper))
        for keyword in self.FORBIDDEN_KEYWORDS:
            if keyword in found:
                return f"Error: Query contains forbidden keyword '{keyword}'. Only SELECT queries are allowed."
        return None

    async def close(self):
        """Close the HTTP client."""
        await self.http_client.aclose()