185 lines
6.6 KiB
Python
185 lines
6.6 KiB
Python
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Preprocessor connector for executing SQL queries via an HTTP API.

Connects to a remote preprocessing service that hosts the SQLite database.
"""
|
|
|
|
import logging
import re
from typing import Optional

import httpx

from modules.shared.configuration import APP_CONFIG
|
|
|
|
# Module-level logger named after this module (via __name__).
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class PreprocessorConnector:
    """
    Connector for executing SQL queries via the preprocessing API.

    Validates that a query is a SELECT statement without forbidden keywords,
    sends it with an HTTP POST request to the configured preprocessing
    service, and formats the JSON response as text (optionally together with
    the raw rows).
    """

    # Maximum number of rows rendered into the formatted result text.
    MAX_DISPLAY_ROWS = 50

    # Timeout (seconds) passed to the shared HTTP client.
    REQUEST_TIMEOUT_SECONDS = 30.0

    # Keywords that must not appear as standalone words in a query.
    # Order matters: the first keyword in this tuple that is found is the one
    # reported in the validation error message.
    FORBIDDEN_KEYWORDS = (
        "DROP", "CREATE", "ALTER", "INSERT", "UPDATE",
        "DELETE", "PRAGMA", "ATTACH", "DETACH", "TRUNCATE",
    )

    # "SELECT" followed by a word boundary, so "SELECTED ..." is not accepted.
    _SELECT_PREFIX = re.compile(r"SELECT\b")

    # Splits a query into whole words so keywords are matched as words and
    # not as substrings of identifiers (e.g. "last_updated", "created_at").
    _WORD = re.compile(r"\w+")

    def __init__(self):
        """Initialize the preprocessor connector from APP_CONFIG.

        Reads PP_QUERY_API_KEY and PP_QUERY_BASE_URL. Missing values only
        produce a warning here; executeQuery reports them as errors when a
        query is actually attempted.
        """
        self.api_key = APP_CONFIG.get("PP_QUERY_API_KEY")
        self.base_url = APP_CONFIG.get("PP_QUERY_BASE_URL")

        if not self.api_key:
            logger.warning("PP_QUERY_API_KEY not found in configuration")
        if not self.base_url:
            logger.warning("PP_QUERY_BASE_URL not found in configuration")

        # One shared async client, released by close().
        self.http_client = httpx.AsyncClient(
            timeout=self.REQUEST_TIMEOUT_SECONDS,
            headers={
                "Content-Type": "application/json"
            },
        )

        logger.info("PreprocessorConnector initialized")

    async def executeQuery(self, sql_query: str, return_json: bool = False):
        """
        Execute a SQL query via the preprocessing API.

        Args:
            sql_query: SQL SELECT query to execute.
            return_json: If True, return a dict with 'text' and 'data' keys.
                If False, return only the formatted string.

        Returns:
            If return_json=False: a formatted string with the query results,
            or an error message.
            If return_json=True: a dict with 'text' (the same formatted string
            or error message) and 'data' (the raw row list from the API, or an
            empty list on any failure).

        Failures (invalid query, missing configuration, HTTP errors, network
        errors, unexpected exceptions) are not raised; they are reported
        through the returned text.
        """
        try:
            # Reject anything that is not a plain SELECT before any I/O.
            validation_error = self._validateQuery(sql_query)
            if validation_error:
                return self._buildResult(validation_error, [], return_json)

            # Check configuration
            if not self.api_key:
                error_msg = "Error: PP_QUERY_API_KEY not configured"
                logger.error(error_msg)
                return self._buildResult(error_msg, [], return_json)
            if not self.base_url:
                error_msg = "Error: PP_QUERY_BASE_URL not configured"
                logger.error(error_msg)
                return self._buildResult(error_msg, [], return_json)

            # Only the first 100 characters of the query are logged.
            logger.info(
                "Executing SQL query via preprocessing API: %s (query: %s...)",
                self.base_url,
                sql_query[:100],
            )

            response = await self.http_client.post(
                self.base_url,
                json={"query": sql_query},
                headers={
                    "X-DB-API-Key": self.api_key
                },
            )
            response.raise_for_status()
            result = response.json()

            # The API signals logical failures through a falsy "success" flag.
            if not result.get("success"):
                error_message = result.get("message", "Unknown error")
                return self._buildResult(
                    f"Query failed: {error_message}", [], return_json
                )

            # Treat a missing or null "data" field as an empty result set.
            data = result.get("data") or []

            # Fall back to the actual number of rows when the API does not
            # report a usable row_count.
            row_count = result.get("row_count")
            if row_count is None:
                row_count = len(data)

            # Only the first MAX_DISPLAY_ROWS rows are rendered as text; the
            # 'data' entry of the JSON result still carries every row.
            display_data = data[:self.MAX_DISPLAY_ROWS]

            if not display_data:
                result_text = (
                    f"Query executed successfully. Returned {row_count} rows (no data)."
                )
            else:
                result_text = (
                    f"Query executed successfully. Returned {row_count} rows "
                    f"(showing first {len(display_data)}):\n"
                    + "\n".join(str(row) for row in display_data)
                )

            return self._buildResult(result_text, data, return_json)

        except httpx.HTTPStatusError as e:
            error_msg = f"API error: HTTP {e.response.status_code}"
            try:
                error_msg += f" - {e.response.text}"
            except Exception:
                # Best effort: the status code alone is still reported when
                # the response body cannot be read.
                logger.debug("Could not read error response body", exc_info=True)
            logger.error("Preprocessing API HTTP error: %s", error_msg)
            return self._buildResult(error_msg, [], return_json)

        except httpx.RequestError as e:
            error_msg = f"Network error: {str(e)}"
            logger.error("Preprocessing API network error: %s", error_msg)
            return self._buildResult(error_msg, [], return_json)

        except Exception as e:
            # Last-resort boundary: report the failure instead of raising,
            # and keep the traceback in the log.
            error_msg = f"Error executing query: {str(e)}"
            logger.exception("Preprocessing API error: %s", error_msg)
            return self._buildResult(error_msg, [], return_json)

    @staticmethod
    def _buildResult(text: str, data: list, return_json: bool):
        """Return `text` alone, or a {'text', 'data'} dict when return_json is set."""
        if return_json:
            return {"text": text, "data": data}
        return text

    def _validateQuery(self, sql_query: str) -> Optional[str]:
        """
        Validate SQL query to ensure only SELECT queries are allowed.

        The check is case-insensitive. Forbidden keywords are matched as whole
        words, so identifiers that merely contain one (for example
        "last_updated" or "created_at") are accepted. A keyword inside a
        quoted string literal is still rejected.

        Args:
            sql_query: SQL query to validate.

        Returns:
            Error message if validation fails, None if valid.
        """
        if not sql_query or not isinstance(sql_query, str):
            return "Error: SQL query must be a non-empty string"

        query_upper = sql_query.strip().upper()

        # The query must begin with the SELECT keyword itself.
        if not self._SELECT_PREFIX.match(query_upper):
            return "Error: Only SELECT queries are allowed. Query must start with SELECT."

        # Compare against whole words only, never substrings.
        words = set(self._WORD.findall(query_upper))
        for keyword in self.FORBIDDEN_KEYWORDS:
            if keyword in words:
                return f"Error: Query contains forbidden keyword '{keyword}'. Only SELECT queries are allowed."

        return None

    async def close(self):
        """Close the HTTP client."""
        await self.http_client.aclose()
|
|
|