# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Preprocessor connector for executing SQL queries via HTTP API.
Connects to remote preprocessing service that hosts the SQLite database.
"""

import logging
import re
from typing import Optional

import httpx

from modules.shared.configuration import APP_CONFIG

logger = logging.getLogger(__name__)

# Statement keywords that must never appear as a standalone word in a query.
_FORBIDDEN_KEYWORDS = (
    "DROP", "CREATE", "ALTER", "INSERT", "UPDATE", "DELETE",
    "PRAGMA", "ATTACH", "DETACH", "TRUNCATE",
)

# Whole-word match, so identifiers such as CREATED_AT or IS_DELETED are not
# mistaken for the CREATE / DELETE statements they merely contain.
_FORBIDDEN_KEYWORD_RE = re.compile(r"\b(" + "|".join(_FORBIDDEN_KEYWORDS) + r")\b")

# The (upper-cased, stripped) query must begin with the word SELECT.
_SELECT_PREFIX_RE = re.compile(r"SELECT\b")

# Maximum number of rows included in the formatted result text.
_MAX_DISPLAY_ROWS = 50


class PreprocessorConnector:
    """
    Connector for executing SQL queries via preprocessing API.
    Makes HTTP POST requests to remote preprocessing service.
    """

    def __init__(self):
        """Initialize the preprocessor connector.

        Reads PP_QUERY_API_KEY and PP_QUERY_BASE_URL from APP_CONFIG (logging
        a warning for each one that is missing) and creates the shared async
        HTTP client with a 30 second timeout.
        """
        self.api_key = APP_CONFIG.get("PP_QUERY_API_KEY")
        self.base_url = APP_CONFIG.get("PP_QUERY_BASE_URL")

        if not self.api_key:
            logger.warning("PP_QUERY_API_KEY not found in configuration")
        if not self.base_url:
            logger.warning("PP_QUERY_BASE_URL not found in configuration")

        # HTTP client with timeout
        self.http_client = httpx.AsyncClient(
            timeout=30.0,
            headers={
                "Content-Type": "application/json"
            }
        )

        logger.info("PreprocessorConnector initialized")

    async def executeQuery(self, sql_query: str) -> str:
        """
        Execute a SQL query via the preprocessing API.

        This method does not raise: validation failures, missing
        configuration, HTTP errors, network errors and unexpected exceptions
        are all logged and reported through the returned string.

        Args:
            sql_query: SQL SELECT query to execute

        Returns:
            Formatted result string with the query results, or a
            human-readable error message if the query could not be executed.
        """
        try:
            # Validate query
            validation_error = self._validateQuery(sql_query)
            if validation_error:
                return validation_error

            # Check configuration
            if not self.api_key:
                error_msg = "Error: PP_QUERY_API_KEY not configured"
                logger.error(error_msg)
                return error_msg

            if not self.base_url:
                error_msg = "Error: PP_QUERY_BASE_URL not configured"
                logger.error(error_msg)
                return error_msg

            # Make HTTP POST request to preprocessing API
            logger.info(f"Executing SQL query via preprocessing API: {self.base_url} (query: {sql_query[:100]}...)")

            response = await self.http_client.post(
                self.base_url,
                json={"query": sql_query},
                headers={
                    "X-DB-API-Key": self.api_key
                }
            )
            response.raise_for_status()

            return self._formatResult(response.json())

        except httpx.HTTPStatusError as e:
            error_msg = f"API error: HTTP {e.response.status_code}"
            try:
                error_msg += f" - {e.response.text}"
            except Exception:
                # Best effort only: the status code alone is still useful if
                # the response body cannot be read or decoded.
                pass
            logger.error(f"Preprocessing API HTTP error: {error_msg}")
            return error_msg

        except httpx.RequestError as e:
            error_msg = f"Network error: {str(e)}"
            logger.error(f"Preprocessing API network error: {error_msg}")
            return error_msg

        except Exception as e:
            error_msg = f"Error executing query: {str(e)}"
            logger.error(f"Preprocessing API error: {error_msg}")
            return error_msg

    def _formatResult(self, result) -> str:
        """
        Turn the decoded JSON response of the preprocessing API into text.

        Args:
            result: Decoded JSON body. The keys read here are "success",
                "message", "data" and "row_count".

        Returns:
            A failure message when "success" is falsy, otherwise a summary
            line followed by at most _MAX_DISPLAY_ROWS rows, one per line.
        """
        if not isinstance(result, dict):
            return "Query failed: Unexpected response format from preprocessing API"

        if not result.get("success"):
            error_message = result.get("message", "Unknown error")
            return f"Query failed: {error_message}"

        # A missing or null "data" field is treated as an empty result set.
        data = result.get("data") or []

        # Fall back to the number of received rows when the API does not
        # report a usable row_count.
        row_count = result.get("row_count")
        if row_count is None:
            row_count = len(data)

        # Limit the number of rows for display
        display_data = data[:_MAX_DISPLAY_ROWS]
        if not display_data:
            return f"Query executed successfully. Returned {row_count} rows (no data)."

        rows_text = "\n".join(str(row) for row in display_data)
        # Report the number of rows actually shown rather than deriving it
        # from the server-reported row_count.
        return (
            f"Query executed successfully. Returned {row_count} rows "
            f"(showing first {len(display_data)}):\n" + rows_text
        )

    def _validateQuery(self, sql_query: str) -> Optional[str]:
        """
        Validate SQL query to ensure only SELECT queries are allowed.

        The check is case-insensitive. Forbidden keywords are matched as whole
        words, so column names that merely contain one (e.g. "created_at")
        are accepted. A forbidden word inside a string literal is still
        rejected.

        Args:
            sql_query: SQL query to validate

        Returns:
            Error message if validation fails, None if valid
        """
        if not sql_query or not isinstance(sql_query, str):
            return "Error: SQL query must be a non-empty string"

        query_upper = sql_query.strip().upper()

        # Check if query starts with SELECT
        if not _SELECT_PREFIX_RE.match(query_upper):
            return "Error: Only SELECT queries are allowed. Query must start with SELECT."

        # Check for forbidden keywords; report the first one in list order
        found = set(_FORBIDDEN_KEYWORD_RE.findall(query_upper))
        for keyword in _FORBIDDEN_KEYWORDS:
            if keyword in found:
                return f"Error: Query contains forbidden keyword '{keyword}'. Only SELECT queries are allowed."

        return None

    async def close(self):
        """Close the HTTP client."""
        await self.http_client.aclose()