"""Security utilities for AI prompt construction.

Provides secure content escaping to prevent prompt injection attacks.
"""
import re
import json
import logging
from typing import Any, Union, List, Dict

logger = logging.getLogger(__name__)

# Delimiters used to fence user-supplied content inside prompts.  Defined once
# at module level so the escaping and validation helpers cannot drift apart.
_DELIMITER_START = "===USER_CONTENT_START==="
_DELIMITER_END = "===USER_CONTENT_END==="


def _escapeForAiPrompt(content: str) -> str:
    """Securely escape content for AI prompts to prevent injection attacks.

    This function:
      1. Escapes special characters that could break prompt structure
         (backslashes, quotes, newlines, carriage returns, tabs).
      2. Strips any remaining control characters.
      3. Wraps the content in unambiguous delimiters so the model can
         distinguish user content from instructions.

    Args:
        content: The content to escape.

    Returns:
        Safely escaped content wrapped in secure delimiters, or "" for
        empty/falsy input.
    """
    if not content:
        return ""

    # Coerce defensively; callers occasionally pass non-string values.
    content_str = str(content)

    # Escape backslashes FIRST — otherwise the backslashes introduced by the
    # subsequent replacements would themselves be doubled.
    escaped = content_str.replace('\\', '\\\\')

    # Escape quotes and whitespace characters that could break out of a
    # quoted or single-line prompt context.
    escaped = escaped.replace('"', '\\"')
    escaped = escaped.replace("'", "\\'")
    escaped = escaped.replace('\n', '\\n')
    escaped = escaped.replace('\r', '\\r')
    escaped = escaped.replace('\t', '\\t')

    # Remove remaining C0/DEL control characters (\n, \r, \t were already
    # converted to visible escape sequences above).
    escaped = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', escaped)

    # Fence the content with a delimiter pattern that is unlikely to appear
    # in user input, giving the model a clear content boundary.
    return f"{_DELIMITER_START}\n{escaped}\n{_DELIMITER_END}"


def _escapeForJsonPrompt(content: Any) -> str:
    """Securely escape content for JSON-based AI prompts.

    Args:
        content: The content to escape (can be any type).

    Returns:
        Safely escaped JSON string; falls back to plain text escaping when
        the content is not JSON-serializable.
    """
    try:
        # json.dumps performs all required quote/backslash escaping; compact
        # separators keep the prompt short.
        return json.dumps(content, ensure_ascii=False, separators=(',', ':'))
    except Exception as e:
        logger.warning(f"Failed to escape content as JSON: {str(e)}")
        # Fallback to string escaping rather than failing the whole prompt.
        return _escapeForAiPrompt(str(content))


def _escapeForListPrompt(items: List[Any]) -> str:
    """Securely escape a list of items for AI prompts.

    Args:
        items: List of items to escape.

    Returns:
        Safely escaped list representation; "[]" for empty input or on
        unexpected escaping errors.
    """
    if not items:
        return "[]"
    try:
        escaped_items = []
        for item in items:
            # Structured items are rendered as JSON; everything else goes
            # through the delimiter-wrapping text escaper.
            if isinstance(item, (dict, list)):
                escaped_items.append(_escapeForJsonPrompt(item))
            else:
                escaped_items.append(_escapeForAiPrompt(str(item)))
        return f"[{', '.join(escaped_items)}]"
    except Exception as e:
        logger.warning(f"Failed to escape list content: {str(e)}")
        return "[]"


def securePromptContent(content: Any, content_type: str = "text") -> str:
    """Main function to securely escape content for AI prompts.

    Args:
        content: The content to escape.
        content_type: Type of content ("text", "json", "list",
            "user_prompt", "document_content").

    Returns:
        Safely escaped content ready for AI prompt insertion; a safe
        placeholder string if escaping fails.
    """
    if content is None:
        return ""
    try:
        if content_type == "json":
            return _escapeForJsonPrompt(content)
        elif content_type == "list":
            # Non-list values routed here are still escaped as plain text.
            if isinstance(content, list):
                return _escapeForListPrompt(content)
            return _escapeForAiPrompt(str(content))
        elif content_type in ["user_prompt", "document_content"]:
            # Extra security for user-controlled content: prepend an explicit
            # marker so the model treats it as data, not instructions.
            escaped = _escapeForAiPrompt(str(content))
            return f"⚠️ USER_CONTROLLED_CONTENT: {escaped}"
        else:
            # content_type == "text" or any unrecognized type.
            return _escapeForAiPrompt(str(content))
    except Exception as e:
        logger.error(f"Error escaping content for AI prompt: {str(e)}")
        # Return a safe fallback rather than leaking unescaped content.
        return "[ERROR: Content could not be safely escaped]"


def buildSecurePrompt(template: str, **kwargs) -> str:
    """Build a secure AI prompt by safely inserting content into a template.

    Args:
        template: The prompt template with {key} placeholders.
        **kwargs: Key-value pairs for template substitution.  The escaping
            strategy is chosen per key: ``*_json`` keys are JSON-escaped,
            ``*_list`` keys are list-escaped, well-known user-controlled keys
            get the hardened "user_prompt" treatment, everything else is
            escaped as text.

    Returns:
        Securely constructed prompt; the ORIGINAL (unsubstituted) template
        if escaping or formatting fails.
    """
    try:
        # Escape every value before substitution so nothing reaches the
        # template raw.
        escaped_kwargs = {}
        for key, value in kwargs.items():
            if key.endswith('_json'):
                escaped_kwargs[key] = securePromptContent(value, "json")
            elif key.endswith('_list'):
                escaped_kwargs[key] = securePromptContent(value, "list")
            elif key in ['user_prompt', 'context', 'document_content', 'user_input']:
                escaped_kwargs[key] = securePromptContent(value, "user_prompt")
            else:
                escaped_kwargs[key] = securePromptContent(value, "text")
        return template.format(**escaped_kwargs)
    except Exception as e:
        logger.error(f"Error building secure prompt: {str(e)}")
        # NOTE(review): falling back to the raw template leaves {placeholders}
        # unfilled; callers relying on this behavior should validate output.
        return template


def validatePromptSecurity(prompt: str) -> Dict[str, Any]:
    """Validate that a prompt is secure and doesn't contain injection patterns.

    Args:
        prompt: The prompt to validate.

    Returns:
        Dictionary with validation results: ``is_secure`` (bool), ``issues``
        (list of human-readable findings), ``prompt_length`` (int) and
        ``has_user_content_delimiters`` (bool).
    """
    issues = []

    # Check for unescaped quotes that could break JSON-shaped prompts.
    # NOTE(review): this section was reconstructed from a corrupted source —
    # confirm the exact patterns against the original security requirements.
    if '"' in prompt and '\\"' not in prompt:
        # A double quote NOT preceded by a backslash is unescaped.
        unescaped_quotes = re.findall(r'(?<!\\)"', prompt)
        if unescaped_quotes:
            issues.append(
                f"Found {len(unescaped_quotes)} unescaped double quotes"
            )

    # Common prompt-injection phrasings and model control tokens.
    injection_patterns = [
        r'ignore\s+(all\s+)?previous\s+instructions',
        r'forget\s+(all\s+)?previous\s+instructions',
        r'disregard\s+(all\s+)?previous\s+instructions',
        r'<\|.*?\|>',  # Special tokens
    ]
    for pattern in injection_patterns:
        if re.search(pattern, prompt, re.IGNORECASE):
            issues.append(f"Potential injection pattern detected: {pattern}")

    # Check for proper content delimiters.
    if _DELIMITER_START not in prompt and _DELIMITER_END not in prompt:
        # This might be okay for some prompts, but flag for review when the
        # prompt looks like it carries user-derived content.
        if any(keyword in prompt.lower()
               for keyword in ['context', 'user', 'input', 'prompt']):
            issues.append("User content may not be properly delimited")

    return {
        "is_secure": len(issues) == 0,
        "issues": issues,
        "prompt_length": len(prompt),
        "has_user_content_delimiters": _DELIMITER_START in prompt,
    }