216 lines
7.3 KiB
Python
216 lines
7.3 KiB
Python
"""
|
|
Security utilities for AI prompt construction.
|
|
Provides secure content escaping to prevent prompt injection attacks.
|
|
"""
|
|
|
|
import re
|
|
import json
|
|
import logging
|
|
from typing import Any, Union, List, Dict
|
|
|
|
# Module-level logger, named after this module via ``__name__``; the
# functions below use it to report escaping and prompt-building failures.
logger = logging.getLogger(__name__)
|
|
|
|
def _escapeForAiPrompt(content: str) -> str:
    """
    Escape untrusted content and wrap it in delimiters for use in an AI prompt.

    Processing steps:
      1. Backslashes, double quotes, single quotes, newlines, carriage
         returns and tabs are replaced by backslash escape sequences, so the
         payload always occupies a single physical line.
      2. All remaining ASCII control characters (0x00-0x1F and 0x7F) are
         removed.
      3. Any literal copy of the start or end delimiter inside the payload is
         broken up (``===`` becomes ``=\\==``), so the content can neither
         close the delimited block early nor open a fake one.
      4. The result is placed on its own line between
         ``===USER_CONTENT_START===`` and ``===USER_CONTENT_END===``.

    Args:
        content: The content to escape. Non-string values are converted with
            ``str()``.

    Returns:
        The escaped content wrapped in the delimiters, or an empty string
        when ``content`` is falsy (``None``, ``""``, ``0``, ...).
    """
    if not content:
        return ""

    secure_delimiter_start = "===USER_CONTENT_START==="
    secure_delimiter_end = "===USER_CONTENT_END==="

    # A single translation table performs the escaping and the stripping in
    # one pass over the text, so (unlike chained ``replace`` calls) the
    # order of the individual rules cannot matter.
    translation = {
        "\\": "\\\\",
        '"': '\\"',
        "'": "\\'",
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
    }
    # Every other ASCII control character is dropped (mapped to None).
    for code in (*range(0x00, 0x09), 0x0B, 0x0C, *range(0x0E, 0x20), 0x7F):
        translation[chr(code)] = None
    escaped = str(content).translate(str.maketrans(translation))

    # Neutralise delimiter look-alikes AFTER stripping control characters:
    # removing a control character could otherwise join two fragments into
    # a delimiter. The replacement never contains "===" directly in front of
    # "USER_CONTENT_", so it cannot itself form a new delimiter.
    for delimiter in (secure_delimiter_start, secure_delimiter_end):
        escaped = escaped.replace(delimiter, delimiter.replace("===", "=\\=="))

    return f"{secure_delimiter_start}\n{escaped}\n{secure_delimiter_end}"
|
|
|
|
def _escapeForJsonPrompt(content: Any) -> str:
    """
    Serialise arbitrary content to compact JSON for use in an AI prompt.

    ``json.dumps`` does the quoting and escaping. The output uses ``,`` and
    ``:`` separators without surrounding spaces and keeps non-ASCII
    characters as they are (``ensure_ascii=False``).

    Args:
        content: The value to serialise (any type).

    Returns:
        The JSON text. If serialisation raises, a warning is logged and the
        result of ``_escapeForAiPrompt(str(content))`` is returned instead.
    """
    compact_separators = (',', ':')
    try:
        serialized = json.dumps(
            content,
            ensure_ascii=False,
            separators=compact_separators,
        )
    except Exception as exc:
        logger.warning(f"Failed to escape content as JSON: {str(exc)}")
        # Not JSON-serialisable: fall back to plain string escaping.
        return _escapeForAiPrompt(str(content))
    return serialized
|
|
|
|
def _escapeForListPrompt(items: List[Any]) -> str:
    """
    Render a list of items as an escaped, bracketed string for AI prompts.

    ``dict`` and ``list`` elements are serialised with
    ``_escapeForJsonPrompt``; every other element is converted with ``str()``
    and passed through ``_escapeForAiPrompt``. The rendered elements are
    joined with ``", "`` and enclosed in square brackets.

    Args:
        items: The items to escape.

    Returns:
        The bracketed representation. ``"[]"`` is returned when ``items`` is
        empty/falsy, and also (after logging a warning) when escaping any
        element raises.
    """
    if not items:
        return "[]"

    try:
        rendered = [
            _escapeForJsonPrompt(entry)
            if isinstance(entry, (dict, list))
            else _escapeForAiPrompt(str(entry))
            for entry in items
        ]
        return "[" + ', '.join(rendered) + "]"
    except Exception as exc:
        logger.warning(f"Failed to escape list content: {str(exc)}")
        return "[]"
|
|
|
|
def securePromptContent(content: Any, content_type: str = "text") -> str:
    """
    Escape content for insertion into an AI prompt, based on its type.

    Dispatch by ``content_type``:
      * ``"json"``: ``_escapeForJsonPrompt(content)``.
      * ``"list"``: ``_escapeForListPrompt(content)`` when ``content`` is a
        ``list``; otherwise it is treated like text.
      * ``"user_prompt"`` / ``"document_content"``: escaped like text and
        prefixed with a ``USER_CONTROLLED_CONTENT`` warning marker.
      * anything else (including the default ``"text"``):
        ``_escapeForAiPrompt(str(content))``.

    Args:
        content: The content to escape.
        content_type: One of "text", "json", "list", "user_prompt",
            "document_content".

    Returns:
        The escaped content; ``""`` when ``content`` is ``None``; a fixed
        error placeholder string if escaping raises (the error is logged).
    """
    if content is None:
        return ""

    try:
        if content_type == "json":
            return _escapeForJsonPrompt(content)

        if content_type == "list" and isinstance(content, list):
            return _escapeForListPrompt(content)

        # Every remaining case goes through the plain-text escaper.
        wrapped = _escapeForAiPrompt(str(content))
        if content_type in ("user_prompt", "document_content"):
            # User-controlled content gets an extra warning marker for the AI.
            return f"⚠️ USER_CONTROLLED_CONTENT: {wrapped}"
        return wrapped

    except Exception as exc:
        logger.error(f"Error escaping content for AI prompt: {str(exc)}")
        # Fixed placeholder so no unescaped content reaches the prompt.
        return "[ERROR: Content could not be safely escaped]"
|
|
|
|
def buildSecurePrompt(template: str, **kwargs) -> str:
    """
    Fill a prompt template with escaped values.

    Each keyword argument is escaped with ``securePromptContent`` before
    being substituted via ``str.format``. The content type is chosen from
    the argument name:
      * names ending in ``_json`` use "json"
      * names ending in ``_list`` use "list"
      * ``user_prompt``, ``context``, ``document_content`` and
        ``user_input`` use "user_prompt"
      * everything else uses "text"

    Args:
        template: The prompt template with ``{key}`` placeholders.
        **kwargs: Values to substitute, keyed by placeholder name.

    Returns:
        The formatted prompt. If escaping or formatting raises, the error is
        logged and the unmodified ``template`` is returned.
    """
    user_controlled_names = ['user_prompt', 'context', 'document_content', 'user_input']

    def _content_type_for(name):
        # Map a placeholder name to the escaping mode used for its value.
        if name.endswith('_json'):
            return "json"
        if name.endswith('_list'):
            return "list"
        if name in user_controlled_names:
            return "user_prompt"
        return "text"

    try:
        # Escape every value first; only escaped text reaches str.format.
        safe_values = {
            name: securePromptContent(raw_value, _content_type_for(name))
            for name, raw_value in kwargs.items()
        }
        return template.format(**safe_values)

    except Exception as exc:
        logger.error(f"Error building secure prompt: {str(exc)}")
        return template  # Return original template if escaping fails
|
|
|
|
def validatePromptSecurity(prompt: str) -> Dict[str, Any]:
    """
    Run heuristic checks for prompt-injection risks on a finished prompt.

    Checks performed:
      1. Unescaped double quotes. A quote counts as unescaped when it is
         preceded by an even number of backslashes (zero included). The
         check always runs, even if the prompt also contains escaped quotes.
      2. Known injection phrases and role/special-token markers
         (case-insensitive regular expressions).
      3. Missing delimiters: if either ``===USER_CONTENT_START===`` or
         ``===USER_CONTENT_END===`` is absent and the prompt mentions
         "context", "user", "input" or "prompt", the prompt is flagged.

    Args:
        prompt: The prompt to validate.

    Returns:
        A dictionary with:
          * ``is_secure``: ``True`` when no issue was found.
          * ``issues``: list of human-readable issue descriptions.
          * ``prompt_length``: ``len(prompt)``.
          * ``has_user_content_delimiters``: whether the start delimiter
            occurs in the prompt.
    """
    start_marker = "===USER_CONTENT_START==="
    end_marker = "===USER_CONTENT_END==="
    issues: List[str] = []

    # Check for unescaped quotes that could break JSON. An even run of
    # backslashes before the quote escapes only the backslashes themselves,
    # so the quote is still bare.
    if re.search(r'(?<!\\)(?:\\\\)*"', prompt):
        issues.append("Unescaped quotes detected")

    # Check for potential injection patterns
    injection_patterns = [
        r'ignore\s+previous\s+instructions',
        r'forget\s+everything',
        r'you\s+are\s+now',
        r'system\s*:',
        r'assistant\s*:',
        r'user\s*:',
        r'<\|.*\|>',  # Special tokens
    ]
    issues.extend(
        f"Potential injection pattern detected: {pattern}"
        for pattern in injection_patterns
        if re.search(pattern, prompt, re.IGNORECASE)
    )

    # Check for proper content delimiters: BOTH must be present; a lone
    # start or end marker does not delimit anything.
    has_start = start_marker in prompt
    has_end = end_marker in prompt
    if not (has_start and has_end):
        # This might be okay for some prompts, but flag for review
        lowered = prompt.lower()
        if any(keyword in lowered for keyword in ('context', 'user', 'input', 'prompt')):
            issues.append("User content may not be properly delimited")

    return {
        "is_secure": not issues,
        "issues": issues,
        "prompt_length": len(prompt),
        "has_user_content_delimiters": has_start,
    }
|