From 227d7b94017ccd47a2df46a8a71cb8372556f490 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 7 Oct 2025 01:15:07 +0200 Subject: [PATCH] secured user inputs in prompts --- modules/workflows/methods/methodOutlook.py | 7 +- .../processing/shared/placeholderFactory.py | 2 +- .../shared/promptGenerationActionsReact.py | 38 +-- .../processing/shared/securityUtils.py | 216 ++++++++++++++++++ 4 files changed, 246 insertions(+), 17 deletions(-) create mode 100644 modules/workflows/processing/shared/securityUtils.py diff --git a/modules/workflows/methods/methodOutlook.py b/modules/workflows/methods/methodOutlook.py index b6411503..55d0c372 100644 --- a/modules/workflows/methods/methodOutlook.py +++ b/modules/workflows/methods/methodOutlook.py @@ -1108,11 +1108,16 @@ class MethodOutlook(MethodBase): else: doc_list_text = "Available_Document_References: (No documents available for attachment)" + # Escape only the user-controlled context to prevent prompt injection + escaped_context = context.replace('"', '\\"').replace('\n', '\\n').replace('\r', '\\r') + ai_prompt = f""" Compose a professional email based on the following context and requirements: CONTEXT: -{context} +---------------- +{escaped_context} +---------------- RECIPIENT: {to} EMAIL STYLE: {emailStyle} diff --git a/modules/workflows/processing/shared/placeholderFactory.py b/modules/workflows/processing/shared/placeholderFactory.py index 5c27ec30..90aa8ba8 100644 --- a/modules/workflows/processing/shared/placeholderFactory.py +++ b/modules/workflows/processing/shared/placeholderFactory.py @@ -206,7 +206,7 @@ def getPreviousRoundContext(services, workflow: Any) -> str: if hasattr(services, 'workflow'): docs_index = services.workflow.getAvailableDocuments(workflow) if docs_index and docs_index != "No documents available": - doc_count = docs_index.count("docList:") + docs_index.count("docItem:") + doc_count = docs_index.count("docItem:") # Only count actual documents, not document list labels lines.append(f"Available documents: {doc_count}") except Exception: pass diff --git a/modules/workflows/processing/shared/promptGenerationActionsReact.py b/modules/workflows/processing/shared/promptGenerationActionsReact.py index b0c876f7..7aa5ce7f 100644 --- a/modules/workflows/processing/shared/promptGenerationActionsReact.py +++ b/modules/workflows/processing/shared/promptGenerationActionsReact.py @@ -162,25 +162,13 @@ Excludes documents/connections/history entirely. template = """You are a parameter generator. Set the parameters for this specific action. CONTEXT AND OBJECTIVE: +----------------- {{KEY:ACTION_OBJECTIVE}} +----------------- SELECTED_ACTION: {{KEY:SELECTED_ACTION}} -CONTEXT FOR PARAMETER VALUES: -{{KEY:PARAMETERS_CONTEXT}} - -LEARNINGS (from prior attempts, if any): -{{KEY:LEARNINGS}} - -REQUIRED PARAMETERS FOR THIS ACTION (use these exact parameter names): -{{KEY:ACTION_PARAMETERS}} - -INSTRUCTIONS: -- Use ONLY the parameter names listed above -- Fill in appropriate values based on the context and objective -- Do NOT invent new parameters -- Do NOT include: documentList, connectionReference, history, documents, connections REPLY (ONLY JSON): {{ @@ -190,9 +178,29 @@ REPLY (ONLY JSON): }} }} + +CONTEXT FOR PARAMETER VALUES: +----------------- +{{KEY:PARAMETERS_CONTEXT}} +----------------- + +LEARNINGS (from prior attempts, if any): +{{KEY:LEARNINGS}} + +REQUIRED PARAMETERS FOR THIS ACTION (use these exact parameter names): +{{KEY:ACTION_PARAMETERS}} + +INSTRUCTIONS: +- Use ONLY the parameter names listed in section REQUIRED PARAMETERS FOR THIS ACTION +- Fill in appropriate values based on the context and objective +- Do NOT invent new parameters +- Do NOT include: documentList, connectionReference, history, documents, connections + RULES: - Return ONLY JSON (no markdown, no prose) -- Use only the parameters listed in REQUIRED PARAMETERS FOR THIS ACTION +- Use ONLY the exact parameter names listed in REQUIRED PARAMETERS FOR THIS ACTION +- Do NOT add any parameters not listed above +- Do NOT add nested objects or custom fields """ return PromptBundle(prompt=template, placeholders=placeholders) diff --git a/modules/workflows/processing/shared/securityUtils.py b/modules/workflows/processing/shared/securityUtils.py new file mode 100644 index 00000000..8e632709 --- /dev/null +++ b/modules/workflows/processing/shared/securityUtils.py @@ -0,0 +1,216 @@ +""" +Security utilities for AI prompt construction. +Provides secure content escaping to prevent prompt injection attacks. +""" + +import re +import json +import logging +from typing import Any, Union, List, Dict + +logger = logging.getLogger(__name__) + +def _escapeForAiPrompt(content: str) -> str: + """ + Securely escape content for AI prompts to prevent injection attacks. + + This function: + 1. Escapes all special characters that could break prompt structure + 2. Wraps content in secure delimiters + 3. Handles multi-line content safely + 4. Prevents quote injection and context breaking + + Args: + content: The content to escape + + Returns: + Safely escaped content wrapped in secure delimiters + """ + if not content: + return "" + + # Convert to string if not already + content_str = str(content) + + # Remove or escape dangerous characters that could break prompt structure + # This includes quotes, backslashes, and other special characters + escaped = content_str + + # Escape backslashes first (order matters) + escaped = escaped.replace('\\', '\\\\') + + # Escape quotes and other special characters + escaped = escaped.replace('"', '\\"') + escaped = escaped.replace("'", "\\'") + escaped = escaped.replace('\n', '\\n') + escaped = escaped.replace('\r', '\\r') + escaped = escaped.replace('\t', '\\t') + + # Remove or escape other potentially dangerous characters + # Remove control characters except newlines (already handled above) + escaped = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', escaped) + + # Wrap in secure delimiters with clear boundaries + # Using a unique delimiter pattern that's unlikely to appear in user content + secure_delimiter_start = "===USER_CONTENT_START===" + secure_delimiter_end = "===USER_CONTENT_END===" + + return f"{secure_delimiter_start}\n{escaped}\n{secure_delimiter_end}" + +def _escapeForJsonPrompt(content: Any) -> str: + """ + Securely escape content for JSON-based AI prompts. + + Args: + content: The content to escape (can be any type) + + Returns: + Safely escaped JSON string + """ + try: + # Convert to JSON string with proper escaping + json_str = json.dumps(content, ensure_ascii=False, separators=(',', ':')) + return json_str + except Exception as e: + logger.warning(f"Failed to escape content as JSON: {str(e)}") + # Fallback to string escaping + return _escapeForAiPrompt(str(content)) + +def _escapeForListPrompt(items: List[Any]) -> str: + """ + Securely escape a list of items for AI prompts. + + Args: + items: List of items to escape + + Returns: + Safely escaped list representation + """ + if not items: + return "[]" + + try: + escaped_items = [] + for item in items: + if isinstance(item, (dict, list)): + escaped_items.append(_escapeForJsonPrompt(item)) + else: + escaped_items.append(_escapeForAiPrompt(str(item))) + + return f"[{', '.join(escaped_items)}]" + except Exception as e: + logger.warning(f"Failed to escape list content: {str(e)}") + return "[]" + +def securePromptContent(content: Any, content_type: str = "text") -> str: + """ + Main function to securely escape content for AI prompts. + + Args: + content: The content to escape + content_type: Type of content ("text", "json", "list", "user_prompt", "document_content") + + Returns: + Safely escaped content ready for AI prompt insertion + """ + if content is None: + return "" + + try: + if content_type == "json": + return _escapeForJsonPrompt(content) + elif content_type == "list": + if isinstance(content, list): + return _escapeForListPrompt(content) + else: + return _escapeForAiPrompt(str(content)) + elif content_type in ["user_prompt", "document_content"]: + # Extra security for user-controlled content + escaped = _escapeForAiPrompt(str(content)) + # Add additional warning for AI + return f"⚠️ USER_CONTROLLED_CONTENT: {escaped}" + else: # content_type == "text" or default + return _escapeForAiPrompt(str(content)) + + except Exception as e: + logger.error(f"Error escaping content for AI prompt: {str(e)}") + # Return a safe fallback + return "[ERROR: Content could not be safely escaped]" + +def buildSecurePrompt(template: str, **kwargs) -> str: + """ + Build a secure AI prompt by safely inserting content into a template. + + Args: + template: The prompt template with {key} placeholders + **kwargs: Key-value pairs for template substitution + + Returns: + Securely constructed prompt + """ + try: + # Escape all values before substitution + escaped_kwargs = {} + for key, value in kwargs.items(): + if key.endswith('_json'): + escaped_kwargs[key] = securePromptContent(value, "json") + elif key.endswith('_list'): + escaped_kwargs[key] = securePromptContent(value, "list") + elif key in ['user_prompt', 'context', 'document_content', 'user_input']: + escaped_kwargs[key] = securePromptContent(value, "user_prompt") + else: + escaped_kwargs[key] = securePromptContent(value, "text") + + # Use safe string formatting + return template.format(**escaped_kwargs) + + except Exception as e: + logger.error(f"Error building secure prompt: {str(e)}") + return template # Return original template if escaping fails + +def validatePromptSecurity(prompt: str) -> Dict[str, Any]: + """ + Validate that a prompt is secure and doesn't contain injection patterns. + + Args: + prompt: The prompt to validate + + Returns: + Dictionary with validation results + """ + issues = [] + + # Check for unescaped quotes that could break JSON + if '"' in prompt and '\\"' not in prompt: + # Check if quotes are properly escaped + unescaped_quotes = re.findall(r'(?', # Special tokens + ] + + for pattern in injection_patterns: + if re.search(pattern, prompt, re.IGNORECASE): + issues.append(f"Potential injection pattern detected: {pattern}") + + # Check for proper content delimiters + if "===USER_CONTENT_START===" not in prompt and "===USER_CONTENT_END===" not in prompt: + # This might be okay for some prompts, but flag for review + if any(keyword in prompt.lower() for keyword in ['context', 'user', 'input', 'prompt']): + issues.append("User content may not be properly delimited") + + return { + "is_secure": len(issues) == 0, + "issues": issues, + "prompt_length": len(prompt), + "has_user_content_delimiters": "===USER_CONTENT_START===" in prompt + }