From 227d7b94017ccd47a2df46a8a71cb8372556f490 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Tue, 7 Oct 2025 01:15:07 +0200
Subject: [PATCH] secured user inputs in prompts

---
 modules/workflows/methods/methodOutlook.py    |   7 +-
 .../processing/shared/placeholderFactory.py   |   2 +-
 .../shared/promptGenerationActionsReact.py    |  38 +--
 .../processing/shared/securityUtils.py        | 216 ++++++++++++++++++
 4 files changed, 246 insertions(+), 17 deletions(-)
 create mode 100644 modules/workflows/processing/shared/securityUtils.py

diff --git a/modules/workflows/methods/methodOutlook.py b/modules/workflows/methods/methodOutlook.py
index b6411503..55d0c372 100644
--- a/modules/workflows/methods/methodOutlook.py
+++ b/modules/workflows/methods/methodOutlook.py
@@ -1108,11 +1108,16 @@ class MethodOutlook(MethodBase):
             else:
                 doc_list_text = "Available_Document_References: (No documents available for attachment)"
             
+            # Escape only the user-controlled context to prevent prompt injection
+            escaped_context = context.replace('"', '\\"').replace('\n', '\\n').replace('\r', '\\r')
+            
             ai_prompt = f"""
 Compose a professional email based on the following context and requirements:
 
 CONTEXT:
-{context}
+----------------
+{escaped_context}
+----------------
 
 RECIPIENT: {to}
 EMAIL STYLE: {emailStyle}
diff --git a/modules/workflows/processing/shared/placeholderFactory.py b/modules/workflows/processing/shared/placeholderFactory.py
index 5c27ec30..90aa8ba8 100644
--- a/modules/workflows/processing/shared/placeholderFactory.py
+++ b/modules/workflows/processing/shared/placeholderFactory.py
@@ -206,7 +206,7 @@ def getPreviousRoundContext(services, workflow: Any) -> str:
             if hasattr(services, 'workflow'):
                 docs_index = services.workflow.getAvailableDocuments(workflow)
                 if docs_index and docs_index != "No documents available":
-                    doc_count = docs_index.count("docList:") + docs_index.count("docItem:")
+                    doc_count = docs_index.count("docItem:")  # Only count actual documents, not document list labels
                     lines.append(f"Available documents: {doc_count}")
         except Exception:
             pass
diff --git a/modules/workflows/processing/shared/promptGenerationActionsReact.py b/modules/workflows/processing/shared/promptGenerationActionsReact.py
index b0c876f7..7aa5ce7f 100644
--- a/modules/workflows/processing/shared/promptGenerationActionsReact.py
+++ b/modules/workflows/processing/shared/promptGenerationActionsReact.py
@@ -162,25 +162,13 @@ Excludes documents/connections/history entirely.
     template = """You are a parameter generator. Set the parameters for this specific action.
 
 CONTEXT AND OBJECTIVE:
+-----------------
 {{KEY:ACTION_OBJECTIVE}}
+-----------------
 
 SELECTED_ACTION:
 {{KEY:SELECTED_ACTION}}
 
-CONTEXT FOR PARAMETER VALUES:
-{{KEY:PARAMETERS_CONTEXT}}
-
-LEARNINGS (from prior attempts, if any):
-{{KEY:LEARNINGS}}
-
-REQUIRED PARAMETERS FOR THIS ACTION (use these exact parameter names):
-{{KEY:ACTION_PARAMETERS}}
-
-INSTRUCTIONS:
-- Use ONLY the parameter names listed above
-- Fill in appropriate values based on the context and objective
-- Do NOT invent new parameters
-- Do NOT include: documentList, connectionReference, history, documents, connections
 
 REPLY (ONLY JSON):
 {{
@@ -190,9 +178,29 @@ REPLY (ONLY JSON):
     }}
 }}
 
+
+CONTEXT FOR PARAMETER VALUES:
+-----------------
+{{KEY:PARAMETERS_CONTEXT}}
+-----------------
+
+LEARNINGS (from prior attempts, if any):
+{{KEY:LEARNINGS}}
+
+REQUIRED PARAMETERS FOR THIS ACTION (use these exact parameter names):
+{{KEY:ACTION_PARAMETERS}}
+
+INSTRUCTIONS:
+- Use ONLY the parameter names listed in section REQUIRED PARAMETERS FOR THIS ACTION
+- Fill in appropriate values based on the context and objective
+- Do NOT invent new parameters
+- Do NOT include: documentList, connectionReference, history, documents, connections
+
 RULES:
 - Return ONLY JSON (no markdown, no prose)
-- Use only the parameters listed in REQUIRED PARAMETERS FOR THIS ACTION
+- Use ONLY the exact parameter names listed in REQUIRED PARAMETERS FOR THIS ACTION
+- Do NOT add any parameters not listed above
+- Do NOT add nested objects or custom fields
 """
 
     return PromptBundle(prompt=template, placeholders=placeholders)
diff --git a/modules/workflows/processing/shared/securityUtils.py b/modules/workflows/processing/shared/securityUtils.py
new file mode 100644
index 00000000..8e632709
--- /dev/null
+++ b/modules/workflows/processing/shared/securityUtils.py
@@ -0,0 +1,216 @@
+"""
+Security utilities for AI prompt construction.
+Provides secure content escaping to prevent prompt injection attacks.
+"""
+
+import re
+import json
+import logging
+from typing import Any, Union, List, Dict
+
+logger = logging.getLogger(__name__)
+
+def _escapeForAiPrompt(content: str) -> str:
+    """
+    Escape untrusted content and wrap it in delimiters for an AI prompt.
+
+    Steps, in order:
+    1. Backslash-escape backslashes, quotes, newlines, carriage returns, tabs
+    2. Strip the remaining ASCII control characters
+    3. Defuse delimiter markers embedded in the content itself
+    4. Wrap the single-line result in the USER_CONTENT delimiters
+
+    Args:
+        content: The content to escape (non-strings are passed through str())
+
+    Returns:
+        The escaped content between ===USER_CONTENT_START=== and
+        ===USER_CONTENT_END=== lines, or "" when content is falsy
+    """
+    if not content:
+        return ""
+
+    # Wrapper markers; validatePromptSecurity looks for these same literals
+    secure_delimiter_start = "===USER_CONTENT_START==="
+    secure_delimiter_end = "===USER_CONTENT_END==="
+
+    # Tolerate non-string callers
+    escaped = str(content)
+
+    # Escape backslashes first (order matters): the escapes added below
+    # introduce backslashes of their own that must not be doubled
+    escaped = escaped.replace('\\', '\\\\')
+
+    # Escape quotes and fold line breaks/tabs so the payload stays on one line
+    escaped = escaped.replace('"', '\\"')
+    escaped = escaped.replace("'", "\\'")
+    escaped = escaped.replace('\n', '\\n')
+    escaped = escaped.replace('\r', '\\r')
+    escaped = escaped.replace('\t', '\\t')
+
+    # Drop the other ASCII control characters (\t, \n, \r were escaped above)
+    escaped = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', escaped)
+
+    # Defuse embedded markers: otherwise content containing the end marker
+    # could appear to close the block and smuggle text in as instructions
+    escaped = re.sub(r'={3}\s*(USER_CONTENT_(?:START|END))\s*={3}', r'[\1]', escaped, flags=re.IGNORECASE)
+
+    return f"{secure_delimiter_start}\n{escaped}\n{secure_delimiter_end}"
+
+def _escapeForJsonPrompt(content: Any) -> str:
+    """
+    Serialize content as compact JSON for embedding in an AI prompt.
+
+    Args:
+        content: Any value; JSON-serializable values are dumped as-is
+
+    Returns:
+        Compact JSON text (non-ASCII kept verbatim), or the delimited
+        _escapeForAiPrompt form of str(content) if serialization fails
+    """
+    try:
+        # Compact separators keep the prompt short; json.dumps does the quoting
+        return json.dumps(content, ensure_ascii=False, separators=(',', ':'))
+    except Exception as error:
+        logger.warning(f"Failed to escape content as JSON: {str(error)}")
+        # Not serializable: degrade to plain-text escaping of the string form
+        return _escapeForAiPrompt(str(content))
+
+def _escapeForListPrompt(items: List[Any]) -> str:
+    """
+    Render a list as a bracketed, comma-separated string of escaped items.
+
+    Args:
+        items: List of items to escape
+
+    Returns:
+        "[...]" where dict/list items are JSON-encoded and every other item
+        is escaped as text; "[]" for an empty list or if escaping fails
+    """
+    if not items:
+        return "[]"
+
+    try:
+        # Structured values go through JSON, everything else through text
+        rendered = [
+            _escapeForJsonPrompt(entry) if isinstance(entry, (dict, list))
+            else _escapeForAiPrompt(str(entry))
+            for entry in items
+        ]
+        return f"[{', '.join(rendered)}]"
+    except Exception as error:
+        logger.warning(f"Failed to escape list content: {str(error)}")
+        return "[]"
+
+def securePromptContent(content: Any, content_type: str = "text") -> str:
+    """
+    Escape content for AI prompt insertion according to its declared type.
+
+    Args:
+        content: The content to escape; None yields ""
+        content_type: "json", "list", "user_prompt" or "document_content";
+            "text" and any other value use plain text escaping
+
+    Returns:
+        The escaped content, or a fixed error marker if escaping raises
+    """
+    if content is None:
+        return ""
+
+    try:
+        if content_type == "json":
+            return _escapeForJsonPrompt(content)
+
+        # Only real lists get per-item escaping
+        if content_type == "list" and isinstance(content, list):
+            return _escapeForListPrompt(content)
+
+        # Every remaining case is escaped as plain text
+        as_text = _escapeForAiPrompt(str(content))
+        if content_type in ("user_prompt", "document_content"):
+            # User-controlled content is additionally labelled for the AI
+            return f"⚠️ USER_CONTROLLED_CONTENT: {as_text}"
+        return as_text
+
+    except Exception as error:
+        logger.error(f"Error escaping content for AI prompt: {str(error)}")
+        # Fail closed: never hand back unescaped content
+        return "[ERROR: Content could not be safely escaped]"
+
+def buildSecurePrompt(template: str, **kwargs) -> str:
+    """
+    Fill a str.format-style template with escaped keyword values.
+
+    Args:
+        template: The prompt template with {key} placeholders
+        **kwargs: Values to substitute; the key name selects the escaping
+            ("*_json" -> json, "*_list" -> list, known user keys -> user_prompt)
+
+    Returns:
+        The formatted prompt, or the unmodified template if anything raises
+    """
+    user_keys = ['user_prompt', 'context', 'document_content', 'user_input']
+    try:
+        safe_values = {}
+        for name, raw in kwargs.items():
+            # Suffix checks take precedence over the user-key list
+            if name.endswith('_json'):
+                kind = "json"
+            elif name.endswith('_list'):
+                kind = "list"
+            elif name in user_keys:
+                kind = "user_prompt"
+            else:
+                kind = "text"
+            safe_values[name] = securePromptContent(raw, kind)
+        return template.format(**safe_values)
+    except Exception as error:
+        logger.error(f"Error building secure prompt: {str(error)}")
+        return template  # Fall back to the raw template
+
+def validatePromptSecurity(prompt: str) -> Dict[str, Any]:
+    """
+    Run heuristic checks for prompt-injection indicators on a prompt.
+
+    Args:
+        prompt: The prompt to validate
+
+    Returns:
+        Dictionary with keys "is_secure" (True when no issue was found),
+        "issues", "prompt_length" and "has_user_content_delimiters"
+    """
+    issues = []
+    has_start = "===USER_CONTENT_START===" in prompt
+    has_end = "===USER_CONTENT_END===" in prompt
+
+    # Check each quote on its own: it is unescaped when preceded by an even
+    # number of backslashes, so one escaped quote cannot mask the others
+    if re.search(r'(?<!\\)(?:\\\\)*"', prompt):
+        issues.append("Unescaped quotes detected")
+
+    # Check for potential injection patterns
+    injection_patterns = [
+        r'ignore\s+previous\s+instructions',
+        r'forget\s+everything',
+        r'you\s+are\s+now',
+        r'system\s*:',
+        r'assistant\s*:',
+        r'user\s*:',
+        r'<\|.*\|>',  # Special tokens
+    ]
+
+    for pattern in injection_patterns:
+        if re.search(pattern, prompt, re.IGNORECASE):
+            issues.append(f"Potential injection pattern detected: {pattern}")
+
+    # Require BOTH markers: a lone start or end marker delimits nothing
+    if not (has_start and has_end):
+        if any(keyword in prompt.lower() for keyword in ['context', 'user', 'input', 'prompt']):
+            issues.append("User content may not be properly delimited")
+
+    return {
+        "is_secure": len(issues) == 0,
+        "issues": issues,
+        "prompt_length": len(prompt),
+        "has_user_content_delimiters": has_start
+    }