gateway/modules/workflows/processing/shared/promptFactoryPlaceholders.py
2025-10-04 13:31:21 +02:00

420 lines
16 KiB
Python

"""
Placeholder-based prompt factory for dynamic AI calls.
This module provides prompt templates with placeholders that can be filled dynamically.
"""
import json
import logging
from typing import Dict, Any
logger = logging.getLogger(__name__)
from modules.workflows.processing.shared.promptFactory import (
getAvailableDocuments,
getPreviousRoundContext,
getMethodsList,
getEnhancedDocumentContext,
getConnectionReferenceList,
methods,
discoverMethods
)
def createTaskPlanningPromptTemplate() -> str:
    """Return the task-planning prompt template.

    Placeholders (substituted by the caller before the AI call):
      {{KEY:USER_PROMPT}}          -- the user's original request
      {{KEY:AVAILABLE_DOCUMENTS}}  -- listing of documents in scope
      {{KEY:WORKFLOW_HISTORY}}     -- summary of previous workflow rounds

    The AI is instructed to answer with a single JSON object containing
    an overview, detected user language, a user-facing message, and a
    list of high-level tasks with dependencies and success criteria.
    """
    # NOTE(review): the JSON example uses doubled braces ('{{' / '}}') as if
    # this string were later passed through str.format(); sibling templates
    # below use single braces. Confirm the actual substitution mechanism —
    # plain token replacement would leave literal doubled braces in the prompt.
    return """Break down user requests into logical, executable task steps.
USER REQUEST:
{{KEY:USER_PROMPT}}
AVAILABLE DOCUMENTS:
{{KEY:AVAILABLE_DOCUMENTS}}
PREVIOUS WORKFLOW ROUNDS:
{{KEY:WORKFLOW_HISTORY}}
TASK PLANNING RULES:
- Create HIGH-LEVEL tasks - one topic per task, not detailed implementation steps
- Focus on DELIVERING what the user asked for, not how to do it
- For DATA requests (numbers, lists, calculations): Plan to deliver the actual data
- For DOCUMENT requests (Word, PDF, Excel): Plan to create the formatted document
- For ANALYSIS requests: Plan to analyze and deliver insights
- Keep tasks simple and focused on outcomes, not implementation details
- Each task should produce usable results for subsequent tasks
- If retry request, analyze previous rounds to understand what failed
REQUIRED JSON STRUCTURE:
{{
"overview": "Brief description of the overall plan",
"languageUserDetected": "en",
"userMessage": "User-friendly message explaining the task plan",
"tasks": [
{{
"id": "task_1",
"objective": "Clear business objective focusing on what to deliver",
"dependencies": ["task_0"],
"success_criteria": ["measurable criteria 1", "measurable criteria 2"],
"estimated_complexity": "low|medium|high",
"userMessage": "What this task will accomplish"
}}
]
}}
RESPONSE: Return ONLY the JSON object."""
def createActionDefinitionPromptTemplate() -> str:
    """Return the action-definition prompt template.

    Placeholders (substituted by the caller before the AI call):
      {{KEY:USER_PROMPT}}            -- the task objective
      {{KEY:AVAILABLE_DOCUMENTS}}    -- documents in scope
      {{KEY:WORKFLOW_HISTORY}}       -- previous round summary
      {{KEY:AVAILABLE_METHODS}}      -- method/action catalogue
      {{KEY:AVAILABLE_CONNECTIONS}}  -- connection references
      {{KEY:USER_LANGUAGE}}          -- user's language code

    The AI must answer with a JSON object containing an "actions" list;
    method and action names must match the catalogue exactly.
    """
    # NOTE(review): brace escaping inside this template is inconsistent —
    # '"parameters": {}' and '{current_round}' use single braces while the
    # surrounding JSON uses doubled '{{' / '}}'. If this template is ever run
    # through str.format(), those single braces would be treated as
    # replacement fields and break it — confirm the templating mechanism.
    return """Generate the next action to advance toward completing the task objective.
TASK OBJECTIVE: {{KEY:USER_PROMPT}}
AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}
WORKFLOW HISTORY: {{KEY:WORKFLOW_HISTORY}}
AVAILABLE METHODS: {{KEY:AVAILABLE_METHODS}}
AVAILABLE CONNECTIONS: {{KEY:AVAILABLE_CONNECTIONS}}
USER LANGUAGE: {{KEY:USER_LANGUAGE}}
REQUIRED JSON STRUCTURE FOR YOUR RESPONSE:
{{
"actions": [
{{
"method": "method_name",
"action": "action_name",
"parameters": {},
"resultLabel": "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}",
"description": "What this action accomplishes",
"userMessage": "User-friendly message in {{KEY:USER_LANGUAGE}}"
}}
]
}}
CRITICAL:
- Use EXACT method names from AVAILABLE_METHODS (e.g., "ai", "document", "web")
- Use EXACT action names from AVAILABLE_METHODS (e.g., "process", "extract", "search")
- DO NOT combine method and action names (e.g., "document.extract" is WRONG)
- DO NOT create new method or action names
CORRECT EXAMPLE:
{{
"actions": [
{{
"method": "document",
"action": "extract",
"parameters": {{"documentList": ["docList:msg_123:results"], "prompt": "Extract data"}},
"resultLabel": "round1_task1_action1_extract_results",
"description": "Extract data from documents",
"userMessage": "Extracting data from documents"
}}
]
}}
WRONG EXAMPLE (DO NOT USE):
{{
"actions": [
{{
"method": "document.extract",
"action": "extract_data",
...
}}
]
}}
RESPONSE: Return ONLY the JSON object."""
def createActionSelectionPromptTemplate() -> str:
    """Return the action-selection prompt template.

    Placeholders: {{KEY:USER_PROMPT}}, {{KEY:AVAILABLE_DOCUMENTS}},
    {{KEY:USER_LANGUAGE}}, {{KEY:AVAILABLE_METHODS}}.

    The AI must select exactly one method/action pair and answer with the
    compact JSON shape {"action":{"method":...,"name":...}} — no parameters.
    """
    # NOTE(review): JSON examples here use SINGLE braces, unlike the doubled
    # braces in the planning/definition templates above — confirm which form
    # the downstream substitution expects; the inconsistency suggests only
    # one of the two styles survives processing correctly.
    return """Select exactly one action to advance the task.
OBJECTIVE: {{KEY:USER_PROMPT}}
AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}
USER LANGUAGE: {{KEY:USER_LANGUAGE}}
AVAILABLE METHODS:
{{KEY:AVAILABLE_METHODS}}
CRITICAL: Return ONLY the method and action name. Do NOT include parameters or prompts.
CRITICAL: Use EXACT method names from AVAILABLE_METHODS above - do NOT combine method and action names!
REQUIRED JSON FORMAT:
{"action":{"method":"method_name","name":"action_name"}}
EXAMPLES:
{"action":{"method":"ai","name":"process"}}
{"action":{"method":"document","name":"extract"}}
{"action":{"method":"document","name":"generate"}}
{"action":{"method":"web","name":"search"}}
WRONG FORMAT (DO NOT USE):
{"action":{"method":"document.extract","name":"some_action"}}
{"action":{"method":"ai.process","name":"some_action"}}"""
def createActionParameterPromptTemplate() -> str:
    """Return the action-parameter prompt template.

    Placeholders: {{KEY:SELECTED_ACTION}}, {{KEY:ACTION_SIGNATURE}},
    {{KEY:USER_PROMPT}}, {{KEY:AVAILABLE_DOCUMENTS}},
    {{KEY:AVAILABLE_CONNECTIONS}}, {{KEY:USER_LANGUAGE}}.

    The AI must answer with {"parameters":{...}} using exact document
    (docItem:/docList:) and connection reference strings from the context.
    """
    # NOTE(review): the MANDATORY/WRONG/CORRECT format instructions appear
    # twice (top and bottom of the template) — this looks like deliberate
    # "sandwich" emphasis for the model rather than accidental duplication;
    # do not dedupe without checking prompt-quality impact.
    return """CRITICAL: You MUST wrap all parameters in a "parameters" object!
MANDATORY RESPONSE FORMAT:
{"parameters":{"parameterName": "parameterValue"}}
EXAMPLES:
For aiPrompt parameter: {"parameters":{"aiPrompt": "Your prompt here"}}
For multiple parameters: {"parameters":{"aiPrompt": "Your prompt here", "language": "en"}}
WRONG FORMAT (DO NOT USE):
{"aiPrompt": "Your prompt here"}
```json
{"aiPrompt": "Your prompt here"}
```
CORRECT FORMAT (MUST USE):
{"parameters":{"aiPrompt": "Your prompt here"}}
DO NOT use code blocks or markdown. Return ONLY the JSON object with parameters wrapped in "parameters".
Provide only the required parameters for this action.
SELECTED ACTION: {{KEY:SELECTED_ACTION}}
ACTION SIGNATURE: {{KEY:ACTION_SIGNATURE}}
OBJECTIVE: {{KEY:USER_PROMPT}}
AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}
AVAILABLE CONNECTIONS: {{KEY:AVAILABLE_CONNECTIONS}}
USER LANGUAGE: {{KEY:USER_LANGUAGE}}
DOCUMENT REFERENCE TYPES:
- docItem: Reference to a single document (e.g., "docItem:uuid:filename.pdf")
- docList: Reference to a group of documents (e.g., "docList:msg_123:AnalysisResults")
- Use the EXACT reference strings shown in AVAILABLE_DOCUMENTS (e.g., "docList:msg_123:round1_task1_action1_results")
CONNECTION REFERENCE TYPES:
- Use exact connection references from AVAILABLE CONNECTIONS (e.g., "conn_microsoft_123", "conn_sharepoint_456")
CRITICAL RULES:
- ONLY use exact document reference strings from AVAILABLE_DOCUMENTS (e.g., "docList:msg_123:round1_task1_action1_results")
- DO NOT add file paths or individual filenames to document references
- ONLY use exact connection references from AVAILABLE CONNECTIONS
- For documentList parameters: Use the EXACT reference strings shown in AVAILABLE_DOCUMENTS
- For connectionReference parameters: Use the exact connection reference from AVAILABLE CONNECTIONS
- Include user language if relevant
- Avoid unnecessary fields; host applies defaults
- Use the ACTION SIGNATURE above to understand what parameters are required
- Convert the objective into appropriate parameter values as needed
CRITICAL: You MUST wrap all parameters in a "parameters" object!
MANDATORY RESPONSE FORMAT:
{"parameters":{"parameterName": "parameterValue"}}
EXAMPLES:
For aiPrompt parameter:
{"parameters":{"aiPrompt": "Your prompt here"}}
For multiple parameters:
{"parameters":{"aiPrompt": "Your prompt here", "language": "en"}}
WRONG FORMAT (DO NOT USE):
{"aiPrompt": "Your prompt here"}
```json
{"aiPrompt": "Your prompt here"}
```
CORRECT FORMAT (MUST USE):
{"parameters":{"aiPrompt": "Your prompt here"}}
DO NOT use code blocks or markdown. Return ONLY the JSON object with parameters wrapped in "parameters"."""
def createRefinementPromptTemplate() -> str:
    """Return the refinement (continue/stop) prompt template.

    Placeholders: {{KEY:USER_PROMPT}} (objective) and
    {{KEY:REVIEW_CONTENT}} (observation to evaluate).

    The AI must answer with a compact JSON decision object of the form
    {"decision":"continue"|"stop","reason":"..."}.
    """
    return """Decide next step based on observation.
OBJECTIVE: {{KEY:USER_PROMPT}}
OBSERVATION:
{{KEY:REVIEW_CONTENT}}
CRITICAL RULES:
- If user wants DATA (numbers, lists, calculations): Ensure AI delivers the actual data, not code
- If user wants DOCUMENTS (Word, PDF, Excel): Ensure appropriate method is used to create the document
- If user wants ANALYSIS: Ensure AI analyzes and delivers insights
- NEVER accept code when user wants data - demand the actual data
- NEVER accept algorithms when user wants results - demand the actual results
DECISION RULES:
- If the objective is fulfilled (user got what they asked for), decide stop
- If the objective is not fulfilled (user didn't get what they asked for), decide continue
- Focus on what the user actually wants, not what was delivered
RESPONSE FORMAT (JSON only):
{"decision":"continue","reason":"Need more data"}"""
def createResultReviewPromptTemplate() -> str:
    """Return the result-review (validation) prompt template.

    Placeholders: {{KEY:USER_PROMPT}} (task objective) and
    {{KEY:REVIEW_CONTENT}} (execution results to validate).

    The AI must answer with a JSON object reporting status
    (success|retry|failed), reasoning, improvements, quality score,
    met/unmet criteria, confidence, and a user-facing message.
    """
    # NOTE(review): the '// 1-10 scale' style comments inside the example are
    # illustrative for the model but are not valid JSON — any strict parsing
    # of an AI response that echoes them verbatim would fail; the final NOTE
    # line instructs the model to return pure JSON, which mitigates this.
    return """Review task execution outcomes and determine success, retry needs, or failure.
TASK OBJECTIVE: {{KEY:USER_PROMPT}}
EXECUTION RESULTS:
{{KEY:REVIEW_CONTENT}}
VALIDATION CRITERIA:
- Review each action's success/failure status
- Check if required documents were produced
- Validate document quality and completeness
- Assess if success criteria were met
- Identify any missing or incomplete outputs
- Determine if retry would help or if task should be marked as failed
REQUIRED JSON STRUCTURE:
{{
"status": "success|retry|failed",
"reason": "Detailed explanation of the validation decision",
"improvements": ["specific improvement 1", "specific improvement 2"],
"quality_score": 8, // 1-10 scale
"met_criteria": ["criteria1", "criteria2"],
"unmet_criteria": ["criteria3", "criteria4"],
"confidence": 0.85, // 0.0-1.0 scale
"userMessage": "User-friendly message explaining the validation result"
}}
VALIDATION PRINCIPLES:
- Be thorough but fair in assessment
- Focus on business value and outcomes
- Consider both technical execution and business results
- Provide specific, actionable improvement suggestions
- Use quality scores to track progress across retries
- Clearly identify which success criteria were met vs. unmet
- Set appropriate confidence levels based on evidence quality
NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
# Helper functions to extract content for placeholders
def extractUserPrompt(context) -> str:
    """Return the current task objective from *context*.

    Reads ``context.task_step.objective`` when a truthy ``task_step``
    attribute exists; falls back to 'No request specified' when the
    attribute is absent, falsy, or its objective is empty.
    """
    fallback = 'No request specified'
    step = getattr(context, 'task_step', None)
    if not step:
        return fallback
    return step.objective or fallback
def extractAvailableDocuments(context) -> str:
    """Return the pre-formatted document listing from *context*.

    Uses ``context.available_documents`` when present and truthy;
    otherwise returns the fixed 'No documents available' message.
    """
    docs = getattr(context, 'available_documents', None)
    return docs if docs else "No documents available"
def extractWorkflowHistory(service, context) -> str:
    """Return a summary of previous workflow rounds for *context*.

    Delegates to ``getPreviousRoundContext`` when a truthy ``workflow``
    attribute is present; otherwise (or when the helper returns an empty
    value) yields the fixed first-round message.
    """
    first_round = "No previous workflow rounds - this is the first round."
    workflow = getattr(context, 'workflow', None)
    if not workflow:
        return first_round
    return getPreviousRoundContext(service, workflow) or first_round
def extractAvailableMethods(service) -> str:
    """Render the discovered method/action catalogue as pretty-printed JSON.

    The result maps short method names (e.g. 'ai', 'document') to a dict of
    action name -> description, which the AI templates consume via the
    AVAILABLE_METHODS placeholder. Returns '{}' (as JSON) on any failure.
    """
    try:
        # Lazily populate the shared registry on first use.
        # NOTE(review): this assumes discoverMethods() mutates the imported
        # `methods` dict in place; if it instead rebinds the name inside
        # promptFactory, this module's binding would stay empty — confirm.
        if not methods:
            discoverMethods(service)
        catalogue = {}
        for method_name, method_info in methods.items():
            # 'MethodAi' -> 'ai', 'MethodDocument' -> 'document', etc.
            short_name = method_name.replace('Method', '').lower()
            catalogue[short_name] = {
                action_name: action_info.get('description', f"Execute {action_name} action")
                for action_name, action_info in method_info['actions'].items()
            }
        return json.dumps(catalogue, indent=2, ensure_ascii=False)
    except Exception as e:
        logger.error(f"Error extracting available methods: {str(e)}")
        return json.dumps({}, indent=2, ensure_ascii=False)
def extractUserLanguage(service) -> str:
    """Return the user's language code from *service*, defaulting to 'en'.

    Falls back when either the service or its ``user`` attribute is falsy.
    """
    if service and service.user:
        return service.user.language
    return 'en'
def _formatActionResults(action_results) -> str:
    """Build a human-readable summary of action results.

    For each result: success flag, error text (if any), and per-document
    metadata (never document content). 'Unknown' metadata values are
    dropped to keep the summary compact.
    """
    result_summary = ""
    for i, result in enumerate(action_results):
        result_summary += f"\nRESULT {i+1}:\n"
        result_summary += f" Success: {result.success}\n"
        if result.error:
            result_summary += f" Error: {result.error}\n"
        if result.documents:
            result_summary += f" Documents: {len(result.documents)} document(s)\n"
            for doc in result.documents:
                # Collect every metadata field the document object may carry,
                # without touching its content.
                doc_metadata = {
                    "name": getattr(doc, 'documentName', 'Unknown'),
                    "mimeType": getattr(doc, 'mimeType', 'Unknown'),
                    "size": getattr(doc, 'size', 'Unknown'),
                    "created": getattr(doc, 'created', 'Unknown'),
                    "modified": getattr(doc, 'modified', 'Unknown'),
                    "typeGroup": getattr(doc, 'typeGroup', 'Unknown'),
                    "documentId": getattr(doc, 'documentId', 'Unknown'),
                    "reference": getattr(doc, 'reference', 'Unknown')
                }
                # Remove 'Unknown' values to keep it clean
                doc_metadata = {k: v for k, v in doc_metadata.items() if v != 'Unknown'}
                result_summary += f" - {json.dumps(doc_metadata, indent=6, ensure_ascii=False)}\n"
        else:
            result_summary += f" Documents: None\n"
    return result_summary
def _formatObservation(observation) -> str:
    """Serialize an observation to JSON, redacting preview snippets.

    Dict observations have each preview 'snippet' replaced by a short
    '[Content: N characters]' indicator so raw document content does not
    leak into review prompts; non-dict observations are dumped as-is
    (compact, no indent — matches historical behavior).

    BUGFIX: the original code rewrote snippet values through a shallow
    ``observation.copy()``, which mutated the caller's nested preview
    dicts in place. Each preview dict is now copied before modification.
    """
    if not isinstance(observation, dict):
        return json.dumps(observation, ensure_ascii=False)
    obs_copy = observation.copy()
    previews = obs_copy.get('previews')
    if isinstance(previews, list):
        redacted = []
        for preview in previews:
            if isinstance(preview, dict) and 'snippet' in preview:
                preview = dict(preview)  # copy so the caller's data is untouched
                preview['snippet'] = f"[Content: {len(preview.get('snippet', ''))} characters]"
            redacted.append(preview)
        obs_copy['previews'] = redacted
    return json.dumps(obs_copy, indent=2, ensure_ascii=False)
def extractReviewContent(context) -> str:
    """Extract review content from context with full document metadata.

    Sources are checked in priority order:
      1. ``context.action_results`` -- per-action success/error/document summary
      2. ``context.observation``    -- JSON with preview snippets redacted
      3. ``context.step_result['observation']`` -- same redaction
    Returns 'No review content available' when none are present.
    """
    if hasattr(context, 'action_results') and context.action_results:
        return _formatActionResults(context.action_results)
    elif hasattr(context, 'observation') and context.observation:
        return _formatObservation(context.observation)
    elif hasattr(context, 'step_result') and context.step_result and 'observation' in context.step_result:
        return _formatObservation(context.step_result['observation'])
    else:
        return "No review content available"