gateway/modules/chat/handling/promptFactory.py

# promptFactory.py
# Contains all prompt creation functions extracted from managerChat.py

import json
import logging
from typing import Any, Dict

# Prompt creation helpers extracted from managerChat.py

def createTaskPlanningPrompt(context: Dict[str, Any]) -> str:
    """Create prompt for task planning"""
    return f"""You are a task planning AI that analyzes user requests and creates structured task plans.

USER REQUEST: {context['user_request']}

AVAILABLE DOCUMENTS: {', '.join(context['available_documents'])}

INSTRUCTIONS:
1. Analyze the user request and available documents
2. Break down the request into 2-4 meaningful high-level task steps
3. Focus on business outcomes, not technical operations
4. Each task should produce meaningful, usable outputs
5. Ensure proper handover between tasks using result labels
6. Return a JSON object with the exact structure shown below

TASK PLANNING PRINCIPLES:
- Break down complex requests into logical, sequential steps
- Focus on business value and outcomes
- Keep tasks at a meaningful level of abstraction
- Each task should produce results that can be used by subsequent tasks
- Ensure clear dependencies and handovers between tasks

REQUIRED JSON STRUCTURE:
{{
    \"overview\": \"Brief description of the overall plan\",
    \"tasks\": [
        {{
            \"id\": \"task_1\",
            \"objective\": \"Clear business objective this task accomplishes\",
            \"dependencies\": [\"task_0\"],  // IDs of tasks that must complete first
            \"success_criteria\": [\"criteria1\", \"criteria2\"],
            \"estimated_complexity\": \"low|medium|high\"
        }}
    ]
}}

EXAMPLES OF GOOD TASK OBJECTIVES:
- \"Extract key information from documents for email preparation\"
- \"Draft professional email incorporating analyzed information\"
- \"Send email using specified email account\"
- \"Store email draft and confirmation in system\"

EXAMPLES OF GOOD SUCCESS CRITERIA:
- \"Document analysis completed with key points identified\"
- \"Email draft created with professional tone and clear structure\"
- \"Email successfully sent with delivery confirmation\"
- \"All outputs properly stored and accessible for future use\"

EXAMPLES OF BAD TASK OBJECTIVES:
- \"Open and read the PDF file\" (too granular)
- \"Identify table structure\" (technical detail)
- \"Convert data to CSV format\" (implementation detail)

NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""

async def createActionDefinitionPrompt(context, service) -> str:
    """Create prompt for action generation with enhanced document extraction guidance and retry context"""
    task_step = context.task_step
    workflow = context.workflow
    available_docs = context.available_documents or []
    previous_results = context.previous_results or []
    improvements = context.improvements or []
    retry_count = context.retry_count or 0
    previous_action_results = context.previous_action_results or []
    previous_review_result = context.previous_review_result
    methodList = service.getMethodsList()
    method_actions = {}
    for sig in methodList:
        if '.' in sig:
            method, rest = sig.split('.', 1)
            action = rest.split('(')[0]
            method_actions.setdefault(method, []).append((action, sig))
    messageSummary = await service.summarizeChat(workflow.messages)
    # Get ALL documents from the entire workflow, not just current round
    docRefs = service.getDocumentReferenceList()
    connRefs = service.getConnectionReferenceList()

    # Get documents from current round (chat) and entire workflow history
    current_round_docs = docRefs.get('chat', [])
    workflow_history_docs = docRefs.get('history', [])

    # Combine all documents, prioritizing current round first, then workflow history
    all_doc_refs = current_round_docs + workflow_history_docs

    # Log document availability for debugging
    logging.debug(f"Document references - Current round: {len(current_round_docs)}, Workflow history: {len(workflow_history_docs)}, Total: {len(all_doc_refs)}")
    available_methods_str = ''
    for method, actions in method_actions.items():
        available_methods_str += f"- {method}:\n"
        for action, sig in actions:
            available_methods_str += f"    - {action}: {sig}\n"
    retry_context = ""
    if retry_count > 0:
        retry_context = f"""
RETRY CONTEXT (Attempt {retry_count}):
Previous action results that failed or were incomplete:
"""
        for i, result in enumerate(previous_action_results):
            retry_context += f"- Action {i+1}: {result.actionMethod or 'unknown'}.{result.actionName or 'unknown'}\n"
            retry_context += f"  Status: {result.success and 'success' or 'failed'}\n"
            retry_context += f"  Error: {result.error or 'None'}\n"
            retry_context += f"  Result: {(result.data.get('result', '') if result.data else '')[:100]}...\n"
        if previous_review_result:
            retry_context += f"""
Previous review feedback:
- Status: {previous_review_result.status or 'unknown'}
- Reason: {previous_review_result.reason or 'No reason provided'}
- Quality Score: {previous_review_result.quality_score or 0}/10
- Unmet Criteria: {', '.join(previous_review_result.unmet_criteria or [])}
"""
    success_criteria_str = ', '.join(task_step.success_criteria or [])
    previous_results_str = ', '.join(previous_results) if previous_results else 'None'
    improvements_str = str(improvements) if improvements else 'None'
    available_connections_str = '\n'.join(f"- {conn}" for conn in connRefs)
    # Build comprehensive document list showing both current round and workflow history
    if all_doc_refs:
        available_documents_str = "CURRENT ROUND DOCUMENTS:\n"
        if current_round_docs:
            for doc in current_round_docs:
                available_documents_str += f"- {doc.documentsLabel} contains {', '.join(doc.documents)}\n"
        else:
            available_documents_str += "- No documents in current round\n"

        available_documents_str += "\nWORKFLOW HISTORY DOCUMENTS:\n"
        if workflow_history_docs:
            for doc in workflow_history_docs:
                available_documents_str += f"- {doc.documentsLabel} contains {', '.join(doc.documents)}\n"
        else:
            available_documents_str += "- No documents in workflow history\n"
    else:
        available_documents_str = "NO DOCUMENTS AVAILABLE - This workflow has no documents to process."

    # Debug logging for document availability
    logging.debug(f"Available documents string length: {len(available_documents_str)}")
    logging.debug(f"Current round docs count: {len(current_round_docs)}")
    logging.debug(f"Workflow history docs count: {len(workflow_history_docs)}")
    logging.debug(f"Total doc refs: {len(all_doc_refs)}")

    prompt = f"""
You are an action generation AI that creates specific actions to accomplish a task step.

DOCUMENT REFERENCE TYPES:
- docItem: Reference to a single document. Format: "docItem:<id>:<filename>"
- docList: Reference to a group of documents under a label. Format: <label> (e.g., "task1_action2_results").
  - Each docList label maps to a list of docItem references (see AVAILABLE DOCUMENTS).
  - A label like "task1_action2_results" refers to the output of action 2 in task 1.

CRITICAL DOCUMENT REFERENCE RULES:
- ONLY use the exact labels listed in AVAILABLE DOCUMENTS below
- NEVER invent new labels or use message IDs
- NEVER use formats like "msg_xxx:documents" or "task_X_results" (these will fail)
- ONLY use the exact labels shown in AVAILABLE DOCUMENTS
- When generating multiple actions, you may only use as input documents those that are already present in AVAILABLE DOCUMENTS or produced by actions that come earlier in the list. Do NOT use as input any document label that will be produced by a later action.
- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you CANNOT create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed.

TASK STEP: {task_step.objective} (ID: {task_step.id})
SUCCESS CRITERIA: {success_criteria_str}

CONTEXT - Chat History:
{messageSummary}

AVAILABLE METHODS AND ACTIONS (with signatures):
{available_methods_str}

AVAILABLE CONNECTIONS:
{available_connections_str}

AVAILABLE DOCUMENTS:
{available_documents_str}

DOCUMENT REFERENCE EXAMPLES:
✅ CORRECT: Use exact labels from AVAILABLE DOCUMENTS above
- "task2_action1_personnel_search"
- "task2_action3_personnel_analysis"
- "docItem:doc_abc:file1.txt"
- "docList:msg123:user_uploads" (supported format, but use actual labels instead)

❌ INCORRECT: These will cause errors
- "msg_xxx:documents" (invalid format - missing docList/docItem prefix)
- "task_2_results" (not a valid label - use exact labels from AVAILABLE DOCUMENTS)
- Inventing message IDs instead of using actual document labels

PREVIOUS RESULTS: {previous_results_str}
IMPROVEMENTS NEEDED: {improvements_str}{retry_context}

ACTION GENERATION PRINCIPLES:
- Create meaningful actions per task step
- Use comprehensive AI prompts for document processing
- Focus on business outcomes, not technical operations
- Combine related operations into single actions when possible
- Use the task's AI prompt if provided, or create a comprehensive one
- Each action should produce meaningful, usable outputs
- For document extraction, ensure prompts are specific and detailed
- Include validation steps in extraction prompts
- If this is a retry, learn from previous failures and improve the approach
- Address specific issues mentioned in previous review feedback
- When specifying expectedDocumentFormats, ensure AI prompts explicitly request pure data without markdown formatting

INSTRUCTIONS:
- Generate actions to accomplish this task step using available documents, connections, and previous results
- Use docItem for single documents and docList labels for groups of documents as shown in AVAILABLE DOCUMENTS
- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you cannot create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed.
- Always pass documentList as a LIST of references (docItem and/or docList) - this list CANNOT be empty for document extraction actions
- For resultLabel, use the format: "task{{task_id}}_action{{action_number}}_{{short_label}}" where:
    - {{task_id}} = the current task's id (e.g., 1)
    - {{action_number}} = the sequence number of the action within the task (e.g., 2)
    - {{short_label}} = a short, descriptive label for the output (e.g., "analysis_results")
  Example: "task1_action2_analysis_results"
- If this is a retry, ensure the new actions address the specific issues from previous attempts
- Follow the JSON structure below. All fields are required.

REQUIRED JSON STRUCTURE:
{{
    "actions": [
        {{
            "method": "method_name",  // Use only the method name (e.g., "document")
            "action": "action_name",  // Use only the action name (e.g., "extract")
            "parameters": {{
                "documentList": ["docItem:doc_abc:file1.txt", "task1_action2_results"],
                "aiPrompt": "Comprehensive AI prompt describing what to accomplish"
            }},
            "resultLabel": "task1_action3_analysis_results",
            "expectedDocumentFormats": [  // OPTIONAL: Specify expected document formats when needed
                {{
                    "extension": ".txt",
                    "mimeType": "text/plain",
                    "description": "Structured data output"
                }}
            ],
            "description": "What this action accomplishes (business outcome)"
        }}
    ]
}}

FIELD REQUIREMENTS:
- "method": Must be from AVAILABLE METHODS
- "action": Must be valid for the method
- "parameters": Method-specific, must include documentList as a list if required by the signature
- "resultLabel": Must follow the format above (e.g., "task1_action3_analysis_results")
- "expectedDocumentFormats": OPTIONAL - Only specify when you need to control output format
  - Use when you need specific file types (e.g., CSV for data, JSON for structured output)
  - Omit when format is flexible (e.g., folder queries with mixed file types)
  - Each format should specify: extension, mimeType, description
  - When using expectedDocumentFormats, ensure the aiPrompt explicitly requests pure data without markdown formatting
- "description": Clear summary of the business outcome

EXAMPLES OF GOOD ACTIONS:

1. Document analysis with specific output format (use expectedDocumentFormats):
{{
    "method": "document",
    "action": "extract",
    "parameters": {{
        "documentList": ["docItem:doc_57520394-6b6d-41c2-b641-bab3fc6d7f4b:candidate_1_profile.txt"],
        "aiPrompt": "Extract and analyze the candidate's qualifications, experience, skills, and suitability for the product designer position. Identify key strengths, relevant experience, technical skills, and any areas of concern. Provide a comprehensive assessment that can be used for evaluation."
    }},
    "resultLabel": "task1_action1_candidate_analysis",
    "expectedDocumentFormats": [
        {{
            "extension": ".json",
            "mimeType": "application/json",
            "description": "Structured candidate analysis data"
        }}
    ],
    "description": "Comprehensive analysis of candidate profile for evaluation"
}}

2. Multi-document processing with flexible output (omit expectedDocumentFormats):
{{
    "method": "document",
    "action": "extract",
    "parameters": {{
        "documentList": ["task1_action1_candidate_analysis", "task1_action2_candidate_analysis", "task1_action3_candidate_analysis"],
        "aiPrompt": "Compare all three candidate profiles and create an evaluation matrix. Rate each candidate on technical skills, experience level, cultural fit, portfolio quality, and communication skills. Provide clear rankings and recommendations for the product designer position."
    }},
    "resultLabel": "task1_action4_evaluation_matrix",
    "description": "Create comprehensive evaluation matrix comparing all candidates"
}}

3. Data extraction with specific CSV format:
{{
    "method": "document",
    "action": "extract",
    "parameters": {{
        "documentList": ["docItem:doc_abc:table_data.pdf"],
        "aiPrompt": "Extract all table data and convert to structured CSV format with proper headers and data types. IMPORTANT: Deliver pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows."
    }},
    "resultLabel": "task1_action2_structured_data",
    "expectedDocumentFormats": [
        {{
            "extension": ".csv",
            "mimeType": "text/csv",
            "description": "Structured table data in CSV format"
        }}
    ],
    "description": "Extract and structure table data for analysis"
}}

4. Comprehensive summary report from multiple documents (use generateReport):
{{
    "method": "document",
    "action": "generateReport",
    "parameters": {{
        "documentList": ["task1_action1_candidate_analysis", "task1_action2_candidate_analysis", "task1_action3_candidate_analysis"],
        "title": "Comprehensive Candidate Evaluation Report"
    }},
    "resultLabel": "task1_action5_summary_report",
    "description": "Generate a comprehensive, professional HTML report consolidating all candidate analyses and findings"
}}

5. Correct chaining of actions within a task:
{{
    "actions": [
        {{
            "method": "document",
            "action": "extract",
            "parameters": {{
                "documentList": ["docItem:doc_abc:file1.txt"],
                "aiPrompt": "Extract data from file1."
            }},
            "resultLabel": "task1_action1_extracted_data",
            "description": "Extract data from file1."
        }},
        {{
            "method": "document",
            "action": "generateReport",
            "parameters": {{
                "documentList": ["task1_action1_extracted_data"],
                "title": "Report"
            }},
            "resultLabel": "task1_action2_report",
            "description": "Generate report from extracted data."
        }}
    ]
}}

6. When no documents are available (NO DOCUMENTS AVAILABLE scenario):
{{
    "method": "document",
    "action": "generateReport",
    "parameters": {{
        "documentList": [],
        "title": "Workflow Status Report"
    }},
    "resultLabel": "task1_action1_status_report",
    "description": "Generate a status report informing the user that no documents are available for processing and requesting document upload or alternative input."
}}

IMPORTANT NOTES:
- Respond with ONLY the JSON object. Do not include any explanatory text.
- Before creating any document extraction action, verify that AVAILABLE DOCUMENTS contains actual document references.
- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", use example 6 above to create a status report action instead of document extraction."""
    logging.debug(f"[ACTION PLAN PROMPT] Available Documents Section:\n{available_documents_str}\nUser Connections Section:\n{available_connections_str}\nAvailable Methods (detailed):\n{available_methods_str}")
    return prompt

async def createResultReviewPrompt(review_context) -> str:
    """Create prompt for result review"""
    task_step = review_context.task_step
    step_result = review_context.step_result or {}
    step_result_serializable = {
        'task_step': {
            'id': task_step.id,
            'objective': task_step.objective,
            'success_criteria': task_step.success_criteria or []
        },
        'action_results': [],
        'successful_actions': step_result.get('successful_actions', 0),
        'total_actions': step_result.get('total_actions', 0),
        'results_count': len(step_result.get('results', [])),
        'errors_count': len(step_result.get('errors', []))
    }
    for action_result in (review_context.action_results or []):
        documents_metadata = []

        # FIX: Look for documents in the correct place - action_result.data.documents contains actual document objects
        # action_result.documents only contains document references (strings)
        documents_to_check = action_result.data.get("documents", [])

        for doc in documents_to_check:
            if hasattr(doc, 'filename'):
                documents_metadata.append({
                    'filename': doc.filename,
                    'fileSize': getattr(doc, 'fileSize', 0),
                    'mimeType': getattr(doc, 'mimeType', 'unknown')
                })
            elif isinstance(doc, dict):
                documents_metadata.append({
                    'filename': doc.get('filename', 'unknown'),
                    'fileSize': doc.get('fileSize', 0),
                    'mimeType': doc.get('mimeType', 'unknown')
                })
            elif isinstance(doc, str):
                # Handle case where documents are just filenames
                documents_metadata.append({
                    'filename': doc,
                    'fileSize': 0,
                    'mimeType': 'unknown'
                })

        serializable_action_result = {
            'status': 'completed' if action_result.success else 'failed',
            'result_summary': action_result.data.get('result', '')[:200] + '...' if len(action_result.data.get('result', '')) > 200 else action_result.data.get('result', ''),
            'error': action_result.error,
            'resultLabel': action_result.data.get('resultLabel', ''),
            'documents_count': len(documents_metadata),
            'documents_metadata': documents_metadata,
            'actionId': action_result.actionId,
            'actionMethod': action_result.actionMethod,
            'actionName': action_result.actionName,
            'success_indicator': (
                'documents' if len(documents_metadata) > 0 else
                'text_result' if action_result.data.get('result', '').strip() else 'none'
            )
        }
        step_result_serializable['action_results'].append(serializable_action_result)
    step_result_json = json.dumps(step_result_serializable, indent=2, ensure_ascii=False)
    success_criteria_str = ', '.join(task_step.success_criteria or [])
    return f"""You are a result review AI that evaluates task step completion with BASIC validation.

TASK STEP: {task_step.objective}
SUCCESS CRITERIA: {success_criteria_str}

STEP RESULT: {step_result_json}

BASIC VALIDATION RULES:
1. SUCCESS if: Action completed AND (documents were produced OR meaningful text output exists)
2. RETRY if: Action failed due to technical issues that can be fixed
3. FAILED if: Action completely failed with no recoverable output

VALIDATION PRINCIPLES:
- Be GENEROUS with success - if the action achieved its basic purpose, mark as success
- Focus on FUNCTIONALITY, not perfection
- Document outputs are PRIMARY indicators of success
- Text outputs are SECONDARY indicators
- Only retry for CLEAR technical issues, not minor imperfections
- Don't be picky about formatting or minor details
- Check if ANY documents were produced (documents_count > 0)
- If documents were produced, consider it a SUCCESS

EXAMPLES OF SUCCESS:
- Document extraction produced a file (even if imperfect)
- Text analysis provided meaningful insights
- Data processing completed with results
- Any action that produced documents (documents_count > 0)

EXAMPLES OF RETRY:
- Technical errors (API failures, timeouts)
- Missing required inputs
- Clear implementation bugs

EXAMPLES OF FAILED:
- Complete system failures
- No output whatsoever
- Unrecoverable errors
- Actions with documents_count = 0 AND no meaningful text output

REQUIRED JSON STRUCTURE:
{{
    "status": "success|retry|failed",
    "reason": "Brief explanation",
    "improvements": ["specific technical fixes only"],
    "quality_score": 1-10,
    "met_criteria": ["basic functionality achieved"],
    "unmet_criteria": []
}}

VALIDATION LOGIC:
- If ANY action has documents_count > 0, mark as SUCCESS
- If ALL actions have documents_count = 0 AND no meaningful text output, mark as FAILED
- Only mark as RETRY for clear technical issues that can be fixed
- Focus on actual document production and functionality, not specific output names

NOTE: Respond with ONLY the JSON object. Be GENEROUS with success ratings."""