gateway/modules/workflows/processing/shared/placeholderFactory.py
2026-01-23 01:10:00 +01:00

569 lines
28 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Placeholder Factory
Centralized placeholder extraction functions for all workflow modes.
Each function corresponds to a {{KEY:PLACEHOLDER_NAME}} in prompt templates.
NAMING CONVENTION:
- All functions follow pattern: extract{PlaceholderName}()
- Placeholder names are in UPPER_CASE with underscores
- Function names are in camelCase
MAPPING TABLE (keys → function) with usage [taskplan | dynamic]:
{{KEY:USER_PROMPT}} -> extractUserPrompt() [taskplan, dynamic]
{{KEY:OVERALL_TASK_CONTEXT}} -> services.currentUserPromptNormalized (always set in WorkflowManager._sendFirstMessage) [direct]
{{KEY:TASK_OBJECTIVE}} -> context.taskStep.objective (always set in TaskPlanner.generateTaskPlan) [direct]
{{KEY:USER_LANGUAGE}} -> extractUserLanguage() [dynamic]
{{KEY:LANGUAGE_USER_DETECTED}} -> extractLanguageUserDetected() [taskplan]
{{KEY:WORKFLOW_HISTORY}} -> extractWorkflowHistory() [taskplan, dynamic]
{{KEY:AVAILABLE_CONNECTIONS_INDEX}} -> extractAvailableConnectionsIndex() [dynamic]
{{KEY:AVAILABLE_CONNECTIONS_SUMMARY}} -> extractAvailableConnectionsSummary() []
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}} -> extractAvailableDocumentsSummary() [taskplan, dynamic]
{{KEY:AVAILABLE_DOCUMENTS_INDEX}} -> extractAvailableDocumentsIndex() [dynamic]
{{KEY:AVAILABLE_METHODS}} -> extractAvailableMethods() [dynamic]
{{KEY:REVIEW_CONTENT}} -> extractReviewContent() [dynamic]
{{KEY:PREVIOUS_ACTION_RESULTS}} -> extractPreviousActionResults() [dynamic]
{{KEY:LEARNINGS_AND_IMPROVEMENTS}} -> extractLearningsAndImprovements() [dynamic]
{{KEY:LATEST_REFINEMENT_FEEDBACK}} -> extractLatestRefinementFeedback() [dynamic]
Following placeholders are populated directly by prompt builders with according context in promptGenerationActionsDynamic module:
- ACTION_OBJECTIVE,
- SELECTED_ACTION,
- ACTION_SIGNATURE
"""
import json
import logging
from typing import Dict, Any, List
logger = logging.getLogger(__name__)
from modules.workflows.processing.shared.methodDiscovery import (methods, discoverMethods)
def extractUserPrompt(context: Any) -> str:
    """Extract user prompt from context. Maps to {{KEY:USER_PROMPT}}.

    Precedence: the normalized prompt computed at workflow start wins;
    otherwise the raw prompt on the services object, then the task step
    objective, then a fixed fallback string.
    """
    try:
        svc = getattr(context, 'services', None)

        # The analyzer in WorkflowManager stores a cleaned-up prompt; prefer it.
        if svc:
            cleaned = getattr(svc, 'currentUserPromptNormalized', None)
            if cleaned:
                return cleaned

        # Raw prompt from services, then the task step objective.
        if svc and getattr(svc, 'currentUserPrompt', None):
            return svc.currentUserPrompt
        step = getattr(context, 'taskStep', None)
        if step:
            return step.objective
        return 'No request specified'
    except Exception:
        # Robust fallback: use the task step objective when available.
        step = getattr(context, 'taskStep', None)
        if step:
            return step.objective
        return 'No request specified'
def extractNormalizedRequest(services: Any) -> str:
    """Extract normalized user request from services. Maps to {{KEY:NORMALIZED_REQUEST}}.

    Returns the full normalized request captured during user-input analysis
    (keeps every constraint and detail). CRITICAL: this must be the detailed
    normalizedRequest from analysis, NOT the short intent string; an exact
    match against the stored intent is reported as an error.
    """
    try:
        normalized = getattr(services, 'currentUserPromptNormalized', None) if services else None
        if not normalized:
            return 'No normalized request specified'

        # Fetch the intent recorded on the workflow (if any) for sanity checking.
        intent = ''
        workflow = getattr(services, 'workflow', None)
        if workflow:
            intent = getattr(workflow, '_workflowIntent', {}).get('intent', '')

        # The normalized request must be richer than the concise intent; an
        # exact match means the wrong value was stored upstream.
        if intent and normalized == intent:
            logger.warning(f"extractNormalizedRequest: normalized request matches intent - this is incorrect! normalized={normalized[:100]}...")
            return f"ERROR: Normalized request not properly stored. Expected detailed request, got intent: {intent}"
        return normalized
    except Exception as e:
        logger.error(f"Error extracting normalized request: {str(e)}")
        return 'No normalized request specified'
def extractUserIntent(services: Any) -> str:
    """Extract user intent from services. Maps to {{KEY:USER_INTENT}}.

    Returns the concise intent captured during user-input analysis;
    falls back to the normalized request when no intent is stored.
    """
    try:
        if services:
            # currentUserPrompt carries the intent from analysis; identical
            # values for intent and normalized request are acceptable.
            intent = getattr(services, 'currentUserPrompt', None)
            if intent:
                return intent
            # No intent available - fall back to the normalized request.
            fallback = getattr(services, 'currentUserPromptNormalized', None)
            if fallback:
                return fallback
        return 'No intent specified'
    except Exception:
        return 'No intent specified'
def extractWorkflowHistory(service: Any) -> str:
    """Extract workflow history. Maps to {{KEY:WORKFLOW_HISTORY}}.

    Delegates to getPreviousRoundContext(): reverse-chronological rounds
    enriched with message summaries and document labels.
    """
    try:
        return getPreviousRoundContext(service) or "No previous workflow rounds available"
    except Exception as e:
        logger.error(f"Error getting workflow history: {str(e)}")
        return "No previous workflow rounds available"
def extractAvailableMethods(service: Any, filterDynamicMode: bool = True) -> str:
    """Extract available methods for action planning. Maps to {{KEY:AVAILABLE_METHODS}}.

    Args:
        service: Service object used to trigger method discovery on demand.
        filterDynamicMode: When True (default for dynamic workflow prompts),
            only actions whose definition carries dynamicMode=True are listed.

    Returns:
        JSON object mapping compound action names ("method.action") to their
        descriptions; an empty JSON object string on failure.
    """
    try:
        # Lazily populate the global registry on first use.
        if not methods:
            discoverMethods(service)

        actionCatalog: Dict[str, str] = {}
        seenAliases = set()  # short names already handled (registry stores aliases too)

        for registryName, registryEntry in methods.items():
            # Only walk full class names (MethodXxx); short-name aliases point
            # at the same entries and would otherwise be processed twice.
            if not registryName.startswith('Method'):
                continue

            # MethodAi -> ai, MethodDocument -> document, ...
            alias = registryName.replace('Method', '').lower()
            if alias in seenAliases:
                continue
            seenAliases.add(alias)

            # The instance exposes _actions with WorkflowActionDefinition objects.
            instance = registryEntry.get('instance')
            if not instance:
                continue

            for actionName, actionInfo in registryEntry['actions'].items():
                if filterDynamicMode:
                    # Consult the original definition for the dynamicMode flag.
                    if hasattr(instance, '_actions') and actionName in instance._actions:
                        if not getattr(instance._actions[actionName], 'dynamicMode', False):
                            continue

                # Compound action name ("method.action") for easier AI parsing.
                description = actionInfo.get('description', f"Execute {actionName} action")
                actionCatalog[f"{alias}.{actionName}"] = description

        return json.dumps(actionCatalog, indent=2, ensure_ascii=False)
    except Exception as e:
        logger.error(f"Error extracting available methods: {str(e)}")
        return json.dumps({}, indent=2, ensure_ascii=False)
def extractUserLanguage(service: Any) -> str:
    """Extract user language from service. Maps to {{KEY:USER_LANGUAGE}}."""
    try:
        # A language detected at runtime takes precedence over the profile.
        detected = getattr(service, 'currentUserLanguage', None) if service else None
        if detected:
            return detected
        if service and service.user:
            return service.user.language
        return 'en'
    except Exception:
        return 'en'
# Normalization now happens centrally in WorkflowManager._sendFirstMessage; no AI call here.
def _computeMessageSummary(msg) -> str:
"""Create a concise summary for a ChatMessage with documents only.
Fields: documentCount, roundNumber, documentsLabel, document names, message (full), success flag.
"""
try:
docs = getattr(msg, 'documents', []) or []
if not docs:
return "" # Only summarize messages that contain documents
document_count = len(docs)
round_number = getattr(msg, 'roundNumber', None) or 0
label = getattr(msg, 'documentsLabel', None) or ""
# Collect ALL document names (supports ChatDocument objects and dicts)
doc_names = []
for d in docs:
name = None
try:
if isinstance(d, dict):
# For dict objects, try multiple possible field names
name = d.get('fileName') or d.get('documentName') or d.get('name') or d.get('filename')
else:
# For ChatDocument objects, use fileName field
name = getattr(d, 'fileName', None) or getattr(d, 'documentName', None) or getattr(d, 'name', None) or getattr(d, 'filename', None)
except Exception:
name = None
doc_names.append(name or "(unnamed)")
# Format document names in brackets
if doc_names:
names_part = f"({', '.join(doc_names)})"
else:
names_part = "(no documents)"
# Don't truncate the message - show full content
user_message = (getattr(msg, 'message', '') or '').strip().replace("\n", " ")
# Read success from ChatMessage.success field
success_flag = getattr(msg, 'success', None)
success_text = "success=True" if success_flag is True else ("success=False" if success_flag is False else "success=Unknown")
label_part = f" label='{label}'" if label else ""
# Add learning/feedback if available
learning_part = ""
if hasattr(msg, 'summary') and msg.summary and 'learnings' in msg.summary.lower():
learning_part = " | learnings available"
return f"Round {round_number}: {document_count} docs {names_part}{label_part} | {success_text}{learning_part} | msg='{user_message}'"
except Exception:
return ""
def getMessageSummary(msg) -> str:
    """Return existing ChatMessage.summary or compute, set, and return it (documents only)."""
    try:
        existing = getattr(msg, 'summary', None)
        if existing:
            return existing
        computed = _computeMessageSummary(msg)
        if computed:
            # Cache in-memory only; persisting is left to the caller.
            try:
                msg.summary = computed
            except Exception:
                pass
        return computed
    except Exception:
        return ""
def getPreviousRoundContext(services) -> str:
    """Get enriched previous-round context for prompt building.

    - Reverse-chronological ordering: current round first (newest → oldest),
      then older rounds
    - Only messages that carry documents are summarized
    - An available-documents count snapshot is appended at the end

    Args:
        services: Service container exposing `workflow` and `chat`.

    Returns:
        Newline-joined summary lines, or a fallback message when nothing is
        available or an error occurs.
    """
    try:
        # Bug fix: resolve the workflow INSIDE the try block so a services
        # object without a `workflow` attribute is handled gracefully instead
        # of leaking an AttributeError to the caller.
        workflow = services.workflow
        if not workflow:
            return "No previous round context available"
        lines: List[str] = []
        # Partition messages into current round vs. older rounds, then emit
        # each group newest-first (reverse-chronological).
        try:
            msgs = getattr(workflow, 'messages', []) or []
            current_round = getattr(workflow, 'currentRound', None)
            current_round_msgs: List[Any] = []
            previous_round_msgs: List[Any] = []
            for m in msgs:
                if current_round is not None and getattr(m, 'roundNumber', None) == current_round:
                    current_round_msgs.append(m)
                else:
                    previous_round_msgs.append(m)
            for m in reversed(current_round_msgs):
                s = getMessageSummary(m)
                if s:
                    lines.append(f"- {s}")
            for m in reversed(previous_round_msgs):
                s = getMessageSummary(m)
                if s:
                    lines.append(f"- {s}")
        except Exception:
            pass  # best-effort: a malformed message list must not break the context
        # Append a snapshot of how many documents are currently available.
        try:
            if hasattr(services, 'workflow'):
                docs_index = services.chat.getAvailableDocuments(workflow)
                if docs_index and docs_index != "No documents available":
                    # Count only actual documents, not document list labels.
                    doc_count = docs_index.count("docItem:")
                    lines.append(f"Available documents: {doc_count}")
        except Exception:
            pass  # best-effort: the document snapshot is optional
        if not lines:
            return "No previous round context available"
        return "\n".join(lines)
    except Exception as e:
        logger.error(f"Error getting previous round context: {str(e)}")
        return "Error retrieving previous round context"
def _serializeObservationForReview(observation: Any) -> str:
    """Serialize an observation (Pydantic model or dict) to pretty JSON.

    Document preview snippets are replaced with a character-count indicator
    so raw content never leaks into the review prompt.
    """
    # Handle both Pydantic Observation model and dict format.
    from modules.datamodels.datamodelChat import Observation
    if isinstance(observation, Observation):
        # Convert Pydantic model to dict (model_dump on v2, dict() on v1).
        obs_dict = observation.model_dump(exclude_none=True) if hasattr(observation, 'model_dump') else observation.dict()
    elif isinstance(observation, dict):
        obs_dict = observation.copy()
    else:
        # Fallback: attempt model serialization on unknown objects.
        obs_dict = observation.model_dump(exclude_none=True) if hasattr(observation, 'model_dump') else observation.dict()
    # If there are previews with documents, show only metadata.
    if 'previews' in obs_dict and isinstance(obs_dict['previews'], list):
        for preview in obs_dict['previews']:
            if isinstance(preview, dict) and 'snippet' in preview:
                # Replace snippet with metadata indicator.
                preview['snippet'] = f"[Content: {len(preview.get('snippet', ''))} characters]"
    return json.dumps(obs_dict, indent=2, ensure_ascii=False)

def extractReviewContent(context: Any) -> str:
    """Extract review content for result validation. Maps to {{KEY:REVIEW_CONTENT}}.

    Sources, in priority order:
    1. context.actionResults - per-result success/error/document metadata
    2. context.observation - serialized observation (snippets redacted)
    3. context.stepResult['observation'] - same handling as (2)

    The previously duplicated observation-serialization logic of (2) and (3)
    is shared via _serializeObservationForReview().
    """
    try:
        if hasattr(context, 'actionResults') and context.actionResults:
            # Build a human-readable summary of every action result.
            result_summary = ""
            for i, result in enumerate(context.actionResults):
                result_summary += f"\nRESULT {i+1}:\n"
                result_summary += f" Success: {result.success}\n"
                if result.error:
                    result_summary += f" Error: {result.error}\n"
                if result.documents:
                    result_summary += f" Documents: {len(result.documents)} document(s)\n"
                    for doc in result.documents:
                        # Extract all available metadata without content.
                        doc_metadata = {
                            "name": getattr(doc, 'fileName', None) or getattr(doc, 'documentName', 'Unknown'),
                            "mimeType": getattr(doc, 'mimeType', 'Unknown'),
                            "size": getattr(doc, 'size', 'Unknown'),
                            "created": getattr(doc, 'created', 'Unknown'),
                            "modified": getattr(doc, 'modified', 'Unknown'),
                            "typeGroup": getattr(doc, 'typeGroup', 'Unknown'),
                            "documentId": getattr(doc, 'documentId', 'Unknown'),
                            "reference": getattr(doc, 'reference', 'Unknown')
                        }
                        # Remove 'Unknown' values to keep it clean.
                        doc_metadata = {k: v for k, v in doc_metadata.items() if v != 'Unknown'}
                        result_summary += f" - {json.dumps(doc_metadata, indent=6, ensure_ascii=False)}\n"
                else:
                    result_summary += f" Documents: None\n"
            return result_summary
        elif hasattr(context, 'observation') and context.observation:
            return _serializeObservationForReview(context.observation)
        elif hasattr(context, 'stepResult') and context.stepResult and 'observation' in context.stepResult:
            return _serializeObservationForReview(context.stepResult['observation'])
        else:
            return "No review content available"
    except Exception as e:
        logger.error(f"Error extracting review content: {str(e)}")
        return "No review content available"
def extractPreviousActionResults(context: Any) -> str:
    """Extract previous action results for learning context. Maps to {{KEY:PREVIOUS_ACTION_RESULTS}}."""
    try:
        previous = getattr(context, 'previousActionResults', None)
        if not previous:
            return "No previous actions executed yet"
        lines = []
        # Report only the five most recent results.
        for index, entry in enumerate(previous[-5:], 1):
            if hasattr(entry, 'resultLabel') and hasattr(entry, 'status'):
                outcome = "SUCCESS" if entry.status == "completed" else "FAILED"
                lines.append(f"Action {index}: {entry.resultLabel} - {outcome}")
                if hasattr(entry, 'error') and entry.error:
                    lines.append(f" Error: {entry.error}")
        return "\n".join(lines) if lines else "No previous actions executed yet"
    except Exception as e:
        logger.error(f"Error extracting previous action results: {str(e)}")
        return "No previous actions executed yet"
def extractLearningsAndImprovements(context: Any) -> str:
    """Extract learnings and improvements from previous actions. Maps to {{KEY:LEARNINGS_AND_IMPROVEMENTS}}."""
    try:
        sections = []

        def _appendSection(header, attrName):
            # Emit the header plus the last three entries of a list attribute.
            values = getattr(context, attrName, None)
            if values and isinstance(values, list):
                sections.append(header)
                sections.extend(f"- {value}" for value in values[-3:])

        _appendSection("IMPROVEMENTS:", 'improvements')
        _appendSection("FAILURE PATTERNS TO AVOID:", 'failurePatterns')
        _appendSection("SUCCESSFUL APPROACHES:", 'successfulActions')

        return "\n".join(sections) if sections else "No learnings available yet"
    except Exception as e:
        logger.error(f"Error extracting learnings and improvements: {str(e)}")
        return "No learnings available yet"
def extractLatestRefinementFeedback(context: Any) -> str:
    """Extract the latest refinement feedback. Maps to {{KEY:LATEST_REFINEMENT_FEEDBACK}}.

    CRITICAL: If ERROR level logs are found, refinement should stop processing.
    """
    try:
        # Scan workflow logs first; an ERROR entry aborts refinement.
        if hasattr(context, 'workflow') and context.workflow:
            try:
                import modules.interfaces.interfaceDbChat as interfaceDbChat
                from modules.interfaces.interfaceDbApp import getRootInterface
                rootInterface = getRootInterface()
                interfaceDbChat = interfaceDbChat.getInterface(rootInterface.currentUser)
                chatData = interfaceDbChat.getUnifiedChatData(context.workflow.id, None)
                for entry in chatData.get("logs", []):
                    if not isinstance(entry, dict):
                        continue
                    message = str(entry.get("message", ""))
                    if entry.get("level", "").upper() == "ERROR" or "ERROR" in message.upper():
                        return f"CRITICAL: Processing stopped due to ERROR in logs: {message[:200]}"
            except Exception as log_check_error:
                # Log inspection is best-effort; fall through to normal extraction.
                logger.warning(f"Could not check for ERROR logs: {str(log_check_error)}")

        history = getattr(context, 'previousReviewResult', None)
        if not history or not isinstance(history, list):
            return "No previous refinement feedback available"

        # Most recent refinement decision is the last list entry.
        latest = history[-1]
        if not isinstance(latest, dict):
            return "No previous refinement feedback available"

        parts = [
            f"Latest Decision: {latest.get('decision', 'unknown')}",
            f"Reason: {latest.get('reason', 'No reason provided')}",
        ]
        # Optional feedback and suggestions are appended when present.
        if 'feedback' in latest:
            parts.append(f"Feedback: {latest['feedback']}")
        if 'suggestions' in latest:
            parts.append(f"Suggestions: {latest['suggestions']}")
        return "\n".join(parts)
    except Exception as e:
        logger.error(f"Error extracting latest refinement feedback: {str(e)}")
        return "No previous refinement feedback available"
def extractAvailableDocumentsSummary(service: Any, context: Any) -> str:
    """Summary of available documents (count only)."""
    try:
        index = service.chat.getAvailableDocuments(service.workflow)
        if index and index != "No documents available":
            # "docItem:" marks actual documents; list labels are excluded.
            return f"{index.count('docItem:')} documents available from previous tasks"
        return "No documents available"
    except Exception as e:
        logger.error(f"Error getting document summary: {str(e)}")
        return "No documents available"
def extractAvailableDocumentsIndex(service: Any, context: Any) -> str:
    """Index of available documents with detailed references for parameter generation."""
    try:
        # Delegate entirely to the chat service's document index.
        chatService = service.chat
        return chatService.getAvailableDocuments(service.workflow)
    except Exception as e:
        logger.error(f"Error getting document index: {str(e)}")
        return "No documents available"
def extractAvailableConnectionsSummary(service: Any) -> str:
    """Summary of available connections (count only)."""
    try:
        refs = service.chat.getConnectionReferenceList()
        if not refs:
            return "No connections available"
        return f"{len(refs)} connections available"
    except Exception as e:
        logger.error(f"Error getting connection summary: {str(e)}")
        return "No connections available"
def extractAvailableConnectionsIndex(service: Any) -> str:
    """Index of available connections with detailed references for parameter generation."""
    try:
        refs = service.chat.getConnectionReferenceList()
        if not refs:
            return "No connections available"
        # One bullet line per connection reference.
        bulletLines = [f"- {ref}" for ref in refs]
        return '\n'.join(bulletLines)
    except Exception as e:
        logger.error(f"Error getting connection index: {str(e)}")
        return "No connections available"
def extractLanguageUserDetected(context: Any) -> str:
    """Extract the detected user language from intent analysis or context."""
    try:
        # 1) Language detected during intent analysis on the context itself.
        analysis = getattr(context, 'intent_analysis', None)
        if analysis:
            detected = analysis.get('languageUserDetected', '')
            if detected and detected != 'unknown':
                return detected
        # 2) Language recorded on the workflow's intent analysis.
        if hasattr(context, 'workflow') and hasattr(context.workflow, 'intent_analysis'):
            wfAnalysis = context.workflow.intent_analysis
            detected = wfAnalysis.get('languageUserDetected', '') if wfAnalysis else ''
            if detected and detected != 'unknown':
                return detected
        # 3) Language configured on the user profile.
        if hasattr(context, 'services') and hasattr(context.services, 'user') and hasattr(context.services.user, 'language'):
            return context.services.user.language
        # 4) Default fallback.
        return 'en'
    except Exception as e:
        logger.error(f"Error extracting user language: {str(e)}")
        return 'en'