# gateway/modules/workflows/processing/shared/placeholderFactory.py

"""
Placeholder Factory
Centralized placeholder extraction functions for all workflow modes.
Each function corresponds to a {{KEY:PLACEHOLDER_NAME}} in prompt templates.

NAMING CONVENTION:
- All functions follow pattern: extract{PlaceholderName}()
- Placeholder names are in UPPER_CASE with underscores
- Function names are in camelCase

MAPPING TABLE:
{{KEY:USER_PROMPT}}                    -> extractUserPrompt()
{{KEY:AVAILABLE_DOCUMENTS}}            -> extractAvailableDocuments()
{{KEY:WORKFLOW_HISTORY}}               -> extractWorkflowHistory()
{{KEY:AVAILABLE_METHODS}}              -> extractAvailableMethods()
{{KEY:AVAILABLE_CONNECTIONS}}          -> extractAvailableConnections()
{{KEY:USER_LANGUAGE}}                  -> extractUserLanguage()
{{KEY:REVIEW_CONTENT}}                 -> extractReviewContent()
{{KEY:ACTION_OBJECTIVE}}               -> extractActionObjective()
{{KEY:PREVIOUS_ACTION_RESULTS}}        -> extractPreviousActionResults()
{{KEY:LEARNINGS_AND_IMPROVEMENTS}}     -> extractLearningsAndImprovements()
{{KEY:LATEST_REFINEMENT_FEEDBACK}}     -> extractLatestRefinementFeedback()
{{KEY:SELECTED_ACTION}}                -> extractSelectedAction()
{{KEY:ACTION_SIGNATURE}}               -> extractActionSignature()
{{KEY:ENHANCED_DOCUMENTS}}             -> extractEnhancedDocumentContext()
"""

import json
import logging
from typing import Dict, Any, List
from modules.datamodels.datamodelChat import ChatDocument

logger = logging.getLogger(__name__)
from modules.workflows.processing.shared.methodDiscovery import (
    getAvailableDocuments,
    getMethodsList,
    methods,
    discoverMethods
)


# ============================================================================
# CORE PLACEHOLDER EXTRACTION FUNCTIONS
# ============================================================================

def extractUserPrompt(context: Any) -> str:
    """Return the objective of the current task step. Maps to {{KEY:USER_PROMPT}}

    Falls back to 'No request specified' when the context has no (truthy)
    ``task_step`` or when the step's ``objective`` is empty.
    """
    fallback = 'No request specified'
    taskStep = getattr(context, 'task_step', None)
    if not taskStep:
        return fallback
    return taskStep.objective or fallback


def extractAvailableDocuments(context: Any) -> str:
    """Return the context's ``available_documents`` value. Maps to {{KEY:AVAILABLE_DOCUMENTS}}

    The value is passed through unchanged; "No documents available" is
    returned when the attribute is missing or falsy.
    """
    documents = getattr(context, 'available_documents', None)
    return documents if documents else "No documents available"


def extractWorkflowHistory(service: Any, context: Any) -> str:
    """Return the previous-round summary of the workflow. Maps to {{KEY:WORKFLOW_HISTORY}}

    Delegates to getPreviousRoundContext(), handing it ``context.workflow``.
    When the context has no (truthy) workflow, or the delegate yields an
    empty result, a fixed "first round" notice is returned instead.
    """
    firstRoundNotice = "No previous workflow rounds - this is the first round."
    workflow = getattr(context, 'workflow', None)
    if not workflow:
        return firstRoundNotice
    history = getPreviousRoundContext(service, workflow)
    return history or firstRoundNotice


def extractAvailableMethods(service: Any) -> str:
    """Return a JSON catalog of available actions. Maps to {{KEY:AVAILABLE_METHODS}}

    The catalog is a flat JSON object keyed by compound action name
    (``<method>.<action>``) whose values are the action descriptions.
    The method part is the registry key with 'Method' removed and
    lower-cased (e.g. MethodAi -> ai). If the module-level ``methods``
    registry is empty, discoverMethods() is invoked first. Any failure is
    logged and an empty JSON object is returned.
    """
    try:
        # Populate the shared registry on first use
        if not methods:
            discoverMethods(service)

        actionCatalog = {}
        for methodName, methodInfo in methods.items():
            prefix = methodName.replace('Method', '').lower()
            actionCatalog.update({
                f"{prefix}.{actionName}": actionInfo.get('description', f"Execute {actionName} action")
                for actionName, actionInfo in methodInfo['actions'].items()
            })

        return json.dumps(actionCatalog, indent=2, ensure_ascii=False)
    except Exception as e:
        logger.error(f"Error extracting available methods: {str(e)}")
        return json.dumps({}, indent=2, ensure_ascii=False)


def extractUserLanguage(service: Any) -> str:
    """Extract user language from service. Maps to {{KEY:USER_LANGUAGE}}

    Args:
        service: Service object expected to expose ``user.language``.

    Returns:
        The user's language, or 'en' when the service, its user, or the
        language value is missing or empty. A string is always returned so
        the placeholder never renders as ``None``.
    """
    # getattr with a default tolerates service objects without a `user`
    # attribute and users without a `language` attribute.
    user = getattr(service, 'user', None) if service else None
    language = getattr(user, 'language', None) if user else None
    return language or 'en'


def extractAvailableConnections(service: Any) -> str:
    """Return available connections as a bullet list. Maps to {{KEY:AVAILABLE_CONNECTIONS}}

    Each reference from getConnectionReferenceList() becomes one "- <ref>"
    line. "No connections available" is returned when there are none or
    when an error occurs (the error is logged).
    """
    noneAvailable = "No connections available"
    try:
        references = getConnectionReferenceList(service)
        if not references:
            return noneAvailable
        bulletLines = [f"- {reference}" for reference in references]
        return '\n'.join(bulletLines)
    except Exception as e:
        logger.error(f"Error extracting available connections: {str(e)}")
        return noneAvailable


def getConnectionReferenceList(services) -> List[str]:
    """Return reference strings for the current user's connections.

    Connections are fetched through
    ``services.interfaceDbApp.getUserConnections(services.user.id)`` and
    rendered as ``conn_{authority}_{id}``. An empty list is returned when
    the services object lacks the required attributes, when there are no
    connections, or when any error occurs (the error is logged).
    """
    try:
        if not (hasattr(services, 'interfaceDbApp') and hasattr(services, 'user')):
            return []

        userId = services.user.id
        userConnections = services.interfaceDbApp.getUserConnections(userId)
        if not userConnections:
            return []

        # Reference format: conn_{authority}_{id}
        return [
            f"conn_{connection.authority.value}_{connection.id}"
            for connection in userConnections
        ]
    except Exception as e:
        logger.error(f"Error getting connection reference list: {str(e)}")
        return []


def getPreviousRoundContext(services, context: Any) -> str:
    """Summarize the results of previous rounds as a numbered list.

    Reads ``context.workflow_id`` (must be truthy) and
    ``context.previous_results``. Each result becomes one line:
    objects exposing ``success`` and ``resultLabel`` and dicts with the same
    keys are rendered as "<n>. <label> - Success|Failed"; anything else is
    rendered via str(). Returns a fixed "not available" message when there
    is nothing to report and an error message if extraction fails.
    """
    unavailable = "No previous round context available"

    def describe(position: int, entry: Any) -> str:
        # Render one result entry according to its shape
        if hasattr(entry, 'success') and hasattr(entry, 'resultLabel'):
            outcome = "Success" if entry.success else "Failed"
            return f"{position}. {entry.resultLabel} - {outcome}"
        if isinstance(entry, dict):
            outcome = "Success" if entry.get('success', False) else "Failed"
            return f"{position}. {entry.get('resultLabel', 'Unknown')} - {outcome}"
        return f"{position}. {str(entry)}"

    try:
        if not context or not getattr(context, 'workflow_id', None):
            return unavailable

        priorResults = getattr(context, 'previous_results', [])
        if not priorResults:
            return unavailable

        summaryLines = [
            describe(position, entry)
            for position, entry in enumerate(priorResults, start=1)
        ]
        return "\n".join(summaryLines) if summaryLines else unavailable
    except Exception as e:
        logger.error(f"Error getting previous round context: {str(e)}")
        return "Error retrieving previous round context"


def _redactPreviewSnippets(observation: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of *observation* with preview snippets replaced by a length marker.

    The input dict and its preview dicts are left untouched: every preview
    that carries a 'snippet' is rebuilt as a new dict instead of being
    modified in place.
    """
    redacted = dict(observation)
    previews = redacted.get('previews')
    if isinstance(previews, list):
        redacted['previews'] = [
            # Re-assigning an existing key keeps its position, so key order is preserved
            {**preview, 'snippet': f"[Content: {len(preview['snippet'] or '')} characters]"}
            if isinstance(preview, dict) and 'snippet' in preview
            else preview
            for preview in previews
        ]
    return redacted


def _renderObservation(observation: Any) -> str:
    """Serialize an observation to JSON, hiding snippet content for dict observations."""
    if isinstance(observation, dict):
        return json.dumps(_redactPreviewSnippets(observation), indent=2, ensure_ascii=False)
    return json.dumps(observation, ensure_ascii=False)


def _renderActionResults(actionResults: Any) -> str:
    """Build a text summary of action results, listing document metadata without content."""
    # (output key, document attribute) pairs included in the metadata
    metadataFields = (
        ("name", 'documentName'),
        ("mimeType", 'mimeType'),
        ("size", 'size'),
        ("created", 'created'),
        ("modified", 'modified'),
        ("typeGroup", 'typeGroup'),
        ("documentId", 'documentId'),
        ("reference", 'reference'),
    )
    parts = []
    for position, result in enumerate(actionResults, start=1):
        parts.append(f"\nRESULT {position}:\n")
        parts.append(f"  Success: {result.success}\n")
        if result.error:
            parts.append(f"  Error: {result.error}\n")

        if result.documents:
            parts.append(f"  Documents: {len(result.documents)} document(s)\n")
            for doc in result.documents:
                metadata = {key: getattr(doc, attribute, 'Unknown') for key, attribute in metadataFields}
                # Drop 'Unknown' values to keep the output clean
                metadata = {key: value for key, value in metadata.items() if value != 'Unknown'}
                parts.append(f"    - {json.dumps(metadata, indent=6, ensure_ascii=False)}\n")
        else:
            parts.append("  Documents: None\n")
    return "".join(parts)


def extractReviewContent(context: Any) -> str:
    """Extract review content for result validation. Maps to {{KEY:REVIEW_CONTENT}}

    Sources are tried in this order:
      1. ``context.action_results`` - per-result summary with document metadata
      2. ``context.observation`` - JSON dump of the observation
      3. ``context.step_result['observation']`` - JSON dump of the observation

    For dict observations, each preview's 'snippet' is replaced by a
    "[Content: N characters]" marker. The replacement is done on copies, so
    the observation held by the context keeps its original snippets.

    Returns:
        The rendered review content, or "No review content available" when
        no source is present or rendering fails (the error is logged).
    """
    try:
        if hasattr(context, 'action_results') and context.action_results:
            return _renderActionResults(context.action_results)
        if hasattr(context, 'observation') and context.observation:
            return _renderObservation(context.observation)
        if hasattr(context, 'step_result') and context.step_result and 'observation' in context.step_result:
            return _renderObservation(context.step_result['observation'])
        return "No review content available"
    except Exception as e:
        logger.error(f"Error extracting review content: {str(e)}")
        return "No review content available"


# ============================================================================
# REACT MODE SPECIFIC PLACEHOLDERS
# ============================================================================

def extractActionObjective(context: Any, current_task: str, original_prompt: str, additional_data: Dict[str, Any] = None) -> str:
    """Return the action objective for React mode. Maps to {{KEY:ACTION_OBJECTIVE}}

    Simple stand-in: prefers ``current_task`` and falls back to
    ``original_prompt`` when the task is empty. ``context`` and
    ``additional_data`` are accepted but not used here; per the original
    note, the AI-generated variant lives in placeholderFactoryReactOnly.
    """
    if current_task:
        return current_task
    return original_prompt


def extractPreviousActionResults(context: Any) -> str:
    """Summarize the most recent action results. Maps to {{KEY:PREVIOUS_ACTION_RESULTS}}

    Looks at the last five entries of ``context.previous_action_results``.
    Entries exposing both ``resultLabel`` and ``status`` yield an
    "Action <n>: <label> - SUCCESS|FAILED" line (SUCCESS only for status
    "completed"), followed by an indented error line when an error is set.
    Other entries are skipped. A fixed "nothing yet" message is returned when
    there is nothing to report or extraction fails.
    """
    nothingYet = "No previous actions executed yet"
    try:
        history = getattr(context, 'previous_action_results', None)
        if not history:
            return nothingYet

        lines = []
        # Only the five most recent results are reported
        for position, entry in enumerate(history[-5:], start=1):
            if not (hasattr(entry, 'resultLabel') and hasattr(entry, 'status')):
                continue
            outcome = "SUCCESS" if entry.status == "completed" else "FAILED"
            lines.append(f"Action {position}: {entry.resultLabel} - {outcome}")
            if hasattr(entry, 'error') and entry.error:
                lines.append(f"  Error: {entry.error}")

        return "\n".join(lines) if lines else nothingYet
    except Exception as e:
        logger.error(f"Error extracting previous action results: {str(e)}")
        return nothingYet


def extractLearningsAndImprovements(context: Any) -> str:
    """Collect learnings from earlier actions. Maps to {{KEY:LEARNINGS_AND_IMPROVEMENTS}}

    Three list attributes of the context are reported, each under its own
    heading and limited to its last three entries: ``improvements``,
    ``failure_patterns`` and ``successful_actions``. Attributes that are
    missing, empty, or not lists are skipped. Returns "No learnings available
    yet" when nothing was collected or extraction fails.
    """
    # (context attribute, heading) in output order
    sections = (
        ('improvements', "IMPROVEMENTS:"),
        ('failure_patterns', "FAILURE PATTERNS TO AVOID:"),
        ('successful_actions', "SUCCESSFUL APPROACHES:"),
    )
    try:
        lines = []
        for attribute, heading in sections:
            entries = getattr(context, attribute, None)
            if not entries or not isinstance(entries, list):
                continue
            lines.append(heading)
            lines.extend(f"- {entry}" for entry in entries[-3:])

        return "\n".join(lines) if lines else "No learnings available yet"
    except Exception as e:
        logger.error(f"Error extracting learnings and improvements: {str(e)}")
        return "No learnings available yet"


def extractLatestRefinementFeedback(context: Any) -> str:
    """Describe the most recent refinement decision. Maps to {{KEY:LATEST_REFINEMENT_FEEDBACK}}

    Uses the last element of the list ``context.previous_review_result``,
    which must be a dict. The decision and reason are always reported (with
    defaults when absent); 'feedback' and 'suggestions' are appended only
    when those keys exist. Returns a fixed "not available" message when the
    data is missing, has the wrong shape, or extraction fails.
    """
    unavailable = "No previous refinement feedback available"
    try:
        reviews = getattr(context, 'previous_review_result', None)
        if not reviews or not isinstance(reviews, list):
            return unavailable

        latest = reviews[-1]
        if not isinstance(latest, dict):
            return unavailable

        lines = [
            f"Latest Decision: {latest.get('decision', 'unknown')}",
            f"Reason: {latest.get('reason', 'No reason provided')}",
        ]
        # Optional details, reported only when the key is present
        for key, title in (('feedback', "Feedback"), ('suggestions', "Suggestions")):
            if key in latest:
                lines.append(f"{title}: {latest[key]}")

        return "\n".join(lines)
    except Exception as e:
        logger.error(f"Error extracting latest refinement feedback: {str(e)}")
        return unavailable


def extractSelectedAction(additional_data: Dict[str, Any]) -> str:
    """Return the 'SELECTED_ACTION' entry of the additional data. Maps to {{KEY:SELECTED_ACTION}}

    Yields an empty string when no data is given or the key is absent.
    """
    if not additional_data:
        return ''
    return additional_data.get('SELECTED_ACTION', '')


def extractActionSignature(additional_data: Dict[str, Any]) -> str:
    """Return the 'ACTION_SIGNATURE' entry of the additional data. Maps to {{KEY:ACTION_SIGNATURE}}

    Yields an empty string when no data is given or the key is absent.
    """
    if not additional_data:
        return ''
    return additional_data.get('ACTION_SIGNATURE', '')


# ============================================================================
# CONTEXT-AWARE PLACEHOLDER FUNCTIONS (for React mode)
# ============================================================================

def extractMinimalDocumentContext(service: Any, context: Any) -> str:
    """Return only a document count, for React plan selection.

    The listing from ``service.workflow.getAvailableDocuments(context.workflow)``
    is scanned for "docList:" and "docItem:" markers and their combined
    number of occurrences is reported. "No documents available" is returned
    when the context has no workflow, the listing is empty, or an error
    occurs (the error is logged).
    """
    noDocuments = "No documents available"
    try:
        workflow = getattr(context, 'workflow', None)
        if not workflow:
            return noDocuments

        listing = service.workflow.getAvailableDocuments(workflow)
        if not listing or listing == noDocuments:
            return noDocuments

        # Every docList/docItem reference in the listing counts as one document
        referenceCount = sum(listing.count(marker) for marker in ("docList:", "docItem:"))
        return f"{referenceCount} documents available from previous tasks"
    except Exception as e:
        logger.error(f"Error getting minimal document context: {str(e)}")
        return noDocuments


def extractFullDocumentContext(service: Any, context: Any) -> str:
    """Return the full document listing, for parameter generation.

    Passes ``context.workflow`` to ``service.workflow.getAvailableDocuments``
    and returns its result unchanged. "No documents available" is returned
    when the context has no workflow or the lookup fails (the error is
    logged).
    """
    noDocuments = "No documents available"
    try:
        workflow = getattr(context, 'workflow', None)
        if not workflow:
            return noDocuments
        return service.workflow.getAvailableDocuments(workflow)
    except Exception as e:
        logger.error(f"Error getting full document context: {str(e)}")
        return noDocuments


def extractMinimalConnectionContext(service: Any) -> str:
    """Return only the number of connections, for React plan selection.

    Counts the references from getConnectionReferenceList(). "No connections
    available" is returned when there are none or an error occurs (the error
    is logged).
    """
    noneAvailable = "No connections available"
    try:
        references = getConnectionReferenceList(service)
        if not references:
            return noneAvailable
        return f"{len(references)} connections available"
    except Exception as e:
        logger.error(f"Error getting minimal connection context: {str(e)}")
        return noneAvailable


def extractFullConnectionContext(service: Any) -> str:
    """Return all connection references as a bullet list, for parameter generation.

    Each reference from getConnectionReferenceList() becomes one "- <ref>"
    line. "No connections available" is returned when there are none or an
    error occurs (the error is logged).
    """
    noneAvailable = "No connections available"
    try:
        references = getConnectionReferenceList(service)
        if not references:
            return noneAvailable
        bulletLines = [f"- {reference}" for reference in references]
        return '\n'.join(bulletLines)
    except Exception as e:
        logger.error(f"Error getting full connection context: {str(e)}")
        return noneAvailable


def extractUserPromptFromService(service: Any) -> str:
    """Return the current user prompt stored on the service.

    Reads ``service.currentUserPrompt``. When the service is missing or the
    prompt is absent or empty, 'No request specified' is returned; no other
    source is consulted.
    """
    prompt = getattr(service, 'currentUserPrompt', None) if service else None
    if prompt:
        return prompt
    return 'No request specified'


def extractUserLanguageFromService(service: Any) -> str:
    """Extract user language from service.

    Args:
        service: Service object expected to expose ``user.language``.

    Returns:
        The user's language, or 'en' when the service, its user, or the
        language value is missing or empty. A string is always returned so
        callers never receive ``None``.
    """
    # getattr with a default tolerates service objects without a `user`
    # attribute and users without a `language` attribute.
    user = getattr(service, 'user', None) if service else None
    language = getattr(user, 'language', None) if user else None
    return language or 'en'


# ============================================================================
# ADDITIONAL PLACEHOLDER EXTRACTION FUNCTIONS (moved from methodDiscovery.py)
# ============================================================================

def extractAvailableDocumentsFromList(context: Any) -> str:
    """Render ``context.available_documents`` as a numbered list. Maps to {{KEY:AVAILABLE_DOCUMENTS}} (alternative implementation)

    The attribute must be a non-empty list. ChatDocument instances and dicts
    are rendered as "<n>. **<fileName>**" followed by the MIME type and size
    when set; any other entry is rendered via str(). Returns "No documents
    available" when there is nothing to list and "Error retrieving documents"
    if rendering fails.
    """
    noDocuments = "No documents available"
    try:
        documents = getattr(context, 'available_documents', None) if context else None
        if not documents or not isinstance(documents, list):
            return noDocuments

        entries = []
        for position, doc in enumerate(documents, start=1):
            # Pull the same three fields from either supported shape
            if isinstance(doc, ChatDocument):
                fileName = doc.fileName
                mimeType = getattr(doc, 'mimeType', None)
                size = getattr(doc, 'size', None)
            elif isinstance(doc, dict):
                fileName = doc.get('fileName', 'Unknown')
                mimeType = doc.get('mimeType')
                size = doc.get('size')
            else:
                entries.append(f"{position}. {str(doc)}")
                continue

            entry = f"{position}. **{fileName}**"
            if mimeType:
                entry += f" ({mimeType})"
            if size:
                entry += f" - {size} bytes"
            entries.append(entry)

        return "\n".join(entries) if entries else noDocuments
    except Exception as e:
        logger.error(f"Error getting available documents: {str(e)}")
        return "Error retrieving documents"


def extractWorkflowHistoryFromMessages(services: Any, context: Any) -> str:
    """Render recent workflow messages as history. Maps to {{KEY:WORKFLOW_HISTORY}} (alternative implementation)

    Messages are loaded via
    ``services.interfaceDbChat.getWorkflowMessages(context.workflow_id)``.
    Only the last ten are used; each message exposing ``role`` and
    ``message`` becomes "**User**: ..." or "**Assistant**: ...", with text
    longer than 200 characters cut off and suffixed with "...". Returns
    "No workflow history available" when there is nothing to show and
    "Error retrieving workflow history" if loading or rendering fails.
    """
    unavailable = "No workflow history available"
    try:
        workflowId = getattr(context, 'workflow_id', None) if context else None
        if not workflowId:
            return unavailable

        messages = services.interfaceDbChat.getWorkflowMessages(workflowId)
        if not messages:
            return unavailable

        # Keep only the ten most recent messages
        if len(messages) > 10:
            messages = messages[-10:]

        lines = []
        for msg in messages:
            if not (hasattr(msg, 'role') and hasattr(msg, 'message')):
                continue
            speaker = "User" if msg.role == "user" else "Assistant"
            text = msg.message
            if len(text) > 200:
                text = text[:200] + "..."
            lines.append(f"**{speaker}**: {text}")

        return "\n".join(lines) if lines else unavailable
    except Exception as e:
        logger.error(f"Error getting workflow history: {str(e)}")
        return "Error retrieving workflow history"


def extractAvailableMethodsFromList(services: Any) -> str:
    """Return the available methods as produced by getMethodsList(). Maps to {{KEY:AVAILABLE_METHODS}} (alternative implementation)

    If the module-level ``methods`` registry is empty, discoverMethods() is
    invoked first. On failure the error is logged and "Error retrieving
    available methods" is returned.
    """
    try:
        registryIsEmpty = not methods
        if registryIsEmpty:
            discoverMethods(services)

        methodsListing = getMethodsList(services)
        return methodsListing
    except Exception as e:
        logger.error(f"Error getting available methods: {str(e)}")
        return "Error retrieving available methods"


def extractUserLanguageFromServices(services: Any) -> str:
    """Return the user's language, defaulting to 'en'. Maps to {{KEY:USER_LANGUAGE}} (alternative implementation)

    Reads ``services.user.language``. 'en' is returned when the user or the
    language attribute is missing, when the language is empty, or when an
    error occurs (the error is logged).
    """
    try:
        user = getattr(services, 'user', None)
        language = getattr(user, 'language', None)
        return language or 'en'
    except Exception as e:
        logger.error(f"Error getting user language: {str(e)}")
        return 'en'


def extractReviewContentFromObservation(context: Any) -> str:
    """Render ``context.observation`` as readable review text. Maps to {{KEY:REVIEW_CONTENT}} (alternative implementation)

    The observation must be a dict. Reported when present: the 'success'
    flag, 'documentsCount', one line per dict entry in 'previews' (name,
    MIME type, content size) and one line per entry in 'notes'. Returns
    "No review content available" when nothing can be reported and
    "Error retrieving review content" if rendering fails.
    """
    unavailable = "No review content available"
    try:
        observation = getattr(context, 'observation', None) if context else None
        if not isinstance(observation, dict):
            return unavailable

        lines = []

        if 'success' in observation:
            lines.append(f"Success: {observation['success']}")

        if 'documentsCount' in observation:
            lines.append(f"Documents generated: {observation['documentsCount']}")

        previews = observation.get('previews')
        if previews:
            lines.append("Document previews:")
            # Non-dict previews are silently ignored
            lines.extend(
                f"  - {preview.get('name', 'Unknown')} ({preview.get('mimeType', 'Unknown')})"
                f" - {preview.get('contentSize', 'Unknown size')}"
                for preview in previews
                if isinstance(preview, dict)
            )

        notes = observation.get('notes')
        if notes:
            lines.append("Notes:")
            lines.extend(f"  - {note}" for note in notes)

        return "\n".join(lines) if lines else unavailable
    except Exception as e:
        logger.error(f"Error getting review content: {str(e)}")
        return "Error retrieving review content"


def extractEnhancedDocumentContext(services: Any) -> str:
    """Extract enhanced document context with full metadata. Maps to {{KEY:ENHANCED_DOCUMENTS}}

    Documents attached to the messages of ``services.currentWorkflow`` are
    grouped by round/task/action number and documents label. Each group is
    rendered with a heading, a ``docList:<messageId>:<groupKey>`` reference
    (using the id of the first message in the group) and one numbered line
    per document with file name, MIME type, size and creation time when set.

    Args:
        services: Service object expected to expose ``currentWorkflow`` with
            ``id`` and ``messages`` attributes.

    Returns:
        The formatted document listing; "No workflow context available" when
        there is no usable workflow; "No documents available" when no message
        carries documents; "Error retrieving document context" if anything
        fails (the error is logged).
    """
    try:
        workflow = getattr(services, 'currentWorkflow', None)
        if not workflow or not hasattr(workflow, 'id'):
            return "No workflow context available"

        messages = getattr(workflow, 'messages', None)
        if not messages:
            return "No documents available"

        # Single pass: group documents and remember the id of the first
        # message of each group, so no second scan over the messages is
        # needed to build the docList reference.
        docGroups = {}
        for message in messages:
            documents = getattr(message, 'documents', None)
            if not documents:
                continue

            round_num = getattr(message, 'roundNumber', 0)
            task_num = getattr(message, 'taskNumber', 0)
            action_num = getattr(message, 'actionNumber', 0)
            label = getattr(message, 'documentsLabel', 'results')
            group_key = f"round{round_num}_task{task_num}_action{action_num}_{label}"

            group = docGroups.get(group_key)
            if group is None:
                group = {'messageId': str(message.id), 'documents': []}
                docGroups[group_key] = group
            group['documents'].extend(documents)

        if not docGroups:
            return "No documents available"

        docList = []
        for group_key, group in docGroups.items():
            message_id = group['messageId']
            # Fall back to the plain group label when the message id is empty
            docListRef = f"docList:{message_id}:{group_key}" if message_id else group_key

            docList.append(f"\n**{group_key}:**")
            docList.append(f"Reference: {docListRef}")
            for i, doc in enumerate(group['documents'], 1):
                if isinstance(doc, ChatDocument):
                    docInfo = f"  {i}. **{doc.fileName}**"
                    if hasattr(doc, 'mimeType') and doc.mimeType:
                        docInfo += f" ({doc.mimeType})"
                    if hasattr(doc, 'size') and doc.size:
                        docInfo += f" - {doc.size} bytes"
                    if hasattr(doc, 'created') and doc.created:
                        docInfo += f" - Created: {doc.created}"
                    docList.append(docInfo)
                elif isinstance(doc, dict):
                    docInfo = f"  {i}. **{doc.get('fileName', 'Unknown')}**"
                    if doc.get('mimeType'):
                        docInfo += f" ({doc['mimeType']})"
                    if doc.get('size'):
                        docInfo += f" - {doc['size']} bytes"
                    if doc.get('created'):
                        docInfo += f" - Created: {doc['created']}"
                    docList.append(docInfo)
                else:
                    docList.append(f"  {i}. {str(doc)}")

        return "\n".join(docList) if docList else "No documents available"
    except Exception as e:
        logger.error(f"Error getting enhanced document context: {str(e)}")
        return "Error retrieving document context"