# gateway/gwserver/modules/agentservice_workflow_execution.py

"""
Refactored workflow execution for the Agentservice.
Implements a structured workflow with clear separation of planning and execution phases.
"""

import os
import logging
import asyncio
import uuid
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple, Union

# Import utility module (will be created)
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils

# Import for data extraction
from modules.agentservice_dataextraction import data_extraction

logger = logging.getLogger(__name__)

class WorkflowExecution:
    """
    Handles the execution of workflows in a structured, multi-phase approach.
    Separates planning from execution and provides better logging.
    """

    def __init__(self, workflow_manager, workflow_id: str, mandate_id: int, user_id: int, ai_service, lucydom_interface):
        """
        Bind the collaborators of one workflow run and build the helper utilities.

        Args:
            workflow_manager: Object whose _save_workflow and _add_log are called by this class
            workflow_id: Identifier of the workflow being executed
            mandate_id: Mandate identifier (stored on the instance)
            user_id: User identifier (stored on the instance)
            ai_service: Service object whose call_api coroutine is awaited for AI requests
            lucydom_interface: Interface handed on to file reading and data extraction
        """
        # Identity of this run
        self.workflow_id = workflow_id
        self.mandate_id = mandate_id
        self.user_id = user_id

        # External collaborators
        self.workflow_manager = workflow_manager
        self.ai_service = ai_service
        self.lucydom_interface = lucydom_interface

        # Helper objects; the logging helper receives _add_log as its callback
        self.workflow_utils = WorkflowUtils(workflow_id)
        self.message_utils = MessageUtils()
        self.logging_utils = LoggingUtils(workflow_id, self._add_log)

    async def execute(self, message: Dict[str, Any], workflow: Dict[str, Any], files: Optional[List[Dict[str, Any]]] = None, is_user_input: bool = False):
        """
        Execute the workflow following the new structured approach.

        Steps: record the initiating message (plus any files), ask the AI for a
        work plan, run every activity of the plan in order, create a summary and
        mark the workflow as completed. The workflow is saved after the user
        message, after every activity and at the end.

        Args:
            message: The initiating message (prompt or user input)
            workflow: The workflow object; it is mutated in place
            files: Optional list of file metadata
            is_user_input: Flag indicating if this is user input (not read by this method)

        Returns:
            Dict with workflow_id, status and either the workflow messages
            (status "completed") or the error text (status "failed").
            Errors are reported through the result, never raised.
        """
        try:
            # 1. Initialize the workflow (already done by the caller)
            self.logging_utils.info("Starting workflow execution", "workflow", "Workflow initialized")

            # 2. Create a message with user input
            user_message = self._create_message(workflow, message.get("role", "user"))
            user_message["content"] = message.get("content", "")

            # Process files if provided
            if files:
                self.logging_utils.info(f"Processing {len(files)} files", "files", f"Processing files: {[f.get('name', 'unknown') for f in files]}")
                await self._process_files(workflow, user_message, files)

            # Add the message to the workflow
            workflow.setdefault("messages", []).append(user_message)

            # Save workflow state
            self.workflow_manager._save_workflow(workflow)
            self.logging_utils.info("User message processed", "workflow", "User input added to workflow")

            # 3. Create work plan using AI
            work_plan = await self._create_work_plan(workflow, user_message)
            self.logging_utils.info(f"Created work plan with {len(work_plan)} activities", "planning", "Work plan created")

            # 4. Execute each activity in the work plan
            results = []
            for i, activity in enumerate(work_plan, 1):
                self.logging_utils.info(f"Starting activity {i}/{len(work_plan)}: {activity.get('title', 'Unnamed')}",
                                        "execution", f"Activity: {activity.get('title', 'Unnamed')}")

                # Execute the activity
                results.append(await self._execute_activity(workflow, activity))

                # Save intermediate state
                self.workflow_manager._save_workflow(workflow)

            # 5. Create summary for the user (the summary message is appended to the workflow)
            await self._create_summary(workflow, results)
            self.logging_utils.info("Created workflow summary", "summary", "Workflow summary created")

            # Set workflow status to completed
            workflow["status"] = "completed"
            workflow["last_activity"] = datetime.now().isoformat()

            # Final save
            self.workflow_manager._save_workflow(workflow)

            return {
                "workflow_id": self.workflow_id,
                "status": "completed",
                "messages": workflow.get("messages", [])
            }

        except Exception as e:
            self.logging_utils.error(f"Workflow execution failed: {str(e)}", "error", f"Error: {str(e)}")
            workflow["status"] = "failed"

            # The failure may have been caused by persistence itself; a second
            # save error must not replace the original error or escape to the caller.
            try:
                self.workflow_manager._save_workflow(workflow)
            except Exception as save_error:
                self.logging_utils.error(f"Could not save failed workflow state: {str(save_error)}", "error",
                                         f"Save error: {str(save_error)}")

            return {
                "workflow_id": self.workflow_id,
                "status": "failed",
                "error": str(e)
            }

    async def _process_files(self, workflow: Dict[str, Any], message: Dict[str, Any], files: List[Dict[str, Any]]):
        """
        Read the given files through the file manager and attach them to the message.

        Args:
            workflow: The workflow object (not read by this method)
            message: The message the files are attached to
            files: List of file metadata dicts (id, name, content_type, type, size)
        """
        # Imported here rather than at module level
        from modules.agentservice_filemanager import get_file_manager

        manager = get_file_manager()

        # Build the contexts the file manager needs for reading
        contexts = manager.prepare_file_contexts(files)
        self.logging_utils.info(f"Prepared contexts for {len(contexts)} files", "files", "File contexts prepared")

        # Read all contents in one call; the result is keyed by file id
        contents_by_id = await manager.read_file_contents(
            contexts,
            self.lucydom_interface,
            self.workflow_id,
            self._add_log,
            self.ai_service
        )

        for file_id, raw in contents_by_id.items():
            # First metadata entry with a matching id, or an empty dict if none matches
            meta = next((entry for entry in files if entry.get('id') == file_id), {})

            # A dict result carries content plus an extraction flag;
            # anything else is taken as the content itself
            if isinstance(raw, dict):
                body = raw.get("content", "")
                extracted = raw.get("is_extracted", False)
            else:
                body = raw
                extracted = False

            file_data = {
                "id": file_id,
                "name": meta.get('name', 'unnamed_file'),
                "content_type": meta.get('content_type'),
                "type": meta.get('type', "unknown"),
                "content": body,
                "size": meta.get('size'),
                "is_extracted": extracted
            }

            self.logging_utils.info(f"Adding file {file_data['name']} to message", "files", f"Adding file: {file_data['name']}")
            manager.add_file_to_message(message, file_data)

    async def _create_work_plan(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Create a structured work plan based on the user's request.

        The AI is asked for a JSON array of activities. The first JSON array of
        objects found in the response is parsed and returned. If no such array
        is found, or the call or the parsing fails, a single-activity fallback
        plan that passes the task straight through is returned instead, so this
        method does not raise for AI or parsing errors.

        Args:
            workflow: The workflow object (not read by this method)
            message: The initiating message; its "content" is the task and its
                "documents" are listed in the prompt

        Returns:
            List of structured activities to execute
        """
        import json
        import re

        # Extract context information
        task = message.get("content", "")
        documents = message.get("documents", [])

        def fallback_plan(title: str, description: str) -> List[Dict[str, Any]]:
            """Build the one-activity plan used when no AI plan is available."""
            return [{
                "title": title,
                "description": description,
                "agent_prompt": task,
                "data_prompt": "All available data is needed for this task",
                "expected_format": "Text",
                "dependencies": []
            }]

        # Create the planning prompt
        plan_prompt = f"""
        As an AI workflow manager, create a detailed work plan for the following task:

        TASK: {task}

        The work plan should include a structured list of activities. Each activity should have:
        1. title - A short descriptive title for the activity
        2. description - What needs to be done in this activity
        3. agent_prompt - A complete prompt to give to the AI agent(s) for this activity
        4. data_prompt - A prompt describing what data will be needed for this activity
        5. expected_format - The expected output format (e.g., "Text", "JSON", "Table", "FileList")
        6. dependencies - List of previous activities this depends on (by index)

        Return the work plan as a JSON array of activity objects, each with the above properties.
        The work plan should be logical, efficient, and comprehensively address the task.
        """

        # Add information about available documents if present
        if documents:
            doc_info = []
            for doc in documents:
                source = doc.get("source", {})
                doc_info.append({
                    "name": source.get("name", "unnamed"),
                    "type": source.get("type", "unknown"),
                    "content_type": source.get("content_type", "unknown")
                })

            plan_prompt += f"\n\nAvailable documents: {doc_info}"

        self.logging_utils.info("Requesting AI work plan", "planning", "Generating work plan")

        # Call AI to generate work plan
        try:
            plan_response = await self.ai_service.call_api([{"role": "user", "content": plan_prompt}])

            # Debug-level log instead of printing: prompt and response may contain
            # user data and must not be written to stdout unconditionally
            logging.getLogger(__name__).debug("Work plan prompt=%r response=%r", plan_prompt, plan_response)

            # Look for a JSON array of objects in the response; greedy, so it spans
            # from the first "[{" to the last "}]"
            json_match = re.search(r'\[\s*\{.*\}\s*\]', plan_response, re.DOTALL)

            if not json_match:
                self.logging_utils.warning("Could not extract JSON from AI response", "planning",
                                          "Fallback to default work plan")
                return fallback_plan("Process Task", "Process the user's request directly")

            work_plan = json.loads(json_match.group(0))
            self.logging_utils.info(f"Work plan created with {len(work_plan)} activities", "planning",
                                   f"Work plan activities: {[activity.get('title', 'Unnamed') for activity in work_plan]}")
            return work_plan

        except Exception as e:
            self.logging_utils.error(f"Error creating work plan: {str(e)}", "planning", f"Work plan error: {str(e)}")
            # Return a minimal fallback plan
            return fallback_plan("Process Task (Error Recovery)", "Process the user's request after planning error")

    async def _execute_activity(self, workflow: Dict[str, Any], activity: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute a single activity from the work plan.

        Agents are chosen via _select_agents. If none are chosen, the activity
        prompt is sent directly to the AI service and both a moderator notice
        and the AI answer are appended to the workflow messages. Otherwise the
        selected agents run in sequence; each agent receives the previous
        agent's result appended to its prompt, and each non-empty agent response
        is appended to the workflow messages.

        Args:
            workflow: The workflow object; its "messages" list is extended in place
            activity: The activity definition from the work plan

        Returns:
            Dict with "title", "content" (last produced result or ""), "agent"
            (id of the agent that produced that result, "assistant" for the direct
            path, "none" if no agent produced anything) and "format"
        """
        # Extract activity information
        title = activity.get("title", "Unnamed Activity")
        activity_prompt = activity.get("agent_prompt", "")
        data_prompt = activity.get("data_prompt", "")
        activity_format = activity.get("expected_format", "Text")

        self.logging_utils.info(f"Executing activity: {title}", "execution", f"Activity: {title}, Format: {activity_format}")

        # 1. Determine which agents to use
        agents_config = await self._select_agents(workflow, activity_prompt, activity_format)
        self.logging_utils.info(f"Selected {len(agents_config)} agents for execution", "agents",
                               f"Agents: {[agent.get('agent_id', 'unknown') for agent in agents_config]}")

        # Tolerate a workflow that has no message list yet
        messages = workflow.setdefault("messages", [])

        # If no agents were selected, use the moderator directly
        if not agents_config:
            self.logging_utils.info("No specific agents selected, using moderator", "agents", "Using moderator")
            # Create a message with the moderator's response
            moderator_message = self._create_message(workflow, "assistant")
            moderator_message["content"] = f"No specialized agents needed for this task. Processing directly: {activity_prompt}"
            moderator_message["agent_type"] = "moderator"
            moderator_message["agent_id"] = "moderator"
            moderator_message["agent_name"] = "Moderator"
            messages.append(moderator_message)

            # Direct AI call for simple result
            result_content = await self.ai_service.call_api([
                {"role": "system", "content": "You are a helpful assistant processing the user's request."},
                {"role": "user", "content": activity_prompt}
            ])

            # Create result message
            result_message = self._create_message(workflow, "assistant")
            result_message["content"] = result_content
            result_message["agent_type"] = "assistant"
            result_message["agent_id"] = "assistant"
            result_message["agent_name"] = "AI Assistant"
            result_message["result_format"] = "Text"
            messages.append(result_message)

            return {
                "title": title,
                "content": result_content,
                "agent": "assistant",
                "format": "Text"
            }

        # 2. The registry is only needed once agents actually have to run
        from modules.agentservice_registry import AgentRegistry
        registry = AgentRegistry.get_instance()

        # 3. Execute the agents in sequence, remembering who produced the last result
        last_result = None
        last_agent_id = None
        last_format = activity_format
        for agent_config in agents_config:
            agent_id = agent_config.get("agent_id")
            # A selection entry without its own prompt falls back to the activity prompt
            # (a missing prompt would otherwise be None and break the string handling below)
            agent_prompt = agent_config.get("prompt") or activity_prompt
            expected_format = agent_config.get("expected_format", "Text")

            # Get the agent from registry
            agent = registry.get_agent(agent_id)
            if not agent:
                self.logging_utils.warning(f"Agent '{agent_id}' not found, skipping", "agents", f"Agent not found: {agent_id}")
                continue

            # Incorporate previous result if available
            if last_result:
                agent_prompt = f"{agent_prompt}\n\nPrevious result: {last_result}"

            self.logging_utils.info(f"Executing agent: {agent_id}", "agents", f"Agent: {agent_id}, Format: {expected_format}")

            # Extract any needed data
            if data_prompt:
                extracted_data = await data_extraction(
                    prompt=data_prompt,
                    files=self._extract_files_from_workflow(workflow),
                    messages=workflow.get("messages", []),
                    ai_service=self.ai_service,
                    lucydom_interface=self.lucydom_interface,
                    workflow_id=self.workflow_id,
                    add_log_func=self._add_log
                )

                # Add the data context to the prompt
                if extracted_data and "extracted_content" in extracted_data:
                    data_summary = "\n\nExtracted data summary:\n"
                    for item in extracted_data.get("extracted_content", []):
                        content = item.get('content', '')
                        # Only the first 100 characters are shown; non-string content
                        # is converted first so the slice cannot fail
                        if content is None:
                            preview = ""
                        elif isinstance(content, str):
                            preview = content
                        else:
                            preview = str(content)
                        data_summary += f"- {item.get('name', 'unnamed')}: {preview[:100]}...\n"

                    agent_prompt += data_summary

            # Create the agent message (handed to the agent, not stored in the workflow)
            agent_message = self._create_message(workflow, "user")
            agent_message["content"] = agent_prompt
            agent_message["workflow_id"] = self.workflow_id

            # Execute the agent
            agent_response = await agent.process_message(agent_message, {"expected_format": expected_format})

            # Process agent response
            if agent_response:
                response_message = self._create_message(workflow, "assistant")
                response_message["content"] = agent_response.get("content", "")
                response_message["agent_type"] = agent_id
                response_message["agent_id"] = agent_id
                response_message["agent_name"] = agent.name
                response_message["result_format"] = agent_response.get("result_format", expected_format)
                messages.append(response_message)

                # Update last result and its origin
                last_result = agent_response.get("content", "")
                last_agent_id = agent_id
                last_format = expected_format

        # Return the final result, attributed to the agent that produced it
        return {
            "title": title,
            "content": last_result or "",
            "agent": last_agent_id if last_agent_id is not None else "none",
            "format": last_format
        }

    async def _select_agents(self, workflow: Dict[str, Any], prompt: str, expected_format: str) -> List[Dict[str, Any]]:
        """
        Select appropriate agents for a given prompt and expected format.

        The AI is shown every registered agent whose type is not "user" and
        asked for a JSON array of selections. Only selections that name one of
        those agents are returned. If the response cannot be parsed, a keyword
        heuristic proposes one agent, which is returned only if it is actually
        registered. Errors during the AI call or parsing are logged and yield
        an empty list.

        Args:
            workflow: The workflow object (not read by this method)
            prompt: The prompt to process
            expected_format: The expected output format

        Returns:
            List of agent configurations (agent_id, prompt, expected_format);
            an empty list means no specialized agent should be used
        """
        import json
        import re

        # Get available agents
        from modules.agentservice_registry import AgentRegistry
        registry = AgentRegistry.get_instance()

        # Get all agents except user agents
        system_agents = {
            agent_id: agent.get_agent_info()
            for agent_id, agent in registry.get_all_agents().items()
            if agent.type != "user"
        }

        # Create agent selection prompt
        selection_prompt = f"""
        You are a workflow coordinator responsible for selecting appropriate agents for a task.

        TASK PROMPT: {prompt}

        EXPECTED FORMAT: {expected_format}

        AVAILABLE AGENTS:
        """

        # Add agent descriptions
        for agent_id, agent_info in system_agents.items():
            selection_prompt += f"""
            - ID: {agent_id}
              Name: {agent_info.get('name', '')}
              Type: {agent_info.get('type', '')}
              Description: {agent_info.get('description', '')}
              Capabilities: {agent_info.get('capabilities', '')}
              Result Format: {agent_info.get('result_format', 'Text')}
            """

        selection_prompt += """
        Based on the task and expected format, select the appropriate agent(s) to use.

        Return your selection as a JSON array with objects containing:
        1. agent_id: The ID of the selected agent
        2. prompt: A specific prompt tailored for this agent
        3. expected_format: The expected output format

        You can select multiple agents if needed, in which case they will be executed in sequence.
        If no specialized agent is needed, return an empty array.
        """

        # Call AI to select agents
        try:
            selection_response = await self.ai_service.call_api([{"role": "user", "content": selection_prompt}])

            # Look for a JSON array of objects (greedy: first "[{" to last "}]")
            json_match = re.search(r'\[\s*\{.*\}\s*\]', selection_response, re.DOTALL)

            if json_match:
                selected_agents = json.loads(json_match.group(0))

                # Keep only well-formed selections that name an available agent
                return [
                    agent_config for agent_config in selected_agents
                    if isinstance(agent_config, dict)
                    and "agent_id" in agent_config
                    and agent_config["agent_id"] in system_agents
                ]

            if "[]" in selection_response:
                # Empty array - no agents needed
                return []

            # Could not parse response, use default strategy
            self.logging_utils.warning("Could not parse agent selection response", "agents",
                                      "Falling back to default agent selection")

            # Simple heuristic, checked in priority order: format first, then prompt keywords
            format_key = (expected_format or "").lower()
            prompt_key = (prompt or "").lower()
            rules = (
                (format_key in ("file", "filelist", "document"), "filecreator_agent"),
                (format_key in ("report", "analysis"), "documentation_agent"),
                ("web" in prompt_key or "search" in prompt_key, "webcrawler_agent"),
                ("analyze" in prompt_key or "data" in prompt_key, "analyst_agent"),
            )
            fallback_id = next((agent_id for matches, agent_id in rules if matches), None)

            # An unregistered fallback agent would only be skipped during execution and
            # leave the activity without a result; an empty list lets the caller
            # process the prompt directly instead
            if fallback_id is None or fallback_id not in system_agents:
                return []

            return [{
                "agent_id": fallback_id,
                "prompt": prompt,
                "expected_format": expected_format
            }]

        except Exception as e:
            self.logging_utils.error(f"Error selecting agents: {str(e)}", "agents", f"Agent selection error: {str(e)}")
            return []  # Empty array - use default processing

    async def _create_summary(self, workflow: Dict[str, Any], results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Ask the AI for a summary of all activity results and store it as a message.

        Args:
            workflow: The workflow object; the summary message is appended to its "messages"
            results: Results from activity executions (title, content, agent)

        Returns:
            The summary message that was appended to the workflow
        """
        # Collect the prompt pieces and join them once at the end
        sections = ["Create a clear, concise summary of the following workflow results:\n\n"]

        for position, outcome in enumerate(results, 1):
            heading = outcome.get("title", f"Activity {position}")
            body = outcome.get("content", "")
            executor = outcome.get("agent", "unknown")

            # Only the first 500 characters of a result go into the prompt
            if len(body) > 500:
                body = body[:500] + "..."

            sections.append(f"""
            ACTIVITY {position}: {heading}
            Executed by: {executor}

            {body}

            ---
            """)

        sections.append("""
        Provide a well-structured summary that:
        1. Highlights the key findings and results
        2. Connects the results to the original task
        3. Presents any conclusions or recommendations

        Make sure the summary is clear, concise, and useful to the user.
        """)

        # Single user-role request to the AI service
        request = [{"role": "user", "content": "".join(sections)}]
        summary_content = await self.ai_service.call_api(request)

        # Wrap the answer in an assistant message flagged as the workflow's closing message
        summary_message = self._create_message(workflow, "assistant")
        summary_message.update({
            "content": summary_content,
            "agent_type": "summary",
            "agent_id": "workflow_summary",
            "agent_name": "Workflow Summary",
            "result_format": "Text",
            "workflow_complete": True,
        })

        workflow["messages"].append(summary_message)
        return summary_message

    def _create_message(self, workflow: Dict[str, Any], role: str) -> Dict[str, Any]:
        """
        Build a fresh, pending message dict for this workflow.

        The message is only created here; callers append it to the workflow
        themselves. Its sequence number is one more than the number of
        messages currently stored in the workflow.
        """
        # NOTE: utility function that should be moved to the utility module
        new_id = f"msg_{uuid.uuid4()}"
        created_at = datetime.now().isoformat()

        # Missing or empty message list -> this is message number 1
        existing = workflow.get("messages")
        position = len(existing) + 1 if existing else 1

        return {
            "id": new_id,
            "workflow_id": self.workflow_id,
            "started_at": created_at,
            "finished_at": None,
            "sequence_no": position,

            "status": "pending",
            "role": role,

            # Counters start at zero for every new message
            "data_stats": {
                "processing_time": 0.0,
                "token_count": 0,
                "bytes_sent": 0,
                "bytes_received": 0
            },

            "documents": [],
            "content": None,
            "agent_type": None
        }

    def _extract_files_from_workflow(self, workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Collect one entry per distinct file referenced by the workflow's messages.

        Every document whose source type is "file" contributes an entry with
        id, name, type and content_type (both taken from the source's
        content_type). The first occurrence of an id wins; later documents
        with the same id are ignored.
        """
        collected = []

        for entry in workflow.get("messages", []):
            for document in entry.get("documents", []):
                origin = document.get("source", {})

                # Skip everything that is not a file document
                if origin.get("type") != "file":
                    continue

                file_id = origin.get("id", "")

                # Skip ids that were already collected (avoid duplicates)
                if any(known.get("id") == file_id for known in collected):
                    continue

                mime = origin.get("content_type", "")
                collected.append({
                    "id": file_id,
                    "name": origin.get("name", ""),
                    "type": mime,
                    "content_type": mime
                })

        return collected

    def _add_log(self, workflow_id: str, message: str, log_type: str, agent_id: str = None, agent_name: str = None):
        """
        Forward a log entry to the workflow manager's log function.

        All five arguments are passed on positionally and unchanged; nothing is returned.
        """
        forward = self.workflow_manager._add_log
        forward(workflow_id, message, log_type, agent_id, agent_name)