# gateway/gwserver/modules/agentservice_workflow_execution.py

"""
Refactored architecture for the Agentservice multi-agent system.
This module defines the revised workflow execution with improved agent handovers.
"""

import os
import logging
import asyncio
import uuid
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple, Union

# Module-level logger, named after this module
logger = logging.getLogger(__name__)
# Set the matplotlib font manager logger's threshold to INFO
logging.getLogger('matplotlib.font_manager').setLevel(logging.INFO)

class WorkflowExecution:
    """
    Handles the execution of workflows with improved agent collaboration.
    Integrates planning and execution phases for better context awareness.
    """

    def __init__(self, workflow_manager, workflow_id: str, mandate_id: int, user_id: int, ai_service, lucydom_interface):
        """
        Set up a workflow execution and wire its collaborators together.

        Stores the constructor arguments on the instance, then creates the
        utility helpers, the workflow file manager and the document handler,
        and finally passes the shared dependencies to the agent registry.

        Args:
            workflow_manager: Owner object; its ``_add_log`` and ``_save_workflow``
                methods are called by this class
            workflow_id: Identifier of the workflow being executed
            mandate_id: Mandate identifier, stored on the instance
            user_id: User identifier, stored on the instance
            ai_service: AI service handed to the document handler and the agents
            lucydom_interface: Interface handed to the file manager, the
                document handler and the agents
        """
        # Keep the constructor arguments
        self.workflow_manager = workflow_manager
        self.workflow_id = workflow_id
        self.mandate_id = mandate_id
        self.user_id = user_id
        self.ai_service = ai_service
        self.lucydom_interface = lucydom_interface

        # Project modules are imported here, at construction time
        from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
        from modules.agentservice_registry import AgentRegistry
        from modules.agentservice_filemanager import get_workflow_file_manager
        from modules.agentservice_document_handler import get_document_handler

        # Helper objects; workflow log entries are routed through self._add_log
        self.workflow_utils = WorkflowUtils(workflow_id)
        self.message_utils = MessageUtils()
        self.logging_utils = LoggingUtils(workflow_id, self._add_log)

        # Registry used to look up agents by ID
        self.agent_registry = AgentRegistry.get_instance()

        # File manager and document handler for this workflow
        self.file_manager = get_workflow_file_manager(workflow_id, lucydom_interface)
        self.document_handler = get_document_handler(workflow_id, lucydom_interface, ai_service)

        # Set dependencies for agents
        shared_dependencies = {
            "ai_service": ai_service,
            "document_handler": self.document_handler,
            "lucydom_interface": lucydom_interface,
        }
        self.agent_registry.set_dependencies(**shared_dependencies)

    async def execute(self, message: Dict[str, Any], workflow: Dict[str, Any], files: List[Dict[str, Any]] = None, is_user_input: bool = False):
        """
        Execute the workflow with integrated planning and agent selection.

        Args:
            message: The initiating message (prompt or user input)
            workflow: The workflow object
            files: Optional list of file metadata
            is_user_input: Flag indicating if this is user input

        Returns:
            Dict with workflow status and result
        """
        try:
            # 1. Initialize workflow logging
            self.logging_utils.info("Starting workflow execution", "workflow", "Workflow initialized")

            # 2. Process user message and files
            user_message = await self._process_user_message(workflow, message, files)
            self.logging_utils.info("User message processed", "workflow", "User input added to workflow")

            # 3. Create agent-aware work plan
            work_plan = await self._create_agent_aware_work_plan(workflow, user_message)
            self.logging_utils.info(f"Created agent-aware work plan with {len(work_plan)} activities", "planning")
            self.logging_utils.debug(f"{work_plan}.", "planning")

            # 4. Execute the activities in the work plan
            results = await self._execute_work_plan(workflow, work_plan)

            # 5. Create summary
            summary = await self._create_summary(workflow, results)
            self.logging_utils.info("Created workflow summary", "summary")

            # Set workflow status to completed
            workflow["status"] = "completed"
            workflow["last_activity"] = datetime.now().isoformat()

            # Final save
            self.workflow_manager._save_workflow(workflow)

            return {
                "workflow_id": self.workflow_id,
                "status": "completed",
                "messages": workflow.get("messages", [])
            }

        except Exception as e:
            self.logging_utils.error(f"Workflow execution failed: {str(e)}", "error")
            workflow["status"] = "failed"
            self.workflow_manager._save_workflow(workflow)

            return {
                "workflow_id": self.workflow_id,
                "status": "failed",
                "error": str(e)
            }

    async def _process_user_message(self, workflow: Dict[str, Any], message: Dict[str, Any], files: List[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Process the user message and add it to the workflow.

        Args:
            workflow: The workflow object
            message: The user message
            files: Optional list of file metadata

        Returns:
            The processed user message
        """
        # Create a message with user input
        user_message = self._create_message(workflow, message.get("role", "user"))
        user_message["content"] = message.get("content", "")

        # Process files if provided
        if files and len(files) > 0:
            self.logging_utils.info(f"Processing {len(files)} files", "files")

            # Add files to message via file manager instead of _process_files
            user_message = await self.file_manager.add_files_to_message(
                user_message,
                [f.get('id') for f in files],
                self._add_log
            )

        # Add the message to the workflow
        if "messages" not in workflow:
            workflow["messages"] = []
        workflow["messages"].append(user_message)

        # Save workflow state
        self.workflow_manager._save_workflow(workflow)

        return user_message

    async def _create_agent_aware_work_plan(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Create an agent-aware work plan that integrates agent selection during planning.

        Args:
            workflow: The workflow object
            message: The initiating message

        Returns:
            List of structured activities with agent assignments
        """
        # Extract context information
        task = message.get("content", "")

        # Get all available agents and their capabilities
        agent_infos = self.agent_registry.get_agent_infos()

        # Extract documents
        documents = message.get("documents", [])
        document_info = []
        for doc in documents:
            source = doc.get("source", {})
            document_info.append({
                "id": doc.get("id"),
                "name": source.get("name", "unnamed"),
                "type": source.get("type", "unknown"),
                "content_type": source.get("content_type", "unknown")
            })

        # Create the planning prompt with agent awareness and document handling information
        plan_prompt = f"""
As an AI workflow manager, create a detailed agent-aware work plan for the following task:

TASK: {task}

AVAILABLE AGENTS:
{self._format_agent_info(agent_infos)}

AVAILABLE DOCUMENTS:
{document_info if document_info else "No documents provided"}

IMPORTANT: Document extraction happens automatically in the workflow. Documents in the message are already available to all agents. DO NOT assign agent_coder or any other agent specifically for just reading or extracting document content. Only assign agents for tasks that require specific processing beyond what the document handler already provides.

The work plan should include a structured list of activities. Each activity should have:
1. title - A short descriptive title for the activity
2. description - What needs to be done in this activity
3. assigned_agents - List of agent IDs that should handle this activity (can be multiple in sequence)
4. agent_prompts - Specific instructions for each agent (matched by index to assigned_agents)
5. document_requirements - Description of which documents are needed for this activity (these will be automatically extracted)
6. expected_output - The expected output format and content
7. dependencies - List of previous activities this depends on (by index)

IMPORTANT GUIDELINES:
- Each activity should have clear objectives and be assigned to the most appropriate agent(s)
- When multiple agents are assigned to an activity, specify the sequence and how outputs should flow between them
- Documents are processed on-demand by the system's document handler, so only specify which documents are needed, not how to extract them
- DO NOT create activities that only read or extract document content - this happens automatically
- Create a logical sequence where later activities can use outputs from earlier ones
- If no specialized agent is needed for a task, use the default "assistant" agent
- Only use the agent_coder for tasks that require actual coding or complex data analysis, not for simply reading documents

Return the work plan as a JSON array of activity objects, each with the above properties.
"""

        self.logging_utils.info("Creating agent-aware work plan", "planning")

        # Call AI to generate work plan
        try:
            plan_response = await self.ai_service.call_api([{"role": "user", "content": plan_prompt}])

            # Extract JSON plan
            import json
            import re

            # Look for JSON array in the response
            json_pattern = r'\[\s*\{.*\}\s*\]'
            json_match = re.search(json_pattern, plan_response, re.DOTALL)

            if json_match:
                json_str = json_match.group(0)
                work_plan = json.loads(json_str)
                self.logging_utils.info(f"Work plan created with {len(work_plan)} activities", "planning")
                return work_plan
            else:
                self.logging_utils.warning("Could not extract JSON from AI response", "planning")

                # Fallback: Create a simple default work plan
                return [{
                    "title": "Process Task",
                    "description": "Process the user's request directly",
                    "assigned_agents": ["assistant"],
                    "agent_prompts": [task],
                    "document_requirements": "All available documents may be needed",
                    "expected_output": "Text",
                    "dependencies": []
                }]

        except Exception as e:
            self.logging_utils.error(f"Error creating work plan: {str(e)}", "planning")
            # Return a minimal fallback plan
            return [{
                "title": "Process Task (Error Recovery)",
                "description": "Process the user's request after planning error",
                "assigned_agents": ["assistant"],
                "agent_prompts": [task],
                "document_requirements": "All available documents may be needed",
                "expected_output": "Text",
                "dependencies": []
            }]

    async def _execute_work_plan(self, workflow: Dict[str, Any], work_plan: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Execute all activities in the work plan with proper agent handovers.

        Args:
            workflow: The workflow object
            work_plan: The work plan with activities

        Returns:
            Results from all activities
        """
        results = []
        activity_outputs = {}  # Store outputs for dependency resolution

        for activity_index, activity in enumerate(work_plan):
            # Extract activity info
            title = activity.get("title", f"Activity {activity_index+1}")
            description = activity.get("description", "")
            assigned_agents = activity.get("assigned_agents", ["assistant"])
            agent_prompts = activity.get("agent_prompts", [description])
            doc_requirements = activity.get("document_requirements", "")
            expected_output = activity.get("expected_output", "Text")
            dependencies = activity.get("dependencies", [])

            self.logging_utils.info(f"Starting activity: {title}", "execution")

            # Validate assigned_agents and agent_prompts
            if len(assigned_agents) > len(agent_prompts):
                # Duplicate the last prompt for additional agents
                agent_prompts.extend([agent_prompts[-1]] * (len(assigned_agents) - len(agent_prompts)))
            elif len(agent_prompts) > len(assigned_agents):
                # Truncate excess prompts
                agent_prompts = agent_prompts[:len(assigned_agents)]

            # Process dependencies first
            dependency_context = {}
            for dep_index in dependencies:
                if dep_index < activity_index and dep_index in activity_outputs:
                    dep_output = activity_outputs[dep_index]
                    dependency_context[f"activity_{dep_index+1}"] = dep_output

            # Extract required documents if needed
            document_content = ""
            if doc_requirements:
                extracted_data = await self._extract_required_documents(workflow, doc_requirements)
                if extracted_data and "extracted_content" in extracted_data:
                    # Format document content for the prompt
                    document_content = "\n\n=== EXTRACTED DOCUMENT CONTENT ===\n\n"
                    for item in extracted_data.get("extracted_content", []):
                        doc_name = item.get("name", "Unnamed document")
                        doc_content = item.get("content", "No content available")
                        document_content += f"--- {doc_name} ---\n{doc_content}\n\n"

            # Execute the activity with the assigned agents
            activity_result = await self._execute_agent_sequence(
                workflow,
                assigned_agents,
                agent_prompts,
                document_content,
                dependency_context,
                expected_output
            )

            # Store the result
            activity_outputs[activity_index] = activity_result
            results.append({
                "title": title,
                "description": description,
                "agents": assigned_agents,
                "result": activity_result.get("content", ""),
                "output_format": activity_result.get("format", "Text")
            })

            self.logging_utils.info(f"Completed activity: {title}", "execution")

            # Save intermediate state
            self.workflow_manager._save_workflow(workflow)

        return results

    async def _execute_agent_sequence(
        self,
        workflow: Dict[str, Any],
        agent_ids: List[str],
        prompts: List[str],
        document_content: str,
        dependency_context: Dict[str, Any],
        expected_output: str
    ) -> Dict[str, Any]:
        """
        Execute a sequence of agents with proper handovers.

        Args:
            workflow: The workflow object
            agent_ids: List of agent IDs to execute in sequence
            prompts: List of prompts for each agent
            document_content: Extracted document content
            dependency_context: Context from dependent activities
            expected_output: Expected output format

        Returns:
            Result of the agent sequence execution
        """
        context = {
            "workflow_id": self.workflow_id,
            "expected_format": expected_output,
            "dependency_outputs": dependency_context
        }

        last_result = None
        last_documents = []

        for i, agent_id in enumerate(agent_ids):
            # Get the agent
            agent = self.agent_registry.get_agent(agent_id)
            if agent:
                # Ensure dependencies are set
                if hasattr(agent, 'set_dependencies'):
                    agent.set_dependencies(
                        ai_service=self.ai_service,
                        document_handler=self.document_handler,
                        lucydom_interface=self.lucydom_interface
                    )

                # Set document handler if agent supports it
                if hasattr(agent, 'set_document_handler') and hasattr(self, 'document_handler'):
                    agent.set_document_handler(self.document_handler)


            if not agent:
                self.logging_utils.warning(f"Agent '{agent_id}' not found, using assistant instead", "agents")
                agent = self.agent_registry.get_agent("assistant")
                if not agent:
                    # If assistant not found, create a minimal agent response
                    continue

            # Get the agent prompt
            base_prompt = prompts[i] if i < len(prompts) else prompts[-1]

            # Enhance the prompt with context
            enhanced_prompt = self._enhance_prompt(
                base_prompt,
                document_content,
                dependency_context,
                last_result.get("content", "") if last_result else "",
                i > 0  # is_continuation flag
            )

            # Create the message for this agent
            agent_message = self._create_message(workflow, "user")
            agent_message["content"] = enhanced_prompt

            # IMPORTANT FIX: Document handling logic
            # First, check if we have documents from previous agent if this is a continuation
            if last_documents and i > 0:
                agent_message["documents"] = last_documents
            # For the first agent, make sure we pass any documents from the most recent user message
            elif i == 0:
                # Find the most recent user message with documents
                for msg in reversed(workflow.get("messages", [])):
                    if msg.get("role") == "user" and msg.get("documents"):
                        agent_message["documents"] = msg.get("documents", [])
                        self.logging_utils.info(f"Passing {len(agent_message['documents'])} documents from user message to {agent_id}", "agents")
                        break

            # Log agent execution
            self.logging_utils.info(f"Executing agent: {agent_id}", "agents")

            # Execute the agent
            agent_response = await agent.process_message(agent_message, context)

            # Create response message
            response_message = self._create_message(workflow, "assistant")
            response_message["content"] = agent_response.get("content", "")
            response_message["agent_type"] = agent_id
            response_message["agent_id"] = agent_id
            response_message["agent_name"] = agent.name
            response_message["result_format"] = agent_response.get("result_format", expected_output)

            # Capture documents from response
            if "documents" in agent_response:
                response_message["documents"] = agent_response["documents"]
                last_documents = agent_response["documents"]
                self.logging_utils.info(f"Agent {agent_id} produced {len(last_documents)} documents", "agents")

            # Add to workflow
            workflow["messages"].append(response_message)

            # Update last result
            last_result = {
                "content": agent_response.get("content", ""),
                "format": agent_response.get("result_format", expected_output),
                "agent_id": agent_id,
                "documents": agent_response.get("documents", [])
            }

        return last_result or {
            "content": "No agent response was generated.",
            "format": "Text"
        }


    async def _extract_required_documents(self, workflow: Dict[str, Any], doc_requirements: str) -> Dict[str, Any]:
        """
        Run the data extraction module for the documents an activity needs.

        Args:
            workflow: The workflow object; its files and messages are handed
                to the extraction call
            doc_requirements: Description of document requirements, used as
                the extraction prompt

        Returns:
            Whatever ``data_extraction`` returns for this workflow
        """
        # Imported here rather than at module level
        from modules.agentservice_dataextraction import data_extraction

        extraction_arguments = {
            "prompt": doc_requirements,
            # All files known to the workflow
            "files": self.workflow_utils.get_files(workflow),
            # All messages recorded so far
            "messages": workflow.get("messages", []),
            "ai_service": self.ai_service,
            "lucydom_interface": self.lucydom_interface,
            "workflow_id": self.workflow_id,
            "add_log_func": self._add_log,
        }

        return await data_extraction(**extraction_arguments)

    async def _create_summary(self, workflow: Dict[str, Any], results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Create a summary of the workflow results for the user.

        Args:
            workflow: The workflow object
            results: Results from activity executions

        Returns:
            Summary message
        """
        # Create a summary prompt
        summary_prompt = "Create a clear, concise summary of the following workflow results:\n\n"

        for i, result in enumerate(results, 1):
            title = result.get("title", f"Activity {i}")
            description = result.get("description", "")
            content = result.get("result", "")
            agents = ", ".join(result.get("agents", ["unknown"]))

            # Limit content length for the summary prompt
            content_preview = content[:500] + "..." if len(content) > 500 else content

            summary_prompt += f"""
            ACTIVITY {i}: {title}
            Description: {description}
            Executed by: {agents}

            {content_preview}

            ---
            """

        summary_prompt += """
        Provide a well-structured summary that:
        1. Highlights the key findings and results
        2. Connects the results to the original task
        3. Presents any conclusions or recommendations

        Make sure the summary is clear, concise, and useful to the user.
        """

        # Call AI to generate summary
        summary_content = await self.ai_service.call_api([{"role": "user", "content": summary_prompt}])

        # Create summary message
        summary_message = self._create_message(workflow, "assistant")
        summary_message["content"] = summary_content
        summary_message["agent_type"] = "summary"
        summary_message["agent_id"] = "workflow_summary"
        summary_message["agent_name"] = "Workflow Summary"
        summary_message["result_format"] = "Text"
        summary_message["workflow_complete"] = True

        # Add to workflow
        workflow["messages"].append(summary_message)

        return summary_message

    def _create_message(self, workflow: Dict[str, Any], role: str) -> Dict[str, Any]:
        """Create a new message object for the workflow"""
        message_id = f"msg_{uuid.uuid4()}"
        current_time = datetime.now().isoformat()

        # Determine sequence number
        sequence_no = 1
        if "messages" in workflow and workflow["messages"]:
            sequence_no = len(workflow["messages"]) + 1

        # Create message object
        message = {
            "id": message_id,
            "workflow_id": self.workflow_id,
            "parent_message_id": None,
            "started_at": current_time,
            "finished_at": None,
            "sequence_no": sequence_no,

            "status": "pending",
            "role": role,

            "data_stats": {
                "processing_time": 0.0,
                "token_count": 0,
                "bytes_sent": 0,
                "bytes_received": 0
            },

            "documents": [],
            "content": None,
            "agent_type": None
        }

        return message

    def _add_log(self, workflow_id: str, message: str, log_type: str, agent_id: str = None, agent_name: str = None):
        """Add a log entry to the workflow"""
        # This calls back to the workflow manager's log function
        self.workflow_manager._add_log(workflow_id, message, log_type, agent_id, agent_name)

    def _format_agent_info(self, agent_infos: List[Dict[str, Any]]) -> str:
        """
        Render the agent descriptions as text for the planning prompt.

        Each agent becomes one indented bullet block listing its ID, name,
        type, description, capabilities and result format; missing fields fall
        back to 'unknown' (ID), 'Text' (result format) or an empty string.
        Returns an empty string when there are no agents.
        """
        blocks = []
        for info in agent_infos:
            agent_id = info.get('id', 'unknown')
            name = info.get('name', '')
            agent_type = info.get('type', '')
            description = info.get('description', '')
            capabilities = info.get('capabilities', '')
            result_format = info.get('result_format', 'Text')
            blocks.append(f"""
            - ID: {agent_id}
              Name: {name}
              Type: {agent_type}
              Description: {description}
              Capabilities: {capabilities}
              Result Format: {result_format}
            """)
        return "".join(blocks)

    def _enhance_prompt(
        self,
        base_prompt: str,
        document_content: str,
        dependency_context: Dict[str, Any],
        previous_result: str,
        is_continuation: bool
    ) -> str:
        """
        Append the available context to an agent prompt.

        Sections are added in this order, each only when it has content: the
        previous agent's output (continuations only), the extracted document
        content, and the outputs of the activities this one depends on.

        Args:
            base_prompt: The original prompt
            document_content: Extracted document content
            dependency_context: Outputs of earlier activities, keyed by label;
                a dict value with a "content" key contributes that content,
                any other value its ``str`` form
            previous_result: Result from previous agent in sequence
            is_continuation: Flag indicating if this is a continuation

        Returns:
            The prompt with all applicable sections appended; the base prompt
            itself when nothing applies
        """
        # Head: the prompt, wrapped with the previous output on a continuation
        if is_continuation and previous_result:
            head = f"\n{base_prompt}\n\n=== PREVIOUS AGENT OUTPUT ===\n{previous_result}\n"
        else:
            head = base_prompt

        # Tail: document content first, then dependency outputs
        tail_parts = []
        if document_content:
            tail_parts.append(f"\n\n{document_content}")

        if dependency_context:
            tail_parts.append("\n\n=== OUTPUTS FROM PREVIOUS ACTIVITIES ===\n\n")
            for label, output in dependency_context.items():
                in_standard_format = isinstance(output, dict) and "content" in output
                body = output['content'] if in_standard_format else str(output)
                tail_parts.append(f"--- {label} ---\n{body}\n\n")

        if not tail_parts:
            return head
        return head + "".join(tail_parts)