# gateway/gwserver/old_modules_copy/agentservice_agent_coder.py

"""
Simplified Coder Agent for developing and executing Python code.
This agent uses the CodeExecutor from the helper module to execute code.
"""

import logging
import json
import re
import uuid
import traceback
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple

from modules.agentservice_base import BaseAgent
from modules.agentservice_utils import FileUtils, WorkflowUtils, MessageUtils, LoggingUtils
from connectors.connector_aichat_openai import ChatService

# Module-level logger named after this module; used by CoderAgent for
# diagnostics that are not tied to a specific workflow log.
logger = logging.getLogger(__name__)

class CoderAgent(BaseAgent):
    """Agent for developing and executing Python code"""

    def __init__(self):
        """Set up the agent's identity, helper utilities and execution/AI defaults"""
        super().__init__()

        # Identity as reported to the agent registry
        self.id = self.type = "coder"
        self.name = "Python Code Agent"
        self.description = "Develops and executes Python code"
        self.capabilities = "code_development,data_processing,file_processing,automation"
        self.result_format = "python_code"

        # Shared helpers for files and message construction
        self.file_utils = FileUtils()
        self.message_utils = MessageUtils()

        # Limits handed to the code executor: timeout in seconds, memory in MB
        self.executor_timeout, self.executor_memory_limit = 60, 512

        # AI generation settings: a low temperature keeps generated code more
        # deterministic; the token budget leaves room for complex code
        self.ai_temperature, self.ai_max_tokens = 0.2, 2000

    def get_agent_info(self) -> Dict[str, Any]:
        """Get agent information for agent registry"""
        return {
            "id": self.id,
            "type": self.type,
            "name": self.name,
            "description": self.description,
            "capabilities": self.capabilities,
            "result_format": self.result_format,
            "metadata": {
                "timeout": self.executor_timeout,
                "memory_limit": self.executor_memory_limit
            }
        }

    async def process_message(self, message: Dict[str, Any],
                         workflow: Dict[str, Any],
                         context: Optional[Dict[str, Any]] = None,
                         log_func=None) -> Dict[str, Any]:
        """
        Processes a message to develop and execute Python code.

        If the message content contains a fenced code block, the first block is
        cleaned and executed. Otherwise code and its package requirements are
        generated with the AI service and then executed.

        Args:
            message: The message to process ("id", "content" and "documents" are read)
            workflow: The current workflow ("id" is read)
            context: Additional context information (not read by this method)
            log_func: Function for workflow logging

        Returns:
            Response message. Execution failures and unexpected exceptions are
            reported in the response "content" instead of being raised.
        """
        # Initialize logging
        workflow_id = workflow.get("id")
        logging_utils = LoggingUtils(workflow_id, log_func)
        logging_utils.info("CoderAgent starting processing", "agents")

        # Create response message
        response = self.message_utils.create_message(workflow_id, role="assistant")
        response["agent_type"] = self.type
        response["agent_name"] = self.name
        response["parent_message_id"] = message.get("id")
        response["documents"] = []

        try:
            # A key that is present but None must not reach re.findall or the
            # document loops, so fall back to empty values.
            content = message.get("content") or ""
            documents = message.get("documents") or []

            # Code pasted by the user carries no package list; only the AI
            # generation path below supplies requirements. Without this default
            # the name would be unbound on the user-code path.
            requirements: List[str] = []

            # Check if user directly provided code
            code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', content)
            if code_blocks:
                # Use the first code block found, cleaned of markdown residue
                code_to_execute = self._clean_code(code_blocks[0])
                logging_utils.info(f"Code extracted from message ({len(code_to_execute)} characters)", "agents")
            else:
                # Generate code based on the message content using AI
                logging_utils.info("No code found in message, generating new code with AI", "agents")
                code_to_execute, requirements = await self._generate_code_from_prompt(content, documents)
                if not code_to_execute:
                    logging_utils.warning("AI could not generate code", "agents")
                    response["content"] = "I couldn't generate executable code based on your request. Please provide more detailed instructions."
                    self.message_utils.finalize_message(response)
                    return response
                logging_utils.info(f"Code generated with AI ({len(code_to_execute)} characters)", "agents")

            if not code_to_execute:
                # A code block was found but was empty after cleaning
                response["content"] = "I couldn't find or generate executable code. Please provide Python code or explain your requirements more clearly."
            else:
                logging_utils.info("Executing code", "execution")

                # Prepare execution context
                execution_context = {
                    "workflow_id": workflow_id,
                    "documents": documents,
                    "message": message,
                    "log_func": log_func
                }

                result = await self._execute_code(code_to_execute, requirements, execution_context)

                if result.get("success", False):
                    output = result.get("output", "")
                    execution_result = result.get("result")
                    logging_utils.info("Code executed successfully", "execution")

                    sections = [
                        "## Code executed successfully\n\n",
                        f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n",
                    ]
                    if output:
                        sections.append(f"### Output\n\n```\n{output}\n```\n\n")
                    # Compare against None so legitimate falsy results such as
                    # 0, False or an empty list are still shown.
                    if execution_result is not None:
                        result_str = self._format_execution_result(execution_result)
                        sections.append(f"### Result\n\n```\n{result_str}\n```\n\n")
                    response["content"] = "".join(sections)

                    # Attach any files the executed code reported as created
                    if isinstance(execution_result, dict) and "created_files" in execution_result:
                        for file_info in execution_result.get("created_files") or []:
                            # Skip malformed entries instead of failing the whole response
                            if not isinstance(file_info, dict):
                                continue
                            file_id = file_info.get("id")
                            if file_id:
                                logging_utils.info(f"Adding created file {file_info.get('name', file_id)} to documents", "files")
                                response["documents"].append({
                                    "id": f"doc_{uuid.uuid4()}",
                                    "source": file_info,
                                    "type": "file"
                                })
                else:
                    error = result.get("error", "Unknown error")
                    logging_utils.error(f"Error during code execution: {error}", "execution")

                    response["content"] = (
                        "## Error during code execution\n\n"
                        f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n"
                        f"### Error\n\n```\n{error}\n```\n\n"
                        + self._get_error_recommendation(error)
                    )

            # Finalize response
            self.message_utils.finalize_message(response)

            # Logged for every request that reaches this point, including
            # requests whose code execution failed.
            logging_utils.info("CoderAgent has successfully processed the request", "agents")

            return response

        except Exception as e:
            error_msg = f"Error during processing by the CoderAgent: {str(e)}"
            logging_utils.error(error_msg, "error")

            # Create error response
            response["content"] = f"## Processing Error\n\n```\n{error_msg}\n\n{traceback.format_exc()}\n```"
            self.message_utils.finalize_message(response)

            return response

    @staticmethod
    def _format_execution_result(value: Any) -> str:
        """Render an execution result for display, as JSON when possible, else via str()."""
        if isinstance(value, (dict, list)):
            try:
                return json.dumps(value, indent=2)
            except (TypeError, ValueError):
                # Non-serializable or circular content must not turn a
                # successful execution into a processing error.
                return str(value)
        return str(value)

    @staticmethod
    def _get_error_recommendation(error: Any) -> str:
        """Return advice text for an execution error, or "" when no helper is available."""
        # NOTE(review): get_error_recommendation is neither defined nor imported
        # in this module, so calling it directly raised NameError and replaced
        # the real execution error with a processing error. Use it only if it
        # exists at module level - confirm where it is meant to come from.
        helper = globals().get("get_error_recommendation")
        if not callable(helper):
            return ""
        recommendation = helper(error)
        return recommendation if isinstance(recommendation, str) else ""

    def _clean_code(self, code: str) -> str:
        """
        Clean up code by removing markdown code block markers and handling other formatting issues.

        Args:
            code: The code string to clean

        Returns:
            Cleaned code string
        """
        import re

        # Remove code block markers if present
        code = re.sub(r'^```(?:python)?\s*', '', code)
        code = re.sub(r'```\s*$', '', code)

        # Fix potential string literal issues
        lines = code.split('\n')
        fixed_lines = []
        in_string = False
        string_delimiter = None

        for line in lines:
            # Very basic string literal parsing - not perfect but helps with common cases
            if in_string:
                # We're in a multi-line string, check if it ends
                if string_delimiter in line and not line.endswith('\\'):
                    in_string = False
            else:
                # Check for unclosed string literals
                for delimiter in ['"', "'"]:
                    count = line.count(delimiter)
                    # If odd number of delimiters and not escaped
                    if count % 2 == 1 and not line.endswith('\\'):
                        in_string = True
                        string_delimiter = delimiter
                        break

            fixed_lines.append(line)

        # If we ended with an unclosed string, add a closing delimiter
        if in_string:
            fixed_lines[-1] += string_delimiter
            logger.warning(f"Fixed unclosed string literal in code")

        return '\n'.join(fixed_lines)

    async def _generate_code_from_prompt(self, prompt: str, documents: List[Dict[str, Any]]) -> Tuple[str, List[str]]:
        """
        Generate Python code from a prompt using AI service.

        The AI is asked to answer with a "## requirements.txt" section followed
        by a "## python" section. Both are parsed out of the reply; if no
        "## python" section is found, the first fenced code block (or the whole
        reply) is used as the code.

        Args:
            prompt: The prompt to generate code from
            documents: Documents associated with the prompt; each document's
                "source" dict is read for "name", "content_type" and "id"

        Returns:
            Tuple of (generated Python code, required packages). If anything
            fails, the code is a small script that prints the error and sets
            'result' to an error dict, and the package list is empty.
        """
        try:
            # Initialize AI service
            chat_service = ChatService()

            # Prepare a prompt for code generation
            ai_prompt = f"""Generate Python code to solve the following task:
    {prompt}

    Available documents:
    """
            # Add information about available documents
            if documents:
                for i, doc in enumerate(documents):
                    # Tolerate a document whose "source" is missing or None
                    source = doc.get("source") or {}
                    doc_name = source.get("name", f"Document {i+1}")
                    doc_type = source.get("content_type", "unknown")
                    doc_id = source.get("id", "")

                    ai_prompt += f"- {doc_name} (type: {doc_type}, id: {doc_id})\n"

            ai_prompt += """
IMPORTANT REQUIREMENTS:
1. Your code MUST define a 'result' variable that captures the output of your code.
The execution framework specifically looks for this variable.
2. Write only executable Python code in the Python section.
3. Do not include any text explanations or markdown outside of code comments (#).
4. All explanations should be within Python comments only.
5. Make your code complete and self-contained.
6. For CSV processing, include proper error handling.

Return your response in the following format:

## requirements.txt
# Each required package on its own line
pandas
numpy
matplotlib

## python
import pandas as pd
import numpy as np

# Load and process data
def process_data(file_path):
    try:
        # Read the CSV file
        df = pd.read_csv(file_path)
        return df
    except Exception as e:
        print(f"Error: {e}")
        return None

# Main processing logic
data = process_data('data.csv')

# Analyze data
if data is not None:
    summary = data.describe()
    print("Data summary:")
    print(summary)

    # IMPORTANT: Define result variable to return data
    result = {
        "summary": summary.to_dict(),
        "columns": list(data.columns),
        "row_count": len(data)
    }
else:
    # Always define a result, even in error cases
    result = {"error": "Failed to process data"}
    """

            # Create messages for the API
            messages = [
                {"role": "system", "content": "You are a Python code generator. Generate executable Python code following the specified format with requirements.txt and code sections. The code must be well-commented, include error handling, and define a 'result' variable to capture output."},
                {"role": "user", "content": ai_prompt}
            ]

            # Call the API
            logger.info("Calling AI API to generate code")
            generated_content = await chat_service.call_api(messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens)

            # A section ends only at the next known section header on its own
            # line (or at the end of the reply). Stopping at any "##" would
            # truncate generated code that contains "##" in a comment or string.
            section_end = r'(?=^#{2,}[ \t]*(?:requirements\.txt|python)[ \t\r]*$|\Z)'

            # Extract requirements.txt content
            requirements_match = re.search(
                r'^#{2,}[ \t]*requirements\.txt[ \t\r]*$\s*([\s\S]*?)' + section_end,
                generated_content,
                re.MULTILINE,
            )
            requirements = []
            if requirements_match:
                for line in requirements_match.group(1).strip().split('\n'):
                    line = line.strip()
                    # Skip empty lines, comments, and markdown formatting
                    if not line or line.startswith('#') or line.startswith('`') or line.endswith('`') or '```' in line:
                        continue
                    requirements.append(line)

            # Extract Python code
            code_match = re.search(
                r'^#{2,}[ \t]*python[ \t\r]*$\s*([\s\S]*?)' + section_end,
                generated_content,
                re.MULTILINE,
            )
            if code_match:
                code = code_match.group(1).strip()
            else:
                # Fallback to legacy code block extraction
                code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', generated_content)
                code = code_blocks[0].strip() if code_blocks else generated_content.strip()

            # Clean the code to remove any markdown formatting
            code = self._clean_code(code)

            return code, requirements

        except Exception as e:
            logger.error(f"Error generating code with AI: {str(e)}", exc_info=True)
            # Embed the message as a Python literal via repr() so quotes,
            # backslashes, braces and newlines in the error text cannot break
            # or alter the fallback script.
            error_literal = repr(str(e))
            fallback_code = (
                "\n"
                "# Error during code generation\n"
                f"_generation_error = {error_literal}\n"
                'print("An error occurred during code generation: " + _generation_error)\n'
                "# Return an error result\n"
                'result = {"error": "Code generation failed", "message": _generation_error}\n'
            )
            return fallback_code, []


    async def _execute_code(self, code: str, requirements: List[str] = None, context: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Execute Python code using the CodeExecutor.

        Args:
            code: The Python code to execute
            requirements: List of required packages
            context: Additional context for execution

        Returns:
            Result of code execution
        """
        # Get workflow ID and set up logging
        workflow_id = context.get("workflow_id", "") if context else ""
        logging_utils = None
        if "log_func" in context and workflow_id:
            logging_utils = LoggingUtils(workflow_id, context.get("log_func"))

        if logging_utils:
            logging_utils.info("Executing Python code", "execution")
            if requirements:
                logging_utils.info(f"Required packages: {', '.join(requirements)}", "execution")

        try:
            # List of blocked packages for security
            blocked_packages = [
                "cryptography", "flask", "django", "tornado",  # Security risks
                "tensorflow", "pytorch", "scikit-learn"  # Resource intensive
            ]

            # Initialize CodeExecutor with requirements and workflow_id for persistence
            executor = CodeExecutor(
                workflow_id=workflow_id,
                timeout=self.executor_timeout,
                max_memory_mb=self.executor_memory_limit,
                requirements=requirements,
                blocked_packages=blocked_packages
            )

            # Prepare input data for the code
            input_data = {"context": context, "workflow_id": workflow_id}

            # Add file references if available
            if context and "documents" in context:
                input_data["files"] = [
                    {
                        "id": doc.get("source", {}).get("id", ""),
                        "name": doc.get("source", {}).get("name", ""),
                        "type": doc.get("source", {}).get("content_type", "")
                    }
                    for doc in context.get("documents", [])
                    if doc.get("source", {}).get("type") == "file"
                ]

            # Execute the code
            result = executor.execute_code(code, input_data)

            # Log the execution results
            if logging_utils:
                if result.get("success", False):
                    logging_utils.info("Code executed successfully", "execution")

                    # Log a preview of the output
                    output = result.get("output", "")
                    if output:
                        preview = output[:1000] + "..." if len(output) > 1000 else output
                        logging_utils.info(f"Output preview: {preview}", "execution")

                    # Log a preview of the result
                    execution_result = result.get("result")
                    if execution_result:
                        if isinstance(execution_result, (dict, list)):
                            result_str = json.dumps(execution_result, indent=2)
                            preview = result_str[:1000] + "..." if len(result_str) > 1000 else result_str
                        else:
                            str_result = str(execution_result)
                            preview = str_result[:1000] + "..." if len(str_result) > 1000 else str_result

                        logging_utils.info(f"Result preview: {preview}", "execution")
                else:
                    # Log error information
                    error = result.get("error", "Unknown error")
                    logging_utils.error(f"Error during code execution: {error}", "execution")

            # Only clean up non-persistent environments
            if not executor.is_persistent:
                executor.cleanup()

            return result

        except Exception as e:
            error_message = f"Error during code execution: {str(e)}\n{traceback.format_exc()}"
            if logging_utils:
                logging_utils.error(error_message, "error")

            return {
                "success": False,
                "output": "",
                "error": error_message,
                "result": None
            }


# Module-level cache for the one shared CoderAgent; filled on first request
_coder_agent = None

def get_coder_agent():
    """Return the shared Coder Agent, creating it on the first call"""
    global _coder_agent
    if _coder_agent is not None:
        return _coder_agent
    _coder_agent = CoderAgent()
    return _coder_agent