# gateway/modules/chat_agent_coder.py

"""
Simple Coder Agent for execution of Python code.
"""

import logging
import json
import os
import subprocess
import tempfile
import shutil
import sys
from typing import Dict, Any, List, Tuple

from modules.chat_registry import AgentBase
from modules.configuration import APP_CONFIG

logger = logging.getLogger(__name__)

class AgentCoder(AgentBase):
    """Simplified Agent for developing and executing Python code with integrated executor"""

    def __init__(self):
        """Set up the agent's identity, its capability list and the executor limits."""
        super().__init__()

        # Identity and self-description of this agent
        self.name = "coder"
        self.description = "Develops and executes Python code for data processing and automation"

        # Capability tags exposed by this agent
        capability_tags = (
            "code_development",
            "data_processing",
            "file_processing",
            "automation",
            "code_execution",
        )
        self.capabilities = list(capability_tags)

        # Executor limits come from the application configuration
        timeout_setting = APP_CONFIG.get("Agent_Coder_EXECUTION_TIMEOUT")
        self.executor_timeout = int(timeout_setting)  # seconds
        retry_setting = APP_CONFIG.get("Agent_Coder_EXECUTION_RETRY")
        self.execution_retry_limit = int(retry_setting)  # max retries

        # Working directory of the execution currently in progress, if any
        self.temp_dir = None

    def set_dependencies(self, mydom=None):
        """Set external dependencies for the agent."""
        self.mydom = mydom

    async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a task and perform code development/execution.

        The task is first offered to the quick-completion check, which may
        answer it directly from AI-extracted document content. If that is not
        possible, code is generated, executed and - on failure - improved and
        retried up to ``self.execution_retry_limit`` times.

        Args:
            task: Task dictionary with prompt, input_documents, output_specifications

        Returns:
            Dictionary with "feedback" (str) and "documents" (list of
            dictionaries with "label" and "content")
        """
        # 1. Extract task information
        prompt = task.get("prompt", "")
        input_documents = task.get("input_documents") or []
        output_specs = task.get("output_specifications") or []

        # The AI service is injected through set_dependencies(); getattr keeps
        # this check from raising AttributeError when it was never called.
        if not getattr(self, "mydom", None):
            logger.error("No AI service configured for the Coder agent")
            return {
                "feedback": "The Coder agent is not properly configured.",
                "documents": []
            }

        # 2. Extract data from documents in separate categories
        document_data, content_extraction = self._collect_input_data(input_documents)

        # 3. Check if task can be completed without code execution
        quick_completion = await self._check_quick_completion(prompt, content_extraction, output_specs)

        if quick_completion and quick_completion.get("complete") == 1:
            logger.info("Task completed without code execution")
            return {
                "feedback": quick_completion.get("prompt", "Task completed successfully."),
                "documents": quick_completion.get("documents") or []
            }

        # The quick check returns None when it cannot decide, so the answer must
        # not be read with .get() unconditionally.
        quick_answer = quick_completion.get('prompt', '(no answer)') if quick_completion else '(no answer)'
        logger.debug(f"Code to generate, quick check responded: {quick_answer}")

        # If quick completion not possible, continue with code generation and execution
        logger.info("Generating code to solve the task")

        # 4. Generate code using AI
        code, requirements = await self._generate_code(prompt)

        if not code:
            return {
                "feedback": "Failed to generate code for the task.",
                "documents": []
            }

        # 5. Replace the placeholder with actual input_files data.
        # Both quote styles are accepted because the model does not always
        # reproduce the placeholder line verbatim.
        input_files_assignment = f"input_files = {repr(document_data)}"
        code_with_data = code
        for placeholder in ("input_files = \"=== JSONLOAD ===\"", "input_files = '=== JSONLOAD ==='"):
            code_with_data = code_with_data.replace(placeholder, input_files_assignment)
        if code_with_data == code:
            logger.warning("input_files placeholder not found in generated code; input data was not injected")

        # 6. Execute code with retry logic
        # NOTE(review): _execute_code is synchronous (venv creation, pip install,
        # script run), so this coroutine does not yield while it runs. It is not
        # moved to a worker thread here because _execute_code keeps its working
        # directory on the instance (self.temp_dir).
        max_retries = max(0, self.execution_retry_limit)
        retry_count = 0
        execution_history = []

        # "while True" guarantees at least one execution, so execution_result is
        # always bound after the loop.
        while True:
            execution_result = self._execute_code(code_with_data, requirements)
            execution_history.append({
                "attempt": retry_count + 1,
                "code": code_with_data,
                "result": execution_result
            })

            # Check if execution was successful
            if execution_result.get("success", False):
                logger.info(f"Code execution succeeded on attempt {retry_count + 1}")
                break

            # If we've reached max retries, exit the loop
            if retry_count >= max_retries:
                logger.info(f"Reached maximum retry limit ({max_retries}). Giving up.")
                break

            # Log the error and attempt to improve the code
            error = execution_result.get("error") or "Unknown error"
            logger.info(f"Execution attempt {retry_count + 1} failed: {error}. Attempting to improve code.")

            # Generate improved code based on error
            improved_code, improved_requirements = await self._improve_code(
                original_code=code_with_data,
                error=error,
                execution_result=execution_result,
                attempt=retry_count + 1
            )

            if improved_code:
                code_with_data = improved_code
                requirements = improved_requirements
                logger.info(f"Code improved for retry {retry_count + 2}")
            else:
                logger.warning("Failed to improve code, using original code for retry")

            retry_count += 1

        # 7. Process results and create output documents
        documents = [
            # Always add the final code document
            {
                "label": "generated_code.py",
                "content": code_with_data
            },
            # Add execution history document
            {
                "label": "execution_history.json",
                "content": json.dumps(execution_history, indent=2)
            }
        ]

        if execution_result.get("success", False):
            result_data = execution_result.get("result")

            if output_specs:
                # One document per output specification
                for spec in output_specs:
                    label = spec.get("label", "output.txt")

                    # Prefer the entry of the script's result dictionary;
                    # default to the execution output
                    if isinstance(result_data, dict) and label in result_data:
                        content = result_data[label]
                    else:
                        content = execution_result.get("output", "")

                    documents.append({
                        "label": label,
                        "content": content
                    })
            else:
                # No output specs, create default output document
                documents.append({
                    "label": "execution_output.txt",
                    "content": execution_result.get("output", "")
                })

            if retry_count > 0:
                feedback = f"Code executed successfully after {retry_count + 1} attempts. Generated output files based on specifications."
            else:
                feedback = "Code executed successfully. Generated output files based on specifications."
        else:
            # Execution failed; an empty error string also falls back to the default text
            error = execution_result.get("error") or "Unknown error"
            documents.append({
                "label": "execution_error.txt",
                "content": f"Error executing code:\n\n{error}"
            })

            if retry_count > 0:
                feedback = f"Error during code execution after {retry_count + 1} attempts: {error}"
            else:
                feedback = f"Error during code execution: {error}"

        return {
            "feedback": feedback,
            "documents": documents
        }

    def _collect_input_data(self, input_documents: List[Dict[str, Any]]) -> Tuple[List[list], List[Dict[str, Any]]]:
        """
        Split input documents into raw file data and AI-extracted content.

        Args:
            input_documents: Document dictionaries as passed in the task

        Returns:
            Tuple of (document_data, content_extraction): document_data is a
            list of [filename, data, is_base64] items for code execution,
            content_extraction a list of dictionaries for the quick-completion check
        """
        document_data = []
        content_extraction = []

        for doc in input_documents:
            # Create proper filename from name and ext
            filename = f"{doc.get('name')}.{doc.get('ext')}" if doc.get('ext') else doc.get('name')

            # Main document data is assumed to be base64 encoded
            doc_data = doc.get('data', '')
            if doc_data:
                document_data.append([filename, doc_data, True])

            for content in doc.get('contents') or []:
                # For AI-extracted data (quick completion)
                if content.get('data_extracted'):
                    content_extraction.append({
                        "filename": filename,
                        "content_name": content.get('name', 'unnamed'),
                        "content_data": content.get('data_extracted', ''),
                        "content_type": content.get('content_type', ''),
                        "summary": content.get('summary', '')
                    })

                # Raw content data is passed to the code as well, unless it
                # merely repeats the main document data added above
                raw_data = content.get('data')
                if raw_data and doc_data != raw_data:
                    is_base64 = (content.get('metadata') or {}).get('base64_encoded', False)
                    document_data.append([filename, raw_data, is_base64])

        return document_data, content_extraction

    async def _improve_code(self, original_code: str, error: str, execution_result: Dict[str, Any], attempt: int) -> Tuple[str, List[str]]:
        """
        Improve code based on execution error.

        Args:
            original_code: The code that failed to execute
            error: The error message
            execution_result: Complete execution result dictionary
            attempt: Current attempt number

        Returns:
            Tuple of (improved_code, requirements)
        """
        # Create prompt for code improvement
        improvement_prompt = f"""
Fix the following Python code that failed during execution. This is attempt {attempt} to fix the code.

ORIGINAL CODE:
{original_code}

ERROR MESSAGE:
{error}

STDOUT:
{execution_result.get('output', '')}

INSTRUCTIONS:
1. Fix all errors identified in the error message
2. Diagnose and fix any logical issues
3. Pay special attention to:
   - Type conversions and data handling
   - Error handling and edge cases
   - Resource management (file handles, etc.)
   - Syntax errors and typos
4. Keep the input_files handling logic intact
5. Maintain the same overall structure and purpose

OUTPUT:
- Your improved code MUST still define a 'result' variable as a dictionary
- Each output file should be a key in the result dictionary
- DO NOT remove the input_files assignment line structure

REQUIREMENTS:
Required packages should be specified as:
# REQUIREMENTS: library==version,library2>=version
- You may add/remove requirements as needed to fix the code

Return ONLY Python code without explanations or markdown.
"""

        # Call AI service
        messages = [
            {"role": "system", "content": "You are an expert Python code debugger. Provide only fixed Python code without explanations or formatting."},
            {"role": "user", "content": improvement_prompt}
        ]

        try:
            improved_content = await self.mydom.call_ai(messages, temperature=0.2)

            # Extract code and requirements
            improved_code = self._clean_code(improved_content)

            # Extract requirements
            requirements = []
            for line in improved_code.split('\n'):
                if line.strip().startswith("# REQUIREMENTS:"):
                    req_str = line.replace("# REQUIREMENTS:", "").strip()
                    requirements = [r.strip() for r in req_str.split(',') if r.strip()]
                    break

            return improved_code, requirements
        except Exception as e:
            logger.error(f"Error improving code: {str(e)}")
            return None, []

    async def _check_quick_completion(self, prompt: str, content_extraction: List[Dict], output_specs: List[Dict]) -> Dict:
        """
        Check if the task can be completed without writing and executing code.

        Args:
            prompt: The task prompt
            content_extraction: List of extracted content data with content_name and data_extracted
            output_specs: List of output specifications

        Returns:
            Dictionary with completion status and results, or None if no quick completion
        """
        # If no data or no output specs, can't do a quick completion
        if not content_extraction or not output_specs:
            return None

        # Create a prompt for the AI to check if this can be completed directly
        specs_json = json.dumps(output_specs)
        data_json = json.dumps(content_extraction)

        check_prompt = f"""
Analyze this task and determine if it can be completed directly without writing code.

TASK:
{prompt}

EXTRACTED DATA AVAILABLE:
{data_json}

Each entry in the extracted data contains:
- filename: The source file name
- content_name: The specific content section name
- content_data: The AI-extracted text from the content
- content_type: The type of content (text, csv, etc.)
- summary: A brief summary of the content

REQUIRED OUTPUT:
{specs_json}

If the task can be completed directly with the available extracted data, respond with:
{{"complete": 1, "prompt": "Brief explanation of the solution", "documents": [
    {{"label": "filename.ext", "content": "content here"}}
]}}

If code would be needed to properly complete this task, respond with:
{{"complete": 0, "prompt": "Explanation why code is needed"}}

Only return valid JSON. Your entire response must be parseable as JSON.
"""

        # Call AI service
        logger.debug(f"Checking if task can be completed without code execution: {check_prompt}")
        messages = [
            {"role": "system", "content": "You are an AI assistant that determines if tasks require code execution. Reply with JSON only."},
            {"role": "user", "content": check_prompt}
        ]

        try:
            # Use a lower temperature for more deterministic response
            response = await self.mydom.call_ai(messages, produce_user_answer = True, temperature=0.1)

            # Parse response as JSON
            if response:
                try:
                    # Find JSON in response if there's any text around it
                    json_start = response.find('{')
                    json_end = response.rfind('}') + 1

                    if json_start >= 0 and json_end > json_start:
                        json_str = response[json_start:json_end]
                        result = json.loads(json_str)

                        # Check if this is a proper response
                        if "complete" in result:
                            return result

                except json.JSONDecodeError:
                    logger.debug("Failed to parse quick completion response as JSON")
                    pass
        except Exception as e:
            logger.debug(f"Error during quick completion check: {str(e)}")

        # Default to requiring code execution
        return None

    async def _generate_code(self, prompt: str) -> Tuple[str, List[str]]:
        """
        Generate Python code from a prompt with the input_files placeholder.

        Args:
            prompt: The task prompt
            input_files: List of [filename, data, is_base64] items

        Returns:
            Tuple of (code, requirements)
        """
        # Create prompt for code generation
        ai_prompt = f"""
Generate Python code to solve the following task:

TASK:
{prompt}

INPUT FILES:
- 'input_files' variable is provided as [[filename, data, is_base64], ...]
- For text files (is_base64=False): use data directly as string
- For binary files (is_base64=True): use base64.b64decode(data)

CODE QUALITY:
- Use explicit type conversions where needed (int/float/str)
- Implement feature detection, not version checks
- Handle errors gracefully with appropriate fallbacks
- Follow latest API conventions for libraries
- Validate inputs before processing

OUTPUT:
- Your code MUST define a 'result' variable as a dictionary to store outputs.
- Each output file should be a key in the result dictionary.
- For example: result = {{"output.txt": "output text", "results.json": json_string}}

Your code must start with:
input_files = "=== JSONLOAD ==="  # DO NOT CHANGE THIS LINE

REQUIREMENTS:
Required packages should be specified as:
# REQUIREMENTS: library==version,library2>=version
- Specify exact versions for critical libraries
- Use constraint operators (==,>=,<=) as needed

Return ONLY Python code without explanations or markdown.
"""

        # Call AI service
        messages = [
            {"role": "system", "content": "You are a Python code generator. Provide only valid Python code without explanations or formatting."},
            {"role": "user", "content": ai_prompt}
        ]

        generated_content = await self.mydom.call_ai(messages, temperature=0.1)

        # Extract code and requirements
        code = self._clean_code(generated_content)

        # Extract requirements
        requirements = []
        for line in code.split('\n'):
            if line.strip().startswith("# REQUIREMENTS:"):
                req_str = line.replace("# REQUIREMENTS:", "").strip()
                requirements = [r.strip() for r in req_str.split(',') if r.strip()]
                break

        return code, requirements

    def _execute_code(self, code: str, requirements: List[str] = None) -> Dict[str, Any]:
        """
        Execute Python code in a freshly created virtual environment.

        A temporary directory is created that holds the virtual environment,
        an optional requirements.txt and the script itself; it is removed again
        in the ``finally`` clause via ``_cleanup_execution``.

        Args:
            code: Python code to execute
            requirements: List of required packages, written line by line into
                a requirements.txt for pip

        Returns:
            Execution result dictionary with the keys "success", "output"
            (stdout), "error", "result" (the last stdout line that parses as
            JSON, or None) and "exit_code" (-1 on timeout or executor failure)
        """
        # The code generation prompt asks the script to define a `result`
        # dictionary, but nothing asks it to print that dictionary, while the
        # result extraction below only reads stdout. This epilogue is appended
        # to the script so that `result` reaches stdout as one JSON line. It
        # swallows its own errors so it cannot change the script's exit code.
        result_epilogue = (
            "\n\n"
            "# --- appended by the executor: print `result` as JSON on the last stdout line ---\n"
            "try:\n"
            "    import json as _executor_json\n"
            "    if isinstance(globals().get(\"result\"), dict):\n"
            "        print()\n"
            "        print(_executor_json.dumps(globals()[\"result\"], default=str))\n"
            "except Exception:\n"
            "    pass\n"
        )

        try:
            # 1. Create temp directory and virtual environment
            self.temp_dir = tempfile.mkdtemp(prefix="code_exec_")
            venv_path = os.path.join(self.temp_dir, "venv")

            logger.debug(f"Creating virtual environment at {venv_path}")
            subprocess.run([sys.executable, "-m", "venv", venv_path],
                        check=True, capture_output=True)

            # The interpreter location inside a venv differs between Windows and other systems
            python_exe = os.path.join(venv_path, "Scripts", "python.exe") if os.name == 'nt' else os.path.join(venv_path, "bin", "python")

            # 2. Install requirements if provided
            if requirements:
                logger.info(f"Installing requirements: {requirements}")

                # Create requirements.txt
                requirements_text = "\n".join(requirements)
                req_file = os.path.join(self.temp_dir, "requirements.txt")
                with open(req_file, "w", encoding="utf-8") as f:
                    f.write(requirements_text)

                logger.info(f"Requirements file: {requirements_text}.")

                # Installation is best effort: a failure is logged and the
                # script is executed anyway.
                try:
                    pip_result = subprocess.run(
                        [python_exe, "-m", "pip", "install", "-r", req_file],
                        capture_output=True,
                        text=True,
                        timeout=int(APP_CONFIG.get("Agent_Coder_INSTALL_TIMEOUT"))
                    )
                    if pip_result.returncode != 0:
                        logger.debug(f"Error installing requirements: {pip_result.stderr}")
                    else:
                        logger.debug(f"Requirements installed successfully")
                        # Log installed packages if in debug mode
                        if logger.isEnabledFor(logging.DEBUG):
                            pip_list = subprocess.run(
                                [python_exe, "-m", "pip", "list"],
                                capture_output=True,
                                text=True
                            )
                            logger.debug(f"Installed packages:\n{pip_list.stdout}")

                except Exception as e:
                    logger.debug(f"Exception during requirements installation: {str(e)}")

            # 3. Write code (plus the result epilogue) to file
            code_file = os.path.join(self.temp_dir, "code.py")
            with open(code_file, "w", encoding="utf-8") as f:
                f.write(code + result_epilogue)

            # 4. Execute code
            logger.debug(f"Executing code with timeout of {self.executor_timeout} seconds. Code: {code}")
            process = subprocess.run(
                [python_exe, code_file],
                timeout=self.executor_timeout,
                capture_output=True,
                text=True
            )

            # 5. Process results
            stdout = process.stdout
            stderr = process.stderr
            succeeded = process.returncode == 0

            # Try to extract result from stdout
            result_data = None
            if succeeded:
                try:
                    # Walk backwards to find the last line that parses as a JSON object/array
                    for line in reversed(stdout.strip().split('\n')):
                        line = line.strip()
                        if line and line[0] in '{[' and line[-1] in '}]':
                            try:
                                result_data = json.loads(line)
                                logger.debug(f"Extracted result data from stdout: {type(result_data)}")
                                break
                            except json.JSONDecodeError:
                                continue
                except Exception as e:
                    logger.debug(f"Error extracting result from stdout: {str(e)}")

            # A failed process may leave stderr empty; still report something usable
            error_text = ""
            if not succeeded:
                error_text = stderr or f"Process exited with code {process.returncode}"

            # Create result dictionary
            return {
                "success": succeeded,
                "output": stdout,
                "error": error_text,
                "result": result_data,
                "exit_code": process.returncode
            }

        except subprocess.TimeoutExpired:
            logger.error(f"Execution timed out after {self.executor_timeout} seconds")
            return {
                "success": False,
                "output": "",
                "error": f"Execution timed out after {self.executor_timeout} seconds",
                "result": None,
                "exit_code": -1
            }
        except Exception as e:
            logger.error(f"Execution error: {str(e)}")
            return {
                "success": False,
                "output": "",
                "error": f"Execution error: {str(e)}",
                "result": None,
                "exit_code": -1
            }
        finally:
            # Clean up resources
            self._cleanup_execution()

    def _cleanup_execution(self):
        """Clean up temporary resources from code execution."""
        if self.temp_dir and os.path.exists(self.temp_dir):
            try:
                logger.debug(f"Cleaning up temporary directory: {self.temp_dir}")
                shutil.rmtree(self.temp_dir)
                self.temp_dir = None
            except Exception as e:
                logger.warning(f"Error cleaning up temp directory: {str(e)}")

    def _clean_code(self, code: str) -> str:
        """Remove any markdown formatting or explanations."""
        # Remove code block markers
        code = code.replace("```python", "").replace("```", "")

        # Remove explanations before or after code
        lines = code.strip().split('\n')
        start_index = 0
        end_index = len(lines)

        # Find start of actual code
        for i, line in enumerate(lines):
            if line.strip().startswith("input_files =") or line.strip().startswith("# REQUIREMENTS:"):
                start_index = i
                break

        # Clean code
        cleaned_code = '\n'.join(lines[start_index:end_index])
        return cleaned_code.strip()


# Factory function for the Coder agent
def get_coder_agent():
    """Create a new AgentCoder instance and return it."""
    agent = AgentCoder()
    return agent