""" CoderAgent - A unified agent for developing and executing Python code. Includes code execution capabilities previously in separate modules. Enhanced with auto-correction loop for handling execution errors. """ import logging import json import re import uuid import traceback import os import subprocess import tempfile import shutil import sys import pandas as pd from datetime import datetime from typing import List, Dict, Any, Optional, Tuple from modules.agentservice_base import BaseAgent from modules.agentservice_utils import FileUtils, WorkflowUtils, MessageUtils, LoggingUtils from connectors.connector_aichat_openai import ChatService from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol logger = logging.getLogger(__name__) # Existing SimpleCodeExecutor class remains unchanged class SimpleCodeExecutor: # ... existing code ... """ A simplified executor that runs Python code in isolated virtual environments. """ # Class variable to store workflow environments for persistence _workflow_environments = {} def __init__(self, workflow_id: str = None, timeout: int = 30, max_memory_mb: int = 512, requirements: List[str] = None, blocked_packages: List[str] = None, ai_service = None): """ Initialize the SimpleCodeExecutor. 
Args: workflow_id: Optional workflow ID for persistent environments timeout: Maximum execution time in seconds max_memory_mb: Maximum memory in MB requirements: List of packages to install blocked_packages: List of blocked packages """ self.workflow_id = workflow_id self.timeout = timeout self.max_memory_mb = max_memory_mb self.temp_dir = None self.requirements = requirements or [] self.blocked_packages = blocked_packages or [ "cryptography", "flask", "django", "tornado", # Security risks "tensorflow", "pytorch", "scikit-learn" # Resource intensive ] self.is_persistent = workflow_id is not None self.ai_service = ai_service @classmethod def get_workflow_environment(cls, workflow_id: str) -> Optional[str]: """Get an existing workflow environment path if it exists.""" return cls._workflow_environments.get(workflow_id) @classmethod def set_workflow_environment(cls, workflow_id: str, env_path: str) -> None: """Store a workflow environment path.""" cls._workflow_environments[workflow_id] = env_path def _create_venv(self) -> str: """Creates a virtual environment and returns the path.""" # Check for existing environment if using workflow_id if self.workflow_id: self.is_persistent = True existing_env = self.get_workflow_environment(self.workflow_id) if existing_env and os.path.exists(existing_env): logger.info(f"Reusing existing virtual environment: {existing_env}") self.temp_dir = os.path.dirname(existing_env) return existing_env else: logger.info(f"Creating new environment for workflow {self.workflow_id}") # Create a new environment venv_parent_dir = tempfile.mkdtemp(prefix="simple_exec_") self.temp_dir = venv_parent_dir venv_path = os.path.join(venv_parent_dir, "venv") try: # Create virtual environment logger.info(f"Creating new virtual environment in {venv_path}") subprocess.run([sys.executable, "-m", "venv", venv_path], check=True, capture_output=True) # Store the environment path if this is for a specific workflow if self.workflow_id: logger.info(f"Registering new 
persistent environment for workflow {self.workflow_id}") self.set_workflow_environment(self.workflow_id, venv_path) return venv_path except subprocess.CalledProcessError as e: logger.error(f"Error creating virtual environment: {e}") raise RuntimeError(f"Could not create venv: {e}") def _get_pip_executable(self, venv_path: str) -> str: """Gets the path to the pip executable in the virtual environment.""" if os.name == 'nt': # Windows return os.path.join(venv_path, "Scripts", "pip.exe") else: # Unix/Linux return os.path.join(venv_path, "bin", "pip") def _get_python_executable(self, venv_path: str) -> str: """Gets the path to the Python executable in the virtual environment.""" if os.name == 'nt': # Windows return os.path.join(venv_path, "Scripts", "python.exe") else: # Unix/Linux return os.path.join(venv_path, "bin", "python") def _filter_requirements(self, requirements: List[str]) -> List[str]: """Filter out blocked packages and invalid requirements.""" if not requirements: return [] filtered_requirements = [] for req in requirements: # Skip empty, comment lines, or invalid requirements req = req.strip() if not req or req.startswith('#') or '```' in req or req in ['`', '``', '```']: logging.warning(f"Skipping comment or invalid requirement: {req}") continue # Extract package name from requirement spec import re package_name = re.split(r'[=<>]', req)[0].strip().lower() if package_name in self.blocked_packages: logging.warning(f"Blocked package detected: {package_name}") continue filtered_requirements.append(req) return filtered_requirements def _install_packages(self, venv_path: str, requirements: List[str]) -> bool: """Install packages in the virtual environment.""" if not requirements: return True # Filter requirements filtered_requirements = self._filter_requirements(requirements) if not filtered_requirements: logger.info("No allowed packages to install") return True # Get pip executable pip_executable = self._get_pip_executable(venv_path) # Install packages try: 
logger.info(f"Installing packages: {', '.join(filtered_requirements)}") result = subprocess.run( [pip_executable, "install"] + filtered_requirements, check=True, capture_output=True, text=True, timeout=300 ) logger.info("Package installation successful") return True except subprocess.CalledProcessError as e: logger.error(f"Error during package installation: {e.stderr}") return False except Exception as e: logger.error(f"Error during package installation: {str(e)}") return False def _extract_required_packages(self, code: str) -> List[str]: # Extract required packages from requirements comments in the 1st code line packages = set() # Check for special REQUIREMENTS comment - specific format we're looking for first_lines = code.split('\n')[:5] # Only check first few lines for line in first_lines: if line.strip().startswith("# REQUIREMENTS:"): req_str = line.replace("# REQUIREMENTS:", "").strip() for pkg in req_str.split(','): if pkg.strip(): packages.add(pkg.strip()) return list(packages) def execute_code(self, code: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]: """ Execute Python code in an isolated environment using a simple approach. Args: code: Python code to execute input_data: Optional input data for the code Returns: Dictionary with execution results """ logger.info(f"Executing code with workflow_id: {self.workflow_id}") # Create or reuse virtual environment venv_path = self._create_venv() #creating self.temp_dir! 
# Create input_data directory for file handling input_data_dir = os.path.join(self.temp_dir, "input_data") # Temp dir is at root os.makedirs(input_data_dir, exist_ok=True) # Extract and install required packages all_requirements = [] # Add explicitly provided requirements # if self.requirements: # all_requirements.extend(self.requirements) # Extract requirements from code extracted_requirements = self._extract_required_packages(code) if extracted_requirements: all_requirements.extend(extracted_requirements) logger.info(f"Extracted required packages from code: {', '.join(extracted_requirements)}") # Install packages if needed if all_requirements: logger.info(f"Installing {len(all_requirements)} packages") install_success = self._install_packages(venv_path, all_requirements) if not install_success: # Return error if package installation failed return { "success": False, "output": "", "error": f"Failed to install required packages: {', '.join(all_requirements)}", "result": None, "exit_code": -1 } # Process extracted document content if available if input_data and "extracted_documents" in input_data: for doc in input_data["extracted_documents"]: doc_name = doc["name"] doc_content = doc["content"] doc_type = doc["type"] # Create file path file_path = os.path.join(input_data_dir, doc_name) try: # Write content to file with open(file_path, 'w', encoding='utf-8') as f: f.write(doc_content) # Add to files list if not already there if "files" not in input_data: input_data["files"] = [] input_data["files"].append({ "id": f"extracted_{doc_name}", "name": doc_name, "type": doc_type, "path": file_path }) logger.info(f"Created file from extracted content: {doc_name}") except Exception as e: logger.error(f"Error creating file from extracted content: {str(e)}") # Copy input files to input_data directory if provided if input_data and "files" in input_data: for file_info in input_data.get("files", []): # Skip files we just created from extracted content if file_info.get("id", 
"").startswith("extracted_"): continue source_path = file_info.get("path", "") logger.info(f"Attempting to copy file from: {source_path}") logger.info(f"File exists: {os.path.exists(source_path)}") if source_path and os.path.exists(source_path): # Get just the filename file_name = os.path.basename(source_path) # Create destination path in input_data directory dest_path = os.path.join(input_data_dir, file_name) try: # Copy the file shutil.copy2(source_path, dest_path) logger.info(f"Copied file to input_data directory: {dest_path}") except Exception as e: logger.error(f"Error copying file {source_path}: {str(e)}") # Create a file for the code code_id = uuid.uuid4().hex[:8] code_file = os.path.join(self.temp_dir, f"code_{code_id}.py") # Write the code as-is without injecting additional loader code with open(code_file, "w", encoding="utf-8") as f: f.write(code) # Get Python executable python_executable = self._get_python_executable(venv_path) logger.info(f"Using Python executable: {python_executable}") # Execute code try: # Run the code from root dir working_dir = os.path.dirname(code_file) # This should be the project root logger.info(f"DEBUG PATH Root: {os.getcwd()} Code: {code_file} Working Dir: {working_dir}") logger.debug(f"|{code}|") process = subprocess.run( [python_executable, code_file], timeout=self.timeout, capture_output=True, text=True, cwd=working_dir ) # Process the output stdout = process.stdout stderr = process.stderr # Get result from stdout if available result_data = None if process.returncode == 0 and stdout: try: # Look for the last line that could be JSON for line in reversed(stdout.strip().split('\n')): line = line.strip() if line and line[0] in '{[' and line[-1] in '}]': try: result_data = json.loads(line) # Successfully parsed JSON result, use it break except json.JSONDecodeError: # Not valid JSON, continue to next line continue except Exception as e: logger.warning(f"Failed to parse result from stdout: {str(e)}") # Create result dictionary 
            execution_result = {
                "success": process.returncode == 0,
                "output": stdout,
                "error": stderr if process.returncode != 0 else "",
                "result": result_data,
                "exit_code": process.returncode
            }
        except subprocess.TimeoutExpired:
            logger.error(f"Execution timed out after {self.timeout} seconds")
            execution_result = {
                "success": False,
                "output": "",
                "error": f"Execution timed out (timeout after {self.timeout} seconds)",
                "result": None,
                "exit_code": -1
            }
        except Exception as e:
            logger.error(f"Execution error: {str(e)}")
            execution_result = {
                "success": False,
                "output": "",
                "error": f"Execution error: {str(e)} for code {code}",
                "result": None,
                "exit_code": -1
            }
        # Clean up temporary code file (the venv itself may persist — see cleanup())
        try:
            if os.path.exists(code_file):
                os.remove(code_file)
        except Exception as e:
            logger.warning(f"Error cleaning up temporary code file: {e}")
        return execution_result

    def cleanup(self):
        """Clean up temporary resources."""
        # Skip cleanup for persistent environments
        if self.is_persistent and self.workflow_id:
            logger.info(f"Skipping cleanup for persistent environment of workflow {self.workflow_id}")
            return
        # Clean up temporary directory
        if self.temp_dir and os.path.exists(self.temp_dir):
            try:
                shutil.rmtree(self.temp_dir)
                logger.info(f"Deleted temporary directory: {self.temp_dir}")
            except Exception as e:
                logger.warning(f"Could not delete temporary directory {self.temp_dir}: {e}")

    def __del__(self):
        """Clean up during garbage collection."""
        self.cleanup()


# Unchanged error recommendation function
def get_error_recommendation(error_message: str) -> str:
    """Generate recommendations based on error message.

    Returns a markdown "### Recommendation" snippet selected by simple
    substring matching on the Python exception name in the error text.
    """
    if "ImportError" in error_message or "ModuleNotFoundError" in error_message:
        return """
### Recommendation
The error indicates a missing Python module. Try using standard libraries or common data analysis modules.
"""
    elif "PermissionError" in error_message:
        return """
### Recommendation
The code doesn't have the necessary permissions to access files or directories.
"""
    elif "SyntaxError" in error_message:
        return """
### Recommendation
There's a syntax error in the code. Check for missing parentheses, quotes, colons, or indentation errors.
"""
    elif "FileNotFoundError" in error_message:
        return """
### Recommendation
A file could not be found. Check the file path and make sure the file exists.
"""
    else:
        return """
### Recommendation
To fix the error:
1. Check the exact error message
2. Simplify the code and test step by step
3. Use try/except blocks for error-prone operations
"""


class CoderAgent(BaseAgent):
    """Agent for developing and executing Python code with auto-correction capabilities"""

    def __init__(self):
        """Initialize the coder agent with proper type and capabilities"""
        super().__init__()
        # Agent metadata
        self.id = "coder"
        self.type = "coder"
        self.name = "Python Code Agent"
        self.description = "Develops and executes Python code"
        self.capabilities = "code_development,data_processing,file_processing,automation"
        self.result_format = "python_code"
        # Initialize AI service (injected later; None until then)
        self.ai_service = None
        # Add document capabilities
        self.supports_documents = True
        self.document_capabilities = ["read", "reference", "create"]
        self.required_context = ["workflow_id"]
        self.document_handler = None
        # Initialize protocol
        self.protocol = AgentCommunicationProtocol()
        # Init utilities
        self.file_utils = FileUtils()
        self.message_utils = MessageUtils()
        # Executor settings
        self.executor_timeout = 60  # seconds
        self.executor_memory_limit = 512  # MB
        # AI service settings
        self.ai_temperature = 0.1  # Lower temperature for more deterministic code generation
        self.ai_max_tokens = 2000  # Enough tokens for complex code
        # Auto-correction settings (new)
        self.max_correction_attempts = 3  # Maximum number of correction attempts
        self.correction_temperature = 0.1  # Even lower temperature for corrections

    def get_agent_info(self) -> Dict[str, Any]:
        """Get agent information for agent registry"""
        info = super().get_agent_info()
        info.update({
            "metadata": {
                "timeout": self.executor_timeout,
                "memory_limit": self.executor_memory_limit,
                "max_correction_attempts": self.max_correction_attempts
            }
        })
        return info

    def set_document_handler(self, document_handler):
        """Set the document handler for file operations"""
        self.document_handler = document_handler

    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Process a message to develop and execute Python code with auto-correction.

        Args:
            message: The message to process
            context: Additional context information

        Returns:
            Response message
        """
        # Extract workflow_id from context or message
        workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
        # Get or create logging_utils
        log_func = context.get("log_func") if context else None
        logging_utils = LoggingUtils(workflow_id, log_func)
        # Create response message
        response = {
            "role": "assistant",
            "content": "",
            "agent_id": self.id,
            "agent_type": self.type,
            "agent_name": self.name,
            "workflow_id": workflow_id,
            "documents": []
        }
        # Send status update using protocol
        if log_func:
            status_message = self.protocol.create_status_update_message(
                status_description="Starting code generation and execution",
                sender_id=self.id,
                status="in_progress",
                progress=0.0,
                context_id=workflow_id
            )
            log_func(workflow_id, status_message.content, "info", self.id, self.name)
        try:
            # Extract content and documents
            content = message.get("content", "")
            documents = message.get("documents", [])
            code_to_execute = None
            requirements = []
            # Generate code based on the message content using AI
            logging_utils.info("Generating new code with AI", "agents")
            # Log status update - 10% progress
            if log_func:
                status_message = self.protocol.create_status_update_message(
                    status_description="Analyzing requirements and generating code",
                    sender_id=self.id,
                    status="in_progress",
                    progress=0.1,
                    context_id=workflow_id
                )
                log_func(workflow_id, status_message.content, "info", self.id, self.name)
            # Generate code using AI
            code_to_execute, requirements = await self._generate_code_from_prompt(content, documents)
            if not code_to_execute:
                logging_utils.warning("AI could not generate code", "agents")
                response["content"] = "I couldn't generate executable code based on your request. Please provide more detailed instructions."
                self.message_utils.finalize_message(response)
                return response
            logging_utils.info(f"Code generated with AI ({len(code_to_execute)} characters)", "agents")
            # Log status update - 30% progress
            if log_func:
                status_message = self.protocol.create_status_update_message(
                    status_description="Code generated, preparing for execution",
                    sender_id=self.id,
                    status="in_progress",
                    progress=0.3,
                    context_id=workflow_id
                )
                log_func(workflow_id, status_message.content, "info", self.id, self.name)
            # Create code file document
            code_doc_id = f"code_{uuid.uuid4()}"
            code_filename = "generated_code.py"
            code_document = {
                "id": code_doc_id,
                "source": {
                    "type": "generated",
                    "id": code_doc_id,
                    "name": code_filename,
                    "content_type": "text/x-python",
                    "size": len(code_to_execute)
                },
                "contents": [{
                    "type": "text",
                    "text": code_to_execute,
                    "is_extracted": True
                }]
            }
            # Add code document to response
            response["documents"].append(code_document)
            logging_utils.info(f"Added code file '{code_filename}' to response", "agents")
            # Execute the code with auto-correction loop
            if code_to_execute:
                # Log status update - 40% progress
                if log_func:
                    status_message = self.protocol.create_status_update_message(
                        status_description="Setting up execution environment",
                        sender_id=self.id,
                        status="in_progress",
                        progress=0.4,
                        context_id=workflow_id
                    )
                    log_func(workflow_id, status_message.content, "info", self.id, self.name)
                # Prepare execution context
                execution_context = {
                    "workflow_id": workflow_id,
                    "documents": documents,
                    "message": message,
                    "log_func": log_func
                }
                # Log status update - 50% progress
                if log_func:
                    status_message = self.protocol.create_status_update_message(
                        status_description="Executing code",
                        sender_id=self.id,
                        status="in_progress",
                        progress=0.5,
                        context_id=workflow_id
                    )
                    log_func(workflow_id, status_message.content, "info", self.id, self.name)
                # Enhanced execution with auto-correction
                result, attempts_info = await self._execute_with_auto_correction(
                    code_to_execute,
                    requirements,
                    execution_context,
                    content,  # Original prompt/message
                    logging_utils
                )
                # Prepare response based on the final result (success or failure)
                if result.get("success", False):
                    # Log status update - 80% progress
                    if log_func:
                        status_message = self.protocol.create_status_update_message(
                            status_description="Code executed successfully, preparing results",
                            sender_id=self.id,
                            status="in_progress",
                            progress=0.8,
                            context_id=workflow_id
                        )
                        log_func(workflow_id, status_message.content, "info", self.id, self.name)
                    # Code execution successful
                    output = result.get("output", "")
                    execution_result = result.get("result")
                    logging_utils.info("Code executed successfully", "execution")
                    # Format response content
                    response_content = f"## Code executed successfully"
                    # Add correction attempts info if any corrections were made
                    if attempts_info and len(attempts_info) > 1:
                        response_content += f" (after {len(attempts_info)-1} correction attempts)"
                    response_content += "\n\n"
                    # Include the executed code
                    response_content += f"### Final Executed Code\n\n```python\n{attempts_info[-1]['code']}\n```\n\n"
                    # Include the output if available
                    if output:
                        response_content += f"### Output\n\n```\n{output}\n```\n\n"
                    # Create document with results
                    data_document = self._create_document_from_result(execution_result)
                    if data_document:
                        response["documents"].append(data_document)
                    # Include the execution result if available
                    if execution_result:
                        result_str = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result)
                        response_content += f"### Result\n\n```\n{result_str}\n```\n\n"
                    # Include correction history if any corrections were made
                    if attempts_info and len(attempts_info) > 1:
                        response_content += f"### Code Correction History\n\n"
                        # All but the last entry are failed attempts
                        for i, attempt in enumerate(attempts_info[:-1], 1):
                            response_content += f"**Attempt {i}:**\n\n"
                            response_content += f"```python\n{attempt['code']}\n```\n\n"
                            response_content += f"**Error:**\n\n```\n{attempt['error']}\n```\n\n"
                            # Create a correction attempt document for each attempt
                            attempt_doc_id = f"correction_{uuid.uuid4()}"
                            attempt_filename = f"correction_attempt_{i}.py"
                            attempt_document = {
                                "id": attempt_doc_id,
                                "source": {
                                    "type": "generated",
                                    "id": attempt_doc_id,
                                    "name": attempt_filename,
                                    "content_type": "text/x-python",
                                    "size": len(attempt['code'])
                                },
                                "contents": [{
                                    "type": "text",
                                    "text": attempt['code'],
                                    "is_extracted": True
                                }]
                            }
                            # Add correction document to response
                            response["documents"].append(attempt_document)
                            logging_utils.info(f"Added correction attempt file '{attempt_filename}' to response", "agents")
                    response["content"] = response_content
                    # Process any files created by the code
                    if isinstance(execution_result, dict) and "created_files" in execution_result:
                        created_files = execution_result.get("created_files", [])
                        for file_info in created_files:
                            file_id = file_info.get("id")
                            if file_id:
                                logging_utils.info(f"Adding created file {file_info.get('name', file_id)} to documents", "files")
                                # Add file document to the response
                                doc = {
                                    "id": f"doc_{uuid.uuid4()}",
                                    "source": file_info,
                                    "type": "file"
                                }
                                response["documents"].append(doc)
                else:
                    # Code execution failed after all attempts
                    error = result.get("error", "Unknown error")
                    logging_utils.error(f"Error during code execution after all correction attempts: {error}", "execution")
                    # Format error response
                    response_content = f"## Error during code execution\n\n"
                    # Include correction attempts information
                    if attempts_info:
                        response_content += f"I made {len(attempts_info)} attempts to correct the code, but couldn't resolve all issues.\n\n"
                        # Add the final attempt
                        response_content += f"### Final Code Attempt\n\n```python\n{attempts_info[-1]['code']}\n```\n\n"
                        response_content += f"### Final Error\n\n```\n{attempts_info[-1]['error']}\n```\n\n"
                        # Add recommendation based on error
                        response_content += get_error_recommendation(error)
                        # Add correction history
                        if len(attempts_info) > 1:
                            response_content += f"\n### Code Correction History\n\n"
                            for i, attempt in enumerate(attempts_info[:-1], 1):
                                response_content += f"**Attempt {i}:**\n\n"
                                response_content += f"```python\n{attempt['code']}\n```\n\n"
                                response_content += f"**Error:**\n\n```\n{attempt['error']}\n```\n\n"
                                # Create a correction attempt document for each attempt
                                attempt_doc_id = f"correction_{uuid.uuid4()}"
                                attempt_filename = f"correction_attempt_{i}.py"
                                attempt_document = {
                                    "id": attempt_doc_id,
                                    "source": {
                                        "type": "generated",
                                        "id": attempt_doc_id,
                                        "name": attempt_filename,
                                        "content_type": "text/x-python",
                                        "size": len(attempt['code'])
                                    },
                                    "contents": [{
                                        "type": "text",
                                        "text": attempt['code'],
                                        "is_extracted": True
                                    }]
                                }
                                # Add correction document to response
                                response["documents"].append(attempt_document)
                                logging_utils.info(f"Added correction attempt file '{attempt_filename}' to response", "agents")
                    else:
                        # Just show the code and error
                        response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n"
                        response_content += f"### Error\n\n```\n{error}\n```\n\n"
                        # Add recommendation based on error
                        response_content += get_error_recommendation(error)
                    response["content"] = response_content
            else:
                # No code to execute
                response["content"] = "I couldn't find or generate executable code. Please provide Python code or explain your requirements more clearly."
            # Finalize response
            self.message_utils.finalize_message(response)
            # Log completion - 100% progress
            if log_func:
                status_message = self.protocol.create_status_update_message(
                    status_description="Code execution complete",
                    sender_id=self.id,
                    status="completed",
                    progress=1.0,
                    context_id=workflow_id
                )
                log_func(workflow_id, status_message.content, "info", self.id, self.name)
            # Log success
            logging_utils.info("CoderAgent has successfully processed the request", "agents")
            return response
        except Exception as e:
            error_msg = f"Error during processing by the CoderAgent: {str(e)}"
            logging_utils.error(error_msg, "error")
            # Create error response
            response["content"] = f"## Processing Error\n\n```\n{error_msg}\n\n{traceback.format_exc()}\n```"
            self.message_utils.finalize_message(response)
            # Log error status
            if log_func:
                status_message = self.protocol.create_status_update_message(
                    status_description=f"Error during code execution: {str(e)}",
                    sender_id=self.id,
                    status="error",
                    progress=1.0,
                    context_id=workflow_id
                )
                log_func(workflow_id, status_message.content, "error", self.id, self.name)
            return response

    def _create_document_from_result(self, execution_result, output_format="json"):
        """
        Create a document object from execution results

        Args:
            execution_result: The data returned from code execution
            output_format: Desired format (json, csv, etc.)

        Returns:
            Document object for passing to other agents
        """
        if not execution_result:
            return None
        doc_id = f"data_{uuid.uuid4()}"
        # Determine filename and content type based on the data
        if isinstance(execution_result, pd.DataFrame):
            # Handle DataFrame result
            filename = "processed_data.csv"
            content_type = "text/csv"
            content = execution_result.to_csv(index=False)
        elif isinstance(execution_result, dict) or isinstance(execution_result, list):
            # Handle dictionary or list result
            filename = "processed_data.json"
            content_type = "application/json"
            content = json.dumps(execution_result)
        elif isinstance(execution_result, str):
            # Try to determine if string is JSON, CSV, or plain text
            # (heuristic only: leading brace/bracket => JSON; commas plus
            # newlines => CSV; otherwise plain text)
            if execution_result.strip().startswith('{') or execution_result.strip().startswith('['):
                filename = "processed_data.json"
                content_type = "application/json"
            elif ',' in execution_result and '\n' in execution_result:
                filename = "processed_data.csv"
                content_type = "text/csv"
            else:
                filename = "processed_data.txt"
                content_type = "text/plain"
            content = str(execution_result)
        else:
            # Default case for other types
            filename = "processed_data.txt"
            content_type = "text/plain"
            content = str(execution_result)
        # Create document object
        document = {
            "id": doc_id,
            "source": {
                "type": "generated",
                "id": doc_id,
                "name": filename,
                "content_type": content_type,
            },
            "contents": [{
                "type": "text",
                "text": content,
                "is_extracted": True
            }]
        }
        return document

    async def _execute_with_auto_correction(
        self,
        initial_code: str,
        requirements: List[str],
        context: Dict[str, Any],
        original_prompt: str,
        logging_utils: LoggingUtils = None
    ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
        """
        Execute code with automatic error correction and retries.

        Args:
            initial_code: The initial Python code to execute
            requirements: List of required packages
            context: Additional context for execution
            original_prompt: The original user request/prompt
            logging_utils: Optional logging utility

        Returns:
            Tuple of (final execution result, list of attempt info dictionaries)
        """
        # Initialize tracking data (copy requirements so corrections don't
        # mutate the caller's list)
        current_code = initial_code
        current_requirements = requirements.copy() if requirements else []
        attempts_info = []
        # Execute with correction loop
        for attempt in range(1, self.max_correction_attempts + 1):
            if logging_utils:
                if attempt == 1:
                    logging_utils.info(f"Executing code (attempt {attempt}/{self.max_correction_attempts})", "execution")
                else:
                    logging_utils.info(f"Executing corrected code (attempt {attempt}/{self.max_correction_attempts})", "execution")
            # Execute the current code version
            result = await self._execute_code(current_code, current_requirements, context)
            # Record attempt information
            attempts_info.append({
                "attempt": attempt,
                "code": current_code,
                "error": result.get("error", ""),
                "success": result.get("success", False)
            })
            # Check if execution was successful
            if result.get("success", False):
                # Success!
                # Return the result and attempt info
                return result, attempts_info
            # Failed execution - check if we've reached the maximum attempt limit
            if attempt >= self.max_correction_attempts:
                if logging_utils:
                    logging_utils.warning(f"Maximum correction attempts ({self.max_correction_attempts}) reached, giving up", "execution")
                break
            # Need to correct the code - generate a fix based on the error
            error_message = result.get("error", "Unknown error")
            if logging_utils:
                logging_utils.info(f"Attempting to fix code error: {error_message[:200]}...", "execution")
            # Generate corrected code
            corrected_code, new_requirements = await self._generate_code_correction(
                current_code,
                error_message,
                original_prompt,
                current_requirements
            )
            # Update for next attempt
            if corrected_code:
                current_code = corrected_code
                # Add any new requirements
                if new_requirements:
                    for req in new_requirements:
                        if req not in current_requirements:
                            current_requirements.append(req)
                            if logging_utils:
                                logging_utils.info(f"Added new requirement: {req}", "execution")
            else:
                # Could not generate correction, break out of the loop
                if logging_utils:
                    logging_utils.warning("Could not generate code correction, giving up", "execution")
                break
        # If we get here, all attempts failed - return the last result and attempt info
        return result, attempts_info

    async def _generate_code_correction(
        self,
        code: str,
        error_message: str,
        original_prompt: str,
        current_requirements: List[str] = None
    ) -> Tuple[str, List[str]]:
        """
        Generate a corrected version of code based on error messages.

        Args:
            code: The code that produced errors
            error_message: The error message to fix
            original_prompt: The original task/requirements
            current_requirements: List of currently required packages

        Returns:
            Tuple of (corrected code, new requirements list); (None, []) on failure
        """
        try:
            # Create a detailed prompt for code correction
            correction_prompt = f"""You need to fix an error in Python code. The code was written for this task:

ORIGINAL TASK:
{original_prompt}

CURRENT CODE:
```python
{code}
```

ERROR MESSAGE:
```
{error_message}
```

CURRENT REQUIREMENTS: {', '.join(current_requirements) if current_requirements else "None"}

Your task is to analyze the error and provide a corrected version of the code. Focus specifically on fixing the error while preserving the original functionality.

Common fixes might include:
- Fixing syntax errors (missing parentheses, indentation, etc.)
- Resolving import errors by adding appropriate requirements
- Correcting file paths or handling file not found errors
- Adding error handling for specific edge cases
- Fixing logical errors in the code

FORMAT INSTRUCTIONS:
1. Provide ONLY the complete fixed Python code without ANY explanation
2. DO NOT include code block markers like ```python or ```
3. DO NOT explain what the code does before or after it
4. DO NOT include any text that is not valid Python code
5. Start your response directly with the valid Python code
6. End your response with valid Python code

If you need to add new required packages, place them in a specially formatted comment at the top of your code like this:
# REQUIREMENTS: package1,package2,package3

Your entire response must be valid Python that can be executed without modification.
"""
            # Create messages for the API
            messages = [
                {"role": "system", "content": "You are a Python debugging expert. You provide ONLY clean, fixed Python code without any explanations, markdown formatting, or non-code text. Your response should be nothing but valid, fixed Python code that can be executed directly."},
                {"role": "user", "content": correction_prompt}
            ]
            # Call the API with very low temperature for deterministic fixes
            generated_content = await self.ai_service.call_api(
                messages,
                temperature=self.correction_temperature,
                max_tokens=self.ai_max_tokens
            )
            # Clean the generated content to ensure it's only valid Python code
            fixed_code = self._clean_code(generated_content)
            # Extract requirements from special comment at the top of the code
            new_requirements = []
            for line in fixed_code.split('\n'):
                if line.strip().startswith("# REQUIREMENTS:"):
                    req_str = line.replace("# REQUIREMENTS:", "").strip()
                    new_requirements = [r.strip() for r in req_str.split(',') if r.strip()]
                    break
            return fixed_code, new_requirements
        except Exception as e:
            logging.error(f"Error generating code correction: {str(e)}", exc_info=True)
            # Return None to indicate failure
            return None, []

    def _clean_code(self, code: str) -> str:
        """
        Clean up code by removing markdown code block markers and other formatting artifacts.
Args: code: The code string to clean Returns: Cleaned code string """ import re # Remove code block markers at beginning/end code = re.sub(r'^```(?:python)?\s*', '', code) code = re.sub(r'```\s*$', '', code) # Remove any trailing markdown code blocks that might have been added by the AI lines = code.split('\n') clean_lines = [] # Flag to track if we're in a trailing markdown section in_trailing_markdown = False for line in reversed(lines): stripped = line.strip() # Check if this line contains only backticks (``` or ` or ``) if re.match(r'^`{1,3}$', stripped): in_trailing_markdown = True continue # Check if this is a markdown comment or note if in_trailing_markdown and (stripped.startswith('#') or stripped.lower().startswith('note:') or stripped.lower().startswith('example:')): continue # If we've reached actual code, stop considering trailing markdown if stripped and not in_trailing_markdown: in_trailing_markdown = False # Add this line if it's not part of trailing markdown if not in_trailing_markdown: clean_lines.insert(0, line) # Join the lines back together clean_code = '\n'.join(clean_lines) # Final cleanup for any stray backticks clean_code = re.sub(r'`{1,3}\s*$', '', clean_code) return clean_code.strip() async def _generate_code_from_prompt(self, prompt: str, documents: List[Dict[str, Any]]) -> Tuple[str, List[str]]: """ Generate Python code from a prompt using AI service. 
Args: prompt: The prompt to generate code from documents: Documents associated with the prompt Returns: Tuple of (generated Python code, required packages) """ try: # Prepare a prompt for code generation ai_prompt = f"""Generate Python code to solve the following task: {prompt} Available input files: """ # Add information about available documents if documents: for i, doc in enumerate(documents): source = doc.get("source", {}) doc_name = source.get("name", f"Document {i+1}") doc_type = source.get("content_type", "unknown") doc_id = source.get("id", "") ai_prompt += f"- {doc_name} (type: {doc_type}, id: {doc_id}, path: './input_data/{doc_name}')\n" else: ai_prompt += "No input files available.\n" ai_prompt += """ IMPORTANT REQUIREMENTS: 1. Your code MUST define a 'result' variable to store the final output of your code. 2. At the end of your script, it should print or output the result variable. 3. Make your 'result' variable a dictionary or another JSON-serializable data structure that contains all relevant output. 4. Input files are accessible in the './input_data/' directory. 5. Keep code well-documented with comments explaining key operations. 6. Make your code complete and self-contained. 7. Include proper error handling. FORMAT INSTRUCTIONS: - Provide ONLY the Python code without ANY introduction, explanation, or conclusion text - DO NOT include code block markers like ```python or ``` - DO NOT explain what the code does before or after it - DO NOT include any text that is not valid Python code - Start your response directly with valid Python code - End your response with valid Python code For required packages, place them in a specially formatted comment at the top of your code one one line like this: # REQUIREMENTS: pandas,numpy,matplotlib,requests Your entire response must be valid Python that can be executed without modification. 
""" # Create messages for the API messages = [ {"role": "system", "content": "You are a Python code generator that provides ONLY clean, executable Python code without any explanations, markdown formatting, or non-code text. Your response should be nothing but valid Python code that can be executed directly."}, {"role": "user", "content": ai_prompt} ] # Call the API logging.info(f"Calling AI API to generate code") generated_content = await self.ai_service.call_api(messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens) # Clean the generated content to ensure it's only valid Python code code = self._clean_code(generated_content) # Extract requirements from special comment at the top of the code requirements = [] for line in code.split('\n'): if line.strip().startswith("# REQUIREMENTS:"): req_str = line.replace("# REQUIREMENTS:", "").strip() requirements = [r.strip() for r in req_str.split(',') if r.strip()] break return code, requirements except Exception as e: logging.error(f"Error generating code with AI: {str(e)}", exc_info=True) # Return basic error handling code and no requirements error_str = str(e).replace('"', '\\"') return f""" # Error during code generation print(f"An error occurred during code generation: {error_str}") # Return an error result result = {{"error": "Code generation failed", "message": "{error_str}"}} """, [] async def _execute_code(self, code: str, requirements: List[str] = None, context: Dict[str, Any] = None) -> Dict[str, Any]: """ Execute Python code using the SimpleCodeExecutor. 
Args: code: The Python code to execute requirements: List of required packages context: Additional context for execution Returns: Result of code execution """ # Get workflow ID and set up logging workflow_id = context.get("workflow_id", "") if context else "" logging_utils = None if "log_func" in context and workflow_id: logging_utils = LoggingUtils(workflow_id, context.get("log_func")) if logging_utils: logging_utils.info("Executing Python code", "execution") if requirements: logging_utils.info(f"Required packages: {', '.join(requirements)}", "execution") try: # List of blocked packages for security blocked_packages = [ "cryptography", "flask", "django", "tornado", # Security risks "tensorflow", "pytorch", "scikit-learn" # Resource intensive ] # Initialize SimpleCodeExecutor with requirements and workflow_id for persistence executor = SimpleCodeExecutor( workflow_id=workflow_id, timeout=self.executor_timeout, max_memory_mb=self.executor_memory_limit, requirements=requirements, blocked_packages=blocked_packages, ai_service = self.ai_service ) # Prepare input data for the code input_data = {"context": context, "workflow_id": workflow_id} # Add file references if available if context and "documents" in context: input_data["files"] = [ { "id": doc.get("source", {}).get("id", ""), "name": doc.get("source", {}).get("name", ""), "type": doc.get("source", {}).get("content_type", ""), "path": doc.get("source", {}).get("path", "") # Full file path } for doc in context.get("documents", []) if doc.get("source", {}).get("type") == "file" ] # Extract document content from message but don't create files yet if context and "message" in context and "content" in context["message"]: message_content = context["message"]["content"] # Check if there's extracted document content if "=== EXTRACTED DOCUMENT CONTENT ===" in message_content: # Add a special field to input_data for extracted content input_data["extracted_documents"] = [] # Split by the document marker pattern pattern = r"--- 
(.*?) ---\s*" import re doc_sections = re.split(pattern, message_content) # Skip the first section (before any "--- doc ---" marker) for i in range(1, len(doc_sections), 2): if i+1 < len(doc_sections): doc_name = doc_sections[i].strip() doc_content = doc_sections[i+1].strip() # Store the extracted content to be processed by the executor input_data["extracted_documents"].append({ "name": doc_name, "content": doc_content, "type": "text/csv" if doc_name.endswith(".csv") else "text/plain" }) if logging_utils: logging_utils.info(f"Extracted document content: {doc_name}", "execution") # Execute the code if logging_utils: logging_utils.info(f"Executing code with input data containing {len(input_data.get('files', []))} files", "execution") result = executor.execute_code(code, input_data) # Log the execution results if logging_utils: if result.get("success", False): logging_utils.info("Code executed successfully", "execution") # Log a preview of the output output = result.get("output", "") if output: preview = output[:1000] + "..." if len(output) > 1000 else output logging_utils.info(f"Output preview: {preview}", "execution") # Log a preview of the result execution_result = result.get("result") if execution_result: if isinstance(execution_result, (dict, list)): result_str = json.dumps(execution_result, indent=2) preview = result_str[:1000] + "..." if len(result_str) > 1000 else result_str else: str_result = str(execution_result) preview = str_result[:1000] + "..." 
if len(str_result) > 1000 else str_result logging_utils.info(f"Result preview: {preview}", "execution") else: # Log error information error = result.get("error", "Unknown error") logging_utils.error(f"Error during code execution: {error}", "execution") # Clean up non-persistent environments if not executor.is_persistent: executor.cleanup() return result except Exception as e: error_message = f"Error during code execution: {str(e)}\n{traceback.format_exc()}" if logging_utils: logging_utils.error(error_message, "error") return { "success": False, "output": "", "error": error_message, "result": None } def send_error_message(self, error_description: str, sender_id: str, receiver_id: str = None, context_id: str = None) -> AgentMessage: """Send an error message using the protocol""" return self.protocol.create_error_message( error_description=error_description, sender_id=sender_id, receiver_id=receiver_id, error_type="code_execution", context_id=context_id ) def send_result_message(self, result_content: str, sender_id: str, receiver_id: str, task_id: str, output_data: Dict[str, Any] = None, context_id: str = None) -> AgentMessage: """Send a result message using the protocol""" return self.protocol.create_result_message( result_content=result_content, sender_id=sender_id, receiver_id=receiver_id, task_id=task_id, output_data=output_data, result_format="python_code", context_id=context_id ) # Singleton instance _coder_agent = None def get_coder_agent(): """Returns a singleton instance of the Coder Agent""" global _coder_agent if _coder_agent is None: _coder_agent = CoderAgent() return _coder_agent