# Source: gateway/modules/agentservice_agent_coder.py
# Export metadata: 2025-04-16 21:42:26 +02:00 — 1398 lines, 60 KiB, Python, no trailing EOL.

"""
CoderAgent - A unified agent for developing and executing Python code.
Includes code execution capabilities previously in separate modules.
Enhanced with auto-correction loop for handling execution errors.
"""
import logging
import json
import re
import uuid
import traceback
import os
import subprocess
import tempfile
import shutil
import sys
import pandas as pd
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple
from modules.agentservice_base import BaseAgent
from modules.agentservice_utils import FileUtils, WorkflowUtils, MessageUtils, LoggingUtils
from connectors.connector_aichat_openai import ChatService
from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol
logger = logging.getLogger(__name__)
# Existing SimpleCodeExecutor class remains unchanged
class SimpleCodeExecutor:
    """
    A simplified executor that runs Python code in isolated virtual environments.

    A fresh venv is created per execution unless a ``workflow_id`` is given,
    in which case the venv is registered in a class-level map and reused
    across executions for that workflow (and never cleaned up automatically).
    """
    # Class variable to store workflow environments for persistence.
    # Maps workflow_id -> venv path; shared by all instances of this class.
    _workflow_environments: Dict[str, str] = {}

    def __init__(self,
                 workflow_id: str = None,
                 timeout: int = 30,
                 max_memory_mb: int = 512,
                 requirements: List[str] = None,
                 blocked_packages: List[str] = None,
                 ai_service=None):
        """
        Initialize the SimpleCodeExecutor.

        Args:
            workflow_id: Optional workflow ID for persistent environments
            timeout: Maximum execution time in seconds
            max_memory_mb: Maximum memory in MB (recorded, but not enforced here)
            requirements: List of packages to install
            blocked_packages: List of blocked packages (defaults applied if None)
            ai_service: Optional AI service handle (stored, unused by the executor)
        """
        self.workflow_id = workflow_id
        self.timeout = timeout
        self.max_memory_mb = max_memory_mb
        self.temp_dir = None  # parent directory of the venv; set by _create_venv
        self.requirements = requirements or []
        self.blocked_packages = blocked_packages or [
            "cryptography", "flask", "django", "tornado",  # Security risks
            "tensorflow", "pytorch", "scikit-learn"  # Resource intensive
        ]
        self.is_persistent = workflow_id is not None
        self.ai_service = ai_service

    @classmethod
    def get_workflow_environment(cls, workflow_id: str) -> Optional[str]:
        """Get an existing workflow environment path if it exists."""
        return cls._workflow_environments.get(workflow_id)

    @classmethod
    def set_workflow_environment(cls, workflow_id: str, env_path: str) -> None:
        """Store a workflow environment path."""
        cls._workflow_environments[workflow_id] = env_path

    def _create_venv(self) -> str:
        """Create (or reuse) a virtual environment and return its path.

        Side effect: sets ``self.temp_dir`` to the venv's parent directory.

        Raises:
            RuntimeError: If the venv could not be created.
        """
        # Check for existing environment if using workflow_id
        if self.workflow_id:
            self.is_persistent = True
            existing_env = self.get_workflow_environment(self.workflow_id)
            if existing_env and os.path.exists(existing_env):
                logger.info(f"Reusing existing virtual environment: {existing_env}")
                self.temp_dir = os.path.dirname(existing_env)
                return existing_env
            else:
                logger.info(f"Creating new environment for workflow {self.workflow_id}")
        # Create a new environment
        venv_parent_dir = tempfile.mkdtemp(prefix="simple_exec_")
        self.temp_dir = venv_parent_dir
        venv_path = os.path.join(venv_parent_dir, "venv")
        try:
            # Create virtual environment with the same interpreter running us
            logger.info(f"Creating new virtual environment in {venv_path}")
            subprocess.run([sys.executable, "-m", "venv", venv_path],
                           check=True,
                           capture_output=True)
            # Store the environment path if this is for a specific workflow
            if self.workflow_id:
                logger.info(f"Registering new persistent environment for workflow {self.workflow_id}")
                self.set_workflow_environment(self.workflow_id, venv_path)
            return venv_path
        except subprocess.CalledProcessError as e:
            logger.error(f"Error creating virtual environment: {e}")
            raise RuntimeError(f"Could not create venv: {e}")

    def _get_pip_executable(self, venv_path: str) -> str:
        """Gets the path to the pip executable in the virtual environment."""
        if os.name == 'nt':  # Windows
            return os.path.join(venv_path, "Scripts", "pip.exe")
        else:  # Unix/Linux
            return os.path.join(venv_path, "bin", "pip")

    def _get_python_executable(self, venv_path: str) -> str:
        """Gets the path to the Python executable in the virtual environment."""
        if os.name == 'nt':  # Windows
            return os.path.join(venv_path, "Scripts", "python.exe")
        else:  # Unix/Linux
            return os.path.join(venv_path, "bin", "python")

    def _filter_requirements(self, requirements: List[str]) -> List[str]:
        """Filter out blocked packages and invalid requirements.

        Drops empty lines, comment lines, markdown-fence artifacts, and any
        requirement whose base package name is in ``self.blocked_packages``.
        """
        if not requirements:
            return []
        filtered_requirements = []
        for req in requirements:
            # Skip empty, comment lines, or invalid requirements
            req = req.strip()
            if not req or req.startswith('#') or '```' in req or req in ['`', '``', '```']:
                # Fix: use the module logger (was the root `logging.warning`)
                # for consistency with the rest of this module.
                logger.warning(f"Skipping comment or invalid requirement: {req}")
                continue
            # Extract package name from requirement spec (strip version pins);
            # `re` is already imported at module level, no local import needed.
            package_name = re.split(r'[=<>]', req)[0].strip().lower()
            if package_name in self.blocked_packages:
                logger.warning(f"Blocked package detected: {package_name}")
                continue
            filtered_requirements.append(req)
        return filtered_requirements

    def _install_packages(self, venv_path: str, requirements: List[str]) -> bool:
        """Install packages in the virtual environment.

        Returns:
            True on success (or nothing to install), False on any failure.
        """
        if not requirements:
            return True
        # Filter requirements
        filtered_requirements = self._filter_requirements(requirements)
        if not filtered_requirements:
            logger.info("No allowed packages to install")
            return True
        # Get pip executable
        pip_executable = self._get_pip_executable(venv_path)
        # Install packages
        try:
            logger.info(f"Installing packages: {', '.join(filtered_requirements)}")
            subprocess.run(
                [pip_executable, "install"] + filtered_requirements,
                check=True,
                capture_output=True,
                text=True,
                timeout=300  # hard cap on pip runtime
            )
            logger.info("Package installation successful")
            return True
        except subprocess.CalledProcessError as e:
            logger.error(f"Error during package installation: {e.stderr}")
            return False
        except Exception as e:
            logger.error(f"Error during package installation: {str(e)}")
            return False

    def _extract_required_packages(self, code: str) -> List[str]:
        """Extract required packages from a ``# REQUIREMENTS:`` comment.

        Only the first five lines of the code are scanned for the special
        comma-separated comment format.
        """
        packages = set()
        # Check for special REQUIREMENTS comment - specific format we're looking for
        first_lines = code.split('\n')[:5]  # Only check first few lines
        for line in first_lines:
            if line.strip().startswith("# REQUIREMENTS:"):
                req_str = line.replace("# REQUIREMENTS:", "").strip()
                for pkg in req_str.split(','):
                    if pkg.strip():
                        packages.add(pkg.strip())
        return list(packages)

    def execute_code(self, code: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Execute Python code in an isolated environment using a simple approach.

        Args:
            code: Python code to execute
            input_data: Optional input data for the code (may carry
                "extracted_documents" and/or "files" entries)
        Returns:
            Dictionary with keys: success, output, error, result, exit_code
        """
        logger.info(f"Executing code with workflow_id: {self.workflow_id}")
        # Create or reuse virtual environment (also sets self.temp_dir)
        venv_path = self._create_venv()
        # Create input_data directory for file handling
        input_data_dir = os.path.join(self.temp_dir, "input_data")  # Temp dir is at root
        os.makedirs(input_data_dir, exist_ok=True)
        # Extract and install required packages declared inside the code itself
        all_requirements = []
        extracted_requirements = self._extract_required_packages(code)
        if extracted_requirements:
            all_requirements.extend(extracted_requirements)
            logger.info(f"Extracted required packages from code: {', '.join(extracted_requirements)}")
        # Install packages if needed
        if all_requirements:
            logger.info(f"Installing {len(all_requirements)} packages")
            install_success = self._install_packages(venv_path, all_requirements)
            if not install_success:
                # Return error if package installation failed
                return {
                    "success": False,
                    "output": "",
                    "error": f"Failed to install required packages: {', '.join(all_requirements)}",
                    "result": None,
                    "exit_code": -1
                }
        # Process extracted document content if available: materialize each
        # document as a real file so the executed code can open it.
        if input_data and "extracted_documents" in input_data:
            for doc in input_data["extracted_documents"]:
                doc_name = doc["name"]
                doc_content = doc["content"]
                doc_type = doc["type"]
                # Create file path
                file_path = os.path.join(input_data_dir, doc_name)
                try:
                    # Write content to file
                    with open(file_path, 'w', encoding='utf-8') as f:
                        f.write(doc_content)
                    # Add to files list if not already there
                    if "files" not in input_data:
                        input_data["files"] = []
                    input_data["files"].append({
                        "id": f"extracted_{doc_name}",
                        "name": doc_name,
                        "type": doc_type,
                        "path": file_path
                    })
                    logger.info(f"Created file from extracted content: {doc_name}")
                except Exception as e:
                    logger.error(f"Error creating file from extracted content: {str(e)}")
        # Copy input files to input_data directory if provided
        if input_data and "files" in input_data:
            for file_info in input_data.get("files", []):
                # Skip files we just created from extracted content
                if file_info.get("id", "").startswith("extracted_"):
                    continue
                source_path = file_info.get("path", "")
                logger.info(f"Attempting to copy file from: {source_path}")
                logger.info(f"File exists: {os.path.exists(source_path)}")
                if source_path and os.path.exists(source_path):
                    # Get just the filename
                    file_name = os.path.basename(source_path)
                    # Create destination path in input_data directory
                    dest_path = os.path.join(input_data_dir, file_name)
                    try:
                        # Copy the file (copy2 preserves metadata)
                        shutil.copy2(source_path, dest_path)
                        logger.info(f"Copied file to input_data directory: {dest_path}")
                    except Exception as e:
                        logger.error(f"Error copying file {source_path}: {str(e)}")
        # Create a file for the code
        code_id = uuid.uuid4().hex[:8]
        code_file = os.path.join(self.temp_dir, f"code_{code_id}.py")
        # Write the code as-is without injecting additional loader code
        with open(code_file, "w", encoding="utf-8") as f:
            f.write(code)
        # Get Python executable
        python_executable = self._get_python_executable(venv_path)
        logger.info(f"Using Python executable: {python_executable}")
        # Execute code
        try:
            # Run the code from root dir
            working_dir = os.path.dirname(code_file)  # This should be the project root
            logger.info(f"DEBUG PATH Root: {os.getcwd()} Code: {code_file} Working Dir: {working_dir}")
            logger.debug(f"|{code}|")
            process = subprocess.run(
                [python_executable, code_file],
                timeout=self.timeout,
                capture_output=True,
                text=True,
                cwd=working_dir
            )
            # Process the output
            stdout = process.stdout
            stderr = process.stderr
            # Get result from stdout if available: the convention is that the
            # executed code may print a JSON object/array as its last line.
            result_data = None
            if process.returncode == 0 and stdout:
                try:
                    # Look for the last line that could be JSON
                    for line in reversed(stdout.strip().split('\n')):
                        line = line.strip()
                        if line and line[0] in '{[' and line[-1] in '}]':
                            try:
                                result_data = json.loads(line)
                                # Successfully parsed JSON result, use it
                                break
                            except json.JSONDecodeError:
                                # Not valid JSON, continue to next line
                                continue
                except Exception as e:
                    logger.warning(f"Failed to parse result from stdout: {str(e)}")
            # Create result dictionary
            execution_result = {
                "success": process.returncode == 0,
                "output": stdout,
                "error": stderr if process.returncode != 0 else "",
                "result": result_data,
                "exit_code": process.returncode
            }
        except subprocess.TimeoutExpired:
            logger.error(f"Execution timed out after {self.timeout} seconds")
            execution_result = {
                "success": False,
                "output": "",
                "error": f"Execution timed out (timeout after {self.timeout} seconds)",
                "result": None,
                "exit_code": -1
            }
        except Exception as e:
            logger.error(f"Execution error: {str(e)}")
            execution_result = {
                "success": False,
                "output": "",
                "error": f"Execution error: {str(e)} for code {code}",
                "result": None,
                "exit_code": -1
            }
        # Clean up temporary code file
        try:
            if os.path.exists(code_file):
                os.remove(code_file)
        except Exception as e:
            logger.warning(f"Error cleaning up temporary code file: {e}")
        return execution_result

    def cleanup(self):
        """Clean up temporary resources."""
        # Skip cleanup for persistent environments
        if self.is_persistent and self.workflow_id:
            logger.info(f"Skipping cleanup for persistent environment of workflow {self.workflow_id}")
            return
        # Clean up temporary directory
        if self.temp_dir and os.path.exists(self.temp_dir):
            try:
                shutil.rmtree(self.temp_dir)
                logger.info(f"Deleted temporary directory: {self.temp_dir}")
            except Exception as e:
                logger.warning(f"Could not delete temporary directory {self.temp_dir}: {e}")

    def __del__(self):
        """Clean up during garbage collection (best-effort; never raises)."""
        try:
            self.cleanup()
        except Exception:
            # During interpreter shutdown, modules/globals referenced by
            # cleanup() may already be torn down; swallow errors here.
            pass
# Unchanged error recommendation function
def get_error_recommendation(error_message: str) -> str:
    """Generate recommendations based on error message."""
    # Ordered (markers, advice) pairs; first match wins, mirroring the
    # if/elif chain this replaces.
    advice_table = [
        (("ImportError", "ModuleNotFoundError"), """
### Recommendation
The error indicates a missing Python module. Try using standard libraries or common data analysis modules.
"""),
        (("PermissionError",), """
### Recommendation
The code doesn't have the necessary permissions to access files or directories.
"""),
        (("SyntaxError",), """
### Recommendation
There's a syntax error in the code. Check for missing parentheses, quotes, colons, or indentation errors.
"""),
        (("FileNotFoundError",), """
### Recommendation
A file could not be found. Check the file path and make sure the file exists.
"""),
    ]
    for markers, advice in advice_table:
        if any(marker in error_message for marker in markers):
            return advice
    # No specific error type recognized: return the generic checklist.
    return """
### Recommendation
To fix the error:
1. Check the exact error message
2. Simplify the code and test step by step
3. Use try/except blocks for error-prone operations
"""
class CoderAgent(BaseAgent):
"""Agent for developing and executing Python code with auto-correction capabilities"""
def __init__(self):
    """Initialize the coder agent with proper type and capabilities"""
    super().__init__()
    # Agent metadata (identity used by the registry and in responses)
    self.id = "coder"
    self.type = "coder"
    self.name = "Python Code Agent"
    self.description = "Develops and executes Python code"
    self.capabilities = "code_development,data_processing,file_processing,automation"
    self.result_format = "python_code"
    # AI service handle; None until injected — presumably by the service
    # container (TODO confirm where ai_service is assigned)
    self.ai_service = None
    # Document capabilities advertised to the orchestrator
    self.supports_documents = True
    self.document_capabilities = ["read", "reference", "create"]
    self.required_context = ["workflow_id"]
    self.document_handler = None  # set later via set_document_handler()
    # Initialize inter-agent communication protocol helper
    self.protocol = AgentCommunicationProtocol()
    # Init utilities
    self.file_utils = FileUtils()
    self.message_utils = MessageUtils()
    # Executor settings
    self.executor_timeout = 60  # seconds
    self.executor_memory_limit = 512  # MB
    # AI service settings
    self.ai_temperature = 0.1  # Lower temperature for more deterministic code generation
    self.ai_max_tokens = 2000  # Enough tokens for complex code
    # Auto-correction settings (new)
    self.max_correction_attempts = 3  # Maximum number of correction attempts
    self.correction_temperature = 0.1  # Even lower temperature for corrections
def get_agent_info(self) -> Dict[str, Any]:
    """Return registry information for this agent, augmented with executor limits."""
    agent_info = super().get_agent_info()
    # Attach the executor configuration under the "metadata" key so the
    # registry can display/enforce the agent's runtime limits.
    executor_metadata = {
        "timeout": self.executor_timeout,
        "memory_limit": self.executor_memory_limit,
        "max_correction_attempts": self.max_correction_attempts
    }
    agent_info["metadata"] = executor_metadata
    return agent_info
def set_document_handler(self, document_handler):
    """Set the document handler for file operations.

    Args:
        document_handler: Project document-handler object used for
            file/document operations (expected interface not visible in
            this module — TODO confirm against the caller).
    """
    self.document_handler = document_handler
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
    """
    Process a message to develop and execute Python code with auto-correction.

    Flow: generate code from the prompt via AI, execute it with the
    auto-correction loop, then assemble a markdown response plus document
    attachments (generated code, correction attempts, result data). Status
    updates are emitted through the protocol at fixed progress milestones
    (0%, 10%, 30%, 40%, 50%, 80%, 100%).

    Args:
        message: The message to process
        context: Additional context information
    Returns:
        Response message (dict with role/content/agent metadata/documents)
    """
    # Extract workflow_id from context or message
    workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
    # Get or create logging_utils
    log_func = context.get("log_func") if context else None
    logging_utils = LoggingUtils(workflow_id, log_func)
    # Create response message skeleton; content/documents filled in below
    response = {
        "role": "assistant",
        "content": "",
        "agent_id": self.id,
        "agent_type": self.type,
        "agent_name": self.name,
        "workflow_id": workflow_id,
        "documents": []
    }
    # Send status update using protocol
    if log_func:
        status_message = self.protocol.create_status_update_message(
            status_description="Starting code generation and execution",
            sender_id=self.id,
            status="in_progress",
            progress=0.0,
            context_id=workflow_id
        )
        log_func(workflow_id, status_message.content, "info", self.id, self.name)
    try:
        # Extract content and documents
        content = message.get("content", "")
        documents = message.get("documents", [])
        code_to_execute = None
        requirements = []
        # Generate code based on the message content using AI
        logging_utils.info("Generating new code with AI", "agents")
        # Log status update - 10% progress
        if log_func:
            status_message = self.protocol.create_status_update_message(
                status_description="Analyzing requirements and generating code",
                sender_id=self.id,
                status="in_progress",
                progress=0.1,
                context_id=workflow_id
            )
            log_func(workflow_id, status_message.content, "info", self.id, self.name)
        # Generate code using AI
        code_to_execute, requirements = await self._generate_code_from_prompt(content, documents)
        if not code_to_execute:
            # Early exit: nothing to run, ask the user for more detail
            logging_utils.warning("AI could not generate code", "agents")
            response["content"] = "I couldn't generate executable code based on your request. Please provide more detailed instructions."
            self.message_utils.finalize_message(response)
            return response
        logging_utils.info(f"Code generated with AI ({len(code_to_execute)} characters)", "agents")
        # Log status update - 30% progress
        if log_func:
            status_message = self.protocol.create_status_update_message(
                status_description="Code generated, preparing for execution",
                sender_id=self.id,
                status="in_progress",
                progress=0.3,
                context_id=workflow_id
            )
            log_func(workflow_id, status_message.content, "info", self.id, self.name)
        # Create code file document so the generated source travels with the response
        code_doc_id = f"code_{uuid.uuid4()}"
        code_filename = "generated_code.py"
        code_document = {
            "id": code_doc_id,
            "source": {
                "type": "generated",
                "id": code_doc_id,
                "name": code_filename,
                "content_type": "text/x-python",
                "size": len(code_to_execute)
            },
            "contents": [{
                "type": "text",
                "text": code_to_execute,
                "is_extracted": True
            }]
        }
        # Add code document to response
        response["documents"].append(code_document)
        logging_utils.info(f"Added code file '{code_filename}' to response", "agents")
        # Execute the code with auto-correction loop
        if code_to_execute:
            # Log status update - 40% progress
            if log_func:
                status_message = self.protocol.create_status_update_message(
                    status_description="Setting up execution environment",
                    sender_id=self.id,
                    status="in_progress",
                    progress=0.4,
                    context_id=workflow_id
                )
                log_func(workflow_id, status_message.content, "info", self.id, self.name)
            # Prepare execution context
            execution_context = {
                "workflow_id": workflow_id,
                "documents": documents,
                "message": message,
                "log_func": log_func
            }
            # Log status update - 50% progress
            if log_func:
                status_message = self.protocol.create_status_update_message(
                    status_description="Executing code",
                    sender_id=self.id,
                    status="in_progress",
                    progress=0.5,
                    context_id=workflow_id
                )
                log_func(workflow_id, status_message.content, "info", self.id, self.name)
            # Enhanced execution with auto-correction
            result, attempts_info = await self._execute_with_auto_correction(
                code_to_execute,
                requirements,
                execution_context,
                content,  # Original prompt/message
                logging_utils
            )
            # Prepare response based on the final result (success or failure)
            if result.get("success", False):
                # Log status update - 80% progress
                if log_func:
                    status_message = self.protocol.create_status_update_message(
                        status_description="Code executed successfully, preparing results",
                        sender_id=self.id,
                        status="in_progress",
                        progress=0.8,
                        context_id=workflow_id
                    )
                    log_func(workflow_id, status_message.content, "info", self.id, self.name)
                # Code execution successful
                output = result.get("output", "")
                execution_result = result.get("result")
                logging_utils.info("Code executed successfully", "execution")
                # Format response content
                response_content = f"## Code executed successfully"
                # Add correction attempts info if any corrections were made
                if attempts_info and len(attempts_info) > 1:
                    response_content += f" (after {len(attempts_info)-1} correction attempts)"
                response_content += "\n\n"
                # Include the executed code (last attempt is the one that ran clean)
                response_content += f"### Final Executed Code\n\n```python\n{attempts_info[-1]['code']}\n```\n\n"
                # Include the output if available
                if output:
                    response_content += f"### Output\n\n```\n{output}\n```\n\n"
                # Create document with results
                data_document = self._create_document_from_result(execution_result)
                if data_document:
                    response["documents"].append(data_document)
                # Include the execution result if available
                if execution_result:
                    result_str = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result)
                    response_content += f"### Result\n\n```\n{result_str}\n```\n\n"
                # Include correction history if any corrections were made
                if attempts_info and len(attempts_info) > 1:
                    response_content += f"### Code Correction History\n\n"
                    for i, attempt in enumerate(attempts_info[:-1], 1):
                        response_content += f"**Attempt {i}:**\n\n"
                        response_content += f"```python\n{attempt['code']}\n```\n\n"
                        response_content += f"**Error:**\n\n```\n{attempt['error']}\n```\n\n"
                        # Create a correction attempt document for each attempt
                        attempt_doc_id = f"correction_{uuid.uuid4()}"
                        attempt_filename = f"correction_attempt_{i}.py"
                        attempt_document = {
                            "id": attempt_doc_id,
                            "source": {
                                "type": "generated",
                                "id": attempt_doc_id,
                                "name": attempt_filename,
                                "content_type": "text/x-python",
                                "size": len(attempt['code'])
                            },
                            "contents": [{
                                "type": "text",
                                "text": attempt['code'],
                                "is_extracted": True
                            }]
                        }
                        # Add correction document to response
                        response["documents"].append(attempt_document)
                        logging_utils.info(f"Added correction attempt file '{attempt_filename}' to response", "agents")
                response["content"] = response_content
                # Process any files created by the code (reported via the
                # "created_files" key of the execution result, if present)
                if isinstance(execution_result, dict) and "created_files" in execution_result:
                    created_files = execution_result.get("created_files", [])
                    for file_info in created_files:
                        file_id = file_info.get("id")
                        if file_id:
                            logging_utils.info(f"Adding created file {file_info.get('name', file_id)} to documents", "files")
                            # Add file document to the response
                            doc = {
                                "id": f"doc_{uuid.uuid4()}",
                                "source": file_info,
                                "type": "file"
                            }
                            response["documents"].append(doc)
            else:
                # Code execution failed after all attempts
                error = result.get("error", "Unknown error")
                logging_utils.error(f"Error during code execution after all correction attempts: {error}", "execution")
                # Format error response
                response_content = f"## Error during code execution\n\n"
                # Include correction attempts information
                if attempts_info:
                    response_content += f"I made {len(attempts_info)} attempts to correct the code, but couldn't resolve all issues.\n\n"
                    # Add the final attempt
                    response_content += f"### Final Code Attempt\n\n```python\n{attempts_info[-1]['code']}\n```\n\n"
                    response_content += f"### Final Error\n\n```\n{attempts_info[-1]['error']}\n```\n\n"
                    # Add recommendation based on error
                    response_content += get_error_recommendation(error)
                    # Add correction history
                    if len(attempts_info) > 1:
                        response_content += f"\n### Code Correction History\n\n"
                        for i, attempt in enumerate(attempts_info[:-1], 1):
                            response_content += f"**Attempt {i}:**\n\n"
                            response_content += f"```python\n{attempt['code']}\n```\n\n"
                            response_content += f"**Error:**\n\n```\n{attempt['error']}\n```\n\n"
                            # Create a correction attempt document for each attempt
                            attempt_doc_id = f"correction_{uuid.uuid4()}"
                            attempt_filename = f"correction_attempt_{i}.py"
                            attempt_document = {
                                "id": attempt_doc_id,
                                "source": {
                                    "type": "generated",
                                    "id": attempt_doc_id,
                                    "name": attempt_filename,
                                    "content_type": "text/x-python",
                                    "size": len(attempt['code'])
                                },
                                "contents": [{
                                    "type": "text",
                                    "text": attempt['code'],
                                    "is_extracted": True
                                }]
                            }
                            # Add correction document to response
                            response["documents"].append(attempt_document)
                            logging_utils.info(f"Added correction attempt file '{attempt_filename}' to response", "agents")
                else:
                    # Just show the code and error
                    response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n"
                    response_content += f"### Error\n\n```\n{error}\n```\n\n"
                    # Add recommendation based on error
                    response_content += get_error_recommendation(error)
                response["content"] = response_content
        else:
            # No code to execute
            response["content"] = "I couldn't find or generate executable code. Please provide Python code or explain your requirements more clearly."
        # Finalize response
        self.message_utils.finalize_message(response)
        # Log completion - 100% progress
        if log_func:
            status_message = self.protocol.create_status_update_message(
                status_description="Code execution complete",
                sender_id=self.id,
                status="completed",
                progress=1.0,
                context_id=workflow_id
            )
            log_func(workflow_id, status_message.content, "info", self.id, self.name)
        # Log success
        logging_utils.info("CoderAgent has successfully processed the request", "agents")
        return response
    except Exception as e:
        # Top-level boundary: report the failure in the response rather
        # than propagating, so the orchestrator always gets a message back.
        error_msg = f"Error during processing by the CoderAgent: {str(e)}"
        logging_utils.error(error_msg, "error")
        # Create error response
        response["content"] = f"## Processing Error\n\n```\n{error_msg}\n\n{traceback.format_exc()}\n```"
        self.message_utils.finalize_message(response)
        # Log error status
        if log_func:
            status_message = self.protocol.create_status_update_message(
                status_description=f"Error during code execution: {str(e)}",
                sender_id=self.id,
                status="error",
                progress=1.0,
                context_id=workflow_id
            )
            log_func(workflow_id, status_message.content, "error", self.id, self.name)
        return response
def _create_document_from_result(self, execution_result, output_format="json"):
"""
Create a document object from execution results
Args:
execution_result: The data returned from code execution
output_format: Desired format (json, csv, etc.)
Returns:
Document object for passing to other agents
"""
if not execution_result:
return None
doc_id = f"data_{uuid.uuid4()}"
# Determine filename and content type based on the data
if isinstance(execution_result, pd.DataFrame):
# Handle DataFrame result
filename = "processed_data.csv"
content_type = "text/csv"
content = execution_result.to_csv(index=False)
elif isinstance(execution_result, dict) or isinstance(execution_result, list):
# Handle dictionary or list result
filename = "processed_data.json"
content_type = "application/json"
content = json.dumps(execution_result)
elif isinstance(execution_result, str):
# Try to determine if string is JSON, CSV, or plain text
if execution_result.strip().startswith('{') or execution_result.strip().startswith('['):
filename = "processed_data.json"
content_type = "application/json"
elif ',' in execution_result and '\n' in execution_result:
filename = "processed_data.csv"
content_type = "text/csv"
else:
filename = "processed_data.txt"
content_type = "text/plain"
content = str(execution_result)
else:
# Default case for other types
filename = "processed_data.txt"
content_type = "text/plain"
content = str(execution_result)
# Create document object
document = {
"id": doc_id,
"source": {
"type": "generated",
"id": doc_id,
"name": filename,
"content_type": content_type,
},
"contents": [{
"type": "text",
"text": content,
"is_extracted": True
}]
}
return document
async def _execute_with_auto_correction(
    self,
    initial_code: str,
    requirements: List[str],
    context: Dict[str, Any],
    original_prompt: str,
    logging_utils: LoggingUtils = None
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
    """
    Execute code with automatic error correction and retries.

    Runs up to ``self.max_correction_attempts`` executions; after each
    failure, asks the AI for a corrected version via
    ``_generate_code_correction`` and retries with that code (plus any
    newly declared requirements).

    Args:
        initial_code: The initial Python code to execute
        requirements: List of required packages
        context: Additional context for execution
        original_prompt: The original user request/prompt
        logging_utils: Optional logging utility
    Returns:
        Tuple of (final execution result, list of attempt info dictionaries);
        each attempt dict has keys: attempt, code, error, success.
    """
    # Initialize tracking data (copy requirements so the caller's list
    # is never mutated by corrections adding packages)
    current_code = initial_code
    current_requirements = requirements.copy() if requirements else []
    attempts_info = []
    # Execute with correction loop
    for attempt in range(1, self.max_correction_attempts + 1):
        if logging_utils:
            if attempt == 1:
                logging_utils.info(f"Executing code (attempt {attempt}/{self.max_correction_attempts})", "execution")
            else:
                logging_utils.info(f"Executing corrected code (attempt {attempt}/{self.max_correction_attempts})", "execution")
        # Execute the current code version
        # NOTE(review): _execute_code is defined elsewhere in this class
        # (not visible in this chunk) — presumably wraps SimpleCodeExecutor.
        result = await self._execute_code(current_code, current_requirements, context)
        # Record attempt information
        attempts_info.append({
            "attempt": attempt,
            "code": current_code,
            "error": result.get("error", ""),
            "success": result.get("success", False)
        })
        # Check if execution was successful
        if result.get("success", False):
            # Success! Return the result and attempt info
            return result, attempts_info
        # Failed execution - check if we've reached the maximum attempt limit
        if attempt >= self.max_correction_attempts:
            if logging_utils:
                logging_utils.warning(f"Maximum correction attempts ({self.max_correction_attempts}) reached, giving up", "execution")
            break
        # Need to correct the code - generate a fix based on the error
        error_message = result.get("error", "Unknown error")
        if logging_utils:
            logging_utils.info(f"Attempting to fix code error: {error_message[:200]}...", "execution")
        # Generate corrected code
        corrected_code, new_requirements = await self._generate_code_correction(
            current_code,
            error_message,
            original_prompt,
            current_requirements
        )
        # Update for next attempt
        if corrected_code:
            current_code = corrected_code
        # Add any new requirements (deduplicated against current list)
            if new_requirements:
                for req in new_requirements:
                    if req not in current_requirements:
                        current_requirements.append(req)
                        if logging_utils:
                            logging_utils.info(f"Added new requirement: {req}", "execution")
        else:
            # Could not generate correction, break out of the loop
            if logging_utils:
                logging_utils.warning("Could not generate code correction, giving up", "execution")
            break
    # If we get here, all attempts failed - return the last result and attempt info
    return result, attempts_info
async def _generate_code_correction(
    self,
    code: str,
    error_message: str,
    original_prompt: str,
    current_requirements: List[str] = None
) -> Tuple[Optional[str], List[str]]:
    """
    Generate a corrected version of code based on error messages.

    Builds a correction prompt (original task + failing code + error),
    calls the AI service at ``self.correction_temperature``, cleans the
    response with ``_clean_code``, and parses any ``# REQUIREMENTS:``
    comment for new packages.

    Args:
        code: The code that produced errors
        error_message: The error message to fix
        original_prompt: The original task/requirements
        current_requirements: List of currently required packages
    Returns:
        Tuple of (corrected code, new requirements list); the code element
        is None when correction generation itself fails.
    """
    try:
        # Create a detailed prompt for code correction
        correction_prompt = f"""You need to fix an error in Python code. The code was written for this task:
ORIGINAL TASK:
{original_prompt}
CURRENT CODE:
```python
{code}
```
ERROR MESSAGE:
```
{error_message}
```
CURRENT REQUIREMENTS: {', '.join(current_requirements) if current_requirements else "None"}
Your task is to analyze the error and provide a corrected version of the code.
Focus specifically on fixing the error while preserving the original functionality.
Common fixes might include:
- Fixing syntax errors (missing parentheses, indentation, etc.)
- Resolving import errors by adding appropriate requirements
- Correcting file paths or handling file not found errors
- Adding error handling for specific edge cases
- Fixing logical errors in the code
FORMAT INSTRUCTIONS:
1. Provide ONLY the complete fixed Python code without ANY explanation
2. DO NOT include code block markers like ```python or ```
3. DO NOT explain what the code does before or after it
4. DO NOT include any text that is not valid Python code
5. Start your response directly with the valid Python code
6. End your response with valid Python code
If you need to add new required packages, place them in a specially formatted comment at the top of your code like this:
# REQUIREMENTS: package1,package2,package3
Your entire response must be valid Python that can be executed without modification.
"""
        # Create messages for the API
        messages = [
            {"role": "system", "content": "You are a Python debugging expert. You provide ONLY clean, fixed Python code without any explanations, markdown formatting, or non-code text. Your response should be nothing but valid, fixed Python code that can be executed directly."},
            {"role": "user", "content": correction_prompt}
        ]
        # Call the API with very low temperature for deterministic fixes
        generated_content = await self.ai_service.call_api(
            messages,
            temperature=self.correction_temperature,
            max_tokens=self.ai_max_tokens
        )
        # Clean the generated content to ensure it's only valid Python code
        fixed_code = self._clean_code(generated_content)
        # Extract requirements from special comment at the top of the code
        new_requirements = []
        for line in fixed_code.split('\n'):
            if line.strip().startswith("# REQUIREMENTS:"):
                req_str = line.replace("# REQUIREMENTS:", "").strip()
                new_requirements = [r.strip() for r in req_str.split(',') if r.strip()]
                break
        return fixed_code, new_requirements
    except Exception as e:
        logging.error(f"Error generating code correction: {str(e)}", exc_info=True)
        # Return None to indicate failure
        return None, []
def _clean_code(self, code: str) -> str:
"""
Clean up code by removing markdown code block markers and other formatting artifacts.
Args:
code: The code string to clean
Returns:
Cleaned code string
"""
import re
# Remove code block markers at beginning/end
code = re.sub(r'^```(?:python)?\s*', '', code)
code = re.sub(r'```\s*$', '', code)
# Remove any trailing markdown code blocks that might have been added by the AI
lines = code.split('\n')
clean_lines = []
# Flag to track if we're in a trailing markdown section
in_trailing_markdown = False
for line in reversed(lines):
stripped = line.strip()
# Check if this line contains only backticks (``` or ` or ``)
if re.match(r'^`{1,3}$', stripped):
in_trailing_markdown = True
continue
# Check if this is a markdown comment or note
if in_trailing_markdown and (stripped.startswith('#') or
stripped.lower().startswith('note:') or
stripped.lower().startswith('example:')):
continue
# If we've reached actual code, stop considering trailing markdown
if stripped and not in_trailing_markdown:
in_trailing_markdown = False
# Add this line if it's not part of trailing markdown
if not in_trailing_markdown:
clean_lines.insert(0, line)
# Join the lines back together
clean_code = '\n'.join(clean_lines)
# Final cleanup for any stray backticks
clean_code = re.sub(r'`{1,3}\s*$', '', clean_code)
return clean_code.strip()
async def _generate_code_from_prompt(self, prompt: str, documents: List[Dict[str, Any]]) -> Tuple[str, List[str]]:
"""
Generate Python code from a prompt using AI service.
Args:
prompt: The prompt to generate code from
documents: Documents associated with the prompt
Returns:
Tuple of (generated Python code, required packages)
"""
try:
# Prepare a prompt for code generation
ai_prompt = f"""Generate Python code to solve the following task:
{prompt}
Available input files:
"""
# Add information about available documents
if documents:
for i, doc in enumerate(documents):
source = doc.get("source", {})
doc_name = source.get("name", f"Document {i+1}")
doc_type = source.get("content_type", "unknown")
doc_id = source.get("id", "")
ai_prompt += f"- {doc_name} (type: {doc_type}, id: {doc_id}, path: './input_data/{doc_name}')\n"
else:
ai_prompt += "No input files available.\n"
ai_prompt += """
IMPORTANT REQUIREMENTS:
1. Your code MUST define a 'result' variable to store the final output of your code.
2. At the end of your script, it should print or output the result variable.
3. Make your 'result' variable a dictionary or another JSON-serializable data structure that contains all relevant output.
4. Input files are accessible in the './input_data/' directory.
5. Keep code well-documented with comments explaining key operations.
6. Make your code complete and self-contained.
7. Include proper error handling.
FORMAT INSTRUCTIONS:
- Provide ONLY the Python code without ANY introduction, explanation, or conclusion text
- DO NOT include code block markers like ```python or ```
- DO NOT explain what the code does before or after it
- DO NOT include any text that is not valid Python code
- Start your response directly with valid Python code
- End your response with valid Python code
For required packages, place them in a specially formatted comment at the top of your code one one line like this:
# REQUIREMENTS: pandas,numpy,matplotlib,requests
Your entire response must be valid Python that can be executed without modification.
"""
# Create messages for the API
messages = [
{"role": "system", "content": "You are a Python code generator that provides ONLY clean, executable Python code without any explanations, markdown formatting, or non-code text. Your response should be nothing but valid Python code that can be executed directly."},
{"role": "user", "content": ai_prompt}
]
# Call the API
logging.info(f"Calling AI API to generate code")
generated_content = await self.ai_service.call_api(messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens)
# Clean the generated content to ensure it's only valid Python code
code = self._clean_code(generated_content)
# Extract requirements from special comment at the top of the code
requirements = []
for line in code.split('\n'):
if line.strip().startswith("# REQUIREMENTS:"):
req_str = line.replace("# REQUIREMENTS:", "").strip()
requirements = [r.strip() for r in req_str.split(',') if r.strip()]
break
return code, requirements
except Exception as e:
logging.error(f"Error generating code with AI: {str(e)}", exc_info=True)
# Return basic error handling code and no requirements
error_str = str(e).replace('"', '\\"')
return f"""
# Error during code generation
print(f"An error occurred during code generation: {error_str}")
# Return an error result
result = {{"error": "Code generation failed", "message": "{error_str}"}}
""", []
async def _execute_code(self, code: str, requirements: List[str] = None, context: Dict[str, Any] = None) -> Dict[str, Any]:
    """
    Execute Python code using the SimpleCodeExecutor.

    Args:
        code: The Python code to execute
        requirements: List of required packages to install in the sandbox
        context: Additional context for execution; may be None

    Returns:
        Result of code execution: a dict with 'success', 'output',
        'error' and 'result' keys
    """
    # Get workflow ID and set up logging
    workflow_id = context.get("workflow_id", "") if context else ""
    logging_utils = None
    # Guard on `context` before the membership test: the previous check
    # raised a TypeError when context was None.
    if context and "log_func" in context and workflow_id:
        logging_utils = LoggingUtils(workflow_id, context.get("log_func"))
    if logging_utils:
        logging_utils.info("Executing Python code", "execution")
        if requirements:
            logging_utils.info(f"Required packages: {', '.join(requirements)}", "execution")
    try:
        # List of blocked packages for security
        blocked_packages = [
            "cryptography", "flask", "django", "tornado",  # Security risks
            "tensorflow", "pytorch", "scikit-learn"  # Resource intensive
        ]
        # Initialize SimpleCodeExecutor with requirements and workflow_id for persistence
        executor = SimpleCodeExecutor(
            workflow_id=workflow_id,
            timeout=self.executor_timeout,
            max_memory_mb=self.executor_memory_limit,
            requirements=requirements,
            blocked_packages=blocked_packages,
            ai_service=self.ai_service
        )
        # Prepare input data for the code
        input_data = {"context": context, "workflow_id": workflow_id}
        # Add file references if available (only documents backed by files)
        if context and "documents" in context:
            input_data["files"] = [
                {
                    "id": doc.get("source", {}).get("id", ""),
                    "name": doc.get("source", {}).get("name", ""),
                    "type": doc.get("source", {}).get("content_type", ""),
                    "path": doc.get("source", {}).get("path", "")  # Full file path
                }
                for doc in context.get("documents", [])
                if doc.get("source", {}).get("type") == "file"
            ]
        # Extract document content from message but don't create files yet
        if context and "message" in context and "content" in context["message"]:
            message_content = context["message"]["content"]
        # Check if there's extracted document content
            if "=== EXTRACTED DOCUMENT CONTENT ===" in message_content:
                # Add a special field to input_data for extracted content
                input_data["extracted_documents"] = []
                # Split by the document marker pattern; uses the module-level
                # `re` import (the local re-import was redundant)
                pattern = r"--- (.*?) ---\s*"
                doc_sections = re.split(pattern, message_content)
                # Skip the first section (before any "--- doc ---" marker);
                # re.split alternates [text, name, content, name, content, ...]
                for i in range(1, len(doc_sections), 2):
                    if i+1 < len(doc_sections):
                        doc_name = doc_sections[i].strip()
                        doc_content = doc_sections[i+1].strip()
                        # Store the extracted content to be processed by the executor
                        input_data["extracted_documents"].append({
                            "name": doc_name,
                            "content": doc_content,
                            "type": "text/csv" if doc_name.endswith(".csv") else "text/plain"
                        })
                        if logging_utils:
                            logging_utils.info(f"Extracted document content: {doc_name}", "execution")
        # Execute the code
        if logging_utils:
            logging_utils.info(f"Executing code with input data containing {len(input_data.get('files', []))} files", "execution")
        result = executor.execute_code(code, input_data)
        # Log the execution results
        if logging_utils:
            if result.get("success", False):
                logging_utils.info("Code executed successfully", "execution")
                # Log a preview of the output (truncated to 1000 chars)
                output = result.get("output", "")
                if output:
                    preview = output[:1000] + "..." if len(output) > 1000 else output
                    logging_utils.info(f"Output preview: {preview}", "execution")
                # Log a preview of the result value (truncated to 1000 chars)
                execution_result = result.get("result")
                if execution_result:
                    if isinstance(execution_result, (dict, list)):
                        result_str = json.dumps(execution_result, indent=2)
                        preview = result_str[:1000] + "..." if len(result_str) > 1000 else result_str
                    else:
                        str_result = str(execution_result)
                        preview = str_result[:1000] + "..." if len(str_result) > 1000 else str_result
                    logging_utils.info(f"Result preview: {preview}", "execution")
            else:
                # Log error information
                error = result.get("error", "Unknown error")
                logging_utils.error(f"Error during code execution: {error}", "execution")
        # Clean up non-persistent environments
        if not executor.is_persistent:
            executor.cleanup()
        return result
    except Exception as e:
        error_message = f"Error during code execution: {str(e)}\n{traceback.format_exc()}"
        if logging_utils:
            logging_utils.error(error_message, "error")
        return {
            "success": False,
            "output": "",
            "error": error_message,
            "result": None
        }
def send_error_message(self, error_description: str, sender_id: str, receiver_id: str = None, context_id: str = None) -> AgentMessage:
    """Build and dispatch a code-execution error message through the protocol."""
    error_fields = {
        "error_description": error_description,
        "sender_id": sender_id,
        "receiver_id": receiver_id,
        "error_type": "code_execution",
        "context_id": context_id,
    }
    return self.protocol.create_error_message(**error_fields)
def send_result_message(self, result_content: str, sender_id: str, receiver_id: str, task_id: str,
                        output_data: Dict[str, Any] = None, context_id: str = None) -> AgentMessage:
    """Build and dispatch a python_code result message through the protocol."""
    message_fields = {
        "result_content": result_content,
        "sender_id": sender_id,
        "receiver_id": receiver_id,
        "task_id": task_id,
        "output_data": output_data,
        "result_format": "python_code",
        "context_id": context_id,
    }
    return self.protocol.create_result_message(**message_fields)
# Module-level singleton holder for the CoderAgent instance
_coder_agent = None

def get_coder_agent():
    """Return the process-wide CoderAgent, creating it lazily on first call."""
    global _coder_agent
    # Guard clause: reuse the existing instance when one has been created
    if _coder_agent is not None:
        return _coder_agent
    _coder_agent = CoderAgent()
    return _coder_agent