642 lines
No EOL
25 KiB
Python
642 lines
No EOL
25 KiB
Python
"""
|
|
Simple Coder Agent for execution of Python code.
|
|
"""
|
|
|
|
import json
import logging
import os
import shutil
import subprocess
import sys
import tempfile
from typing import Any, Dict, List, Optional, Tuple

from modules.chat_registry import AgentBase
from modules.configuration import APP_CONFIG
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class AgentCoder(AgentBase):
|
|
"""Simplified Agent for developing and executing Python code with integrated executor"""
|
|
|
|
def __init__(self):
|
|
"""Initialize the coder agent"""
|
|
super().__init__()
|
|
self.name = "coder"
|
|
self.description = "Develops and executes Python code for data processing and automation"
|
|
self.capabilities = [
|
|
"code_development",
|
|
"data_processing",
|
|
"file_processing",
|
|
"automation",
|
|
"code_execution"
|
|
]
|
|
|
|
# Executor settings
|
|
self.executor_timeout = int(APP_CONFIG.get("Agent_Coder_EXECUTION_TIMEOUT")) # seconds
|
|
self.execution_retry_limit = int(APP_CONFIG.get("Agent_Coder_EXECUTION_RETRY")) # max retries
|
|
self.temp_dir = None
|
|
|
|
def set_dependencies(self, mydom=None):
|
|
"""Set external dependencies for the agent."""
|
|
self.mydom = mydom
|
|
|
|
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
Process a task and perform code development/execution.
|
|
First checks if the task can be completed without code execution,
|
|
then falls back to code generation if needed.
|
|
|
|
Args:
|
|
task: Task dictionary with prompt, input_documents, output_specifications
|
|
|
|
Returns:
|
|
Dictionary with feedback and documents
|
|
"""
|
|
# 1. Extract task information
|
|
prompt = task.get("prompt", "")
|
|
input_documents = task.get("input_documents", [])
|
|
output_specs = task.get("output_specifications", [])
|
|
|
|
# Check if AI service is available
|
|
if not self.mydom:
|
|
logger.error("No AI service configured for the Coder agent")
|
|
return {
|
|
"feedback": "The Coder agent is not properly configured.",
|
|
"documents": []
|
|
}
|
|
|
|
# 2. Extract data from documents in separate categories
|
|
document_data = [] # For raw file data (for code execution)
|
|
content_data = [] # For content data (later use)
|
|
content_extraction = [] # For AI-extracted data (for quick completion)
|
|
|
|
for doc in input_documents:
|
|
# Create proper filename from name and ext
|
|
filename = f"{doc.get('name')}.{doc.get('ext')}" if doc.get('ext') else doc.get('name')
|
|
|
|
# Add main document data to document_data if it exists
|
|
doc_data = doc.get('data', '')
|
|
if doc_data:
|
|
is_base64 = True # Assume base64 encoded for document data
|
|
document_data.append([filename, doc_data, is_base64])
|
|
|
|
# Process contents for different uses
|
|
if doc.get('contents'):
|
|
for content in doc.get('contents', []):
|
|
content_name = content.get('name', 'unnamed')
|
|
|
|
# For AI-extracted data (quick completion)
|
|
if content.get('data_extracted'):
|
|
content_extraction.append({
|
|
"filename": filename,
|
|
"content_name": content_name,
|
|
"content_data": content.get('data_extracted', ''),
|
|
"content_type": content.get('content_type', ''),
|
|
"summary": content.get('summary', '')
|
|
})
|
|
|
|
# For raw content data
|
|
if content.get('data'):
|
|
raw_data = content.get('data', '')
|
|
is_base64 = content.get('metadata', {}).get('base64_encoded', False)
|
|
content_data.append({
|
|
"filename": filename,
|
|
"content_name": content_name,
|
|
"data": raw_data,
|
|
"is_base64": is_base64,
|
|
"content_type": content.get('content_type', '')
|
|
})
|
|
|
|
# Also add to document_data for code execution if not already added
|
|
if not doc_data or doc_data != raw_data:
|
|
document_data.append([filename, raw_data, is_base64])
|
|
|
|
# 3. Check if task can be completed without code execution
|
|
quick_completion = await self._check_quick_completion(prompt, content_extraction, output_specs)
|
|
|
|
if quick_completion and quick_completion.get("complete") == 1:
|
|
logger.info("Task completed without code execution")
|
|
return {
|
|
"feedback": quick_completion.get("prompt", "Task completed successfully."),
|
|
"documents": quick_completion.get("documents", [])
|
|
}
|
|
else:
|
|
logger.debug(f"Code to generate, no quick check")
|
|
|
|
# If quick completion not possible, continue with code generation and execution
|
|
logger.info("Generating code to solve the task")
|
|
|
|
# 4. Generate code using AI
|
|
code, requirements = await self._generate_code(prompt)
|
|
|
|
if not code:
|
|
return {
|
|
"feedback": "Failed to generate code for the task.",
|
|
"documents": []
|
|
}
|
|
|
|
# 5. Replace the placeholder with actual input_files data
|
|
document_data_json = repr(document_data)
|
|
code_with_data = code.replace("input_files = \"=== JSONLOAD ===\"", f"input_files = {document_data_json}")
|
|
|
|
# 6. Execute code with retry logic
|
|
retry_count = 0
|
|
max_retries = self.execution_retry_limit
|
|
execution_history = []
|
|
|
|
while retry_count <= max_retries:
|
|
execution_result = self._execute_code(code_with_data, requirements)
|
|
execution_history.append({
|
|
"attempt": retry_count + 1,
|
|
"code": code_with_data,
|
|
"result": execution_result
|
|
})
|
|
|
|
# Check if execution was successful
|
|
if execution_result.get("success", False):
|
|
logger.info(f"Code execution succeeded on attempt {retry_count + 1}")
|
|
break
|
|
|
|
# If we've reached max retries, exit the loop
|
|
if retry_count >= max_retries:
|
|
logger.info(f"Reached maximum retry limit ({max_retries}). Giving up.")
|
|
break
|
|
|
|
# Log the error and attempt to improve the code
|
|
error = execution_result.get("error", "Unknown error")
|
|
logger.info(f"Execution attempt {retry_count + 1} failed: {error}. Attempting to improve code.")
|
|
|
|
# Generate improved code based on error
|
|
improved_code, improved_requirements = await self._improve_code(
|
|
original_code=code_with_data,
|
|
error=error,
|
|
execution_result=execution_result,
|
|
attempt=retry_count + 1
|
|
)
|
|
|
|
if improved_code:
|
|
code_with_data = improved_code
|
|
requirements = improved_requirements
|
|
logger.info(f"Code improved for retry {retry_count + 2}")
|
|
else:
|
|
logger.warning("Failed to improve code, using original code for retry")
|
|
|
|
retry_count += 1
|
|
|
|
# 7. Process results and create output documents
|
|
documents = []
|
|
|
|
# Always add the final code document
|
|
documents.append({
|
|
"label": "generated_code.py",
|
|
"content": code_with_data
|
|
})
|
|
|
|
# Add execution history document
|
|
execution_history_str = json.dumps(execution_history, indent=2)
|
|
documents.append({
|
|
"label": "execution_history.json",
|
|
"content": execution_history_str
|
|
})
|
|
|
|
# Create documents based on execution results
|
|
if execution_result.get("success", False):
|
|
result_data = execution_result.get("result")
|
|
|
|
# Create documents based on output specifications
|
|
if output_specs:
|
|
for spec in output_specs:
|
|
label = spec.get("label", "output.txt")
|
|
|
|
# Extract content from result if available
|
|
content = ""
|
|
if isinstance(result_data, dict) and label in result_data:
|
|
content = result_data[label]
|
|
else:
|
|
# Default to execution output
|
|
content = execution_result.get("output", "")
|
|
|
|
documents.append({
|
|
"label": label,
|
|
"content": content
|
|
})
|
|
else:
|
|
# No output specs, create default output document
|
|
documents.append({
|
|
"label": "execution_output.txt",
|
|
"content": execution_result.get("output", "")
|
|
})
|
|
|
|
if retry_count > 0:
|
|
feedback = f"Code executed successfully after {retry_count + 1} attempts. Generated output files based on specifications."
|
|
else:
|
|
feedback = "Code executed successfully. Generated output files based on specifications."
|
|
else:
|
|
# Execution failed
|
|
error = execution_result.get("error", "Unknown error")
|
|
documents.append({
|
|
"label": "execution_error.txt",
|
|
"content": f"Error executing code:\n\n{error}"
|
|
})
|
|
|
|
if retry_count > 0:
|
|
feedback = f"Error during code execution after {retry_count + 1} attempts: {error}"
|
|
else:
|
|
feedback = f"Error during code execution: {error}"
|
|
|
|
return {
|
|
"feedback": feedback,
|
|
"documents": documents
|
|
}
|
|
|
|
async def _improve_code(self, original_code: str, error: str, execution_result: Dict[str, Any], attempt: int) -> Tuple[str, List[str]]:
|
|
"""
|
|
Improve code based on execution error.
|
|
|
|
Args:
|
|
original_code: The code that failed to execute
|
|
error: The error message
|
|
execution_result: Complete execution result dictionary
|
|
attempt: Current attempt number
|
|
|
|
Returns:
|
|
Tuple of (improved_code, requirements)
|
|
"""
|
|
# Create prompt for code improvement
|
|
improvement_prompt = f"""
|
|
Fix the following Python code that failed during execution. This is attempt {attempt} to fix the code.
|
|
|
|
ORIGINAL CODE:
|
|
{original_code}
|
|
|
|
ERROR MESSAGE:
|
|
{error}
|
|
|
|
STDOUT:
|
|
{execution_result.get('output', '')}
|
|
|
|
INSTRUCTIONS:
|
|
1. Fix all errors identified in the error message
|
|
2. Diagnose and fix any logical issues
|
|
3. Pay special attention to:
|
|
- Type conversions and data handling
|
|
- Error handling and edge cases
|
|
- Resource management (file handles, etc.)
|
|
- Syntax errors and typos
|
|
4. Keep the input_files handling logic intact
|
|
5. Maintain the same overall structure and purpose
|
|
|
|
OUTPUT:
|
|
- Your improved code MUST still define a 'result' variable as a dictionary
|
|
- Each output file should be a key in the result dictionary
|
|
- DO NOT remove the input_files assignment line structure
|
|
|
|
REQUIREMENTS:
|
|
Required packages should be specified as:
|
|
# REQUIREMENTS: library==version,library2>=version
|
|
- You may add/remove requirements as needed to fix the code
|
|
|
|
Return ONLY Python code without explanations or markdown.
|
|
"""
|
|
|
|
# Call AI service
|
|
messages = [
|
|
{"role": "system", "content": "You are an expert Python code debugger. Provide only fixed Python code without explanations or formatting."},
|
|
{"role": "user", "content": improvement_prompt}
|
|
]
|
|
|
|
try:
|
|
improved_content = await self.mydom.call_ai(messages, temperature=0.2)
|
|
|
|
# Extract code and requirements
|
|
improved_code = self._clean_code(improved_content)
|
|
|
|
# Extract requirements
|
|
requirements = []
|
|
for line in improved_code.split('\n'):
|
|
if line.strip().startswith("# REQUIREMENTS:"):
|
|
req_str = line.replace("# REQUIREMENTS:", "").strip()
|
|
requirements = [r.strip() for r in req_str.split(',') if r.strip()]
|
|
break
|
|
|
|
return improved_code, requirements
|
|
except Exception as e:
|
|
logger.error(f"Error improving code: {str(e)}")
|
|
return None, []
|
|
|
|
async def _check_quick_completion(self, prompt: str, content_extraction: List[Dict], output_specs: List[Dict]) -> Dict:
|
|
"""
|
|
Check if the task can be completed without writing and executing code.
|
|
|
|
Args:
|
|
prompt: The task prompt
|
|
content_extraction: List of extracted content data with content_name and data_extracted
|
|
output_specs: List of output specifications
|
|
|
|
Returns:
|
|
Dictionary with completion status and results, or None if no quick completion
|
|
"""
|
|
# If no data or no output specs, can't do a quick completion
|
|
if not content_extraction or not output_specs:
|
|
return None
|
|
|
|
# Create a prompt for the AI to check if this can be completed directly
|
|
specs_json = json.dumps(output_specs)
|
|
data_json = json.dumps(content_extraction)
|
|
|
|
check_prompt = f"""
|
|
Analyze this task and determine if it can be completed directly without writing code.
|
|
|
|
TASK:
|
|
{prompt}
|
|
|
|
EXTRACTED DATA AVAILABLE:
|
|
{data_json}
|
|
|
|
Each entry in the extracted data contains:
|
|
- filename: The source file name
|
|
- content_name: The specific content section name
|
|
- content_data: The AI-extracted text from the content
|
|
- content_type: The type of content (text, csv, etc.)
|
|
- summary: A brief summary of the content
|
|
|
|
REQUIRED OUTPUT:
|
|
{specs_json}
|
|
|
|
If the task can be completed directly with the available extracted data, respond with:
|
|
{{"complete": 1, "prompt": "Brief explanation of the solution", "documents": [
|
|
{{"label": "filename.ext", "content": "content here"}}
|
|
]}}
|
|
|
|
If code would be needed to properly complete this task, respond with:
|
|
{{"complete": 0, "prompt": "Explanation why code is needed"}}
|
|
|
|
Only return valid JSON. Your entire response must be parseable as JSON.
|
|
"""
|
|
|
|
# Call AI service
|
|
logger.debug(f"Checking if task can be completed without code execution: {check_prompt}")
|
|
messages = [
|
|
{"role": "system", "content": "You are an AI assistant that determines if tasks require code execution. Reply with JSON only."},
|
|
{"role": "user", "content": check_prompt}
|
|
]
|
|
|
|
try:
|
|
# Use a lower temperature for more deterministic response
|
|
response = await self.mydom.call_ai(messages, produce_user_answer = True, temperature=0.1)
|
|
|
|
# Parse response as JSON
|
|
if response:
|
|
try:
|
|
# Find JSON in response if there's any text around it
|
|
json_start = response.find('{')
|
|
json_end = response.rfind('}') + 1
|
|
|
|
if json_start >= 0 and json_end > json_start:
|
|
json_str = response[json_start:json_end]
|
|
result = json.loads(json_str)
|
|
|
|
# Check if this is a proper response
|
|
if "complete" in result:
|
|
return result
|
|
|
|
except json.JSONDecodeError:
|
|
logger.debug("Failed to parse quick completion response as JSON")
|
|
pass
|
|
except Exception as e:
|
|
logger.debug(f"Error during quick completion check: {str(e)}")
|
|
|
|
# Default to requiring code execution
|
|
return None
|
|
|
|
async def _generate_code(self, prompt: str) -> Tuple[str, List[str]]:
|
|
"""
|
|
Generate Python code from a prompt with the input_files placeholder.
|
|
|
|
Args:
|
|
prompt: The task prompt
|
|
input_files: List of [filename, data, is_base64] items
|
|
|
|
Returns:
|
|
Tuple of (code, requirements)
|
|
"""
|
|
# Create prompt for code generation
|
|
ai_prompt = f"""
|
|
Generate Python code to solve the following task:
|
|
|
|
TASK:
|
|
{prompt}
|
|
|
|
INPUT FILES:
|
|
- 'input_files' variable is provided as [[filename, data, is_base64], ...]
|
|
- For text files (is_base64=False): use data directly as string
|
|
- For binary files (is_base64=True): use base64.b64decode(data)
|
|
|
|
CODE QUALITY:
|
|
- Use explicit type conversions where needed (int/float/str)
|
|
- Implement feature detection, not version checks
|
|
- Handle errors gracefully with appropriate fallbacks
|
|
- Follow latest API conventions for libraries
|
|
- Validate inputs before processing
|
|
|
|
OUTPUT:
|
|
- Your code MUST define a 'result' variable as a dictionary to store outputs.
|
|
- Each output file should be a key in the result dictionary.
|
|
- For example: result = {{"output.txt": "output text", "results.json": json_string}}
|
|
|
|
Your code must start with:
|
|
input_files = "=== JSONLOAD ===" # DO NOT CHANGE THIS LINE
|
|
|
|
REQUIREMENTS:
|
|
Required packages should be specified as:
|
|
# REQUIREMENTS: library==version,library2>=version
|
|
- Specify exact versions for critical libraries
|
|
- Use constraint operators (==,>=,<=) as needed
|
|
|
|
Return ONLY Python code without explanations or markdown.
|
|
"""
|
|
|
|
# Call AI service
|
|
messages = [
|
|
{"role": "system", "content": "You are a Python code generator. Provide only valid Python code without explanations or formatting."},
|
|
{"role": "user", "content": ai_prompt}
|
|
]
|
|
|
|
generated_content = await self.mydom.call_ai(messages, temperature=0.1)
|
|
|
|
# Extract code and requirements
|
|
code = self._clean_code(generated_content)
|
|
|
|
# Extract requirements
|
|
requirements = []
|
|
for line in code.split('\n'):
|
|
if line.strip().startswith("# REQUIREMENTS:"):
|
|
req_str = line.replace("# REQUIREMENTS:", "").strip()
|
|
requirements = [r.strip() for r in req_str.split(',') if r.strip()]
|
|
break
|
|
|
|
return code, requirements
|
|
|
|
def _execute_code(self, code: str, requirements: List[str] = None) -> Dict[str, Any]:
|
|
"""
|
|
Execute Python code in a virtual environment.
|
|
Integrated executor functionality.
|
|
|
|
Args:
|
|
code: Python code to execute
|
|
requirements: List of required packages
|
|
|
|
Returns:
|
|
Execution result dictionary
|
|
"""
|
|
try:
|
|
# 1. Create temp directory and virtual environment
|
|
self.temp_dir = tempfile.mkdtemp(prefix="code_exec_")
|
|
venv_path = os.path.join(self.temp_dir, "venv")
|
|
|
|
# Create venv
|
|
logger.debug(f"Creating virtual environment at {venv_path}")
|
|
subprocess.run([sys.executable, "-m", "venv", venv_path],
|
|
check=True, capture_output=True)
|
|
|
|
# Get Python executable path
|
|
python_exe = os.path.join(venv_path, "Scripts", "python.exe") if os.name == 'nt' else os.path.join(venv_path, "bin", "python")
|
|
|
|
# 2. Install requirements if provided
|
|
if requirements:
|
|
logger.info(f"Installing requirements: {requirements}")
|
|
|
|
# Create requirements.txt
|
|
req_file = os.path.join(self.temp_dir, "requirements.txt")
|
|
with open(req_file, "w") as f:
|
|
f.write("\n".join(requirements))
|
|
|
|
x="\n".join(requirements)
|
|
logger.info(f"Requirements file: {x}.")
|
|
|
|
# Install requirements
|
|
try:
|
|
pip_result = subprocess.run(
|
|
[python_exe, "-m", "pip", "install", "-r", req_file],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=int(APP_CONFIG.get("Agent_Coder_INSTALL_TIMEOUT"))
|
|
)
|
|
if pip_result.returncode != 0:
|
|
logger.debug(f"Error installing requirements: {pip_result.stderr}")
|
|
else:
|
|
logger.debug(f"Requirements installed successfully")
|
|
# Log installed packages if in debug mode
|
|
if logger.isEnabledFor(logging.DEBUG):
|
|
pip_list = subprocess.run(
|
|
[python_exe, "-m", "pip", "list"],
|
|
capture_output=True,
|
|
text=True
|
|
)
|
|
logger.debug(f"Installed packages:\n{pip_list.stdout}")
|
|
|
|
except Exception as e:
|
|
logger.debug(f"Exception during requirements installation: {str(e)}")
|
|
|
|
# 3. Write code to file
|
|
code_file = os.path.join(self.temp_dir, "code.py")
|
|
with open(code_file, "w", encoding="utf-8") as f:
|
|
f.write(code)
|
|
|
|
# 4. Execute code
|
|
logger.debug(f"Executing code with timeout of {self.executor_timeout} seconds. Code: {code}")
|
|
process = subprocess.run(
|
|
[python_exe, code_file],
|
|
timeout=self.executor_timeout,
|
|
capture_output=True,
|
|
text=True
|
|
)
|
|
|
|
# 5. Process results
|
|
stdout = process.stdout
|
|
stderr = process.stderr
|
|
|
|
# Try to extract result from stdout
|
|
result_data = None
|
|
if process.returncode == 0:
|
|
try:
|
|
# Find the last line that might be JSON
|
|
for line in reversed(stdout.strip().split('\n')):
|
|
line = line.strip()
|
|
if line and line[0] in '{[' and line[-1] in '}]':
|
|
try:
|
|
result_data = json.loads(line)
|
|
logger.debug(f"Extracted result data from stdout: {type(result_data)}")
|
|
break
|
|
except json.JSONDecodeError:
|
|
continue
|
|
except Exception as e:
|
|
logger.debug(f"Error extracting result from stdout: {str(e)}")
|
|
|
|
# Create result dictionary
|
|
return {
|
|
"success": process.returncode == 0,
|
|
"output": stdout,
|
|
"error": stderr if process.returncode != 0 else "",
|
|
"result": result_data,
|
|
"exit_code": process.returncode
|
|
}
|
|
|
|
except subprocess.TimeoutExpired:
|
|
logger.error(f"Execution timed out after {self.executor_timeout} seconds")
|
|
return {
|
|
"success": False,
|
|
"output": "",
|
|
"error": f"Execution timed out after {self.executor_timeout} seconds",
|
|
"result": None,
|
|
"exit_code": -1
|
|
}
|
|
except Exception as e:
|
|
logger.error(f"Execution error: {str(e)}")
|
|
return {
|
|
"success": False,
|
|
"output": "",
|
|
"error": f"Execution error: {str(e)}",
|
|
"result": None,
|
|
"exit_code": -1
|
|
}
|
|
finally:
|
|
# Clean up resources
|
|
self._cleanup_execution()
|
|
|
|
def _cleanup_execution(self):
|
|
"""Clean up temporary resources from code execution."""
|
|
if self.temp_dir and os.path.exists(self.temp_dir):
|
|
try:
|
|
logger.debug(f"Cleaning up temporary directory: {self.temp_dir}")
|
|
shutil.rmtree(self.temp_dir)
|
|
self.temp_dir = None
|
|
except Exception as e:
|
|
logger.warning(f"Error cleaning up temp directory: {str(e)}")
|
|
|
|
def _clean_code(self, code: str) -> str:
|
|
"""Remove any markdown formatting or explanations."""
|
|
# Remove code block markers
|
|
code = code.replace("```python", "").replace("```", "")
|
|
|
|
# Remove explanations before or after code
|
|
lines = code.strip().split('\n')
|
|
start_index = 0
|
|
end_index = len(lines)
|
|
|
|
# Find start of actual code
|
|
for i, line in enumerate(lines):
|
|
if line.strip().startswith("input_files =") or line.strip().startswith("# REQUIREMENTS:"):
|
|
start_index = i
|
|
break
|
|
|
|
# Clean code
|
|
cleaned_code = '\n'.join(lines[start_index:end_index])
|
|
return cleaned_code.strip()
|
|
|
|
|
|
# Factory function for the Coder agent
|
|
def get_coder_agent():
|
|
"""Returns an instance of the Coder agent."""
|
|
return AgentCoder() |