"""
Simple Coder Agent for execution of Python code.
"""
|
|
|
|
import logging
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
import shutil
|
|
import sys
|
|
from typing import Dict, Any, List, Tuple
|
|
|
|
from modules.workflowAgentsRegistry import AgentBase
|
|
from modules.configuration import APP_CONFIG
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class AgentCoder(AgentBase):
|
|
"""Simplified Agent for developing and executing Python code with integrated executor"""
|
|
|
|
def __init__(self):
    """
    Set up the coder agent's identity, capabilities and executor limits.

    The execution timeout and the retry limit are read from APP_CONFIG and
    converted to int. No temporary directory exists until code is executed.
    """
    super().__init__()

    # Identity and advertised capabilities
    self.name = "coder"
    self.description = "Develops and executes Python code for data processing and automation"
    self.capabilities = [
        "code_development",
        "data_processing",
        "file_processing",
        "automation",
        "code_execution",
    ]

    # Executor limits taken from the application configuration
    self.executorTimeout = int(APP_CONFIG.get("Agent_Coder_EXECUTION_TIMEOUT"))  # seconds
    self.executionRetryLimit = int(APP_CONFIG.get("Agent_Coder_EXECUTION_RETRY"))  # max retries

    # Working directory of the execution in progress (none yet)
    self.tempDir = None
|
|
|
|
def setDependencies(self, mydom=None):
    """
    Attach the external service this agent relies on.

    Args:
        mydom: Object stored as self.mydom; the other methods of this agent
            await its callAi(...) method for every AI request
    """
    self.mydom = mydom
|
|
|
|
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Process a task and perform code development/execution.

    First checks whether the task can be answered directly from the
    AI-extracted document contents. Otherwise code is generated, the input
    data is injected into it, and the code is executed; failed runs are
    retried with AI-improved code up to self.executionRetryLimit times.

    Args:
        task: Task dictionary with prompt, inputDocuments, outputSpecifications

    Returns:
        Dictionary with "feedback" (str) and "documents" (list of
        {"label", "content"} dictionaries)
    """
    # 1. Extract task information (explicit None values fall back to empty lists)
    prompt = task.get("prompt", "")
    inputDocuments = task.get("inputDocuments") or []
    outputSpecs = task.get("outputSpecifications") or []

    # This class only assigns mydom in setDependencies(); getattr keeps the
    # "not configured" answer from turning into an AttributeError.
    if not getattr(self, "mydom", None):
        logger.error("No AI service configured for the Coder agent")
        return {
            "feedback": "The Coder agent is not properly configured.",
            "documents": []
        }

    # 2. Split the documents into raw data (for execution) and extracted text
    documentData, contentExtraction = self._collectDocumentData(inputDocuments)

    # 3. Check if task can be completed without code execution
    quickCompletion = await self._checkQuickCompletion(prompt, contentExtraction, outputSpecs)
    if quickCompletion and quickCompletion.get("complete") == 1:
        logger.info("Task completed without code execution")
        return {
            "feedback": quickCompletion.get("prompt", "Task completed successfully."),
            "documents": quickCompletion.get("documents", [])
        }
    logger.debug("Code to generate, no quick check")

    # If quick completion not possible, continue with code generation and execution
    logger.info("Generating code to solve the task")

    # 4. Generate code using AI
    code, requirements = await self._generateCode(prompt)
    if not code:
        return {
            "feedback": "Failed to generate code for the task.",
            "documents": []
        }

    # 5. Keep the placeholder version as template and inject the data into a copy
    templateCode = code
    codeWithData = self._injectInputFiles(templateCode, documentData)

    # 6. Execute code with retry logic
    retryCount = 0
    maxRetries = self.executionRetryLimit
    executionHistory = []

    # Loop exits via break, so at least one attempt runs even if the
    # configured retry limit is negative.
    while True:
        executionResult = self._executeCode(codeWithData, requirements)
        executionHistory.append({
            "attempt": retryCount + 1,
            "code": codeWithData,
            "result": executionResult
        })

        if executionResult.get("success", False):
            logger.info(f"Code execution succeeded on attempt {retryCount + 1}")
            break

        if retryCount >= maxRetries:
            logger.info(f"Reached maximum retry limit ({maxRetries}). Giving up.")
            break

        error = executionResult.get("error", "Unknown error")
        logger.info(f"Execution attempt {retryCount + 1} failed: {error}. Attempting to improve code.")

        # The AI gets the template (placeholder line, no embedded data): the
        # input data can be large and must not depend on the AI echoing it back.
        improvedCode, improvedRequirements = await self._improveCode(
            originalCode=templateCode,
            error=error,
            executionResult=executionResult,
            attempt=retryCount + 1
        )

        if improvedCode:
            templateCode = improvedCode
            codeWithData = self._injectInputFiles(templateCode, documentData)
            requirements = improvedRequirements
            logger.info(f"Code improved for retry {retryCount + 2}")
        else:
            logger.warning("Failed to improve code, using original code for retry")

        retryCount += 1

    # 7. Process results and create output documents
    feedback, documents = self._buildResultDocuments(
        codeWithData, executionHistory, executionResult, outputSpecs, retryCount
    )
    return {
        "feedback": feedback,
        "documents": documents
    }

def _collectDocumentData(self, inputDocuments: List[Dict[str, Any]]) -> Tuple[List[list], List[Dict[str, Any]]]:
    """Split input documents into raw [filename, data, isBase64] entries and AI-extracted content entries."""
    documentData = []  # raw file data handed to the generated code
    contentExtraction = []  # AI-extracted data used for the quick completion check

    for doc in inputDocuments:
        # Create proper filename from name and ext
        filename = f"{doc.get('name')}.{doc.get('ext')}" if doc.get('ext') else doc.get('name')

        docData = doc.get('data', '')
        if docData:
            # Document-level data is assumed to be base64 encoded
            documentData.append([filename, docData, True])

        for content in doc.get('contents') or []:
            contentName = content.get('name', 'unnamed')

            if content.get('dataExtracted'):
                contentExtraction.append({
                    "filename": filename,
                    "contentName": contentName,
                    "contentData": content.get('dataExtracted', ''),
                    "contentType": content.get('contentType', ''),
                    "summary": content.get('summary', '')
                })

            rawData = content.get('data')
            # Skip content that merely repeats the document-level data
            if rawData and docData != rawData:
                isBase64 = (content.get('metadata') or {}).get('base64Encoded', False)
                documentData.append([filename, rawData, isBase64])

    return documentData, contentExtraction

def _injectInputFiles(self, code: str, documentData: List[list]) -> str:
    """Replace the inputFiles placeholder assignment in code with the literal document data."""
    import re  # local import: re is not among the module-level imports

    payload = repr(documentData)
    # Accept either quote style and flexible spacing around '='
    placeholder = r"""inputFiles\s*=\s*(["'])=== JSONLOAD ===\1"""
    # A callable replacement keeps backslashes in the payload literal
    injected, replaced = re.subn(placeholder, lambda _match: f"inputFiles = {payload}", code)
    if not replaced:
        logger.warning("inputFiles placeholder not found in code; input data was not injected")
    return injected

def _buildResultDocuments(self, finalCode: str, executionHistory: List[Dict[str, Any]],
                          executionResult: Dict[str, Any], outputSpecs: List[Dict[str, Any]],
                          retryCount: int) -> Tuple[str, List[Dict[str, Any]]]:
    """Turn the final execution result into the (feedback, documents) pair returned by processTask."""
    # The final code and the execution history are always delivered
    documents = [
        {"label": "generated_code.py", "content": finalCode},
        {"label": "execution_history.json", "content": json.dumps(executionHistory, indent=2, default=str)},
    ]

    if executionResult.get("success", False):
        resultData = executionResult.get("result")
        stdout = executionResult.get("output", "")

        if outputSpecs:
            for spec in outputSpecs:
                label = spec.get("label", "output.txt")
                # Prefer the entry the script stored under this label, else fall back to stdout
                if isinstance(resultData, dict) and label in resultData:
                    content = resultData[label]
                else:
                    content = stdout
                documents.append({"label": label, "content": content})
        else:
            # No output specs, create default output document
            documents.append({"label": "execution_output.txt", "content": stdout})

        if retryCount > 0:
            feedback = f"Code executed successfully after {retryCount + 1} attempts. Generated output files based on specifications."
        else:
            feedback = "Code executed successfully. Generated output files based on specifications."
    else:
        error = executionResult.get("error", "Unknown error")
        documents.append({
            "label": "execution_error.txt",
            "content": f"Error executing code:\n\n{error}"
        })

        if retryCount > 0:
            feedback = f"Error during code execution after {retryCount + 1} attempts: {error}"
        else:
            feedback = f"Error during code execution: {error}"

    return feedback, documents
|
|
|
|
async def _improveCode(self, originalCode: str, error: str, executionResult: Dict[str, Any], attempt: int) -> Tuple[str, List[str]]:
|
|
"""
|
|
Improve code based on execution error.
|
|
|
|
Args:
|
|
originalCode: The code that failed to execute
|
|
error: The error message
|
|
executionResult: Complete execution result dictionary
|
|
attempt: Current attempt number
|
|
|
|
Returns:
|
|
Tuple of (improvedCode, requirements)
|
|
"""
|
|
# Create prompt for code improvement
|
|
improvementPrompt = f"""
|
|
Fix the following Python code that failed during execution. This is attempt {attempt} to fix the code.
|
|
|
|
ORIGINAL CODE:
|
|
{originalCode}
|
|
|
|
ERROR MESSAGE:
|
|
{error}
|
|
|
|
STDOUT:
|
|
{executionResult.get('output', '')}
|
|
|
|
INSTRUCTIONS:
|
|
1. Fix all errors identified in the error message
|
|
2. Diagnose and fix any logical issues
|
|
3. Pay special attention to:
|
|
- Type conversions and data handling
|
|
- Error handling and edge cases
|
|
- Resource management (file handles, etc.)
|
|
- Syntax errors and typos
|
|
4. Keep the inputFiles handling logic intact
|
|
5. Maintain the same overall structure and purpose
|
|
|
|
OUTPUT:
|
|
- Your improved code MUST still define a 'result' variable as a dictionary
|
|
- Each output file should be a key in the result dictionary
|
|
- DO NOT remove the inputFiles assignment line structure
|
|
|
|
REQUIREMENTS:
|
|
Required packages should be specified as:
|
|
# REQUIREMENTS: library==version,library2>=version
|
|
- You may add/remove requirements as needed to fix the code
|
|
|
|
Return ONLY Python code without explanations or markdown.
|
|
"""
|
|
|
|
# Call AI service
|
|
messages = [
|
|
{"role": "system", "content": "You are an expert Python code debugger. Provide only fixed Python code without explanations or formatting."},
|
|
{"role": "user", "content": improvementPrompt}
|
|
]
|
|
|
|
try:
|
|
improvedContent = await self.mydom.callAi(messages, temperature=0.2)
|
|
|
|
# Extract code and requirements
|
|
improvedCode = self._cleanCode(improvedContent)
|
|
|
|
# Extract requirements
|
|
requirements = []
|
|
for line in improvedCode.split('\n'):
|
|
if line.strip().startswith("# REQUIREMENTS:"):
|
|
reqStr = line.replace("# REQUIREMENTS:", "").strip()
|
|
requirements = [r.strip() for r in reqStr.split(',') if r.strip()]
|
|
break
|
|
|
|
return improvedCode, requirements
|
|
except Exception as e:
|
|
logger.error(f"Error improving code: {str(e)}")
|
|
return None, []
|
|
|
|
async def _checkQuickCompletion(self, prompt: str, contentExtraction: List[Dict], outputSpecs: List[Dict]) -> Dict:
|
|
"""
|
|
Check if the task can be completed without writing and executing code.
|
|
|
|
Args:
|
|
prompt: The task prompt
|
|
contentExtraction: List of extracted content data with contentName and dataExtracted
|
|
outputSpecs: List of output specifications
|
|
|
|
Returns:
|
|
Dictionary with completion status and results, or None if no quick completion
|
|
"""
|
|
# If no data or no output specs, can't do a quick completion
|
|
if not contentExtraction or not outputSpecs:
|
|
return None
|
|
|
|
# Create a prompt for the AI to check if this can be completed directly
|
|
specsJson = json.dumps(outputSpecs)
|
|
dataJson = json.dumps(contentExtraction)
|
|
|
|
checkPrompt = f"""
|
|
Analyze this task and determine if it can be completed directly without writing code.
|
|
|
|
TASK:
|
|
{prompt}
|
|
|
|
EXTRACTED DATA AVAILABLE:
|
|
{dataJson}
|
|
|
|
Each entry in the extracted data contains:
|
|
- filename: The source file name
|
|
- contentName: The specific content section name
|
|
- contentData: The AI-extracted text from the content
|
|
- contentType: The type of content (text, csv, etc.)
|
|
- summary: A brief summary of the content
|
|
|
|
REQUIRED OUTPUT:
|
|
{specsJson}
|
|
|
|
If the task can be completed directly with the available extracted data, respond with:
|
|
{{"complete": 1, "prompt": "Brief explanation of the solution", "documents": [
|
|
{{"label": "filename.ext", "content": "content here"}}
|
|
]}}
|
|
|
|
If code would be needed to properly complete this task, respond with:
|
|
{{"complete": 0, "prompt": "Explanation why code is needed"}}
|
|
|
|
Only return valid JSON. Your entire response must be parseable as JSON.
|
|
"""
|
|
|
|
# Call AI service
|
|
logger.debug(f"Checking if task can be completed without code execution: {checkPrompt}")
|
|
messages = [
|
|
{"role": "system", "content": "You are an AI assistant that determines if tasks require code execution. Reply with JSON only."},
|
|
{"role": "user", "content": checkPrompt}
|
|
]
|
|
|
|
try:
|
|
# Use a lower temperature for more deterministic response
|
|
response = await self.mydom.callAi(messages, produceUserAnswer = True, temperature=0.1)
|
|
|
|
# Parse response as JSON
|
|
if response:
|
|
try:
|
|
# Find JSON in response if there's any text around it
|
|
jsonStart = response.find('{')
|
|
jsonEnd = response.rfind('}') + 1
|
|
|
|
if jsonStart >= 0 and jsonEnd > jsonStart:
|
|
jsonStr = response[jsonStart:jsonEnd]
|
|
result = json.loads(jsonStr)
|
|
|
|
# Check if this is a proper response
|
|
if "complete" in result:
|
|
return result
|
|
|
|
except json.JSONDecodeError:
|
|
logger.debug("Failed to parse quick completion response as JSON")
|
|
pass
|
|
except Exception as e:
|
|
logger.debug(f"Error during quick completion check: {str(e)}")
|
|
|
|
# Default to requiring code execution
|
|
return None
|
|
|
|
async def _generateCode(self, prompt: str) -> Tuple[str, List[str]]:
|
|
"""
|
|
Generate Python code from a prompt with the inputFiles placeholder.
|
|
|
|
Args:
|
|
prompt: The task prompt
|
|
|
|
Returns:
|
|
Tuple of (code, requirements)
|
|
"""
|
|
# Create prompt for code generation
|
|
aiPrompt = f"""
|
|
Generate Python code to solve the following task:
|
|
|
|
TASK:
|
|
{prompt}
|
|
|
|
INPUT FILES:
|
|
- 'inputFiles' variable is provided as [[filename, data, isBase64], ...]
|
|
- For text files (isBase64=False): use data directly as string
|
|
- For binary files (isBase64=True): use base64.b64decode(data)
|
|
|
|
CODE QUALITY:
|
|
- Use explicit type conversions where needed (int/float/str)
|
|
- Implement feature detection, not version checks
|
|
- Handle errors gracefully with appropriate fallbacks
|
|
- Follow latest API conventions for libraries
|
|
- Validate inputs before processing
|
|
|
|
OUTPUT:
|
|
- Your code MUST define a 'result' variable as a dictionary to store outputs.
|
|
- Each output file should be a key in the result dictionary.
|
|
- For example: result = {{"output.txt": "output text", "results.json": json_string}}
|
|
|
|
Your code must start with:
|
|
inputFiles = "=== JSONLOAD ===" # DO NOT CHANGE THIS LINE
|
|
|
|
REQUIREMENTS:
|
|
Required packages should be specified as:
|
|
# REQUIREMENTS: library==version,library2>=version
|
|
- Specify exact versions for critical libraries
|
|
- Use constraint operators (==,>=,<=) as needed
|
|
|
|
Return ONLY Python code without explanations or markdown.
|
|
"""
|
|
|
|
# Call AI service
|
|
messages = [
|
|
{"role": "system", "content": "You are a Python code generator. Provide only valid Python code without explanations or formatting."},
|
|
{"role": "user", "content": aiPrompt}
|
|
]
|
|
|
|
generatedContent = await self.mydom.callAi(messages, temperature=0.1)
|
|
|
|
# Extract code and requirements
|
|
code = self._cleanCode(generatedContent)
|
|
|
|
# Extract requirements
|
|
requirements = []
|
|
for line in code.split('\n'):
|
|
if line.strip().startswith("# REQUIREMENTS:"):
|
|
reqStr = line.replace("# REQUIREMENTS:", "").strip()
|
|
requirements = [r.strip() for r in reqStr.split(',') if r.strip()]
|
|
break
|
|
|
|
return code, requirements
|
|
|
|
def _executeCode(self, code: str, requirements: List[str] = None) -> Dict[str, Any]:
    """
    Execute Python code in a freshly created virtual environment.

    A temporary directory with its own venv is created, the requirements are
    installed on a best-effort basis, and the code is run with the temporary
    directory as working directory. The temporary directory is removed again
    before returning.

    The script's global 'result' variable is handed back through a marker
    line that an appended epilogue prints to stdout; the marker line is
    removed from the reported output. If no marker is present, the last
    stdout line that parses as JSON is used instead.

    Args:
        code: Python code to execute
        requirements: List of required packages (pip requirement specifiers)

    Returns:
        Execution result dictionary with the keys "success", "output",
        "error", "result" and "exitCode"
    """
    # Marker prefixing the JSON dump of the script's 'result' variable
    resultMarker = "__AGENT_CODER_RESULT__"
    # Best-effort epilogue: any failure (no 'result', not serializable) is ignored
    resultEpilogue = (
        "\n\n# --- appended by the executor: report the 'result' variable ---\n"
        "try:\n"
        "    import json as _executorJson\n"
        f"    print({resultMarker!r} + _executorJson.dumps(result, default=str))\n"
        "except Exception:\n"
        "    pass\n"
    )

    try:
        # 1. Create temp directory and virtual environment
        self.tempDir = tempfile.mkdtemp(prefix="code_exec_")
        venvPath = os.path.join(self.tempDir, "venv")

        logger.debug(f"Creating virtual environment at {venvPath}")
        subprocess.run([sys.executable, "-m", "venv", venvPath],
                       check=True, capture_output=True)

        # Get Python executable path
        pythonExe = os.path.join(venvPath, "Scripts", "python.exe") if os.name == 'nt' else os.path.join(venvPath, "bin", "python")

        # 2. Install requirements if provided (failures do not abort the run)
        if requirements:
            logger.info(f"Installing requirements: {requirements}")

            requirementsText = "\n".join(requirements)
            reqFile = os.path.join(self.tempDir, "requirements.txt")
            with open(reqFile, "w", encoding="utf-8") as f:
                f.write(requirementsText)
            logger.info(f"Requirements file: {requirementsText}.")

            try:
                pipResult = subprocess.run(
                    [pythonExe, "-m", "pip", "install", "-r", reqFile],
                    capture_output=True,
                    text=True,
                    timeout=int(APP_CONFIG.get("Agent_Coder_INSTALL_TIMEOUT"))
                )
                if pipResult.returncode != 0:
                    logger.warning(f"Error installing requirements: {pipResult.stderr}")
                else:
                    logger.debug("Requirements installed successfully")
                    # Log installed packages if in debug mode
                    if logger.isEnabledFor(logging.DEBUG):
                        pipList = subprocess.run(
                            [pythonExe, "-m", "pip", "list"],
                            capture_output=True,
                            text=True
                        )
                        logger.debug(f"Installed packages:\n{pipList.stdout}")
            except Exception as e:
                logger.warning(f"Exception during requirements installation: {str(e)}")

        # 3. Write code (plus result epilogue) to file
        codeFile = os.path.join(self.tempDir, "code.py")
        with open(codeFile, "w", encoding="utf-8") as f:
            f.write(code + resultEpilogue)

        # 4. Execute code; cwd keeps relative file writes inside the temp
        #    directory, which is deleted afterwards
        logger.debug(f"Executing code with timeout of {self.executorTimeout} seconds. Code: {code}")
        process = subprocess.run(
            [pythonExe, codeFile],
            timeout=self.executorTimeout,
            capture_output=True,
            text=True,
            cwd=self.tempDir
        )

        # 5. Process results
        stdout = process.stdout
        stderr = process.stderr
        resultData = None

        # Preferred hand-off: the marker line printed by the epilogue
        markerPos = stdout.rfind(resultMarker)
        if markerPos >= 0:
            lineEnd = stdout.find("\n", markerPos)
            if lineEnd < 0:
                lineEnd = len(stdout)
            payload = stdout[markerPos + len(resultMarker):lineEnd]
            # Cut marker, payload and its newline so "output" only holds the script's own prints
            stdout = stdout[:markerPos] + stdout[lineEnd + 1:]
            try:
                resultData = json.loads(payload)
                logger.debug(f"Extracted result data from result marker: {type(resultData)}")
            except json.JSONDecodeError:
                logger.debug("Result marker found but payload is not valid JSON")

        # Fallback: the last stdout line that looks like and parses as JSON
        if resultData is None and process.returncode == 0:
            try:
                for line in reversed(stdout.strip().split('\n')):
                    line = line.strip()
                    if line and line[0] in '{[' and line[-1] in '}]':
                        try:
                            resultData = json.loads(line)
                            logger.debug(f"Extracted result data from stdout: {type(resultData)}")
                            break
                        except json.JSONDecodeError:
                            continue
            except Exception as e:
                logger.debug(f"Error extracting result from stdout: {str(e)}")

        return {
            "success": process.returncode == 0,
            "output": stdout,
            "error": stderr if process.returncode != 0 else "",
            "result": resultData,
            "exitCode": process.returncode
        }

    except subprocess.TimeoutExpired:
        logger.error(f"Execution timed out after {self.executorTimeout} seconds")
        return {
            "success": False,
            "output": "",
            "error": f"Execution timed out after {self.executorTimeout} seconds",
            "result": None,
            "exitCode": -1
        }
    except Exception as e:
        logger.error(f"Execution error: {str(e)}")
        return {
            "success": False,
            "output": "",
            "error": f"Execution error: {str(e)}",
            "result": None,
            "exitCode": -1
        }
    finally:
        # Clean up resources
        self._cleanupExecution()
|
|
|
|
def _cleanupExecution(self):
|
|
"""Clean up temporary resources from code execution."""
|
|
if self.tempDir and os.path.exists(self.tempDir):
|
|
try:
|
|
logger.debug(f"Cleaning up temporary directory: {self.tempDir}")
|
|
shutil.rmtree(self.tempDir)
|
|
self.tempDir = None
|
|
except Exception as e:
|
|
logger.warning(f"Error cleaning up temp directory: {str(e)}")
|
|
|
|
def _cleanCode(self, code: str) -> str:
|
|
"""Remove any markdown formatting or explanations."""
|
|
# Remove code block markers
|
|
code = code.replace("```python", "").replace("```", "")
|
|
|
|
# Remove explanations before or after code
|
|
lines = code.strip().split('\n')
|
|
startIndex = 0
|
|
endIndex = len(lines)
|
|
|
|
# Find start of actual code
|
|
for i, line in enumerate(lines):
|
|
if line.strip().startswith("inputFiles =") or line.strip().startswith("# REQUIREMENTS:"):
|
|
startIndex = i
|
|
break
|
|
|
|
# Clean code
|
|
cleanedCode = '\n'.join(lines[startIndex:endIndex])
|
|
return cleanedCode.strip()
|
|
|
|
|
|
# Factory function for the Coder agent
def getAgentCoder():
    """Create a new AgentCoder instance and hand it to the caller."""
    agent = AgentCoder()
    return agent