# gateway/gwserver/old_modules_copy/agentservice_agent_coder.py
# 2025-04-14 20:05:33 +02:00
#
# 500 lines
# No EOL
# 20 KiB
# Python

"""
Simplified Coder Agent for developing and executing Python code.
This agent uses the CodeExecutor from the helper module to execute code.
"""
import logging
import json
import re
import uuid
import traceback
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple
from modules.agentservice_base import BaseAgent
from modules.agentservice_utils import FileUtils, WorkflowUtils, MessageUtils, LoggingUtils
from connectors.connector_aichat_openai import ChatService
logger = logging.getLogger(__name__)
class CoderAgent(BaseAgent):
    """Agent for developing and executing Python code.

    Code is taken from the first fenced block of the incoming message when one
    is present; otherwise it is generated through ``ChatService``. The code is
    then handed to ``CodeExecutor`` and the outcome is rendered as a markdown
    response message.
    """

    # Package names passed to CodeExecutor as ``blocked_packages``.
    # NOTE(review): "pytorch" is listed, but the PyPI distribution is named
    # "torch" - confirm how CodeExecutor matches these names.
    _BLOCKED_PACKAGES = (
        "cryptography", "flask", "django", "tornado",  # Security risks
        "tensorflow", "pytorch", "scikit-learn",  # Resource intensive
    )

    # First capture group is the body of a fenced (optionally ```python) block.
    _CODE_FENCE_RE = re.compile(r'```(?:python)?\s*([\s\S]*?)```')

    # Section parsers for the AI answer. The terminating look-ahead is anchored
    # to the start of a line so that a "##" inside a comment or string of the
    # generated code no longer cuts the section short.
    _REQUIREMENTS_SECTION_RE = re.compile(
        r'## requirements\.txt\s*([\s\S]*?)(?=^#{2,}|\Z)', re.MULTILINE)
    _PYTHON_SECTION_RE = re.compile(
        r'## python\s*([\s\S]*?)(?=^#{2,}\s*requirements\.txt|\Z)', re.MULTILINE)

    # Static tail of the code-generation prompt (rules plus a worked example
    # of the expected answer format).
    _FORMAT_INSTRUCTIONS = """
IMPORTANT REQUIREMENTS:
1. Your code MUST define a 'result' variable that captures the output of your code.
The execution framework specifically looks for this variable.
2. Write only executable Python code in the Python section.
3. Do not include any text explanations or markdown outside of code comments (#).
4. All explanations should be within Python comments only.
5. Make your code complete and self-contained.
6. For CSV processing, include proper error handling.
Return your response in the following format:
## requirements.txt
# Each required package on its own line
pandas
numpy
matplotlib
## python
import pandas as pd
import numpy as np
# Load and process data
def process_data(file_path):
    try:
        # Read the CSV file
        df = pd.read_csv(file_path)
        return df
    except Exception as e:
        print(f"Error: {e}")
        return None
# Main processing logic
data = process_data('data.csv')
# Analyze data
if data is not None:
    summary = data.describe()
    print("Data summary:")
    print(summary)
    # IMPORTANT: Define result variable to return data
    result = {
        "summary": summary.to_dict(),
        "columns": list(data.columns),
        "row_count": len(data)
    }
else:
    # Always define a result, even in error cases
    result = {"error": "Failed to process data"}
"""

    def __init__(self):
        """Initialize the coder agent with proper type and capabilities."""
        super().__init__()
        # Agent metadata
        self.id = "coder"
        self.type = "coder"
        self.name = "Python Code Agent"
        self.description = "Develops and executes Python code"
        self.capabilities = "code_development,data_processing,file_processing,automation"
        self.result_format = "python_code"
        # Init utilities
        self.file_utils = FileUtils()
        self.message_utils = MessageUtils()
        # Executor settings
        self.executor_timeout = 60  # seconds
        self.executor_memory_limit = 512  # MB
        # AI service settings
        self.ai_temperature = 0.2  # Lower temperature for more deterministic code generation
        self.ai_max_tokens = 2000  # Enough tokens for complex code

    def get_agent_info(self) -> Dict[str, Any]:
        """Get agent information for the agent registry.

        Returns:
            Dict with id, type, name, description, capabilities, result_format
            and a "metadata" dict holding the executor timeout and memory limit.
        """
        return {
            "id": self.id,
            "type": self.type,
            "name": self.name,
            "description": self.description,
            "capabilities": self.capabilities,
            "result_format": self.result_format,
            "metadata": {
                "timeout": self.executor_timeout,
                "memory_limit": self.executor_memory_limit
            }
        }

    async def process_message(self, message: Dict[str, Any],
                              workflow: Dict[str, Any],
                              context: Optional[Dict[str, Any]] = None,
                              log_func=None) -> Dict[str, Any]:
        """
        Processes a message to develop and execute Python code.

        Args:
            message: The message to process
            workflow: The current workflow
            context: Additional context information (currently not read here)
            log_func: Function for workflow logging

        Returns:
            Response message. Any exception raised while processing is caught
            and reported inside the response content instead of propagating.
        """
        # Initialize logging
        workflow_id = workflow.get("id")
        logging_utils = LoggingUtils(workflow_id, log_func)
        logging_utils.info("CoderAgent starting processing", "agents")

        # Create response message
        response = self.message_utils.create_message(workflow_id, role="assistant")
        response["agent_type"] = self.type
        response["agent_name"] = self.name
        response["parent_message_id"] = message.get("id")
        response["documents"] = []

        try:
            content = message.get("content") or ""
            documents = message.get("documents") or []
            # Code supplied by the user carries no requirements list. Binding
            # the name up front keeps that path from failing with an
            # UnboundLocalError when the code is executed below.
            requirements: List[str] = []

            code_blocks = self._CODE_FENCE_RE.findall(content)
            if code_blocks:
                # Use the first code block found
                code_to_execute = self._clean_code(code_blocks[0])
                logging_utils.info(f"Code extracted from message ({len(code_to_execute)} characters)", "agents")
            else:
                # Generate code based on the message content using AI
                logging_utils.info("No code found in message, generating new code with AI", "agents")
                code_to_execute, requirements = await self._generate_code_from_prompt(content, documents)
                if not code_to_execute:
                    logging_utils.warning("AI could not generate code", "agents")
                    response["content"] = "I couldn't generate executable code based on your request. Please provide more detailed instructions."
                    self.message_utils.finalize_message(response)
                    return response
                logging_utils.info(f"Code generated with AI ({len(code_to_execute)} characters)", "agents")

            if code_to_execute:
                logging_utils.info("Executing code", "execution")
                execution_context = {
                    "workflow_id": workflow_id,
                    "documents": documents,
                    "message": message,
                    "log_func": log_func
                }
                result = await self._execute_code(code_to_execute, requirements, execution_context)

                if result.get("success", False):
                    logging_utils.info("Code executed successfully", "execution")
                    response["content"] = self._format_success_content(code_to_execute, result)
                    response["documents"].extend(
                        self._collect_created_documents(result.get("result"), logging_utils))
                else:
                    error = result.get("error", "Unknown error")
                    logging_utils.error(f"Error during code execution: {error}", "execution")
                    response["content"] = self._format_error_content(code_to_execute, error)
            else:
                # An empty fenced block ends up here
                response["content"] = "I couldn't find or generate executable code. Please provide Python code or explain your requirements more clearly."

            # Finalize response
            self.message_utils.finalize_message(response)
            logging_utils.info("CoderAgent has successfully processed the request", "agents")
            return response
        except Exception as e:
            # Top-level boundary: report the failure in the response message
            error_msg = f"Error during processing by the CoderAgent: {str(e)}"
            logging_utils.error(error_msg, "error")
            response["content"] = f"## Processing Error\n\n```\n{error_msg}\n\n{traceback.format_exc()}\n```"
            self.message_utils.finalize_message(response)
            return response

    @staticmethod
    def _format_result(value: Any) -> str:
        """Render an execution result as text, preferring indented JSON for dicts/lists.

        Falls back to ``str()`` when the value is not JSON-serializable, so that
        formatting can never turn a successful execution into an error.
        """
        if isinstance(value, (dict, list)):
            try:
                return json.dumps(value, indent=2)
            except (TypeError, ValueError):
                return str(value)
        return str(value)

    def _format_success_content(self, code: str, result: Dict[str, Any]) -> str:
        """Build the markdown body for a successful execution."""
        output = result.get("output", "")
        execution_result = result.get("result")

        parts = [
            "## Code executed successfully\n\n",
            f"### Executed Code\n\n```python\n{code}\n```\n\n",
        ]
        if output:
            parts.append(f"### Output\n\n```\n{output}\n```\n\n")
        if execution_result:
            parts.append(f"### Result\n\n```\n{self._format_result(execution_result)}\n```\n\n")
        return "".join(parts)

    def _format_error_content(self, code: str, error: Any) -> str:
        """Build the markdown body for a failed execution."""
        return (
            "## Error during code execution\n\n"
            f"### Executed Code\n\n```python\n{code}\n```\n\n"
            f"### Error\n\n```\n{error}\n```\n\n"
            + self._error_recommendation(error)
        )

    @staticmethod
    def _error_recommendation(error: Any) -> str:
        """Return recommendation text for an execution error.

        Delegates to a module-level ``get_error_recommendation`` when one
        exists. NOTE(review): no such function is defined or imported in the
        part of the module visible at the time of writing; without this guard
        the resulting NameError replaced the real execution error in the
        response. When the helper is absent, no recommendation is appended.
        """
        helper = globals().get("get_error_recommendation")
        if callable(helper):
            return helper(error) or ""
        return ""

    @staticmethod
    def _collect_created_documents(execution_result: Any, logging_utils) -> List[Dict[str, Any]]:
        """Wrap files listed under ``created_files`` of a dict result as response documents.

        Entries that are not dicts or that have no "id" are skipped.
        """
        if not isinstance(execution_result, dict) or "created_files" not in execution_result:
            return []
        created_docs = []
        for file_info in execution_result.get("created_files") or []:
            if not isinstance(file_info, dict):
                continue
            file_id = file_info.get("id")
            if not file_id:
                continue
            logging_utils.info(f"Adding created file {file_info.get('name', file_id)} to documents", "files")
            created_docs.append({
                "id": f"doc_{uuid.uuid4()}",
                "source": file_info,
                "type": "file"
            })
        return created_docs

    def _clean_code(self, code: str) -> str:
        """
        Clean up code by removing markdown code block markers and, only when
        necessary, closing an unterminated string literal.

        The string repair is a line-based heuristic. It is applied only if the
        code does not compile as-is and the repaired text does compile;
        otherwise the code is returned without the repair, so valid code (for
        example a comment containing an apostrophe, or a triple-quoted string)
        is never altered.

        Args:
            code: The code string to clean

        Returns:
            Cleaned code string
        """
        # Remove code block markers if present
        code = re.sub(r'^```(?:python)?\s*', '', code)
        code = re.sub(r'```\s*$', '', code)

        if self._compiles(code):
            return code

        repaired = self._close_unterminated_string(code)
        if repaired != code and self._compiles(repaired):
            logger.warning("Fixed unclosed string literal in code")
            return repaired
        # Leave genuinely broken code untouched so the executor reports the
        # real syntax error rather than one introduced by the heuristic.
        return code

    @staticmethod
    def _compiles(code: str) -> bool:
        """Return True if ``code`` compiles as a Python module (it is not executed)."""
        try:
            compile(code, "<coder-agent>", "exec")
        except (SyntaxError, ValueError, RecursionError, MemoryError):
            return False
        return True

    @staticmethod
    def _close_unterminated_string(code: str) -> str:
        """Append a closing quote to the last line if a quote appears to be left open.

        Very basic line-based scan: a line with an odd number of one quote
        character (and no trailing backslash) opens a string; a later line
        containing that quote character (and no trailing backslash) closes it.
        """
        lines = code.split('\n')
        in_string = False
        string_delimiter = None
        for line in lines:
            if in_string:
                if string_delimiter in line and not line.endswith('\\'):
                    in_string = False
                continue
            for delimiter in ('"', "'"):
                if line.count(delimiter) % 2 == 1 and not line.endswith('\\'):
                    in_string = True
                    string_delimiter = delimiter
                    break
        if in_string:
            lines[-1] += string_delimiter
        return '\n'.join(lines)

    async def _generate_code_from_prompt(self, prompt: str, documents: List[Dict[str, Any]]) -> Tuple[str, List[str]]:
        """
        Generate Python code from a prompt using AI service.

        Args:
            prompt: The prompt to generate code from
            documents: Documents associated with the prompt

        Returns:
            Tuple of (generated Python code, required packages). If generation
            fails, the code is a small script that prints the error and sets
            ``result`` to an error dict, and the package list is empty.
        """
        try:
            # Initialize AI service
            chat_service = ChatService()

            # Task description followed by one line per available document
            prompt_parts = [
                f"Generate Python code to solve the following task:\n{prompt}\nAvailable documents:\n"
            ]
            for position, doc in enumerate(documents or [], start=1):
                source = doc.get("source", {})
                doc_name = source.get("name", f"Document {position}")
                doc_type = source.get("content_type", "unknown")
                doc_id = source.get("id", "")
                prompt_parts.append(f"- {doc_name} (type: {doc_type}, id: {doc_id})\n")
            prompt_parts.append(self._FORMAT_INSTRUCTIONS)
            ai_prompt = "".join(prompt_parts)

            # Create messages for the API
            messages = [
                {"role": "system", "content": "You are a Python code generator. Generate executable Python code following the specified format with requirements.txt and code sections. The code must be well-commented, include error handling, and define a 'result' variable to capture output."},
                {"role": "user", "content": ai_prompt}
            ]

            # Call the API
            logger.info("Calling AI API to generate code")
            generated_content = await chat_service.call_api(
                messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens)

            return self._parse_generated_content(generated_content)
        except Exception as e:
            logger.error(f"Error generating code with AI: {str(e)}", exc_info=True)
            return self._generation_error_code(e), []

    def _parse_generated_content(self, generated_content: str) -> Tuple[str, List[str]]:
        """Split the AI answer into (cleaned code, requirements list)."""
        # Extract requirements.txt content
        requirements = []
        requirements_match = self._REQUIREMENTS_SECTION_RE.search(generated_content)
        if requirements_match:
            for raw_line in requirements_match.group(1).strip().split('\n'):
                line = raw_line.strip()
                # Skip empty lines, comments, and markdown formatting
                if (not line or line.startswith(('#', '`'))
                        or line.endswith('`') or '```' in line):
                    continue
                requirements.append(line)

        # Extract Python code
        code_match = self._PYTHON_SECTION_RE.search(generated_content)
        if code_match:
            code = code_match.group(1).strip()
        else:
            # Fallback to legacy code block extraction
            code_blocks = self._CODE_FENCE_RE.findall(generated_content)
            code = code_blocks[0].strip() if code_blocks else generated_content.strip()

        # Clean the code to remove any markdown formatting
        return self._clean_code(code), requirements

    @staticmethod
    def _generation_error_code(error: Exception) -> str:
        """Return a small script that reports a code-generation failure.

        The error text is embedded via ``repr()`` so that quotes, braces,
        backslashes or newlines in it cannot break the generated script.
        """
        message_literal = repr(str(error))
        return (
            "\n# Error during code generation\n"
            f"print('An error occurred during code generation: ' + {message_literal})\n"
            "# Return an error result\n"
            f"result = {{\"error\": \"Code generation failed\", \"message\": {message_literal}}}\n"
        )

    async def _execute_code(self, code: str, requirements: Optional[List[str]] = None,
                            context: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Execute Python code using the CodeExecutor.

        Args:
            code: The Python code to execute
            requirements: List of required packages
            context: Additional context for execution; may be omitted

        Returns:
            Result of code execution. If anything raises, a dict with
            "success": False, empty "output", the "error" text and
            "result": None is returned instead.
        """
        # A missing context used to raise TypeError on the membership test
        # below, outside the try block.
        context = context or {}

        # Get workflow ID and set up logging
        workflow_id = context.get("workflow_id", "")
        logging_utils = None
        if "log_func" in context and workflow_id:
            logging_utils = LoggingUtils(workflow_id, context.get("log_func"))
        if logging_utils:
            logging_utils.info("Executing Python code", "execution")
            if requirements:
                logging_utils.info(f"Required packages: {', '.join(requirements)}", "execution")

        executor = None
        try:
            # NOTE(review): CodeExecutor is not among the imports visible at
            # the top of this module - confirm where it comes from. A
            # NameError here is reported through the except branch below.
            executor = CodeExecutor(
                workflow_id=workflow_id,
                timeout=self.executor_timeout,
                max_memory_mb=self.executor_memory_limit,
                requirements=requirements,
                blocked_packages=list(self._BLOCKED_PACKAGES)
            )

            # Prepare input data for the code
            input_data = {"context": context, "workflow_id": workflow_id}
            # Add file references if available
            if "documents" in context:
                input_data["files"] = [
                    {
                        "id": doc.get("source", {}).get("id", ""),
                        "name": doc.get("source", {}).get("name", ""),
                        "type": doc.get("source", {}).get("content_type", "")
                    }
                    for doc in context.get("documents") or []
                    if doc.get("source", {}).get("type") == "file"
                ]

            # Execute the code
            result = executor.execute_code(code, input_data)
            if logging_utils:
                self._log_execution_result(logging_utils, result)
            return result
        except Exception as e:
            error_message = f"Error during code execution: {str(e)}\n{traceback.format_exc()}"
            if logging_utils:
                logging_utils.error(error_message, "error")
            return {
                "success": False,
                "output": "",
                "error": error_message,
                "result": None
            }
        finally:
            # Only clean up non-persistent environments. Running this in
            # ``finally`` also covers the failure path; a cleanup problem is
            # logged rather than allowed to replace the execution result.
            if executor is not None:
                try:
                    if not executor.is_persistent:
                        executor.cleanup()
                except Exception:
                    logger.warning("Executor cleanup failed", exc_info=True)

    def _log_execution_result(self, logging_utils, result: Dict[str, Any]) -> None:
        """Log the outcome of an execution with previews capped at 1000 characters."""
        if not result.get("success", False):
            error = result.get("error", "Unknown error")
            logging_utils.error(f"Error during code execution: {error}", "execution")
            return

        logging_utils.info("Code executed successfully", "execution")
        # Log a preview of the output
        output = result.get("output", "")
        if output:
            preview = output[:1000] + "..." if len(output) > 1000 else output
            logging_utils.info(f"Output preview: {preview}", "execution")
        # Log a preview of the result
        execution_result = result.get("result")
        if execution_result:
            result_text = self._format_result(execution_result)
            preview = result_text[:1000] + "..." if len(result_text) > 1000 else result_text
            logging_utils.info(f"Result preview: {preview}", "execution")
# Module-wide CoderAgent instance, created on first request
_coder_agent = None


def get_coder_agent():
    """Return the shared CoderAgent, creating it the first time it is requested."""
    global _coder_agent
    if _coder_agent is not None:
        return _coder_agent
    _coder_agent = CoderAgent()
    return _coder_agent