gateway/gwserver/modules/agentservice_workflow_execution.py
2025-04-11 23:39:10 +02:00

641 lines
No EOL
27 KiB
Python

"""
Refactored workflow execution for the Agentservice.
Implements a structured workflow with clear separation of planning and execution phases.
"""
import os
import logging
import asyncio
import uuid
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple, Union
# Import utility module (will be created)
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
# Import for data extraction
from modules.agentservice_dataextraction import data_extraction
logger = logging.getLogger(__name__)
class WorkflowExecution:
    """
    Handles the execution of workflows in a structured, multi-phase approach.
    Separates planning from execution and provides better logging.

    ``execute`` drives the phases in order: record the initiating message (and
    any attached files), ask the AI service for a work plan, run every planned
    activity through the selected agents, and finally append an AI-generated
    summary message to the workflow.
    """

    def __init__(self, workflow_manager, workflow_id: str, mandate_id: int, user_id: int, ai_service, lucydom_interface):
        """
        Initialize the workflow execution.

        Args:
            workflow_manager: Owner object; its ``_save_workflow`` and ``_add_log``
                methods are called to persist state and write log entries
            workflow_id: Identifier of the workflow being executed
            mandate_id: Mandate identifier (stored, not read by this class)
            user_id: User identifier (stored, not read by this class)
            ai_service: Object exposing an awaitable ``call_api(messages)``
            lucydom_interface: Passed through to file reading and data extraction
        """
        self.workflow_manager = workflow_manager
        self.workflow_id = workflow_id
        self.mandate_id = mandate_id
        self.user_id = user_id
        self.ai_service = ai_service
        self.lucydom_interface = lucydom_interface
        # Initialize utilities
        self.workflow_utils = WorkflowUtils(workflow_id)
        self.message_utils = MessageUtils()
        self.logging_utils = LoggingUtils(workflow_id, self._add_log)

    async def execute(self, message: Dict[str, Any], workflow: Dict[str, Any], files: Optional[List[Dict[str, Any]]] = None, is_user_input: bool = False):
        """
        Execute the workflow following the new structured approach.

        Args:
            message: The initiating message (prompt or user input)
            workflow: The workflow object; it is mutated in place (messages,
                status, last_activity)
            files: Optional list of file metadata
            is_user_input: Flag indicating if this is user input (accepted for
                interface compatibility; not read by this method)

        Returns:
            Dict with workflow status and result. On success it carries the
            workflow messages; on any failure it carries ``status == "failed"``
            and the error text instead of raising.
        """
        try:
            # 1. Initialize the workflow (already done by the caller)
            self.logging_utils.info("Starting workflow execution", "workflow", "Workflow initialized")

            # 2. Create a message with user input
            user_message = self._create_message(workflow, message.get("role", "user"))
            user_message["content"] = message.get("content", "")

            # Process files if provided
            if files:
                self.logging_utils.info(f"Processing {len(files)} files", "files", f"Processing files: {[f.get('name', 'unknown') for f in files]}")
                await self._process_files(workflow, user_message, files)

            # Add the message to the workflow
            workflow.setdefault("messages", []).append(user_message)

            # Save workflow state
            self.workflow_manager._save_workflow(workflow)
            self.logging_utils.info("User message processed", "workflow", "User input added to workflow")

            # 3. Create work plan using AI
            work_plan = await self._create_work_plan(workflow, user_message)
            self.logging_utils.info(f"Created work plan with {len(work_plan)} activities", "planning", "Work plan created")

            # 4. Execute each activity in the work plan
            results = []
            for i, activity in enumerate(work_plan, 1):
                self.logging_utils.info(f"Starting activity {i}/{len(work_plan)}: {activity.get('title', 'Unnamed')}",
                                        "execution", f"Activity: {activity.get('title', 'Unnamed')}")
                # Execute the activity
                activity_result = await self._execute_activity(workflow, activity)
                results.append(activity_result)
                # Save intermediate state so partial progress survives a later failure
                self.workflow_manager._save_workflow(workflow)

            # 5. Create summary for the user (appended to workflow["messages"])
            await self._create_summary(workflow, results)
            self.logging_utils.info("Created workflow summary", "summary", "Workflow summary created")

            # Set workflow status to completed
            workflow["status"] = "completed"
            workflow["last_activity"] = datetime.now().isoformat()

            # Final save
            self.workflow_manager._save_workflow(workflow)
            return {
                "workflow_id": self.workflow_id,
                "status": "completed",
                "messages": workflow.get("messages", [])
            }
        except Exception as e:
            # Top-level boundary: callers receive a result dict, never an exception.
            self.logging_utils.error(f"Workflow execution failed: {str(e)}", "error", f"Error: {str(e)}")
            workflow["status"] = "failed"
            workflow["last_activity"] = datetime.now().isoformat()
            try:
                self.workflow_manager._save_workflow(workflow)
            except Exception as save_error:
                # A persistence problem must not mask the original failure.
                self.logging_utils.error(f"Could not persist failed workflow state: {str(save_error)}", "error", f"Error: {str(save_error)}")
            return {
                "workflow_id": self.workflow_id,
                "status": "failed",
                "error": str(e)
            }

    async def _process_files(self, workflow: Dict[str, Any], message: Dict[str, Any], files: List[Dict[str, Any]]):
        """
        Process files and add them to the message.
        Extracts text content where possible.

        Args:
            workflow: The workflow object (not read here)
            message: The message to add files to
            files: List of file metadata
        """
        # Imported locally, as in the rest of this module's optional dependencies
        from modules.agentservice_filemanager import get_file_manager

        # Get the file manager instance
        file_manager = get_file_manager()

        # Prepare file contexts
        file_contexts = file_manager.prepare_file_contexts(files)
        self.logging_utils.info(f"Prepared contexts for {len(file_contexts)} files", "files", "File contexts prepared")

        # Read file contents
        file_contents = await file_manager.read_file_contents(
            file_contexts,
            self.lucydom_interface,
            self.workflow_id,
            self._add_log,
            self.ai_service
        )

        # Add files to message
        for file_id, content in file_contents.items():
            # First metadata entry with a matching id wins; unknown ids get defaults
            file_metadata = next((f for f in files if f.get('id') == file_id), {})
            # Content may be a dict (with extraction info) or the raw content itself
            has_details = isinstance(content, dict)
            file_data = {
                "id": file_id,
                "name": file_metadata.get('name', 'unnamed_file'),
                "content_type": file_metadata.get('content_type'),
                "type": file_metadata.get('type', "unknown"),
                "content": content.get("content", "") if has_details else content,
                "size": file_metadata.get('size'),
                "is_extracted": content.get("is_extracted", False) if has_details else False
            }
            self.logging_utils.info(f"Adding file {file_data['name']} to message", "files", f"Adding file: {file_data['name']}")
            file_manager.add_file_to_message(message, file_data)

    @staticmethod
    def _extract_json_array(text: Any) -> Optional[List[Any]]:
        """
        Find the first JSON array of objects embedded in free text.

        Every position where ``[`` is followed by ``{`` is tried as the start of
        a JSON document; text after the array is ignored, so explanations or
        further brackets following the JSON do not break parsing.

        Args:
            text: The raw AI response (anything that is not a string yields None)

        Returns:
            The decoded list, or None when no parsable array was found
        """
        import json
        import re

        if not isinstance(text, str):
            return None
        decoder = json.JSONDecoder()
        for candidate in re.finditer(r'\[\s*\{', text):
            try:
                parsed, _ = decoder.raw_decode(text, candidate.start())
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, list):
                return parsed
        return None

    @staticmethod
    def _fallback_plan(title: str, description: str, task: str) -> List[Dict[str, Any]]:
        """Build a single-activity work plan that hands the task to the agent unchanged."""
        return [{
            "title": title,
            "description": description,
            "agent_prompt": task,
            "data_prompt": "All available data is needed for this task",
            "expected_format": "Text",
            "dependencies": []
        }]

    async def _create_work_plan(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Create a structured work plan based on the user's request.

        Args:
            workflow: The workflow object (not read here)
            message: The initiating message

        Returns:
            List of structured activities to execute. If the AI response holds
            no usable JSON plan, or the AI call fails, a single-activity
            fallback plan using the task text as agent prompt is returned.
        """
        # Extract context information
        task = message.get("content", "")
        documents = message.get("documents", [])

        # Create the planning prompt
        plan_prompt = f"""
As an AI workflow manager, create a detailed work plan for the following task:
TASK: {task}
The work plan should include a structured list of activities. Each activity should have:
1. title - A short descriptive title for the activity
2. description - What needs to be done in this activity
3. agent_prompt - A complete prompt to give to the AI agent(s) for this activity
4. data_prompt - A prompt describing what data will be needed for this activity
5. expected_format - The expected output format (e.g., "Text", "JSON", "Table", "FileList")
6. dependencies - List of previous activities this depends on (by index)
Return the work plan as a JSON array of activity objects, each with the above properties.
The work plan should be logical, efficient, and comprehensively address the task.
"""
        # Add information about available documents if present
        if documents:
            doc_info = []
            for doc in documents:
                source = doc.get("source", {})
                doc_info.append({
                    "name": source.get("name", "unnamed"),
                    "type": source.get("type", "unknown"),
                    "content_type": source.get("content_type", "unknown")
                })
            plan_prompt += f"\n\nAvailable documents: {doc_info}"

        self.logging_utils.info("Requesting AI work plan", "planning", "Generating work plan")

        # Call AI to generate work plan
        try:
            plan_response = await self.ai_service.call_api([{"role": "user", "content": plan_prompt}])

            # Keep only well-formed activities; later phases call .get() on each
            parsed = self._extract_json_array(plan_response)
            work_plan = [activity for activity in (parsed or []) if isinstance(activity, dict)]
            if work_plan:
                self.logging_utils.info(f"Work plan created with {len(work_plan)} activities", "planning",
                                        f"Work plan activities: {[activity.get('title', 'Unnamed') for activity in work_plan]}")
                return work_plan

            self.logging_utils.warning("Could not extract JSON from AI response", "planning",
                                       "Fallback to default work plan")
            # Fallback: Create a simple default work plan
            return self._fallback_plan("Process Task", "Process the user's request directly", task)
        except Exception as e:
            self.logging_utils.error(f"Error creating work plan: {str(e)}", "planning", f"Work plan error: {str(e)}")
            # Return a minimal fallback plan
            return self._fallback_plan("Process Task (Error Recovery)", "Process the user's request after planning error", task)

    async def _process_directly(self, workflow: Dict[str, Any], title: str, agent_prompt: str) -> Dict[str, Any]:
        """
        Handle an activity without specialized agents via one direct AI call.

        Appends a moderator notice and the AI result to the workflow messages.

        Args:
            workflow: The workflow object
            title: Title of the activity being processed
            agent_prompt: The prompt sent to the AI service

        Returns:
            Activity result attributed to the generic "assistant"
        """
        self.logging_utils.info("No specific agents selected, using moderator", "agents", "Using moderator")
        messages = workflow.setdefault("messages", [])

        # Create a message with the moderator's response
        moderator_message = self._create_message(workflow, "assistant")
        moderator_message["content"] = f"No specialized agents needed for this task. Processing directly: {agent_prompt}"
        moderator_message["agent_type"] = "moderator"
        moderator_message["agent_id"] = "moderator"
        moderator_message["agent_name"] = "Moderator"
        messages.append(moderator_message)

        # Direct AI call for simple result
        result_content = await self.ai_service.call_api([
            {"role": "system", "content": "You are a helpful assistant processing the user's request."},
            {"role": "user", "content": agent_prompt}
        ])

        # Create result message
        result_message = self._create_message(workflow, "assistant")
        result_message["content"] = result_content
        result_message["agent_type"] = "assistant"
        result_message["agent_id"] = "assistant"
        result_message["agent_name"] = "AI Assistant"
        result_message["result_format"] = "Text"
        messages.append(result_message)

        return {
            "title": title,
            "content": result_content,
            "agent": "assistant",
            "format": "Text"
        }

    async def _build_data_summary(self, workflow: Dict[str, Any], data_prompt: str) -> str:
        """
        Run data extraction for an activity and render a short text summary.

        Args:
            workflow: The workflow object (its messages and files are passed on)
            data_prompt: Description of the data the activity needs

        Returns:
            Text to append to an agent prompt; empty when nothing was extracted
        """
        extracted_data = await data_extraction(
            prompt=data_prompt,
            files=self._extract_files_from_workflow(workflow),
            messages=workflow.get("messages", []),
            ai_service=self.ai_service,
            lucydom_interface=self.lucydom_interface,
            workflow_id=self.workflow_id,
            add_log_func=self._add_log
        )
        if not extracted_data or "extracted_content" not in extracted_data:
            return ""

        parts = ["\n\nExtracted data summary:\n"]
        for item in extracted_data.get("extracted_content") or []:
            if not isinstance(item, dict):
                continue
            # Content may be missing or not a string; only a 100-char preview is needed
            preview = str(item.get("content") or "")[:100]
            parts.append(f"- {item.get('name', 'unnamed')}: {preview}...\n")
        return "".join(parts)

    async def _execute_activity(self, workflow: Dict[str, Any], activity: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute a single activity from the work plan.

        Selected agents run in sequence; each agent receives the previous
        agent's result appended to its prompt. Without selected agents the
        activity is answered by one direct AI call.

        Args:
            workflow: The workflow object (response messages are appended to it)
            activity: The activity definition from the work plan

        Returns:
            Result of the activity execution with keys ``title``, ``content``,
            ``agent`` and ``format``. ``agent``/``format`` describe the last
            agent that actually produced a response; ``agent`` is "none" and
            ``content`` is empty when no selected agent could be run.
        """
        # Extract activity information
        title = activity.get("title", "Unnamed Activity")
        agent_prompt = activity.get("agent_prompt", "")
        data_prompt = activity.get("data_prompt", "")
        expected_format = activity.get("expected_format", "Text")
        self.logging_utils.info(f"Executing activity: {title}", "execution", f"Activity: {title}, Format: {expected_format}")

        # 1. Determine which agents to use
        agents_config = await self._select_agents(workflow, agent_prompt, expected_format)
        self.logging_utils.info(f"Selected {len(agents_config)} agents for execution", "agents",
                                f"Agents: {[agent.get('agent_id', 'unknown') for agent in agents_config]}")

        # 2. If no agents were selected, use the moderator directly
        if not agents_config:
            return await self._process_directly(workflow, title, agent_prompt)

        from modules.agentservice_registry import AgentRegistry
        registry = AgentRegistry.get_instance()
        messages = workflow.setdefault("messages", [])

        # 3. Execute the agents in sequence
        last_result = None
        last_agent_id = "none"
        last_format = expected_format
        for agent_config in agents_config:
            agent_id = agent_config.get("agent_id")
            # A selection without its own prompt/format inherits the activity's
            step_prompt = agent_config.get("prompt") or agent_prompt
            step_format = agent_config.get("expected_format") or expected_format

            # Get the agent from registry
            agent = registry.get_agent(agent_id)
            if not agent:
                self.logging_utils.warning(f"Agent '{agent_id}' not found, skipping", "agents", f"Agent not found: {agent_id}")
                continue

            # Incorporate previous result if available
            if last_result:
                step_prompt = f"{step_prompt}\n\nPrevious result: {last_result}"
            self.logging_utils.info(f"Executing agent: {agent_id}", "agents", f"Agent: {agent_id}, Format: {step_format}")

            # Extract any needed data; done per agent because the extraction
            # receives the workflow messages, which grow as agents respond
            if data_prompt:
                step_prompt += await self._build_data_summary(workflow, data_prompt)

            # Create the agent message (handed to the agent, not stored in the workflow)
            agent_message = self._create_message(workflow, "user")
            agent_message["content"] = step_prompt
            agent_message["workflow_id"] = self.workflow_id

            # Execute the agent
            agent_response = await agent.process_message(agent_message, {"expected_format": step_format})

            # Process agent response
            if agent_response:
                response_message = self._create_message(workflow, "assistant")
                response_message["content"] = agent_response.get("content", "")
                response_message["agent_type"] = agent_id
                response_message["agent_id"] = agent_id
                response_message["agent_name"] = agent.name
                response_message["result_format"] = agent_response.get("result_format", step_format)
                messages.append(response_message)

                # Update last result and remember who produced it
                last_result = agent_response.get("content", "")
                last_agent_id = agent_id
                last_format = step_format

        if last_agent_id == "none":
            self.logging_utils.warning("No selected agent produced a result", "agents", f"Activity without result: {title}")

        # Return the final result
        return {
            "title": title,
            "content": last_result or "",
            "agent": last_agent_id,
            "format": last_format
        }

    @staticmethod
    def _guess_agent_id(prompt: str, expected_format: str) -> Optional[str]:
        """
        Keyword heuristic used when the AI's agent selection cannot be parsed.

        Args:
            prompt: The prompt to process
            expected_format: The expected output format

        Returns:
            A default agent id, or None when no specific agent is suggested
        """
        fmt = (expected_format or "").lower()
        text = (prompt or "").lower()
        if fmt in ("file", "filelist", "document"):
            return "filecreator_agent"
        if fmt in ("report", "analysis"):
            return "documentation_agent"
        if "web" in text or "search" in text:
            return "webcrawler_agent"
        if "analyze" in text or "data" in text:
            return "analyst_agent"
        return None

    async def _select_agents(self, workflow: Dict[str, Any], prompt: str, expected_format: str) -> List[Dict[str, Any]]:
        """
        Select appropriate agents for a given prompt and expected format.

        Args:
            workflow: The workflow object (not read here)
            prompt: The prompt to process
            expected_format: The expected output format

        Returns:
            List of agent configurations (agent_id, prompt, expected_format).
            Only agents known to the registry are returned; an empty list
            means the caller should process the prompt directly.
        """
        # Get available agents
        from modules.agentservice_registry import AgentRegistry
        registry = AgentRegistry.get_instance()

        # Get all agents except user_agent
        system_agents = {
            agent_id: agent.get_agent_info()
            for agent_id, agent in registry.get_all_agents().items()
            if agent.type != "user"
        }

        # Create agent selection prompt
        selection_prompt = f"""
You are a workflow coordinator responsible for selecting appropriate agents for a task.
TASK PROMPT: {prompt}
EXPECTED FORMAT: {expected_format}
AVAILABLE AGENTS:
"""
        # Add agent descriptions
        for agent_id, agent_info in system_agents.items():
            selection_prompt += f"""
- ID: {agent_id}
Name: {agent_info.get('name', '')}
Type: {agent_info.get('type', '')}
Description: {agent_info.get('description', '')}
Capabilities: {agent_info.get('capabilities', '')}
Result Format: {agent_info.get('result_format', 'Text')}
"""
        selection_prompt += """
Based on the task and expected format, select the appropriate agent(s) to use.
Return your selection as a JSON array with objects containing:
1. agent_id: The ID of the selected agent
2. prompt: A specific prompt tailored for this agent
3. expected_format: The expected output format
You can select multiple agents if needed, in which case they will be executed in sequence.
If no specialized agent is needed, return an empty array.
"""
        # Call AI to select agents
        try:
            selection_response = await self.ai_service.call_api([{"role": "user", "content": selection_prompt}])

            selected_agents = self._extract_json_array(selection_response)
            if selected_agents is not None:
                # Validate selections: drop malformed entries and unknown agents
                return [
                    agent_config for agent_config in selected_agents
                    if isinstance(agent_config, dict) and agent_config.get("agent_id") in system_agents
                ]

            if isinstance(selection_response, str) and "[]" in selection_response:
                # Empty array - no agents needed
                return []

            # Could not parse response, use default strategy
            self.logging_utils.warning("Could not parse agent selection response", "agents",
                                       "Falling back to default agent selection")
            fallback_id = self._guess_agent_id(prompt, expected_format)
            if fallback_id is None:
                # No specific agent needed
                return []
            if fallback_id not in system_agents:
                # An unregistered default would be skipped later and leave the
                # activity without any result; direct processing is better.
                self.logging_utils.warning(f"Default agent '{fallback_id}' is not registered, using direct processing",
                                           "agents", f"Agent not found: {fallback_id}")
                return []
            return [{
                "agent_id": fallback_id,
                "prompt": prompt,
                "expected_format": expected_format
            }]
        except Exception as e:
            self.logging_utils.error(f"Error selecting agents: {str(e)}", "agents", f"Agent selection error: {str(e)}")
            return []  # Empty array - use default processing

    async def _create_summary(self, workflow: Dict[str, Any], results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Create a summary of the workflow results for the user.

        Args:
            workflow: The workflow object (the summary message is appended to it)
            results: Results from activity executions

        Returns:
            Summary message
        """
        # Create a summary prompt
        sections = ["Create a clear, concise summary of the following workflow results:\n\n"]
        for i, result in enumerate(results, 1):
            title = result.get("title", f"Activity {i}")
            agent = result.get("agent", "unknown")
            # Results may carry None or structured content; the prompt needs text
            content = result.get("content") or ""
            if not isinstance(content, str):
                content = str(content)
            # Limit content length for the summary prompt
            content_preview = content[:500] + "..." if len(content) > 500 else content
            sections.append(f"""
ACTIVITY {i}: {title}
Executed by: {agent}
{content_preview}
---
""")
        sections.append("""
Provide a well-structured summary that:
1. Highlights the key findings and results
2. Connects the results to the original task
3. Presents any conclusions or recommendations
Make sure the summary is clear, concise, and useful to the user.
""")
        summary_prompt = "".join(sections)

        # Call AI to generate summary
        summary_content = await self.ai_service.call_api([{"role": "user", "content": summary_prompt}])

        # Create summary message
        summary_message = self._create_message(workflow, "assistant")
        summary_message["content"] = summary_content
        summary_message["agent_type"] = "summary"
        summary_message["agent_id"] = "workflow_summary"
        summary_message["agent_name"] = "Workflow Summary"
        summary_message["result_format"] = "Text"
        summary_message["workflow_complete"] = True

        # Add to workflow
        workflow.setdefault("messages", []).append(summary_message)
        return summary_message

    def _create_message(self, workflow: Dict[str, Any], role: str) -> Dict[str, Any]:
        """
        Create a new message object for the workflow.

        The message is only built, not appended; its sequence number is one
        past the number of messages currently stored in the workflow.

        Args:
            workflow: The workflow object
            role: Role of the message author (e.g. "user", "assistant")

        Returns:
            A message dict in status "pending" with empty content
        """
        # This is a utility function that should be moved to the utility module
        existing_messages = workflow.get("messages") or []
        return {
            "id": f"msg_{uuid.uuid4()}",
            "workflow_id": self.workflow_id,
            "started_at": datetime.now().isoformat(),
            "finished_at": None,
            "sequence_no": len(existing_messages) + 1,
            "status": "pending",
            "role": role,
            "data_stats": {
                "processing_time": 0.0,
                "token_count": 0,
                "bytes_sent": 0,
                "bytes_received": 0
            },
            "documents": [],
            "content": None,
            "agent_type": None
        }

    def _extract_files_from_workflow(self, workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Extract file information from all messages in the workflow.

        Args:
            workflow: The workflow object

        Returns:
            One entry per distinct file id, in order of first appearance
        """
        files = []
        seen_ids = set()
        # Process all messages
        for message in workflow.get("messages") or []:
            # Extract documents from the message
            for doc in message.get("documents") or []:
                source = doc.get("source", {})
                # Only include file documents
                if source.get("type") != "file":
                    continue
                file_id = source.get("id", "")
                # Avoid duplicates: the first occurrence of an id wins
                if file_id in seen_ids:
                    continue
                seen_ids.add(file_id)
                files.append({
                    "id": file_id,
                    "name": source.get("name", ""),
                    # NOTE(review): "type" is filled from content_type, not from
                    # source["type"] - kept as in the original; confirm intent.
                    "type": source.get("content_type", ""),
                    "content_type": source.get("content_type", "")
                })
        return files

    def _add_log(self, workflow_id: str, message: str, log_type: str, agent_id: Optional[str] = None, agent_name: Optional[str] = None):
        """
        Add a log entry to the workflow.

        Thin callback handed to the logging utilities and helper modules; it
        forwards all arguments unchanged to the workflow manager's log function.
        """
        self.workflow_manager._add_log(workflow_id, message, log_type, agent_id, agent_name)