# gateway/modules/workflow/agentBase.py
#
# 135 lines
# No EOL
# 4.8 KiB
# Python

"""
Agent Base Module.
Provides the base class for all chat agents.
Defines the standardized interface for task processing.
"""
import os
import logging
import uuid
from datetime import datetime
from typing import Dict, Any, List, Optional
from modules.shared.mimeUtils import isTextMimeType, determineContentEncoding
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class AgentBase:
    """
    Base class for all chat agents.

    Defines the standardized interface for task processing. Concrete agents
    are expected to override the identity attributes set in ``__init__`` and
    to implement ``processTask``.
    """

    # Fixed MIME type -> (file extension, isText) mapping used by
    # formatAgentDocumentOutput. Image types are handled separately because
    # their extension is derived from the subtype.
    _MIME_EXTENSIONS = {
        "text/markdown": ("md", True),
        "text/html": ("html", True),
        "text/csv": ("csv", True),
        "application/json": ("json", True),
        "application/pdf": ("pdf", False),
    }

    def __init__(self):
        """Initialize the base agent with placeholder identity values."""
        self.name = "base"
        self.label = "Base Agent"
        self.description = "Base agent functionality"
        self.capabilities = []
        # Both references are injected later through the setters below.
        self.workflowManager = None
        self.mydom = None

    def setWorkflowManager(self, workflowManager):
        """
        Set the workflow manager reference.

        If the manager is truthy and exposes a ``mydom`` attribute, that
        attribute is copied to ``self.mydom`` as well; otherwise ``self.mydom``
        is left untouched.
        """
        self.workflowManager = workflowManager
        if workflowManager and hasattr(workflowManager, 'mydom'):
            self.mydom = workflowManager.mydom

    def setMydom(self, mydom):
        """Set the LucyDOM interface reference."""
        self.mydom = mydom

    def getAgentInfo(self) -> Dict[str, Any]:
        """
        Return standardized information about the agent's capabilities.

        Returns:
            Dictionary with the keys "name", "description" and "capabilities".
            The "capabilities" value is the agent's own list object, not a copy.
        """
        return {
            "name": self.name,
            "description": self.description,
            "capabilities": self.capabilities
        }

    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a standardized task structure and return results.

        This method must be implemented by all concrete agent classes. The
        base implementation only logs a warning and returns a placeholder
        result without reading ``task``.

        Args:
            task: A dictionary containing:
                - taskId: Unique ID for this task
                - workflowId: ID of the parent workflow
                - prompt: The main instruction for the agent
                - inputDocuments: List of document objects to process
                - outputSpecifications: List of required output documents
                - context: Additional contextual information including:
                    - workflow: The complete workflow object
                    - workflowRound: Current workflow round
                    - agentType: Type of agent
                    - timestamp: Task timestamp
                    - language: User language

        Returns:
            A dictionary containing:
                - feedback: Text response explaining what the agent did
                - documents: List of document objects created by the agent,
                  each containing a "base64Encoded" flag in addition to
                  "label" and "content"
        """
        # Base implementation - should be overridden by specialized agents
        logger.warning(
            "Agent %s is using the default implementation of processTask",
            self.name,
        )
        return {
            "feedback": f"The processTask method was not implemented by agent '{self.name}'.",
            "documents": []
        }

    def determineBase64EncodingFlag(self, filename: str, content: Any, mimeType: Optional[str] = None) -> bool:
        """
        Delegate to ``determineContentEncoding`` from ``modules.shared.mimeUtils``.

        The arguments are passed through unchanged and the helper's result is
        returned as-is.
        NOTE(review): presumably True means the content should be base64
        encoded - confirm against the helper's implementation.
        """
        return determineContentEncoding(filename, content, mimeType)

    def isTextMimeType(self, mimeType: str) -> bool:
        """Delegate to ``isTextMimeType`` from ``modules.shared.mimeUtils``."""
        return isTextMimeType(mimeType)

    def formatAgentDocumentOutput(self, label: str, content: Any, mimeType: Optional[str] = None) -> Dict[str, Any]:
        """
        Format agent output as a document.

        Args:
            label: Label for the document (stored under "name")
            content: Content of the document (stored under "data", unchanged)
            mimeType: Optional MIME type for the document

        Returns:
            A document dictionary with a freshly generated UUID "id", "name",
            "ext", "data", "base64Encoded" (always False here) and a
            "metadata" dictionary holding the "isText" flag. "mimeType" is
            only present when a truthy MIME type was supplied, and holds the
            value exactly as given by the caller.
        """
        doc = {
            "id": str(uuid.uuid4()),
            "name": label,
            "ext": "txt",  # Default extension
            "data": content,
            "base64Encoded": False,
            "metadata": {
                "isText": True
            }
        }
        if not mimeType:
            return doc

        doc["mimeType"] = mimeType

        # Match on the bare media type: drop parameters such as
        # "; charset=utf-8" and ignore case, so they cannot leak into the
        # extension or prevent a match.
        baseType = mimeType.split(";", 1)[0].strip().lower()

        known = self._MIME_EXTENSIONS.get(baseType)
        if known is not None:
            doc["ext"], doc["metadata"]["isText"] = known
        elif baseType.startswith("image/"):
            # Use the subtype as extension, without a structured-syntax
            # suffix ("image/svg+xml" -> "svg" rather than "svg+xml").
            subtype = baseType.split("/", 1)[1]
            doc["ext"] = subtype.split("+", 1)[0]
            doc["metadata"]["isText"] = False
        return doc