# gateway/modules/workflow/agentBase.py

"""
Agent Base Module.
Provides the base class for all chat agents.
Defines the standardized interface for task processing.
"""

import os
import logging
import uuid
from datetime import datetime
from typing import Dict, Any, List, Optional
from modules.shared.mimeUtils import isTextMimeType, determineContentEncoding

logger = logging.getLogger(__name__)

class AgentBase:
    """
    Base class for all chat agents.

    Holds the agent's descriptive attributes (name, label, description,
    capabilities), optional references to the workflow manager and the
    LucyDOM interface, and helpers for building output documents.
    Concrete agents are expected to override ``processTask``.
    """

    # Normalized MIME type -> (file extension, content is text).
    # Image types are handled separately because their extension is derived
    # from the subtype.
    _MIME_TYPE_EXTENSIONS = {
        "text/markdown": ("md", True),
        "text/html": ("html", True),
        "text/csv": ("csv", True),
        "application/json": ("json", True),
        "application/pdf": ("pdf", False),
    }

    def __init__(self):
        """Initialize the base agent with placeholder metadata and no references."""
        self.name = "base"
        self.label = "Base Agent"
        self.description = "Base agent functionality"
        self.capabilities = []
        self.workflowManager = None
        self.mydom = None

    def setWorkflowManager(self, workflowManager):
        """
        Set the workflow manager reference.

        If the given manager exposes a ``mydom`` attribute, that reference is
        copied onto this agent as well; otherwise ``self.mydom`` is untouched.

        Args:
            workflowManager: The workflow manager object (may be None).
        """
        self.workflowManager = workflowManager
        # Also set mydom reference from workflow manager
        if workflowManager and hasattr(workflowManager, 'mydom'):
            self.mydom = workflowManager.mydom

    def setMydom(self, mydom):
        """Set the LucyDOM interface reference."""
        self.mydom = mydom

    def getAgentInfo(self) -> Dict[str, Any]:
        """
        Return standardized information about the agent's capabilities.

        Returns:
            Dictionary with the keys "name", "description" and "capabilities".
            The capabilities value is the agent's own list, not a copy.
        """
        return {
            "name": self.name,
            "description": self.description,
            "capabilities": self.capabilities
        }

    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a standardized task structure and return results.
        This method must be implemented by all concrete agent classes.

        The base implementation only logs a warning and returns a feedback
        message with an empty document list.

        Args:
            task: A dictionary containing:
                - taskId: Unique ID for this task
                - workflowId: ID of the parent workflow
                - prompt: The main instruction for the agent
                - inputDocuments: List of document objects to process
                - outputSpecifications: List of required output documents
                - context: Additional contextual information including:
                    - workflow: The complete workflow object
                    - workflowRound: Current workflow round
                    - agentType: Type of agent
                    - timestamp: Task timestamp
                    - language: User language

        Returns:
            A dictionary containing:
                - feedback: Text response explaining what the agent did
                - documents: List of document objects created by the agent,
                  each containing a "base64Encoded" flag in addition to "label" and "content"

        NOTE(review): formatAgentDocumentOutput emits "name"/"data" keys rather
        than "label"/"content" - confirm which schema consumers expect.
        """
        # Base implementation - should be overridden by specialized agents
        logger.warning(
            "Agent %s is using the default implementation of processTask",
            self.name,
        )
        return {
            "feedback": f"The processTask method was not implemented by agent '{self.name}'.",
            "documents": []
        }

    def determineBase64EncodingFlag(self, filename: str, content: Any, mimeType: Optional[str] = None) -> bool:
        """
        Delegate to the shared ``determineContentEncoding`` utility.

        Args:
            filename: Name of the file the content belongs to
            content: The content to inspect
            mimeType: Optional MIME type of the content

        Returns:
            Whatever ``determineContentEncoding`` returns for these arguments.
        """
        return determineContentEncoding(filename, content, mimeType)

    def isTextMimeType(self, mimeType: str) -> bool:
        """Delegate to the shared ``isTextMimeType`` utility and return its result."""
        return isTextMimeType(mimeType)

    def formatAgentDocumentOutput(self, label: str, content: Any, mimeType: Optional[str] = None) -> Dict[str, Any]:
        """
        Format agent output as a document.

        Args:
            label: Label for the document (stored under "name")
            content: Content of the document (stored under "data", unmodified)
            mimeType: Optional MIME type for the document. Matching is
                case-insensitive and ignores parameters such as
                "; charset=utf-8".

        Returns:
            A document dictionary with a fresh UUID "id", "name", "ext",
            "data", "base64Encoded" (always False here) and a "metadata" dict
            holding "isText". "mimeType" is present only when one was given
            and is stored exactly as passed in. Unknown MIME types keep the
            default "txt" extension and isText=True.
        """
        # Create document structure with text defaults
        doc = {
            "id": str(uuid.uuid4()),
            "name": label,
            "ext": "txt",  # Default extension
            "data": content,
            "base64Encoded": False,
            "metadata": {
                "isText": True
            }
        }

        if not mimeType:
            return doc

        doc["mimeType"] = mimeType

        # Media types are case-insensitive and may carry parameters; compare
        # only the bare "type/subtype" part so such values are still recognized.
        essence = mimeType.split(";", 1)[0].strip().lower()

        known = self._MIME_TYPE_EXTENSIONS.get(essence)
        if known is not None:
            doc["ext"], doc["metadata"]["isText"] = known
        elif essence.startswith("image/"):
            subtype = essence.split("/")[1]
            # Drop a structured-syntax suffix so "image/svg+xml" maps to "svg"
            doc["ext"] = subtype.split("+", 1)[0]
            doc["metadata"]["isText"] = False

        return doc