# gateway/modules/agents/agentCoach.py

"""
Coach agent for answering questions and generating structured content.
Provides direct AI-based responses using extracted data from documents.
"""

import html
import json
import logging
from datetime import datetime
from typing import Dict, Any, List

from modules.workflow.agentBase import AgentBase

logger = logging.getLogger(__name__)

class AgentCoach(AgentBase):
    """AI-driven agent for answering questions and generating structured content from extracted data"""

    def __init__(self):
        """Set up the coach agent's identity (name, label, description) and capability list."""
        super().__init__()

        # Identity shown to the rest of the system
        self.name, self.label = "coach", "Coach & Assistant"
        self.description = (
            "Answers questions, converts and generates content "
            "directly from data without complex processing"
        )

        # Capability tags advertised by this agent
        advertised = (
            "dataConversion",
            "questionAnswering",
            "contentGeneration",
            "simpleDataFormatting",
            "informationSynthesis",
            "directResponse",
            "imageInterpretation",
            "structuredOutput",
        )
        self.capabilities = list(advertised)

    def setDependencies(self, mydom=None):
        """Set external dependencies for the agent."""

    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a task by directly using AI to provide answers or content based on extracted data.

        Args:
            task: Task dictionary with prompt, inputDocuments, outputSpecifications

        Returns:
            Dictionary with feedback and documents
        """
        try:
            # Extract task information
            prompt = task.get("prompt", "")
            inputDocuments = task.get("inputDocuments", [])
            outputSpecs = task.get("outputSpecifications", [])

            # Check AI service
            if not self.mydom:
                return {
                    "feedback": "The Coach agent requires an AI service to function.",
                    "documents": []
                }

            # Collect all extracted data from input documents
            documentContext = self._collectExtractedData(inputDocuments)

            # Generate task understanding to guide response creation
            taskUnderstanding = await self._analyzeTask(prompt, documentContext)

            # Generate documents based on output specifications
            documents = []

            # If no output specs provided, create a default document
            if not outputSpecs:
                defaultFormat = taskUnderstanding.get("recommendedFormat", "md")
                defaultTitle = taskUnderstanding.get("suggestedFilename", "response")

                outputSpecs = [{
                    "label": f"{defaultTitle}.{defaultFormat}",
                    "description": "Response to your request"
                }]

            # Process each output specification
            for spec in outputSpecs:
                outputLabel = spec.get("label", "output.txt")
                outputDescription = spec.get("description", "")

                # Determine format based on file extension
                outputFormat = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "txt"

                # Generate document based on format
                document = await self._generateDocument(
                    prompt,
                    documentContext,
                    outputLabel,
                    outputFormat,
                    outputDescription,
                    taskUnderstanding
                )

                documents.append(document)

            # Generate feedback
            feedback = taskUnderstanding.get("feedback", "I've created content based on your request.")

            return {
                "feedback": feedback,
                "documents": documents
            }

        except Exception as e:
            logger.error(f"Error in coach processing: {str(e)}", exc_info=True)
            return {
                "feedback": f"Error while processing your request: {str(e)}",
                "documents": []
            }

    def _collectExtractedData(self, documents: List[Dict[str, Any]]) -> str:
        """
        Collect extracted data from input documents.

        Args:
            documents: List of input documents

        Returns:
            Combined extracted data as text
        """
        contextParts = []

        for doc in documents:
            docName = doc.get("name", "unnamed")
            if doc.get("ext"):
                docName = f"{docName}.{doc.get('ext')}"

            contextParts.append(f"\n\n--- {docName} ---\n")

            # Process contents, focusing on dataExtracted field
            for content in doc.get("contents", []):
                if content.get("dataExtracted"):
                    contextParts.append(content.get("dataExtracted", ""))

        return "\n".join(contextParts)

    async def _analyzeTask(self, prompt: str, context: str) -> Dict:
        """
        Use AI to analyze the task and develop an understanding of what's required.

        Args:
            prompt: The task prompt
            context: Extracted document data

        Returns:
            Task understanding dictionary
        """
        analysisPrompt = f"""
        Analyze this request to determine the best approach for creating a response.

        REQUEST: {prompt}

        EXTRACTED DATA:
        {context[:1500]}... (truncated if longer)

        Create a task analysis in JSON format with the following structure:
        {{
            "requestType": "question|content|data|report|description",
            "recommendedFormat": "md|txt|html|csv|json",
            "suggestedFilename": "appropriate_filename_without_extension",
            "contentFocus": "brief description of what to focus on",
            "feedback": "brief explanation of how you'll approach this request",
            "complexity": "simple|moderate|complex"
        }}

        Only return valid JSON. No preamble or explanations.
        """

        try:
            response = await self.mydom.callAi([
                {"role": "system", "content": "You are a task analysis expert. Respond with valid JSON only."},
                {"role": "user", "content": analysisPrompt}
            ])

            # Extract JSON from response
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1

            if jsonStart >= 0 and jsonEnd > jsonStart:
                taskUnderstanding = json.loads(response[jsonStart:jsonEnd])
                return taskUnderstanding
            else:
                # Fallback if JSON not found
                return {
                    "requestType": "content",
                    "recommendedFormat": "md",
                    "suggestedFilename": "response",
                    "contentFocus": "Addressing the main request",
                    "feedback": "I've created content based on your request and the provided data.",
                    "complexity": "moderate"
                }

        except Exception as e:
            logger.warning(f"Error analyzing task: {str(e)}")
            return {
                "requestType": "content",
                "recommendedFormat": "md",
                "suggestedFilename": "response",
                "contentFocus": "Addressing the main request",
                "feedback": "I've created content based on your request and the provided data.",
                "complexity": "moderate"
            }

    async def _generateDocument(self, prompt: str, context: str, outputLabel: str,
                             outputFormat: str, description: str, taskUnderstanding: Dict) -> Dict[str, Any]:
        """
        Generate a document based on the request and extracted data.

        The AI service is asked for content in the requested format. JSON and
        CSV replies are unwrapped from code fences, JSON is validated (and
        trimmed to its JSON payload when the model added chatter around it),
        and Markdown / HTML replies get a minimal heading or page skeleton when
        they lack one. Any failure yields an error document in the same format
        instead of raising.

        Args:
            prompt: The task prompt
            context: Extracted document data
            outputLabel: Output filename
            outputFormat: Output format (file extension)
            description: Output description
            taskUnderstanding: Task understanding from analysis; only
                "contentFocus" is read here

        Returns:
            Document object as produced by formatAgentDocumentOutput
        """
        # Determine content type based on format
        contentType = self._getContentType(outputFormat)

        # Build prompt based on output format
        generationPrompt = f"""
        Create a response to the following request in {outputFormat} format:

        REQUEST: {prompt}

        EXTRACTED DATA:
        {context}

        OUTPUT REQUIREMENTS:
        - Filename: {outputLabel}
        - Format: {outputFormat}
        - Description: {description}
        - Focus on: {taskUnderstanding.get("contentFocus", "Addressing the main request")}

        Guidelines:
        1. Create content that directly addresses the request
        2. Use the extracted data to inform your response
        3. Format the output appropriately for {outputFormat}
        4. Be comprehensive but focused
        5. Include appropriate formatting, structure, and organization

        Your response should be in valid {outputFormat} format without explanations or markdown formatting around it.
        """

        try:
            # Build system prompt based on format
            systemPrompt = f"You create {outputFormat} format content based on requests and extracted data. Provide only the content in valid {outputFormat} format."

            # Generate content with AI
            content = await self.mydom.callAi([
                {"role": "system", "content": systemPrompt},
                {"role": "user", "content": generationPrompt}
            ])

            # For structured formats, extract from code blocks if present
            if outputFormat in ("json", "csv"):
                content = self._extractFromCodeBlocks(content, outputFormat)

            # Validate JSON and cut away surrounding chatter if needed
            if outputFormat == "json":
                content = self._salvageJson(content)

            # Ensure proper structure for markdown/HTML
            if outputFormat in ("md", "markdown") and not content.strip().startswith("#"):
                title = "Response"
                content = f"# {title}\n\n{content}"
            elif outputFormat == "html" and "<html" not in content.lower():
                title = "Response"
                content = f"<html><head><title>{title}</title></head><body><h1>{title}</h1>{content}</body></html>"

            return self.formatAgentDocumentOutput(outputLabel, content, contentType)

        except Exception as e:
            # Keep the traceback in the log, matching processTask's error logging
            logger.error(f"Error generating document: {str(e)}", exc_info=True)

            # Create error document
            errorContent = self._createErrorContent(str(e), outputFormat)
            return self.formatAgentDocumentOutput(outputLabel, errorContent, contentType)

    def _salvageJson(self, content: str) -> str:
        """
        Return content unchanged when it is valid JSON, otherwise its best JSON-looking span.

        Both object ({...}) and array ([...]) spans are considered, outermost
        first, so a top-level array is not cut down to the objects inside it.

        Args:
            content: Text expected to contain JSON

        Returns:
            The first candidate span that parses as JSON; if none parses, the
            outermost candidate span (best effort); if there is no candidate
            at all, the content as given
        """
        try:
            json.loads(content)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass
            logger.warning("Invalid JSON generated, attempting to fix")
        else:
            return content

        # Collect the object and array spans, keyed by where they start
        candidates = []
        for opener, closer in (("{", "}"), ("[", "]")):
            begin = content.find(opener)
            finish = content.rfind(closer) + 1
            if begin >= 0 and finish > begin:
                candidates.append((begin, content[begin:finish]))
        candidates.sort()

        for _, span in candidates:
            try:
                json.loads(span)
            except ValueError:
                continue
            return span

        logger.warning("Generated JSON could not be repaired")
        return candidates[0][1] if candidates else content

    def _getContentType(self, outputFormat: str) -> str:
        """
        Get content type based on format.

        Args:
            outputFormat: Output format

        Returns:
            Content type
        """
        contentTypeMap = {
            "md": "text/markdown",
            "markdown": "text/markdown",
            "html": "text/html",
            "txt": "text/plain",
            "text": "text/plain",
            "json": "application/json",
            "csv": "text/csv",
            "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        }

        return contentTypeMap.get(outputFormat, "text/plain")

    def _extractFromCodeBlocks(self, content: str, format: str) -> str:
        """
        Extract content from code blocks if present.

        Args:
            content: Raw content
            format: Expected format

        Returns:
            Extracted content
        """
        # Check for code blocks
        codeBlockStart = f"```{format}"
        if codeBlockStart in content:
            start = content.find(codeBlockStart) + len(codeBlockStart)
            end = content.find("```", start)
            if end > start:
                return content[start:end].strip()

        # Check for generic code blocks
        if "```" in content:
            start = content.find("```") + 3
            # Skip format identifier if present
            if content[start:].strip() and not content[start:start+1].isalnum():
                start = content.find("\n", start) + 1
            end = content.find("```", start)
            if end > start:
                return content[start:end].strip()

        return content

    def _createErrorContent(self, errorMessage: str, outputFormat: str) -> str:
        """
        Create error content in the appropriate format.

        Args:
            errorMessage: Error message
            outputFormat: Output format

        Returns:
            Formatted error content
        """
        if outputFormat == "json":
            return json.dumps({"error": errorMessage})
        elif outputFormat == "csv":
            return f"error\n{errorMessage}"
        elif outputFormat in ["md", "markdown"]:
            return f"# Error\n\n{errorMessage}"
        elif outputFormat == "html":
            return f"<html><body><h1>Error</h1><p>{errorMessage}</p></body></html>"
        else:
            return f"Error: {errorMessage}"


# Factory function for the Coach agent
def getAgentCoach():
    """Create a new Coach agent and hand it back to the caller."""
    coachAgent = AgentCoach()
    return coachAgent