# gateway/modules/agents/agentCoach.py

"""
Coach agent for answering questions and generating structured content.
Provides direct AI-based responses using extracted data from documents.
"""

import logging
from typing import Dict, Any, List
import json
from datetime import datetime
import uuid

from modules.workflow.agentBase import AgentBase
from modules.interfaces.serviceChatModel import Task, ChatDocument, ChatContent

logger = logging.getLogger(__name__)

class AgentCoach(AgentBase):
    """AI-driven agent for answering questions and generating structured content from extracted data"""

    def __init__(self):
        """Set up the coach agent's identity (name, label, description) and capability list."""
        super().__init__()

        # Identity shown for this agent
        self.name, self.label = "coach", "Coach & Assistant"
        self.description = (
            "Answers questions, converts and generates content "
            "directly from data without complex processing"
        )

        # Capability identifiers, kept as a plain list
        advertised = (
            "dataConversion",
            "questionAnswering",
            "contentGeneration",
            "simpleDataFormatting",
            "informationSynthesis",
            "directResponse",
            "imageInterpretation",
            "structuredOutput",
        )
        self.capabilities = list(advertised)

    def setDependencies(self, serviceBase=None):
        """
        Set external dependencies for the agent.

        Args:
            serviceBase: Service object forwarded unchanged to self.setService (defaults to None)
        """
        # setService is not defined in this class; presumably inherited from AgentBase
        self.setService(serviceBase)

    async def processTask(self, task: Task) -> Dict[str, Any]:
        """
        Process a task by directly using AI to provide answers or content based on extracted data.

        Args:
            task: Task object with prompt, inputDocuments, outputSpecifications

        Returns:
            Dictionary with feedback and documents
        """
        try:
            # Extract task information
            prompt = task.prompt
            inputDocuments = task.filesInput
            outputSpecs = task.filesOutput

            # Check AI service
            if not self.service or not self.service.base:
                return {
                    "feedback": "The Coach agent requires an AI service to function.",
                    "documents": []
                }

            # Collect all extracted data from input documents
            documentContext = self._collectExtractedData(inputDocuments)

            # Generate task understanding to guide response creation
            taskUnderstanding = await self._analyzeTask(prompt, documentContext)

            # Generate documents based on output specifications
            documents = []

            # If no output specs provided, create a default document
            if not outputSpecs:
                defaultFormat = taskUnderstanding.get("recommendedFormat", "md")
                defaultTitle = taskUnderstanding.get("suggestedFilename", "response")

                outputSpecs = [{
                    "label": f"{defaultTitle}.{defaultFormat}",
                    "description": "Response to your request"
                }]

            # Process each output specification
            for spec in outputSpecs:
                outputLabel = spec.get("label", "output.txt")
                outputDescription = spec.get("description", "")

                # Determine format based on file extension
                outputFormat = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "txt"

                # Generate document based on format
                document = await self._generateDocument(
                    prompt,
                    documentContext,
                    outputLabel,
                    outputFormat,
                    outputDescription,
                    taskUnderstanding
                )

                documents.append(document)

            # Generate feedback
            feedback = taskUnderstanding.get("feedback", "I've created content based on your request.")

            return {
                "feedback": feedback,
                "documents": documents
            }

        except Exception as e:
            logger.error(f"Error in coach processing: {str(e)}", exc_info=True)
            return {
                "feedback": f"Error while processing your request: {str(e)}",
                "documents": []
            }

    def _collectExtractedData(self, documents: List[ChatDocument]) -> str:
        """
        Collect extracted data from input documents.

        Args:
            documents: List of input documents

        Returns:
            Combined extracted data as text
        """
        contextParts = []

        for doc in documents:
            docName = doc.name
            if doc.ext:
                docName = f"{docName}.{doc.ext}"

            contextParts.append(f"\n\n--- {docName} ---\n")

            # Process contents, focusing on dataExtracted field
            for content in doc.contents:
                if content.data:
                    contextParts.append(content.data)

        return "\n".join(contextParts)

    async def _analyzeTask(self, prompt: str, context: str) -> Dict:
        """
        Use AI to analyze the task and develop an understanding of what's required.

        Args:
            prompt: The task prompt
            context: Extracted document data

        Returns:
            Task understanding dictionary
        """
        analysisPrompt = f"""
        Analyze this request to determine the best approach for creating a response.

        REQUEST: {prompt}

        EXTRACTED DATA:
        {context[:1500]}... (truncated if longer)

        Create a task analysis in JSON format with the following structure:
        {{
            "requestType": "question|content|data|report|description",
            "recommendedFormat": "md|txt|html|csv|json",
            "suggestedFilename": "appropriate_filename_without_extension",
            "contentFocus": "brief description of what to focus on",
            "feedback": "brief explanation of how you'll approach this request",
            "complexity": "simple|moderate|complex"
        }}

        Only return valid JSON. No preamble or explanations.
        """

        try:
            # Get task understanding from AI
            response = await self.service.base.callAi([
                {"role": "system", "content": "You are a task analysis expert. Respond with valid JSON only."},
                {"role": "user", "content": analysisPrompt}
            ])

            # Extract JSON from response
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1

            if jsonStart >= 0 and jsonEnd > jsonStart:
                taskUnderstanding = json.loads(response[jsonStart:jsonEnd])
                return taskUnderstanding
            else:
                # Fallback if JSON not found
                return {
                    "requestType": "content",
                    "recommendedFormat": "md",
                    "suggestedFilename": "response",
                    "contentFocus": "Addressing the main request",
                    "feedback": "I've created content based on your request and the provided data.",
                    "complexity": "moderate"
                }

        except Exception as e:
            logger.warning(f"Error analyzing task: {str(e)}")
            return {
                "requestType": "content",
                "recommendedFormat": "md",
                "suggestedFilename": "response",
                "contentFocus": "Addressing the main request",
                "feedback": "I've created content based on your request and the provided data.",
                "complexity": "moderate"
            }

    async def _generateDocument(self, prompt: str, context: str, outputLabel: str,
                             outputFormat: str, description: str, taskUnderstanding: Dict) -> ChatDocument:
        """
        Generate a document based on the request and extracted data.

        The AI is asked for content in the requested format; a surrounding code
        fence in its reply is removed. If the AI call or the post-processing fails,
        a document carrying a format-appropriate error message is returned instead
        of raising.

        Args:
            prompt: The task prompt
            context: Extracted document data
            outputLabel: Output filename
            outputFormat: Output format (file extension)
            description: Output description
            taskUnderstanding: Task understanding from analysis

        Returns:
            ChatDocument object
        """
        # The format is taken from the text after the LAST dot of the label, so the
        # name must be everything before that dot ("sales.q1.csv" -> "sales.q1")
        documentName = outputLabel.rsplit('.', 1)[0]

        def buildDocument(payload, partName, summary, metadata):
            """Wrap payload in a ChatDocument with a single content part."""
            return ChatDocument(
                id=str(uuid.uuid4()),
                name=documentName,
                ext=outputFormat,
                data=payload,
                contents=[
                    ChatContent(
                        name=partName,
                        data=payload,
                        summary=summary,
                        metadata=metadata
                    )
                ]
            )

        # Build prompt based on output format
        generationPrompt = f"""
        Create a response to the following request in {outputFormat} format:

        REQUEST: {prompt}

        EXTRACTED DATA:
        {context}

        OUTPUT REQUIREMENTS:
        - Filename: {outputLabel}
        - Format: {outputFormat}
        - Description: {description}
        - Focus on: {taskUnderstanding.get("contentFocus", "Addressing the main request")}

        Guidelines:
        1. Create content that directly addresses the request
        2. Use the extracted data to inform your response
        3. Format the output appropriately for {outputFormat}
        4. Be comprehensive but focused
        5. Include appropriate formatting, structure, and organization

        Only return the content. No explanations or additional text.
        """

        try:
            # Get content from AI
            content = await self.service.base.callAi([
                {"role": "system", "content": f"You are a content generation expert. Create content in {outputFormat} format."},
                {"role": "user", "content": generationPrompt}
            ])

            # Extract content from code blocks if present
            content = self._extractFromCodeBlocks(content, outputFormat)

            return buildDocument(content, "main", description, {"format": outputFormat})

        except Exception as e:
            # Best-effort: hand back an error document so the remaining outputs still get produced
            logger.error("Error generating document: %s", e, exc_info=True)
            errorContent = self._createErrorContent(str(e), outputFormat)
            return buildDocument(
                errorContent,
                "error",
                "Error generating content",
                {"format": outputFormat, "error": str(e)}
            )

    def _getContentType(self, outputFormat: str) -> str:
        """
        Get content type based on format.

        Args:
            outputFormat: Output format

        Returns:
            Content type
        """
        contentTypeMap = {
            "md": "text/markdown",
            "markdown": "text/markdown",
            "html": "text/html",
            "txt": "text/plain",
            "text": "text/plain",
            "json": "application/json",
            "csv": "text/csv",
            "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        }

        return contentTypeMap.get(outputFormat, "text/plain")

    def _extractFromCodeBlocks(self, content: str, format: str) -> str:
        """
        Extract content from code blocks if present.

        Args:
            content: Raw content
            format: Expected format

        Returns:
            Extracted content
        """
        # Check for code blocks
        codeBlockStart = f"```{format}"
        if codeBlockStart in content:
            start = content.find(codeBlockStart) + len(codeBlockStart)
            end = content.find("```", start)
            if end > start:
                return content[start:end].strip()

        # Check for generic code blocks
        if "```" in content:
            start = content.find("```") + 3
            # Skip format identifier if present
            if content[start:].strip() and not content[start:start+1].isalnum():
                start = content.find("\n", start) + 1
            end = content.find("```", start)
            if end > start:
                return content[start:end].strip()

        return content

    def _createErrorContent(self, errorMessage: str, outputFormat: str) -> str:
        """
        Create error content in the appropriate format.

        Args:
            errorMessage: Error message
            outputFormat: Output format

        Returns:
            Formatted error content
        """
        if outputFormat == "json":
            return json.dumps({"error": errorMessage})
        elif outputFormat == "csv":
            return f"error\n{errorMessage}"
        elif outputFormat in ["md", "markdown"]:
            return f"# Error\n\n{errorMessage}"
        elif outputFormat == "html":
            return f"<html><body><h1>Error</h1><p>{errorMessage}</p></body></html>"
        else:
            return f"Error: {errorMessage}"


# Factory function for the Coach agent
def getAgentCoach():
    """
    Returns an instance of the Coach agent.

    Returns:
        A newly constructed AgentCoach (a fresh instance on every call)
    """
    return AgentCoach()