""" Coach agent for answering questions and generating structured content. Provides direct AI-based responses using extracted data from documents. """ import logging from typing import Dict, Any, List import json from datetime import datetime import uuid from modules.workflow.agentBase import AgentBase from modules.interfaces.serviceChatModel import Task, ChatDocument, ChatContent logger = logging.getLogger(__name__) class AgentCoach(AgentBase): """AI-driven agent for answering questions and generating structured content from extracted data""" def __init__(self): """Initialize the coach agent""" super().__init__() self.name = "coach" self.label = "Coach & Assistant" self.description = "Answers questions, converts and generates content directly from data without complex processing" self.capabilities = [ "dataConversion", "questionAnswering", "contentGeneration", "simpleDataFormatting", "informationSynthesis", "directResponse", "imageInterpretation", "structuredOutput" ] def setDependencies(self, serviceBase=None): """Set external dependencies for the agent.""" self.setService(serviceBase) async def processTask(self, task: Task) -> Dict[str, Any]: """ Process a task by directly using AI to provide answers or content based on extracted data. Args: task: Task object with prompt, inputDocuments, outputSpecifications Returns: Dictionary with feedback and documents """ try: # Extract task information prompt = task.prompt inputDocuments = task.filesInput outputSpecs = task.filesOutput # Check AI service if not self.service or not self.service.base: return { "feedback": "The Coach agent requires an AI service to function.", "documents": [] } # Collect all extracted data from input documents documentContext = self._collectExtractedData(inputDocuments) # Generate task understanding to guide response creation taskUnderstanding = await self._analyzeTask(prompt, documentContext) # Generate documents based on output specifications documents = [] # If no output specs provided, create a default document if not outputSpecs: defaultFormat = taskUnderstanding.get("recommendedFormat", "md") defaultTitle = taskUnderstanding.get("suggestedFilename", "response") outputSpecs = [{ "label": f"{defaultTitle}.{defaultFormat}", "description": "Response to your request" }] # Process each output specification for spec in outputSpecs: outputLabel = spec.get("label", "output.txt") outputDescription = spec.get("description", "") # Determine format based on file extension outputFormat = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "txt" # Generate document based on format document = await self._generateDocument( prompt, documentContext, outputLabel, outputFormat, outputDescription, taskUnderstanding ) documents.append(document) # Generate feedback feedback = taskUnderstanding.get("feedback", "I've created content based on your request.") return { "feedback": feedback, "documents": documents } except Exception as e: logger.error(f"Error in coach processing: {str(e)}", exc_info=True) return { "feedback": f"Error while processing your request: {str(e)}", "documents": [] } def _collectExtractedData(self, documents: List[ChatDocument]) -> str: """ Collect extracted data from input documents. Args: documents: List of input documents Returns: Combined extracted data as text """ contextParts = [] for doc in documents: docName = doc.name if doc.ext: docName = f"{docName}.{doc.ext}" contextParts.append(f"\n\n--- {docName} ---\n") # Process contents, focusing on dataExtracted field for content in doc.contents: if content.data: contextParts.append(content.data) return "\n".join(contextParts) async def _analyzeTask(self, prompt: str, context: str) -> Dict: """ Use AI to analyze the task and develop an understanding of what's required. Args: prompt: The task prompt context: Extracted document data Returns: Task understanding dictionary """ analysisPrompt = f""" Analyze this request to determine the best approach for creating a response. REQUEST: {prompt} EXTRACTED DATA: {context[:1500]}... (truncated if longer) Create a task analysis in JSON format with the following structure: {{ "requestType": "question|content|data|report|description", "recommendedFormat": "md|txt|html|csv|json", "suggestedFilename": "appropriate_filename_without_extension", "contentFocus": "brief description of what to focus on", "feedback": "brief explanation of how you'll approach this request", "complexity": "simple|moderate|complex" }} Only return valid JSON. No preamble or explanations. """ try: # Get task understanding from AI response = await self.service.base.callAi([ {"role": "system", "content": "You are a task analysis expert. Respond with valid JSON only."}, {"role": "user", "content": analysisPrompt} ]) # Extract JSON from response jsonStart = response.find('{') jsonEnd = response.rfind('}') + 1 if jsonStart >= 0 and jsonEnd > jsonStart: taskUnderstanding = json.loads(response[jsonStart:jsonEnd]) return taskUnderstanding else: # Fallback if JSON not found return { "requestType": "content", "recommendedFormat": "md", "suggestedFilename": "response", "contentFocus": "Addressing the main request", "feedback": "I've created content based on your request and the provided data.", "complexity": "moderate" } except Exception as e: logger.warning(f"Error analyzing task: {str(e)}") return { "requestType": "content", "recommendedFormat": "md", "suggestedFilename": "response", "contentFocus": "Addressing the main request", "feedback": "I've created content based on your request and the provided data.", "complexity": "moderate" } async def _generateDocument(self, prompt: str, context: str, outputLabel: str, outputFormat: str, description: str, taskUnderstanding: Dict) -> ChatDocument: """ Generate a document based on the request and extracted data. Args: prompt: The task prompt context: Extracted document data outputLabel: Output filename outputFormat: Output format (file extension) description: Output description taskUnderstanding: Task understanding from analysis Returns: ChatDocument object """ # Determine content type based on format contentType = self._getContentType(outputFormat) # Build prompt based on output format generationPrompt = f""" Create a response to the following request in {outputFormat} format: REQUEST: {prompt} EXTRACTED DATA: {context} OUTPUT REQUIREMENTS: - Filename: {outputLabel} - Format: {outputFormat} - Description: {description} - Focus on: {taskUnderstanding.get("contentFocus", "Addressing the main request")} Guidelines: 1. Create content that directly addresses the request 2. Use the extracted data to inform your response 3. Format the output appropriately for {outputFormat} 4. Be comprehensive but focused 5. Include appropriate formatting, structure, and organization Only return the content. No explanations or additional text. """ try: # Get content from AI content = await self.service.base.callAi([ {"role": "system", "content": f"You are a content generation expert. Create content in {outputFormat} format."}, {"role": "user", "content": generationPrompt} ]) # Extract content from code blocks if present content = self._extractFromCodeBlocks(content, outputFormat) # Create document object return ChatDocument( id=str(uuid.uuid4()), name=outputLabel.split('.')[0], ext=outputFormat, data=content, contents=[ ChatContent( name="main", data=content, summary=description, metadata={"format": outputFormat} ) ] ) except Exception as e: logger.error(f"Error generating document: {str(e)}") errorContent = self._createErrorContent(str(e), outputFormat) return ChatDocument( id=str(uuid.uuid4()), name=outputLabel.split('.')[0], ext=outputFormat, data=errorContent, contents=[ ChatContent( name="error", data=errorContent, summary="Error generating content", metadata={"format": outputFormat, "error": str(e)} ) ] ) def _getContentType(self, outputFormat: str) -> str: """ Get content type based on format. Args: outputFormat: Output format Returns: Content type """ contentTypeMap = { "md": "text/markdown", "markdown": "text/markdown", "html": "text/html", "txt": "text/plain", "text": "text/plain", "json": "application/json", "csv": "text/csv", "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" } return contentTypeMap.get(outputFormat, "text/plain") def _extractFromCodeBlocks(self, content: str, format: str) -> str: """ Extract content from code blocks if present. Args: content: Raw content format: Expected format Returns: Extracted content """ # Check for code blocks codeBlockStart = f"```{format}" if codeBlockStart in content: start = content.find(codeBlockStart) + len(codeBlockStart) end = content.find("```", start) if end > start: return content[start:end].strip() # Check for generic code blocks if "```" in content: start = content.find("```") + 3 # Skip format identifier if present if content[start:].strip() and not content[start:start+1].isalnum(): start = content.find("\n", start) + 1 end = content.find("```", start) if end > start: return content[start:end].strip() return content def _createErrorContent(self, errorMessage: str, outputFormat: str) -> str: """ Create error content in the appropriate format. Args: errorMessage: Error message outputFormat: Output format Returns: Formatted error content """ if outputFormat == "json": return json.dumps({"error": errorMessage}) elif outputFormat == "csv": return f"error\n{errorMessage}" elif outputFormat in ["md", "markdown"]: return f"# Error\n\n{errorMessage}" elif outputFormat == "html": return f"

Error

{errorMessage}

" else: return f"Error: {errorMessage}" # Factory function for the Coach agent def getAgentCoach(): """Returns an instance of the Coach agent.""" return AgentCoach()