"""
Coach agent for answering questions and generating structured content.
Provides direct AI-based responses using extracted data from documents.
"""

import html
import json
import logging
from datetime import datetime
from typing import Any, Dict, List

from modules.workflowAgentsRegistry import AgentBase

logger = logging.getLogger(__name__)


class AgentCoach(AgentBase):
    """AI-driven agent for answering questions and generating structured content from extracted data"""

    def __init__(self):
        """Initialize the coach agent"""
        super().__init__()
        self.name = "coach"
        self.label = "Coach & Assistant"
        self.description = "Answers questions and generates content directly from extracted data without complex processing"
        self.capabilities = [
            "questionAnswering",
            "contentGeneration",
            "simpleDataFormatting",
            "informationSynthesis",
            "directResponse",
            "imageInterpretation",
            "structuredOutput",
        ]

    def setDependencies(self, mydom=None):
        """Set external dependencies for the agent."""
        # mydom is the object whose awaitable callAi(messages) is used for all AI calls
        self.mydom = mydom

    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a task by directly using AI to provide answers or content based on extracted data.

        Args:
            task: Task dictionary with prompt, inputDocuments, outputSpecifications

        Returns:
            Dictionary with feedback and documents. Any exception raised while
            processing is logged and reported through "feedback" with an empty
            "documents" list instead of being propagated.
        """
        try:
            # Extract task information
            prompt = task.get("prompt", "")
            inputDocuments = task.get("inputDocuments", [])
            outputSpecs = task.get("outputSpecifications", [])

            # Check AI service. getattr: setDependencies() may never have been
            # called, in which case the attribute might not exist at all.
            if not getattr(self, "mydom", None):
                return {
                    "feedback": "The Coach agent requires an AI service to function.",
                    "documents": [],
                }

            # Collect all extracted data from input documents
            documentContext = self._collectExtractedData(inputDocuments)

            # Generate task understanding to guide response creation
            taskUnderstanding = await self._analyzeTask(prompt, documentContext)

            # If no output specs provided, create a default document
            if not outputSpecs:
                defaultFormat = taskUnderstanding.get("recommendedFormat", "md")
                defaultTitle = taskUnderstanding.get("suggestedFilename", "response")
                outputSpecs = [{
                    "label": f"{defaultTitle}.{defaultFormat}",
                    "description": "Response to your request",
                }]

            # Generate one document per output specification
            documents = []
            for spec in outputSpecs:
                outputLabel = spec.get("label", "output.txt")
                outputDescription = spec.get("description", "")

                # Determine format based on file extension
                outputFormat = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "txt"

                document = await self._generateDocument(
                    prompt,
                    documentContext,
                    outputLabel,
                    outputFormat,
                    outputDescription,
                    taskUnderstanding,
                )
                documents.append(document)

            # Generate feedback
            feedback = taskUnderstanding.get("feedback", "I've created content based on your request.")

            return {
                "feedback": feedback,
                "documents": documents,
            }

        except Exception as e:
            # Top-level boundary: callers always get a result dictionary
            logger.error(f"Error in coach processing: {str(e)}", exc_info=True)
            return {
                "feedback": f"Error while processing your request: {str(e)}",
                "documents": [],
            }

    def _collectExtractedData(self, documents: List[Dict[str, Any]]) -> str:
        """
        Collect extracted data from input documents.

        Args:
            documents: List of input documents; None is treated as an empty list

        Returns:
            Combined extracted data as text: one "--- name.ext ---" header per
            document followed by every non-empty dataExtracted entry of its contents
        """
        contextParts = []

        for doc in documents or []:
            docName = doc.get("name", "unnamed")
            if doc.get("ext"):
                docName = f"{docName}.{doc.get('ext')}"

            contextParts.append(f"\n\n--- {docName} ---\n")

            # Process contents, focusing on dataExtracted field
            for content in doc.get("contents", []) or []:
                if content.get("dataExtracted"):
                    contextParts.append(content.get("dataExtracted", ""))

        return "\n".join(contextParts)

    def _defaultTaskUnderstanding(self) -> Dict:
        """Return the fallback analysis used when the AI analysis is unavailable or unparsable."""
        return {
            "requestType": "content",
            "recommendedFormat": "md",
            "suggestedFilename": "response",
            "contentFocus": "Addressing the main request",
            "feedback": "I've created content based on your request and the provided data.",
            "complexity": "moderate",
        }

    async def _analyzeTask(self, prompt: str, context: str) -> Dict:
        """
        Use AI to analyze the task and develop an understanding of what's required.

        Args:
            prompt: The task prompt
            context: Extracted document data (only the first 1500 characters are sent)

        Returns:
            Task understanding dictionary; the fallback dictionary is returned when
            the AI call fails or its answer contains no parsable JSON object
        """
        analysisPrompt = f"""
        Analyze this request to determine the best approach for creating a response.

        REQUEST: {prompt}

        EXTRACTED DATA:
        {context[:1500]}... (truncated if longer)

        Create a task analysis in JSON format with the following structure:
        {{
            "requestType": "question|content|data|report|description",
            "recommendedFormat": "md|txt|html|csv|json",
            "suggestedFilename": "appropriate_filename_without_extension",
            "contentFocus": "brief description of what to focus on",
            "feedback": "brief explanation of how you'll approach this request",
            "complexity": "simple|moderate|complex"
        }}

        Only return valid JSON. No preamble or explanations.
        """

        try:
            response = await self.mydom.callAi([
                {"role": "system", "content": "You are a task analysis expert. Respond with valid JSON only."},
                {"role": "user", "content": analysisPrompt},
            ])

            # Extract JSON from response: take the outermost {...} span so that
            # any preamble or trailing text around the object is ignored
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1

            if jsonStart >= 0 and jsonEnd > jsonStart:
                return json.loads(response[jsonStart:jsonEnd])

            # Fallback if JSON not found
            return self._defaultTaskUnderstanding()

        except Exception as e:
            # Analysis is best-effort; generation continues with defaults
            logger.warning(f"Error analyzing task: {str(e)}")
            return self._defaultTaskUnderstanding()

    async def _generateDocument(self, prompt: str, context: str, outputLabel: str,
                                outputFormat: str, description: str,
                                taskUnderstanding: Dict) -> Dict[str, Any]:
        """
        Generate a document based on the request and extracted data.

        Args:
            prompt: The task prompt
            context: Extracted document data
            outputLabel: Output filename
            outputFormat: Output format (file extension)
            description: Output description
            taskUnderstanding: Task understanding from analysis

        Returns:
            Document object as produced by formatAgentDocumentOutput; when
            generation fails, the document carries an error message rendered
            in the requested format
        """
        # Determine content type based on format
        contentType = self._getContentType(outputFormat)
        contentFocus = taskUnderstanding.get("contentFocus", "Addressing the main request")

        # Build prompt based on output format
        generationPrompt = f"""
        Create a response to the following request in {outputFormat} format:

        REQUEST: {prompt}

        EXTRACTED DATA:
        {context}

        OUTPUT REQUIREMENTS:
        - Filename: {outputLabel}
        - Format: {outputFormat}
        - Description: {description}
        - Focus on: {contentFocus}

        Guidelines:
        1. Create content that directly addresses the request
        2. Use the extracted data to inform your response
        3. Format the output appropriately for {outputFormat}
        4. Be comprehensive but focused
        5. Include appropriate formatting, structure, and organization

        Your response should be in valid {outputFormat} format without explanations or markdown formatting around it.
        """

        try:
            # Build system prompt based on format
            systemPrompt = f"You create {outputFormat} format content based on requests and extracted data. Provide only the content in valid {outputFormat} format."

            # Generate content with AI
            content = await self.mydom.callAi([
                {"role": "system", "content": systemPrompt},
                {"role": "user", "content": generationPrompt},
            ])

            # Process content based on format
            if outputFormat in ["json", "csv"]:
                # For structured formats, extract from code blocks if present
                content = self._extractFromCodeBlocks(content, outputFormat)

                # Validate JSON if needed
                if outputFormat == "json":
                    try:
                        json.loads(content)
                    except (ValueError, TypeError):
                        logger.warning("Invalid JSON generated, attempting to fix")
                        # Try to extract just the JSON portion; the trimmed
                        # text is not validated again
                        jsonStart = content.find('{')
                        jsonEnd = content.rfind('}') + 1
                        if jsonStart >= 0 and jsonEnd > jsonStart:
                            content = content[jsonStart:jsonEnd]

            # Ensure proper structure for markdown/HTML
            if outputFormat in ["md", "markdown"] and not content.strip().startswith("#"):
                title = "Response"
                content = f"# {title}\n\n{content}"
            # NOTE(review): the "<html" guard and the wrapper markup below were
            # reconstructed from a damaged source - confirm against the intended template
            elif outputFormat == "html" and "<html" not in content.lower():
                title = "Response"
                content = f"<html><head><title>{title}</title></head><body><h1>{title}</h1>{content}</body></html>"

            return self.formatAgentDocumentOutput(outputLabel, content, contentType)

        except Exception as e:
            logger.error(f"Error generating document: {str(e)}")

            # Create error document
            errorContent = self._createErrorContent(str(e), outputFormat)
            return self.formatAgentDocumentOutput(outputLabel, errorContent, contentType)

    def _getContentType(self, outputFormat: str) -> str:
        """
        Get content type based on format.

        Args:
            outputFormat: Output format

        Returns:
            Content type; "text/plain" for formats missing from the map
        """
        contentTypeMap = {
            "md": "text/markdown",
            "markdown": "text/markdown",
            "html": "text/html",
            "txt": "text/plain",
            "text": "text/plain",
            "json": "application/json",
            "csv": "text/csv",
            "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        }

        return contentTypeMap.get(outputFormat, "text/plain")

    def _extractFromCodeBlocks(self, content: str, format: str) -> str:
        """
        Extract content from code blocks if present.

        Args:
            content: Raw content
            format: Expected format (parameter name kept for caller compatibility
                although it shadows the builtin)

        Returns:
            Stripped body of the first fence tagged with the expected format,
            otherwise of the first fence of any kind, otherwise the content unchanged
        """
        fenceMarker = "```"

        # Preferred: a fence explicitly tagged with the expected format
        taggedFence = f"{fenceMarker}{format}"
        taggedPos = content.find(taggedFence)
        if taggedPos != -1:
            start = taggedPos + len(taggedFence)
            end = content.find(fenceMarker, start)
            if end > start:
                return content[start:end].strip()

        # Check for generic code blocks
        fencePos = content.find(fenceMarker)
        if fencePos != -1:
            start = fencePos + len(fenceMarker)

            # Skip format identifier if present: the rest of the opening line is
            # dropped only when it is empty or a single word (e.g. "JSON"), so a
            # payload that starts right after the fence is preserved
            lineEnd = content.find("\n", start)
            if lineEnd != -1:
                infoString = content[start:lineEnd].strip()
                if not infoString or infoString.isalnum():
                    start = lineEnd + 1

            end = content.find(fenceMarker, start)
            if end > start:
                return content[start:end].strip()

        return content

    def _createErrorContent(self, errorMessage: str, outputFormat: str) -> str:
        """
        Create error content in the appropriate format.

        Args:
            errorMessage: Error message
            outputFormat: Output format

        Returns:
            Formatted error content
        """
        if outputFormat == "json":
            return json.dumps({"error": errorMessage})
        elif outputFormat == "csv":
            return f"error\n{errorMessage}"
        elif outputFormat in ["md", "markdown"]:
            return f"# Error\n\n{errorMessage}"
        elif outputFormat == "html":
            # NOTE(review): markup reconstructed from a damaged source - confirm.
            # The message is escaped because exception text may contain
            # characters such as "<" that would otherwise break the page.
            return f"<html><body><h1>Error</h1><p>{html.escape(errorMessage)}</p></body></html>"
        else:
            return f"Error: {errorMessage}"


# Factory function for the Coach agent
def getAgentCoach():
    """Returns an instance of the Coach agent."""
    return AgentCoach()