376 lines
No EOL
14 KiB
Python
376 lines
No EOL
14 KiB
Python
"""
|
|
Coach agent for answering questions and generating structured content.
|
|
Provides direct AI-based responses using extracted data from documents.
|
|
"""
|
|
|
|
import logging
|
|
from typing import Dict, Any, List
|
|
import json
|
|
from datetime import datetime
|
|
|
|
from modules.workflow.agentBase import AgentBase
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
class AgentCoach(AgentBase):
    """AI-driven agent for answering questions and generating structured content from extracted data.

    The agent collects the ``dataExtracted`` text of the input documents, asks
    the AI service for a short task analysis, and then generates one output
    document per output specification.
    """

    def __init__(self):
        """Initialize the coach agent"""
        super().__init__()
        self.name = "coach"
        self.label = "Coach & Assistant"
        self.description = "Answers questions, converts and generates content directly from data without complex processing"
        self.capabilities = [
            "dataConversion",
            "questionAnswering",
            "contentGeneration",
            "simpleDataFormatting",
            "informationSynthesis",
            "directResponse",
            "imageInterpretation",
            "structuredOutput",
        ]

    def setDependencies(self, mydom=None):
        """Set external dependencies for the agent.

        Args:
            mydom: AI service object. The agent awaits ``mydom.callAi(messages)``
                with a list of ``{"role", "content"}`` dictionaries.
        """
        # Store the service: processTask() refuses to run while self.mydom is unset.
        self.mydom = mydom

    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a task by directly using AI to provide answers or content based on extracted data.

        Args:
            task: Task dictionary with prompt, inputDocuments, outputSpecifications

        Returns:
            Dictionary with feedback and documents. Any exception is logged and
            reported through the feedback text with an empty document list.
        """
        try:
            # Extract task information; explicit None values fall back to the defaults
            prompt = task.get("prompt", "")
            inputDocuments = task.get("inputDocuments") or []
            outputSpecs = task.get("outputSpecifications") or []

            # Check AI service (getattr: the attribute may never have been assigned)
            if not getattr(self, "mydom", None):
                return {
                    "feedback": "The Coach agent requires an AI service to function.",
                    "documents": []
                }

            # Collect all extracted data from input documents
            documentContext = self._collectExtractedData(inputDocuments)

            # Generate task understanding to guide response creation
            taskUnderstanding = await self._analyzeTask(prompt, documentContext)

            # If no output specs provided, create a default document
            if not outputSpecs:
                # "or" also covers null values in the AI's JSON answer
                defaultFormat = taskUnderstanding.get("recommendedFormat") or "md"
                defaultTitle = taskUnderstanding.get("suggestedFilename") or "response"
                outputSpecs = [{
                    "label": f"{defaultTitle}.{defaultFormat}",
                    "description": "Response to your request"
                }]

            # Generate one document per output specification
            documents = []
            for spec in outputSpecs:
                outputLabel = spec.get("label") or "output.txt"
                outputDescription = spec.get("description") or ""

                # Determine format based on file extension
                if "." in outputLabel:
                    outputFormat = outputLabel.rsplit(".", 1)[-1].lower()
                else:
                    outputFormat = "txt"

                document = await self._generateDocument(
                    prompt,
                    documentContext,
                    outputLabel,
                    outputFormat,
                    outputDescription,
                    taskUnderstanding
                )
                documents.append(document)

            # Generate feedback
            feedback = taskUnderstanding.get("feedback") or "I've created content based on your request."

            return {
                "feedback": feedback,
                "documents": documents
            }

        except Exception as e:
            # Top-level boundary: report the failure to the caller instead of raising
            logger.error(f"Error in coach processing: {str(e)}", exc_info=True)
            return {
                "feedback": f"Error while processing your request: {str(e)}",
                "documents": []
            }

    def _collectExtractedData(self, documents: List[Dict[str, Any]]) -> str:
        """
        Collect extracted data from input documents.

        Each document contributes a ``--- name.ext ---`` header followed by the
        non-empty ``dataExtracted`` values of its ``contents`` entries.

        Args:
            documents: List of input documents

        Returns:
            Combined extracted data as text
        """
        contextParts = []

        for doc in documents or []:
            docName = doc.get("name", "unnamed")
            if doc.get("ext"):
                docName = f"{docName}.{doc.get('ext')}"

            contextParts.append(f"\n\n--- {docName} ---\n")

            # Process contents, focusing on dataExtracted field
            for content in doc.get("contents") or []:
                extracted = content.get("dataExtracted")
                if extracted:
                    # str() keeps the final join from failing on non-string payloads
                    contextParts.append(str(extracted))

        return "\n".join(contextParts)

    def _defaultTaskUnderstanding(self) -> Dict[str, Any]:
        """Return the fallback task analysis used when the AI analysis is unavailable."""
        return {
            "requestType": "content",
            "recommendedFormat": "md",
            "suggestedFilename": "response",
            "contentFocus": "Addressing the main request",
            "feedback": "I've created content based on your request and the provided data.",
            "complexity": "moderate"
        }

    async def _analyzeTask(self, prompt: str, context: str) -> Dict:
        """
        Use AI to analyze the task and develop an understanding of what's required.

        Only the first 1500 characters of the context are sent for the analysis.

        Args:
            prompt: The task prompt
            context: Extracted document data

        Returns:
            Task understanding dictionary; the fallback dictionary when the AI
            call fails or its answer contains no parseable JSON object
        """
        analysisPrompt = f"""
        Analyze this request to determine the best approach for creating a response.

        REQUEST: {prompt}

        EXTRACTED DATA:
        {context[:1500]}... (truncated if longer)

        Create a task analysis in JSON format with the following structure:
        {{
            "requestType": "question|content|data|report|description",
            "recommendedFormat": "md|txt|html|csv|json",
            "suggestedFilename": "appropriate_filename_without_extension",
            "contentFocus": "brief description of what to focus on",
            "feedback": "brief explanation of how you'll approach this request",
            "complexity": "simple|moderate|complex"
        }}

        Only return valid JSON. No preamble or explanations.
        """

        try:
            response = await self.mydom.callAi([
                {"role": "system", "content": "You are a task analysis expert. Respond with valid JSON only."},
                {"role": "user", "content": analysisPrompt}
            ])

            # Extract JSON from response: outermost "{" ... "}" span
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1

            if jsonStart >= 0 and jsonEnd > jsonStart:
                return json.loads(response[jsonStart:jsonEnd])

            # Fallback if JSON not found
            return self._defaultTaskUnderstanding()

        except Exception as e:
            # Best effort: the analysis only guides generation, so never fail the task here
            logger.warning(f"Error analyzing task: {str(e)}")
            return self._defaultTaskUnderstanding()

    async def _generateDocument(self, prompt: str, context: str, outputLabel: str,
                                outputFormat: str, description: str, taskUnderstanding: Dict) -> Dict[str, Any]:
        """
        Generate a document based on the request and extracted data.

        Args:
            prompt: The task prompt
            context: Extracted document data
            outputLabel: Output filename
            outputFormat: Output format (file extension)
            description: Output description
            taskUnderstanding: Task understanding from analysis

        Returns:
            Document object as built by ``self.formatAgentDocumentOutput``
            (not defined in this class; presumably inherited from AgentBase).
            When generation fails, the document carries an error message in
            the requested format instead.
        """
        # Determine content type based on format
        contentType = self._getContentType(outputFormat)
        contentFocus = taskUnderstanding.get("contentFocus", "Addressing the main request")

        # Build prompt based on output format
        generationPrompt = f"""
        Create a response to the following request in {outputFormat} format:

        REQUEST: {prompt}

        EXTRACTED DATA:
        {context}

        OUTPUT REQUIREMENTS:
        - Filename: {outputLabel}
        - Format: {outputFormat}
        - Description: {description}
        - Focus on: {contentFocus}

        Guidelines:
        1. Create content that directly addresses the request
        2. Use the extracted data to inform your response
        3. Format the output appropriately for {outputFormat}
        4. Be comprehensive but focused
        5. Include appropriate formatting, structure, and organization

        Your response should be in valid {outputFormat} format without explanations or markdown formatting around it.
        """

        try:
            # Build system prompt based on format
            systemPrompt = f"You create {outputFormat} format content based on requests and extracted data. Provide only the content in valid {outputFormat} format."

            # Generate content with AI
            content = await self.mydom.callAi([
                {"role": "system", "content": systemPrompt},
                {"role": "user", "content": generationPrompt}
            ])

            # Process content based on format
            if outputFormat in ("json", "csv"):
                # For structured formats, extract from code blocks if present
                content = self._extractFromCodeBlocks(content, outputFormat)

                # Validate JSON if needed
                if outputFormat == "json":
                    try:
                        json.loads(content)
                    except ValueError:
                        # json.JSONDecodeError is a ValueError subclass
                        logger.warning("Invalid JSON generated, attempting to fix")
                        # Try to extract just the JSON portion
                        jsonStart = content.find('{')
                        jsonEnd = content.rfind('}') + 1
                        if jsonStart >= 0 and jsonEnd > jsonStart:
                            content = content[jsonStart:jsonEnd]

            # Ensure proper structure for markdown/HTML
            if outputFormat in ("md", "markdown") and not content.strip().startswith("#"):
                title = "Response"
                content = f"# {title}\n\n{content}"
            elif outputFormat == "html" and "<html" not in content.lower():
                title = "Response"
                content = f"<html><head><title>{title}</title></head><body><h1>{title}</h1>{content}</body></html>"

            return self.formatAgentDocumentOutput(outputLabel, content, contentType)

        except Exception as e:
            logger.error(f"Error generating document: {str(e)}")

            # Create error document
            errorContent = self._createErrorContent(str(e), outputFormat)
            return self.formatAgentDocumentOutput(outputLabel, errorContent, contentType)

    def _getContentType(self, outputFormat: str) -> str:
        """
        Get content type based on format.

        Args:
            outputFormat: Output format (file extension, matched case-insensitively)

        Returns:
            Content type; "text/plain" for unknown formats
        """
        contentTypeMap = {
            "md": "text/markdown",
            "markdown": "text/markdown",
            "html": "text/html",
            "txt": "text/plain",
            "text": "text/plain",
            "json": "application/json",
            "csv": "text/csv",
            "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        }

        return contentTypeMap.get((outputFormat or "").lower(), "text/plain")

    def _extractFromCodeBlocks(self, content: str, format: str) -> str:
        """
        Extract content from code blocks if present.

        A fence tagged with the expected format (for example three backticks
        followed by "json") is preferred. Otherwise the first fenced block is
        used and a language tag on its opening line is dropped.

        Args:
            content: Raw content
            format: Expected format

        Returns:
            Extracted content, or the unchanged input when no complete
            fenced block is found
        """
        fence = "```"

        # Check for code blocks tagged with the expected format
        taggedFence = f"{fence}{format}"
        start = content.find(taggedFence)
        if start >= 0:
            start += len(taggedFence)
            end = content.find(fence, start)
            if end > start:
                return content[start:end].strip()

        # Check for generic code blocks
        start = content.find(fence)
        if start >= 0:
            start += len(fence)
            # Skip format identifier if present: the rest of the opening line
            # counts as a tag only when it is one identifier-like word.
            firstLine, newline, _ = content[start:].partition("\n")
            tag = firstLine.strip()
            if newline and tag and tag.replace("-", "").replace("_", "").replace("+", "").isalnum():
                start += len(firstLine) + len(newline)
            end = content.find(fence, start)
            if end > start:
                return content[start:end].strip()

        return content

    def _createErrorContent(self, errorMessage: str, outputFormat: str) -> str:
        """
        Create error content in the appropriate format.

        Args:
            errorMessage: Error message
            outputFormat: Output format

        Returns:
            Formatted error content
        """
        # Local import keeps this block self-contained; html is standard library
        import html

        if outputFormat == "json":
            return json.dumps({"error": errorMessage})

        if outputFormat == "csv":
            cell = errorMessage
            # Quote the cell so separators or line breaks in the message stay in one field
            if any(ch in cell for ch in ',"\r\n'):
                cell = '"' + cell.replace('"', '""') + '"'
            return f"error\n{cell}"

        if outputFormat in ("md", "markdown"):
            return f"# Error\n\n{errorMessage}"

        if outputFormat == "html":
            # Exception text is not trusted markup; escape it before embedding
            return f"<html><body><h1>Error</h1><p>{html.escape(errorMessage)}</p></body></html>"

        return f"Error: {errorMessage}"
|
|
|
|
|
|
# Factory function for the Coach agent
|
|
def getAgentCoach():
    """Create a new Coach agent and hand it back to the caller."""
    coachAgent = AgentCoach()
    return coachAgent