# gateway/modules/agents/agentCoach.py
# 2025-05-28 01:51:10 +02:00
#
# 380 lines
# No EOL
# 14 KiB
# Python

"""
Coach agent for answering questions and generating structured content.
Provides direct AI-based responses using extracted data from documents.
"""
import csv
import html
import io
import json
import logging
import re
import uuid
from datetime import datetime
from typing import Dict, Any, List

from modules.workflow.agentBase import AgentBase
from modules.interfaces.serviceChatModel import Task, ChatDocument, ChatContent
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class AgentCoach(AgentBase):
    """AI-driven agent for answering questions and generating structured content.

    The agent concatenates the data of the task's input documents into one
    text context, asks the AI service for a short JSON analysis of the
    request, and then asks it to generate one document per requested output.
    """

    # Defaults used when the task analysis does not provide a usable value.
    _DEFAULT_FORMAT = "md"
    _DEFAULT_FILENAME = "response"
    _DEFAULT_LABEL = "output.txt"
    _DEFAULT_EXTENSION = "txt"

    # Number of context characters included in the (cheap) analysis prompt.
    _ANALYSIS_CONTEXT_CHARS = 1500

    _CODE_FENCE = "```"
    # A fence "info string" that looks like a language tag (may be empty).
    _INFO_STRING = re.compile(r"[\w+#.-]*")
    # Formats whose content may legitimately contain fenced code blocks, so
    # an embedded block must not be mistaken for the whole answer.
    _PROSE_FORMATS = frozenset({"md", "markdown", "txt", "text"})
    # Fence tags that mark a fence as a wrapper around a prose document.
    _WRAPPER_TAGS = frozenset({"md", "markdown", "txt", "text", "plaintext"})

    _CONTENT_TYPES = {
        "md": "text/markdown",
        "markdown": "text/markdown",
        "html": "text/html",
        "txt": "text/plain",
        "text": "text/plain",
        "json": "application/json",
        "csv": "text/csv",
        "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    }

    def __init__(self):
        """Initialize the coach agent's name, label, description and capabilities."""
        super().__init__()
        self.name = "coach"
        self.label = "Coach & Assistant"
        self.description = "Answers questions, converts and generates content directly from data without complex processing"
        self.capabilities = [
            "dataConversion",
            "questionAnswering",
            "contentGeneration",
            "simpleDataFormatting",
            "informationSynthesis",
            "directResponse",
            "imageInterpretation",
            "structuredOutput",
        ]

    def setDependencies(self, serviceBase=None):
        """Set external dependencies for the agent.

        Args:
            serviceBase: Service object forwarded to ``setService``.
        """
        self.setService(serviceBase)

    async def processTask(self, task: Task) -> Dict[str, Any]:
        """
        Process a task by directly using AI to provide answers or content based on extracted data.

        Args:
            task: Task object; ``prompt``, ``filesInput`` and ``filesOutput`` are read.

        Returns:
            Dictionary with ``feedback`` (str) and ``documents`` (list). Any
            exception is logged and reported through ``feedback`` with an
            empty document list instead of being raised.
        """
        try:
            # Extract task information
            prompt = task.prompt
            inputDocuments = task.filesInput or []
            outputSpecs = task.filesOutput

            # Check AI service
            if not self.service or not self.service.base:
                return {
                    "feedback": "The Coach agent requires an AI service to function.",
                    "documents": [],
                }

            # Collect all data from input documents
            documentContext = self._collectExtractedData(inputDocuments)

            # Generate task understanding to guide response creation
            taskUnderstanding = await self._analyzeTask(prompt, documentContext)

            # If no output specs provided, create a default document.
            # `or` (not a .get default) so an explicit JSON null in the AI
            # analysis cannot yield a label such as "None.md".
            if not outputSpecs:
                defaultFormat = taskUnderstanding.get("recommendedFormat") or self._DEFAULT_FORMAT
                defaultTitle = taskUnderstanding.get("suggestedFilename") or self._DEFAULT_FILENAME
                outputSpecs = [{
                    "label": f"{defaultTitle}.{defaultFormat}",
                    "description": "Response to your request",
                }]

            # Process each output specification
            documents = []
            for spec in outputSpecs:
                outputLabel = spec.get("label") or self._DEFAULT_LABEL
                outputDescription = spec.get("description") or ""
                # Determine format based on file extension
                _, outputFormat = self._splitLabel(outputLabel)
                document = await self._generateDocument(
                    prompt,
                    documentContext,
                    outputLabel,
                    outputFormat,
                    outputDescription,
                    taskUnderstanding,
                )
                documents.append(document)

            # Generate feedback
            feedback = taskUnderstanding.get("feedback") or "I've created content based on your request."
            return {
                "feedback": feedback,
                "documents": documents,
            }
        except Exception as e:
            logger.error(f"Error in coach processing: {str(e)}", exc_info=True)
            return {
                "feedback": f"Error while processing your request: {str(e)}",
                "documents": [],
            }

    def _collectExtractedData(self, documents: List[ChatDocument]) -> str:
        """
        Collect the data of all input documents into one text block.

        Each document contributes a ``--- name.ext ---`` header followed by
        the ``data`` of every content entry that has a truthy ``data`` value.

        Args:
            documents: List of input documents (``None`` is treated as empty)

        Returns:
            Combined data as text, parts joined with newlines
        """
        contextParts = []
        for doc in documents or []:
            docName = f"{doc.name}.{doc.ext}" if doc.ext else doc.name
            contextParts.append(f"\n\n--- {docName} ---\n")
            # NOTE(review): assumes content.data is text; a non-str value
            # makes the join below raise TypeError - confirm against the model.
            contextParts.extend(
                content.data for content in (doc.contents or []) if content.data
            )
        return "\n".join(contextParts)

    @staticmethod
    def _defaultTaskUnderstanding() -> Dict:
        """Return the fallback analysis used when the AI analysis is unavailable."""
        return {
            "requestType": "content",
            "recommendedFormat": "md",
            "suggestedFilename": "response",
            "contentFocus": "Addressing the main request",
            "feedback": "I've created content based on your request and the provided data.",
            "complexity": "moderate",
        }

    async def _analyzeTask(self, prompt: str, context: str) -> Dict:
        """
        Use AI to analyze the task and develop an understanding of what's required.

        Args:
            prompt: The task prompt
            context: Extracted document data (only its first
                ``_ANALYSIS_CONTEXT_CHARS`` characters are sent)

        Returns:
            Task understanding dictionary; the fallback dictionary when the
            AI call fails or its answer contains no parsable JSON object.
        """
        analysisPrompt = "\n".join([
            "",
            "Analyze this request to determine the best approach for creating a response.",
            f"REQUEST: {prompt}",
            "EXTRACTED DATA:",
            f"{context[:self._ANALYSIS_CONTEXT_CHARS]}... (truncated if longer)",
            "Create a task analysis in JSON format with the following structure:",
            "{",
            "\"requestType\": \"question|content|data|report|description\",",
            "\"recommendedFormat\": \"md|txt|html|csv|json\",",
            "\"suggestedFilename\": \"appropriate_filename_without_extension\",",
            "\"contentFocus\": \"brief description of what to focus on\",",
            "\"feedback\": \"brief explanation of how you'll approach this request\",",
            "\"complexity\": \"simple|moderate|complex\"",
            "}",
            "Only return valid JSON. No preamble or explanations.",
            "",
        ])
        try:
            # Get task understanding from AI
            response = await self.service.base.callAi([
                {"role": "system", "content": "You are a task analysis expert. Respond with valid JSON only."},
                {"role": "user", "content": analysisPrompt},
            ])
            # Extract the outermost {...} span so surrounding prose is ignored
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1
            if jsonStart >= 0 and jsonEnd > jsonStart:
                taskUnderstanding = json.loads(response[jsonStart:jsonEnd])
                if isinstance(taskUnderstanding, dict):
                    return taskUnderstanding
            # Fallback if JSON not found
            return self._defaultTaskUnderstanding()
        except Exception as e:
            # Analysis is best-effort: generation can proceed with defaults.
            logger.warning(f"Error analyzing task: {str(e)}")
            return self._defaultTaskUnderstanding()

    def _splitLabel(self, outputLabel: str) -> tuple:
        """
        Split an output label into document name and lower-cased extension.

        The split happens at the LAST dot so that the name and the extension
        always describe the same label ("my.report.md" -> "my.report", "md").

        Args:
            outputLabel: Output filename, with or without extension

        Returns:
            Tuple ``(name, extension)``; the extension defaults to "txt" and
            an empty name (label such as ".md") defaults to "output".
        """
        stem, dot, extension = outputLabel.rpartition(".")
        if not dot:
            return outputLabel, self._DEFAULT_EXTENSION
        return stem or "output", extension.lower() or self._DEFAULT_EXTENSION

    def _buildDocument(self, outputLabel: str, outputFormat: str, data: str,
                       contentName: str, summary: str, metadata: Dict) -> ChatDocument:
        """Create a ChatDocument with a single ChatContent entry holding ``data``."""
        documentName, _ = self._splitLabel(outputLabel)
        return ChatDocument(
            id=str(uuid.uuid4()),
            name=documentName,
            ext=outputFormat,
            data=data,
            contents=[
                ChatContent(
                    name=contentName,
                    data=data,
                    summary=summary,
                    metadata=metadata,
                )
            ],
        )

    async def _generateDocument(self, prompt: str, context: str, outputLabel: str,
                                outputFormat: str, description: str, taskUnderstanding: Dict) -> ChatDocument:
        """
        Generate a document based on the request and extracted data.

        Args:
            prompt: The task prompt
            context: Extracted document data
            outputLabel: Output filename
            outputFormat: Output format (file extension)
            description: Output description
            taskUnderstanding: Task understanding from analysis

        Returns:
            ChatDocument object. When generation fails, a document whose
            content is an error message in the requested format (content
            entry named "error") is returned instead of raising.
        """
        contentFocus = taskUnderstanding.get("contentFocus", "Addressing the main request")
        # Build prompt based on output format
        generationPrompt = "\n".join([
            "",
            f"Create a response to the following request in {outputFormat} format:",
            f"REQUEST: {prompt}",
            "EXTRACTED DATA:",
            f"{context}",
            "OUTPUT REQUIREMENTS:",
            f"- Filename: {outputLabel}",
            f"- Format: {outputFormat}",
            f"- Description: {description}",
            f"- Focus on: {contentFocus}",
            "Guidelines:",
            "1. Create content that directly addresses the request",
            "2. Use the extracted data to inform your response",
            f"3. Format the output appropriately for {outputFormat}",
            "4. Be comprehensive but focused",
            "5. Include appropriate formatting, structure, and organization",
            "Only return the content. No explanations or additional text.",
            "",
        ])
        try:
            # Get content from AI
            content = await self.service.base.callAi([
                {"role": "system", "content": f"You are a content generation expert. Create content in {outputFormat} format."},
                {"role": "user", "content": generationPrompt},
            ])
            if not isinstance(content, str):
                raise TypeError(
                    f"AI service returned {type(content).__name__}, expected text"
                )
            # Extract content from code blocks if present
            content = self._extractFromCodeBlocks(content, outputFormat)
            return self._buildDocument(
                outputLabel, outputFormat, content,
                contentName="main",
                summary=description,
                metadata={"format": outputFormat},
            )
        except Exception as e:
            logger.error(f"Error generating document: {str(e)}")
            errorContent = self._createErrorContent(str(e), outputFormat)
            return self._buildDocument(
                outputLabel, outputFormat, errorContent,
                contentName="error",
                summary="Error generating content",
                metadata={"format": outputFormat, "error": str(e)},
            )

    def _getContentType(self, outputFormat: str) -> str:
        """
        Get content type based on format.

        Args:
            outputFormat: Output format (file extension, case-insensitive)

        Returns:
            MIME content type; "text/plain" for unknown formats
        """
        return self._CONTENT_TYPES.get((outputFormat or "").lower(), "text/plain")

    def _parseFencedBlocks(self, text: str, wanted: str) -> List[tuple]:
        """
        Split ``text`` into its fenced code blocks.

        The rest of an opening fence line is treated as the block's language
        tag when it looks like one; otherwise it is kept as part of the body.

        Args:
            text: Raw AI response
            wanted: Lower-cased expected format, used to recognise a tag that
                is glued to the body (e.g. a one-line fence starting with "json")

        Returns:
            List of ``(tag, body)`` tuples in order of appearance; ``tag`` is
            lower-cased and may be empty. An unterminated fence is ignored.
        """
        fence = self._CODE_FENCE
        blocks = []
        opening = text.find(fence)
        while opening != -1:
            bodyStart = opening + len(fence)
            closing = text.find(fence, bodyStart)
            if closing == -1:
                break  # unterminated fence: nothing reliable to extract
            lineEnd = text.find("\n", bodyStart, closing)
            firstLine = text[bodyStart:lineEnd].strip() if lineEnd != -1 else ""
            if lineEnd != -1 and self._INFO_STRING.fullmatch(firstLine):
                blocks.append((firstLine.lower(), text[lineEnd + 1:closing]))
            else:
                tag, body = "", text[bodyStart:closing]
                if wanted and body[:len(wanted)].lower() == wanted:
                    tag, body = wanted, body[len(wanted):]
                blocks.append((tag, body))
            opening = text.find(fence, closing + len(fence))
        return blocks

    def _unwrapWholeResponse(self, content: str):
        """
        Unwrap a prose response that is entirely enclosed in one code fence.

        Args:
            content: Raw AI response

        Returns:
            The inner text, or ``None`` when the response is not a single
            wrapper fence (e.g. a markdown document that merely starts and
            ends with code samples) or the wrapper is empty.
        """
        fence = self._CODE_FENCE
        text = content.strip()
        if len(text) < 2 * len(fence) or not (text.startswith(fence) and text.endswith(fence)):
            return None
        closing = len(text) - len(fence)
        lineEnd = text.find("\n", len(fence), closing)
        tag, body = "", text[len(fence):closing]
        if lineEnd != -1:
            firstLine = text[len(fence):lineEnd].strip().lower()
            if self._INFO_STRING.fullmatch(firstLine):
                tag, body = firstLine, text[lineEnd + 1:closing]
        # A markdown/text tag marks a wrapper even if it contains nested
        # fences; an untagged fence only counts when nothing is nested.
        isWrapper = tag in self._WRAPPER_TAGS or (not tag and fence not in body)
        body = body.strip()
        return body if isWrapper and body else None

    def _extractFromCodeBlocks(self, content: str, format: str) -> str:
        """
        Extract content from code blocks if present.

        For markdown/text formats the response is only unwrapped when it is
        one wrapper fence as a whole, because such documents may contain code
        samples of their own. For other formats the first non-empty block
        tagged with ``format`` is preferred, then the first non-empty block.

        Args:
            content: Raw content
            format: Expected format

        Returns:
            Extracted content (stripped), or ``content`` unchanged when no
            usable code block is found
        """
        fence = self._CODE_FENCE
        if not content or fence not in content:
            return content
        wanted = (format or "").strip().lower()

        if wanted in self._PROSE_FORMATS:
            unwrapped = self._unwrapWholeResponse(content)
            return content if unwrapped is None else unwrapped

        blocks = [
            (tag, body.strip())
            for tag, body in self._parseFencedBlocks(content, wanted)
        ]
        blocks = [(tag, body) for tag, body in blocks if body]
        if not blocks:
            return content
        for tag, body in blocks:
            if tag == wanted:
                return body
        return blocks[0][1]

    def _createErrorContent(self, errorMessage: str, outputFormat: str) -> str:
        """
        Create error content in the appropriate format.

        Args:
            errorMessage: Error message
            outputFormat: Output format

        Returns:
            Formatted error content. The message is JSON-encoded, CSV-quoted
            or HTML-escaped as required so it cannot break the output format.
        """
        normalizedFormat = (outputFormat or "").lower()
        if normalizedFormat == "json":
            return json.dumps({"error": errorMessage})
        if normalizedFormat == "csv":
            # csv.writer quotes the field only when it contains delimiters,
            # quotes or line breaks.
            buffer = io.StringIO()
            writer = csv.writer(buffer, lineterminator="\n")
            writer.writerow(["error"])
            writer.writerow([errorMessage])
            return buffer.getvalue().rstrip("\n")
        if normalizedFormat in ("md", "markdown"):
            return f"# Error\n\n{errorMessage}"
        if normalizedFormat == "html":
            return f"<html><body><h1>Error</h1><p>{html.escape(errorMessage)}</p></body></html>"
        return f"Error: {errorMessage}"
# Factory used to obtain the Coach agent.
def getAgentCoach():
    """Create and return a new AgentCoach instance."""
    coachAgent = AgentCoach()
    return coachAgent