"""
Documentation agent for creating documentation, reports, and structured content.
Reimagined with an output-first, AI-driven approach with multi-step document generation.
"""

import html
import json
import logging
from typing import Dict, Any, List

from modules.chat_registry import AgentBase

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

class AgentDocumentation(AgentBase):
    """AI-driven agent for creating documentation and structured content using multi-step generation.

    The agent first asks the AI service for a documentation plan (title,
    audience, section structure, ...) and then generates every part of each
    requested document with its own AI call before assembling the result.

    The AI service is injected through ``set_dependencies``; this class awaits
    ``mydom.call_ai(messages, ...)`` and treats the result as text.
    """

    # File extension -> MIME type reported in the document metadata.
    _CONTENT_TYPES = {
        "md": "text/markdown",
        "markdown": "text/markdown",
        "html": "text/html",
        "txt": "text/plain",
        "text": "text/plain",
        "json": "application/json",
        "csv": "text/csv",
    }

    # Document types (lower-case) that receive a dedicated executive summary.
    _SUMMARY_DOCUMENT_TYPES = frozenset({"report", "whitepaper", "case study"})

    # Characters that are replaced by "_" when a title becomes a filename.
    _FILENAME_TRANSLATION = str.maketrans({char: "_" for char in '<>:"/\\|?*'})

    def __init__(self):
        """Initialize the documentation agent"""
        super().__init__()
        self.name = "documentation"
        self.description = "Creates structured documentation, reports, and content using AI with multi-step generation"
        self.capabilities = [
            "report_generation",
            "documentation",
            "content_structuring",
            "technical_writing",
            "knowledge_organization",
        ]

    def set_dependencies(self, mydom=None):
        """Set external dependencies for the agent.

        Args:
            mydom: AI service object; its awaitable ``call_ai`` is used for
                every generation step.
        """
        self.mydom = mydom

    async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a task by focusing on required outputs and using AI to generate them.

        Args:
            task: Task dictionary with prompt, input_documents, output_specifications

        Returns:
            Dictionary with "feedback" (str) and "documents" (list). Errors are
            reported through "feedback" with an empty document list instead of
            being raised.
        """
        try:
            # Extract task information; "or" also covers keys explicitly set to None.
            prompt = task.get("prompt", "")
            input_documents = task.get("input_documents") or []
            output_specs = task.get("output_specifications") or []

            # getattr: set_dependencies() may never have been called, in which
            # case the attribute is not guaranteed to exist.
            if not getattr(self, "mydom", None):
                return {
                    "feedback": "The Documentation agent requires an AI service to function.",
                    "documents": [],
                }

            # Extract context from input documents - focusing only on data_extracted
            document_context = self._extract_document_context(input_documents)

            # Create task analysis to understand the requirements
            documentation_plan = await self._analyze_task(prompt, document_context, output_specs)

            # If no output specs provided, create default document
            if not output_specs:
                default_format = documentation_plan.get("recommended_format", "markdown")
                default_title = documentation_plan.get("title", "Documentation")
                safe_title = self._sanitize_filename(default_title)
                output_specs = [
                    {"label": f"{safe_title}.{default_format}", "description": "Comprehensive documentation"}
                ]

            # Generate one document per output specification
            documents = []
            for spec in output_specs:
                document = await self._create_document_multi_step(
                    prompt,
                    document_context,
                    spec.get("label", ""),
                    spec.get("description", ""),
                    documentation_plan,
                )
                documents.append(document)

            feedback = documentation_plan.get(
                "feedback", f"Created {len(documents)} documents based on your requirements."
            )
            return {"feedback": feedback, "documents": documents}

        except Exception as e:
            # Top-level boundary: callers get a feedback message, never an exception.
            logger.error("Error in documentation generation: %s", e, exc_info=True)
            return {
                "feedback": f"Error during documentation generation: {str(e)}",
                "documents": [],
            }

    def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
        """
        Extract context from input documents, focusing on data_extracted.

        Args:
            documents: List of document objects; each may carry "name", "ext"
                and a "contents" list whose items may hold "data_extracted".

        Returns:
            Extracted context as text: one "--- name ---" header per document
            followed by its non-empty data_extracted values.
        """
        context_parts = []

        for doc in documents or []:
            doc_name = doc.get("name", "unnamed")
            if doc.get("ext"):
                doc_name = f"{doc_name}.{doc.get('ext')}"

            context_parts.append(f"\n\n--- {doc_name} ---\n")

            # "or []" tolerates a "contents" key that is present but None.
            for content in doc.get("contents") or []:
                extracted = content.get("data_extracted")
                if extracted:
                    # str() keeps the final join safe for non-string payloads.
                    context_parts.append(str(extracted))

        return "\n".join(context_parts)

    def _sanitize_filename(self, filename: str) -> str:
        """
        Sanitize a filename by removing invalid characters.

        Args:
            filename: Filename to sanitize

        Returns:
            Filename with each of ``<>:"/\\|?*`` replaced by "_"; names longer
            than 100 characters are cut to 97 characters plus "...".
        """
        filename = str(filename).translate(self._FILENAME_TRANSLATION)

        # Trim filename if too long
        if len(filename) > 100:
            filename = filename[:97] + "..."

        return filename

    @staticmethod
    def _default_plan() -> Dict[str, Any]:
        """Return a fresh generic documentation plan used when the AI plan is unavailable."""
        return {
            "title": "Documentation",
            "document_type": "report",
            "audience": "general",
            "detailed_structure": [
                {
                    "title": "Introduction",
                    "key_points": ["Purpose", "Scope"],
                    "subsections": [],
                    "importance": "high",
                    "estimated_length": "short",
                },
                {
                    "title": "Main Content",
                    "key_points": ["Core Information"],
                    "subsections": ["Key Findings", "Analysis"],
                    "importance": "high",
                    "estimated_length": "long",
                },
                {
                    "title": "Conclusion",
                    "key_points": ["Summary", "Next Steps"],
                    "subsections": [],
                    "importance": "medium",
                    "estimated_length": "short",
                },
            ],
            "key_topics": ["General Information"],
            "tone": "formal",
            "recommended_format": "markdown",
            "formatting_requirements": ["Clear headings", "Professional formatting"],
            "executive_summary": "A comprehensive documentation covering the requested topics.",
            "feedback": "Created documentation based on your requirements.",
        }

    @staticmethod
    def _context_excerpt(context: str, limit: int) -> str:
        """Return at most ``limit`` characters of context, marking it only when text was cut."""
        context = context or ""
        if len(context) <= limit:
            return context
        return f"{context[:limit]}... (truncated)"

    @staticmethod
    def _join_items(items: Any) -> str:
        """Render an AI-provided list as comma-separated text without assuming string items."""
        if not items:
            return ""
        if isinstance(items, str):
            return items
        if not isinstance(items, (list, tuple, set)):
            return str(items)
        rendered = []
        for item in items:
            # Plans sometimes describe entries as objects; prefer their title.
            if isinstance(item, dict) and item.get("title"):
                item = item["title"]
            rendered.append(str(item))
        return ", ".join(rendered)

    async def _analyze_task(self, prompt: str, context: str, output_specs: List) -> Dict:
        """
        Use AI to analyze the task and create a documentation plan.

        Args:
            prompt: The task prompt
            context: Document context
            output_specs: Output specifications

        Returns:
            Documentation plan dictionary. The generic plan from
            ``_default_plan`` is returned when the AI call fails or its
            response contains no parseable JSON object.
        """
        analysis_prompt = f"""
        Analyze this documentation task and create a detailed plan.

        TASK: {prompt}

        DOCUMENT CONTEXT SAMPLE:
        {self._context_excerpt(context, 1000)}

        OUTPUT REQUIREMENTS:
        {json.dumps(output_specs, indent=2)}

        Create a detailed documentation plan in JSON format with the following structure:
        {{
            "title": "Document Title",
            "document_type": "report|manual|guide|whitepaper|etc",
            "audience": "technical|general|executive|etc",
            "detailed_structure": [
                {{
                    "title": "Chapter/Section Title",
                    "key_points": ["point1", "point2", ...],
                    "subsections": ["subsection1", "subsection2", ...],
                    "importance": "high|medium|low",
                    "estimated_length": "short|medium|long"
                }},
                ... more sections ...
            ],
            "key_topics": ["topic1", "topic2", ...],
            "tone": "formal|conversational|instructional|etc",
            "recommended_format": "markdown|html|text|etc",
            "formatting_requirements": ["requirement1", "requirement2", ...],
            "executive_summary": "Brief description of what the document will cover",
            "feedback": "Brief message explaining the documentation approach"
        }}

        Only return valid JSON. No preamble or explanations.
        """

        try:
            response = await self.mydom.call_ai([
                {"role": "system", "content": "You are a documentation expert. Respond with valid JSON only."},
                {"role": "user", "content": analysis_prompt},
            ])

            # The model may wrap the JSON in prose; keep the outermost {...} span.
            json_start = response.find("{")
            json_end = response.rfind("}") + 1
            if json_start >= 0 and json_end > json_start:
                return json.loads(response[json_start:json_end])

        except Exception as e:
            # Planning is best-effort: any failure falls back to the generic plan.
            logger.warning("Error creating documentation plan: %s", e)

        return self._default_plan()

    def _plan_sections(self, detailed_structure: Any, reserved_titles: Any) -> List[Dict[str, Any]]:
        """Normalize planned sections and drop those produced by dedicated generation steps.

        Args:
            detailed_structure: "detailed_structure" value of the plan.
            reserved_titles: Case-folded titles that are generated separately
                (introduction, conclusion, ...) and must not be repeated.

        Returns:
            List of section dictionaries, each with a non-empty string "title".
        """
        if not isinstance(detailed_structure, list) or not detailed_structure:
            # Fallback structure if none provided
            detailed_structure = self._default_plan()["detailed_structure"]

        planned = []
        for entry in detailed_structure:
            if isinstance(entry, str):
                entry = {"title": entry}
            if not isinstance(entry, dict):
                continue
            section_title = str(entry.get("title") or f"Section {len(planned) + 1}")
            if section_title.strip().casefold() in reserved_titles:
                # Introduction/conclusion get their own AI calls; generating
                # them again here would duplicate them in the document.
                continue
            planned.append({**entry, "title": section_title})
        return planned

    @staticmethod
    def _assemble_document(format_type: str, title: str, executive_summary: str,
                           introduction: str, sections: List[Any], conclusion: str) -> str:
        """Combine the generated parts into one markdown, HTML or plain-text document.

        ``sections`` is a list of ``(section_title, section_content)`` pairs.
        Generated content is inserted as-is; only titles are escaped for HTML.
        """
        if format_type in ("md", "markdown"):
            parts = [f"# {title}\n\n"]
            if executive_summary:
                parts.append(f"## Executive Summary\n\n{executive_summary}\n\n")
            parts.append(f"{introduction}\n\n")
            for section_title, section_content in sections:
                # Ensure section starts with heading if not already
                if not str(section_content).strip().startswith("#"):
                    parts.append(f"## {section_title}\n\n")
                parts.append(f"{section_content}\n\n")
            parts.append(f"## Conclusion\n\n{conclusion}\n")
            return "".join(parts)

        if format_type == "html":
            safe_title = html.escape(title)
            parts = [
                f"<html>\n<head>\n<title>{safe_title}</title>\n</head>\n<body>\n",
                f"<h1>{safe_title}</h1>\n\n",
            ]
            if executive_summary:
                parts.append(f"<h2>Executive Summary</h2>\n<div>{executive_summary}</div>\n\n")
            parts.append(f"<div>{introduction}</div>\n\n")
            for section_title, section_content in sections:
                parts.append(f"<h2>{html.escape(section_title)}</h2>\n<div>{section_content}</div>\n\n")
            parts.append(f"<h2>Conclusion</h2>\n<div>{conclusion}</div>\n")
            parts.append("</body>\n</html>")
            return "".join(parts)

        # Plain text format (also used for every other extension)
        parts = [f"{title}\n{'=' * len(title)}\n\n"]
        if executive_summary:
            parts.append(f"EXECUTIVE SUMMARY\n{'-' * 17}\n\n{executive_summary}\n\n")
        parts.append(f"{introduction}\n\n")
        for section_title, section_content in sections:
            parts.append(f"{section_title}\n{'-' * len(section_title)}\n\n{section_content}\n\n")
        parts.append(f"CONCLUSION\n{'-' * 10}\n\n{conclusion}\n")
        return "".join(parts)

    @staticmethod
    def _error_content(format_type: str, error: Exception) -> str:
        """Build the body of the placeholder document returned when generation fails."""
        if format_type in ("md", "markdown"):
            return f"# Error in Documentation\n\nThere was an error generating the documentation: {str(error)}"
        if format_type == "html":
            # The exception text is arbitrary, so escape it before embedding in HTML.
            return (
                "<html><body><h1>Error in Documentation</h1>"
                f"<p>There was an error generating the documentation: {html.escape(str(error))}</p>"
                "</body></html>"
            )
        return f"Error in Documentation\n\nThere was an error generating the documentation: {str(error)}"

    async def _create_document_multi_step(self, prompt: str, context: str, output_label: str,
                                          output_description: str, documentation_plan: Dict) -> Dict:
        """
        Create a document using a multi-step approach with separate AI calls for each section.

        Args:
            prompt: Original task prompt
            context: Document context
            output_label: Output filename; its extension selects the format
                ("md" when there is no extension)
            output_description: Description of desired output (currently not
                used by the generation prompts)
            documentation_plan: Documentation plan from AI

        Returns:
            Document object with "label", "content" and "metadata.content_type".
            If any generation step fails, "content" is an error notice in the
            requested format instead of the document.
        """
        # Determine format from filename
        format_type = output_label.rsplit(".", 1)[-1].lower() if "." in output_label else "md"

        # Map format to content_type
        content_type = self._CONTENT_TYPES.get(format_type, "text/plain")

        # Plan values come from AI-generated JSON, so coerce them to text
        # rather than trusting their types.
        title = str(documentation_plan.get("title") or "Documentation")
        document_type = str(documentation_plan.get("document_type") or "document")
        audience = str(documentation_plan.get("audience") or "general")
        tone = str(documentation_plan.get("tone") or "formal")
        topics_text = self._join_items(documentation_plan.get("key_topics"))

        wants_summary = document_type.strip().lower() in self._SUMMARY_DOCUMENT_TYPES

        # Parts with dedicated generation steps must not reappear as sections.
        reserved_titles = {"introduction", "conclusion"}
        if wants_summary:
            reserved_titles.add("executive summary")
        planned_sections = self._plan_sections(
            documentation_plan.get("detailed_structure"), reserved_titles
        )

        try:
            # Step 1: Generate document introduction
            intro_prompt = f"""
            Create the introduction for a {document_type} titled "{title}".

            DOCUMENT OVERVIEW:
            - Type: {document_type}
            - Audience: {audience}
            - Tone: {tone}
            - Key Topics: {topics_text}
            - Format: {format_type}

            TASK CONTEXT: {prompt}

            This introduction should:
            1. Clearly state the purpose and scope of the document
            2. Provide context and background information
            3. Outline what the reader will find in the document
            4. Set the appropriate tone for the {audience} audience

            The introduction should be professional and engaging, formatted according to {format_type} standards.
            """

            introduction = await self.mydom.call_ai([
                {"role": "system", "content": f"You are a documentation expert creating an introduction in {format_type} format."},
                {"role": "user", "content": intro_prompt},
            ], produce_user_answer=True)

            # Step 2: Generate executive summary (if applicable)
            executive_summary = ""
            if wants_summary:
                summary_prompt = f"""
                Create an executive summary for a {document_type} titled "{title}".

                DOCUMENT OVERVIEW:
                - Type: {document_type}
                - Audience: {audience}
                - Key Topics: {topics_text}

                TASK CONTEXT: {prompt}

                This executive summary should:
                1. Provide a concise overview of the entire document
                2. Highlight key findings, recommendations, or conclusions
                3. Be suitable for executives or busy readers who may only read this section
                4. Be professionally formatted according to {format_type} standards

                Keep the summary focused and impactful, approximately 200-300 words.
                """

                executive_summary = await self.mydom.call_ai([
                    {"role": "system", "content": f"You are a documentation expert creating an executive summary in {format_type} format."},
                    {"role": "user", "content": summary_prompt},
                ], produce_user_answer=True)

            # Step 3: Generate each section
            context_excerpt = self._context_excerpt(context, 500)
            sections = []

            for section in planned_sections:
                section_title = section["title"]

                # Adjust depth based on importance
                detail_level = "high" if section.get("importance", "medium") == "high" else "medium"

                section_prompt = f"""
                Create the "{section_title}" section for a {document_type} titled "{title}".

                SECTION DETAILS:
                - Title: {section_title}
                - Key Points to Cover: {self._join_items(section.get("key_points"))}
                - Subsections: {self._join_items(section.get("subsections"))}
                - Detail Level: {detail_level}

                DOCUMENT CONTEXT:
                - Type: {document_type}
                - Audience: {audience}
                - Tone: {tone}
                - Format: {format_type}

                TASK CONTEXT: {prompt}

                AVAILABLE INFORMATION:
                {context_excerpt}

                This section should:
                1. Be comprehensive and well-structured
                2. Cover all the key points listed
                3. Include the specified subsections with appropriate headings
                4. Maintain a {tone} tone suitable for the {audience} audience
                5. Be properly formatted according to {format_type} standards
                6. Include specific examples, data, or evidence where appropriate

                Be thorough in your coverage of this section, providing substantive content.
                """

                section_content = await self.mydom.call_ai([
                    {"role": "system", "content": f"You are a documentation expert creating detailed content for the {section_title} section."},
                    {"role": "user", "content": section_prompt},
                ], produce_user_answer=True)

                sections.append((section_title, section_content))

            # Step 4: Generate conclusion
            conclusion_prompt = f"""
            Create the conclusion for a {document_type} titled "{title}".

            DOCUMENT OVERVIEW:
            - Type: {document_type}
            - Audience: {audience}
            - Key Topics: {topics_text}

            TASK CONTEXT: {prompt}

            This conclusion should:
            1. Summarize the key points covered in the document
            2. Provide closure to the topics discussed
            3. Include any relevant recommendations or next steps
            4. Leave the reader with a clear understanding of the document's significance

            The conclusion should be professional and impactful, formatted according to {format_type} standards.
            """

            conclusion = await self.mydom.call_ai([
                {"role": "system", "content": f"You are a documentation expert creating a conclusion in {format_type} format."},
                {"role": "user", "content": conclusion_prompt},
            ], produce_user_answer=True)

            # Step 5: Assemble the complete document
            document_content = self._assemble_document(
                format_type, title, executive_summary, introduction, sections, conclusion
            )

        except Exception as e:
            # Best-effort: a failed generation still yields a document object
            # so the caller's output list stays aligned with its specs.
            logger.error("Error creating document: %s", e, exc_info=True)
            document_content = self._error_content(format_type, e)

        # Create document object
        return {
            "label": output_label,
            "content": document_content,
            "metadata": {
                "content_type": content_type,
            },
        }


# Factory function for the Documentation agent
def get_documentation_agent():
    """Create a new Documentation agent and hand it back to the caller."""
    agent = AgentDocumentation()
    return agent