gateway/modules/chat_agent_documentation.py
2025-04-21 17:44:28 +02:00

457 lines
No EOL
17 KiB
Python

"""
Documentation agent for creating documentation, reports, and structured content.
Optimized for the new task-based processing.
"""
import logging
import uuid
from typing import Dict, Any, List
from modules.chat_registry import AgentBase
logger = logging.getLogger(__name__)
class AgentDocumentation(AgentBase):
    """Agent for creating documentation and structured content.

    The agent turns a task dictionary (prompt, input documents, output
    specifications) into one generated document per requested output by
    prompting the injected AI service. The AI service is supplied through
    set_dependencies(); without it process_task() returns a configuration
    error instead of documents.
    """

    # Prompts longer than this many characters are always treated as complex.
    _COMPLEX_PROMPT_LENGTH = 500

    # Filename suffixes -> output format. Checked in order; anything that
    # matches no entry falls back to _DEFAULT_FORMAT.
    _FORMAT_BY_SUFFIX = (
        ((".md", ".markdown"), "markdown"),
        ((".html", ".htm"), "html"),
        ((".txt",), "text"),
        ((".csv",), "csv"),
        ((".json",), "json"),
    )
    _DEFAULT_FORMAT = "markdown"

    # Filename keywords -> document type. Checked in order, first match wins.
    _DOCUMENT_TYPE_KEYWORDS = (
        (("manual", "guide"), "Manual"),
        (("report", "analysis"), "Report"),
        (("process", "workflow"), "Process Documentation"),
        (("present", "slide"), "Presentation"),
    )
    _DEFAULT_DOCUMENT_TYPE = "Document"

    # Characters replaced by "_" in filenames: the usual reserved set plus
    # ASCII control characters (a generated title may contain newlines/tabs).
    _INVALID_FILENAME_CHARS = '<>:"/\\|?*'
    _FILENAME_TRANSLATION = str.maketrans(
        dict.fromkeys(_INVALID_FILENAME_CHARS + "".join(map(chr, range(32))), "_")
    )
    _MAX_FILENAME_LENGTH = 100

    # Characters trimmed from both ends of an AI-generated title.
    _TITLE_STRIP_CHARS = "\"'#*- \n\t"

    def __init__(self):
        """Initialize the documentation agent."""
        super().__init__()
        self.name = "documentation"
        self.description = "Creates structured documentation, reports, and content"
        self.capabilities = [
            "report_generation",
            "documentation",
            "content_structuring",
            "technical_writing",
            "knowledge_organization",
        ]
        # Guarantee the attribute exists so that using the agent before
        # set_dependencies() yields the "not properly configured" response
        # instead of an AttributeError. AgentBase is not visible here, so a
        # value it may already have provided is left untouched.
        if not hasattr(self, "ai_service"):
            self.ai_service = None

    def set_dependencies(self, ai_service=None):
        """Set external dependencies for the agent.

        Args:
            ai_service: Object exposing an awaitable ``call_api(messages)``
                method; it is used for title and document generation.
        """
        self.ai_service = ai_service

    async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a standardized task structure and create documentation.

        Args:
            task: A dictionary containing:
                - task_id: Unique ID for this task
                - prompt: The main instruction for the agent
                - input_documents: List of documents to process
                - output_specifications: List of required output documents
                - context: Additional contextual information
                Only prompt, input_documents and output_specifications are
                read here; missing or None values are treated as empty.

        Returns:
            A dictionary containing:
                - feedback: Text response explaining the created documentation
                - documents: List of created document objects, each a dict
                  with "label" and "content"
            Errors are not raised; they are logged and reported through
            "feedback" with an empty "documents" list.
        """
        try:
            # "or" also covers keys that are present but explicitly None.
            prompt = task.get("prompt") or ""
            input_documents = task.get("input_documents") or []
            output_specs = task.get("output_specifications") or []

            if not self.ai_service:
                logger.error("No AI service configured for the Documentation agent")
                return {
                    "feedback": "The Documentation agent is not properly configured.",
                    "documents": []
                }

            document_context = self._extract_document_context(input_documents)
            title = await self._generate_title(prompt, document_context)

            # Complexity depends only on the prompt, so the generator is
            # chosen once instead of once per output specification.
            if self._assess_complexity(prompt):
                generate = self._generate_complex_document
            else:
                generate = self._generate_simple_document

            generated_documents = []
            for spec in output_specs:
                output_label = spec.get("label") or ""
                output_description = spec.get("description") or ""
                format_type, document_type = self._determine_format_and_type(output_label)
                content = await generate(
                    prompt,
                    document_context,
                    document_type,
                    title,
                    output_label,
                    output_description,
                    format_type
                )
                generated_documents.append({
                    "label": output_label,
                    "content": content
                })

            # If no specific outputs were requested, create one default
            # markdown document named after the generated title.
            if not output_specs:
                content = await self._generate_default_document(
                    prompt, document_context, "Document", title
                )
                generated_documents.append({
                    "label": f"{self._sanitize_filename(title)}.md",
                    "content": content
                })

            if len(generated_documents) == 1:
                feedback = f"I've created a document titled '{title}'."
            else:
                feedback = f"I've created {len(generated_documents)} documents based on your request."
            return {
                "feedback": feedback,
                "documents": generated_documents
            }
        except Exception as e:
            error_msg = f"Error creating documentation: {str(e)}"
            # logger.exception keeps the traceback, which logger.error dropped.
            logger.exception(error_msg)
            return {
                "feedback": f"An error occurred while creating the documentation: {str(e)}",
                "documents": []
            }

    def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
        """
        Extract context from input documents.

        For every document a "--- <name> ---" header is emitted, followed by
        the "data" of each content entry whose metadata has a truthy
        "is_text" flag. Parts are joined with blank lines.

        Args:
            documents: List of document objects (dicts with optional "name"
                and "contents" keys)

        Returns:
            Extracted context as text; empty string when there are no documents
        """
        if not documents:
            return ""
        context_parts = []
        for doc in documents:
            doc_name = doc.get("name", "Unnamed document")
            context_parts.append(f"--- {doc_name} ---")
            for content in doc.get("contents") or []:
                metadata = content.get("metadata") or {}
                if not metadata.get("is_text", False):
                    continue
                data = content.get("data", "")
                # str.join only accepts strings; normalise anything else so a
                # single odd entry cannot abort the whole task.
                if isinstance(data, bytes):
                    data = data.decode("utf-8", errors="replace")
                elif data is None:
                    data = ""
                elif not isinstance(data, str):
                    data = str(data)
                context_parts.append(data)
        return "\n\n".join(context_parts)

    def _determine_format_and_type(self, output_label: str) -> tuple:
        """
        Determine the format type and document type based on the filename.

        The format comes from the file extension (markdown when the
        extension is unknown or missing); the document type comes from
        keywords anywhere in the lower-cased filename ("Document" when none
        match).

        Args:
            output_label: Output filename

        Returns:
            Tuple of (format_type, document_type)
        """
        label = output_label.lower()

        format_type = self._DEFAULT_FORMAT
        for suffixes, candidate_format in self._FORMAT_BY_SUFFIX:
            if label.endswith(suffixes):
                format_type = candidate_format
                break

        document_type = self._DEFAULT_DOCUMENT_TYPE
        for keywords, candidate_type in self._DOCUMENT_TYPE_KEYWORDS:
            if any(keyword in label for keyword in keywords):
                document_type = candidate_type
                break

        return format_type, document_type

    def _assess_complexity(self, prompt: str) -> bool:
        """
        Assess the complexity of the task.

        The check is language-agnostic: a prompt counts as complex when it is
        long, when it contains both a colon and a line break (section-like
        structure), or when any line starts with a list/heading marker
        ("-", "*" or "#").

        Args:
            prompt: Task description

        Returns:
            True for complex tasks, False otherwise
        """
        if len(prompt) > self._COMPLEX_PROMPT_LENGTH:
            return True
        has_sections = ":" in prompt and "\n" in prompt
        # Only markers at the start of a line indicate structure; a hyphen
        # inside a word ("step-by-step") must not make the task complex.
        has_lists = any(
            line.lstrip().startswith(("-", "*", "#"))
            for line in prompt.splitlines()
        )
        return has_sections or has_lists

    def _sanitize_filename(self, filename: str) -> str:
        """
        Sanitize a filename by removing invalid characters.

        Reserved characters and ASCII control characters are replaced with
        underscores; names longer than 100 characters are cut to 97
        characters plus "...".

        Args:
            filename: Filename to sanitize

        Returns:
            Sanitized filename
        """
        filename = filename.translate(self._FILENAME_TRANSLATION)
        if len(filename) > self._MAX_FILENAME_LENGTH:
            filename = filename[:self._MAX_FILENAME_LENGTH - 3] + "..."
        return filename

    @staticmethod
    def _fallback_title() -> str:
        """Return a generic title with a short random suffix."""
        return f"Document {uuid.uuid4().hex[:8]}"

    def _clean_title(self, response: Any) -> str:
        """Return the first non-empty line of an AI reply, stripped of quotes and markdown markers."""
        if not isinstance(response, str):
            return ""
        for line in response.splitlines():
            candidate = line.strip(self._TITLE_STRIP_CHARS)
            if candidate:
                return candidate
        return ""

    @staticmethod
    def _build_prompt(lines: List[str]) -> str:
        """Join prompt lines with newlines, adding a leading and a trailing newline."""
        return "\n" + "\n".join(lines) + "\n"

    @staticmethod
    def _task_lines(prompt: str, context: str) -> List[str]:
        """Return the TASK/CONTEXT section shared by all document prompts."""
        return [
            "TASK:",
            f"{prompt}",
            "CONTEXT:",
            f"{context if context else 'No additional context available.'}",
        ]

    @staticmethod
    def _output_requirement_lines(output_label: str, output_description: str,
                                  format_type: str) -> List[str]:
        """Return the OUTPUT REQUIREMENTS section shared by the complex and simple prompts."""
        return [
            "OUTPUT REQUIREMENTS:",
            f"- Filename: {output_label}",
            f"- Description: {output_description}",
            f"- Format: {format_type}",
        ]

    async def _request_document(self, system_prompt: str, generation_prompt: str,
                                title: str, *, ensure_markdown_title: bool) -> str:
        """Send one generation request to the AI service and return the document text.

        Failures never propagate: when no AI service is set or the call
        raises, a short markdown document describing the problem is returned.
        With ensure_markdown_title the result is prefixed with "# <title>"
        unless it already starts with a level-one heading.
        """
        if not self.ai_service:
            return f"# {title}\n\nDocument generation not possible: AI service not available."
        try:
            # NOTE(review): assumes call_api returns the generated text as str
            # - TODO confirm against the AI service implementation.
            content = await self.ai_service.call_api([
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": generation_prompt}
            ])
            if ensure_markdown_title and not content.strip().startswith("# "):
                content = f"# {title}\n\n{content}"
            return content
        except Exception as e:
            logger.exception("Error in document generation: %s", e)
            return f"# {title}\n\nError in document generation: {str(e)}"

    async def _generate_title(self, prompt: str, context: str) -> str:
        """
        Generate a title for the document.

        Args:
            prompt: Task description
            context: Document context (accepted but not included in the
                title request)

        Returns:
            Generated single-line title, or a generic "Document <id>" title
            when no AI service is set, the call fails, or the reply is empty
        """
        if not self.ai_service:
            return self._fallback_title()
        title_prompt = self._build_prompt([
            "Create a concise, professional title for this document based on the following request:",
            f"{prompt}",
            "Reply ONLY with the title, nothing else.",
        ])
        try:
            response = await self.ai_service.call_api([
                {"role": "system", "content": "You create precise document titles."},
                {"role": "user", "content": title_prompt}
            ])
        except Exception as e:
            logger.warning("Error in title generation: %s", e)
            return self._fallback_title()
        # The title is reused in a markdown heading and in a filename, so only
        # the first non-empty line of the reply is kept.
        return self._clean_title(response) or self._fallback_title()

    async def _generate_complex_document(self, prompt: str, context: str, document_type: str,
                                         title: str, output_label: str, output_description: str,
                                         format_type: str) -> str:
        """
        Generate a complex document with structure.

        Args:
            prompt: Task description
            context: Document context
            document_type: Document type
            title: Document title
            output_label: Output filename
            output_description: Description of desired output
            format_type: Output format

        Returns:
            Generated document content; for markdown output the content is
            guaranteed to start with a "# " heading
        """
        generation_prompt = self._build_prompt([
            f'Create a comprehensive, well-structured {document_type} with the title "{title}" based on:',
            *self._task_lines(prompt, context),
            *self._output_requirement_lines(output_label, output_description, format_type),
            "The document should include:",
            "1. A clear introduction with purpose and scope",
            "2. Logically organized sections with headings",
            "3. Detailed content with examples and evidence",
            "4. A conclusion with key insights",
            f"5. Appropriate formatting according to the output format ({format_type})",
            f"The document must perfectly match the {format_type} format.",
        ])
        return await self._request_document(
            f"You create comprehensive, well-structured documentation in {format_type} format.",
            generation_prompt,
            title,
            ensure_markdown_title=(format_type == "markdown"),
        )

    async def _generate_simple_document(self, prompt: str, context: str, document_type: str,
                                        title: str, output_label: str, output_description: str,
                                        format_type: str) -> str:
        """
        Generate a simple document without complex structure.

        Args:
            prompt: Task description
            context: Document context
            document_type: Document type
            title: Document title
            output_label: Output filename
            output_description: Description of desired output
            format_type: Output format

        Returns:
            Generated document content; for markdown output the content is
            guaranteed to start with a "# " heading
        """
        generation_prompt = self._build_prompt([
            f'Create a precise, focused {document_type} with the title "{title}" based on:',
            *self._task_lines(prompt, context),
            *self._output_requirement_lines(output_label, output_description, format_type),
            "The document should be clear, precise, and to the point, without a complex chapter structure.",
            f"Format it according to the output format ({format_type}).",
            f"The document must perfectly match the {format_type} format.",
        ])
        return await self._request_document(
            f"You create precise, focused documentation in {format_type} format.",
            generation_prompt,
            title,
            ensure_markdown_title=(format_type == "markdown"),
        )

    async def _generate_default_document(self, prompt: str, context: str, document_type: str, title: str) -> str:
        """
        Generate a default markdown document when no specific output specifications are present.

        Args:
            prompt: Task description
            context: Document context
            document_type: Document type
            title: Document title

        Returns:
            Generated document content, always starting with a "# " heading
        """
        generation_prompt = self._build_prompt([
            f'Create a structured {document_type} with the title "{title}" based on:',
            *self._task_lines(prompt, context),
            "Format the document with markdown syntax and create a clear, professional structure.",
        ])
        return await self._request_document(
            "You create structured documentation in markdown format.",
            generation_prompt,
            title,
            ensure_markdown_title=True,
        )
# Factory function for the Documentation agent
def get_documentation_agent():
    """
    Build and hand back a new Documentation agent.

    The agent is constructed without arguments, so no AI service is passed
    in here; one can be supplied afterwards through set_dependencies().

    Returns:
        A newly created AgentDocumentation instance
    """
    agent = AgentDocumentation()
    return agent