# gateway/modules/chat_agent_documentation.py

"""
Documentation agent for creating documentation, reports, and structured content.
Reimagined with an output-first, AI-driven approach with multi-step document generation.
"""

import logging
import json
from typing import Dict, Any, List

from modules.chat_registry import AgentBase

logger = logging.getLogger(__name__)

class AgentDocumentation(AgentBase):
    """AI-driven agent for creating documentation and structured content using multi-step generation"""

    def __init__(self):
        """Run the base-class initializer, then set this agent's name, description and capability tags."""
        super().__init__()
        self.name = "documentation"
        self.description = (
            "Creates structured documentation, reports, and content "
            "using AI with multi-step generation"
        )
        # Capability tags, kept in their original order.
        capability_tags = (
            "report_generation",
            "documentation",
            "content_structuring",
            "technical_writing",
            "knowledge_organization",
        )
        self.capabilities = list(capability_tags)

    def set_dependencies(self, mydom=None):
        """Set external dependencies for the agent."""
        self.mydom = mydom

    async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a task by focusing on required outputs and using AI to generate them.

        Args:
            task: Task dictionary with prompt, input_documents, output_specifications

        Returns:
            Dictionary with feedback and documents
        """
        try:
            # Extract task information
            prompt = task.get("prompt", "")
            input_documents = task.get("input_documents", [])
            output_specs = task.get("output_specifications", [])

            # Check AI service
            if not self.mydom:
                return {
                    "feedback": "The Documentation agent requires an AI service to function.",
                    "documents": []
                }

            # Extract context from input documents - focusing only on data_extracted
            document_context = self._extract_document_context(input_documents)

            # Create task analysis to understand the requirements
            documentation_plan = await self._analyze_task(prompt, document_context, output_specs)

            # Generate all required output documents
            documents = []

            # If no output specs provided, create default document
            if not output_specs:
                default_format = documentation_plan.get("recommended_format", "markdown")
                default_title = documentation_plan.get("title", "Documentation")
                safe_title = self._sanitize_filename(default_title)

                output_specs = [
                    {"label": f"{safe_title}.{default_format}", "description": "Comprehensive documentation"}
                ]

            # Process each output specification
            for spec in output_specs:
                output_label = spec.get("label", "")
                output_description = spec.get("description", "")

                # Generate the document using multi-step approach
                document = await self._create_document_multi_step(
                    prompt,
                    document_context,
                    output_label,
                    output_description,
                    documentation_plan
                )

                documents.append(document)

            # Generate feedback
            feedback = documentation_plan.get("feedback", f"Created {len(documents)} documents based on your requirements.")

            return {
                "feedback": feedback,
                "documents": documents
            }

        except Exception as e:
            logger.error(f"Error in documentation generation: {str(e)}", exc_info=True)
            return {
                "feedback": f"Error during documentation generation: {str(e)}",
                "documents": []
            }

    def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
        """
        Extract context from input documents, focusing on data_extracted.

        Args:
            documents: List of document objects

        Returns:
            Extracted context as text
        """
        context_parts = []

        for doc in documents:
            doc_name = doc.get("name", "unnamed")
            if doc.get("ext"):
                doc_name = f"{doc_name}.{doc.get('ext')}"

            context_parts.append(f"\n\n--- {doc_name} ---\n")

            # Process contents for data_extracted
            for content in doc.get("contents", []):
                if content.get("data_extracted"):
                    context_parts.append(content.get("data_extracted", ""))

        return "\n".join(context_parts)

    def _sanitize_filename(self, filename: str) -> str:
        """
        Sanitize a filename by removing invalid characters.

        Args:
            filename: Filename to sanitize

        Returns:
            Sanitized filename
        """
        # Replace invalid characters with underscores
        invalid_chars = r'<>:"/\|?*'
        for char in invalid_chars:
            filename = filename.replace(char, '_')

        # Trim filename if too long
        if len(filename) > 100:
            filename = filename[:97] + "..."

        return filename

    async def _analyze_task(self, prompt: str, context: str, output_specs: List) -> Dict:
        """
        Use AI to analyze the task and create a documentation plan.

        Args:
            prompt: The task prompt
            context: Document context
            output_specs: Output specifications

        Returns:
            Documentation plan dictionary
        """
        analysis_prompt = f"""
        Analyze this documentation task and create a detailed plan.

        TASK: {prompt}

        DOCUMENT CONTEXT SAMPLE:
        {context[:1000]}... (truncated)

        OUTPUT REQUIREMENTS:
        {json.dumps(output_specs, indent=2)}

        Create a detailed documentation plan in JSON format with the following structure:
        {{
            "title": "Document Title",
            "document_type": "report|manual|guide|whitepaper|etc",
            "audience": "technical|general|executive|etc",
            "detailed_structure": [
                {{
                    "title": "Chapter/Section Title",
                    "key_points": ["point1", "point2", ...],
                    "subsections": ["subsection1", "subsection2", ...],
                    "importance": "high|medium|low",
                    "estimated_length": "short|medium|long"
                }},
                ... more sections ...
            ],
            "key_topics": ["topic1", "topic2", ...],
            "tone": "formal|conversational|instructional|etc",
            "recommended_format": "markdown|html|text|etc",
            "formatting_requirements": ["requirement1", "requirement2", ...],
            "executive_summary": "Brief description of what the document will cover",
            "feedback": "Brief message explaining the documentation approach"
        }}

        Only return valid JSON. No preamble or explanations.
        """

        try:
            response = await self.mydom.call_ai([
                {"role": "system", "content": "You are a documentation expert. Respond with valid JSON only."},
                {"role": "user", "content": analysis_prompt}
            ])

            # Extract JSON from response
            json_start = response.find('{')
            json_end = response.rfind('}') + 1

            if json_start >= 0 and json_end > json_start:
                plan = json.loads(response[json_start:json_end])
                return plan
            else:
                # Fallback if JSON not found
                return {
                    "title": "Documentation",
                    "document_type": "report",
                    "audience": "general",
                    "detailed_structure": [
                        {
                            "title": "Introduction",
                            "key_points": ["Purpose", "Scope"],
                            "subsections": [],
                            "importance": "high",
                            "estimated_length": "short"
                        },
                        {
                            "title": "Main Content",
                            "key_points": ["Core Information"],
                            "subsections": ["Key Findings", "Analysis"],
                            "importance": "high",
                            "estimated_length": "long"
                        },
                        {
                            "title": "Conclusion",
                            "key_points": ["Summary", "Next Steps"],
                            "subsections": [],
                            "importance": "medium",
                            "estimated_length": "short"
                        }
                    ],
                    "key_topics": ["General Information"],
                    "tone": "formal",
                    "recommended_format": "markdown",
                    "formatting_requirements": ["Clear headings", "Professional formatting"],
                    "executive_summary": "A comprehensive documentation covering the requested topics.",
                    "feedback": "Created documentation based on your requirements."
                }

        except Exception as e:
            logger.warning(f"Error creating documentation plan: {str(e)}")
            return {
                "title": "Documentation",
                "document_type": "report",
                "audience": "general",
                "detailed_structure": [
                    {
                        "title": "Introduction",
                        "key_points": ["Purpose", "Scope"],
                        "subsections": [],
                        "importance": "high",
                        "estimated_length": "short"
                    },
                    {
                        "title": "Main Content",
                        "key_points": ["Core Information"],
                        "subsections": ["Key Findings", "Analysis"],
                        "importance": "high",
                        "estimated_length": "long"
                    },
                    {
                        "title": "Conclusion",
                        "key_points": ["Summary", "Next Steps"],
                        "subsections": [],
                        "importance": "medium",
                        "estimated_length": "short"
                    }
                ],
                "key_topics": ["General Information"],
                "tone": "formal",
                "recommended_format": "markdown",
                "formatting_requirements": ["Clear headings", "Professional formatting"],
                "executive_summary": "A comprehensive documentation covering the requested topics.",
                "feedback": "Created documentation based on your requirements."
            }

    async def _create_document_multi_step(self, prompt: str, context: str, output_label: str,
                                     output_description: str, documentation_plan: Dict) -> Dict:
        """
        Create a document using a multi-step approach with separate AI calls for each section.

        Args:
            prompt: Original task prompt
            context: Document context
            output_label: Output filename
            output_description: Description of desired output
            documentation_plan: Documentation plan from AI

        Returns:
            Document object
        """
        # Determine format from filename
        format_type = output_label.split('.')[-1].lower() if '.' in output_label else "md"

        # Map format to content_type
        content_type_map = {
            "md": "text/markdown",
            "markdown": "text/markdown",
            "html": "text/html",
            "txt": "text/plain",
            "text": "text/plain",
            "json": "application/json",
            "csv": "text/csv"
        }

        content_type = content_type_map.get(format_type, "text/plain")

        # Get document information
        title = documentation_plan.get("title", "Documentation")
        document_type = documentation_plan.get("document_type", "document")
        audience = documentation_plan.get("audience", "general")
        tone = documentation_plan.get("tone", "formal")
        key_topics = documentation_plan.get("key_topics", [])
        formatting_requirements = documentation_plan.get("formatting_requirements", [])

        # Get the detailed structure
        detailed_structure = documentation_plan.get("detailed_structure", [])
        if not detailed_structure:
            # Fallback structure if none provided
            detailed_structure = [
                {
                    "title": "Introduction",
                    "key_points": ["Purpose", "Scope"],
                    "importance": "high"
                },
                {
                    "title": "Main Content",
                    "key_points": ["Core Information"],
                    "importance": "high"
                },
                {
                    "title": "Conclusion",
                    "key_points": ["Summary", "Next Steps"],
                    "importance": "medium"
                }
            ]

        try:
            # Step 1: Generate document introduction
            intro_prompt = f"""
            Create the introduction for a {document_type} titled "{title}".

            DOCUMENT OVERVIEW:
            - Type: {document_type}
            - Audience: {audience}
            - Tone: {tone}
            - Key Topics: {', '.join(key_topics)}
            - Format: {format_type}

            TASK CONTEXT: {prompt}

            This introduction should:
            1. Clearly state the purpose and scope of the document
            2. Provide context and background information
            3. Outline what the reader will find in the document
            4. Set the appropriate tone for the {audience} audience

            The introduction should be professional and engaging, formatted according to {format_type} standards.
            """

            introduction = await self.mydom.call_ai([
                {"role": "system", "content": f"You are a documentation expert creating an introduction in {format_type} format."},
                {"role": "user", "content": intro_prompt}
            ], produce_user_answer = True)

            # Step 2: Generate executive summary (if applicable)
            if document_type in ["report", "whitepaper", "case study"]:
                summary_prompt = f"""
                Create an executive summary for a {document_type} titled "{title}".

                DOCUMENT OVERVIEW:
                - Type: {document_type}
                - Audience: {audience}
                - Key Topics: {', '.join(key_topics)}

                TASK CONTEXT: {prompt}

                This executive summary should:
                1. Provide a concise overview of the entire document
                2. Highlight key findings, recommendations, or conclusions
                3. Be suitable for executives or busy readers who may only read this section
                4. Be professionally formatted according to {format_type} standards

                Keep the summary focused and impactful, approximately 200-300 words.
                """

                executive_summary = await self.mydom.call_ai([
                    {"role": "system", "content": f"You are a documentation expert creating an executive summary in {format_type} format."},
                    {"role": "user", "content": summary_prompt}
                ], produce_user_answer = True)
            else:
                executive_summary = ""

            # Step 3: Generate each section
            sections = []

            for section in detailed_structure:
                section_title = section.get("title", "Section")
                key_points = section.get("key_points", [])
                subsections = section.get("subsections", [])
                importance = section.get("importance", "medium")

                # Adjust depth based on importance
                detail_level = "high" if importance == "high" else "medium"

                section_prompt = f"""
                Create the "{section_title}" section for a {document_type} titled "{title}".

                SECTION DETAILS:
                - Title: {section_title}
                - Key Points to Cover: {', '.join(key_points)}
                - Subsections: {', '.join(subsections)}
                - Detail Level: {detail_level}

                DOCUMENT CONTEXT:
                - Type: {document_type}
                - Audience: {audience}
                - Tone: {tone}
                - Format: {format_type}

                TASK CONTEXT: {prompt}

                AVAILABLE INFORMATION:
                {context[:500]}... (truncated)

                This section should:
                1. Be comprehensive and well-structured
                2. Cover all the key points listed
                3. Include the specified subsections with appropriate headings
                4. Maintain a {tone} tone suitable for the {audience} audience
                5. Be properly formatted according to {format_type} standards
                6. Include specific examples, data, or evidence where appropriate

                Be thorough in your coverage of this section, providing substantive content.
                """

                section_content = await self.mydom.call_ai([
                    {"role": "system", "content": f"You are a documentation expert creating detailed content for the {section_title} section."},
                    {"role": "user", "content": section_prompt}
                ], produce_user_answer = True)

                sections.append(section_content)

            # Step 4: Generate conclusion
            conclusion_prompt = f"""
            Create the conclusion for a {document_type} titled "{title}".

            DOCUMENT OVERVIEW:
            - Type: {document_type}
            - Audience: {audience}
            - Key Topics: {', '.join(key_topics)}

            TASK CONTEXT: {prompt}

            This conclusion should:
            1. Summarize the key points covered in the document
            2. Provide closure to the topics discussed
            3. Include any relevant recommendations or next steps
            4. Leave the reader with a clear understanding of the document's significance

            The conclusion should be professional and impactful, formatted according to {format_type} standards.
            """

            conclusion = await self.mydom.call_ai([
                {"role": "system", "content": f"You are a documentation expert creating a conclusion in {format_type} format."},
                {"role": "user", "content": conclusion_prompt}
            ], produce_user_answer = True)

            # Step 5: Assemble the complete document
            if format_type in ["md", "markdown"]:
                # Markdown format
                document_content = f"# {title}\n\n"

                if executive_summary:
                    document_content += f"## Executive Summary\n\n{executive_summary}\n\n"

                document_content += f"{introduction}\n\n"

                for i, section_content in enumerate(sections):
                    # Ensure section starts with heading if not already
                    section_title = detailed_structure[i].get("title", f"Section {i+1}")
                    if not section_content.strip().startswith("#"):
                        document_content += f"## {section_title}\n\n"
                    document_content += f"{section_content}\n\n"

                document_content += f"## Conclusion\n\n{conclusion}\n"

            elif format_type == "html":
                # HTML format
                document_content = f"<html>\n<head>\n<title>{title}</title>\n</head>\n<body>\n"
                document_content += f"<h1>{title}</h1>\n\n"

                if executive_summary:
                    document_content += f"<h2>Executive Summary</h2>\n<div>{executive_summary}</div>\n\n"

                document_content += f"<div>{introduction}</div>\n\n"

                for i, section_content in enumerate(sections):
                    section_title = detailed_structure[i].get("title", f"Section {i+1}")
                    document_content += f"<h2>{section_title}</h2>\n<div>{section_content}</div>\n\n"

                document_content += f"<h2>Conclusion</h2>\n<div>{conclusion}</div>\n"
                document_content += "</body>\n</html>"

            else:
                # Plain text format
                document_content = f"{title}\n{'=' * len(title)}\n\n"

                if executive_summary:
                    document_content += f"EXECUTIVE SUMMARY\n{'-' * 17}\n\n{executive_summary}\n\n"

                document_content += f"{introduction}\n\n"

                for i, section_content in enumerate(sections):
                    section_title = detailed_structure[i].get("title", f"Section {i+1}")
                    document_content += f"{section_title}\n{'-' * len(section_title)}\n\n{section_content}\n\n"

                document_content += f"CONCLUSION\n{'-' * 10}\n\n{conclusion}\n"

            # Create document object
            return {
                "label": output_label,
                "content": document_content,
                "metadata": {
                    "content_type": content_type
                }
            }

        except Exception as e:
            logger.error(f"Error creating document: {str(e)}", exc_info=True)

            # Create a simple error document
            if format_type in ["md", "markdown"]:
                content = f"# Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"
            elif format_type == "html":
                content = f"<html><body><h1>Error in Documentation</h1><p>There was an error generating the documentation: {str(e)}</p></body></html>"
            else:
                content = f"Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"

            return {
                "label": output_label,
                "content": content,
                "metadata": {
                    "content_type": content_type
                }
            }


# Factory function for the Documentation agent
def get_documentation_agent():
    """Build a new ``AgentDocumentation`` and hand it back to the caller."""
    agent = AgentDocumentation()
    return agent