# gateway/modules/chat_agent_documentation.py

"""
Documentation agent for creating documentation, reports, and structured content.
Optimized for the new task-based processing.
"""

import logging
import uuid
from typing import Dict, Any, List

from modules.chat_registry import AgentBase

logger = logging.getLogger(__name__)

class AgentDocumentation(AgentBase):
    """Agent for creating documentation and structured content"""

    def __init__(self):
        """Initialize the documentation agent"""
        super().__init__()
        self.name = "documentation"
        self.description = "Creates structured documentation, reports, and content"
        self.capabilities = [
            "report_generation",
            "documentation",
            "content_structuring",
            "technical_writing",
            "knowledge_organization"
        ]

    def set_dependencies(self, ai_service=None) -> None:
        """
        Set external dependencies for the agent.

        Args:
            ai_service: Service object stored on the agent; the generation
                methods of this class await its ``call_api`` method.
                Defaults to None, which leaves the agent without an AI service.
        """
        self.ai_service = ai_service

    async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a standardized task structure and create documentation.

        Args:
            task: A dictionary containing:
                - task_id: Unique ID for this task
                - prompt: The main instruction for the agent
                - input_documents: List of documents to process
                - output_specifications: List of required output documents
                - context: Additional contextual information

        Returns:
            A dictionary containing:
                - feedback: Text response explaining the created documentation
                - documents: List of created document objects
        """
        try:
            # Extract relevant task information
            prompt = task.get("prompt", "")
            input_documents = task.get("input_documents", [])
            output_specs = task.get("output_specifications", [])

            # Check if AI service is available
            if not self.ai_service:
                logger.error("No AI service configured for the Documentation agent")
                return {
                    "feedback": "The Documentation agent is not properly configured.",
                    "documents": []
                }

            # Extract context from input documents
            document_context = self._extract_document_context(input_documents)

            # Generate title for the document
            title = await self._generate_title(prompt, document_context)

            # Collect created documents
            generated_documents = []

            # Create a document for each requested output
            for spec in output_specs:
                output_label = spec.get("label", "")
                output_description = spec.get("description", "")

                # Determine format and document type based on file extension
                format_type, document_type = self._determine_format_and_type(output_label)

                # Assess complexity
                is_complex = self._assess_complexity(prompt)

                # Generate document content based on complexity
                if is_complex:
                    content = await self._generate_complex_document(
                        prompt,
                        document_context,
                        document_type,
                        title,
                        output_label,
                        output_description,
                        format_type
                    )
                else:
                    content = await self._generate_simple_document(
                        prompt,
                        document_context,
                        document_type,
                        title,
                        output_label,
                        output_description,
                        format_type
                    )

                # Add document to results list
                generated_documents.append({
                    "label": output_label,
                    "content": content
                })

            # If no specific outputs requested, create default markdown document
            if not output_specs:
                content = await self._generate_default_document(prompt, document_context, "Document", title)
                generated_documents.append({
                    "label": f"{self._sanitize_filename(title)}.md",
                    "content": content
                })

            # Prepare feedback about created documents
            if len(generated_documents) == 1:
                feedback = f"I've created a document titled '{title}'."
            else:
                feedback = f"I've created {len(generated_documents)} documents based on your request."

            return {
                "feedback": feedback,
                "documents": generated_documents
            }

        except Exception as e:
            error_msg = f"Error creating documentation: {str(e)}"
            logger.error(error_msg)
            return {
                "feedback": f"An error occurred while creating the documentation: {str(e)}",
                "documents": []
            }

    def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
        """
        Extract context from input documents.

        Args:
            documents: List of document objects

        Returns:
            Extracted context as text
        """
        if not documents:
            return ""

        context_parts = []

        for doc in documents:
            doc_name = doc.get("name", "Unnamed document")
            context_parts.append(f"--- {doc_name} ---")

            for content in doc.get("contents", []):
                if content.get("metadata", {}).get("is_text", False):
                    context_parts.append(content.get("data", ""))

        return "\n\n".join(context_parts)

    def _determine_format_and_type(self, output_label: str) -> tuple:
        """
        Determine the format type and document type based on the filename.

        Args:
            output_label: Output filename

        Returns:
            Tuple of (format_type, document_type)
        """
        # Extract file extension to determine format
        output_label_lower = output_label.lower()

        # Determine format based on extension
        if output_label_lower.endswith(".md"):
            format_type = "markdown"
        elif output_label_lower.endswith(".html"):
            format_type = "html"
        elif output_label_lower.endswith(".txt"):
            format_type = "text"
        elif output_label_lower.endswith(".csv"):
            format_type = "csv"
        elif output_label_lower.endswith(".json"):
            format_type = "json"
        else:
            # Default to markdown
            format_type = "markdown"

        # Determine document type based on filename or format
        if "manual" in output_label_lower or "guide" in output_label_lower:
            document_type = "Manual"
        elif "report" in output_label_lower or "analysis" in output_label_lower:
            document_type = "Report"
        elif "process" in output_label_lower or "workflow" in output_label_lower:
            document_type = "Process Documentation"
        elif "present" in output_label_lower or "slide" in output_label_lower:
            document_type = "Presentation"
        else:
            document_type = "Document"

        return format_type, document_type

    def _assess_complexity(self, prompt: str) -> bool:
        """
        Assess the complexity of the task.

        Args:
            prompt: Task description

        Returns:
            True for complex tasks, False otherwise
        """
        # Language-agnostic complexity assessment
        prompt_length = len(prompt)

        # Check for structural indicators in a language-agnostic way
        has_sections = ":" in prompt and "\n" in prompt
        has_lists = "-" in prompt or "*" in prompt or "#" in prompt

        # Complex if the prompt is long or contains structural elements
        return prompt_length > 500 or has_sections or has_lists

    def _sanitize_filename(self, filename: str) -> str:
        """
        Sanitize a filename by removing invalid characters.

        Args:
            filename: Filename to sanitize

        Returns:
            Sanitized filename
        """
        # Replace invalid characters with underscores
        invalid_chars = r'<>:"/\|?*'
        for char in invalid_chars:
            filename = filename.replace(char, '_')

        # Trim filename if too long
        if len(filename) > 100:
            filename = filename[:97] + "..."

        return filename

    async def _generate_title(self, prompt: str, context: str) -> str:
        """
        Generate a title for the document.

        Args:
            prompt: Task description
            context: Document context

        Returns:
            Generated title
        """
        if not self.ai_service:
            return f"Document {uuid.uuid4().hex[:8]}"

        title_prompt = f"""
        Create a concise, professional title for this document based on the following request:

        {prompt}

        Reply ONLY with the title, nothing else.
        """

        try:
            title = await self.ai_service.call_api([
                {"role": "system", "content": "You create precise document titles."},
                {"role": "user", "content": title_prompt}
            ])

            # Clean up title
            title = title.strip('"\'#*- \n\t')

            # Return default title if generated title is empty
            if not title:
                return f"Document {uuid.uuid4().hex[:8]}"

            return title

        except Exception as e:
            logger.warning(f"Error in title generation: {str(e)}")
            return f"Document {uuid.uuid4().hex[:8]}"

    async def _generate_complex_document(self, prompt: str, context: str, document_type: str,
                                      title: str, output_label: str, output_description: str,
                                      format_type: str) -> str:
        """
        Generate a complex document with structure.

        Args:
            prompt: Task description
            context: Document context
            document_type: Document type
            title: Document title
            output_label: Output filename
            output_description: Description of desired output
            format_type: Output format

        Returns:
            Generated document content
        """
        if not self.ai_service:
            return f"# {title}\n\nDocument generation not possible: AI service not available."

        generation_prompt = f"""
        Create a comprehensive, well-structured {document_type} with the title "{title}" based on:

        TASK:
        {prompt}

        CONTEXT:
        {context if context else 'No additional context available.'}

        OUTPUT REQUIREMENTS:
        - Filename: {output_label}
        - Description: {output_description}
        - Format: {format_type}

        The document should include:
        1. A clear introduction with purpose and scope
        2. Logically organized sections with headings
        3. Detailed content with examples and evidence
        4. A conclusion with key insights
        5. Appropriate formatting according to the output format ({format_type})

        The document must perfectly match the {format_type} format.
        """

        try:
            content = await self.ai_service.call_api([
                {"role": "system", "content": f"You create comprehensive, well-structured documentation in {format_type} format."},
                {"role": "user", "content": generation_prompt}
            ])

            # For markdown format, ensure the title is at the beginning
            if format_type == "markdown" and not content.strip().startswith("# "):
                content = f"# {title}\n\n{content}"

            return content
        except Exception as e:
            logger.error(f"Error in document generation: {str(e)}")
            return f"# {title}\n\nError in document generation: {str(e)}"

    async def _generate_simple_document(self, prompt: str, context: str, document_type: str,
                                     title: str, output_label: str, output_description: str,
                                     format_type: str) -> str:
        """
        Generate a simple document without complex structure.

        Args:
            prompt: Task description
            context: Document context
            document_type: Document type
            title: Document title
            output_label: Output filename
            output_description: Description of desired output
            format_type: Output format

        Returns:
            Generated document content
        """
        if not self.ai_service:
            return f"# {title}\n\nDocument generation not possible: AI service not available."

        generation_prompt = f"""
        Create a precise, focused {document_type} with the title "{title}" based on:

        TASK:
        {prompt}

        CONTEXT:
        {context if context else 'No additional context available.'}

        OUTPUT REQUIREMENTS:
        - Filename: {output_label}
        - Description: {output_description}
        - Format: {format_type}

        The document should be clear, precise, and to the point, without a complex chapter structure.
        Format it according to the output format ({format_type}).

        The document must perfectly match the {format_type} format.
        """

        try:
            content = await self.ai_service.call_api([
                {"role": "system", "content": f"You create precise, focused documentation in {format_type} format."},
                {"role": "user", "content": generation_prompt}
            ])

            # For markdown format, ensure the title is at the beginning
            if format_type == "markdown" and not content.strip().startswith("# "):
                content = f"# {title}\n\n{content}"

            return content
        except Exception as e:
            logger.error(f"Error in document generation: {str(e)}")
            return f"# {title}\n\nError in document generation: {str(e)}"

    async def _generate_default_document(self, prompt: str, context: str, document_type: str, title: str) -> str:
        """
        Generate a default markdown document when no specific output specifications are present.

        Args:
            prompt: Task description
            context: Document context
            document_type: Document type
            title: Document title

        Returns:
            Generated document content
        """
        if not self.ai_service:
            return f"# {title}\n\nDocument generation not possible: AI service not available."

        generation_prompt = f"""
        Create a structured {document_type} with the title "{title}" based on:

        TASK:
        {prompt}

        CONTEXT:
        {context if context else 'No additional context available.'}

        Format the document with markdown syntax and create a clear, professional structure.
        """

        try:
            content = await self.ai_service.call_api([
                {"role": "system", "content": "You create structured documentation in markdown format."},
                {"role": "user", "content": generation_prompt}
            ])

            # Ensure the title is at the beginning
            if not content.strip().startswith("# "):
                content = f"# {title}\n\n{content}"

            return content
        except Exception as e:
            logger.error(f"Error in document generation: {str(e)}")
            return f"# {title}\n\nError in document generation: {str(e)}"


# Factory function for the Documentation agent
def get_documentation_agent():
    """
    Create a new Documentation agent.

    Returns:
        A freshly constructed AgentDocumentation instance
    """
    agent = AgentDocumentation()
    return agent