gateway/modules/services/serviceGeneration/renderers/rendererMarkdown.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Markdown renderer for report generation.
"""

from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List

class RendererMarkdown(BaseRenderer):
    """Renders content to Markdown format with format-specific extraction."""

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported Markdown formats."""
        return ['md', 'markdown']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['mdown', 'mkd']

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for markdown renderer."""
        return 95

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
        """Render extracted JSON content to Markdown format."""
        try:
            # Generate markdown from JSON structure
            markdownContent = self._generateMarkdownFromJson(extractedContent, title)

            return markdownContent, "text/markdown"

        except Exception as e:
            self.logger.error(f"Error rendering markdown: {str(e)}")
            # Return minimal markdown fallback
            return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"

    def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
        """Generate markdown content from structured JSON document."""
        try:
            # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
            if not self._validateJsonStructure(jsonContent):
                raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

            # Extract sections and metadata from standardized schema
            sections = self._extractSections(jsonContent)
            metadata = self._extractMetadata(jsonContent)

            # Use title from JSON metadata if available, otherwise use provided title
            documentTitle = metadata.get("title", title)

            # Build markdown content
            markdownParts = []

            # Document title
            markdownParts.append(f"# {documentTitle}")
            markdownParts.append("")

            # Process each section
            for section in sections:
                sectionMarkdown = self._renderJsonSection(section)
                if sectionMarkdown:
                    markdownParts.append(sectionMarkdown)
                    markdownParts.append("")  # Add spacing between sections

            # Add generation info
            markdownParts.append("---")
            markdownParts.append(f"*Generated: {self._formatTimestamp()}*")

            return '\n'.join(markdownParts)

        except Exception as e:
            self.logger.error(f"Error generating markdown from JSON: {str(e)}")
            raise Exception(f"Markdown generation failed: {str(e)}")

    def _renderJsonSection(self, section: Dict[str, Any]) -> str:
        """Render a single JSON section to markdown.
        Supports three content formats: reference, object (base64), extracted_text.
        """
        try:
            sectionType = self._getSectionType(section)
            sectionData = self._getSectionData(section)

            # Check for three content formats from Phase 5D in elements
            if isinstance(sectionData, list):
                markdownParts = []
                for element in sectionData:
                    element_type = element.get("type", "") if isinstance(element, dict) else ""

                    # Support three content formats from Phase 5D
                    if element_type == "reference":
                        # Document reference format
                        doc_ref = element.get("documentReference", "")
                        label = element.get("label", "Reference")
                        markdownParts.append(f"*[Reference: {label}]*")
                        continue
                    elif element_type == "extracted_text":
                        # Extracted text format
                        content = element.get("content", "")
                        source = element.get("source", "")
                        if content:
                            source_text = f" *(Source: {source})*" if source else ""
                            markdownParts.append(f"{content}{source_text}")
                        continue

                # If we processed reference/extracted_text elements, return them
                if markdownParts:
                    return '\n\n'.join(markdownParts)

            if sectionType == "table":
                # Process the section data to extract table structure
                processedData = self._processSectionByType(section)
                return self._renderJsonTable(processedData)
            elif sectionType == "bullet_list":
                # Process the section data to extract bullet list structure
                processedData = self._processSectionByType(section)
                return self._renderJsonBulletList(processedData)
            elif sectionType == "heading":
                return self._renderJsonHeading(sectionData)
            elif sectionType == "paragraph":
                return self._renderJsonParagraph(sectionData)
            elif sectionType == "code_block":
                # Process the section data to extract code block structure
                processedData = self._processSectionByType(section)
                return self._renderJsonCodeBlock(processedData)
            elif sectionType == "image":
                # Process the section data to extract image structure
                processedData = self._processSectionByType(section)
                return self._renderJsonImage(processedData)
            else:
                # Fallback to paragraph for unknown types
                return self._renderJsonParagraph(sectionData)

        except Exception as e:
            self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
            return f"*[Error rendering section: {str(e)}]*"

    def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
        """Render a JSON table to markdown."""
        try:
            headers = tableData.get("headers", [])
            rows = tableData.get("rows", [])

            if not headers or not rows:
                return ""

            markdownParts = []

            # Create table header
            headerLine = " | ".join(str(header) for header in headers)
            markdownParts.append(headerLine)

            # Add separator line
            separatorLine = " | ".join("---" for _ in headers)
            markdownParts.append(separatorLine)

            # Add data rows
            for row in rows:
                rowLine = " | ".join(str(cellData) for cellData in row)
                markdownParts.append(rowLine)

            return '\n'.join(markdownParts)

        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return ""

    def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
        """Render a JSON bullet list to markdown."""
        try:
            items = listData.get("items", [])

            if not items:
                return ""

            markdownParts = []
            for item in items:
                if isinstance(item, str):
                    markdownParts.append(f"- {item}")
                elif isinstance(item, dict) and "text" in item:
                    markdownParts.append(f"- {item['text']}")

            return '\n'.join(markdownParts)

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return ""

    def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
        """Render a JSON heading to markdown."""
        try:
            level = headingData.get("level", 1)
            text = headingData.get("text", "")

            if text:
                level = max(1, min(6, level))
                return f"{'#' * level} {text}"

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return ""

    def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
        """Render a JSON paragraph to markdown."""
        try:
            text = paragraphData.get("text", "")
            return text if text else ""

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return ""

    def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
        """Render a JSON code block to markdown."""
        try:
            code = codeData.get("code", "")
            language = codeData.get("language", "")

            if code:
                if language:
                    return f"```{language}\n{code}\n```"
                else:
                    return f"```\n{code}\n```"

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return ""

    def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
        """Render a JSON image to markdown."""
        try:
            altText = imageData.get("altText", "Image")
            base64Data = imageData.get("base64Data", "")

            if base64Data:
                # For base64 images, we can't embed them directly in markdown
                # So we'll use a placeholder with the alt text
                return f"![{altText}](data:image/png;base64,{base64Data[:50]}...)"
            else:
                return f"![{altText}](image-placeholder)"

        except Exception as e:
            self.logger.warning(f"Error rendering image: {str(e)}")
            return f"![{imageData.get('altText', 'Image')}](image-error)"