gateway/modules/services/serviceGeneration/renderers/rendererMarkdown.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Markdown renderer for report generation.
"""

from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional

class RendererMarkdown(BaseRenderer):
    """Renders content to Markdown format with format-specific extraction."""

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported Markdown formats."""
        return ['md', 'markdown']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['mdown', 'mkd']

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for markdown renderer."""
        return 95

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: Markdown documents are formatted documents."""
        return 'document'

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: Optional[str] = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to a single Markdown document.

        Args:
            extractedContent: Structured content; the "documents" and "metadata"
                keys are read here, the rest is consumed by _generateMarkdownFromJson.
            title: Document title; also used to derive a filename when the first
                document entry does not carry one.
            userPrompt: Accepted for interface compatibility; not used in this method.
            aiService: Accepted for interface compatibility; not used in this method.

        Returns:
            A one-element list holding the rendered document. When rendering fails,
            the document contains a minimal Markdown error report instead of the
            exception being propagated.
        """
        # Resolve metadata once, up front. The guard keeps the error fallback below
        # from raising a second time when extractedContent is a truthy non-dict value.
        try:
            rawMetadata = extractedContent.get("metadata", {}) if extractedContent else {}
        except AttributeError:
            rawMetadata = {}
        metadata = rawMetadata if isinstance(rawMetadata, dict) else None
        documentType = metadata.get("documentType") if metadata is not None else None

        def buildDocument(markdownText: str, filename: str) -> RenderedDocument:
            # Shared by the success and fallback paths so both emit identical envelopes
            return RenderedDocument(
                documentData=markdownText.encode('utf-8'),
                mimeType="text/markdown",
                filename=filename,
                documentType=documentType,
                metadata=metadata
            )

        try:
            # Generate markdown from JSON structure
            markdownContent = self._generateMarkdownFromJson(extractedContent, title)

            # Prefer the filename carried by the first document entry, otherwise derive it from the title
            documents = extractedContent.get("documents", [])
            firstDocument = documents[0] if isinstance(documents, (list, tuple)) and documents else None
            filename = firstDocument.get("filename") if isinstance(firstDocument, dict) else None
            if not filename:
                filename = self._determineFilename(title, "text/markdown")

            return [buildDocument(markdownContent, filename)]

        except Exception as e:
            self.logger.error(f"Error rendering markdown: {str(e)}")
            # Return minimal markdown fallback
            fallbackContent = f"# {title}\n\nError rendering report: {str(e)}"
            return [buildDocument(fallbackContent, self._determineFilename(title, "text/markdown"))]

    def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
        """Generate markdown content from structured JSON document."""
        try:
            # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
            if not self._validateJsonStructure(jsonContent):
                raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

            # Extract sections and metadata from standardized schema
            sections = self._extractSections(jsonContent)
            metadata = self._extractMetadata(jsonContent)

            # Use provided title (which comes from documents[].title) as primary source
            # Fallback to metadata.title only if title parameter is empty
            documentTitle = title if title else metadata.get("title", "Generated Document")

            # Build markdown content
            markdownParts = []

            # Document title
            markdownParts.append(f"# {documentTitle}")
            markdownParts.append("")

            # Process each section
            for section in sections:
                sectionMarkdown = self._renderJsonSection(section)
                if sectionMarkdown:
                    markdownParts.append(sectionMarkdown)
                    markdownParts.append("")  # Add spacing between sections

            # Add generation info
            markdownParts.append("---")
            markdownParts.append(f"*Generated: {self._formatTimestamp()}*")

            return '\n'.join(markdownParts)

        except Exception as e:
            self.logger.error(f"Error generating markdown from JSON: {str(e)}")
            raise Exception(f"Markdown generation failed: {str(e)}")

    def _renderJsonSection(self, section: Dict[str, Any]) -> str:
        """Render a single JSON section to markdown.
        Supports three content formats: reference, object (base64), extracted_text.
        """
        try:
            sectionType = self._getSectionType(section)
            sectionData = self._getSectionData(section)

            # Check for three content formats from Phase 5D in elements
            if isinstance(sectionData, list):
                markdownParts = []
                for element in sectionData:
                    element_type = element.get("type", "") if isinstance(element, dict) else ""

                    # Support three content formats from Phase 5D
                    if element_type == "reference":
                        # Document reference format
                        doc_ref = element.get("documentReference", "")
                        label = element.get("label", "Reference")
                        markdownParts.append(f"*[Reference: {label}]*")
                        continue
                    elif element_type == "extracted_text":
                        # Extracted text format
                        content = element.get("content", "")
                        source = element.get("source", "")
                        if content:
                            source_text = f" *(Source: {source})*" if source else ""
                            markdownParts.append(f"{content}{source_text}")
                        continue

                # If we processed reference/extracted_text elements, return them
                if markdownParts:
                    return '\n\n'.join(markdownParts)

            if sectionType == "table":
                # Work directly with elements like other renderers
                if isinstance(sectionData, list) and sectionData:
                    element = sectionData[0] if isinstance(sectionData[0], dict) else {}
                    return self._renderJsonTable(element)
                return ""
            elif sectionType == "bullet_list":
                # Work directly with elements like other renderers
                if isinstance(sectionData, list) and sectionData:
                    element = sectionData[0] if isinstance(sectionData[0], dict) else {}
                    return self._renderJsonBulletList(element)
                return ""
            elif sectionType == "heading":
                # Work directly with elements like other renderers
                if isinstance(sectionData, list) and sectionData:
                    element = sectionData[0] if isinstance(sectionData[0], dict) else {}
                    return self._renderJsonHeading(element)
                return ""
            elif sectionType == "paragraph":
                # Work directly with elements like other renderers
                if isinstance(sectionData, list) and sectionData:
                    element = sectionData[0] if isinstance(sectionData[0], dict) else {}
                    return self._renderJsonParagraph(element)
                elif isinstance(sectionData, dict):
                    return self._renderJsonParagraph(sectionData)
                return ""
            elif sectionType == "code_block":
                # Work directly with elements like other renderers
                if isinstance(sectionData, list) and sectionData:
                    element = sectionData[0] if isinstance(sectionData[0], dict) else {}
                    return self._renderJsonCodeBlock(element)
                return ""
            elif sectionType == "image":
                # Work directly with elements like other renderers
                if isinstance(sectionData, list) and sectionData:
                    element = sectionData[0] if isinstance(sectionData[0], dict) else {}
                    return self._renderJsonImage(element)
                return ""
            else:
                # Fallback to paragraph for unknown types
                if isinstance(sectionData, list) and sectionData:
                    element = sectionData[0] if isinstance(sectionData[0], dict) else {}
                    return self._renderJsonParagraph(element)
                elif isinstance(sectionData, dict):
                    return self._renderJsonParagraph(sectionData)
                return ""

        except Exception as e:
            self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
            return f"*[Error rendering section: {str(e)}]*"

    def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
        """Render a JSON table to markdown."""
        try:
            # Extract from nested content structure: element.content.{headers, rows}
            content = tableData.get("content", {})
            if not isinstance(content, dict):
                return ""
            headers = content.get("headers", [])
            rows = content.get("rows", [])

            if not headers or not rows:
                return ""

            markdownParts = []

            # Create table header
            headerLine = " | ".join(str(header) for header in headers)
            markdownParts.append(headerLine)

            # Add separator line
            separatorLine = " | ".join("---" for _ in headers)
            markdownParts.append(separatorLine)

            # Add data rows
            for row in rows:
                rowLine = " | ".join(str(cellData) for cellData in row)
                markdownParts.append(rowLine)

            return '\n'.join(markdownParts)

        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return ""

    def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
        """Render a JSON bullet list to markdown."""
        try:
            # Extract from nested content structure: element.content.{items}
            content = listData.get("content", {})
            if not isinstance(content, dict):
                return ""
            items = content.get("items", [])

            if not items:
                return ""

            markdownParts = []
            for item in items:
                if isinstance(item, str):
                    markdownParts.append(f"- {item}")
                elif isinstance(item, dict) and "text" in item:
                    markdownParts.append(f"- {item['text']}")

            return '\n'.join(markdownParts)

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return ""

    def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
        """Render a JSON heading to markdown."""
        try:
            # Extract from nested content structure: element.content.{text, level}
            content = headingData.get("content", {})
            if not isinstance(content, dict):
                return ""
            text = content.get("text", "")
            level = content.get("level", 1)

            if text:
                level = max(1, min(6, level))
                return f"{'#' * level} {text}"

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return ""

    def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
        """Render a JSON paragraph to markdown."""
        try:
            # Extract from nested content structure
            content = paragraphData.get("content", {})
            if isinstance(content, dict):
                text = content.get("text", "")
            elif isinstance(content, str):
                text = content
            else:
                text = ""
            return text if text else ""

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return ""

    def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
        """Render a JSON code block to markdown."""
        try:
            # Extract from nested content structure
            content = codeData.get("content", {})
            if not isinstance(content, dict):
                return ""
            code = content.get("code", "")
            language = content.get("language", "")

            if code:
                if language:
                    return f"```{language}\n{code}\n```"
                else:
                    return f"```\n{code}\n```"

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return ""

    def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
        """Render a JSON image to markdown."""
        try:
            # Extract from nested content structure: element.content.{base64Data, altText, caption}
            content = imageData.get("content", {})
            if not isinstance(content, dict):
                return ""
            altText = content.get("altText", "Image")
            base64Data = content.get("base64Data", "")

            if base64Data:
                # For base64 images, we can't embed them directly in markdown
                # So we'll use a placeholder with the alt text
                return f"![{altText}](data:image/png;base64,{base64Data[:50]}...)"
            else:
                return f"![{altText}](image-placeholder)"

        except Exception as e:
            self.logger.warning(f"Error rendering image: {str(e)}")
            return f"![{imageData.get('altText', 'Image')}](image-error)"