# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Markdown renderer for report generation.
"""

from typing import Any, Dict, List, Optional, Tuple

from .rendererBaseTemplate import BaseRenderer


class RendererMarkdown(BaseRenderer):
    """Renders content to Markdown format with format-specific extraction.

    The renderer walks a report document in the standardized JSON schema
    (``{metadata: {...}, documents: [{sections: [...]}]}``) and emits one
    Markdown fragment per section.

    Schema validation, section/metadata extraction, timestamp formatting and
    ``self.logger`` are not defined in this class; they are presumably
    inherited from ``BaseRenderer`` (not visible here).
    """

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported Markdown formats."""
        return ['md', 'markdown']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['mdown', 'mkd']

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for markdown renderer."""
        return 95

    async def render(
        self,
        extractedContent: Dict[str, Any],
        title: str,
        userPrompt: Optional[str] = None,
        aiService=None,
    ) -> Tuple[str, str]:
        """Render extracted JSON content to Markdown format.

        Args:
            extractedContent: Report document in the standardized JSON schema.
            title: Document title used when the metadata carries none; also
                used for the fallback document on failure.
            userPrompt: Not used by this method.
            aiService: Not used by this method.

        Returns:
            A ``(markdown, mimeType)`` tuple; the MIME type is always
            ``"text/markdown"``. This method does not raise on rendering
            errors: it logs them and returns a minimal error document.
        """
        try:
            markdownContent = self._generateMarkdownFromJson(extractedContent, title)
            return markdownContent, "text/markdown"
        except Exception as e:
            self.logger.error(f"Error rendering markdown: {e}")
            # Return minimal markdown fallback
            return f"# {title}\n\nError rendering report: {e}", "text/markdown"

    def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
        """Generate markdown content from structured JSON document.

        Args:
            jsonContent: Document in the standardized schema
                ``{metadata: {...}, documents: [{sections: [...]}]}``.
            title: Fallback title used when the metadata has no usable
                ``title`` entry.

        Returns:
            The complete Markdown document: title heading, one fragment per
            non-empty section, a horizontal rule and a generation timestamp.

        Raises:
            Exception: If validation or rendering fails; the original error
                is attached as ``__cause__``.
        """
        try:
            if not self._validateJsonStructure(jsonContent):
                raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

            sections = self._extractSections(jsonContent)
            metadata = self._extractMetadata(jsonContent)

            # Prefer the metadata title, but fall back to the provided title
            # when it is missing, None or empty (avoids a "# None" heading).
            documentTitle = (metadata or {}).get("title") or title

            markdownParts = [f"# {documentTitle}", ""]

            for section in sections or []:
                sectionMarkdown = self._renderJsonSection(section)
                if sectionMarkdown:
                    # Blank entry keeps one empty line between sections.
                    markdownParts.extend((sectionMarkdown, ""))

            # Generation footer
            markdownParts.append("---")
            markdownParts.append(f"*Generated: {self._formatTimestamp()}*")

            return '\n'.join(markdownParts)

        except Exception as e:
            self.logger.error(f"Error generating markdown from JSON: {e}")
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"Markdown generation failed: {e}") from e

    @staticmethod
    def _renderContentFormatElements(elements: List[Any]) -> List[str]:
        """Render ``reference`` and ``extracted_text`` elements of a section.

        Elements that are not dicts, or whose ``type`` is anything else, are
        ignored. ``extracted_text`` elements without content are skipped.

        Returns:
            One Markdown fragment per rendered element (possibly empty).
        """
        fragments = []
        for element in elements:
            if not isinstance(element, dict):
                continue
            elementType = element.get("type", "")
            if elementType == "reference":
                label = element.get("label", "Reference")
                fragments.append(f"*[Reference: {label}]*")
            elif elementType == "extracted_text":
                content = element.get("content", "")
                if content:
                    source = element.get("source", "")
                    sourceText = f" *(Source: {source})*" if source else ""
                    fragments.append(f"{content}{sourceText}")
        return fragments

    def _renderJsonSection(self, section: Dict[str, Any]) -> str:
        """Render a single JSON section to markdown.

        When the section data is a list containing ``reference`` or
        ``extracted_text`` elements, only those elements are rendered and
        returned. Otherwise the section is dispatched on its type (table,
        bullet_list, heading, paragraph, code_block, image); unknown types
        fall back to paragraph rendering.

        Returns:
            The Markdown fragment, or an ``*[Error rendering section: ...]*``
            placeholder if rendering raised.
        """
        try:
            sectionType = self._getSectionType(section)
            sectionData = self._getSectionData(section)

            if isinstance(sectionData, list):
                fragments = self._renderContentFormatElements(sectionData)
                if fragments:
                    return '\n\n'.join(fragments)

            # NOTE(review): heading/paragraph receive sectionData as-is. If it
            # is a list at this point, the dict-based renderers below log a
            # warning and return "" - confirm what _getSectionData returns.
            if sectionType == "table":
                return self._renderJsonTable(self._processSectionByType(section))
            if sectionType == "bullet_list":
                return self._renderJsonBulletList(self._processSectionByType(section))
            if sectionType == "heading":
                return self._renderJsonHeading(sectionData)
            if sectionType == "code_block":
                return self._renderJsonCodeBlock(self._processSectionByType(section))
            if sectionType == "image":
                return self._renderJsonImage(self._processSectionByType(section))

            # "paragraph" and any unknown type
            return self._renderJsonParagraph(sectionData)

        except Exception as e:
            self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {e}")
            return f"*[Error rendering section: {e}]*"

    @staticmethod
    def _formatTableRow(cells) -> str:
        """Format an iterable of cell values as one pipe-delimited table row.

        Pipes inside a cell are escaped and line breaks become ``<br>`` so a
        cell can never split the row. Leading and trailing pipes keep a
        one-column table from being parsed as a setext heading.
        """
        escapedCells = []
        for cell in cells:
            cellText = str(cell).replace("|", "\\|")
            cellText = cellText.replace("\r\n", "<br>").replace("\n", "<br>").replace("\r", "<br>")
            escapedCells.append(cellText)
        return "| " + " | ".join(escapedCells) + " |"

    def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
        """Render a JSON table to markdown.

        Args:
            tableData: Mapping with ``headers`` (list) and ``rows`` (list of
                iterables of cell values).

        Returns:
            A pipe table, or ``""`` when headers or rows are missing or
            rendering fails.
        """
        try:
            headers = tableData.get("headers", [])
            rows = tableData.get("rows", [])

            if not headers or not rows:
                return ""

            tableLines = [
                self._formatTableRow(headers),
                self._formatTableRow("---" for _ in headers),
            ]
            tableLines.extend(self._formatTableRow(row) for row in rows)

            return '\n'.join(tableLines)

        except Exception as e:
            self.logger.warning(f"Error rendering table: {e}")
            return ""

    def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
        """Render a JSON bullet list to markdown.

        Args:
            listData: Mapping with an ``items`` list. Items may be strings or
                dicts carrying a ``text`` key; other items are skipped.

        Returns:
            One ``- item`` line per rendered item, or ``""`` when there are
            no items or rendering fails.
        """
        try:
            items = listData.get("items", [])
            if not items:
                return ""

            bulletLines = []
            for item in items:
                if isinstance(item, str):
                    bulletLines.append(f"- {item}")
                elif isinstance(item, dict) and "text" in item:
                    bulletLines.append(f"- {item['text']}")

            return '\n'.join(bulletLines)

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {e}")
            return ""

    def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
        """Render a JSON heading to markdown.

        Args:
            headingData: Mapping with ``text`` and an optional ``level``
                (default 1).

        Returns:
            An ATX heading with the level clamped to 1-6, or ``""`` when the
            text is empty or rendering fails.
        """
        try:
            text = headingData.get("text", "")
            if not text:
                return ""

            # Accept levels given as strings/floats; fall back to level 1
            # instead of dropping the heading on an unusable value.
            try:
                level = int(headingData.get("level", 1))
            except (TypeError, ValueError):
                level = 1

            level = max(1, min(6, level))
            return f"{'#' * level} {text}"

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {e}")
            return ""

    def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
        """Render a JSON paragraph to markdown.

        Returns:
            The ``text`` entry as a string, or ``""`` when it is missing,
            empty or rendering fails.
        """
        try:
            text = paragraphData.get("text", "")
            # Coerce to str so a non-string value cannot break the final join.
            return str(text) if text else ""

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {e}")
            return ""

    def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
        """Render a JSON code block to markdown.

        Args:
            codeData: Mapping with ``code`` and an optional ``language``.

        Returns:
            A fenced code block, or ``""`` when there is no code or
            rendering fails.
        """
        try:
            code = codeData.get("code", "")
            if not code:
                return ""

            codeText = str(code)
            language = codeData.get("language", "") or ""

            # The fence must be longer than any backtick run inside the code,
            # otherwise embedded ``` would terminate the block early.
            longestRun = currentRun = 0
            for char in codeText:
                currentRun = currentRun + 1 if char == "`" else 0
                longestRun = max(longestRun, currentRun)
            fence = "`" * max(3, longestRun + 1)

            return f"{fence}{language}\n{codeText}\n{fence}"

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {e}")
            return ""

    def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
        """Render a JSON image to markdown.

        Base64 payloads are not embedded; a textual placeholder carrying the
        alt text is emitted so the image is not silently dropped.

        Args:
            imageData: Mapping with an optional ``altText`` (default
                ``"Image"``).

        Returns:
            ``*[Image: <altText>]*``, or ``""`` when rendering fails.
        """
        try:
            altText = imageData.get("altText", "Image") or "Image"
            return f"*[Image: {altText}]*"

        except Exception as e:
            self.logger.warning(f"Error rendering image: {e}")
            return ""