adapted all renderers from flat to nested element.content.(...) structure

This commit is contained in:
ValueOn AG 2025-12-29 02:22:01 +01:00
parent bcbaf41f4f
commit bc2dd6687d
4 changed files with 90 additions and 309 deletions

View file

@ -198,161 +198,6 @@ class BaseRenderer(ABC):
return section.get("id", "unknown")
return "unknown"
def _extractTableData(self, sectionData: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
"""Extract table headers and rows from section data. Expects nested content structure."""
# Normalize when elements array was passed in
if isinstance(sectionData, list):
if sectionData and isinstance(sectionData[0], dict):
sectionData = sectionData[0]
else:
return [], []
# Ensure sectionData is a dict
if not isinstance(sectionData, dict):
return [], []
# Extract from nested content structure
content = sectionData.get("content", {})
if not isinstance(content, dict):
return [], []
headers = content.get("headers", [])
rows = content.get("rows", [])
return headers, rows
def _extractBulletListItems(self, sectionData: Dict[str, Any]) -> List[str]:
"""Extract bullet list items from section data. Expects nested content structure."""
# Normalize when elements array was passed in
if isinstance(sectionData, list):
if sectionData and isinstance(sectionData[0], dict):
sectionData = sectionData[0]
else:
return []
# Ensure sectionData is a dict
if not isinstance(sectionData, dict):
return []
# Extract from nested content structure
content = sectionData.get("content", {})
if not isinstance(content, dict):
return []
items = content.get("items", [])
result = []
for item in items:
if isinstance(item, str):
result.append(item)
elif isinstance(item, dict) and "text" in item:
result.append(item["text"])
return result
def _extractHeadingData(self, sectionData: Dict[str, Any]) -> Tuple[int, str]:
"""Extract heading level and text from section data. Expects nested content structure."""
# Normalize when elements array was passed in
if isinstance(sectionData, list):
if sectionData and isinstance(sectionData[0], dict):
sectionData = sectionData[0]
else:
return 1, ""
# Ensure sectionData is a dict
if not isinstance(sectionData, dict):
return 1, ""
# Extract from nested content structure
content = sectionData.get("content", {})
if not isinstance(content, dict):
return 1, ""
level = content.get("level", 1)
text = content.get("text", "")
return level, text
def _extractParagraphText(self, sectionData: Dict[str, Any]) -> str:
"""Extract paragraph text from section data. Expects nested content structure."""
if isinstance(sectionData, list):
# Join multiple paragraph elements if provided as a list
texts = []
for el in sectionData:
if isinstance(el, dict):
content = el.get("content", {})
if isinstance(content, dict):
text = content.get("text", "")
elif isinstance(content, str):
text = content
else:
text = ""
if text:
texts.append(text)
elif isinstance(el, str):
texts.append(el)
return "\n".join(texts)
# Extract from nested content structure
if not isinstance(sectionData, dict):
return ""
content = sectionData.get("content", {})
if isinstance(content, dict):
return content.get("text", "")
elif isinstance(content, str):
return content
return ""
def _extractCodeBlockData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
"""Extract code and language from section data. Expects nested content structure."""
# Normalize when elements array was passed in
if isinstance(sectionData, list):
if sectionData and isinstance(sectionData[0], dict):
sectionData = sectionData[0]
else:
return "", ""
# Ensure sectionData is a dict
if not isinstance(sectionData, dict):
return "", ""
# Extract from nested content structure
content = sectionData.get("content", {})
if not isinstance(content, dict):
return "", ""
code = content.get("code", "")
language = content.get("language", "")
return code, language
def _extractImageData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
"""Extract base64 data and alt text from section data. Expects nested content structure."""
# Normalize when elements array was passed in
if isinstance(sectionData, list):
if sectionData and isinstance(sectionData[0], dict):
sectionData = sectionData[0]
else:
return "", "Image"
# Ensure sectionData is a dict
if not isinstance(sectionData, dict):
return "", "Image"
# Extract from nested content structure
content = sectionData.get("content", {})
if not isinstance(content, dict):
return "", "Image"
base64Data = content.get("base64Data", "")
altText = content.get("altText", "Image")
return base64Data, altText
def _renderImageSection(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any:
"""
Render an image section. This is a base implementation that should be overridden
by format-specific renderers.
Args:
section: Image section data
styles: Optional styling information
Returns:
Format-specific image representation
"""
sectionData = self._getSectionData(section)
base64Data, altText = self._extractImageData(sectionData)
# Base implementation returns a simple dict
# Format-specific renderers should override this method
return {
"content_type": "image",
"base64Data": base64Data,
"altText": altText,
"width": sectionData.get("width", None),
"height": sectionData.get("height", None),
"caption": sectionData.get("caption", "")
}
def _validateImageData(self, base64Data: str, altText: str) -> bool:
"""Validate image data."""
if not base64Data:
@ -429,64 +274,6 @@ class BaseRenderer(ABC):
"""Check if a section type is valid."""
return sectionType in self._getSupportedSectionTypes()
def _processSectionByType(self, section: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a section into a structured dict keyed by its content type.

    Args:
        section: The section to process; its type and data are obtained
            through ``_getSectionType`` and ``_getSectionData``.

    Returns:
        A dict whose ``content_type`` names the section type. Table,
        bullet list, heading, paragraph and code block results are flat;
        a valid image keeps its fields under a nested ``content`` dict.
        Invalid images and unknown section types are returned as
        paragraphs.
    """
    kind = self._getSectionType(section)
    payload = self._getSectionData(section)

    if kind == "table":
        headers, rows = self._extractTableData(payload)
        return {"content_type": "table", "headers": headers, "rows": rows}

    if kind == "bullet_list":
        return {"content_type": "bullet_list", "items": self._extractBulletListItems(payload)}

    if kind == "heading":
        level, text = self._extractHeadingData(payload)
        return {"content_type": "heading", "level": level, "text": text}

    if kind == "code_block":
        code, language = self._extractCodeBlockData(payload)
        return {"content_type": "code_block", "code": code, "language": language}

    if kind != "image":
        # "paragraph" and every unrecognized type share the paragraph path.
        return {"content_type": "paragraph", "text": self._extractParagraphText(payload)}

    # Image: locate the element (first entry of an elements array, or the
    # dict itself) and read from its nested content structure.
    if isinstance(payload, list) and payload:
        element = payload[0] if isinstance(payload[0], dict) else {}
    elif isinstance(payload, dict):
        element = payload
    else:
        return {"content_type": "paragraph", "text": "[Image: Invalid data]"}

    content = element.get("content", {})
    if not isinstance(content, dict):
        return {"content_type": "paragraph", "text": "[Image: Invalid content]"}

    base64Data = content.get("base64Data", "")
    altText = content.get("altText", "Image")
    caption = content.get("caption", "")

    if not self._validateImageData(base64Data, altText):
        # Placeholder paragraph when the image data does not validate.
        return {"content_type": "paragraph", "text": f"[Image: {altText}]"}

    # Nested structure matching the standard JSON format.
    return {
        "content_type": "image",
        "content": {
            "base64Data": base64Data,
            "altText": altText,
            "caption": caption
        }
    }
def _formatTimestamp(self, timestamp: str = None) -> str:
"""Format timestamp for display."""
if timestamp:

View file

@ -366,39 +366,23 @@ class RendererHtml(BaseRenderer):
# Process elements according to section's content_type, not just element types
if sectionType == "table":
# Process the section data to extract table structure
processedData = self._processSectionByType(section)
return self._renderJsonTable(processedData, styles)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonTable(element, styles)
return ""
elif sectionType == "bullet_list":
# Process the section data to extract bullet list structure
processedData = self._processSectionByType(section)
return self._renderJsonBulletList(processedData, styles)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonBulletList(element, styles)
return ""
elif sectionType == "heading":
# Extract text from elements for heading rendering
if isinstance(sectionData, list):
# Extract text from heading elements
headingText = ""
for element in sectionData:
if isinstance(element, dict):
element_type = element.get("type", "")
if element_type == "heading":
headingText = element.get("content", element.get("text", ""))
break
elif element_type == "extracted_text":
# Use extracted text as heading if no heading element found
content = element.get("content", "")
if content and not headingText:
# Extract first line or title from extracted text
headingText = content.split('\n')[0].strip()
# Remove markdown formatting
headingText = headingText.replace('#', '').replace('**', '').strip()
break
elif "text" in element:
headingText = element.get("text", "")
break
if headingText:
return self._renderJsonHeading({"text": headingText, "level": 2}, styles)
return self._renderJsonHeading(sectionData, styles)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonHeading(element, styles)
return ""
elif sectionType == "paragraph":
# Process paragraph elements, including extracted_text
if isinstance(sectionData, list):
@ -435,13 +419,17 @@ class RendererHtml(BaseRenderer):
return '\n'.join(htmlParts)
return self._renderJsonParagraph(sectionData, styles)
elif sectionType == "code_block":
# Process the section data to extract code block structure
processedData = self._processSectionByType(section)
return self._renderJsonCodeBlock(processedData, styles)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonCodeBlock(element, styles)
return ""
elif sectionType == "image":
# Process the section data to extract image structure
processedData = self._processSectionByType(section)
return self._renderJsonImage(processedData, styles)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonImage(element, styles)
return ""
else:
# Fallback: Check for special element types first
if isinstance(sectionData, list):
@ -472,7 +460,7 @@ class RendererHtml(BaseRenderer):
def _renderJsonTable(self, tableData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON table to HTML using AI-generated styles."""
try:
# Extract from nested content structure
# Extract from nested content structure: element.content.{headers, rows}
content = tableData.get("content", {})
if not isinstance(content, dict):
return ""
@ -507,9 +495,9 @@ class RendererHtml(BaseRenderer):
return ""
def _renderJsonBulletList(self, listData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON bullet list to HTML using AI-generated styles. Expects nested content structure."""
"""Render a JSON bullet list to HTML using AI-generated styles."""
try:
# Extract from nested content structure
# Extract from nested content structure: element.content.{items}
content = listData.get("content", {})
if not isinstance(content, dict):
return ""
@ -535,19 +523,7 @@ class RendererHtml(BaseRenderer):
def _renderJsonHeading(self, headingData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON heading to HTML using AI-generated styles."""
try:
# Normalize inputs - headingData is typically a list of elements from _getSectionData
if isinstance(headingData, list):
# Extract first element from elements array
if headingData and len(headingData) > 0:
headingData = headingData[0] if isinstance(headingData[0], dict) else {}
else:
return ""
elif isinstance(headingData, str):
headingData = {"text": headingData, "level": 2}
elif not isinstance(headingData, dict):
return ""
# Extract from nested content structure
# Extract from nested content structure: element.content.{text, level}
content = headingData.get("content", {})
if not isinstance(content, dict):
return ""
@ -611,9 +587,9 @@ class RendererHtml(BaseRenderer):
return ""
def _renderJsonCodeBlock(self, codeData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON code block to HTML using AI-generated styles. Expects nested content structure."""
"""Render a JSON code block to HTML using AI-generated styles."""
try:
# Extract from nested content structure
# Extract from nested content structure: element.content.{code, language}
content = codeData.get("content", {})
if not isinstance(content, dict):
return ""

View file

@ -144,25 +144,37 @@ class RendererMarkdown(BaseRenderer):
return '\n\n'.join(markdownParts)
if sectionType == "table":
# Process the section data to extract table structure
processedData = self._processSectionByType(section)
return self._renderJsonTable(processedData)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonTable(element)
return ""
elif sectionType == "bullet_list":
# Process the section data to extract bullet list structure
processedData = self._processSectionByType(section)
return self._renderJsonBulletList(processedData)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonBulletList(element)
return ""
elif sectionType == "heading":
return self._renderJsonHeading(sectionData)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonHeading(element)
return ""
elif sectionType == "paragraph":
return self._renderJsonParagraph(sectionData)
elif sectionType == "code_block":
# Process the section data to extract code block structure
processedData = self._processSectionByType(section)
return self._renderJsonCodeBlock(processedData)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonCodeBlock(element)
return ""
elif sectionType == "image":
# Process the section data to extract image structure
processedData = self._processSectionByType(section)
return self._renderJsonImage(processedData)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonImage(element)
return ""
else:
# Fallback to paragraph for unknown types
return self._renderJsonParagraph(sectionData)
@ -174,7 +186,7 @@ class RendererMarkdown(BaseRenderer):
def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
"""Render a JSON table to markdown."""
try:
# Extract from nested content structure
# Extract from nested content structure: element.content.{headers, rows}
content = tableData.get("content", {})
if not isinstance(content, dict):
return ""
@ -208,7 +220,7 @@ class RendererMarkdown(BaseRenderer):
def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
"""Render a JSON bullet list to markdown."""
try:
# Extract from nested content structure
# Extract from nested content structure: element.content.{items}
content = listData.get("content", {})
if not isinstance(content, dict):
return ""
@ -233,7 +245,7 @@ class RendererMarkdown(BaseRenderer):
def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
"""Render a JSON heading to markdown."""
try:
# Extract from nested content structure
# Extract from nested content structure: element.content.{text, level}
content = headingData.get("content", {})
if not isinstance(content, dict):
return ""
@ -292,7 +304,7 @@ class RendererMarkdown(BaseRenderer):
def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
"""Render a JSON image to markdown."""
try:
# Extract from nested content structure
# Extract from nested content structure: element.content.{base64Data, altText, caption}
content = imageData.get("content", {})
if not isinstance(content, dict):
return ""

View file

@ -167,35 +167,41 @@ class RendererText(BaseRenderer):
return '\n\n'.join(textParts)
if sectionType == "table":
# Process the section data to extract table structure
processedData = self._processSectionByType(section)
return self._renderJsonTable(processedData)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonTable(element)
return ""
elif sectionType == "bullet_list":
# Process the section data to extract bullet list structure
processedData = self._processSectionByType(section)
return self._renderJsonBulletList(processedData)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonBulletList(element)
return ""
elif sectionType == "heading":
# Render each heading element in the elements array
# sectionData is already the elements array from _getSectionData
renderedElements = []
for element in sectionData:
renderedElements.append(self._renderJsonHeading(element))
return "\n".join(renderedElements)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonHeading(element)
return ""
elif sectionType == "paragraph":
# Render each paragraph element in the elements array
# sectionData is already the elements array from _getSectionData
renderedElements = []
for element in sectionData:
renderedElements.append(self._renderJsonParagraph(element))
return "\n".join(renderedElements)
elif sectionType == "code_block":
# Process the section data to extract code block structure
processedData = self._processSectionByType(section)
return self._renderJsonCodeBlock(processedData)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonCodeBlock(element)
return ""
elif sectionType == "image":
# Process the section data to extract image structure
processedData = self._processSectionByType(section)
return self._renderJsonImage(processedData)
# Work directly with elements like other renderers
if isinstance(sectionData, list) and sectionData:
element = sectionData[0] if isinstance(sectionData[0], dict) else {}
return self._renderJsonImage(element)
return ""
else:
# Fallback to paragraph for unknown types - render each element
# sectionData is already the elements array from _getSectionData
@ -211,7 +217,7 @@ class RendererText(BaseRenderer):
def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
"""Render a JSON table to text."""
try:
# Extract from nested content structure
# Extract from nested content structure: element.content.{headers, rows}
content = tableData.get("content", {})
if not isinstance(content, dict):
return ""
@ -245,7 +251,7 @@ class RendererText(BaseRenderer):
def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
"""Render a JSON bullet list to text."""
try:
# Extract from nested content structure
# Extract from nested content structure: element.content.{items}
content = listData.get("content", {})
if not isinstance(content, dict):
return ""
@ -270,7 +276,7 @@ class RendererText(BaseRenderer):
def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
"""Render a JSON heading to text."""
try:
# Extract from nested content structure
# Extract from nested content structure: element.content.{text, level}
content = headingData.get("content", {})
if not isinstance(content, dict):
return ""
@ -312,7 +318,7 @@ class RendererText(BaseRenderer):
def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
"""Render a JSON code block to text."""
try:
# Extract from nested content structure
# Extract from nested content structure: element.content.{code, language}
content = codeData.get("content", {})
if not isinstance(content, dict):
return ""
@ -334,7 +340,7 @@ class RendererText(BaseRenderer):
def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
"""Render a JSON image to text."""
try:
# Extract from nested content structure
# Extract from nested content structure: element.content.{base64Data, altText, caption}
content = imageData.get("content", {})
if isinstance(content, dict):
altText = content.get("altText", "Image")