# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
PDF renderer for report generation using reportlab.
"""
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
import io
import base64
try:
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
REPORTLAB_AVAILABLE = True
except ImportError:
REPORTLAB_AVAILABLE = False
class RendererPdf(BaseRenderer):
"""Renders content to PDF format using reportlab."""
@classmethod
def getSupportedFormats(cls) -> List[str]:
"""Return supported PDF formats."""
return ['pdf']
@classmethod
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['document', 'print']
@classmethod
def getPriority(cls) -> int:
"""Return priority for PDF renderer."""
return 120
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    """Render extracted JSON content to a PDF document.

    When reportlab is not installed the call is delegated to the HTML
    renderer. Any exception raised while building the PDF is logged and a
    plain-text error document is returned instead, so this method does not
    raise for rendering failures.

    Args:
        extractedContent: Structured document content; its "metadata" and
            "documents" entries are read here when present.
        title: Title used for the document and, if needed, the filename.
        userPrompt: Optional user prompt forwarded to style generation.
        aiService: Optional AI service forwarded to style generation.

    Returns:
        A single-element list with the rendered PDF, or with a text/plain
        error document when rendering failed.
    """
    try:
        if not REPORTLAB_AVAILABLE:
            # Fallback to HTML if reportlab is not available
            from .rendererHtml import RendererHtml
            return await RendererHtml().render(extractedContent, title, userPrompt, aiService)

        # Generate PDF using AI-analyzed styling
        pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)

        # Treat a missing/invalid payload as empty so the lookups below
        # cannot raise after the PDF has already been produced.
        content = extractedContent if isinstance(extractedContent, dict) else {}
        metadata = content.get("metadata", {})
        documentType = metadata.get("documentType") if isinstance(metadata, dict) else None

        # Prefer the filename of the first document entry, else derive one from the title
        filename = None
        documents = content.get("documents")
        if isinstance(documents, (list, tuple)) and documents and isinstance(documents[0], dict):
            filename = documents[0].get("filename")
        if not filename:
            filename = self._determineFilename(title, "application/pdf")

        # _generatePdfFromJson returns base64 text; turn it back into raw bytes
        if isinstance(pdf_content, str):
            try:
                pdf_bytes = base64.b64decode(pdf_content)
            except Exception:
                pdf_bytes = pdf_content.encode('utf-8')
        else:
            pdf_bytes = pdf_content

        return [
            RenderedDocument(
                documentData=pdf_bytes,
                mimeType="application/pdf",
                filename=filename,
                documentType=documentType,
                metadata=metadata if isinstance(metadata, dict) else None
            )
        ]
    except Exception as e:
        self.logger.error(f"Error rendering PDF: {str(e)}")
        # Return minimal fallback
        fallbackContent = f"PDF Generation Error: {str(e)}"
        return [
            RenderedDocument(
                documentData=fallbackContent.encode('utf-8'),
                mimeType="text/plain",
                filename=self._determineFilename(title, "text/plain")
            )
        ]
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
    """Build the PDF for a structured JSON document and return it as base64 text.

    The document consists of a title page (title plus generation timestamp)
    followed by the flowables produced for every section.

    Args:
        json_content: Document in the standardized schema
            {metadata: {...}, documents: [{sections: [...]}]}.
        title: Fallback title used when the metadata carries none.
        userPrompt: Optional user prompt forwarded to style resolution.
        aiService: Optional AI service forwarded to style resolution.

    Returns:
        The PDF file content, base64-encoded as a str.

    Raises:
        Exception: "PDF generation failed: ..." wrapping (and chained to)
            whatever went wrong, including schema validation errors.
    """
    try:
        # Validate first so an invalid document never triggers the
        # (potentially AI-backed) style lookup.
        if not self._validateJsonStructure(json_content):
            raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

        # Get style set: use styles from metadata if available, otherwise enhance with AI
        styles = await self._getStyleSet(json_content, userPrompt, aiService)

        # Extract sections and metadata from standardized schema
        sections = self._extractSections(json_content)
        metadata = self._extractMetadata(json_content)

        # Use title from JSON metadata if available, otherwise use provided title
        meta_title = metadata.get("title") if isinstance(metadata, dict) else None
        document_title = str(meta_title or title or "")

        # Keep the title short to prevent wrapping/overlapping on the title
        # page: shorten the real title instead of swapping in unrelated text.
        max_title_length = 40
        if len(document_title) > max_title_length:
            document_title = document_title[:max_title_length - 3].rstrip() + "..."

        # Create a buffer to hold the PDF
        buffer = io.BytesIO()
        doc = SimpleDocTemplate(
            buffer,
            pagesize=A4,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=18
        )

        # Title page
        story = [
            Paragraph(document_title, self._createTitleStyle(styles)),
            Spacer(1, 50),  # Increased spacing to prevent overlap
            Paragraph(f"Generated: {self._formatTimestamp()}", self._createNormalStyle(styles)),
            Spacer(1, 30),  # Add spacing before page break
            PageBreak(),
        ]

        # Process each section
        self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
        for i, section in enumerate(sections):
            self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
            section_elements = self._renderJsonSection(section, styles)
            self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
            story.extend(section_elements)

        # Build PDF and hand it back as base64 text
        doc.build(story)
        return base64.b64encode(buffer.getvalue()).decode('utf-8')
    except Exception as e:
        self.logger.error(f"Error generating PDF from JSON: {str(e)}")
        # Chain the cause so the original traceback is not lost
        raise Exception(f"PDF generation failed: {str(e)}") from e
async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
    """Resolve the style set used for rendering.

    Resolution order:
      1. Styles supplied in extractedContent["metadata"]["styles"].
      2. Default styles enhanced by AI, when both userPrompt and aiService
         are given.
      3. The unmodified default style set.

    IMPORTANT: In a dynamic scalable AI system, styling should come from
    document generation, not be generated separately by renderers. AI is only
    a fallback when no styles are provided.

    For cases 1 and 2 the contrast check runs BEFORE the colour conversion:
    _convertColorsFormat rewrites '#RRGGBB' values, so checking afterwards
    would compare against strings that no longer have that shape. Metadata
    styles are deep-copied first so the caller's metadata is not modified.

    Args:
        extractedContent: Document content with metadata (may contain styles).
        userPrompt: User's prompt (AI will detect style instructions in any language).
        aiService: AI service (used only if styles not in metadata and userPrompt provided).
        templateName: Name of template style set (None = default). Currently
            not read by this method.

    Returns:
        Dict with style definitions for all document styles.
    """
    import copy

    # FIRST: styles provided by document generation metadata (preferred approach)
    if extractedContent:
        metadata = extractedContent.get("metadata", {})
        if isinstance(metadata, dict):
            styles = metadata.get("styles")
            if styles and isinstance(styles, dict):
                self.logger.debug("Using styles from document generation metadata")
                checkedStyleSet = self._validateStylesContrast(copy.deepcopy(styles))
                return self._convertColorsFormat(checkedStyleSet)

    defaultStyleSet = self._getDefaultStyleSet()

    # FALLBACK: enhance with AI if userPrompt provided (only if styles not in metadata)
    if userPrompt and aiService:
        self.logger.info("Styles not in metadata, enhancing with AI based on user prompt...")
        enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
        enhancedStyleSet = self._validateStylesContrast(enhancedStyleSet)
        # Convert colors to PDF format after validation
        return self._convertColorsFormat(enhancedStyleSet)

    # Use default styles only
    return defaultStyleSet
async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
    """Ask the AI service for a style set derived from the user prompt.

    Builds a "pdf" style template from the prompt and the defaults and passes
    it to self._getAiStyles. Any exception is logged as a warning and the
    default style set is returned unchanged.

    Args:
        userPrompt: Prompt text handed to the style template builder.
        defaultStyleSet: Styles used as the template base and as fallback.
        aiService: Service object forwarded to self._getAiStyles.

    Returns:
        The AI-provided style set, or defaultStyleSet on failure.
    """
    try:
        prompt_template = self._createAiStyleTemplate("pdf", userPrompt, defaultStyleSet)
        return await self._getAiStyles(aiService, prompt_template, defaultStyleSet)
    except Exception as e:
        self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles")
    return defaultStyleSet
def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
bg_color = header.get("background", "#FFFFFF")
text_color = header.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
bg_color = cell.get("background", "#FFFFFF")
text_color = cell.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
return styles
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._getDefaultStyleSet()
def _getDefaultStyleSet(self) -> Dict[str, Any]:
"""Default PDF style set - used when no style instructions present."""
return {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12},
"heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center", "font_size": 12},
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10},
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3},
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
}
async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper PDF color conversion."""
if not ai_service:
return default_styles
try:
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
request_options = AiCallOptions()
request_options.operationType = OperationTypeEnum.DATA_GENERATE
request = AiCallRequest(prompt=style_template, context="", options=request_options)
# Check if AI service is properly configured
if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
self.logger.warning("AI service not properly configured, using defaults")
return default_styles
response = await ai_service.callAi(request)
# Check if response is valid
if not response:
self.logger.warning("AI service returned no response, using defaults")
return default_styles
import json
import re
# Clean and parse JSON
result = response.content.strip() if response and response.content else ""
# Check if result is empty
if not result:
self.logger.warning("AI styling returned empty response, using defaults")
return default_styles
# Log the raw response for debugging
self.logger.debug(f"AI styling raw response: {result[:200]}...")
# Extract JSON from various formats
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
if json_match:
result = json_match.group(1).strip()
elif result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
elif result.startswith('```'):
result = re.sub(r'^```\s*', '', result)
result = re.sub(r'\s*```$', '', result)
# Try to extract JSON from explanatory text
json_patterns = [
r'\{[^{}]*"title"[^{}]*\}', # Simple JSON object
r'\{.*?"title".*?\}', # JSON with title field
r'\{.*?"font_size".*?\}', # JSON with font_size field
]
for pattern in json_patterns:
json_match = re.search(pattern, result, re.DOTALL)
if json_match:
result = json_match.group(0)
break
# Additional cleanup - remove any leading/trailing whitespace and newlines
result = result.strip()
# Check if result is still empty after cleanup
if not result:
self.logger.warning("AI styling returned empty content after cleanup, using defaults")
return default_styles
# Try to parse JSON
try:
styles = json.loads(result)
self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")
except json.JSONDecodeError as json_error:
self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
# Use print instead of logger to avoid truncation
self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "PDF_RENDERER")
self.services.utils.debugLogToFile(f"RESPONSE LENGTH: {len(result)} characters", "PDF_RENDERER")
self.logger.warning(f"Raw content that failed to parse: {result}")
# Try to fix incomplete JSON by adding missing closing braces
open_braces = result.count('{')
close_braces = result.count('}')
if open_braces > close_braces:
# JSON is incomplete, add missing closing braces
missing_braces = open_braces - close_braces
result = result + '}' * missing_braces
self.logger.info(f"Added {missing_braces} missing closing brace(s)")
# Try parsing the fixed JSON
try:
styles = json.loads(result)
self.logger.info("Successfully fixed incomplete JSON")
except json.JSONDecodeError as fix_error:
self.logger.warning(f"Fixed JSON still invalid: {fix_error}")
# Try to extract just the JSON part if it's embedded in text
json_start = result.find('{')
json_end = result.rfind('}')
if json_start != -1 and json_end != -1 and json_end > json_start:
json_part = result[json_start:json_end+1]
try:
styles = json.loads(json_part)
self.logger.info("Successfully extracted JSON from explanatory text")
except json.JSONDecodeError:
self.logger.warning("Could not extract valid JSON from response, using defaults")
return default_styles
else:
return default_styles
else:
# Try to extract just the JSON part if it's embedded in text
json_start = result.find('{')
json_end = result.rfind('}')
if json_start != -1 and json_end != -1 and json_end > json_start:
json_part = result[json_start:json_end+1]
try:
styles = json.loads(json_part)
self.logger.info("Successfully extracted JSON from explanatory text")
except json.JSONDecodeError:
self.logger.warning("Could not extract valid JSON from response, using defaults")
return default_styles
else:
return default_styles
# Convert colors to PDF format (keep as hex strings, PDF renderer will convert them)
styles = self._convertColorsFormat(styles)
return styles
except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return default_styles
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert colors to proper format for PDF compatibility."""
try:
for style_name, style_config in styles.items():
if isinstance(style_config, dict):
for prop, value in style_config.items():
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel) for consistency
styles[style_name][prop] = f"FF{value[1:]}"
elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
# Already aRGB format, keep as is
pass
return styles
except Exception as e:
self.logger.warning(f"Color conversion failed: {str(e)}")
return styles
def _getSafeColor(self, color_value: str, default: str = "#000000") -> str:
"""Get a safe hex color value for PDF."""
if isinstance(color_value, str) and color_value.startswith('#'):
if len(color_value) == 7:
return f"FF{color_value[1:]}"
elif len(color_value) == 9:
return color_value
return default
def _createTitleStyle(self, styles: Dict[str, Any]) -> "ParagraphStyle":
    """Create the title ParagraphStyle from the "title" style definition.

    The return annotation is a string on purpose: ParagraphStyle is only
    bound when the guarded reportlab import succeeded, and an unquoted
    annotation would raise NameError while the class is being defined.

    Args:
        styles: Style set; the "title" entry supplies font_size, color,
            align, space_after and bold (all optional).

    Returns:
        A ParagraphStyle named 'CustomTitle'.
    """
    title_style_def = styles.get("title", {})
    title_color = title_style_def.get("color", "#1F4E79")
    title_space_after = title_style_def.get("space_after", 30)
    font_size = title_style_def.get("font_size", 20)
    text_color = self._hexToColor(title_color)

    # DEBUG: Show what color and spacing is being used for title
    self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {text_color}", "PDF_RENDERER")
    self.services.utils.debugLogToFile(f"PDF TITLE SPACE_AFTER: {title_space_after}", "PDF_RENDERER")

    style_kwargs = dict(
        fontSize=font_size,
        spaceAfter=title_space_after,
        alignment=self._getAlignment(title_style_def.get("align", "center")),
        textColor=text_color,
        leading=font_size * 1.4,  # Line spacing for multi-line titles
        spaceBefore=0,  # Ensure no space before title
    )
    # Honour the "bold" flag the same way the table header does
    if title_style_def.get("bold", True):
        style_kwargs["fontName"] = 'Helvetica-Bold'
    return ParagraphStyle('CustomTitle', **style_kwargs)
def _createHeadingStyle(self, styles: Dict[str, Any], level: int) -> "ParagraphStyle":
    """Create the ParagraphStyle for a heading of the given level.

    Looks up styles["heading<level>"] and falls back to styles["heading1"]
    (then to built-in values) when that level is not defined. The return
    annotation is a string because ParagraphStyle is only bound when the
    guarded reportlab import succeeded.

    Args:
        styles: Style set with optional "heading<N>" entries.
        level: Heading level used for the lookup key and the default size.

    Returns:
        A ParagraphStyle named 'CustomHeading<level>'.
    """
    heading_style_def = styles.get(f"heading{level}", styles.get("heading1", {}))
    style_kwargs = dict(
        fontSize=heading_style_def.get("font_size", 18 - level * 2),
        spaceAfter=heading_style_def.get("space_after", 12),
        spaceBefore=heading_style_def.get("space_before", 12),
        alignment=self._getAlignment(heading_style_def.get("align", "left")),
        textColor=self._hexToColor(heading_style_def.get("color", "#2F2F2F")),
    )
    # Honour the "bold" flag the same way the table header does
    if heading_style_def.get("bold", True):
        style_kwargs["fontName"] = 'Helvetica-Bold'
    return ParagraphStyle(f'CustomHeading{level}', **style_kwargs)
def _createNormalStyle(self, styles: Dict[str, Any]) -> "ParagraphStyle":
    """Create the body-text ParagraphStyle from the "paragraph" definition.

    A new style object is returned on every call, so callers may adjust
    attributes on the result without affecting other paragraphs. The return
    annotation is a string because ParagraphStyle is only bound when the
    guarded reportlab import succeeded.

    Args:
        styles: Style set; the "paragraph" entry supplies font_size, color,
            align, space_after, line_height and bold (all optional).

    Returns:
        A ParagraphStyle named 'CustomNormal'.
    """
    paragraph_style_def = styles.get("paragraph", {})
    font_size = paragraph_style_def.get("font_size", 11)
    style_kwargs = dict(
        fontSize=font_size,
        spaceAfter=paragraph_style_def.get("space_after", 6),
        alignment=self._getAlignment(paragraph_style_def.get("align", "left")),
        textColor=self._hexToColor(paragraph_style_def.get("color", "#2F2F2F")),
        # line_height is a multiplier of the font size
        leading=paragraph_style_def.get("line_height", 1.2) * font_size,
    )
    # Only override the font when bold is requested; otherwise keep
    # reportlab's default font name.
    if paragraph_style_def.get("bold", False):
        style_kwargs["fontName"] = 'Helvetica-Bold'
    return ParagraphStyle('CustomNormal', **style_kwargs)
def _getAlignment(self, align: str) -> int:
    """Convert an alignment string to a reportlab paragraph alignment constant.

    Matching is case-insensitive and ignores surrounding whitespace.

    Args:
        align: One of "left", "center", "right", "justify", or the numeric
            strings "0", "1", "2".

    Returns:
        The matching TA_* constant; TA_LEFT for empty, non-string or
        unknown values.
    """
    # reportlab does provide TA_RIGHT; it is imported here because the
    # module-level import list does not include it.
    from reportlab.lib.enums import TA_RIGHT

    if not align or not isinstance(align, str):
        return TA_LEFT
    align_map = {
        "center": TA_CENTER,
        "left": TA_LEFT,
        "justify": TA_JUSTIFY,
        "right": TA_RIGHT,
        "0": TA_LEFT,  # Handle numeric strings
        "1": TA_CENTER,
        # NOTE(review): "2" has always meant justify here, although
        # reportlab's own TA_RIGHT has the value 2 - confirm which
        # numbering the style producers use before changing it.
        "2": TA_JUSTIFY
    }
    return align_map.get(align.lower().strip(), TA_LEFT)
def _getTableAlignment(self, align: str) -> str:
"""Convert alignment string to ReportLab table alignment string."""
if not align or not isinstance(align, str):
return 'LEFT'
align_map = {
"center": 'CENTER',
"left": 'LEFT',
"justify": 'LEFT', # Tables don't support justify, use LEFT
"right": 'RIGHT',
"0": 'LEFT', # Handle numeric strings
"1": 'CENTER',
"2": 'LEFT' # Tables don't support justify, use LEFT
}
return align_map.get(align.lower().strip(), 'LEFT')
def _hexToColor(self, hex_color: str) -> "colors.Color":
    """Convert a hex colour string to a reportlab colour.

    Accepted forms, with or without a leading '#': RRGGBB, AARRGGBB (the
    alpha digits are ignored) and the three-digit shorthand RGB. The return
    annotation is a string because `colors` is only bound when the guarded
    reportlab import succeeded.

    Args:
        hex_color: Colour string to convert.

    Returns:
        The matching colors.Color, or colors.black when the value is not a
        string or not a recognised hex colour.
    """
    try:
        digits = hex_color.strip().lstrip('#')
        if len(digits) == 8:
            # aRGB format: skip the alpha channel (first 2 characters)
            digits = digits[2:]
        elif len(digits) == 3:
            # Shorthand: each digit is doubled (e.g. F80 -> FF8800)
            digits = "".join(ch * 2 for ch in digits)
        if len(digits) == 6:
            red, green, blue = (int(digits[i:i + 2], 16) / 255.0 for i in (0, 2, 4))
            return colors.Color(red, green, blue)
        # Fallback for other formats
        return colors.black
    except (AttributeError, TypeError, ValueError):
        # Non-string input or non-hex digits
        return colors.black
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render a single JSON section to a list of PDF flowables.

    Each element of the section is rendered according to its own "type".
    "reference" and "extracted_text" elements are handled inline; all other
    types go through the dedicated _renderJson* method. When an element has
    no recognised type, the section's type is used, and finally the
    paragraph renderer.

    Args:
        section: Section dict; type and elements are obtained through
            self._getSectionType and self._getSectionData.
        styles: Style set passed on to the element renderers.

    Returns:
        The flowables for the section. If rendering raises, a single
        paragraph describing the error is returned instead.
    """
    try:
        section_type = self._getSectionType(section)
        elements = self._getSectionData(section)

        # One table serves both the element-level and the section-level lookup
        renderers = {
            "table": self._renderJsonTable,
            "bullet_list": self._renderJsonBulletList,
            "heading": self._renderJsonHeading,
            "paragraph": self._renderJsonParagraph,
            "code_block": self._renderJsonCodeBlock,
            "image": self._renderJsonImage,
        }

        all_elements = []
        for element in elements:
            element_type = element.get("type", "") if isinstance(element, dict) else ""

            if element_type == "reference":
                # Document reference format: only the label is shown
                label = element.get("label", "Reference")
                ref_style = ParagraphStyle(
                    'Reference',
                    parent=self._createNormalStyle(styles),
                    # Italic is selected through the font name; ParagraphStyle
                    # has no fontStyle property.
                    fontName='Helvetica-Oblique',
                    textColor=colors.grey
                )
                all_elements.append(Paragraph(f"[Reference: {label}]", ref_style))
                all_elements.append(Spacer(1, 6))
                continue

            if element_type == "extracted_text":
                # Extracted text format, optionally followed by its source
                content = element.get("content", "")
                source = element.get("source", "")
                if content:
                    source_text = f" (Source: {source})" if source else ""
                    all_elements.append(Paragraph(f"{content}{source_text}", self._createNormalStyle(styles)))
                    all_elements.append(Spacer(1, 6))
                continue

            # Element type wins over section type (elements can differ from
            # their section); unknown types fall back to paragraph.
            renderer = renderers.get(element_type) if isinstance(element_type, str) else None
            if renderer is None and isinstance(section_type, str):
                renderer = renderers.get(section_type)
            if renderer is None:
                renderer = self._renderJsonParagraph
            all_elements.extend(renderer(element, styles))
        return all_elements
    except Exception as e:
        self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
        return [Paragraph(f"[Error rendering section: {str(e)}]", self._createNormalStyle(styles))]
def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON table to PDF elements using AI-generated styles."""
try:
# Handle nested content structure: element.content.headers vs element.headers
# Extract from nested content structure
content = table_data.get("content", {})
if not isinstance(content, dict):
return []
headers = content.get("headers", [])
rows = content.get("rows", [])
if not headers or not rows:
return []
# Prepare table data
table_data_list = [headers] + rows
# Create table
table = Table(table_data_list)
# Apply styling
table_header_style = styles.get("table_header", {})
table_cell_style = styles.get("table_cell", {})
table_style = [
('BACKGROUND', (0, 0), (-1, 0), self._hexToColor(table_header_style.get("background", "#4F4F4F"))),
('TEXTCOLOR', (0, 0), (-1, 0), self._hexToColor(table_header_style.get("text_color", "#FFFFFF"))),
('ALIGN', (0, 0), (-1, -1), self._getTableAlignment(table_cell_style.get("align", "left"))),
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'),
('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)),
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
('BACKGROUND', (0, 1), (-1, -1), self._hexToColor(table_cell_style.get("background", "#FFFFFF"))),
('FONTSIZE', (0, 1), (-1, -1), table_cell_style.get("font_size", 10)),
('GRID', (0, 0), (-1, -1), 1, colors.black)
]
table.setStyle(TableStyle(table_style))
return [table, Spacer(1, 12)]
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return []
def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON bullet list to PDF elements using AI-generated styles."""
try:
# Extract from nested content structure
content = list_data.get("content", {})
if not isinstance(content, dict):
return []
items = content.get("items", [])
bullet_style_def = styles.get("bullet_list", {})
elements = []
for item in items:
if isinstance(item, str):
elements.append(Paragraph(f"• {item}", self._createNormalStyle(styles)))
elif isinstance(item, dict) and "text" in item:
elements.append(Paragraph(f"• {item['text']}", self._createNormalStyle(styles)))
if elements:
elements.append(Spacer(1, bullet_style_def.get("space_after", 3)))
return elements
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return []
def _renderJsonHeading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON heading to PDF elements using AI-generated styles."""
try:
# Extract from nested content structure
content = heading_data.get("content", {})
if not isinstance(content, dict):
return []
text = content.get("text", "")
level = content.get("level", 1)
if text:
level = max(1, min(6, level))
heading_style = self._createHeadingStyle(styles, level)
return [Paragraph(text, heading_style)]
return []
except Exception as e:
self.logger.warning(f"Error rendering heading: {str(e)}")
return []
def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON paragraph to PDF elements using AI-generated styles."""
try:
# Extract from nested content structure
content = paragraph_data.get("content", {})
if isinstance(content, dict):
text = content.get("text", "")
elif isinstance(content, str):
text = content
else:
text = ""
if text:
return [Paragraph(text, self._createNormalStyle(styles))]
return []
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return []
def _renderJsonCodeBlock(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON code block to PDF elements using AI-generated styles."""
try:
# Extract from nested content structure
content = code_data.get("content", {})
if not isinstance(content, dict):
return []
code = content.get("code", "")
language = content.get("language", "")
code_style_def = styles.get("code_block", {})
if code:
elements = []
if language:
lang_style = ParagraphStyle(
'CodeLanguage',
fontSize=code_style_def.get("font_size", 9),
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
fontName='Helvetica-Bold'
)
elements.append(Paragraph(f"Code ({language}):", lang_style))
code_style = ParagraphStyle(
'CodeBlock',
fontSize=code_style_def.get("font_size", 9),
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
fontName=code_style_def.get("font", "Courier"),
backColor=self._hexToColor(code_style_def.get("background", "#F5F5F5")),
spaceAfter=code_style_def.get("space_after", 6)
)
elements.append(Paragraph(code, code_style))
return elements
return []
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
return []
def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render a JSON image element to PDF flowables.

    The base64 payload is looked up in this order: content["base64Data"],
    the element's own "base64Data", then a `data:image/...;base64,` URI in
    the element's or content's "url". The image is scaled down (never up) to
    fit the page frame. A caption paragraph follows the image when a caption
    or a non-default alt text is available.

    Args:
        image_data: Image element, normally with a nested "content" dict.
        styles: Style set used for caption, placeholder and error text.

    Returns:
        [Image] or [Image, caption Paragraph] on success; otherwise a single
        placeholder or error Paragraph. This method does not raise for bad
        image data.
    """
    alt_text = "Image"
    try:
        content = image_data.get("content", {})
        base64_data = ""
        caption = ""
        if isinstance(content, dict):
            # Nested content structure
            base64_data = content.get("base64Data", "")
            alt_text = content.get("altText", "Image")
            caption = content.get("caption", "")
        elif isinstance(content, str):
            # Content might be base64 string directly (shouldn't happen, but handle it)
            self.logger.warning("Image content is a string, not a dict. This should not happen.")
            return [Paragraph(f"[Image: Invalid format]", self._createNormalStyle(styles))]

        # If base64Data not found in content, try direct element fields (fallback)
        if not base64_data:
            base64_data = image_data.get("base64Data", "")
            if not alt_text or alt_text == "Image":
                alt_text = image_data.get("altText", "Image")
            if not caption:
                caption = image_data.get("caption", "")

        # If base64Data still not found, try extracting from a data URI
        if not base64_data:
            url = image_data.get("url", "") or (content.get("url", "") if isinstance(content, dict) else "")
            if url and isinstance(url, str) and url.startswith("data:image/"):
                import re
                # DOTALL: the payload of a data URI may be wrapped over lines
                match = re.match(r'data:image/[^;]+;base64,(.+)', url, re.DOTALL)
                if match:
                    base64_data = match.group(1)

        if not base64_data:
            self.logger.warning(f"No base64 data found for image. Alt text: {alt_text}")
            return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]

        # Type check comes before any use of the value as text
        if not isinstance(base64_data, str):
            self.logger.warning(f"Base64 data is not a string: {type(base64_data)}")
            return [Paragraph(f"[Image: {alt_text} - Invalid data type]", self._createNormalStyle(styles))]

        try:
            from reportlab.platypus import Image as ReportLabImage

            imageStream = io.BytesIO(base64.b64decode(base64_data))
            imgWidth, imgHeight = self._fitImageToFrame(imageStream)
            elements = [ReportLabImage(imageStream, width=imgWidth, height=imgHeight)]

            # Caption wins; otherwise a non-default alt text is used as caption
            if caption:
                caption_text = f"{caption}"
            elif alt_text and alt_text != "Image":
                caption_text = f"Figure: {alt_text}"
            else:
                caption_text = ""
            if caption_text:
                captionStyle = self._createNormalStyle(styles)
                captionStyle.fontSize = 10
                captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666"))
                elements.append(Paragraph(caption_text, captionStyle))
            return elements
        except Exception as imgError:
            self.logger.error(f"Error embedding image in PDF: {str(imgError)}")
            # Return error message instead of placeholder
            errorStyle = self._createNormalStyle(styles)
            errorStyle.textColor = self._hexToColor("#FF0000")  # Red color for error
            errorMsg = f"[Error: Could not embed image '{alt_text}'. {str(imgError)}]"
            return [Paragraph(errorMsg, errorStyle)]
    except Exception as e:
        self.logger.error(f"Error rendering image: {str(e)}")
        errorStyle = self._createNormalStyle(styles)
        errorStyle.textColor = self._hexToColor("#FF0000")  # Red color for error
        # Use the alt text resolved so far; image_data itself may be the
        # reason we got here (e.g. it is not a dict).
        errorMsg = f"[Error: Could not render image '{alt_text}'. {str(e)}]"
        return [Paragraph(errorMsg, errorStyle)]

def _fitImageToFrame(self, imageStream, maxWidth: float = 430.0, maxHeight: float = 730.0):
    """Return (width, height) in points for an image so it fits the frame.

    The defaults are kept slightly below the frame size of roughly
    439 x 740 points that the A4 document template with its 72/72/72/18
    margins was observed to provide. Pixel dimensions are converted with
    the image's own DPI (96 when missing or invalid); the image is only
    ever scaled down. The stream is rewound before returning.

    If Pillow is not installed, a fixed 4 x 3 inch size is returned. If
    Pillow is installed but cannot read the data, the exception propagates
    so the caller can emit an error placeholder instead of passing broken
    image data on to the PDF build.
    """
    try:
        from PIL import Image as PILImage
    except ImportError:
        self.logger.warning("Error calculating image size: Pillow not available, using safe default")
        imageStream.seek(0)
        return 4 * inch, 3 * inch

    pilImage = PILImage.open(imageStream)
    pixelWidth, pixelHeight = pilImage.size

    # Convert pixels to points: 1 point = 1/72 inch, so pixels * (72/dpi)
    try:
        dpi = float(pilImage.info.get('dpi', (96, 96))[0])
    except (TypeError, ValueError, IndexError, ZeroDivisionError):
        dpi = 96.0
    if not dpi > 0:  # also catches NaN
        dpi = 96.0
    widthPoints = pixelWidth * (72.0 / dpi)
    heightPoints = pixelHeight * (72.0 / dpi)

    # Use the smaller scale so both dimensions fit; never scale up
    widthScale = maxWidth / widthPoints if widthPoints > 0 else 1.0
    heightScale = maxHeight / heightPoints if heightPoints > 0 else 1.0
    scale = min(widthScale, heightScale, 1.0)

    # Reset stream for reportlab
    imageStream.seek(0)
    return widthPoints * scale, heightPoints * scale