935 lines
No EOL
46 KiB
Python
935 lines
No EOL
46 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""
|
|
PDF renderer for report generation using reportlab.
|
|
"""
|
|
|
|
from .documentRendererBaseTemplate import BaseRenderer
|
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
|
from typing import Dict, Any, List, Optional
|
|
import io
|
|
import base64
|
|
|
|
try:
|
|
from reportlab.lib.pagesizes import letter, A4
|
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
|
from reportlab.lib.units import inch
|
|
from reportlab.lib import colors
|
|
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
|
|
REPORTLAB_AVAILABLE = True
|
|
except ImportError:
|
|
REPORTLAB_AVAILABLE = False
|
|
|
|
class RendererPdf(BaseRenderer):
|
|
"""Renders content to PDF format using reportlab."""
|
|
|
|
@classmethod
def getSupportedFormats(cls) -> List[str]:
    """List the output format identifiers this renderer produces."""
    supportedFormats = ['pdf']
    return supportedFormats
|
|
|
|
@classmethod
def getFormatAliases(cls) -> List[str]:
    """List the alternative format names declared for this renderer."""
    aliases = ['document', 'print']
    return aliases
|
|
|
|
@classmethod
def getPriority(cls) -> int:
    """Return the fixed priority value of the PDF renderer (120)."""
    pdfPriority = 120
    return pdfPriority
|
|
|
|
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
    """Classify the renderer output.

    The result is always 'document'; ``formatName`` is accepted but not
    consulted.
    """
    outputStyle = 'document'
    return outputStyle
|
|
|
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: Optional[str] = None, aiService=None) -> List[RenderedDocument]:
    """Render extracted JSON content to a PDF document.

    When reportlab could not be imported (``REPORTLAB_AVAILABLE`` is False)
    the call is delegated to ``RendererHtml.render`` with the same arguments.
    Otherwise the PDF is produced by ``_generatePdfFromJson`` and wrapped in
    a single ``RenderedDocument``.

    Args:
        extractedContent: Structured document content; ``metadata`` and
            ``documents`` are read from it when it is a dict.
        title: Document title, also used to derive a filename when the first
            document entry does not carry one.
        userPrompt: Optional user prompt forwarded to the style resolution.
        aiService: Optional AI service forwarded to the style resolution.

    Returns:
        A one-element list. On success it holds the PDF
        (``application/pdf``); if anything raises, it holds a ``text/plain``
        document containing the error message instead.
    """
    try:
        if not REPORTLAB_AVAILABLE:
            # Fallback to HTML if reportlab not available
            from .rendererHtml import RendererHtml
            html_renderer = RendererHtml()
            return await html_renderer.render(extractedContent, title, userPrompt, aiService)

        # Generate PDF using AI-analyzed styling
        pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)

        # Read metadata and documents through one guarded view so that a
        # missing or non-dict payload cannot raise here.
        content_dict = extractedContent if isinstance(extractedContent, dict) else {}
        metadata = content_dict.get("metadata", {})
        documentType = metadata.get("documentType") if isinstance(metadata, dict) else None

        # Determine filename from the first document entry, else from the title
        documents = content_dict.get("documents", [])
        first_document = documents[0] if isinstance(documents, list) and documents else None
        filename = first_document.get("filename") if isinstance(first_document, dict) else None
        if not filename:
            filename = self._determineFilename(title, "application/pdf")

        # _generatePdfFromJson returns base64 text; turn it back into bytes.
        if isinstance(pdf_content, str):
            try:
                pdf_bytes = base64.b64decode(pdf_content)
            except ValueError:
                # binascii.Error is a ValueError subclass; not base64, so
                # keep the text itself as the payload.
                pdf_bytes = pdf_content.encode('utf-8')
        else:
            pdf_bytes = pdf_content

        return [
            RenderedDocument(
                documentData=pdf_bytes,
                mimeType="application/pdf",
                filename=filename,
                documentType=documentType,
                metadata=metadata if isinstance(metadata, dict) else None
            )
        ]

    except Exception as e:
        self.logger.error(f"Error rendering PDF: {str(e)}")
        # Return minimal fallback
        fallbackContent = f"PDF Generation Error: {str(e)}"
        return [
            RenderedDocument(
                documentData=fallbackContent.encode('utf-8'),
                mimeType="text/plain",
                filename=self._determineFilename(title, "text/plain")
            )
        ]
|
|
|
|
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
    """Build the PDF for a structured JSON document and return it as base64 text.

    The document consists of a title page (title, generation timestamp, page
    break) followed by the flowables produced for every section.

    Args:
        json_content: Document in the standardized schema
            ``{metadata: {...}, documents: [{sections: [...]}]}``.
        title: Preferred document title; ``metadata["title"]`` is used only
            when this is empty.
        userPrompt: Optional prompt forwarded to ``_getStyleSet``.
        aiService: Optional AI service forwarded to ``_getStyleSet``.

    Returns:
        The PDF bytes encoded as a base64 ASCII string.

    Raises:
        Exception: "PDF generation failed: ..." wrapping any underlying error
            (schema validation failures included); the cause is chained.
    """
    try:
        # Get style set: use styles from metadata if available, otherwise enhance with AI
        styles = await self._getStyleSet(json_content, userPrompt, aiService)

        # Validate JSON structure
        if not self._validateJsonStructure(json_content):
            raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

        # Extract sections and metadata from standardized schema
        sections = self._extractSections(json_content)
        metadata = self._extractMetadata(json_content)

        # Use provided title (which comes from documents[].title) as primary source
        # Fallback to metadata.title only if title parameter is empty
        document_title = title if title else metadata.get("title", "Generated Document")

        # Keep the title short to prevent wrapping/overlapping, but keep the
        # caller's own title: truncate with an ellipsis instead of swapping
        # in an unrelated placeholder string.
        max_title_length = 40
        if len(document_title) > max_title_length:
            document_title = document_title[:max_title_length - 3].rstrip() + "..."

        # Create a buffer to hold the PDF
        buffer = io.BytesIO()

        # Create PDF document
        doc = SimpleDocTemplate(
            buffer,
            pagesize=A4,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=18
        )

        # Build PDF content
        story = []

        # Title page
        title_style = self._createTitleStyle(styles)
        story.append(Paragraph(document_title, title_style))
        story.append(Spacer(1, 50))  # Increased spacing to prevent overlap
        story.append(Paragraph(f"Generated: {self._formatTimestamp()}", self._createNormalStyle(styles)))
        story.append(Spacer(1, 30))  # Add spacing before page break
        story.append(PageBreak())

        # Process each section (sections already extracted above)
        self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
        for i, section in enumerate(sections):
            self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
            section_elements = self._renderJsonSection(section, styles)
            self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
            story.extend(section_elements)

        # Build PDF
        doc.build(story)

        # Get PDF content as base64
        pdf_bytes = buffer.getvalue()
        pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')

        return pdf_base64

    except Exception as e:
        self.logger.error(f"Error generating PDF from JSON: {str(e)}")
        raise Exception(f"PDF generation failed: {str(e)}") from e
|
|
|
|
async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
    """Resolve the style set used for rendering.

    Resolution order:
      1. ``extractedContent["metadata"]["styles"]`` when it is a non-empty
         dict (preferred source).
      2. The default style set enhanced by AI, when both ``userPrompt`` and
         ``aiService`` are given.
      3. The plain default style set.

    IMPORTANT: In a dynamic scalable AI system, styling should come from
    document generation, not be generated separately by renderers. AI is
    only a fallback for documents that carry no styles.

    Args:
        extractedContent: Document content with metadata (may contain styles)
        userPrompt: User's prompt (AI will detect style instructions in any language)
        aiService: AI service (used only if styles not in metadata and userPrompt provided)
        templateName: Name of template style set (None = default); not read
            by this implementation.

    Returns:
        Dict with style definitions for all document styles
    """
    fallbackStyles = self._getDefaultStyleSet()

    # Preferred source: styles delivered with the document metadata
    documentMeta = extractedContent.get("metadata", {}) if extractedContent else None
    if isinstance(documentMeta, dict):
        providedStyles = documentMeta.get("styles")
        if providedStyles and isinstance(providedStyles, dict):
            self.logger.debug("Using styles from document generation metadata")
            convertedStyles = self._convertColorsFormat(providedStyles)
            return self._validateStylesContrast(convertedStyles)

    # Without a prompt or without an AI service only the defaults remain
    if not (userPrompt and aiService):
        return fallbackStyles

    self.logger.info("Styles not in metadata, enhancing with AI based on user prompt...")
    aiStyles = await self._enhanceStylesWithAI(userPrompt, fallbackStyles, aiService)
    # Colors are converted after the AI step, then contrast-checked
    aiStyles = self._convertColorsFormat(aiStyles)
    return self._validateStylesContrast(aiStyles)
|
|
|
|
async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
    """Ask the AI service for styles derived from the user prompt.

    A style template is built for the "pdf" format and handed to
    ``_getAiStyles``. If either step raises, a warning is logged and
    ``defaultStyleSet`` is returned unchanged.
    """
    try:
        aiPrompt = self._createAiStyleTemplate("pdf", userPrompt, defaultStyleSet)
        return await self._getAiStyles(aiService, aiPrompt, defaultStyleSet)
    except Exception as err:
        self.logger.warning(f"AI style enhancement failed: {str(err)}, using default styles")
        return defaultStyleSet
|
|
|
|
def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Validate and fix contrast issues in AI-generated styles."""
|
|
try:
|
|
# Fix table header contrast
|
|
if "table_header" in styles:
|
|
header = styles["table_header"]
|
|
bg_color = header.get("background", "#FFFFFF")
|
|
text_color = header.get("text_color", "#000000")
|
|
|
|
# If both are white or both are dark, fix it
|
|
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
|
header["background"] = "#4F4F4F"
|
|
header["text_color"] = "#FFFFFF"
|
|
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
|
header["background"] = "#4F4F4F"
|
|
header["text_color"] = "#FFFFFF"
|
|
|
|
# Fix table cell contrast
|
|
if "table_cell" in styles:
|
|
cell = styles["table_cell"]
|
|
bg_color = cell.get("background", "#FFFFFF")
|
|
text_color = cell.get("text_color", "#000000")
|
|
|
|
# If both are white or both are dark, fix it
|
|
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
|
cell["background"] = "#FFFFFF"
|
|
cell["text_color"] = "#2F2F2F"
|
|
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
|
cell["background"] = "#FFFFFF"
|
|
cell["text_color"] = "#2F2F2F"
|
|
|
|
return styles
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Style validation failed: {str(e)}")
|
|
return self._getDefaultStyleSet()
|
|
|
|
def _getDefaultStyleSet(self) -> Dict[str, Any]:
|
|
"""Default PDF style set - used when no style instructions present."""
|
|
return {
|
|
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
|
|
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12},
|
|
"heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
|
|
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
|
|
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center", "font_size": 12},
|
|
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10},
|
|
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3},
|
|
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
|
|
}
|
|
|
|
async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
    """Request styles from the AI service and return them with converted colours.

    The response text is cleaned of Markdown code fences and parsed as JSON.
    Parsing is attempted on progressively repaired candidates, and the first
    one that decodes to a dict wins:
      1. the cleaned text as-is (so a valid response is never truncated),
      2. the span from the first '{' to the last '}' (JSON embedded in prose),
      3. the text from the first '{' with missing closing braces appended.

    Args:
        ai_service: AI service object; must expose a truthy ``aiObjects``
            attribute and an awaitable ``callAi``.
        style_template: Prompt sent to the AI service.
        default_styles: Returned unchanged whenever no usable AI styles are
            obtained.

    Returns:
        The parsed style dict passed through ``_convertColorsFormat``, or
        ``default_styles`` on any failure (every exception is caught and
        logged as a warning).
    """
    if not ai_service:
        return default_styles

    try:
        import json
        import re

        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum

        request_options = AiCallOptions()
        request_options.operationType = OperationTypeEnum.DATA_GENERATE

        request = AiCallRequest(prompt=style_template, context="", options=request_options)

        # Check if AI service is properly configured
        if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
            self.logger.warning("AI service not properly configured, using defaults")
            return default_styles

        response = await ai_service.callAi(request)

        # Check if response is valid
        if not response:
            self.logger.warning("AI service returned no response, using defaults")
            return default_styles

        result = response.content.strip() if response.content else ""
        if not result:
            self.logger.warning("AI styling returned empty response, using defaults")
            return default_styles

        # Log the raw response for debugging
        self.logger.debug(f"AI styling raw response: {result[:200]}...")

        # Strip Markdown code fences around the payload
        json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
        if json_match:
            result = json_match.group(1).strip()
        elif result.startswith('```json'):
            result = re.sub(r'^```json\s*', '', result)
            result = re.sub(r'\s*```$', '', result)
        elif result.startswith('```'):
            result = re.sub(r'^```\s*', '', result)
            result = re.sub(r'\s*```$', '', result)

        result = result.strip()
        if not result:
            self.logger.warning("AI styling returned empty content after cleanup, using defaults")
            return default_styles

        def _candidates(text):
            # Yield parse candidates from least to most invasive repair.
            yield text
            start = text.find('{')
            if start == -1:
                return
            end = text.rfind('}')
            if end > start:
                yield text[start:end + 1]
            tail = text[start:]
            missing_braces = tail.count('{') - tail.count('}')
            if missing_braces > 0:
                yield tail + '}' * missing_braces

        styles = None
        for attempt, candidate in enumerate(_candidates(result)):
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError as json_error:
                if attempt == 0:
                    # Record the untouched response once; the file log is
                    # used because it is not truncated.
                    self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
                    self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "PDF_RENDERER")
                    self.services.utils.debugLogToFile(f"RESPONSE LENGTH: {len(result)} characters", "PDF_RENDERER")
                    self.logger.warning(f"Raw content that failed to parse: {result}")
                continue
            if isinstance(parsed, dict):
                styles = parsed
                if attempt > 0:
                    self.logger.info("Successfully extracted JSON from explanatory text")
                break

        if styles is None:
            self.logger.warning("Could not extract valid JSON from response, using defaults")
            return default_styles

        self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")

        # Convert colors to PDF format (kept as hex strings; converted to
        # reportlab colours at render time)
        styles = self._convertColorsFormat(styles)

        return styles

    except Exception as e:
        self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
        return default_styles
|
|
|
|
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Convert colors to proper format for PDF compatibility."""
|
|
try:
|
|
for style_name, style_config in styles.items():
|
|
if isinstance(style_config, dict):
|
|
for prop, value in style_config.items():
|
|
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
|
|
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel) for consistency
|
|
styles[style_name][prop] = f"FF{value[1:]}"
|
|
elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
|
|
# Already aRGB format, keep as is
|
|
pass
|
|
return styles
|
|
except Exception as e:
|
|
self.logger.warning(f"Color conversion failed: {str(e)}")
|
|
return styles
|
|
|
|
def _getSafeColor(self, color_value: str, default: str = "#000000") -> str:
|
|
"""Get a safe hex color value for PDF."""
|
|
if isinstance(color_value, str) and color_value.startswith('#'):
|
|
if len(color_value) == 7:
|
|
return f"FF{color_value[1:]}"
|
|
elif len(color_value) == 9:
|
|
return color_value
|
|
return default
|
|
|
|
|
|
def _createTitleStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
    """Build the 'CustomTitle' paragraph style from the "title" style entry.

    Missing properties fall back to: font size 20, space after 30, centered
    alignment and colour #1F4E79. Leading is 1.4 times the font size so
    multi-line titles do not overlap; no space is added before the title.
    The chosen colour and spacing are written to the debug log file.
    """
    titleDef = styles.get("title", {})
    colorHex = titleDef.get("color", "#1F4E79")
    spaceAfter = titleDef.get("space_after", 30)
    fontSize = titleDef.get("font_size", 20)

    # DEBUG: record which colour and spacing end up on the title
    writeDebug = self.services.utils.debugLogToFile
    writeDebug(f"PDF TITLE COLOR: {colorHex} -> {self._hexToColor(colorHex)}", "PDF_RENDERER")
    writeDebug(f"PDF TITLE SPACE_AFTER: {spaceAfter}", "PDF_RENDERER")

    return ParagraphStyle(
        'CustomTitle',
        fontSize=fontSize,
        spaceAfter=spaceAfter,
        alignment=self._getAlignment(titleDef.get("align", "center")),
        textColor=self._hexToColor(colorHex),
        leading=fontSize * 1.4,
        spaceBefore=0,
    )
|
|
|
|
def _createHeadingStyle(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
    """Build the paragraph style for a heading of the given level.

    The definition is read from ``styles["heading<level>"]``; when that key
    is absent the "heading1" entry is used, and an empty definition if that
    is missing too. Unset properties default to font size ``18 - 2 * level``,
    12 points before and after, left alignment and colour #2F2F2F.
    """
    fallbackDef = styles.get("heading1", {})
    headingDef = styles.get(f"heading{level}", fallbackDef)

    styleOptions = {
        "fontSize": headingDef.get("font_size", 18 - level * 2),
        "spaceAfter": headingDef.get("space_after", 12),
        "spaceBefore": headingDef.get("space_before", 12),
        "alignment": self._getAlignment(headingDef.get("align", "left")),
        "textColor": self._hexToColor(headingDef.get("color", "#2F2F2F")),
    }
    return ParagraphStyle(f'CustomHeading{level}', **styleOptions)
|
|
|
|
def _createNormalStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
    """Build the 'CustomNormal' body-text style from the "paragraph" entry.

    Defaults: font size 11, 6 points after, left alignment, colour #2F2F2F.
    Leading is ``line_height`` (default 1.2) multiplied by the font size.
    """
    paragraphDef = styles.get("paragraph", {})
    fontSize = paragraphDef.get("font_size", 11)
    lineHeight = paragraphDef.get("line_height", 1.2)

    return ParagraphStyle(
        'CustomNormal',
        fontSize=fontSize,
        spaceAfter=paragraphDef.get("space_after", 6),
        alignment=self._getAlignment(paragraphDef.get("align", "left")),
        textColor=self._hexToColor(paragraphDef.get("color", "#2F2F2F")),
        leading=lineHeight * fontSize,
    )
|
|
|
|
def _getAlignment(self, align: str) -> int:
    """Convert an alignment name to the matching reportlab alignment constant.

    Recognised names (case-insensitive, surrounding whitespace ignored) are
    "left", "center", "right" and "justify", plus the numeric strings "0",
    "1" and "2". Anything else, including empty or non-string input, yields
    ``TA_LEFT``.
    """
    # TA_RIGHT is not part of the module-level reportlab import, so it is
    # brought into scope here.
    from reportlab.lib.enums import TA_RIGHT

    if not align or not isinstance(align, str):
        return TA_LEFT

    align_map = {
        "center": TA_CENTER,
        "left": TA_LEFT,
        "justify": TA_JUSTIFY,
        "right": TA_RIGHT,
        # Numeric strings keep this file's established numbering.
        # NOTE(review): reportlab's own values are TA_RIGHT=2 and
        # TA_JUSTIFY=4, so "2" -> justify differs from the library's
        # numbering; confirm which one producers of these values mean.
        "0": TA_LEFT,
        "1": TA_CENTER,
        "2": TA_JUSTIFY
    }
    return align_map.get(align.lower().strip(), TA_LEFT)
|
|
|
|
def _getTableAlignment(self, align: str) -> str:
|
|
"""Convert alignment string to ReportLab table alignment string."""
|
|
if not align or not isinstance(align, str):
|
|
return 'LEFT'
|
|
|
|
align_map = {
|
|
"center": 'CENTER',
|
|
"left": 'LEFT',
|
|
"justify": 'LEFT', # Tables don't support justify, use LEFT
|
|
"right": 'RIGHT',
|
|
"0": 'LEFT', # Handle numeric strings
|
|
"1": 'CENTER',
|
|
"2": 'LEFT' # Tables don't support justify, use LEFT
|
|
}
|
|
return align_map.get(align.lower().strip(), 'LEFT')
|
|
|
|
def _hexToColor(self, hex_color: str) -> colors.Color:
    """Convert a hex colour string to a reportlab colour.

    Accepted forms, with or without a leading '#':
      * 'RRGGBB'
      * 'AARRGGBB' (the alpha byte is ignored)
      * 'RGB' shorthand (each digit is doubled)

    Any other input (wrong length, non-hex digits, non-string values such as
    None) yields ``colors.black`` instead of raising.
    """
    try:
        digits = hex_color.strip().lstrip('#')

        if len(digits) == 8:
            # aRGB: skip the alpha channel (first 2 characters)
            digits = digits[2:]
        elif len(digits) == 3:
            # Shorthand: 'F0A' -> 'FF00AA'
            digits = "".join(ch * 2 for ch in digits)

        if len(digits) == 6:
            r, g, b = (int(digits[i:i + 2], 16) / 255.0 for i in (0, 2, 4))
            return colors.Color(r, g, b)

        # Fallback for other formats
        return colors.black
    except (AttributeError, TypeError, ValueError):
        # Non-string input or invalid hex digits
        return colors.black
|
|
|
|
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render one JSON section into a list of PDF flowables.

    Each element of the section is rendered according to its own ``type``:
      * "reference": an italic grey "[Reference: <label>]" line.
      * "extracted_text": the content as a paragraph, followed by an italic
        source note when a source is given.
      * "table", "bullet_list", "heading", "paragraph", "code_block",
        "image": the matching ``_renderJson*`` method.
    Elements without a recognised type fall back to the section's type, and
    finally to paragraph rendering.

    Returns:
        The flowables for all elements. If rendering raises, a single
        paragraph describing the error is returned instead.
    """
    try:
        section_type = self._getSectionType(section)
        elements = self._getSectionData(section)

        renderers = {
            "table": self._renderJsonTable,
            "bullet_list": self._renderJsonBulletList,
            "heading": self._renderJsonHeading,
            "paragraph": self._renderJsonParagraph,
            "code_block": self._renderJsonCodeBlock,
            "image": self._renderJsonImage,
        }

        def _rendererFor(kind):
            # Only string kinds can name a renderer; this also keeps
            # unhashable values away from the dict lookup.
            return renderers.get(kind) if isinstance(kind, str) else None

        all_elements = []
        for element in elements:
            element_type = element.get("type", "") if isinstance(element, dict) else ""

            if element_type == "reference":
                # Document reference format: only the label is shown
                label = element.get("label", "Reference")
                ref_style = ParagraphStyle(
                    'Reference',
                    parent=self._createNormalStyle(styles),
                    # ParagraphStyle has no italic flag; italics come from
                    # selecting the oblique face of the font.
                    fontName='Helvetica-Oblique',
                    textColor=colors.grey
                )
                all_elements.append(Paragraph(f"[Reference: {label}]", ref_style))
                all_elements.append(Spacer(1, 6))
                continue

            if element_type == "extracted_text":
                # Extracted text format
                content = element.get("content", "")
                source = element.get("source", "")
                if content:
                    source_text = f" <i>(Source: {source})</i>" if source else ""
                    all_elements.append(Paragraph(f"{content}{source_text}", self._createNormalStyle(styles)))
                    all_elements.append(Spacer(1, 6))
                continue

            # Element type wins over section type (elements can differ from
            # their section); unknown types are rendered as paragraphs.
            renderer = _rendererFor(element_type) or _rendererFor(section_type) or self._renderJsonParagraph
            all_elements.extend(renderer(element, styles))

        return all_elements

    except Exception as e:
        self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
        return [Paragraph(f"[Error rendering section: {str(e)}]", self._createNormalStyle(styles))]
|
|
|
|
def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
|
"""Render a JSON table to PDF elements using AI-generated styles."""
|
|
try:
|
|
# Handle nested content structure: element.content.headers vs element.headers
|
|
# Extract from nested content structure
|
|
content = table_data.get("content", {})
|
|
if not isinstance(content, dict):
|
|
return []
|
|
headers = content.get("headers", [])
|
|
rows = content.get("rows", [])
|
|
|
|
if not headers or not rows:
|
|
return []
|
|
|
|
# Prepare table data
|
|
table_data_list = [headers] + rows
|
|
|
|
# Create table
|
|
table = Table(table_data_list)
|
|
|
|
# Apply styling
|
|
table_header_style = styles.get("table_header", {})
|
|
table_cell_style = styles.get("table_cell", {})
|
|
|
|
table_style = [
|
|
('BACKGROUND', (0, 0), (-1, 0), self._hexToColor(table_header_style.get("background", "#4F4F4F"))),
|
|
('TEXTCOLOR', (0, 0), (-1, 0), self._hexToColor(table_header_style.get("text_color", "#FFFFFF"))),
|
|
('ALIGN', (0, 0), (-1, -1), self._getTableAlignment(table_cell_style.get("align", "left"))),
|
|
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'),
|
|
('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)),
|
|
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
|
|
('BACKGROUND', (0, 1), (-1, -1), self._hexToColor(table_cell_style.get("background", "#FFFFFF"))),
|
|
('FONTSIZE', (0, 1), (-1, -1), table_cell_style.get("font_size", 10)),
|
|
('GRID', (0, 0), (-1, -1), 1, colors.black)
|
|
]
|
|
|
|
table.setStyle(TableStyle(table_style))
|
|
|
|
return [table, Spacer(1, 12)]
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Error rendering table: {str(e)}")
|
|
return []
|
|
|
|
def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
|
"""Render a JSON bullet list to PDF elements using AI-generated styles."""
|
|
try:
|
|
# Extract from nested content structure
|
|
content = list_data.get("content", {})
|
|
if not isinstance(content, dict):
|
|
return []
|
|
items = content.get("items", [])
|
|
bullet_style_def = styles.get("bullet_list", {})
|
|
|
|
elements = []
|
|
for item in items:
|
|
if isinstance(item, str):
|
|
elements.append(Paragraph(f"• {item}", self._createNormalStyle(styles)))
|
|
elif isinstance(item, dict) and "text" in item:
|
|
elements.append(Paragraph(f"• {item['text']}", self._createNormalStyle(styles)))
|
|
|
|
if elements:
|
|
elements.append(Spacer(1, bullet_style_def.get("space_after", 3)))
|
|
|
|
return elements
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
|
return []
|
|
|
|
def _renderJsonHeading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
|
"""Render a JSON heading to PDF elements using AI-generated styles."""
|
|
try:
|
|
# Extract from nested content structure
|
|
content = heading_data.get("content", {})
|
|
if not isinstance(content, dict):
|
|
return []
|
|
text = content.get("text", "")
|
|
level = content.get("level", 1)
|
|
|
|
if text:
|
|
level = max(1, min(6, level))
|
|
heading_style = self._createHeadingStyle(styles, level)
|
|
return [Paragraph(text, heading_style)]
|
|
|
|
return []
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Error rendering heading: {str(e)}")
|
|
return []
|
|
|
|
def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
|
"""Render a JSON paragraph to PDF elements using AI-generated styles."""
|
|
try:
|
|
# Extract from nested content structure
|
|
content = paragraph_data.get("content", {})
|
|
if isinstance(content, dict):
|
|
text = content.get("text", "")
|
|
elif isinstance(content, str):
|
|
text = content
|
|
else:
|
|
text = ""
|
|
|
|
if text:
|
|
return [Paragraph(text, self._createNormalStyle(styles))]
|
|
|
|
return []
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
|
return []
|
|
|
|
def _renderJsonCodeBlock(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
|
"""Render a JSON code block to PDF elements using AI-generated styles."""
|
|
try:
|
|
# Extract from nested content structure
|
|
content = code_data.get("content", {})
|
|
if not isinstance(content, dict):
|
|
return []
|
|
code = content.get("code", "")
|
|
language = content.get("language", "")
|
|
code_style_def = styles.get("code_block", {})
|
|
|
|
if code:
|
|
elements = []
|
|
|
|
if language:
|
|
lang_style = ParagraphStyle(
|
|
'CodeLanguage',
|
|
fontSize=code_style_def.get("font_size", 9),
|
|
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
|
|
fontName='Helvetica-Bold'
|
|
)
|
|
elements.append(Paragraph(f"Code ({language}):", lang_style))
|
|
|
|
code_style = ParagraphStyle(
|
|
'CodeBlock',
|
|
fontSize=code_style_def.get("font_size", 9),
|
|
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
|
|
fontName=code_style_def.get("font", "Courier"),
|
|
backColor=self._hexToColor(code_style_def.get("background", "#F5F5F5")),
|
|
spaceAfter=code_style_def.get("space_after", 6)
|
|
)
|
|
elements.append(Paragraph(code, code_style))
|
|
|
|
return elements
|
|
|
|
return []
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Error rendering code block: {str(e)}")
|
|
return []
|
|
|
|
def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render a JSON image element to a list of reportlab flowables.

    The base64 payload is looked up, in order, in ``content["base64Data"]``,
    ``image_data["base64Data"]`` and finally in a ``data:image/...;base64,``
    URI stored under ``url`` (element level first, then ``content``). A
    ``base64Data`` value that is itself a data URI is reduced to its payload.

    The image is scaled down (never up) to fit a fixed 430 x 730 pt box while
    keeping its aspect ratio. Pixel sizes are converted to points using the
    DPI reported by PIL, defaulting to 96.

    Args:
        image_data: Image element; may carry ``content`` (dict), ``base64Data``,
            ``altText``, ``caption`` and ``url``.
        styles: Style definitions passed to ``self._createNormalStyle``; the
            caption colour is read from ``styles["paragraph"]["color"]``.

    Returns:
        ``[Image]`` or ``[Image, caption Paragraph]`` on success. When no
        usable payload is found a single placeholder ``Paragraph`` is
        returned; when embedding fails a single red error ``Paragraph`` is
        returned.
    """
    # Paragraph text is parsed as XML-like markup, so every piece of free
    # text (captions, alt text, exception messages) must be escaped first.
    from xml.sax.saxutils import escape

    def payloadFromDataUri(value: str) -> str:
        """Return the base64 payload of a ``data:image/...;base64,`` URI, else ""."""
        if not value.startswith("data:image/"):
            return ""
        header, separator, payload = value.partition(",")
        if not separator or not header.endswith(";base64"):
            return ""
        return payload

    # Bound before the try so the outer handler can always name the image.
    alt_text = "Image"
    try:
        content = image_data.get("content", {})
        base64_data = ""
        caption = ""

        if isinstance(content, dict):
            # Preferred layout: everything nested under "content".
            base64_data = content.get("base64Data", "")
            alt_text = content.get("altText", "Image")
            caption = content.get("caption", "")
        elif isinstance(content, str):
            # A bare string is not a supported layout; do not try to decode it.
            self.logger.warning("Image content is a string, not a dict. This should not happen.")
            return [Paragraph("[Image: Invalid format]", self._createNormalStyle(styles))]

        # Fall back to fields stored directly on the element.
        if not base64_data:
            base64_data = image_data.get("base64Data", "")
        if not alt_text or alt_text == "Image":
            alt_text = image_data.get("altText", "Image")
        if not caption:
            caption = image_data.get("caption", "")

        # Normalise to strings so later formatting and substring tests are safe.
        alt_text = str(alt_text) if alt_text else "Image"
        caption = str(caption) if caption else ""

        # Some producers put a complete data URI into base64Data; decoding it
        # with the prefix attached would silently yield corrupt bytes.
        if isinstance(base64_data, str) and base64_data.startswith("data:"):
            base64_data = payloadFromDataUri(base64_data)

        # Last resort: a data URI stored under "url".
        if not base64_data:
            url = image_data.get("url", "") or (content.get("url", "") if isinstance(content, dict) else "")
            if isinstance(url, str):
                base64_data = payloadFromDataUri(url)

        if not base64_data:
            self.logger.warning(f"No base64 data found for image. Alt text: {alt_text}")
            return [Paragraph(f"[Image: {escape(alt_text)}]", self._createNormalStyle(styles))]

        # Type check comes before any string operation on the payload.
        if not isinstance(base64_data, str):
            self.logger.warning(f"Base64 data is not a string: {type(base64_data)}")
            return [Paragraph(f"[Image: {escape(alt_text)} - Invalid data type]", self._createNormalStyle(styles))]

        # '{' and '[' are not base64 characters; a payload starting with one is
        # most likely a serialized element that was extracted by mistake.
        if base64_data.lstrip()[:1] in ("{", "["):
            self.logger.warning(
                f"Base64 data looks like serialized JSON ({len(base64_data)} chars), might be incorrectly extracted"
            )

        try:
            from reportlab.platypus import Image as ReportLabImage
            from reportlab.lib.units import inch
            import base64
            import io

            imageStream = io.BytesIO(base64.b64decode(base64_data))

            # Conservative box slightly smaller than the document frame
            # (reported by reportlab as roughly 439.3 x 739.9 pt).
            availableWidth = 430.0
            availableHeight = 730.0

            try:
                from PIL import Image as PILImage

                pilImage = PILImage.open(imageStream)
                originalWidth, originalHeight = pilImage.size

                # The "dpi" entry is normally an (x, y) pair but is not
                # guaranteed to be; anything unusable falls back to 96.
                dpi = 96.0
                rawDpi = pilImage.info.get("dpi")
                if isinstance(rawDpi, (tuple, list)) and rawDpi:
                    rawDpi = rawDpi[0]
                try:
                    rawDpi = float(rawDpi)
                except (TypeError, ValueError):
                    rawDpi = 0.0
                if 0 < rawDpi < float("inf"):
                    dpi = rawDpi

                # pixels * (72 / dpi) = points
                imgWidthPoints = originalWidth * (72.0 / dpi)
                imgHeightPoints = originalHeight * (72.0 / dpi)

                widthScale = availableWidth / imgWidthPoints if imgWidthPoints > 0 else 1.0
                heightScale = availableHeight / imgHeightPoints if imgHeightPoints > 0 else 1.0

                # Only ever shrink; the smaller factor keeps both sides in bounds.
                scale = min(widthScale, heightScale, 1.0)

                # The extra min() guards against floating-point overshoot.
                imgWidth = min(imgWidthPoints * scale, availableWidth)
                imgHeight = min(imgHeightPoints * scale, availableHeight)
            except Exception as e:
                self.logger.warning(f"Error calculating image size: {str(e)}, using safe default")
                # Fixed 4 x 3 inch box (288 x 216 pt), well inside the frame.
                imgWidth = 4 * inch
                imgHeight = 3 * inch

            # PIL may have consumed the stream; rewind it for reportlab.
            imageStream.seek(0)

            elements = [ReportLabImage(imageStream, width=imgWidth, height=imgHeight)]

            # Prefer an explicit caption; otherwise derive one from the alt text.
            usageHintMarker = "Render as visual element:"
            if caption:
                caption_text = caption
            elif alt_text != "Image":
                if usageHintMarker in alt_text:
                    # Usage-hint style alt text: keep only the part after the marker.
                    caption_text = f"Figure: {alt_text.split(usageHintMarker)[1].strip()}"
                else:
                    caption_text = f"Figure: {alt_text}"
            else:
                caption_text = ""

            if caption_text:
                captionStyle = self._createNormalStyle(styles)
                captionStyle.fontSize = 10
                captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666"))
                elements.append(Paragraph(f"<i>{escape(caption_text)}</i>", captionStyle))

            return elements

        except Exception as imgError:
            self.logger.error(f"Error embedding image in PDF: {str(imgError)}")
            # Show the failure in the document instead of a silent placeholder.
            errorStyle = self._createNormalStyle(styles)
            errorStyle.textColor = self._hexToColor("#FF0000")  # Red color for error
            errorMsg = f"[Error: Could not embed image '{escape(alt_text)}'. {escape(str(imgError))}]"
            return [Paragraph(errorMsg, errorStyle)]

    except Exception as e:
        self.logger.error(f"Error rendering image: {str(e)}")
        errorStyle = self._createNormalStyle(styles)
        errorStyle.textColor = self._hexToColor("#FF0000")  # Red color for error
        # Uses the already-resolved alt text; image_data may not be a dict here.
        errorMsg = f"[Error: Could not render image '{escape(str(alt_text))}'. {escape(str(e))}]"
        return [Paragraph(errorMsg, errorStyle)]