gateway/modules/services/serviceGeneration/renderers/rendererPdf.py
2025-12-15 21:55:26 +01:00

653 lines
No EOL
31 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
PDF renderer for report generation using reportlab.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
try:
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
REPORTLAB_AVAILABLE = True
except ImportError:
REPORTLAB_AVAILABLE = False
class RendererPdf(BaseRenderer):
"""Renders content to PDF format using reportlab."""
@classmethod
def getSupportedFormats(cls) -> List[str]:
"""Return supported PDF formats."""
return ['pdf']
@classmethod
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['document', 'print']
@classmethod
def getPriority(cls) -> int:
"""Return priority for PDF renderer."""
return 120
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
try:
if not REPORTLAB_AVAILABLE:
# Fallback to HTML if reportlab not available
from .rendererHtml import RendererHtml
html_renderer = RendererHtml()
html_content, _ = await html_renderer.render(extractedContent, title, userPrompt, aiService)
return html_content, "text/html"
# Generate PDF using AI-analyzed styling
pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)
return pdf_content, "application/pdf"
except Exception as e:
self.logger.error(f"Error rendering PDF: {str(e)}")
# Return minimal fallback
return f"PDF Generation Error: {str(e)}", "text/plain"
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate PDF content from structured JSON document using AI-generated styling."""
try:
# Get style set: default styles, enhanced with AI if userPrompt provided
styles = await self._getStyleSet(userPrompt, aiService)
# Validate JSON structure
if not isinstance(json_content, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
# Make title shorter to prevent wrapping/overlapping
if len(document_title) > 40:
document_title = "PowerOn - Consent Agreement"
# Create a buffer to hold the PDF
buffer = io.BytesIO()
# Create PDF document
doc = SimpleDocTemplate(
buffer,
pagesize=A4,
rightMargin=72,
leftMargin=72,
topMargin=72,
bottomMargin=18
)
# Build PDF content
story = []
# Title page
title_style = self._createTitleStyle(styles)
story.append(Paragraph(document_title, title_style))
story.append(Spacer(1, 50)) # Increased spacing to prevent overlap
story.append(Paragraph(f"Generated: {self._formatTimestamp()}", self._createNormalStyle(styles)))
story.append(Spacer(1, 30)) # Add spacing before page break
story.append(PageBreak())
# Process each section
sections = json_content.get("sections", [])
self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
for i, section in enumerate(sections):
self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
section_elements = self._renderJsonSection(section, styles)
self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
story.extend(section_elements)
# Build PDF
doc.build(story)
# Get PDF content as base64
buffer.seek(0)
pdf_bytes = buffer.getvalue()
pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
return pdf_base64
except Exception as e:
self.logger.error(f"Error generating PDF from JSON: {str(e)}")
raise Exception(f"PDF generation failed: {str(e)}")
async def _getStyleSet(self, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
"""Get style set - default styles, enhanced with AI if userPrompt provided.
Args:
userPrompt: User's prompt (AI will detect style instructions in any language)
aiService: AI service (used only if userPrompt provided)
templateName: Name of template style set (None = default)
Returns:
Dict with style definitions for all document styles
"""
# Get default style set
defaultStyleSet = self._getDefaultStyleSet()
# Enhance with AI if userPrompt provided (AI handles multilingual style detection)
if userPrompt and aiService:
# AI will naturally detect style instructions in any language
self.logger.info(f"Enhancing styles with AI based on user prompt...")
enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
# Convert colors to PDF format after getting styles
enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet)
return self._validateStylesContrast(enhancedStyleSet)
else:
# Use default styles only
return defaultStyleSet
async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
"""Enhance default styles with AI based on user prompt."""
try:
style_template = self._createAiStyleTemplate("pdf", userPrompt, defaultStyleSet)
enhanced_styles = await self._getAiStyles(aiService, style_template, defaultStyleSet)
return enhanced_styles
except Exception as e:
self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles")
return defaultStyleSet
def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
bg_color = header.get("background", "#FFFFFF")
text_color = header.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
bg_color = cell.get("background", "#FFFFFF")
text_color = cell.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
return styles
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._getDefaultStyleSet()
def _getDefaultStyleSet(self) -> Dict[str, Any]:
"""Default PDF style set - used when no style instructions present."""
return {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12},
"heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center", "font_size": 12},
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10},
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3},
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
}
async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper PDF color conversion."""
if not ai_service:
return default_styles
try:
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
request_options = AiCallOptions()
request_options.operationType = OperationTypeEnum.DATA_GENERATE
request = AiCallRequest(prompt=style_template, context="", options=request_options)
# Check if AI service is properly configured
if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
self.logger.warning("AI service not properly configured, using defaults")
return default_styles
response = await ai_service.callAi(request)
# Check if response is valid
if not response:
self.logger.warning("AI service returned no response, using defaults")
return default_styles
import json
import re
# Clean and parse JSON
result = response.content.strip() if response and response.content else ""
# Check if result is empty
if not result:
self.logger.warning("AI styling returned empty response, using defaults")
return default_styles
# Log the raw response for debugging
self.logger.debug(f"AI styling raw response: {result[:200]}...")
# Extract JSON from various formats
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
if json_match:
result = json_match.group(1).strip()
elif result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
elif result.startswith('```'):
result = re.sub(r'^```\s*', '', result)
result = re.sub(r'\s*```$', '', result)
# Try to extract JSON from explanatory text
json_patterns = [
r'\{[^{}]*"title"[^{}]*\}', # Simple JSON object
r'\{.*?"title".*?\}', # JSON with title field
r'\{.*?"font_size".*?\}', # JSON with font_size field
]
for pattern in json_patterns:
json_match = re.search(pattern, result, re.DOTALL)
if json_match:
result = json_match.group(0)
break
# Additional cleanup - remove any leading/trailing whitespace and newlines
result = result.strip()
# Check if result is still empty after cleanup
if not result:
self.logger.warning("AI styling returned empty content after cleanup, using defaults")
return default_styles
# Try to parse JSON
try:
styles = json.loads(result)
self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")
except json.JSONDecodeError as json_error:
self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
# Use print instead of logger to avoid truncation
self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "PDF_RENDERER")
self.services.utils.debugLogToFile(f"RESPONSE LENGTH: {len(result)} characters", "PDF_RENDERER")
self.logger.warning(f"Raw content that failed to parse: {result}")
# Try to fix incomplete JSON by adding missing closing braces
open_braces = result.count('{')
close_braces = result.count('}')
if open_braces > close_braces:
# JSON is incomplete, add missing closing braces
missing_braces = open_braces - close_braces
result = result + '}' * missing_braces
self.logger.info(f"Added {missing_braces} missing closing brace(s)")
# Try parsing the fixed JSON
try:
styles = json.loads(result)
self.logger.info("Successfully fixed incomplete JSON")
except json.JSONDecodeError as fix_error:
self.logger.warning(f"Fixed JSON still invalid: {fix_error}")
# Try to extract just the JSON part if it's embedded in text
json_start = result.find('{')
json_end = result.rfind('}')
if json_start != -1 and json_end != -1 and json_end > json_start:
json_part = result[json_start:json_end+1]
try:
styles = json.loads(json_part)
self.logger.info("Successfully extracted JSON from explanatory text")
except json.JSONDecodeError:
self.logger.warning("Could not extract valid JSON from response, using defaults")
return default_styles
else:
return default_styles
else:
# Try to extract just the JSON part if it's embedded in text
json_start = result.find('{')
json_end = result.rfind('}')
if json_start != -1 and json_end != -1 and json_end > json_start:
json_part = result[json_start:json_end+1]
try:
styles = json.loads(json_part)
self.logger.info("Successfully extracted JSON from explanatory text")
except json.JSONDecodeError:
self.logger.warning("Could not extract valid JSON from response, using defaults")
return default_styles
else:
return default_styles
# Convert colors to PDF format (keep as hex strings, PDF renderer will convert them)
styles = self._convertColorsFormat(styles)
return styles
except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return default_styles
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert colors to proper format for PDF compatibility."""
try:
for style_name, style_config in styles.items():
if isinstance(style_config, dict):
for prop, value in style_config.items():
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel) for consistency
styles[style_name][prop] = f"FF{value[1:]}"
elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
# Already aRGB format, keep as is
pass
return styles
except Exception as e:
self.logger.warning(f"Color conversion failed: {str(e)}")
return styles
def _getSafeColor(self, color_value: str, default: str = "#000000") -> str:
"""Get a safe hex color value for PDF."""
if isinstance(color_value, str) and color_value.startswith('#'):
if len(color_value) == 7:
return f"FF{color_value[1:]}"
elif len(color_value) == 9:
return color_value
return default
def _createTitleStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
"""Create title style from style definitions."""
title_style_def = styles.get("title", {})
# DEBUG: Show what color and spacing is being used for title
title_color = title_style_def.get("color", "#1F4E79")
title_space_after = title_style_def.get("space_after", 30)
self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hexToColor(title_color)}", "PDF_RENDERER")
self.services.utils.debugLogToFile(f"PDF TITLE SPACE_AFTER: {title_space_after}", "PDF_RENDERER")
return ParagraphStyle(
'CustomTitle',
fontSize=title_style_def.get("font_size", 20), # Reduced from 24 to 20
spaceAfter=title_style_def.get("space_after", 30),
alignment=self._getAlignment(title_style_def.get("align", "center")),
textColor=self._hexToColor(title_color),
leading=title_style_def.get("font_size", 20) * 1.4, # Add line spacing for multi-line titles
spaceBefore=0 # Ensure no space before title
)
def _createHeadingStyle(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
"""Create heading style from style definitions."""
heading_key = f"heading{level}"
heading_style_def = styles.get(heading_key, styles.get("heading1", {}))
return ParagraphStyle(
f'CustomHeading{level}',
fontSize=heading_style_def.get("font_size", 18 - level * 2),
spaceAfter=heading_style_def.get("space_after", 12),
spaceBefore=heading_style_def.get("space_before", 12),
alignment=self._getAlignment(heading_style_def.get("align", "left")),
textColor=self._hexToColor(heading_style_def.get("color", "#2F2F2F"))
)
def _createNormalStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
"""Create normal paragraph style from style definitions."""
paragraph_style_def = styles.get("paragraph", {})
return ParagraphStyle(
'CustomNormal',
fontSize=paragraph_style_def.get("font_size", 11),
spaceAfter=paragraph_style_def.get("space_after", 6),
alignment=self._getAlignment(paragraph_style_def.get("align", "left")),
textColor=self._hexToColor(paragraph_style_def.get("color", "#2F2F2F")),
leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11)
)
def _getAlignment(self, align: str) -> int:
"""Convert alignment string to reportlab alignment constant."""
if not align or not isinstance(align, str):
return TA_LEFT
align_map = {
"center": TA_CENTER,
"left": TA_LEFT,
"justify": TA_JUSTIFY,
"right": TA_LEFT, # ReportLab doesn't have TA_RIGHT, use LEFT as fallback
"0": TA_LEFT, # Handle numeric strings
"1": TA_CENTER,
"2": TA_JUSTIFY
}
return align_map.get(align.lower().strip(), TA_LEFT)
def _getTableAlignment(self, align: str) -> str:
"""Convert alignment string to ReportLab table alignment string."""
if not align or not isinstance(align, str):
return 'LEFT'
align_map = {
"center": 'CENTER',
"left": 'LEFT',
"justify": 'LEFT', # Tables don't support justify, use LEFT
"right": 'RIGHT',
"0": 'LEFT', # Handle numeric strings
"1": 'CENTER',
"2": 'LEFT' # Tables don't support justify, use LEFT
}
return align_map.get(align.lower().strip(), 'LEFT')
def _hexToColor(self, hex_color: str) -> colors.Color:
"""Convert hex color to reportlab color."""
try:
hex_color = hex_color.lstrip('#')
# Handle aRGB format (8 characters: FF + RGB)
if len(hex_color) == 8:
# Skip the alpha channel (first 2 characters)
hex_color = hex_color[2:]
# Handle RGB format (6 characters)
if len(hex_color) == 6:
r = int(hex_color[0:2], 16) / 255.0
g = int(hex_color[2:4], 16) / 255.0
b = int(hex_color[4:6], 16) / 255.0
return colors.Color(r, g, b)
# Fallback for other formats
return colors.black
except:
return colors.black
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a single JSON section to PDF elements using AI-generated styles."""
try:
section_type = self._getSectionType(section)
elements = self._getSectionData(section)
# Process each element in the section
all_elements = []
for element in elements:
if section_type == "table":
all_elements.extend(self._renderJsonTable(element, styles))
elif section_type == "bullet_list":
all_elements.extend(self._renderJsonBulletList(element, styles))
elif section_type == "heading":
all_elements.extend(self._renderJsonHeading(element, styles))
elif section_type == "paragraph":
all_elements.extend(self._renderJsonParagraph(element, styles))
elif section_type == "code_block":
all_elements.extend(self._renderJsonCodeBlock(element, styles))
elif section_type == "image":
all_elements.extend(self._renderJsonImage(element, styles))
else:
# Fallback to paragraph for unknown types
all_elements.extend(self._renderJsonParagraph(element, styles))
return all_elements
except Exception as e:
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))]
def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON table to PDF elements using AI-generated styles."""
try:
headers = table_data.get("headers", [])
rows = table_data.get("rows", [])
if not headers or not rows:
return []
# Prepare table data
table_data_list = [headers] + rows
# Create table
table = Table(table_data_list)
# Apply styling
table_header_style = styles.get("table_header", {})
table_cell_style = styles.get("table_cell", {})
table_style = [
('BACKGROUND', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("background", "#4F4F4F"))),
('TEXTCOLOR', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("text_color", "#FFFFFF"))),
('ALIGN', (0, 0), (-1, -1), self._getTableAlignment(table_cell_style.get("align", "left"))),
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'),
('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)),
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
('BACKGROUND', (0, 1), (-1, -1), self._hex_to_color(table_cell_style.get("background", "#FFFFFF"))),
('FONTSIZE', (0, 1), (-1, -1), table_cell_style.get("font_size", 10)),
('GRID', (0, 0), (-1, -1), 1, colors.black)
]
table.setStyle(TableStyle(table_style))
return [table, Spacer(1, 12)]
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return []
def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON bullet list to PDF elements using AI-generated styles."""
try:
items = list_data.get("items", [])
bullet_style_def = styles.get("bullet_list", {})
elements = []
for item in items:
if isinstance(item, str):
elements.append(Paragraph(f"{item}", self._create_normal_style(styles)))
elif isinstance(item, dict) and "text" in item:
elements.append(Paragraph(f"{item['text']}", self._create_normal_style(styles)))
if elements:
elements.append(Spacer(1, bullet_style_def.get("space_after", 3)))
return elements
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return []
def _renderJsonHeading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON heading to PDF elements using AI-generated styles."""
try:
level = heading_data.get("level", 1)
text = heading_data.get("text", "")
if text:
level = max(1, min(6, level))
heading_style = self._createHeadingStyle(styles, level)
return [Paragraph(text, heading_style)]
return []
except Exception as e:
self.logger.warning(f"Error rendering heading: {str(e)}")
return []
def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON paragraph to PDF elements using AI-generated styles."""
try:
text = paragraph_data.get("text", "")
if text:
return [Paragraph(text, self._createNormalStyle(styles))]
return []
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return []
def _renderJsonCodeBlock(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON code block to PDF elements using AI-generated styles."""
try:
code = code_data.get("code", "")
language = code_data.get("language", "")
code_style_def = styles.get("code_block", {})
if code:
elements = []
if language:
lang_style = ParagraphStyle(
'CodeLanguage',
fontSize=code_style_def.get("font_size", 9),
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
fontName='Helvetica-Bold'
)
elements.append(Paragraph(f"Code ({language}):", lang_style))
code_style = ParagraphStyle(
'CodeBlock',
fontSize=code_style_def.get("font_size", 9),
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
fontName=code_style_def.get("font", "Courier"),
backColor=self._hexToColor(code_style_def.get("background", "#F5F5F5")),
spaceAfter=code_style_def.get("space_after", 6)
)
elements.append(Paragraph(code, code_style))
return elements
return []
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
return []
def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON image to PDF elements."""
try:
base64_data = image_data.get("base64Data", "")
alt_text = image_data.get("altText", "Image")
if base64_data:
# For now, just add a placeholder since reportlab image handling is complex
return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]
return []
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._createNormalStyle(styles))]