gateway/modules/services/serviceGeneration/renderers/pdf_renderer.py
2025-10-06 15:39:25 +02:00

225 lines
No EOL
8.4 KiB
Python

"""
PDF renderer for report generation using reportlab.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
try:
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
REPORTLAB_AVAILABLE = True
except ImportError:
REPORTLAB_AVAILABLE = False
class PdfRenderer(BaseRenderer):
"""Renders content to PDF format using reportlab."""
@classmethod
def get_supported_formats(cls) -> List[str]:
    """List the output format identifiers this renderer handles.

    Returns:
        A freshly created one-element list containing ``'pdf'``.
    """
    formats: List[str] = ['pdf']
    return formats
@classmethod
def get_format_aliases(cls) -> List[str]:
    """List the alternative format names that map to this renderer.

    Returns:
        A freshly created list with the aliases ``'document'`` and
        ``'print'``, in that order.
    """
    aliases: List[str] = ['document', 'print']
    return aliases
@classmethod
def get_priority(cls) -> int:
    """Report the fixed priority value of the PDF renderer.

    Returns:
        The integer ``120``.
    """
    priority = 120
    return priority
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
    """Build the PDF-specific guideline text for the extraction prompt.

    Only the format guidelines are produced here; neither ``user_prompt``
    nor ``title`` is read, so the returned text is constant.

    Args:
        user_prompt: Accepted for interface compatibility; unused.
        title: Accepted for interface compatibility; unused.

    Returns:
        The guideline lines joined with newlines, without a trailing newline.
    """
    guideline_lines = (
        "PDF FORMAT GUIDELINES:",
        "- Provide structured content suitable for pagination and headings (H1/H2/H3-like).",
        "- Use bullet lists and tables where useful; separate major sections clearly.",
        "- Avoid markdown/HTML; produce clean, plain content that can be laid out as PDF.",
        "OUTPUT: Return ONLY the PDF-ready textual content (no fences).",
    )
    return "\n".join(guideline_lines)
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
    """Render the extracted content and report the MIME type produced.

    Three outcomes are possible:

    * reportlab is importable: the result of ``self._generate_pdf`` is
      returned with ``"application/pdf"``.
    * reportlab is missing: rendering is delegated to ``HtmlRenderer`` and
      its content is returned with ``"text/html"``.
    * anything raises: the error is logged and a plain-text error message
      is returned with ``"text/plain"``; the exception is not propagated.

    Args:
        extracted_content: Text to lay out.
        title: Document title.

    Returns:
        A ``(content, mime_type)`` tuple.
    """
    try:
        if REPORTLAB_AVAILABLE:
            # Normal path: build the PDF with reportlab.
            return self._generate_pdf(extracted_content, title), "application/pdf"

        # reportlab could not be imported; degrade to the HTML renderer.
        # Imported here so the fallback module is only loaded when needed.
        from .html_renderer import HtmlRenderer
        fallback_renderer = HtmlRenderer()
        html_markup, _ = await fallback_renderer.render(extracted_content, title)
        return html_markup, "text/html"
    except Exception as e:
        reason = str(e)
        self.logger.error(f"Error rendering PDF: {reason}")
        # Last resort: hand the caller a readable message instead of raising.
        return f"PDF Generation Error: {reason}", "text/plain"
def _generate_pdf(self, content: str, title: str) -> str:
    """Lay out *content* as an A4 PDF and return it base64-encoded.

    The document opens with a title page (the title and a "Generated: ..."
    line from ``self._format_timestamp()``) followed by a page break. The
    body is then read line by line after stripping surrounding whitespace:

    * blank lines are dropped;
    * lines starting with ``# ``, ``## `` or ``### `` become headings;
    * all other lines are collected and passed to
      ``self._process_section`` when the next heading or the end of the
      input is reached.

    Title and heading text is XML-escaped before it is placed in a
    ``Paragraph`` so that literal ``<``, ``>`` and ``&`` in plain content
    are printed as-is instead of being interpreted as paragraph markup.

    Args:
        content: Newline-separated text to render.
        title: Document title shown on the title page.

    Returns:
        The generated PDF bytes as a base64 string.

    Raises:
        Exception: Any error raised while building the document is logged
            and re-raised unchanged.
    """
    # Local import: keeps this method self-contained with respect to the
    # file-level import block.
    from xml.sax.saxutils import escape

    try:
        # In-memory target for the generated PDF.
        buffer = io.BytesIO()
        doc = SimpleDocTemplate(
            buffer,
            pagesize=A4,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=18
        )

        styles = getSampleStyleSheet()
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=24,
            spaceAfter=30,
            alignment=TA_CENTER,
            textColor=colors.darkblue
        )
        heading_style = ParagraphStyle(
            'CustomHeading',
            parent=styles['Heading2'],
            fontSize=16,
            spaceAfter=12,
            spaceBefore=12,
            textColor=colors.darkblue
        )

        # (prefix, paragraph style, spacer height after the heading).
        # Each prefix ends with a space, so '# ' cannot match a line that
        # starts with '## ' or '### '.
        heading_rules = (
            ('# ', title_style, 12),
            ('## ', heading_style, 8),
            ('### ', styles['Heading3'], 6),
        )

        # Title page.
        story = [
            Paragraph(escape(title), title_style),
            Spacer(1, 20),
            Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal']),
            PageBreak(),
        ]

        # Body lines collected since the last heading.
        pending = []
        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            rule = next(
                (candidate for candidate in heading_rules if line.startswith(candidate[0])),
                None,
            )
            if rule is None:
                pending.append(line)
                continue

            # A heading closes the section that precedes it.
            if pending:
                story.extend(self._process_section(pending, styles))
                pending = []
            prefix, style, gap = rule
            story.append(Paragraph(escape(line[len(prefix):]), style))
            story.append(Spacer(1, gap))

        # Flush whatever follows the last heading.
        if pending:
            story.extend(self._process_section(pending, styles))

        doc.build(story)

        # getvalue() returns the whole buffer regardless of stream position.
        return base64.b64encode(buffer.getvalue()).decode('utf-8')
    except Exception as e:
        self.logger.error(f"Error generating PDF: {str(e)}")
        raise
def _process_section(self, lines: list, styles) -> list:
    """Convert the body lines of one section into reportlab flowables.

    Lines are handled in order:

    * blank lines are skipped;
    * a run of consecutive lines that contain ``|`` is passed to
      ``self._extract_table_data``; if that yields rows, one styled
      ``Table`` followed by a ``Spacer`` is emitted for the whole run;
    * otherwise the line becomes a ``Paragraph`` followed by a ``Spacer``.
      A leading ``- `` or ``* `` list marker is removed from the text.

    Processing continues after a table, so text that follows a table in
    the same section is kept. Lines that contain ``|`` but do not form a
    table (for example a single such line) are rendered as paragraphs.

    Args:
        lines: The section's lines, in document order.
        styles: Style sheet; only ``styles['Normal']`` is read here.

    Returns:
        A list of flowables for the section.
    """
    # Local import: keeps this method self-contained with respect to the
    # file-level import block.
    from xml.sax.saxutils import escape

    elements = []
    total = len(lines)
    index = 0
    while index < total:
        line = lines[index]
        if not line.strip():
            index += 1
            continue

        if '|' in line:
            # Find the end of the contiguous run of pipe-delimited lines
            # so that only this run is parsed as one table.
            run_end = index
            while run_end < total and '|' in lines[run_end]:
                run_end += 1

            table_data = self._extract_table_data(lines[index:run_end])
            if table_data:
                # Pad short rows so the grid is rectangular.
                # NOTE(review): ragged rows may be rejected by Table in
                # some reportlab versions - confirm before removing.
                column_count = max(len(row) for row in table_data)
                rows = [
                    list(row) + [''] * (column_count - len(row))
                    for row in table_data
                ]
                table = Table(rows)
                table.setStyle(TableStyle([
                    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                    ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                    ('FONTSIZE', (0, 0), (-1, 0), 14),
                    ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                    ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                    ('GRID', (0, 0), (-1, -1), 1, colors.black)
                ]))
                elements.append(table)
                elements.append(Spacer(1, 12))
                # Resume with the first line after the table.
                index = run_end
                continue
            # Not a usable table: fall through and render this line as text.

        # List items are rendered as plain paragraphs without their marker.
        text = line[2:] if line.startswith(('- ', '* ')) else line
        # Paragraph parses its text as XML-like markup, so literal '<',
        # '>' and '&' must be escaped to be printed as-is.
        elements.append(Paragraph(escape(text), styles['Normal']))
        elements.append(Spacer(1, 6))
        index += 1

    return elements
def _extract_table_data(self, lines: list) -> list:
"""Extract table data from lines."""
table_data = []
in_table = False
for line in lines:
if '|' in line:
if not in_table:
in_table = True
# Split by | and clean up
cells = [cell.strip() for cell in line.split('|') if cell.strip()]
if cells:
table_data.append(cells)
elif in_table and not line.strip():
# Empty line, might be end of table
break
return table_data if len(table_data) > 1 else []