225 lines
No EOL
8.4 KiB
Python
225 lines
No EOL
8.4 KiB
Python
"""
|
|
PDF renderer for report generation using reportlab.
|
|
"""
|
|
|
|
from .base_renderer import BaseRenderer
|
|
from typing import Dict, Any, Tuple, List
|
|
import io
|
|
import base64
|
|
from datetime import datetime, UTC
|
|
|
|
try:
|
|
from reportlab.lib.pagesizes import letter, A4
|
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
|
from reportlab.lib.units import inch
|
|
from reportlab.lib import colors
|
|
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
|
|
REPORTLAB_AVAILABLE = True
|
|
except ImportError:
|
|
REPORTLAB_AVAILABLE = False
|
|
|
|
class PdfRenderer(BaseRenderer):
    """Renders content to PDF format using reportlab.

    The renderer understands a small Markdown-like subset of the extracted
    text: ``#``/``##``/``###`` headings, ``- ``/``* `` bullet items, and
    pipe-delimited tables. Everything else becomes a plain paragraph.

    NOTE(review): ``self.logger`` and ``self._format_timestamp()`` are not
    defined in this class; they are presumably provided by ``BaseRenderer``.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported PDF formats."""
        return ['pdf']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['document', 'print']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for PDF renderer."""
        return 120

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only PDF-specific guidelines; global prompt is built centrally.

        ``user_prompt`` and ``title`` are accepted for interface compatibility
        but are not used when building the guideline text.
        """
        return (
            "PDF FORMAT GUIDELINES:\n"
            "- Provide structured content suitable for pagination and headings (H1/H2/H3-like).\n"
            "- Use bullet lists and tables where useful; separate major sections clearly.\n"
            "- Avoid markdown/HTML; produce clean, plain content that can be laid out as PDF.\n"
            "OUTPUT: Return ONLY the PDF-ready textual content (no fences)."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to PDF format.

        Returns:
            A ``(payload, mime_type)`` tuple:

            * ``(base64-encoded PDF, "application/pdf")`` on success;
            * ``(html, "text/html")`` produced by ``HtmlRenderer`` when
              reportlab is not installed;
            * ``("PDF Generation Error: ...", "text/plain")`` if anything
              raises. This method never propagates exceptions.
        """
        try:
            if not REPORTLAB_AVAILABLE:
                # Fallback to HTML if reportlab not available
                from .html_renderer import HtmlRenderer

                html_renderer = HtmlRenderer()
                html_content, _ = await html_renderer.render(extracted_content, title)
                return html_content, "text/html"

            # Generate PDF using reportlab
            pdf_content = self._generate_pdf(extracted_content, title)
            return pdf_content, "application/pdf"

        except Exception as e:
            self.logger.error(f"Error rendering PDF: {str(e)}")
            # Return minimal fallback
            return f"PDF Generation Error: {str(e)}", "text/plain"

    @staticmethod
    def _escape_markup(text) -> str:
        """Escape ``&``, ``<`` and ``>`` so ``Paragraph`` treats text literally.

        reportlab parses ``Paragraph`` text as XML-like markup, so raw
        characters from generated content could otherwise be misread as tags.
        ``None`` is rendered as an empty string.
        """
        # Local import keeps this fix self-contained (no module-level change).
        from xml.sax.saxutils import escape

        return escape('' if text is None else str(text))

    @staticmethod
    def _is_separator_cell(cell: str) -> bool:
        """Return True for a Markdown delimiter cell such as ``---`` or ``:--:``."""
        core = cell
        if core.startswith(':'):
            core = core[1:]
        if core.endswith(':'):
            core = core[:-1]
        # Require two or more dashes so a lone "-" placeholder stays data.
        return len(core) >= 2 and set(core) == {'-'}

    def _text_paragraph(self, line: str, style):
        """Build a bullet or regular ``Paragraph`` from one line of text."""
        if line.startswith(('- ', '* ')):
            # This is a list item
            return Paragraph(f"• {self._escape_markup(line[2:])}", style)
        # Regular paragraph
        return Paragraph(self._escape_markup(line), style)

    def _build_table(self, table_data: list):
        """Create a styled ``Table``; the first row is styled as the header."""
        width = max(len(row) for row in table_data)
        # Pad short rows so every row has the same number of columns.
        rows = [row + [''] * (width - len(row)) for row in table_data]

        table = Table(rows)
        table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 14),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
        ]))
        return table

    def _generate_pdf(self, content: str, title: str) -> str:
        """Generate PDF content using reportlab.

        Builds a title page (title, generation timestamp, page break) and
        then walks ``content`` line by line. Heading lines flush the pending
        body lines through ``_process_section`` and emit a heading paragraph.

        Returns:
            The PDF document as a base64-encoded ASCII string.

        Raises:
            Exception: any error is logged and re-raised unchanged.
        """
        try:
            # Create a buffer to hold the PDF
            buffer = io.BytesIO()

            # Create PDF document
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18,
            )

            # Get styles
            styles = getSampleStyleSheet()

            # Create custom styles
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                alignment=TA_CENTER,
                textColor=colors.darkblue,
            )
            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=16,
                spaceAfter=12,
                spaceBefore=12,
                textColor=colors.darkblue,
            )

            # (prefix, paragraph style, spacer height after the heading)
            heading_rules = (
                ('# ', title_style, 12),
                ('## ', heading_style, 8),
                ('### ', styles['Heading3'], 6),
            )

            # Title page
            story = [
                Paragraph(self._escape_markup(title), title_style),
                Spacer(1, 20),
                Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal']),
                PageBreak(),
            ]

            # Process content
            pending = []
            for raw_line in (content or '').split('\n'):
                line = raw_line.strip()
                rule = next((r for r in heading_rules if line.startswith(r[0])), None)
                if rule is None:
                    # Blank lines are kept on purpose: _process_section skips
                    # them as text but uses them to tell adjacent tables apart.
                    pending.append(line)
                    continue

                prefix, style, gap = rule
                story.extend(self._process_section(pending, styles))
                pending = []
                story.append(Paragraph(self._escape_markup(line[len(prefix):]), style))
                story.append(Spacer(1, gap))

            # Process remaining content
            story.extend(self._process_section(pending, styles))

            # Build PDF
            doc.build(story)

            # Get PDF content as base64
            return base64.b64encode(buffer.getvalue()).decode('utf-8')

        except Exception as e:
            self.logger.error(f"Error generating PDF: {str(e)}")
            raise

    def _process_section(self, lines: list, styles) -> list:
        """Process a section of content into PDF elements.

        Consecutive lines containing ``|`` are grouped and rendered as one
        table (followed by a 12pt spacer) when they yield at least two rows;
        otherwise they are rendered as ordinary text. Bullet and plain lines
        become paragraphs. Content after a table is still rendered, and
        tables whose lines start with ``|`` are recognised.

        A single 6pt spacer closes a section that ends in text.
        NOTE(review): the source indentation was lost, so the once-per-section
        placement of this spacer is inferred — confirm against the original.

        Returns:
            A list of reportlab flowables; empty if the section has no
            non-blank lines.
        """
        elements = []
        body_style = styles['Normal']
        ends_with_text = False
        total = len(lines)
        index = 0

        while index < total:
            line = lines[index].strip()
            if not line:
                index += 1
                continue

            if '|' not in line:
                elements.append(self._text_paragraph(line, body_style))
                ends_with_text = True
                index += 1
                continue

            # Collect the run of adjacent pipe lines; a blank or pipe-free
            # line ends the candidate table.
            end = index
            while end < total and '|' in lines[end]:
                end += 1
            run = [candidate.strip() for candidate in lines[index:end]]
            index = end

            table_data = self._extract_table_data(run)
            if table_data:
                elements.append(self._build_table(table_data))
                elements.append(Spacer(1, 12))
                ends_with_text = False
            else:
                # A lone pipe line is just text that happens to contain "|".
                elements.extend(self._text_paragraph(text, body_style) for text in run)
                ends_with_text = True

        if ends_with_text:
            elements.append(Spacer(1, 6))
        return elements

    def _extract_table_data(self, lines: list) -> list:
        """Extract table data from lines.

        Each line containing ``|`` becomes one row. One leading and one
        trailing pipe are dropped, and empty interior cells are preserved so
        columns stay aligned. Rows that are entirely empty, and Markdown
        delimiter rows (``---|:--:``), are skipped. Scanning stops at the
        first blank line after the table has started.

        Returns:
            A list of rows (lists of cell strings), or ``[]`` when fewer than
            two rows were found.
        """
        table_data = []
        in_table = False

        for line in lines:
            stripped = line.strip()

            if '|' not in stripped:
                if in_table and not stripped:
                    # Empty line marks the end of the table
                    break
                continue

            in_table = True

            # Drop the optional outer pipes, then split and clean up
            inner = stripped
            if inner.startswith('|'):
                inner = inner[1:]
            if inner.endswith('|'):
                inner = inner[:-1]
            cells = [cell.strip() for cell in inner.split('|')]

            if not any(cells):
                continue
            if all(self._is_separator_cell(cell) for cell in cells):
                # Markdown header delimiter row carries no data
                continue

            table_data.append(cells)

        return table_data if len(table_data) > 1 else []