261 lines
No EOL
9.4 KiB
Python
261 lines
No EOL
9.4 KiB
Python
"""
|
|
PDF renderer for report generation using reportlab.
|
|
"""
|
|
|
|
from .base_renderer import BaseRenderer
|
|
from typing import Dict, Any, Tuple, List
|
|
import io
|
|
import base64
|
|
from datetime import datetime, UTC
|
|
|
|
try:
|
|
from reportlab.lib.pagesizes import letter, A4
|
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
|
from reportlab.lib.units import inch
|
|
from reportlab.lib import colors
|
|
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
|
|
REPORTLAB_AVAILABLE = True
|
|
except ImportError:
|
|
REPORTLAB_AVAILABLE = False
|
|
|
|
class PdfRenderer(BaseRenderer):
    """Renders content to PDF format using reportlab.

    The renderer accepts lightly structured text (``#``/``##``/``###``
    headings, ``-``/``*`` bullets, pipe-delimited tables) and turns it into a
    reportlab "story".  The finished PDF is returned base64-encoded so it can
    travel through string-typed interfaces.

    NOTE(review): ``self.logger`` and ``self._format_timestamp()`` are not
    defined in this class; they are presumably provided by ``BaseRenderer``.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported PDF formats."""
        return ['pdf']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['document', 'print']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for PDF renderer."""
        return 120

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Get PDF-specific extraction prompt.

        Args:
            user_prompt: The caller's own instructions; placed first in the
                returned prompt.
            title: Report title, interpolated into the prompt text.

        Returns:
            The full prompt string: ``user_prompt`` followed by the fixed PDF
            format, structure, formatting and output-policy instructions.
        """
        return f"""
{user_prompt}

Generate a comprehensive PDF report with the title: "{title}"

PDF FORMAT REQUIREMENTS:
- Create structured content suitable for PDF documents
- Use clear headings and sections
- Include tables for structured data
- Use bullet points and lists where appropriate
- Include source document information
- Add page breaks between major sections
- Structure content for professional presentation

PDF STRUCTURE:
- Title page with report title and generation date
- Table of contents (if multiple sections)
- Executive summary
- Main content sections with clear headings
- Data tables and analysis
- Conclusions and recommendations
- Appendices with source information

FORMATTING RULES:
- Use clear section headings (H1, H2, H3 style)
- Include page numbers and headers
- Use consistent formatting throughout
- Include tables with proper alignment
- Use bullet points for lists
- Add source citations and references
- Include generation metadata

OUTPUT POLICY:
- Return ONLY PDF-compatible content
- No HTML, no markdown, no code blocks
- Structured text suitable for PDF generation
- Professional document format
- Include all necessary information

CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.

Generate the complete PDF report content using the actual data from the source documents:
"""

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to PDF format.

        Args:
            extracted_content: Text to lay out (see ``_generate_pdf`` for the
                recognised structure markers).
            title: Report title shown on the title page.

        Returns:
            A ``(payload, mime_type)`` tuple:

            * ``(base64 PDF, "application/pdf")`` on success;
            * ``(html, "text/html")`` when reportlab could not be imported and
              the HTML renderer is used instead;
            * ``(error message, "text/plain")`` when anything raises.  This
              method never propagates an exception to the caller.
        """
        try:
            if not REPORTLAB_AVAILABLE:
                # Fallback to HTML if reportlab not available.  Imported
                # lazily so the HTML renderer is only loaded when needed.
                from .html_renderer import HtmlRenderer

                html_renderer = HtmlRenderer()
                html_content, _ = await html_renderer.render(extracted_content, title)
                return html_content, "text/html"

            # Generate PDF using reportlab
            pdf_content = self._generate_pdf(extracted_content, title)
            return pdf_content, "application/pdf"

        except Exception as e:
            # Deliberate best-effort boundary: report the failure as plain
            # text instead of raising.
            self.logger.error(f"Error rendering PDF: {str(e)}")
            return f"PDF Generation Error: {str(e)}", "text/plain"

    def _generate_pdf(self, content: str, title: str) -> str:
        """Generate PDF content using reportlab.

        Lines are stripped and blank lines dropped.  Lines starting with
        ``# ``, ``## `` or ``### `` become headings; every other line is
        collected into the current section and handed to
        ``_process_section`` when the next heading (or the end of the input)
        is reached.

        Args:
            content: Text to lay out.
            title: Title rendered on the first page, followed by a
                "Generated: ..." line and a page break.

        Returns:
            The PDF bytes, base64-encoded and decoded to a ``str``.

        Raises:
            Exception: Any error raised while building the document is logged
                and re-raised unchanged.
        """
        try:
            # Create a buffer to hold the PDF
            buffer = io.BytesIO()

            # Create PDF document
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18,
            )

            # Get styles
            styles = getSampleStyleSheet()

            # Create custom styles
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                alignment=TA_CENTER,
                textColor=colors.darkblue,
            )
            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=16,
                spaceAfter=12,
                spaceBefore=12,
                textColor=colors.darkblue,
            )

            # (marker, paragraph style, space after the heading).  The
            # markers cannot shadow each other: '## x' does not start with
            # '# ' because its second character is '#'.
            heading_specs = (
                ('# ', title_style, 12),
                ('## ', heading_style, 8),
                ('### ', styles['Heading3'], 6),
            )

            # Build PDF content
            story = []

            # Title page
            story.append(Paragraph(title, title_style))
            story.append(Spacer(1, 20))
            story.append(Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal']))
            story.append(PageBreak())

            # Process content
            current_section = []

            def flush_section():
                """Move the pending body lines into the story."""
                if current_section:
                    story.extend(self._process_section(current_section, styles))
                    current_section.clear()

            for raw_line in content.split('\n'):
                line = raw_line.strip()
                if not line:
                    continue

                for marker, style, gap in heading_specs:
                    if line.startswith(marker):
                        # A heading closes the section collected so far.
                        flush_section()
                        story.append(Paragraph(line[len(marker):], style))
                        story.append(Spacer(1, gap))
                        break
                else:
                    current_section.append(line)

            # Process remaining content
            flush_section()

            # Build PDF
            doc.build(story)

            # Get PDF content as base64
            pdf_bytes = buffer.getvalue()
            return base64.b64encode(pdf_bytes).decode('utf-8')

        except Exception as e:
            self.logger.error(f"Error generating PDF: {str(e)}")
            raise

    def _process_section(self, lines: list, styles) -> list:
        """Process a section of content into PDF elements.

        Each run of consecutive lines containing ``|`` is treated as a
        candidate table.  If the run yields at least two data rows it is
        rendered as one table; otherwise its lines are rendered as ordinary
        text.  Processing then continues with the lines after the run, so
        content following a table is kept and separate tables stay separate.

        Args:
            lines: Body lines of one section (no headings).
            styles: Style sheet providing at least the ``'Normal'`` style.

        Returns:
            A list of reportlab flowables, always ending with a small spacer.
        """
        elements = []
        total = len(lines)
        index = 0

        while index < total:
            line = lines[index]
            if not line.strip():
                index += 1
                continue

            # Check for tables (lines with |)
            if '|' in line:
                run_end = index
                while run_end < total and '|' in lines[run_end]:
                    run_end += 1
                table_data = self._extract_table_data(lines[index:run_end])
                if table_data:
                    elements.append(self._build_table(table_data))
                    elements.append(Spacer(1, 12))
                    index = run_end
                    continue
                # Not a usable table: fall through and render as text.

            # Check for lists
            if line.startswith(('- ', '* ')):
                # This is a list item
                elements.append(Paragraph(f"• {line[2:]}", styles['Normal']))
            else:
                # Regular paragraph
                elements.append(Paragraph(line, styles['Normal']))
            index += 1

        elements.append(Spacer(1, 6))
        return elements

    def _build_table(self, table_data: list):
        """Wrap rectangular ``table_data`` in a styled reportlab ``Table``.

        The first row is styled as the header (grey background, bold, larger
        font); the remaining rows get a beige background and the whole table
        a black grid.
        """
        table = Table(table_data)
        table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 14),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
        ]))
        return table

    @staticmethod
    def _is_separator_row(cells: list) -> bool:
        """Return True for a Markdown header separator such as ``---|:--:``."""
        return all(
            cell and '-' in cell and set(cell) <= {'-', ':'}
            for cell in cells
        )

    def _extract_table_data(self, lines: list) -> list:
        """Extract table data from lines.

        Lines before the first ``|`` line are skipped; the table then runs
        until the first line without ``|``.  One optional leading and one
        optional trailing ``|`` are removed from each row before splitting,
        so empty interior cells keep their column position.  Markdown
        separator rows and rows with no text at all are ignored, and short
        rows are padded with empty strings so the result is rectangular.

        Args:
            lines: Candidate lines, typically one contiguous run of pipe
                lines.

        Returns:
            A list of rows (each a list of cell strings), or ``[]`` when
            fewer than two data rows were found.
        """
        table_data = []
        in_table = False

        for line in lines:
            if '|' not in line:
                if in_table:
                    # First non-table line after the table ends it.
                    break
                continue

            in_table = True
            row_text = line.strip().removeprefix('|').removesuffix('|')
            cells = [cell.strip() for cell in row_text.split('|')]

            if not any(cells) or self._is_separator_row(cells):
                continue
            table_data.append(cells)

        if len(table_data) < 2:
            return []

        width = max(len(row) for row in table_data)
        return [row + [''] * (width - len(row)) for row in table_data]