gateway/modules/services/serviceGeneration/renderers/pdf_renderer.py
2025-10-03 22:40:41 +02:00

261 lines
No EOL
9.4 KiB
Python

"""
PDF renderer for report generation using reportlab.
"""
import base64
import io
from datetime import datetime, UTC
from typing import Dict, Any, Tuple, List
from xml.sax.saxutils import escape

# reportlab is optional: when it is missing, PdfRenderer.render falls back to HTML.
try:
    from reportlab.lib.pagesizes import letter, A4
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.lib import colors
    from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
    REPORTLAB_AVAILABLE = True
except ImportError:
    REPORTLAB_AVAILABLE = False

from .base_renderer import BaseRenderer


class PdfRenderer(BaseRenderer):
    """Renders content to PDF format using reportlab.

    The extracted content is treated as lightly structured text:
    ``# ``, ``## `` and ``### `` prefixes become headings, lines starting
    with ``- `` or ``* `` become bullet items, runs of lines containing
    ``|`` become tables, and everything else becomes a paragraph.

    ``self.logger`` and ``self._format_timestamp()`` are not defined in this
    class; they are presumably provided by ``BaseRenderer`` (verify there).
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported PDF formats."""
        return ['pdf']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['document', 'print']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for PDF renderer."""
        return 120

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Get PDF-specific extraction prompt.

        Args:
            user_prompt: The caller's prompt; placed at the top of the result.
            title: Report title interpolated into the instructions.

        Returns:
            The user prompt followed by the PDF formatting instructions.
        """
        # NOTE: the prompt text is a runtime string; its lines are kept at
        # column 0 so the emitted prompt is unchanged.
        return f"""
{user_prompt}
Generate a comprehensive PDF report with the title: "{title}"
PDF FORMAT REQUIREMENTS:
- Create structured content suitable for PDF documents
- Use clear headings and sections
- Include tables for structured data
- Use bullet points and lists where appropriate
- Include source document information
- Add page breaks between major sections
- Structure content for professional presentation
PDF STRUCTURE:
- Title page with report title and generation date
- Table of contents (if multiple sections)
- Executive summary
- Main content sections with clear headings
- Data tables and analysis
- Conclusions and recommendations
- Appendices with source information
FORMATTING RULES:
- Use clear section headings (H1, H2, H3 style)
- Include page numbers and headers
- Use consistent formatting throughout
- Include tables with proper alignment
- Use bullet points for lists
- Add source citations and references
- Include generation metadata
OUTPUT POLICY:
- Return ONLY PDF-compatible content
- No HTML, no markdown, no code blocks
- Structured text suitable for PDF generation
- Professional document format
- Include all necessary information
CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.
Generate the complete PDF report content using the actual data from the source documents:
"""

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to PDF format.

        Args:
            extracted_content: Text to lay out in the document.
            title: Title shown on the first page.

        Returns:
            A ``(payload, mime_type)`` tuple:
            - base64-encoded PDF and ``"application/pdf"`` on success;
            - the HTML renderer's output and ``"text/html"`` when reportlab
              is not installed;
            - an error message and ``"text/plain"`` if anything fails.
        """
        try:
            if not REPORTLAB_AVAILABLE:
                # Fallback to HTML if reportlab not available
                from .html_renderer import HtmlRenderer
                html_renderer = HtmlRenderer()
                html_content, _ = await html_renderer.render(extracted_content, title)
                return html_content, "text/html"

            # Generate PDF using reportlab
            pdf_content = self._generate_pdf(extracted_content, title)
            return pdf_content, "application/pdf"
        except Exception as e:
            # Deliberate best effort: callers always get a payload back.
            self.logger.error(f"Error rendering PDF: {str(e)}")
            # Return minimal fallback
            return f"PDF Generation Error: {str(e)}", "text/plain"

    def _generate_pdf(self, content: str, title: str) -> str:
        """Generate PDF content using reportlab.

        Builds a title page (title plus generation timestamp), then walks
        ``content`` line by line, turning heading lines into styled headings
        and handing the lines between headings to ``_process_section``.

        Args:
            content: Text to render; ``None`` is treated as empty.
            title: Document title; ``None`` is treated as empty.

        Returns:
            The finished PDF, base64-encoded as a ``str``.

        Raises:
            Exception: Any error raised while building the document is
                logged and re-raised.
        """
        try:
            # Create a buffer to hold the PDF
            buffer = io.BytesIO()

            # Create PDF document
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18,
            )

            # Get styles
            styles = getSampleStyleSheet()

            # Create custom styles
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                alignment=TA_CENTER,
                textColor=colors.darkblue,
            )
            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=16,
                spaceAfter=12,
                spaceBefore=12,
                textColor=colors.darkblue,
            )

            # (prefix, paragraph style, space after). The prefixes include the
            # trailing blank, so at most one of them matches a given line.
            heading_rules = (
                ('# ', title_style, 12),
                ('## ', heading_style, 8),
                ('### ', styles['Heading3'], 6),
            )

            # Title page. Paragraph parses its text as XML-like markup, so
            # free text must be escaped or a stray '<' / '&' aborts the build.
            story = [
                Paragraph(escape(title or ''), title_style),
                Spacer(1, 20),
                Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal']),
                PageBreak(),
            ]

            # Process content
            current_section = []
            for raw_line in (content or '').split('\n'):
                line = raw_line.strip()
                if not line:
                    # Keep blank lines inside a section as separators so that
                    # two tables divided by a blank line are not merged;
                    # _process_section skips them when rendering.
                    if current_section:
                        current_section.append('')
                    continue

                for prefix, style, gap in heading_rules:
                    if line.startswith(prefix):
                        # A heading closes the section collected so far.
                        if current_section:
                            story.extend(self._process_section(current_section, styles))
                            current_section = []
                        story.append(Paragraph(escape(line[len(prefix):]), style))
                        story.append(Spacer(1, gap))
                        break
                else:
                    current_section.append(line)

            # Process remaining content
            if current_section:
                story.extend(self._process_section(current_section, styles))

            # Build PDF
            doc.build(story)

            # Get PDF content as base64
            return base64.b64encode(buffer.getvalue()).decode('utf-8')
        except Exception as e:
            self.logger.error(f"Error generating PDF: {str(e)}")
            raise

    def _process_section(self, lines: list, styles) -> list:
        """Process a section of content into PDF elements.

        Each run of consecutive lines containing ``|`` is rendered as a table
        when it yields at least two data rows; otherwise those lines are
        rendered as ordinary text. Content after a table is still processed.

        Args:
            lines: Lines of one section (text between two headings).
            styles: Style sheet providing at least the ``'Normal'`` style.

        Returns:
            A list of reportlab flowables, each followed by a spacer.
        """
        elements = []
        total = len(lines)
        index = 0
        while index < total:
            line = lines[index]
            stripped = line.strip()
            if not stripped:
                index += 1
                continue

            if '|' in line:
                # Gather the contiguous run of pipe lines starting here.
                run_end = index
                while run_end < total and '|' in lines[run_end]:
                    run_end += 1
                table_data = self._extract_table_data(lines[index:run_end])
                if table_data:
                    elements.append(self._build_table(table_data))
                    elements.append(Spacer(1, 12))
                    index = run_end
                    continue
                # Not a real table (e.g. a single line with a pipe in it):
                # fall through and render this line as text.

            if stripped.startswith(('- ', '* ')):
                # List item: replace the source marker with a bullet glyph.
                text = "\u2022 " + escape(stripped[2:])
            else:
                # Regular paragraph
                text = escape(stripped)
            elements.append(Paragraph(text, styles['Normal']))
            elements.append(Spacer(1, 6))
            index += 1
        return elements

    def _build_table(self, table_data: list):
        """Create the styled reportlab ``Table`` for already-parsed rows."""
        # Pad short rows so every row has the same number of columns.
        width = max(len(row) for row in table_data)
        rows = [list(row) + [''] * (width - len(row)) for row in table_data]

        table = Table(rows)
        table.setStyle(TableStyle([
            # Header row
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 14),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            # Body rows
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
        ]))
        return table

    def _extract_table_data(self, lines: list) -> list:
        """Extract table data from lines.

        Lines containing ``|`` are split into cells. Markdown separator rows
        (such as ``|---|:--:|``) are dropped, and empty interior cells are
        kept so columns stay aligned. Scanning stops at the first blank line
        after the table has started; non-blank lines without a pipe are
        ignored.

        Args:
            lines: Candidate lines.

        Returns:
            A list of rows (each a list of cell strings), or ``[]`` when
            fewer than two data rows were found.
        """
        table_data = []
        in_table = False
        for line in lines:
            if '|' in line:
                in_table = True
                cells = self._split_table_row(line)
                if cells and not self._is_separator_row(cells):
                    table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break
        return table_data if len(table_data) > 1 else []

    @staticmethod
    def _split_table_row(line: str) -> list:
        """Split one pipe-delimited line into stripped cells.

        One leading and one trailing pipe are treated as row borders.
        Returns ``[]`` when the row carries no text at all.
        """
        row = line.strip()
        if row.startswith('|'):
            row = row[1:]
        if row.endswith('|'):
            row = row[:-1]
        cells = [cell.strip() for cell in row.split('|')]
        return cells if any(cells) else []

    @staticmethod
    def _is_separator_row(cells: list) -> bool:
        """Return True for a Markdown header separator row (only ``-``/``:``)."""
        only_rule_chars = all(set(cell) <= {'-', ':'} for cell in cells)
        return only_rule_chars and any('-' in cell for cell in cells)