""" PDF renderer for report generation using reportlab. """ from .base_renderer import BaseRenderer from typing import Dict, Any, Tuple, List import io import base64 from datetime import datetime, UTC try: from reportlab.lib.pagesizes import letter, A4 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle from reportlab.lib.units import inch from reportlab.lib import colors from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY REPORTLAB_AVAILABLE = True except ImportError: REPORTLAB_AVAILABLE = False class PdfRenderer(BaseRenderer): """Renders content to PDF format using reportlab.""" @classmethod def get_supported_formats(cls) -> List[str]: """Return supported PDF formats.""" return ['pdf'] @classmethod def get_format_aliases(cls) -> List[str]: """Return format aliases.""" return ['document', 'print'] @classmethod def get_priority(cls) -> int: """Return priority for PDF renderer.""" return 120 def getExtractionPrompt(self, user_prompt: str, title: str) -> str: """Return only PDF-specific guidelines; global prompt is built centrally.""" return ( "PDF FORMAT GUIDELINES:\n" "- Provide structured content suitable for pagination and headings (H1/H2/H3-like).\n" "- Use bullet lists and tables where useful; separate major sections clearly.\n" "- Avoid markdown/HTML; produce clean, plain content that can be laid out as PDF.\n" "OUTPUT: Return ONLY the PDF-ready textual content (no fences)." ) async def render(self, extracted_content: str, title: str) -> Tuple[str, str]: """Render extracted content to PDF format.""" try: if not REPORTLAB_AVAILABLE: # Fallback to HTML if reportlab not available from .html_renderer import HtmlRenderer html_renderer = HtmlRenderer() html_content, _ = await html_renderer.render(extracted_content, title) return html_content, "text/html" # Generate PDF using reportlab pdf_content = self._generate_pdf(extracted_content, title) return pdf_content, "application/pdf" except Exception as e: self.logger.error(f"Error rendering PDF: {str(e)}") # Return minimal fallback return f"PDF Generation Error: {str(e)}", "text/plain" def _generate_pdf(self, content: str, title: str) -> str: """Generate PDF content using reportlab.""" try: # Create a buffer to hold the PDF buffer = io.BytesIO() # Create PDF document doc = SimpleDocTemplate( buffer, pagesize=A4, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18 ) # Get styles styles = getSampleStyleSheet() # Create custom styles title_style = ParagraphStyle( 'CustomTitle', parent=styles['Heading1'], fontSize=24, spaceAfter=30, alignment=TA_CENTER, textColor=colors.darkblue ) heading_style = ParagraphStyle( 'CustomHeading', parent=styles['Heading2'], fontSize=16, spaceAfter=12, spaceBefore=12, textColor=colors.darkblue ) # Build PDF content story = [] # Title page story.append(Paragraph(title, title_style)) story.append(Spacer(1, 20)) story.append(Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal'])) story.append(PageBreak()) # Process content lines = content.split('\n') current_section = [] for line in lines: line = line.strip() if not line: continue # Check for headings if line.startswith('# '): # H1 heading if current_section: story.extend(self._process_section(current_section, styles)) current_section = [] story.append(Paragraph(line[2:], title_style)) story.append(Spacer(1, 12)) elif line.startswith('## '): # H2 heading if current_section: story.extend(self._process_section(current_section, styles)) current_section = [] story.append(Paragraph(line[3:], heading_style)) story.append(Spacer(1, 8)) elif line.startswith('### '): # H3 heading if current_section: story.extend(self._process_section(current_section, styles)) current_section = [] story.append(Paragraph(line[4:], styles['Heading3'])) story.append(Spacer(1, 6)) else: current_section.append(line) # Process remaining content if current_section: story.extend(self._process_section(current_section, styles)) # Build PDF doc.build(story) # Get PDF content as base64 buffer.seek(0) pdf_bytes = buffer.getvalue() pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8') return pdf_base64 except Exception as e: self.logger.error(f"Error generating PDF: {str(e)}") raise def _process_section(self, lines: list, styles) -> list: """Process a section of content into PDF elements.""" elements = [] for line in lines: if not line.strip(): continue # Check for tables (lines with |) if '|' in line and not line.startswith('|'): # This might be part of a table, process as table table_data = self._extract_table_data(lines) if table_data: table = Table(table_data) table.setStyle(TableStyle([ ('BACKGROUND', (0, 0), (-1, 0), colors.grey), ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke), ('ALIGN', (0, 0), (-1, -1), 'CENTER'), ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'), ('FONTSIZE', (0, 0), (-1, 0), 14), ('BOTTOMPADDING', (0, 0), (-1, 0), 12), ('BACKGROUND', (0, 1), (-1, -1), colors.beige), ('GRID', (0, 0), (-1, -1), 1, colors.black) ])) elements.append(table) elements.append(Spacer(1, 12)) return elements # Check for lists if line.startswith('- ') or line.startswith('* '): # This is a list item elements.append(Paragraph(f"• {line[2:]}", styles['Normal'])) else: # Regular paragraph elements.append(Paragraph(line, styles['Normal'])) elements.append(Spacer(1, 6)) return elements def _extract_table_data(self, lines: list) -> list: """Extract table data from lines.""" table_data = [] in_table = False for line in lines: if '|' in line: if not in_table: in_table = True # Split by | and clean up cells = [cell.strip() for cell in line.split('|') if cell.strip()] if cells: table_data.append(cells) elif in_table and not line.strip(): # Empty line, might be end of table break return table_data if len(table_data) > 1 else []