"""
PDF renderer for report generation using reportlab.
"""

import base64
import io
from datetime import datetime, UTC
from typing import Dict, Any, Tuple, List
from xml.sax.saxutils import escape

from .base_renderer import BaseRenderer

try:
    from reportlab.lib.pagesizes import letter, A4
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.lib import colors
    from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
    REPORTLAB_AVAILABLE = True
except ImportError:
    # reportlab is optional; render() falls back to HTML output without it.
    REPORTLAB_AVAILABLE = False


class PdfRenderer(BaseRenderer):
    """Renders content to PDF format using reportlab."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return the format identifiers this renderer handles."""
        return ['pdf']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return alternative names that map to this renderer."""
        return ['document', 'print']

    @classmethod
    def get_priority(cls) -> int:
        """Return the priority value of the PDF renderer."""
        return 120

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Build the PDF-specific extraction prompt.

        Args:
            user_prompt: The caller's prompt, placed at the top of the result.
            title: Report title interpolated into the instructions.

        Returns:
            The user prompt followed by the PDF formatting instructions.
        """
        return f"""
{user_prompt}

Generate a comprehensive PDF report with the title: "{title}"

PDF FORMAT REQUIREMENTS:
- Create structured content suitable for PDF documents
- Use clear headings and sections
- Include tables for structured data
- Use bullet points and lists where appropriate
- Include source document information
- Add page breaks between major sections
- Structure content for professional presentation

PDF STRUCTURE:
- Title page with report title and generation date
- Table of contents (if multiple sections)
- Executive summary
- Main content sections with clear headings
- Data tables and analysis
- Conclusions and recommendations
- Appendices with source information

FORMATTING RULES:
- Use clear section headings (H1, H2, H3 style)
- Include page numbers and headers
- Use consistent formatting throughout
- Include tables with proper alignment
- Use bullet points for lists
- Add source citations and references
- Include generation metadata

OUTPUT POLICY:
- Return ONLY PDF-compatible content
- No HTML, no markdown, no code blocks
- Structured text suitable for PDF generation
- Professional document format
- Include all necessary information

Generate the complete PDF report content:
"""

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to PDF format.

        Args:
            extracted_content: Text content to lay out in the document.
            title: Title shown on the first page.

        Returns:
            A ``(content, mime_type)`` tuple. On success the content is the
            base64-encoded PDF with ``application/pdf``. If reportlab is not
            installed, the HTML renderer's output is returned with
            ``text/html``. If rendering fails, a plain-text error message is
            returned with ``text/plain``.
        """
        try:
            if not REPORTLAB_AVAILABLE:
                # Fallback to HTML if reportlab not available
                from .html_renderer import HtmlRenderer
                html_renderer = HtmlRenderer()
                html_content, _ = await html_renderer.render(extracted_content, title)
                return html_content, "text/html"

            # Generate PDF using reportlab
            pdf_content = self._generate_pdf(extracted_content, title)
            return pdf_content, "application/pdf"

        except Exception as e:
            # Deliberate best-effort boundary: never propagate to the caller.
            self.logger.error(f"Error rendering PDF: {str(e)}")
            # Return minimal fallback
            return f"PDF Generation Error: {str(e)}", "text/plain"

    def _generate_pdf(self, content: str, title: str) -> str:
        """Generate PDF content using reportlab.

        Lines starting with ``# ``, ``## `` or ``### `` become headings; all
        other non-empty lines are grouped into sections and handed to
        ``_process_section``.

        Args:
            content: Source text, one logical element per line.
            title: Title printed on the first page.

        Returns:
            The generated PDF, base64-encoded as a UTF-8 string.

        Raises:
            Exception: Any error from reportlab is logged and re-raised.
        """
        try:
            # Create a buffer to hold the PDF
            buffer = io.BytesIO()

            # Create PDF document
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18
            )

            # Get styles
            styles = getSampleStyleSheet()

            # Create custom styles
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                alignment=TA_CENTER,
                textColor=colors.darkblue
            )
            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=16,
                spaceAfter=12,
                spaceBefore=12,
                textColor=colors.darkblue
            )

            # (prefix, paragraph style, spacer height after the heading)
            heading_specs = (
                ('# ', title_style, 12),
                ('## ', heading_style, 8),
                ('### ', styles['Heading3'], 6),
            )

            # Build PDF content
            story = []

            # Title page. Paragraph parses XML-like markup, so plain text
            # must be escaped or characters such as '<' and '&' break it.
            story.append(Paragraph(escape(title), title_style))
            story.append(Spacer(1, 20))
            story.append(Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal']))
            story.append(PageBreak())

            # Process content
            current_section = []
            for raw_line in content.split('\n'):
                line = raw_line.strip()
                if not line:
                    continue

                # Check for headings
                matched = next(
                    (spec for spec in heading_specs if line.startswith(spec[0])),
                    None,
                )
                if matched is None:
                    current_section.append(line)
                    continue

                # A heading closes the section collected so far.
                if current_section:
                    story.extend(self._process_section(current_section, styles))
                    current_section = []
                prefix, style, gap = matched
                story.append(Paragraph(escape(line[len(prefix):]), style))
                story.append(Spacer(1, gap))

            # Process remaining content
            if current_section:
                story.extend(self._process_section(current_section, styles))

            # Build PDF
            doc.build(story)

            # Get PDF content as base64
            pdf_bytes = buffer.getvalue()
            return base64.b64encode(pdf_bytes).decode('utf-8')

        except Exception as e:
            self.logger.error(f"Error generating PDF: {str(e)}")
            raise

    def _process_section(self, lines: list, styles) -> list:
        """Process a section of content into PDF elements.

        Consecutive lines containing ``|`` are rendered as one table when they
        yield at least two data rows; otherwise they are treated as text.
        Lines starting with ``- `` or ``* `` become bullet items and all other
        lines become paragraphs. Content before and after a table is kept.

        Args:
            lines: The section's text lines.
            styles: Style sheet providing at least the ``'Normal'`` style.

        Returns:
            A list of reportlab flowables in document order.
        """
        elements = []
        body_style = styles['Normal']
        total = len(lines)
        index = 0

        while index < total:
            line = lines[index].strip()
            if not line:
                index += 1
                continue

            # Check for tables (lines with |)
            if '|' in line:
                end = index
                while end < total and '|' in lines[end]:
                    end += 1
                table_data = self._extract_table_data(lines[index:end])
                if table_data:
                    elements.append(self._build_table(table_data))
                    elements.append(Spacer(1, 12))
                    index = end
                    continue
                # Not a real table: fall through and render as text.

            # Check for lists
            if line.startswith(('- ', '* ')):
                # This is a list item
                elements.append(Paragraph(f"• {escape(line[2:])}", body_style))
            else:
                # Regular paragraph
                elements.append(Paragraph(escape(line), body_style))
                elements.append(Spacer(1, 6))
            index += 1

        return elements

    def _build_table(self, table_data: list):
        """Create a styled reportlab Table with a highlighted header row."""
        table = Table(table_data)
        table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 14),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black)
        ]))
        return table

    @staticmethod
    def _split_table_row(line: str) -> List[str]:
        """Split one pipe-delimited line into stripped cells, keeping empty ones."""
        row = line.strip()
        # Drop only the outer border pipes so inner empty cells keep their column.
        if row.startswith('|'):
            row = row[1:]
        if row.endswith('|'):
            row = row[:-1]
        return [cell.strip() for cell in row.split('|')]

    @staticmethod
    def _is_separator_row(cells: List[str]) -> bool:
        """Return True for a markdown header separator such as ``---|:--:``."""
        return bool(cells) and all(
            '-' in cell and set(cell) <= {'-', ':', ' '}
            for cell in cells
        )

    def _extract_table_data(self, lines: list) -> list:
        """Extract table data from lines.

        Reads the first contiguous run of lines containing ``|``. Separator
        rows and rows with no content are skipped, empty cells are preserved,
        and shorter rows are padded with empty strings so every row has the
        same number of columns.

        Args:
            lines: Candidate text lines.

        Returns:
            A list of rows (each a list of cell strings), or an empty list
            when fewer than two data rows were found.
        """
        table_data = []
        in_table = False

        for line in lines:
            if '|' not in line:
                if in_table:
                    # First non-table line ends the table.
                    break
                continue

            in_table = True
            cells = self._split_table_row(line)
            if not any(cells) or self._is_separator_row(cells):
                continue
            table_data.append(cells)

        if len(table_data) < 2:
            return []

        width = max(len(row) for row in table_data)
        return [row + [''] * (width - len(row)) for row in table_data]