# gateway/modules/services/serviceGeneration/renderers/pdf_renderer.py

"""
PDF renderer for report generation using reportlab.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC

try:
    from reportlab.lib.pagesizes import letter, A4
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.lib import colors
    from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
    REPORTLAB_AVAILABLE = True
except ImportError:
    REPORTLAB_AVAILABLE = False

class PdfRenderer(BaseRenderer):
    """Renders content to PDF format using reportlab."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported PDF formats."""
        return ['pdf']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['document', 'print']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for PDF renderer."""
        return 120

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Get PDF-specific extraction prompt."""
        return f"""
{user_prompt}

Generate a comprehensive PDF report with the title: "{title}"

PDF FORMAT REQUIREMENTS:
- Create structured content suitable for PDF documents
- Use clear headings and sections
- Include tables for structured data
- Use bullet points and lists where appropriate
- Include source document information
- Add page breaks between major sections
- Structure content for professional presentation

PDF STRUCTURE:
- Title page with report title and generation date
- Table of contents (if multiple sections)
- Executive summary
- Main content sections with clear headings
- Data tables and analysis
- Conclusions and recommendations
- Appendices with source information

FORMATTING RULES:
- Use clear section headings (H1, H2, H3 style)
- Include page numbers and headers
- Use consistent formatting throughout
- Include tables with proper alignment
- Use bullet points for lists
- Add source citations and references
- Include generation metadata

OUTPUT POLICY:
- Return ONLY PDF-compatible content
- No HTML, no markdown, no code blocks
- Structured text suitable for PDF generation
- Professional document format
- Include all necessary information

CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.

Generate the complete PDF report content using the actual data from the source documents:
"""

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content as a PDF, with HTML and text fallbacks.

        Args:
            extracted_content: Report text to render.
            title: Report title.

        Returns:
            A ``(payload, mime_type)`` pair:

            * ``(base64 PDF string, "application/pdf")`` when reportlab is
              importable and generation succeeds;
            * ``(html, "text/html")`` from ``HtmlRenderer`` when reportlab is
              not installed;
            * ``(error message, "text/plain")`` if anything raises. The error
              is logged and not propagated to the caller.
        """
        try:
            if REPORTLAB_AVAILABLE:
                # Normal path: build the PDF with reportlab.
                return self._generate_pdf(extracted_content, title), "application/pdf"

            # reportlab is missing: delegate to the HTML renderer instead.
            from .html_renderer import HtmlRenderer
            fallback_html, _ = await HtmlRenderer().render(extracted_content, title)
            return fallback_html, "text/html"

        except Exception as e:
            self.logger.error(f"Error rendering PDF: {str(e)}")
            # Last resort: hand the error text back as plain text.
            return f"PDF Generation Error: {str(e)}", "text/plain"

    def _generate_pdf(self, content: str, title: str) -> str:
        """Generate a PDF with reportlab and return it base64-encoded.

        The document opens with a title page (title plus a "Generated:"
        timestamp line) followed by a page break. ``content`` is then read
        line by line: lines starting with ``# ``, ``## `` or ``### `` become
        headings, blank lines are skipped, and every other line is collected
        and passed to ``_process_section`` whenever a heading or the end of
        the input closes the run.

        Title and heading text is XML-escaped before being given to
        ``Paragraph``: reportlab parses paragraph text as markup, so a literal
        ``<`` or ``&`` in plain report text must be escaped to be rendered as
        written.

        Args:
            content: Report text to lay out. ``None`` is treated as empty.
            title: Title shown on the first page. ``None`` is treated as empty.

        Returns:
            The PDF bytes encoded as a base64 string.

        Raises:
            Exception: Any error raised while building the document is logged
                and re-raised unchanged.
        """
        # Function-scope import: the module header does not import it.
        from xml.sax.saxutils import escape

        try:
            # In-memory target for the generated document.
            buffer = io.BytesIO()

            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18
            )

            styles = getSampleStyleSheet()

            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                alignment=TA_CENTER,
                textColor=colors.darkblue
            )

            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=16,
                spaceAfter=12,
                spaceBefore=12,
                textColor=colors.darkblue
            )

            # (marker, paragraph style, vertical gap after the heading).
            # The markers are mutually exclusive: '## x' does not start
            # with '# ', so the order of the rules does not matter.
            heading_rules = (
                ('# ', title_style, 12),
                ('## ', heading_style, 8),
                ('### ', styles['Heading3'], 6),
            )

            # Title page.
            story = [
                Paragraph(escape(title or ""), title_style),
                Spacer(1, 20),
                Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal']),
                PageBreak(),
            ]

            # Body lines gathered since the last heading.
            pending = []

            for raw_line in (content or "").split('\n'):
                line = raw_line.strip()
                if not line:
                    continue

                rule = next(
                    (r for r in heading_rules if line.startswith(r[0])),
                    None,
                )
                if rule is None:
                    pending.append(line)
                    continue

                # A heading closes the current run of body lines.
                if pending:
                    story.extend(self._process_section(pending, styles))
                    pending = []

                marker, style, gap = rule
                story.append(Paragraph(escape(line[len(marker):]), style))
                story.append(Spacer(1, gap))

            # Flush whatever follows the last heading.
            if pending:
                story.extend(self._process_section(pending, styles))

            doc.build(story)

            # getvalue() returns the whole buffer regardless of position.
            return base64.b64encode(buffer.getvalue()).decode('utf-8')

        except Exception as e:
            self.logger.error(f"Error generating PDF: {str(e)}")
            raise

    def _process_section(self, lines: list, styles) -> list:
        """Convert a run of body lines into reportlab flowables.

        Consecutive lines containing ``|`` are treated as one candidate table
        and passed to ``_extract_table_data``. If that yields rows, a styled
        ``Table`` is emitted; otherwise the lines are rendered as ordinary
        text. Lines starting with ``- `` or ``* `` become bullet paragraphs,
        and all other non-blank lines become normal paragraphs.

        Unlike a single early return on the first table, processing continues
        after each table, so text and further tables in the same section are
        kept. Rows that start with ``|`` are recognised as table rows too.

        Paragraph text is XML-escaped because reportlab parses paragraph text
        as markup. Table cells are passed as plain strings and are left as-is.

        Args:
            lines: Text lines of one section.
            styles: Style sheet providing at least the ``'Normal'`` style.

        Returns:
            A list of flowables, always ending with a small ``Spacer``.
        """
        # Function-scope imports: the module header does not import them.
        from itertools import groupby
        from xml.sax.saxutils import escape

        def classify(text):
            """Tag a line as 'blank', 'table' (contains a pipe) or 'text'."""
            if not text.strip():
                return 'blank'
            return 'table' if '|' in text else 'text'

        def as_paragraph(text):
            """Render one text line as a bullet or a plain paragraph."""
            if text.startswith(('- ', '* ')):
                return Paragraph(f"• {escape(text[2:])}", styles['Normal'])
            return Paragraph(escape(text), styles['Normal'])

        elements = []

        for kind, run in groupby(lines, key=classify):
            if kind == 'blank':
                continue
            run = list(run)

            # Only a run of pipe lines can be a table; the extractor returns
            # an empty list when the run has fewer than two usable rows.
            table_data = self._extract_table_data(run) if kind == 'table' else []
            if not table_data:
                elements.extend(as_paragraph(text) for text in run)
                continue

            # Pad short rows so the grid is rectangular.
            width = max(len(row) for row in table_data)
            rows = [list(row) + [''] * (width - len(row)) for row in table_data]

            table = Table(rows)
            table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, 0), 14),
                ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                ('GRID', (0, 0), (-1, -1), 1, colors.black)
            ]))
            elements.append(table)
            elements.append(Spacer(1, 12))

        elements.append(Spacer(1, 6))
        return elements

    def _extract_table_data(self, lines: list) -> list:
        """Extract table data from lines."""
        table_data = []
        in_table = False

        for line in lines:
            if '|' in line:
                if not in_table:
                    in_table = True
                # Split by | and clean up
                cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                if cells:
                    table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break

        return table_data if len(table_data) > 1 else []