gateway/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
PDF renderer for report generation using reportlab.
"""

from __future__ import annotations

import unicodedata

from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import io
import base64

try:
    from reportlab.lib.pagesizes import A4
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Preformatted
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.lib import colors
    from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
    REPORTLAB_AVAILABLE = True
except ImportError:
    REPORTLAB_AVAILABLE = False

import re as _re_pdf

# A4 width in pt; margins must match SimpleDocTemplate(leftMargin/rightMargin)
_PDF_MARGIN_LR_PT = 72.0
_PDF_A4_WIDTH_PT = 595.27
_PDF_CONTENT_WIDTH_PT = _PDF_A4_WIDTH_PT - (2 * _PDF_MARGIN_LR_PT)


def _boxDrawingCharToAscii(ch: str) -> str:
    """Pick the ASCII stand-in for one box-drawing character.

    The choice is driven by the character's Unicode name: a purely vertical
    stroke becomes ``|``, a purely horizontal stroke becomes ``-`` and
    everything else (crossings, tees, corners, characters without a matching
    name) becomes ``+``. Needed because Courier has no glyphs for
    U+2500–U+257F.
    """
    charName = unicodedata.name(ch, "")
    isVertical = "VERTICAL" in charName
    isHorizontal = "HORIZONTAL" in charName
    # Both directions (a crossing) or neither (corner, diagonal, unnamed) -> junction.
    if isVertical == isHorizontal:
        return "+"
    # Exactly one direction, but combined with something else ("... AND ...") -> tee.
    if "AND" in charName:
        return "+"
    return "|" if isVertical else "-"


def _normalizePdfMonospaceText(text: str) -> str:
    """Swap Unicode box-drawing and block characters for ASCII look-alikes.

    Characters in U+2500–U+257F are mapped through ``_boxDrawingCharToAscii``;
    characters in U+2580–U+259F become ``#``; all other characters pass
    through unchanged. Falsy input (``None`` or ``""``) yields ``""``.
    """
    if not text:
        return ""

    def _asciiFor(char: str) -> str:
        codePoint = ord(char)
        if 0x2500 <= codePoint <= 0x257F:
            return _boxDrawingCharToAscii(char)
        if 0x2580 <= codePoint <= 0x259F:
            return "#"
        return char

    return "".join(_asciiFor(char) for char in text)


def _prepareCodeBlockPlainText(text: str) -> str:
    """Return code-block text with Unix newlines and tabs expanded to 4-column stops.

    No HTML/XML escaping happens here, so spaces stay significant.
    Falsy input (``None`` or ``""``) yields ``""``.
    """
    if not text:
        return ""
    # Handle CRLF before lone CR so a Windows line ending maps to a single "\n".
    unixNewlines = "\n".join(text.split("\r\n"))
    unixNewlines = "\n".join(unixNewlines.split("\r"))
    return unixNewlines.expandtabs(tabsize=4)


class RendererPdf(BaseRenderer):
    """Renders content to PDF format using reportlab."""

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported PDF formats."""
        return ['pdf']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['document', 'print']

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for PDF renderer."""
        return 120

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: PDF documents are formatted documents."""
        return 'document'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """List the section content types this renderer accepts.

        The PDF renderer accepts every type in ``supportedSectionTypes``;
        ``formatName`` is not consulted. A fresh list is returned on each call.
        """
        # Imported at call time, as elsewhere in this class.
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return [*supportedSectionTypes]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content into a single PDF document.

        Args:
            extractedContent: Structured document content (``metadata`` / ``documents``).
            title: Used to derive a filename when the first document names none.
            userPrompt: Optional user prompt forwarded to the style lookup.
            aiService: Optional AI service forwarded to the style lookup.

        Returns:
            A one-element list. Normally the PDF; when reportlab is not
            installed, whatever the HTML renderer returns; when anything
            raises, a ``text/plain`` document carrying the error message.
        """
        pdfMimeType = "application/pdf"
        try:
            if not REPORTLAB_AVAILABLE:
                # No reportlab installed: hand the job to the HTML renderer instead.
                from .rendererHtml import RendererHtml
                return await RendererHtml().render(extractedContent, title, userPrompt, aiService)

            pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)

            # Metadata supplies the document type and is attached to the result.
            metadata = extractedContent.get("metadata", {}) if extractedContent else {}
            metadataIsDict = isinstance(metadata, dict)
            documentType = metadata.get("documentType") if metadataIsDict else None

            # Prefer the filename of the first document; otherwise derive one from the title.
            documents = extractedContent.get("documents", [])
            filename = None
            if documents and isinstance(documents[0], dict):
                filename = documents[0].get("filename")
            if not filename:
                filename = self._determineFilename(title, pdfMimeType)

            # The generator returns base64 text; decode it (or fall back to raw UTF-8 bytes).
            if isinstance(pdf_content, str):
                try:
                    pdf_bytes = base64.b64decode(pdf_content)
                except Exception:
                    pdf_bytes = pdf_content.encode('utf-8')
            else:
                pdf_bytes = pdf_content

            renderedPdf = RenderedDocument(
                documentData=pdf_bytes,
                mimeType=pdfMimeType,
                filename=filename,
                documentType=documentType,
                metadata=metadata if metadataIsDict else None
            )
            return [renderedPdf]

        except Exception as e:
            self.logger.error(f"Error rendering PDF: {str(e)}")
            # Minimal fallback: a plain-text document describing the failure.
            errorText = f"PDF Generation Error: {str(e)}"
            errorDocument = RenderedDocument(
                documentData=errorText.encode('utf-8'),
                mimeType="text/plain",
                filename=self._determineFilename(title, "text/plain")
            )
            return [errorDocument]

    async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
        """Build the PDF for a structured JSON document and return it base64-encoded.

        Args:
            json_content: Document in the standardized schema
                ``{metadata: {...}, documents: [{sections: [...]}]}``.
            title: Not used while building the body (there is no cover page).
            userPrompt: Optional prompt forwarded to the style lookup.
            aiService: Optional AI service forwarded to the style lookup.

        Returns:
            The PDF bytes encoded as a base64 ASCII string.

        Raises:
            Exception: ``"PDF generation failed: ..."`` wrapping any failure;
                the original error is attached as ``__cause__``.
        """
        try:
            # Validate first: it is cheap, and rejecting bad input here avoids
            # a style lookup that may call the AI service.
            if not self._validateJsonStructure(json_content):
                raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

            # Style set: taken from metadata if present, otherwise defaults / AI-enhanced
            styles = await self._getStyleSet(json_content, userPrompt, aiService)

            # Extract sections from the standardized schema
            sections = self._extractSections(json_content)

            # In-memory target for the PDF
            buffer = io.BytesIO()

            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18
            )

            # Build PDF content (no cover page — body starts on page 1; filename still uses `title`)
            story = []

            self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
            for i, section in enumerate(sections):
                self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
                section_elements = self._renderJsonSection(section, styles)
                self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
                story.extend(section_elements)

            # Build PDF — retry with oversized flowables removed on LayoutError
            self._buildPdfWithOverflowGuard(doc, story, buffer)

            # getvalue() returns the whole buffer regardless of the stream position
            return base64.b64encode(buffer.getvalue()).decode('utf-8')

        except Exception as e:
            self.logger.error(f"Error generating PDF from JSON: {str(e)}")
            # Chain explicitly so the root cause stays visible in tracebacks
            raise Exception(f"PDF generation failed: {str(e)}") from e

    def _buildPdfWithOverflowGuard(self, doc, story: List[Any], buffer) -> None:
        """Try doc.build(); on 'too large on page' LayoutError, drop the offending
        flowable, log a warning, and retry (up to 5 times)."""
        maxRetries = 5
        for attempt in range(maxRetries + 1):
            try:
                buffer.seek(0)
                buffer.truncate()
                doc.build(story)
                return
            except Exception as e:
                msg = str(e)
                if "too large on page" not in msg or attempt == maxRetries:
                    raise
                # Identify the offending flowable from the error repr
                self.logger.warning(f"PDF overflow (attempt {attempt + 1}): {msg} — removing oversized element and retrying")
                removed = False
                for idx, flowable in enumerate(story):
                    fRepr = repr(flowable)
                    if "Table" in fRepr and hasattr(flowable, '_cellvalues'):
                        try:
                            nRows = len(flowable._cellvalues)
                            nCols = len(flowable._cellvalues[0]) if flowable._cellvalues else 0
                            if nRows == 1 and nCols == 1:
                                errPara = Paragraph(
                                    "[Code block omitted — content too large for PDF page]",
                                    self._createNormalStyle({}),
                                )
                                story[idx] = errPara
                                removed = True
                                break
                        except Exception:
                            pass
                if not removed:
                    raise

    async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
        """Get style set - use styles from document generation metadata if available,
        otherwise enhance default styles with AI if userPrompt provided.

        WICHTIG: In a dynamic scalable AI system, styling should come from document generation,
        not be generated separately by renderers. Only fall back to AI if styles not provided.

        Args:
            extractedContent: Document content with metadata (may contain styles)
            userPrompt: User's prompt (AI will detect style instructions in any language)
            aiService: AI service (used only if styles not in metadata and userPrompt provided)
            templateName: Name of template style set (None = default)

        Returns:
            Dict with style definitions for all document styles
        """
        # Get default style set
        defaultStyleSet = self._getDefaultStyleSet()

        # FIRST: Check if styles are provided in document generation metadata (preferred approach)
        if extractedContent:
            metadata = extractedContent.get("metadata", {})
            if isinstance(metadata, dict):
                styles = metadata.get("styles")
                if styles and isinstance(styles, dict):
                    self.logger.debug("Using styles from document generation metadata")
                    enhancedStyleSet = self._convertColorsFormat(styles)
                    return self._validateStylesContrast(enhancedStyleSet)

        # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata)
        if userPrompt and aiService:
            self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...")
            enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
            # Convert colors to PDF format after getting styles
            enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet)
            return self._validateStylesContrast(enhancedStyleSet)
        else:
            # Use default styles only
            return defaultStyleSet

    async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
        """Ask the AI service for styles based on the user prompt.

        Builds a ``"pdf"`` style template from the prompt and the defaults and
        returns whatever ``_getAiStyles`` yields. Any exception is logged as a
        warning and ``defaultStyleSet`` is returned unchanged.
        """
        try:
            promptTemplate = self._createAiStyleTemplate("pdf", userPrompt, defaultStyleSet)
            return await self._getAiStyles(aiService, promptTemplate, defaultStyleSet)
        except Exception as e:
            self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles")
        return defaultStyleSet

    def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix contrast issues in AI-generated styles."""
        try:
            # Fix table header contrast
            if "table_header" in styles:
                header = styles["table_header"]
                bg_color = header.get("background", "#FFFFFF")
                text_color = header.get("text_color", "#000000")

                # If both are white or both are dark, fix it
                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
                    header["background"] = "#4F4F4F"
                    header["text_color"] = "#FFFFFF"
                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
                    header["background"] = "#4F4F4F"
                    header["text_color"] = "#FFFFFF"

            # Fix table cell contrast
            if "table_cell" in styles:
                cell = styles["table_cell"]
                bg_color = cell.get("background", "#FFFFFF")
                text_color = cell.get("text_color", "#000000")

                # If both are white or both are dark, fix it
                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
                    cell["background"] = "#FFFFFF"
                    cell["text_color"] = "#2F2F2F"
                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
                    cell["background"] = "#FFFFFF"
                    cell["text_color"] = "#2F2F2F"

            return styles

        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
            return self._getDefaultStyleSet()

    def _getDefaultStyleSet(self) -> Dict[str, Any]:
        """Default PDF style set - used when no style instructions present."""
        return {
            "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
            # Markdown #..###### — sizes must strictly decrease (H1 largest … H6 smallest).
            "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12},
            "heading2": {"font_size": 15, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 10, "space_before": 10},
            "heading3": {"font_size": 13, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
            "heading4": {"font_size": 12, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 6, "space_before": 6},
            "heading5": {"font_size": 11, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 6, "space_before": 6},
            "heading6": {"font_size": 10, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 4, "space_before": 4},
            "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
            "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "left", "font_size": 12},
            "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10},
            "bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3},
            "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6, "align": "left"}
        }

    async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
        """Get AI styles with proper PDF color conversion."""
        if not ai_service:
            return default_styles

        try:
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum

            request_options = AiCallOptions()
            request_options.operationType = OperationTypeEnum.DATA_GENERATE

            request = AiCallRequest(prompt=style_template, context="", options=request_options)

            # Check if AI service is properly configured
            if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
                self.logger.warning("AI service not properly configured, using defaults")
                return default_styles

            response = await ai_service.callAi(request)

            # Check if response is valid
            if not response:
                self.logger.warning("AI service returned no response, using defaults")
                return default_styles

            import json
            import re

            # Clean and parse JSON
            result = response.content.strip() if response and response.content else ""

            # Check if result is empty
            if not result:
                self.logger.warning("AI styling returned empty response, using defaults")
                return default_styles

            # Log the raw response for debugging
            self.logger.debug(f"AI styling raw response: {result[:200]}...")

            # Extract JSON from various formats
            json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
            if json_match:
                result = json_match.group(1).strip()
            elif result.startswith('```json'):
                result = re.sub(r'^```json\s*', '', result)
                result = re.sub(r'\s*```$', '', result)
            elif result.startswith('```'):
                result = re.sub(r'^```\s*', '', result)
                result = re.sub(r'\s*```$', '', result)

            # Try to extract JSON from explanatory text
            json_patterns = [
                r'\{[^{}]*"title"[^{}]*\}',  # Simple JSON object
                r'\{.*?"title".*?\}',        # JSON with title field
                r'\{.*?"font_size".*?\}',    # JSON with font_size field
            ]

            for pattern in json_patterns:
                json_match = re.search(pattern, result, re.DOTALL)
                if json_match:
                    result = json_match.group(0)
                    break

            # Additional cleanup - remove any leading/trailing whitespace and newlines
            result = result.strip()

            # Check if result is still empty after cleanup
            if not result:
                self.logger.warning("AI styling returned empty content after cleanup, using defaults")
                return default_styles

            # Try to parse JSON
            try:
                styles = json.loads(result)
                self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")
            except json.JSONDecodeError as json_error:
                self.logger.warning(f"AI styling returned invalid JSON: {json_error}")

                # Use print instead of logger to avoid truncation
                self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "PDF_RENDERER")
                self.services.utils.debugLogToFile(f"RESPONSE LENGTH: {len(result)} characters", "PDF_RENDERER")

                self.logger.warning(f"Raw content that failed to parse: {result}")

                # Try to fix incomplete JSON by adding missing closing braces
                open_braces = result.count('{')
                close_braces = result.count('}')

                if open_braces > close_braces:
                    # JSON is incomplete, add missing closing braces
                    missing_braces = open_braces - close_braces
                    result = result + '}' * missing_braces
                    self.logger.info(f"Added {missing_braces} missing closing brace(s)")

                    # Try parsing the fixed JSON
                    try:
                        styles = json.loads(result)
                        self.logger.info("Successfully fixed incomplete JSON")
                    except json.JSONDecodeError as fix_error:
                        self.logger.warning(f"Fixed JSON still invalid: {fix_error}")
                        # Try to extract just the JSON part if it's embedded in text
                        json_start = result.find('{')
                        json_end = result.rfind('}')
                        if json_start != -1 and json_end != -1 and json_end > json_start:
                            json_part = result[json_start:json_end+1]
                            try:
                                styles = json.loads(json_part)
                                self.logger.info("Successfully extracted JSON from explanatory text")
                            except json.JSONDecodeError:
                                self.logger.warning("Could not extract valid JSON from response, using defaults")
                                return default_styles
                        else:
                            return default_styles
                else:
                    # Try to extract just the JSON part if it's embedded in text
                    json_start = result.find('{')
                    json_end = result.rfind('}')
                    if json_start != -1 and json_end != -1 and json_end > json_start:
                        json_part = result[json_start:json_end+1]
                        try:
                            styles = json.loads(json_part)
                            self.logger.info("Successfully extracted JSON from explanatory text")
                        except json.JSONDecodeError:
                            self.logger.warning("Could not extract valid JSON from response, using defaults")
                            return default_styles
                    else:
                        return default_styles

            # Convert colors to PDF format (keep as hex strings, PDF renderer will convert them)
            styles = self._convertColorsFormat(styles)

            return styles

        except Exception as e:
            self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
            return default_styles

    def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Convert colors to proper format for PDF compatibility."""
        try:
            for style_name, style_config in styles.items():
                if isinstance(style_config, dict):
                    for prop, value in style_config.items():
                        if isinstance(value, str) and value.startswith('#') and len(value) == 7:
                            # Convert #RRGGBB to #AARRGGBB (add FF alpha channel) for consistency
                            styles[style_name][prop] = f"FF{value[1:]}"
                        elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
                            # Already aRGB format, keep as is
                            pass
            return styles
        except Exception as e:
            self.logger.warning(f"Color conversion failed: {str(e)}")
            return styles

    def _getSafeColor(self, color_value: str, default: str = "#000000") -> str:
        """Get a safe hex color value for PDF."""
        if isinstance(color_value, str) and color_value.startswith('#'):
            if len(color_value) == 7:
                return f"FF{color_value[1:]}"
            elif len(color_value) == 9:
                return color_value
        return default

    def _defaultHeadingStyleDef(self, level: int) -> Dict[str, Any]:
        """When heading{N} is missing from styles, never fall back to heading1 (that made H3 > H2)."""
        sizes = {1: 18, 2: 15, 3: 13, 4: 12, 5: 11, 6: 10}
        fs = sizes.get(level, 10)
        sb = max(4, 14 - level)
        return {
            "font_size": fs,
            "color": "#2F2F2F" if level <= 2 else "#4F4F4F",
            "bold": True,
            "align": "left",
            "space_after": sb,
            "space_before": sb,
        }

    def _createHeadingStyle(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
        """Build the ParagraphStyle for a heading of the given level.

        Uses ``styles["heading{level}"]`` when present and non-empty, otherwise
        the per-level fallback definition. Leading is 1.35 times the font size.
        """
        fallbackDef = self._defaultHeadingStyleDef(level)
        headingDef = styles.get(f"heading{level}") or fallbackDef
        fontSize = headingDef.get("font_size", fallbackDef["font_size"])
        fontName = "Helvetica-Bold" if headingDef.get("bold", True) else "Helvetica"
        return ParagraphStyle(
            f'CustomHeading{level}',
            fontName=fontName,
            fontSize=fontSize,
            spaceAfter=headingDef.get("space_after", 12),
            spaceBefore=headingDef.get("space_before", 12),
            alignment=self._getAlignment(headingDef.get("align", "left")),
            textColor=self._hexToColor(headingDef.get("color", "#2F2F2F")),
            leading=fontSize * 1.35,
        )

    def _createNormalStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
        """Build the body-text ParagraphStyle from ``styles["paragraph"]``.

        Missing properties default to 11pt, 6pt space after, left alignment,
        colour ``#2F2F2F`` and a line height of 1.2; leading is the line
        height multiplied by the font size.
        """
        paragraphDef = styles.get("paragraph", {})
        fontSize = paragraphDef.get("font_size", 11)
        lineHeight = paragraphDef.get("line_height", 1.2)
        return ParagraphStyle(
            'CustomNormal',
            fontSize=fontSize,
            spaceAfter=paragraphDef.get("space_after", 6),
            alignment=self._getAlignment(paragraphDef.get("align", "left")),
            textColor=self._hexToColor(paragraphDef.get("color", "#2F2F2F")),
            leading=lineHeight * fontSize
        )

    def _getAlignment(self, align: str) -> int:
        """Convert an alignment name to the matching ReportLab alignment constant.

        Args:
            align: ``"left"``, ``"center"``, ``"right"`` or ``"justify"``
                (case and surrounding whitespace ignored), or one of the
                numeric strings ``"0"``, ``"1"``, ``"2"``.

        Returns:
            The ReportLab ``TA_*`` constant; ``TA_LEFT`` for empty, non-string
            or unknown values.
        """
        # Imported here because the module-level reportlab import block does
        # not bring TA_RIGHT into scope.
        from reportlab.lib.enums import TA_RIGHT

        if not align or not isinstance(align, str):
            return TA_LEFT

        align_map = {
            "center": TA_CENTER,
            "left": TA_LEFT,
            "justify": TA_JUSTIFY,
            "right": TA_RIGHT,  # previously fell back to LEFT, so right alignment was silently lost
            "0": TA_LEFT,      # Handle numeric strings
            "1": TA_CENTER,
            # NOTE(review): kept as before for compatibility, but ReportLab's
            # own numbering is TA_RIGHT == 2 and TA_JUSTIFY == 4 — confirm
            # which convention the producers of these strings follow.
            "2": TA_JUSTIFY
        }
        return align_map.get(align.lower().strip(), TA_LEFT)

    def _hexToColor(self, hex_color: str) -> colors.Color:
        """Convert a hex colour string to a ReportLab colour.

        Accepts ``RRGGBB`` and ``AARRGGBB`` with or without a leading ``#``;
        the alpha pair of an 8-digit value is ignored.

        Returns:
            The matching ``colors.Color``, or ``colors.black`` when the value
            is not a string, has another length or contains non-hex digits.
        """
        try:
            hex_color = hex_color.lstrip('#')

            # aRGB (8 digits): skip the alpha channel (first 2 characters)
            if len(hex_color) == 8:
                hex_color = hex_color[2:]

            # RGB (6 digits)
            if len(hex_color) == 6:
                r = int(hex_color[0:2], 16) / 255.0
                g = int(hex_color[2:4], 16) / 255.0
                b = int(hex_color[4:6], 16) / 255.0
                return colors.Color(r, g, b)

            # Any other length is not a supported format
            return colors.black
        except (AttributeError, TypeError, ValueError):
            # Non-string input or invalid hex digits. Deliberately narrower
            # than a bare except so KeyboardInterrupt/SystemExit still propagate.
            return colors.black

    def _escapeReportlabXml(self, text: str) -> str:
        """Escape text for ReportLab Paragraph markup."""
        if not text:
            return ""
        return (
            text.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
        )

    def _applyInlineMarkdownToEscapedPlain(self, text: str) -> str:
        """Escape XML then apply bold/italic to a segment with no `code` spans (code is handled separately)."""
        if not text:
            return ""
        s = self._escapeReportlabXml(text)
        s = _re_pdf.sub(r"\*\*(.+?)\*\*", r"<b>\1</b>", s, flags=_re_pdf.DOTALL)
        s = _re_pdf.sub(r"__(.+?)__", r"<b>\1</b>", s, flags=_re_pdf.DOTALL)
        s = _re_pdf.sub(r"(?<!\*)\*([^*\n]+?)\*(?!\*)", r"<i>\1</i>", s)
        s = _re_pdf.sub(r"(?<![\w/])_([^_\n]+?)_(?![\w/])", r"<i>\1</i>", s)
        return s

    def _markdownInlineToReportlabXml(self, text: str) -> str:
        """Convert common inline Markdown (**bold**, *italic*, `code`) to ReportLab XML.

        Backtick spans are cut out first and only XML-escaped, so paths such as
        `.../<Slug>/...` are not mangled by the bold/italic patterns and the
        markup inside <font name="Courier"> stays well-formed. The text between
        code spans goes through the bold/italic conversion. Falsy input yields "".
        """
        if not text:
            return ""
        normalized = _normalizePdfMonospaceText(text)
        pieces: List[str] = []
        cursor = 0
        for codeSpan in _re_pdf.finditer(r"`([^`]*)`", normalized):
            spanStart, spanEnd = codeSpan.span()
            # Plain text preceding this code span
            pieces.append(self._applyInlineMarkdownToEscapedPlain(normalized[cursor:spanStart]))
            pieces.append('<font name="Courier">' + self._escapeReportlabXml(codeSpan.group(1)) + '</font>')
            cursor = spanEnd
        # Whatever follows the last code span (or the whole text if there was none)
        pieces.append(self._applyInlineMarkdownToEscapedPlain(normalized[cursor:]))
        return "".join(pieces)

    def _paragraphFromInlineMarkdown(self, text: str, style: ParagraphStyle) -> Paragraph:
        """Wrap ``text`` in a Paragraph after converting its inline Markdown to ReportLab XML."""
        markup = self._markdownInlineToReportlabXml(text)
        return Paragraph(markup, style)

    def _createTableCellParagraphStyle(
        self, styles: Dict[str, Any], *, header: bool, tableStyleKey: str
    ) -> ParagraphStyle:
        """Build the ParagraphStyle for table cell text (word wrap within colWidth).

        Args:
            styles: Style set; ``styles[tableStyleKey]`` supplies ``font_size``,
                ``text_color`` and ``bold``.
            header: Selects header defaults (12pt, white text, bold allowed)
                over body defaults (10pt, ``#2F2F2F``, never bold).
            tableStyleKey: Key of the table style definition; also part of the style name.
        """
        cellDef = styles.get(tableStyleKey, {})
        if header:
            variant, fallbackSize, fallbackColor = "H", 12, "#FFFFFF"
        else:
            variant, fallbackSize, fallbackColor = "B", 10, "#2F2F2F"
        fontSize = cellDef.get("font_size", fallbackSize)
        # Only header cells may be bold, and only when the definition does not disable it
        useBold = header and cellDef.get("bold", True)
        return ParagraphStyle(
            f"TblCell{variant}{tableStyleKey}",
            fontSize=fontSize,
            leading=fontSize * 1.25,
            alignment=TA_LEFT,
            textColor=self._hexToColor(cellDef.get("text_color", fallbackColor)),
            fontName="Helvetica-Bold" if useBold else "Helvetica",
        )

    def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a single JSON section to PDF elements using AI-generated styles.

        Every element of the section is handled according to its own ``type``:

        * ``reference`` and ``extracted_text`` are rendered inline here.
        * ``table``, ``bullet_list``, ``heading``, ``paragraph``, ``code_block``
          and ``image`` are delegated to the matching ``_renderJson*`` method.
        * Elements without a known type fall back to the section's type and,
          if that is unknown as well, are rendered as a paragraph.

        Args:
            section: Section dict; its type and elements are read through
                ``_getSectionType`` and ``_getSectionData``.
            styles: Style definitions, passed through to the element renderers.

        Returns:
            The flowables produced for all elements. If anything raises, a
            warning is logged and a single paragraph carrying the error text
            is returned instead.
        """
        try:
            section_type = self._getSectionType(section)
            elements = self._getSectionData(section)

            # One dispatch table serves both the element type and the
            # section-type fallback, so the two lookups cannot drift apart.
            renderers = {
                "table": self._renderJsonTable,
                "bullet_list": self._renderJsonBulletList,
                "heading": self._renderJsonHeading,
                "paragraph": self._renderJsonParagraph,
                "code_block": self._renderJsonCodeBlock,
                "image": self._renderJsonImage,
            }

            def _rendererFor(kind):
                # A non-string kind (None, list, ...) can never name a renderer
                # and may not even be hashable, so it is never looked up.
                return renderers.get(kind) if isinstance(kind, str) else None

            all_elements: List[Any] = []
            for element in elements:
                element_type = element.get("type", "") if isinstance(element, dict) else ""

                if element_type == "reference":
                    # Document reference: only the label is shown.
                    label = element.get("label", "Reference")
                    ref_style = ParagraphStyle(
                        'Reference',
                        parent=self._createNormalStyle(styles),
                        textColor=colors.grey
                    )
                    # Italic is applied through markup, and the label is
                    # escaped so '&' or '<' cannot break the markup parser.
                    safeLabel = self._escapeReportlabXml(str(label))
                    all_elements.append(Paragraph(f"<i>[Reference: {safeLabel}]</i>", ref_style))
                    all_elements.append(Spacer(1, 6))
                    continue

                if element_type == "extracted_text":
                    # Extracted text, optionally followed by its source.
                    content = element.get("content", "")
                    source = element.get("source", "")
                    if content:
                        bodyXml = self._markdownInlineToReportlabXml(content)
                        if source:
                            bodyXml = f"{bodyXml} <i>(Source: {self._escapeReportlabXml(source)})</i>"
                        all_elements.append(Paragraph(bodyXml, self._createNormalStyle(styles)))
                        all_elements.append(Spacer(1, 6))
                    continue

                # Element type wins over section type (elements can differ from
                # their section); unknown types end up as paragraphs.
                render = (
                    _rendererFor(element_type)
                    or _rendererFor(section_type)
                    or self._renderJsonParagraph
                )
                all_elements.extend(render(element, styles))

            return all_elements

        except Exception as e:
            self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
            # Escape the message: exception text often contains '<' or '&',
            # which would make the error paragraph itself fail to parse.
            safeError = self._escapeReportlabXml(str(e))
            return [Paragraph(f"[Error rendering section: {safeError}]", self._createNormalStyle(styles))]

    def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON table: left-aligned, width capped to printable area, cells wrap."""
        try:
            content = table_data.get("content", {})
            if not isinstance(content, dict):
                return []
            headers = content.get("headers", [])
            rows = content.get("rows", [])

            if not headers or not rows:
                return []

            numCols = len(headers)
            colWidth = _PDF_CONTENT_WIDTH_PT / max(numCols, 1)
            colWidths = [colWidth] * numCols

            hdrPs = self._createTableCellParagraphStyle(styles, header=True, tableStyleKey="table_header")
            cellPs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell")

            def _cellPara(val, ps):
                return self._paragraphFromInlineMarkdown(str(val) if val is not None else "", ps)

            headerRow = [_cellPara(h, hdrPs) for h in headers]
            bodyRows = []
            for row in rows:
                padded = list(row) + [""] * max(0, numCols - len(row))
                padded = padded[:numCols]
                bodyRows.append([_cellPara(c, cellPs) for c in padded])

            table_matrix = [headerRow] + bodyRows
            table = Table(table_matrix, colWidths=colWidths, repeatRows=1)

            table_header_style = styles.get("table_header", {})
            table_cell_style = styles.get("table_cell", {})

            table_style = [
                ("BACKGROUND", (0, 0), (-1, 0), self._hexToColor(table_header_style.get("background", "#4F4F4F"))),
                ("BACKGROUND", (0, 1), (-1, -1), self._hexToColor(table_cell_style.get("background", "#FFFFFF"))),
                ("ALIGN", (0, 0), (-1, -1), "LEFT"),
                ("VALIGN", (0, 0), (-1, -1), "TOP"),
                ("LEFTPADDING", (0, 0), (-1, -1), 4),
                ("RIGHTPADDING", (0, 0), (-1, -1), 4),
                ("TOPPADDING", (0, 0), (-1, 0), 6),
                ("BOTTOMPADDING", (0, 0), (-1, 0), 8),
                ("TOPPADDING", (0, 1), (-1, -1), 4),
                ("BOTTOMPADDING", (0, 1), (-1, -1), 4),
                ("GRID", (0, 0), (-1, -1), 0.5, colors.black),
            ]
            table.setStyle(TableStyle(table_style))
            return [table, Spacer(1, 12)]

        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return []

    def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON bullet list to PDF elements using AI-generated styles."""
        try:
            # Extract from nested content structure
            content = list_data.get("content", {})
            if not isinstance(content, dict):
                return []
            items = content.get("items", [])
            bullet_style_def = styles.get("bullet_list", {})

            elements = []
            for item in items:
                if isinstance(item, str):
                    elements.append(
                        Paragraph(f"• {self._markdownInlineToReportlabXml(item)}", self._createNormalStyle(styles))
                    )
                elif isinstance(item, dict) and "text" in item:
                    elements.append(
                        Paragraph(
                            f"• {self._markdownInlineToReportlabXml(item['text'])}",
                            self._createNormalStyle(styles),
                        )
                    )

            if elements:
                elements.append(Spacer(1, bullet_style_def.get("space_after", 3)))

            return elements

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return []

    def _renderJsonHeading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON heading to PDF elements using AI-generated styles."""
        try:
            # Extract from nested content structure
            content = heading_data.get("content", {})
            if not isinstance(content, dict):
                return []
            text = content.get("text", "")
            level = content.get("level", 1)

            if text:
                level = max(1, min(6, level))
                heading_style = self._createHeadingStyle(styles, level)
                return [self._paragraphFromInlineMarkdown(text, heading_style)]

            return []

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return []

    def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON paragraph to PDF elements using AI-generated styles."""
        try:
            # Extract from nested content structure
            content = paragraph_data.get("content", {})
            if isinstance(content, dict):
                text = content.get("text", "")
            elif isinstance(content, str):
                text = content
            else:
                text = ""

            if text:
                return [self._paragraphFromInlineMarkdown(text, self._createNormalStyle(styles))]

            return []

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return []

    def _renderJsonCodeBlock(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON code block to PDF elements using AI-generated styles."""
        try:
            # Extract from nested content structure
            content = code_data.get("content", {})
            if not isinstance(content, dict):
                return []
            code = content.get("code", "")
            language = content.get("language", "")
            code_style_def = styles.get("code_block", {})

            if code:
                code = _prepareCodeBlockPlainText(code)
                code = _normalizePdfMonospaceText(code)
                elements = []
                fs = code_style_def.get("font_size", 9)
                mono = code_style_def.get("font", "Courier")

                if language:
                    lang_style = ParagraphStyle(
                        "CodeLanguage",
                        fontSize=fs,
                        textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
                        fontName="Helvetica-Bold",
                        alignment=TA_LEFT,
                    )
                    elements.append(
                        Paragraph(
                            self._escapeReportlabXml(f"Code ({language}):"),
                            lang_style,
                        )
                    )

                approxCharWPt = max(fs * 0.52, 4.5)
                usableWidth = _PDF_CONTENT_WIDTH_PT - 16  # left+right padding
                maxLineChars = max(48, int(usableWidth / approxCharWPt))
                bg_col = self._hexToColor(code_style_def.get("background", "#F5F5F5"))
                leading = fs * 1.2
                spaceAfter = code_style_def.get("space_after", 6)

                # Each source line may wrap to ceil(len/maxLineChars) visual lines.
                # Frame height ~740pt minus padding → keep rendered height < 600pt.
                maxVisualLinesPerChunk = max(8, int(600 / leading))
                srcLines = code.split("\n")
                chunks: List[List[str]] = []
                curChunk: List[str] = []
                curVisual = 0
                for sl in srcLines:
                    wrapped = max(1, -(-len(sl) // maxLineChars)) if sl else 1
                    if curVisual + wrapped > maxVisualLinesPerChunk and curChunk:
                        chunks.append(curChunk)
                        curChunk = []
                        curVisual = 0
                    curChunk.append(sl)
                    curVisual += wrapped
                if curChunk:
                    chunks.append(curChunk)

                for ci, chunkLines in enumerate(chunks):
                    chunkText = "\n".join(chunkLines)
                    styleId = f"CodePre_{id(code_data) & 0xFFFFFFFF}_{ci}"
                    codePrStyle = ParagraphStyle(
                        styleId,
                        fontName=mono,
                        fontSize=fs,
                        leading=leading,
                        textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
                        alignment=TA_LEFT,
                        leftIndent=0,
                        rightIndent=0,
                    )
                    pf = Preformatted(chunkText, codePrStyle, dedent=0, maxLineLength=maxLineChars)
                    tbl = Table([[pf]], colWidths=[_PDF_CONTENT_WIDTH_PT])
                    tbl.setStyle(
                        TableStyle(
                            [
                                ("BACKGROUND", (0, 0), (-1, -1), bg_col),
                                ("VALIGN", (0, 0), (-1, -1), "TOP"),
                                ("LEFTPADDING", (0, 0), (-1, -1), 8),
                                ("RIGHTPADDING", (0, 0), (-1, -1), 8),
                                ("TOPPADDING", (0, 0), (-1, -1), 6),
                                ("BOTTOMPADDING", (0, 0), (-1, -1), 6),
                            ]
                        )
                    )
                    tbl.spaceAfter = 0 if ci < len(chunks) - 1 else spaceAfter
                    elements.append(tbl)
                return elements

            return []

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return []

    def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON image to PDF elements using reportlab.

        The base64 payload is looked up in ``content.base64Data``, then in
        ``image_data.base64Data``, then in a ``data:image/...;base64,`` URL
        (``url`` on the element or in its content). The image is scaled down,
        never up, to fit the page frame and is followed by an italic caption
        taken from ``caption`` or derived from the alt text.

        Args:
            image_data: Image element dict.
            styles: Style definitions; ``paragraph.color`` tints the caption.

        Returns:
            ``[image]`` or ``[image, caption]`` on success; otherwise a single
            placeholder or error paragraph.
        """
        alt_text: Any = "Image"
        try:
            # Extract from nested content structure
            content = image_data.get("content", {})
            base64_data = ""
            caption = ""

            if isinstance(content, dict):
                base64_data = content.get("base64Data", "")
                alt_text = content.get("altText", "Image")
                caption = content.get("caption", "")
            elif isinstance(content, str):
                # Content might be base64 string directly (shouldn't happen, but handle it)
                self.logger.warning("Image content is a string, not a dict. This should not happen.")
                return [Paragraph("[Image: Invalid format]", self._createNormalStyle(styles))]

            # If base64Data not found in content, try direct element fields (fallback)
            if not base64_data:
                base64_data = image_data.get("base64Data", "")
                if not alt_text or alt_text == "Image":
                    alt_text = image_data.get("altText", "Image")
                if not caption:
                    caption = image_data.get("caption", "")

            # If base64Data still not found, try extracting from url data URI
            if not base64_data:
                url = image_data.get("url", "") or (content.get("url", "") if isinstance(content, dict) else "")
                if url and isinstance(url, str) and url.startswith("data:image/"):
                    # Extract base64 from data URI: data:image/png;base64,<base64>
                    import re
                    match = re.match(r'data:image/[^;]+;base64,(.+)', url)
                    if match:
                        base64_data = match.group(1)

            # Normalize once so every message below can rely on a string.
            alt_text = str(alt_text) if alt_text else "Image"
            # Paragraph text is parsed as XML-like markup, so anything taken
            # from the document has to be escaped before it is interpolated.
            safeAlt = self._escapeReportlabXml(alt_text)

            if not base64_data:
                self.logger.warning(f"No base64 data found for image. Alt text: {alt_text}")
                return [Paragraph(f"[Image: {safeAlt}]", self._createNormalStyle(styles))]

            # Check the type before anything else touches the value.
            if not isinstance(base64_data, str):
                self.logger.warning(f"Base64 data is not a string: {type(base64_data)}")
                return [Paragraph(f"[Image: {safeAlt} - Invalid data type]", self._createNormalStyle(styles))]

            if len(base64_data) > 10000:  # Very long string might be entire element JSON
                self.logger.warning(f"Base64 data seems too long ({len(base64_data)} chars), might be incorrectly extracted")

            try:
                from reportlab.platypus import Image as ReportLabImage
                import base64
                import io

                imageStream = io.BytesIO(base64.b64decode(base64_data))
                imgWidth, imgHeight = self._measurePdfImage(imageStream)
                elements = [ReportLabImage(imageStream, width=imgWidth, height=imgHeight)]

                captionText = self._pdfImageCaptionText(caption, alt_text)
                if captionText:
                    # Derive a child style rather than mutating the style
                    # object handed out by _createNormalStyle.
                    captionStyle = ParagraphStyle(
                        "ImageCaption",
                        parent=self._createNormalStyle(styles),
                        fontSize=10,
                        textColor=self._hexToColor(styles.get("paragraph", {}).get("color", "#666666")),
                    )
                    elements.append(Paragraph(f"<i>{self._escapeReportlabXml(captionText)}</i>", captionStyle))

                return elements

            except Exception as imgError:
                self.logger.error(f"Error embedding image in PDF: {str(imgError)}")
                # Return error message instead of placeholder
                return [
                    self._pdfImageErrorParagraph(
                        f"[Error: Could not embed image '{alt_text}'. {str(imgError)}]", styles
                    )
                ]

        except Exception as e:
            self.logger.error(f"Error rendering image: {str(e)}")
            # Uses the alt text resolved so far, so this handler never has to
            # touch image_data again (it may be what caused the failure).
            return [
                self._pdfImageErrorParagraph(
                    f"[Error: Could not render image '{alt_text}'. {str(e)}]", styles
                )
            ]

    def _measurePdfImage(self, imageStream: Any) -> tuple:
        """Return ``(width, height)`` in points for the image in ``imageStream``.

        The pixel size is converted with the image's own DPI (96 when missing
        or not positive) and scaled down, never up, to fit 430 x 730 pt. When
        the size cannot be determined, a warning is logged and 4 x 3 inch is
        used. The stream is rewound to position 0 before returning.
        """
        # Slightly below the SimpleDocTemplate frame (about 439 x 740 pt).
        availableWidth = 430.0
        availableHeight = 730.0
        try:
            from PIL import Image as PILImage

            pilImage = PILImage.open(imageStream)
            originalWidth, originalHeight = pilImage.size

            dpi = pilImage.info.get('dpi', (96, 96))[0]  # Default to 96 DPI if not specified
            if dpi <= 0:
                dpi = 96

            # Convert pixels to points: 1 point = 1/72 inch
            imgWidthPoints = originalWidth * (72.0 / dpi)
            imgHeightPoints = originalHeight * (72.0 / dpi)

            widthScale = availableWidth / imgWidthPoints if imgWidthPoints > 0 else 1.0
            heightScale = availableHeight / imgHeightPoints if imgHeightPoints > 0 else 1.0
            # Smallest factor keeps the aspect ratio; 1.0 caps it so images are never enlarged.
            scale = min(widthScale, heightScale, 1.0)

            # min() only guards against float rounding pushing a side over the limit.
            size = (
                min(imgWidthPoints * scale, availableWidth),
                min(imgHeightPoints * scale, availableHeight),
            )
        except Exception as e:
            self.logger.warning(f"Error calculating image size: {str(e)}, using safe default")
            from reportlab.lib.units import inch

            size = (4 * inch, 3 * inch)

        # reportlab reads the stream from the start.
        imageStream.seek(0)
        return size

    @staticmethod
    def _pdfImageCaptionText(caption: Any, alt_text: str) -> str:
        """Return the caption to print under an image, or ``""`` for none.

        An explicit caption wins. Otherwise a "Figure: ..." caption is built
        from the alt text, unless it is empty or the generic "Image". A
        leading "Render as visual element:" usage hint is stripped down to
        the text that follows it.
        """
        if caption:
            return str(caption)
        if not alt_text or alt_text == "Image":
            return ""
        marker = "Render as visual element:"
        if marker in alt_text:
            return f"Figure: {alt_text.split(marker)[1].strip()}"
        return f"Figure: {alt_text}"

    def _pdfImageErrorParagraph(self, message: str, styles: Dict[str, Any]) -> Any:
        """Return a red paragraph showing ``message`` with markup characters escaped."""
        errorStyle = ParagraphStyle(
            "ImageError",
            parent=self._createNormalStyle(styles),
            textColor=self._hexToColor("#FF0000"),  # Red color for error
        )
        return Paragraph(self._escapeReportlabXml(message), errorStyle)