1196 lines
No EOL
58 KiB
Python
1196 lines
No EOL
58 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
||
# All rights reserved.
|
||
"""
|
||
PDF renderer for report generation using reportlab.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import unicodedata
|
||
|
||
from .documentRendererBaseTemplate import BaseRenderer
|
||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||
from typing import Dict, Any, List, Optional
|
||
import io
|
||
import base64
|
||
|
||
try:
|
||
from reportlab.lib.pagesizes import A4
|
||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Preformatted
|
||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||
from reportlab.lib.units import inch
|
||
from reportlab.lib import colors
|
||
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
|
||
REPORTLAB_AVAILABLE = True
|
||
except ImportError:
|
||
REPORTLAB_AVAILABLE = False
|
||
|
||
import re as _re_pdf
|
||
|
||
from ._pdfFontFallback import wrapEmojiSpansInXml as _wrapEmojiSpansInXml
|
||
|
||
# A4 width in pt; margins must match SimpleDocTemplate(leftMargin/rightMargin)
|
||
_PDF_MARGIN_LR_PT = 72.0
|
||
_PDF_A4_WIDTH_PT = 595.27
|
||
_PDF_CONTENT_WIDTH_PT = _PDF_A4_WIDTH_PT - (2 * _PDF_MARGIN_LR_PT)
|
||
|
||
|
||
def _boxDrawingCharToAscii(ch: str) -> str:
|
||
"""Map one box-drawing character to ASCII (Courier has no glyphs for U+2500–U+257F)."""
|
||
nm = unicodedata.name(ch, "")
|
||
v = "VERTICAL" in nm
|
||
h = "HORIZONTAL" in nm
|
||
and_ = "AND" in nm
|
||
if v and h:
|
||
return "+"
|
||
if v and not h and not and_:
|
||
return "|"
|
||
if h and not v and not and_:
|
||
return "-"
|
||
return "+"
|
||
|
||
|
||
def _normalizePdfMonospaceText(text: str) -> str:
|
||
"""Replace Unicode box/block drawing with ASCII so PDF core fonts render readable code/trees."""
|
||
if not text:
|
||
return ""
|
||
out: List[str] = []
|
||
for ch in text:
|
||
o = ord(ch)
|
||
if 0x2500 <= o <= 0x257F:
|
||
out.append(_boxDrawingCharToAscii(ch))
|
||
elif 0x2580 <= o <= 0x259F:
|
||
out.append("#")
|
||
else:
|
||
out.append(ch)
|
||
return "".join(out)
|
||
|
||
|
||
def _prepareCodeBlockPlainText(text: str) -> str:
|
||
"""Normalize newlines/tabs for preformatted code (no HTML/XML; spaces must stay significant)."""
|
||
if not text:
|
||
return ""
|
||
text = text.replace("\r\n", "\n").replace("\r", "\n")
|
||
return text.expandtabs(4)
|
||
|
||
|
||
class RendererPdf(BaseRenderer):
|
||
"""Renders content to PDF format using reportlab."""
|
||
|
||
@classmethod
|
||
def getSupportedFormats(cls) -> List[str]:
|
||
"""Return supported PDF formats."""
|
||
return ['pdf']
|
||
|
||
@classmethod
|
||
def getFormatAliases(cls) -> List[str]:
|
||
"""Return format aliases."""
|
||
return ['document', 'print']
|
||
|
||
@classmethod
|
||
def getPriority(cls) -> int:
|
||
"""Return priority for PDF renderer."""
|
||
return 120
|
||
|
||
@classmethod
|
||
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
|
||
"""Return output style classification: PDF documents are formatted documents."""
|
||
return 'document'
|
||
|
||
@classmethod
def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
    """List every section content type the PDF renderer can take.

    The PDF renderer accepts all section types, so this simply returns a
    fresh list copy of the project-wide ``supportedSectionTypes``.

    Args:
        formatName: Accepted for interface compatibility; not consulted.
    """
    # Imported lazily, as in the rest of this module's optional dependencies.
    from modules.datamodels.datamodelJson import supportedSectionTypes

    acceptedTypes = [*supportedSectionTypes]
    return acceptedTypes
|
||
|
||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
    """Render extracted JSON content into a single PDF document.

    When reportlab is not installed the call is delegated to the HTML
    renderer. Any exception raised while rendering is logged and converted
    into a plain-text fallback document describing the error.

    Args:
        extractedContent: Structured document content (metadata + documents).
        title: Document title; used to derive a filename when none is given.
        userPrompt: Original user prompt, forwarded to style resolution.
        aiService: Optional AI service, forwarded to style resolution.
        style: Optional unified style definition.

    Returns:
        A one-element list holding the rendered document.
    """
    pdfMime = "application/pdf"
    try:
        if not REPORTLAB_AVAILABLE:
            # No reportlab: hand the whole job to the HTML renderer instead.
            from .rendererHtml import RendererHtml
            return await RendererHtml().render(extractedContent, title, userPrompt, aiService, style=style)

        rawPdf = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService, unifiedStyle=style)

        # Metadata supplies the document type and is passed through as-is.
        metadata = extractedContent.get("metadata", {}) if extractedContent else {}
        metadataIsDict = isinstance(metadata, dict)
        documentType = metadata.get("documentType") if metadataIsDict else None

        # Prefer the filename of the first document entry, else derive one.
        documents = extractedContent.get("documents", [])
        filename = None
        if documents and isinstance(documents[0], dict):
            filename = documents[0].get("filename")
        if not filename:
            filename = self._determineFilename(title, pdfMime)

        # The generator returns base64 text; decode it, or fall back to the
        # UTF-8 bytes of the string if it is not valid base64.
        pdfBytes = rawPdf
        if isinstance(rawPdf, str):
            try:
                pdfBytes = base64.b64decode(rawPdf)
            except Exception:
                pdfBytes = rawPdf.encode('utf-8')

        rendered = RenderedDocument(
            documentData=pdfBytes,
            mimeType="application/pdf",
            filename=filename,
            documentType=documentType,
            metadata=metadata if metadataIsDict else None
        )
        return [rendered]

    except Exception as e:
        self.logger.error(f"Error rendering PDF: {str(e)}")
        # Minimal fallback: a text document carrying the error message.
        errorText = f"PDF Generation Error: {str(e)}"
        fallbackDoc = RenderedDocument(
            documentData=errorText.encode('utf-8'),
            mimeType="text/plain",
            filename=self._determineFilename(title, "text/plain")
        )
        return [fallbackDoc]
|
||
|
||
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, unifiedStyle: Dict[str, Any] = None) -> str:
    """Build the PDF for a structured JSON document and return it base64-encoded.

    Styles come from ``unifiedStyle`` when given (converted to the internal
    style dict and completed with spacing defaults); otherwise they are
    resolved through ``_getStyleSet``. Each section is rendered to ReportLab
    flowables and the story is built via ``_buildPdfWithOverflowGuard``.

    Args:
        json_content: Document following the standardized schema
            ``{metadata: {...}, documents: [{sections: [...]}]}``.
        title: Document title (not rendered as a cover page here).
        userPrompt: User prompt, forwarded to legacy style resolution.
        aiService: AI service, forwarded to legacy style resolution.
        unifiedStyle: Optional unified style definition.

    Returns:
        The PDF bytes encoded as a base64 ASCII string.

    Raises:
        Exception: "PDF generation failed: ..." wrapping any underlying
            error; the original exception is chained as ``__cause__``.
    """
    try:
        if unifiedStyle:
            styles = self._convertUnifiedStyleToInternal(unifiedStyle)
            self._unifiedStyle = unifiedStyle
            # Make sure all six heading levels exist and carry spacing.
            for level in range(1, 7):
                hKey = f"heading{level}"
                if hKey not in styles:
                    styles[hKey] = self._defaultHeadingStyleDef(level)
                else:
                    styles[hKey].setdefault("space_after", 12)
                    styles[hKey].setdefault("space_before", 12)
            styles["paragraph"].setdefault("space_after", 6)
            styles["paragraph"].setdefault("line_height", unifiedStyle["paragraph"].get("lineSpacing", 1.2))
            styles["bullet_list"].setdefault("space_after", 3)
            styles["code_block"].setdefault("space_after", 6)
            styles["code_block"].setdefault("align", "left")
        else:
            styles = await self._getStyleSet(json_content, userPrompt, aiService)
            self._unifiedStyle = None

        if not self._validateJsonStructure(json_content):
            raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

        sections = self._extractSections(json_content)

        # In-memory target for the generated PDF.
        buffer = io.BytesIO()

        # Page margins come from the unified style when present.
        pageCfg = unifiedStyle["page"] if unifiedStyle else None
        if pageCfg:
            m = pageCfg["marginsPt"]
            doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=m["right"], leftMargin=m["left"], topMargin=m["top"], bottomMargin=m["bottom"])
        else:
            doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18)

        # No cover page: the body starts on page 1.
        story = []
        self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
        for i, section in enumerate(sections):
            self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
            section_elements = self._renderJsonSection(section, styles)
            self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
            story.extend(section_elements)

        self._buildPdfWithOverflowGuard(doc, story, buffer)

        buffer.seek(0)
        pdf_bytes = buffer.getvalue()
        return base64.b64encode(pdf_bytes).decode('utf-8')

    except Exception as e:
        self.logger.error(f"Error generating PDF from JSON: {str(e)}")
        # Chain the cause so the original traceback is not lost.
        raise Exception(f"PDF generation failed: {str(e)}") from e
|
||
|
||
def _buildPdfWithOverflowGuard(self, doc, story: List[Any], buffer) -> None:
    """Build the PDF, repairing one oversized flowable per failed attempt.

    ``doc.build`` is tried up to six times (initial attempt + 5 retries).
    When it fails with a "too large on page" error, the first repairable
    flowable in ``story`` is fixed in place and the build is retried:

    * a ReportLab ``Image`` larger than the assumed frame is scaled down;
    * a 1x1 table (used for code blocks) is replaced by a placeholder
      paragraph.

    Args:
        doc: The ReportLab document template to build.
        story: Flowables to render; may be modified in place by repairs.
        buffer: The stream ``doc`` writes to; reset before every attempt.

    Raises:
        Exception: The original build error when it is not an overflow,
            when no repairable flowable is found, or when retries run out.
    """
    maxRetries = 5
    # Conservative frame bounds (pt) used to decide whether an image shrinks.
    frameW = 450.0
    frameH = 650.0

    for attempt in range(maxRetries + 1):
        try:
            buffer.seek(0)
            buffer.truncate()
            # build() consumes the list it is given (flowables are deleted
            # as they are handled). Build from a copy so that a retry
            # re-renders the complete story instead of only the remainder.
            doc.build(list(story))
            return
        except Exception as e:
            msg = str(e)
            if "too large on page" not in msg or attempt == maxRetries:
                raise
            self.logger.warning(f"PDF overflow (attempt {attempt + 1}): {msg} — removing oversized element and retrying")

            repaired = False
            for idx, flowable in enumerate(story):
                if hasattr(flowable, 'drawWidth') and hasattr(flowable, 'drawHeight'):
                    from reportlab.platypus import Image as ReportLabImage
                    if isinstance(flowable, ReportLabImage) and (
                        flowable.drawHeight > frameH or flowable.drawWidth > frameW
                    ):
                        scaleW = frameW / flowable.drawWidth if flowable.drawWidth > frameW else 1.0
                        scaleH = frameH / flowable.drawHeight if flowable.drawHeight > frameH else 1.0
                        # Extra 10% shrink leaves headroom for padding.
                        s = min(scaleW, scaleH) * 0.9
                        flowable.drawWidth = flowable.drawWidth * s
                        flowable.drawHeight = flowable.drawHeight * s
                        flowable._width = flowable.drawWidth
                        flowable._height = flowable.drawHeight
                        repaired = True
                        break

                if hasattr(flowable, '_cellvalues'):
                    try:
                        cellRows = flowable._cellvalues
                        nRows = len(cellRows)
                        nCols = len(cellRows[0]) if cellRows else 0
                    except Exception:
                        # Best effort: an unreadable table is simply skipped.
                        continue
                    if nRows == 1 and nCols == 1:
                        story[idx] = Paragraph(
                            "[Code block omitted - content too large for PDF page]",
                            self._createNormalStyle({}),
                        )
                        repaired = True
                        break

            if not repaired:
                raise
|
||
|
||
async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
    """Resolve the style set used for rendering.

    Resolution order:
      1. ``metadata["styles"]`` of the extracted content, when it is a
         non-empty dict (preferred).
      2. The default style set enhanced by AI, when both ``userPrompt`` and
         ``aiService`` are given.
      3. The plain default style set.

    IMPORTANT: In a dynamic scalable AI system, styling should come from
    document generation, not be generated separately by renderers. AI is
    only a fallback when no styles were provided.

    Args:
        extractedContent: Document content with metadata (may contain styles).
        userPrompt: User's prompt (AI will detect style instructions in any language).
        aiService: AI service (used only if styles are not in metadata and
            userPrompt is provided).
        templateName: Name of template style set (None = default); not
            consulted by this implementation.

    Returns:
        Dict with style definitions for all document styles.
    """
    fallbackStyles = self._getDefaultStyleSet()

    # Preferred source: styles shipped with the document metadata.
    docMeta = extractedContent.get("metadata", {}) if extractedContent else None
    providedStyles = docMeta.get("styles") if isinstance(docMeta, dict) else None
    if providedStyles and isinstance(providedStyles, dict):
        self.logger.debug("Using styles from document generation metadata")
        return self._validateStylesContrast(self._convertColorsFormat(providedStyles))

    # Without both a prompt and an AI service only the defaults remain.
    if not (userPrompt and aiService):
        return fallbackStyles

    self.logger.info("Styles not in metadata, enhancing with AI based on user prompt...")
    aiStyles = await self._enhanceStylesWithAI(userPrompt, fallbackStyles, aiService)
    aiStyles = self._convertColorsFormat(aiStyles)
    return self._validateStylesContrast(aiStyles)
|
||
|
||
async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
    """Ask the AI service to adapt the default styles to the user prompt.

    Returns the AI-provided styles, or ``defaultStyleSet`` unchanged when
    anything goes wrong (the failure is logged as a warning).
    """
    try:
        aiStyleTemplate = self._createAiStyleTemplate("pdf", userPrompt, defaultStyleSet)
        return await self._getAiStyles(aiService, aiStyleTemplate, defaultStyleSet)
    except Exception as e:
        self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles")
        return defaultStyleSet
|
||
|
||
def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||
"""Validate and fix contrast issues in AI-generated styles."""
|
||
try:
|
||
# Fix table header contrast
|
||
if "table_header" in styles:
|
||
header = styles["table_header"]
|
||
bg_color = header.get("background", "#FFFFFF")
|
||
text_color = header.get("text_color", "#000000")
|
||
|
||
# If both are white or both are dark, fix it
|
||
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
||
header["background"] = "#4F4F4F"
|
||
header["text_color"] = "#FFFFFF"
|
||
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
||
header["background"] = "#4F4F4F"
|
||
header["text_color"] = "#FFFFFF"
|
||
|
||
# Fix table cell contrast
|
||
if "table_cell" in styles:
|
||
cell = styles["table_cell"]
|
||
bg_color = cell.get("background", "#FFFFFF")
|
||
text_color = cell.get("text_color", "#000000")
|
||
|
||
# If both are white or both are dark, fix it
|
||
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
||
cell["background"] = "#FFFFFF"
|
||
cell["text_color"] = "#2F2F2F"
|
||
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
||
cell["background"] = "#FFFFFF"
|
||
cell["text_color"] = "#2F2F2F"
|
||
|
||
return styles
|
||
|
||
except Exception as e:
|
||
self.logger.warning(f"Style validation failed: {str(e)}")
|
||
return self._getDefaultStyleSet()
|
||
|
||
def _getDefaultStyleSet(self) -> Dict[str, Any]:
|
||
"""Default PDF style set - used when no style instructions present."""
|
||
return {
|
||
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
|
||
# Markdown #..###### — sizes must strictly decrease (H1 largest … H6 smallest).
|
||
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12},
|
||
"heading2": {"font_size": 15, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 10, "space_before": 10},
|
||
"heading3": {"font_size": 13, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
|
||
"heading4": {"font_size": 12, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 6, "space_before": 6},
|
||
"heading5": {"font_size": 11, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 6, "space_before": 6},
|
||
"heading6": {"font_size": 10, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 4, "space_before": 4},
|
||
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
|
||
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "left", "font_size": 12},
|
||
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10},
|
||
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3},
|
||
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6, "align": "left"}
|
||
}
|
||
|
||
async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
    """Request styles from the AI service and return them with PDF colour format.

    The response text is parsed as JSON. The cleaned response is tried
    first; heuristics for truncated or text-wrapped JSON are applied only
    if that fails. (Running the regex extraction unconditionally used to
    cut a valid nested object at its first closing brace, leaving only the
    first style.)

    Args:
        ai_service: AI service exposing ``aiObjects`` and ``callAi``.
        style_template: Prompt sent to the AI service.
        default_styles: Returned unchanged whenever the AI path fails.

    Returns:
        The parsed style dict after ``_convertColorsFormat``, or
        ``default_styles`` on any failure.
    """
    if not ai_service:
        return default_styles

    try:
        import json
        import re
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum

        def _candidates(text):
            # Yield progressively more aggressive repairs of the response.
            yield text
            start = text.find('{')
            end = text.rfind('}')
            # JSON embedded in explanatory text: outermost brace span.
            if start != -1 and end > start:
                yield text[start:end + 1]
            # Truncated JSON: append the closing braces that are missing.
            missing = text.count('{') - text.count('}')
            if missing > 0:
                yield text + '}' * missing
                if start > 0:
                    yield text[start:] + '}' * missing
            # Last resort: the pattern-based fragments used historically.
            legacyPatterns = (
                r'\{[^{}]*"title"[^{}]*\}',
                r'\{.*?"title".*?\}',
                r'\{.*?"font_size".*?\}',
            )
            for pattern in legacyPatterns:
                found = re.search(pattern, text, re.DOTALL)
                if not found:
                    continue
                fragment = found.group(0)
                yield fragment
                gap = fragment.count('{') - fragment.count('}')
                if gap > 0:
                    yield fragment + '}' * gap

        def _parseStyles(text):
            # Return the first candidate that parses to a dict, else None.
            for candidate in _candidates(text):
                try:
                    parsed = json.loads(candidate)
                except ValueError:
                    continue
                if isinstance(parsed, dict):
                    return parsed
            return None

        request_options = AiCallOptions()
        request_options.operationType = OperationTypeEnum.DATA_GENERATE
        request = AiCallRequest(prompt=style_template, context="", options=request_options)

        if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
            self.logger.warning("AI service not properly configured, using defaults")
            return default_styles

        response = await ai_service.callAi(request)
        if not response:
            self.logger.warning("AI service returned no response, using defaults")
            return default_styles

        result = response.content.strip() if response.content else ""
        if not result:
            self.logger.warning("AI styling returned empty response, using defaults")
            return default_styles

        self.logger.debug(f"AI styling raw response: {result[:200]}...")

        # Strip Markdown code fences around the payload, if any.
        fenced = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
        if fenced:
            result = fenced.group(1)
        elif result.startswith('```'):
            result = re.sub(r'^```(?:json)?\s*', '', result)
            result = re.sub(r'\s*```$', '', result)
        result = result.strip()

        if not result:
            self.logger.warning("AI styling returned empty content after cleanup, using defaults")
            return default_styles

        styles = _parseStyles(result)
        if styles is None:
            # Full payload goes to the debug file; the logger may truncate.
            self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "PDF_RENDERER")
            self.services.utils.debugLogToFile(f"RESPONSE LENGTH: {len(result)} characters", "PDF_RENDERER")
            self.logger.warning("Could not extract valid JSON from response, using defaults")
            return default_styles

        self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")
        return self._convertColorsFormat(styles)

    except Exception as e:
        self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
        return default_styles
|
||
|
||
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||
"""Convert colors to proper format for PDF compatibility."""
|
||
try:
|
||
for style_name, style_config in styles.items():
|
||
if isinstance(style_config, dict):
|
||
for prop, value in style_config.items():
|
||
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
|
||
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel) for consistency
|
||
styles[style_name][prop] = f"FF{value[1:]}"
|
||
elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
|
||
# Already aRGB format, keep as is
|
||
pass
|
||
return styles
|
||
except Exception as e:
|
||
self.logger.warning(f"Color conversion failed: {str(e)}")
|
||
return styles
|
||
|
||
def _getSafeColor(self, color_value: str, default: str = "#000000") -> str:
|
||
"""Get a safe hex color value for PDF."""
|
||
if isinstance(color_value, str) and color_value.startswith('#'):
|
||
if len(color_value) == 7:
|
||
return f"FF{color_value[1:]}"
|
||
elif len(color_value) == 9:
|
||
return color_value
|
||
return default
|
||
|
||
def _defaultHeadingStyleDef(self, level: int) -> Dict[str, Any]:
|
||
"""When heading{N} is missing from styles, never fall back to heading1 (that made H3 > H2)."""
|
||
sizes = {1: 18, 2: 15, 3: 13, 4: 12, 5: 11, 6: 10}
|
||
fs = sizes.get(level, 10)
|
||
sb = max(4, 14 - level)
|
||
return {
|
||
"font_size": fs,
|
||
"color": "#2F2F2F" if level <= 2 else "#4F4F4F",
|
||
"bold": True,
|
||
"align": "left",
|
||
"space_after": sb,
|
||
"space_before": sb,
|
||
}
|
||
|
||
def _createHeadingStyle(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
    """Create the ReportLab paragraph style for a heading of the given level.

    The definition is read from ``styles["heading{level}"]``; a missing or
    empty definition is replaced by ``_defaultHeadingStyleDef(level)``.
    Leading is fixed at 1.35 times the font size.
    """
    fallbackDef = self._defaultHeadingStyleDef(level)
    styleDef = styles.get(f"heading{level}") or fallbackDef
    fontSize = styleDef.get("font_size", fallbackDef["font_size"])
    isBold = styleDef.get("bold", True)

    return ParagraphStyle(
        f'CustomHeading{level}',
        fontName="Helvetica-Bold" if isBold else "Helvetica",
        fontSize=fontSize,
        spaceAfter=styleDef.get("space_after", 12),
        spaceBefore=styleDef.get("space_before", 12),
        alignment=self._getAlignment(styleDef.get("align", "left")),
        textColor=self._hexToColor(styleDef.get("color", "#2F2F2F")),
        leading=fontSize * 1.35,
    )
|
||
|
||
def _createNormalStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
    """Create the ReportLab style for body paragraphs.

    Values come from ``styles["paragraph"]`` with defaults of 11 pt font,
    6 pt space after, left alignment, colour #2F2F2F and a line-height
    factor of 1.2 (leading = factor * font size).
    """
    paraDef = styles.get("paragraph", {})
    fontSize = paraDef.get("font_size", 11)
    lineFactor = paraDef.get("line_height", 1.2)

    return ParagraphStyle(
        'CustomNormal',
        fontSize=fontSize,
        spaceAfter=paraDef.get("space_after", 6),
        alignment=self._getAlignment(paraDef.get("align", "left")),
        textColor=self._hexToColor(paraDef.get("color", "#2F2F2F")),
        leading=lineFactor * fontSize
    )
|
||
|
||
def _getAlignment(self, align: str) -> int:
    """Convert an alignment name to the matching ReportLab constant.

    Recognised names (case-insensitive, surrounding whitespace ignored) are
    "left", "center", "right" and "justify", plus the numeric strings "0",
    "1" and "2". Anything else, including non-string or empty input, maps
    to left alignment.

    Args:
        align: Alignment name.

    Returns:
        One of the ``reportlab.lib.enums`` ``TA_*`` constants.
    """
    if not align or not isinstance(align, str):
        return TA_LEFT

    # ReportLab does provide TA_RIGHT; imported here because the module
    # header only pulls in the other three constants.
    from reportlab.lib.enums import TA_RIGHT

    align_map = {
        "center": TA_CENTER,
        "left": TA_LEFT,
        "justify": TA_JUSTIFY,
        "right": TA_RIGHT,
        "0": TA_LEFT,
        "1": TA_CENTER,
        # NOTE(review): kept as-is for compatibility; in ReportLab's own
        # numbering 2 is TA_RIGHT and 4 is TA_JUSTIFY - confirm intent.
        "2": TA_JUSTIFY,
    }
    return align_map.get(align.lower().strip(), TA_LEFT)
|
||
|
||
def _hexToColor(self, hex_color: str) -> colors.Color:
    """Convert a hex colour string to a ReportLab colour.

    Accepts ``RRGGBB`` and ``AARRGGBB``, each with or without a leading
    '#'. The alpha byte of the 8-digit form is ignored.

    Args:
        hex_color: Hex colour string.

    Returns:
        The corresponding ``colors.Color``, or ``colors.black`` when the
        value is not a string or not a valid 6/8-digit hex colour.
    """
    try:
        digits = hex_color.lstrip('#')

        # aRGB form: drop the leading alpha byte.
        if len(digits) == 8:
            digits = digits[2:]

        if len(digits) == 6:
            red, green, blue = (int(digits[i:i + 2], 16) / 255.0 for i in (0, 2, 4))
            return colors.Color(red, green, blue)

        return colors.black
    except (AttributeError, TypeError, ValueError):
        # Non-string input or non-hex digits; do not mask anything else.
        return colors.black
|
||
|
||
def _escapeReportlabXml(self, text: str) -> str:
|
||
"""Escape text for ReportLab Paragraph markup."""
|
||
if not text:
|
||
return ""
|
||
return (
|
||
text.replace("&", "&")
|
||
.replace("<", "<")
|
||
.replace(">", ">")
|
||
)
|
||
|
||
def _renderInlineRunsToPdfXml(self, runs: list) -> str:
|
||
"""Convert inline runs to ReportLab Paragraph XML."""
|
||
parts = []
|
||
us = getattr(self, '_unifiedStyle', None)
|
||
monoFont = us["fonts"]["monospace"] if us else "Courier"
|
||
for run in runs:
|
||
runType = run.get("type", "text")
|
||
value = self._escapeReportlabXml(run.get("value", ""))
|
||
if runType == "text":
|
||
parts.append(value)
|
||
elif runType == "bold":
|
||
parts.append(f"<b>{value}</b>")
|
||
elif runType == "italic":
|
||
parts.append(f"<i>{value}</i>")
|
||
elif runType == "code":
|
||
parts.append(f'<font name="{monoFont}">{value}</font>')
|
||
elif runType == "link":
|
||
href = self._escapeReportlabXml(run.get("href", ""))
|
||
parts.append(f'<a href="{href}">{value}</a>')
|
||
elif runType == "image":
|
||
parts.append(f"[Image: {value}]")
|
||
else:
|
||
parts.append(value)
|
||
return "".join(parts)
|
||
|
||
def _applyInlineMarkdownToEscapedPlain(self, text: str) -> str:
|
||
"""Escape XML then apply bold/italic to a segment with no `code` spans (code is handled separately)."""
|
||
if not text:
|
||
return ""
|
||
s = self._escapeReportlabXml(text)
|
||
s = _re_pdf.sub(r"\*\*(.+?)\*\*", r"<b>\1</b>", s, flags=_re_pdf.DOTALL)
|
||
s = _re_pdf.sub(r"__(.+?)__", r"<b>\1</b>", s, flags=_re_pdf.DOTALL)
|
||
s = _re_pdf.sub(r"(?<!\*)\*([^*\n]+?)\*(?!\*)", r"<i>\1</i>", s)
|
||
s = _re_pdf.sub(r"(?<![\w/])_([^_\n]+?)_(?![\w/])", r"<i>\1</i>", s)
|
||
return s
|
||
|
||
def _markdownInlineToReportlabXml(self, text: str) -> str:
    """Convert common inline markdown (**bold**, *italic*, `code`) to ReportLab XML.

    Backtick spans are separated out first so that paths such as
    `.../<Slug>/...` are not touched by the bold/italic patterns and stay
    well-formed inside <font name="Courier">. Box/block drawing characters
    are replaced with ASCII beforehand, and the final markup is passed
    through the emoji wrapper so emoji code points are rendered with the
    emoji fallback font.
    """
    if not text:
        return ""

    normalized = _normalizePdfMonospaceText(text)

    # Splitting on a pattern with one capture group yields alternating
    # pieces: even indexes are plain text, odd indexes are code contents.
    pieces = _re_pdf.split(r"`([^`]*)`", normalized)
    rendered = []
    for index, piece in enumerate(pieces):
        if index % 2:
            rendered.append(f'<font name="Courier">{self._escapeReportlabXml(piece)}</font>')
        else:
            rendered.append(self._applyInlineMarkdownToEscapedPlain(piece))
    return _wrapEmojiSpansInXml("".join(rendered))
|
||
|
||
def _paragraphFromInlineMarkdown(self, text: str, style: ParagraphStyle) -> Paragraph:
    """Build a Paragraph from text containing inline markdown, using ``style``."""
    markup = self._markdownInlineToReportlabXml(text)
    return Paragraph(markup, style)
|
||
|
||
def _createTableCellParagraphStyle(
    self, styles: Dict[str, Any], *, header: bool, tableStyleKey: str
) -> ParagraphStyle:
    """Create the paragraph style used inside table cells.

    Cell content is wrapped in Paragraphs so text wraps within the column
    width. Header cells default to 12 pt white text and are bold unless the
    style sets ``bold`` to a falsy value; body cells default to 10 pt
    #2F2F2F and are never bold. Alignment is always left.

    Args:
        styles: Style set containing the table style definitions.
        header: True for header cells, False for body cells.
        tableStyleKey: Key in ``styles`` to read the definition from.
    """
    cellDef = styles.get(tableStyleKey, {})
    if header:
        fallbackSize, fallbackColor, roleTag = 12, "#FFFFFF", "H"
    else:
        fallbackSize, fallbackColor, roleTag = 10, "#2F2F2F", "B"

    fontSize = cellDef.get("font_size", fallbackSize)
    useBold = header and cellDef.get("bold", True)

    return ParagraphStyle(
        f"TblCell{roleTag}{tableStyleKey}",
        fontSize=fontSize,
        leading=fontSize * 1.25,
        alignment=TA_LEFT,
        textColor=self._hexToColor(cellDef.get("text_color", fallbackColor)),
        fontName="Helvetica-Bold" if useBold else "Helvetica",
    )
|
||
|
||
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render one JSON section into a list of ReportLab flowables.

    Each element of the section is dispatched on its own ``type``; when
    that is missing or unknown the section's type is used, and finally the
    paragraph renderer. Two element types are handled inline:

    * ``reference``: an "[Reference: <label>]" note;
    * ``extracted_text``: the content as inline markdown, followed by an
      optional "(Source: ...)" suffix.

    Any exception is logged and replaced by a single error paragraph, so a
    broken section does not abort the whole document.

    Args:
        section: Section dict from the standardized document schema.
        styles: Resolved style set.

    Returns:
        Flowables for the section.
    """
    # Stdlib escaping for text that is interpolated into Paragraph markup.
    from xml.sax.saxutils import escape as _xmlEscape

    # Content type -> name of the rendering method (looked up lazily).
    rendererNames = {
        "table": "_renderJsonTable",
        "bullet_list": "_renderJsonBulletList",
        "heading": "_renderJsonHeading",
        "paragraph": "_renderJsonParagraph",
        "code_block": "_renderJsonCodeBlock",
        "image": "_renderJsonImage",
    }

    try:
        section_type = self._getSectionType(section)
        elements = self._getSectionData(section)

        all_elements = []
        for element in elements:
            element_type = element.get("type", "") if isinstance(element, dict) else ""

            if element_type == "reference":
                label = element.get("label", "Reference")
                # NOTE(review): 'fontStyle' does not look like a ParagraphStyle
                # property, so the italic request probably has no effect - confirm.
                ref_style = ParagraphStyle(
                    'Reference',
                    parent=self._createNormalStyle(styles),
                    fontStyle='italic',
                    textColor=colors.grey
                )
                # The label is free text: escape it so markup characters
                # cannot break the paragraph parser.
                all_elements.append(Paragraph(f"[Reference: {_xmlEscape(str(label))}]", ref_style))
                all_elements.append(Spacer(1, 6))
                continue

            if element_type == "extracted_text":
                content = element.get("content", "")
                source = element.get("source", "")
                if content:
                    bodyXml = self._markdownInlineToReportlabXml(content)
                    if source:
                        bodyXml = f"{bodyXml} <i>(Source: {self._escapeReportlabXml(source)})</i>"
                    all_elements.append(Paragraph(bodyXml, self._createNormalStyle(styles)))
                    all_elements.append(Spacer(1, 6))
                continue

            # Element type wins over section type; paragraph is the last resort.
            rendererName = (
                rendererNames.get(element_type)
                or rendererNames.get(section_type)
                or "_renderJsonParagraph"
            )
            all_elements.extend(getattr(self, rendererName)(element, styles))

        return all_elements

    except Exception as e:
        self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
        # Exception text can contain markup characters; unescaped it could
        # make this fallback paragraph fail to parse as well.
        return [Paragraph(f"[Error rendering section: {_xmlEscape(str(e))}]", self._createNormalStyle(styles))]
|
||
|
||
def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||
"""Render a JSON table: left-aligned, width capped to printable area, cells wrap."""
|
||
try:
|
||
content = table_data.get("content", {})
|
||
if not isinstance(content, dict):
|
||
return []
|
||
headers = content.get("headers", [])
|
||
rows = content.get("rows", [])
|
||
|
||
if not headers or not rows:
|
||
return []
|
||
|
||
numCols = len(headers)
|
||
colWidth = _PDF_CONTENT_WIDTH_PT / max(numCols, 1)
|
||
colWidths = [colWidth] * numCols
|
||
|
||
hdrPs = self._createTableCellParagraphStyle(styles, header=True, tableStyleKey="table_header")
|
||
cellPs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell")
|
||
|
||
def _cellPara(cell, ps):
|
||
runs = self._inlineRunsForCell(cell)
|
||
if isinstance(cell, list):
|
||
xml = self._renderInlineRunsToPdfXml(runs)
|
||
return Paragraph(_wrapEmojiSpansInXml(xml), ps)
|
||
return self._paragraphFromInlineMarkdown(str(cell) if cell is not None else "", ps)
|
||
|
||
headerRow = [_cellPara(h, hdrPs) for h in headers]
|
||
bodyRows = []
|
||
for row in rows:
|
||
padded = list(row) + [""] * max(0, numCols - len(row))
|
||
padded = padded[:numCols]
|
||
bodyRows.append([_cellPara(c, cellPs) for c in padded])
|
||
|
||
table_matrix = [headerRow] + bodyRows
|
||
table = Table(table_matrix, colWidths=colWidths, repeatRows=1)
|
||
|
||
table_header_style = styles.get("table_header", {})
|
||
table_cell_style = styles.get("table_cell", {})
|
||
|
||
table_style = [
|
||
("BACKGROUND", (0, 0), (-1, 0), self._hexToColor(table_header_style.get("background", "#4F4F4F"))),
|
||
("BACKGROUND", (0, 1), (-1, -1), self._hexToColor(table_cell_style.get("background", "#FFFFFF"))),
|
||
("ALIGN", (0, 0), (-1, -1), "LEFT"),
|
||
("VALIGN", (0, 0), (-1, -1), "TOP"),
|
||
("LEFTPADDING", (0, 0), (-1, -1), 4),
|
||
("RIGHTPADDING", (0, 0), (-1, -1), 4),
|
||
("TOPPADDING", (0, 0), (-1, 0), 6),
|
||
("BOTTOMPADDING", (0, 0), (-1, 0), 8),
|
||
("TOPPADDING", (0, 1), (-1, -1), 4),
|
||
("BOTTOMPADDING", (0, 1), (-1, -1), 4),
|
||
("GRID", (0, 0), (-1, -1), 0.5, colors.black),
|
||
]
|
||
table.setStyle(TableStyle(table_style))
|
||
return [table, Spacer(1, 12)]
|
||
|
||
except Exception as e:
|
||
self.logger.warning(f"Error rendering table: {str(e)}")
|
||
return []
|
||
|
||
def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||
"""Render a JSON bullet list to PDF elements using AI-generated styles."""
|
||
try:
|
||
content = list_data.get("content", {})
|
||
if not isinstance(content, dict):
|
||
return []
|
||
items = content.get("items", [])
|
||
bulletStyleDef = styles.get("bullet_list", {})
|
||
normalStyle = self._createNormalStyle(styles)
|
||
|
||
elements = []
|
||
for item in items:
|
||
runs = self._inlineRunsForListItem(item)
|
||
if isinstance(item, list):
|
||
xml = self._renderInlineRunsToPdfXml(runs)
|
||
elements.append(Paragraph(f"\u2022 {_wrapEmojiSpansInXml(xml)}", normalStyle))
|
||
elif isinstance(item, str):
|
||
elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item)}", normalStyle))
|
||
elif isinstance(item, dict) and "text" in item:
|
||
elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item['text'])}", normalStyle))
|
||
|
||
if elements:
|
||
elements.append(Spacer(1, bulletStyleDef.get("space_after", 3)))
|
||
|
||
return elements
|
||
|
||
except Exception as e:
|
||
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
||
return []
|
||
|
||
def _renderJsonHeading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||
"""Render a JSON heading to PDF elements using AI-generated styles."""
|
||
try:
|
||
# Extract from nested content structure
|
||
content = heading_data.get("content", {})
|
||
if not isinstance(content, dict):
|
||
return []
|
||
text = content.get("text", "")
|
||
level = content.get("level", 1)
|
||
|
||
if text:
|
||
level = max(1, min(6, level))
|
||
heading_style = self._createHeadingStyle(styles, level)
|
||
return [self._paragraphFromInlineMarkdown(text, heading_style)]
|
||
|
||
return []
|
||
|
||
except Exception as e:
|
||
self.logger.warning(f"Error rendering heading: {str(e)}")
|
||
return []
|
||
|
||
def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||
"""Render a JSON paragraph to PDF elements using AI-generated styles."""
|
||
try:
|
||
content = paragraph_data.get("content", {})
|
||
if isinstance(content, str):
|
||
content = {"text": content}
|
||
if not isinstance(content, dict):
|
||
return []
|
||
|
||
normalStyle = self._createNormalStyle(styles)
|
||
|
||
if "inlineRuns" in content:
|
||
runs = self._inlineRunsFromContent(content)
|
||
xml = self._renderInlineRunsToPdfXml(runs)
|
||
if xml:
|
||
return [Paragraph(_wrapEmojiSpansInXml(xml), normalStyle)]
|
||
return []
|
||
|
||
text = content.get("text", "")
|
||
if text:
|
||
return [self._paragraphFromInlineMarkdown(text, normalStyle)]
|
||
|
||
return []
|
||
|
||
except Exception as e:
|
||
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
||
return []
|
||
|
||
def _renderJsonCodeBlock(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||
"""Render a JSON code block to PDF elements using AI-generated styles."""
|
||
try:
|
||
# Extract from nested content structure
|
||
content = code_data.get("content", {})
|
||
if not isinstance(content, dict):
|
||
return []
|
||
code = content.get("code", "")
|
||
language = content.get("language", "")
|
||
code_style_def = styles.get("code_block", {})
|
||
|
||
if code:
|
||
code = _prepareCodeBlockPlainText(code)
|
||
code = _normalizePdfMonospaceText(code)
|
||
elements = []
|
||
fs = code_style_def.get("font_size", 9)
|
||
mono = code_style_def.get("font", "Courier")
|
||
|
||
if language:
|
||
lang_style = ParagraphStyle(
|
||
"CodeLanguage",
|
||
fontSize=fs,
|
||
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
|
||
fontName="Helvetica-Bold",
|
||
alignment=TA_LEFT,
|
||
)
|
||
elements.append(
|
||
Paragraph(
|
||
self._escapeReportlabXml(f"Code ({language}):"),
|
||
lang_style,
|
||
)
|
||
)
|
||
|
||
approxCharWPt = max(fs * 0.52, 4.5)
|
||
usableWidth = _PDF_CONTENT_WIDTH_PT - 16 # left+right padding
|
||
maxLineChars = max(48, int(usableWidth / approxCharWPt))
|
||
bg_col = self._hexToColor(code_style_def.get("background", "#F5F5F5"))
|
||
leading = fs * 1.2
|
||
spaceAfter = code_style_def.get("space_after", 6)
|
||
|
||
# Each source line may wrap to ceil(len/maxLineChars) visual lines.
|
||
# Frame height ~740pt minus padding → keep rendered height < 600pt.
|
||
maxVisualLinesPerChunk = max(8, int(600 / leading))
|
||
srcLines = code.split("\n")
|
||
chunks: List[List[str]] = []
|
||
curChunk: List[str] = []
|
||
curVisual = 0
|
||
for sl in srcLines:
|
||
wrapped = max(1, -(-len(sl) // maxLineChars)) if sl else 1
|
||
if curVisual + wrapped > maxVisualLinesPerChunk and curChunk:
|
||
chunks.append(curChunk)
|
||
curChunk = []
|
||
curVisual = 0
|
||
curChunk.append(sl)
|
||
curVisual += wrapped
|
||
if curChunk:
|
||
chunks.append(curChunk)
|
||
|
||
for ci, chunkLines in enumerate(chunks):
|
||
chunkText = "\n".join(chunkLines)
|
||
styleId = f"CodePre_{id(code_data) & 0xFFFFFFFF}_{ci}"
|
||
codePrStyle = ParagraphStyle(
|
||
styleId,
|
||
fontName=mono,
|
||
fontSize=fs,
|
||
leading=leading,
|
||
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
|
||
alignment=TA_LEFT,
|
||
leftIndent=0,
|
||
rightIndent=0,
|
||
)
|
||
pf = Preformatted(chunkText, codePrStyle, dedent=0, maxLineLength=maxLineChars)
|
||
tbl = Table([[pf]], colWidths=[_PDF_CONTENT_WIDTH_PT])
|
||
tbl.setStyle(
|
||
TableStyle(
|
||
[
|
||
("BACKGROUND", (0, 0), (-1, -1), bg_col),
|
||
("VALIGN", (0, 0), (-1, -1), "TOP"),
|
||
("LEFTPADDING", (0, 0), (-1, -1), 8),
|
||
("RIGHTPADDING", (0, 0), (-1, -1), 8),
|
||
("TOPPADDING", (0, 0), (-1, -1), 6),
|
||
("BOTTOMPADDING", (0, 0), (-1, -1), 6),
|
||
]
|
||
)
|
||
)
|
||
tbl.spaceAfter = 0 if ci < len(chunks) - 1 else spaceAfter
|
||
elements.append(tbl)
|
||
return elements
|
||
|
||
return []
|
||
|
||
except Exception as e:
|
||
self.logger.warning(f"Error rendering code block: {str(e)}")
|
||
return []
|
||
|
||
def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render a JSON image element as a reportlab Image plus optional caption.

    The base64 payload is looked up, in order, in
    ``image_data["content"]["base64Data"]``, ``image_data["base64Data"]`` and
    a ``data:image/...;base64,`` URI stored under ``url`` (on the element or
    on its content). Alt text and caption use the same content-then-element
    fallback.

    The image is scaled down (never up) to fit the page area left after the
    configured margins and a safety buffer, keeping its aspect ratio. If the
    size cannot be determined, a fixed 4 x 3 inch box is used.

    All user-supplied or exception-derived text is XML-escaped before it is
    placed in a Paragraph, so characters such as ``<`` and ``&`` in captions,
    alt texts or error messages cannot break paragraph markup parsing.

    Returns the image (and caption) flowables, a placeholder paragraph when
    no usable image data exists, or a red error paragraph when embedding
    fails.
    """

    def _xmlSafe(value: Any) -> str:
        # Paragraph text is parsed as XML-like markup; escape anything that
        # did not originate from this module.
        return self._escapeReportlabXml(str(value))

    def _errorFlowables(message: str) -> List[Any]:
        # NOTE(review): this mutates the style returned by _createNormalStyle;
        # confirm that helper returns a fresh style object on every call.
        errorStyle = self._createNormalStyle(styles)
        errorStyle.textColor = self._hexToColor("#FF0000")  # red marks errors
        return [Paragraph(_xmlSafe(message), errorStyle)]

    try:
        content = image_data.get("content", {})
        base64_data = ""
        alt_text = "Image"
        caption = ""

        if isinstance(content, dict):
            base64_data = content.get("base64Data", "")
            alt_text = content.get("altText", "Image")
            caption = content.get("caption", "")
        elif isinstance(content, str):
            # A bare string is not a supported image payload.
            self.logger.warning("Image content is a string, not a dict. This should not happen.")
            return [Paragraph("[Image: Invalid format]", self._createNormalStyle(styles))]

        # Fall back to fields stored directly on the element.
        if not base64_data:
            base64_data = image_data.get("base64Data", "")
        if not alt_text or alt_text == "Image":
            alt_text = image_data.get("altText", "Image")
        if not caption:
            caption = image_data.get("caption", "")

        # Normalise so the string operations below are safe for any input.
        alt_text = str(alt_text) if alt_text else "Image"

        # Last resort: pull the payload out of a data URI.
        if not base64_data:
            url = image_data.get("url", "") or (content.get("url", "") if isinstance(content, dict) else "")
            if url and isinstance(url, str) and url.startswith("data:image/"):
                match = _re_pdf.match(r'data:image/[^;]+;base64,(.+)', url)
                if match:
                    base64_data = match.group(1)

        if not base64_data:
            self.logger.warning(f"No base64 data found for image. Alt text: {alt_text}")
            return [Paragraph(_xmlSafe(f"[Image: {alt_text}]"), self._createNormalStyle(styles))]

        # Check the type first: len() below is only meaningful for strings.
        if not isinstance(base64_data, str):
            self.logger.warning(f"Base64 data is not a string: {type(base64_data)}")
            return [
                Paragraph(
                    _xmlSafe(f"[Image: {alt_text} - Invalid data type]"),
                    self._createNormalStyle(styles),
                )
            ]

        # Diagnostic only: a very long string might be a whole element that
        # was extracted by mistake. Processing continues either way.
        if len(base64_data) > 10000:
            self.logger.warning(f"Base64 data seems too long ({len(base64_data)} chars), might be incorrectly extracted")

        try:
            from reportlab.platypus import Image as ReportLabImage

            imageStream = io.BytesIO(base64.b64decode(base64_data))

            try:
                from PIL import Image as PILImage

                pilImage = PILImage.open(imageStream)
                originalWidth, originalHeight = pilImage.size

                pageWidth, pageHeight = A4  # 595.27 x 841.89 points

                # Page margins from the unified style, defaulting to 60pt.
                _us = getattr(self, '_unifiedStyle', None) or {}
                _pageMgn = (_us.get('page') or {}).get('marginsPt') or {}
                marginTop = _pageMgn.get('top', 60)
                marginBottom = _pageMgn.get('bottom', 60)
                marginLeft = _pageMgn.get('left', 60)
                marginRight = _pageMgn.get('right', 60)
                availableWidth = pageWidth - marginLeft - marginRight - 20  # 20pt safety
                availableHeight = pageHeight - marginTop - marginBottom - 80  # 80pt safety for header/footer

                # Pixels -> points via the image's own DPI (points = px * 72 / dpi);
                # 96 DPI is assumed when the image does not declare a usable value.
                dpi = pilImage.info.get('dpi', (96, 96))[0]
                if dpi <= 0:
                    dpi = 96
                imgWidthPoints = originalWidth * (72.0 / dpi)
                imgHeightPoints = originalHeight * (72.0 / dpi)

                # Shrink to fit both dimensions; never enlarge (cap at 1.0).
                widthScale = availableWidth / imgWidthPoints if imgWidthPoints > 0 else 1.0
                heightScale = availableHeight / imgHeightPoints if imgHeightPoints > 0 else 1.0
                scale = min(widthScale, heightScale, 1.0)
                imgWidth = imgWidthPoints * scale
                imgHeight = imgHeightPoints * scale

                # Belt-and-braces clamp in case rounding left a dimension
                # marginally above the available space.
                if imgWidth > availableWidth:
                    scale = availableWidth / imgWidth
                    imgWidth = availableWidth
                    imgHeight = imgHeight * scale
                if imgHeight > availableHeight:
                    scale = availableHeight / imgHeight
                    imgHeight = availableHeight
                    imgWidth = imgWidth * scale

            except Exception as e:
                self.logger.warning(f"Error calculating image size: {str(e)}, using safe default")
                # Fixed conservative box used when the real size is unknown.
                imgWidth = 4 * inch  # 288 points
                imgHeight = 3 * inch  # 216 points

            # The size probe consumed the stream; rewind before reportlab reads it.
            imageStream.seek(0)
            elements = [ReportLabImage(imageStream, width=imgWidth, height=imgHeight)]

            # Prefer an explicit caption; otherwise derive one from the alt text.
            captionText = ""
            if caption:
                captionText = str(caption)
            elif alt_text != "Image":
                marker = "Render as visual element:"
                if marker in alt_text:
                    # Usage-hint style alt text: keep only what follows the marker.
                    captionText = f"Figure: {alt_text.split(marker)[1].strip()}"
                else:
                    captionText = f"Figure: {alt_text}"

            if captionText:
                # NOTE(review): mutates the style returned by _createNormalStyle;
                # confirm that helper returns a fresh style object on every call.
                captionStyle = self._createNormalStyle(styles)
                captionStyle.fontSize = 10
                captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666"))
                elements.append(Paragraph(f"<i>{_xmlSafe(captionText)}</i>", captionStyle))

            return elements

        except Exception as imgError:
            self.logger.error(f"Error embedding image in PDF: {str(imgError)}")
            return _errorFlowables(f"[Error: Could not embed image '{alt_text}'. {str(imgError)}]")

    except Exception as e:
        self.logger.error(f"Error rendering image: {str(e)}")
        fallbackAlt = image_data.get('altText', 'Image')
        return _errorFlowables(f"[Error: Could not render image '{fallbackAlt}'. {str(e)}]")