fixed rendering issues
This commit is contained in:
parent
8cf31077a6
commit
9186c60ad2
10 changed files with 1011 additions and 634 deletions
|
|
@ -19,7 +19,11 @@ class DataSource(BaseModel):
|
|||
connectionId: str = Field(description="FK to UserConnection")
|
||||
sourceType: str = Field(description="sharepointFolder, googleDriveFolder, outlookFolder, ftpFolder")
|
||||
path: str = Field(description="External path (e.g. '/sites/MySite/Documents/Reports')")
|
||||
label: str = Field(description="User-visible label")
|
||||
label: str = Field(description="User-visible label (often the last path segment)")
|
||||
displayPath: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Human-readable full path for UI (connection-relative, slash-separated)",
|
||||
)
|
||||
featureInstanceId: Optional[str] = Field(default=None, description="Scoped to feature instance")
|
||||
mandateId: Optional[str] = Field(default=None, description="Mandate scope")
|
||||
userId: str = Field(default="", description="Owner user ID")
|
||||
|
|
@ -37,6 +41,7 @@ registerModelLabels(
|
|||
"sourceType": {"en": "Source Type", "de": "Quellentyp", "fr": "Type de source"},
|
||||
"path": {"en": "Path", "de": "Pfad", "fr": "Chemin"},
|
||||
"label": {"en": "Label", "de": "Bezeichnung", "fr": "Libellé"},
|
||||
"displayPath": {"en": "Display path", "de": "Anzeigepfad", "fr": "Chemin affiché"},
|
||||
"featureInstanceId": {"en": "Feature Instance", "de": "Feature-Instanz", "fr": "Instance de fonctionnalité"},
|
||||
"mandateId": {"en": "Mandate ID", "de": "Mandanten-ID", "fr": "ID du mandat"},
|
||||
"userId": {"en": "User ID", "de": "Benutzer-ID", "fr": "ID utilisateur"},
|
||||
|
|
|
|||
|
|
@ -1139,6 +1139,7 @@ class CreateDataSourceRequest(BaseModel):
|
|||
sourceType: str = Field(description="Source type")
|
||||
path: str = Field(description="Path")
|
||||
label: str = Field(description="Label")
|
||||
displayPath: Optional[str] = Field(default=None, description="Full human-readable path for tooltips")
|
||||
|
||||
|
||||
@router.post("/{instanceId}/datasources")
|
||||
|
|
@ -1165,6 +1166,7 @@ async def createWorkspaceDataSource(
|
|||
path=body.path,
|
||||
label=body.label,
|
||||
featureInstanceId=instanceId,
|
||||
displayPath=body.displayPath,
|
||||
)
|
||||
return JSONResponse(dataSource if isinstance(dataSource, dict) else dataSource.model_dump())
|
||||
|
||||
|
|
@ -1214,7 +1216,7 @@ async def listFeatureConnections(
|
|||
|
||||
userMandates = rootIf.getUserMandates(userId)
|
||||
if not userMandates:
|
||||
return JSONResponse({"featureConnections": []})
|
||||
return JSONResponse({"featureConnectionsByMandate": []})
|
||||
|
||||
mandateLabels: dict = {}
|
||||
for um in userMandates:
|
||||
|
|
@ -1226,7 +1228,7 @@ async def listFeatureConnections(
|
|||
except Exception:
|
||||
mandateLabels[um.mandateId] = um.mandateId
|
||||
|
||||
items = []
|
||||
byMandate: dict = {}
|
||||
seenIds: set = set()
|
||||
for um in userMandates:
|
||||
allInstances = rootIf.getFeatureInstancesByMandate(um.mandateId)
|
||||
|
|
@ -1244,20 +1246,33 @@ async def listFeatureConnections(
|
|||
|
||||
featureDef = catalog.getFeatureDefinition(inst.featureCode) or {}
|
||||
dataObjects = catalog.getDataObjects(inst.featureCode)
|
||||
mLabel = mandateLabels.get(inst.mandateId, "")
|
||||
label = inst.label or inst.featureCode
|
||||
if mLabel:
|
||||
label = f"{label} ({mLabel})"
|
||||
items.append({
|
||||
mid = inst.mandateId
|
||||
connItem = {
|
||||
"featureInstanceId": inst.id,
|
||||
"featureCode": inst.featureCode,
|
||||
"mandateId": inst.mandateId,
|
||||
"mandateId": mid,
|
||||
"label": label,
|
||||
"icon": featureDef.get("icon", "mdi-database"),
|
||||
"tableCount": len(dataObjects),
|
||||
})
|
||||
}
|
||||
if mid not in byMandate:
|
||||
byMandate[mid] = []
|
||||
byMandate[mid].append(connItem)
|
||||
|
||||
return JSONResponse({"featureConnections": items})
|
||||
def _sortKeyLabel(x: dict) -> str:
|
||||
return (x.get("label") or "").lower()
|
||||
|
||||
groups = []
|
||||
for mid in sorted(byMandate.keys(), key=lambda m: (mandateLabels.get(m, m) or "").lower()):
|
||||
conns = sorted(byMandate[mid], key=_sortKeyLabel)
|
||||
groups.append({
|
||||
"mandateId": mid,
|
||||
"mandateLabel": mandateLabels.get(mid, mid),
|
||||
"featureConnections": conns,
|
||||
})
|
||||
|
||||
return JSONResponse({"featureConnectionsByMandate": groups})
|
||||
|
||||
|
||||
@router.get("/{instanceId}/feature-connections/{fiId}/tables")
|
||||
|
|
|
|||
|
|
@ -332,6 +332,7 @@ class AiObjects:
|
|||
errorCount=0,
|
||||
toolCalls=responseToolCalls
|
||||
)
|
||||
response._modelMaxTokens = model.maxTokens
|
||||
|
||||
if self.billingCallback:
|
||||
try:
|
||||
|
|
@ -470,6 +471,7 @@ class AiObjects:
|
|||
errorCount=0,
|
||||
toolCalls=responseToolCalls,
|
||||
)
|
||||
response._modelMaxTokens = model.maxTokens
|
||||
|
||||
if self.billingCallback:
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -276,6 +276,7 @@ async def runAgentLoop(
|
|||
"userId": userId,
|
||||
"featureInstanceId": featureInstanceId,
|
||||
"mandateId": mandateId,
|
||||
"modelMaxOutputTokens": getattr(aiResponse, "_modelMaxTokens", None) or 0,
|
||||
})
|
||||
state.totalToolCalls += len(results)
|
||||
|
||||
|
|
@ -439,6 +440,29 @@ def _repairTruncatedJson(raw: str) -> Optional[Dict[str, Any]]:
|
|||
return None
|
||||
|
||||
|
||||
def _validateRepairedToolArgs(toolName: str, args: Dict[str, Any]) -> Optional[str]:
    """Check that repaired (previously truncated) tool arguments are still usable.

    A tool call whose JSON was cut off can be closed and parsed successfully and
    still lack the fields the tool needs. For `renderDocument` this would
    otherwise surface only as "content is required", hiding the real cause
    (output token limit).

    Args:
        toolName: Name of the tool the arguments belong to.
        args: Arguments obtained after repairing and parsing the truncated JSON.

    Returns:
        A message intended to be used as the `_parseError` value, or None when
        the arguments are acceptable. Only `renderDocument` is checked.
    """
    if toolName != "renderDocument":
        return None

    # A field counts as usable only if it is a string with non-whitespace content.
    usableFields = [
        key
        for key in ("content", "sourceFileId")
        if isinstance(args.get(key), str) and args.get(key).strip()
    ]
    if usableFields:
        return None

    return (
        "Your tool call JSON was repaired after truncation, but neither `content` nor `sourceFileId` is usable. "
        "Large documents must not be inlined in the tool call (output limit).\n"
        "Preferred: writeFile(mode='create') + writeFile(mode='append') to build a .md file, then "
        "renderDocument(sourceFileId=<that file id>, outputFormat='pdf', title='...') — the tool call stays small.\n"
        "Alternatives: replaceInFile for edits; shorter outline first."
    )
|
||||
|
||||
|
||||
def _parseToolCalls(aiResponse: AiCallResponse) -> List[ToolCallRequest]:
|
||||
"""Parse tool calls from AI response. Supports native function calling and text-based fallback."""
|
||||
toolCalls = []
|
||||
|
|
@ -457,14 +481,20 @@ def _parseToolCalls(aiResponse: AiCallResponse) -> List[ToolCallRequest]:
|
|||
logger.warning(f"Unrecoverable truncated JSON for '{tc['function']['name']}': {rawArgs[:200]}")
|
||||
parsedArgs = {"_parseError": (
|
||||
"Your tool call arguments were truncated (output cut off by token limit). "
|
||||
"The content is too large for a single tool call. Strategies:\n"
|
||||
"1. For new files: use writeFile(mode='create') with the first part, "
|
||||
"then writeFile(fileId=..., mode='append') for subsequent parts (~8000 chars each).\n"
|
||||
"2. For editing existing files: use replaceInFile to change only the specific parts.\n"
|
||||
"3. For documentation: split into multiple smaller files."
|
||||
"Do not put the full document body in renderDocument JSON.\n"
|
||||
"1. writeFile(create) + writeFile(append) to a .md file, then "
|
||||
"renderDocument(sourceFileId=<file id>, outputFormat=..., title=...) — tiny tool call.\n"
|
||||
"2. Or replaceInFile for targeted edits.\n"
|
||||
"3. Or split into multiple smaller files."
|
||||
)}
|
||||
else:
|
||||
logger.info(f"Repaired truncated JSON for '{tc['function']['name']}'")
|
||||
repairIssue = _validateRepairedToolArgs(tc["function"]["name"], parsedArgs)
|
||||
if repairIssue:
|
||||
logger.warning(
|
||||
f"Repaired JSON for '{tc['function']['name']}' still invalid for execution: {repairIssue[:80]}..."
|
||||
)
|
||||
parsedArgs = {"_parseError": repairIssue}
|
||||
else:
|
||||
parsedArgs = rawArgs if rawArgs else {}
|
||||
toolCalls.append(ToolCallRequest(
|
||||
|
|
|
|||
|
|
@ -259,7 +259,9 @@ class AgentService:
|
|||
"Use `readFile(fileId)` to read text content, `readContentObjects(fileId)` for structured access, "
|
||||
"or `describeImage(fileId)` for image analysis.\n"
|
||||
"For folders, use `listFiles(folderId)` to get the files inside, then `readFile(fileId)` for each.\n"
|
||||
"When generating documents with `renderDocument`, embed images using `` in the markdown content.\n\n"
|
||||
"For large PDFs/DOCX, avoid huge `renderDocument` tool JSON: build markdown with "
|
||||
"`writeFile` (create + append), then `renderDocument(sourceFileId=that file id, outputFormat=...)`.\n"
|
||||
"For small docs you may pass `content` inline. Embed images with `` in markdown.\n\n"
|
||||
)
|
||||
header += "\n\n".join(fileDescriptions)
|
||||
return f"{header}\n\n---\n\nUser request: {prompt}"
|
||||
|
|
@ -2209,13 +2211,75 @@ def _registerCoreTools(registry: ToolRegistry, services):
|
|||
async def _renderDocument(args: Dict[str, Any], context: Dict[str, Any]):
|
||||
"""Render agent-produced markdown content into any document format via the RendererRegistry."""
|
||||
import re as _re
|
||||
sourceFileId = (args.get("sourceFileId") or "").strip()
|
||||
content = args.get("content", "")
|
||||
if not isinstance(content, str):
|
||||
content = str(content) if content is not None else ""
|
||||
outputFormat = args.get("outputFormat", "pdf")
|
||||
title = args.get("title", "Document")
|
||||
language = args.get("language", "de")
|
||||
|
||||
if not content:
|
||||
return ToolResult(toolCallId="", toolName="renderDocument", success=False, error="content is required")
|
||||
if sourceFileId:
|
||||
try:
|
||||
dbMgmt = services.chat.interfaceDbComponent
|
||||
fileRow = dbMgmt.getFile(sourceFileId)
|
||||
if not fileRow:
|
||||
return ToolResult(
|
||||
toolCallId="",
|
||||
toolName="renderDocument",
|
||||
success=False,
|
||||
error=f"sourceFileId not found: {sourceFileId}",
|
||||
)
|
||||
rawBytes = dbMgmt.getFileData(sourceFileId)
|
||||
if not rawBytes:
|
||||
return ToolResult(
|
||||
toolCallId="",
|
||||
toolName="renderDocument",
|
||||
success=False,
|
||||
error=f"sourceFileId has no data: {sourceFileId}",
|
||||
)
|
||||
try:
|
||||
content = rawBytes.decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
content = rawBytes.decode("latin-1", errors="replace")
|
||||
except Exception as e:
|
||||
return ToolResult(
|
||||
toolCallId="",
|
||||
toolName="renderDocument",
|
||||
success=False,
|
||||
error=f"Could not read sourceFileId: {e}",
|
||||
)
|
||||
|
||||
if not (content or "").strip():
|
||||
return ToolResult(
|
||||
toolCallId="",
|
||||
toolName="renderDocument",
|
||||
success=False,
|
||||
error=(
|
||||
"Provide non-empty `content` (markdown) or `sourceFileId` (id of a .md/.txt from writeFile). "
|
||||
"For long documents use writeFile create+append, then renderDocument(sourceFileId=...)."
|
||||
),
|
||||
)
|
||||
|
||||
modelMaxTokens = context.get("modelMaxOutputTokens", 0)
|
||||
_inlineCharLimit = int(modelMaxTokens * 3 * 0.5) if modelMaxTokens > 0 else 6000
|
||||
_inlineCharLimit = max(_inlineCharLimit, 3000)
|
||||
|
||||
if not sourceFileId and len(content) > _inlineCharLimit:
|
||||
return ToolResult(
|
||||
toolCallId="",
|
||||
toolName="renderDocument",
|
||||
success=False,
|
||||
error=(
|
||||
f"Inline `content` is {len(content)} chars — over the {_inlineCharLimit} char limit "
|
||||
f"(derived from model output budget of {modelMaxTokens} tokens). "
|
||||
"Large documents must use the file path:\n"
|
||||
"1. writeFile(mode='create', name='draft.md', content=<first ~5000 chars>)\n"
|
||||
"2. writeFile(mode='append', fileId=<id>, content=<next chunk>) — repeat as needed\n"
|
||||
"3. renderDocument(sourceFileId=<id>, outputFormat='pdf', title='...')\n"
|
||||
"This avoids output truncation entirely."
|
||||
),
|
||||
)
|
||||
|
||||
try:
|
||||
structuredContent = _markdownToDocumentJson(content, title, language)
|
||||
|
|
@ -2321,20 +2385,26 @@ def _registerCoreTools(registry: ToolRegistry, services):
|
|||
# Register the renderDocument tool. The document body can be supplied either
# inline (`content`) or by reference (`sourceFileId`); the handler rejects calls
# that provide neither, so the schema must not force `content`.
# NOTE(review): the image-syntax example inside the description below appears to
# have been stripped in this view — confirm the literal against the repository.
registry.register(
    "renderDocument", _renderDocument,
    description=(
        "Render markdown into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). "
        "For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with "
        "`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. "
        "Images:  in the markdown."
    ),
    parameters={
        "type": "object",
        "properties": {
            "content": {
                "type": "string",
                "description": "Full markdown inline. Prefer `sourceFileId` when the document is large (many KB).",
            },
            "sourceFileId": {
                "type": "string",
                "description": "Chat file id of markdown saved via writeFile (create+append). Use this instead of `content` for long PDFs.",
            },
            "outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
            "title": {"type": "string", "description": "Document title", "default": "Document"},
            "language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"},
        },
        # Neither field is individually mandatory: a call with only
        # `sourceFileId` is the recommended path for large documents, and
        # requiring `content` here would make that call fail schema validation.
        "required": [],
    },
    readOnly=False,
)
|
||||
|
|
|
|||
|
|
@ -508,7 +508,7 @@ class ChatService:
|
|||
|
||||
def createDataSource(
|
||||
self, connectionId: str, sourceType: str, path: str, label: str,
|
||||
featureInstanceId: str = None
|
||||
featureInstanceId: str = None, displayPath: str = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Create a new external data source reference."""
|
||||
from modules.datamodels.datamodelDataSource import DataSource
|
||||
|
|
@ -517,6 +517,7 @@ class ChatService:
|
|||
sourceType=sourceType,
|
||||
path=path,
|
||||
label=label,
|
||||
displayPath=displayPath,
|
||||
featureInstanceId=featureInstanceId or self._context.feature_instance_id or "",
|
||||
mandateId=self._context.mandate_id or "",
|
||||
userId=self.user.id if self.user else "",
|
||||
|
|
|
|||
|
|
@ -281,7 +281,7 @@ class RendererDocx(BaseRenderer):
|
|||
def _getDefaultStyleSet(self) -> Dict[str, Any]:
|
||||
"""Default DOCX style set - used when no style instructions present."""
|
||||
return {
|
||||
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
|
||||
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "left"},
|
||||
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left"},
|
||||
"heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left"},
|
||||
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
|
||||
|
|
@ -349,11 +349,11 @@ class RendererDocx(BaseRenderer):
|
|||
para.runs[0].italic = True
|
||||
continue
|
||||
elif element_type == "extracted_text":
|
||||
# Extracted text format - render as paragraph
|
||||
content = element.get("content", "")
|
||||
source = element.get("source", "")
|
||||
if content:
|
||||
para = doc.add_paragraph(content)
|
||||
para = doc.add_paragraph()
|
||||
self._addMarkdownInlineRuns(para, content)
|
||||
if source:
|
||||
para.add_run(f" (Source: {source})").italic = True
|
||||
continue
|
||||
|
|
@ -406,6 +406,37 @@ class RendererDocx(BaseRenderer):
|
|||
# Add error paragraph as fallback
|
||||
error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]")
|
||||
|
||||
# ── Markdown inline → python-docx runs ──────────────────────────────
|
||||
_MD_INLINE_RE = re.compile(
|
||||
r"(\*\*(.+?)\*\*)" # group 1,2: bold
|
||||
r"|(__(.+?)__)" # group 3,4: bold (underscore)
|
||||
r"|(?<!\*)\*([^*\n]+?)\*(?!\*)" # group 5: italic
|
||||
r"|(?<![\w/])_([^_\n]+?)_(?![\w/])" # group 6: italic (underscore)
|
||||
r"|`([^`]+)`" # group 7: inline code
|
||||
)
|
||||
|
||||
def _addMarkdownInlineRuns(self, paragraph, text: str) -> None:
    """Append `text` to a python-docx paragraph, translating inline markdown into runs.

    Text between matches of `self._MD_INLINE_RE` is added as plain runs.
    Matches are added as formatted runs with the markers removed:
    `**x**` / `__x__` become bold, `*x*` / `_x_` become italic, and backtick
    code becomes a "Courier New" 9pt run.

    Args:
        paragraph: Object exposing `add_run(text)`; runs are appended in order.
        text: Markdown source. `None` adds nothing; other non-string values
            (e.g. numbers coming from JSON content) are converted with `str()`
            instead of raising `TypeError` inside the regex scan.
    """
    if text is None:
        return
    if not isinstance(text, str):
        text = str(text)

    pos = 0
    for m in self._MD_INLINE_RE.finditer(text):
        # Plain text between the previous match and this one.
        if m.start() > pos:
            paragraph.add_run(text[pos:m.start()])

        # Groups 2/4 hold the inner text of the two bold forms, 5/6 the two
        # italic forms, 7 the inline code body; exactly one is set per match.
        boldText = m.group(2) or m.group(4)
        italicText = m.group(5) or m.group(6)
        codeText = m.group(7)
        if boldText:
            paragraph.add_run(boldText).bold = True
        elif italicText:
            paragraph.add_run(italicText).italic = True
        elif codeText:
            run = paragraph.add_run(codeText)
            run.font.name = "Courier New"
            run.font.size = Pt(9)
        pos = m.end()

    # Trailing plain text after the last match (or the whole text if none matched).
    if pos < len(text):
        paragraph.add_run(text[pos:])
|
||||
|
||||
def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||
"""
|
||||
Render a JSON table to DOCX using AI-generated styles.
|
||||
|
|
@ -480,9 +511,8 @@ class RendererDocx(BaseRenderer):
|
|||
tblW.set(qn('w:w'), '0')
|
||||
tblPr.append(tblW)
|
||||
|
||||
# Center alignment
|
||||
jc = OxmlElement('w:jc')
|
||||
jc.set(qn('w:val'), 'center')
|
||||
jc.set(qn('w:val'), 'left')
|
||||
tblPr.append(jc)
|
||||
|
||||
# Apply table borders directly (works without template styles)
|
||||
|
|
@ -821,10 +851,11 @@ class RendererDocx(BaseRenderer):
|
|||
text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
|
||||
|
||||
for item in items:
|
||||
if isinstance(item, str):
|
||||
para = doc.add_paragraph(item, style='List Bullet')
|
||||
elif isinstance(item, dict) and "text" in item:
|
||||
para = doc.add_paragraph(item["text"], style='List Bullet')
|
||||
itemText = item if isinstance(item, str) else (item.get("text", "") if isinstance(item, dict) else "")
|
||||
if not itemText:
|
||||
continue
|
||||
para = doc.add_paragraph(style='List Bullet')
|
||||
self._addMarkdownInlineRuns(para, itemText)
|
||||
|
||||
# Apply bullet list styling from style set - use cached objects
|
||||
if bullet_style and para.runs:
|
||||
|
|
@ -849,7 +880,6 @@ class RendererDocx(BaseRenderer):
|
|||
def _renderJsonHeading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||
"""Render a JSON heading to DOCX using AI-generated styles."""
|
||||
try:
|
||||
# Extract from nested content structure
|
||||
content = heading_data.get("content", {})
|
||||
if not isinstance(content, dict):
|
||||
return
|
||||
|
|
@ -858,13 +888,13 @@ class RendererDocx(BaseRenderer):
|
|||
|
||||
if text:
|
||||
level = max(1, min(6, level))
|
||||
# Use custom heading style if available, otherwise use built-in
|
||||
style_name = f"Heading {level}" if level <= 2 else "Heading 1"
|
||||
# python-docx supports Heading 1 – Heading 9 as built-in styles
|
||||
try:
|
||||
para = doc.add_paragraph(text, style=style_name)
|
||||
except KeyError:
|
||||
# Fallback to built-in heading if custom style doesn't exist
|
||||
doc.add_heading(text, level=level)
|
||||
para = doc.add_heading("", level=level)
|
||||
para.clear()
|
||||
self._addMarkdownInlineRuns(para, text)
|
||||
except (KeyError, ValueError):
|
||||
para = doc.add_paragraph(text)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering heading: {str(e)}")
|
||||
|
|
@ -893,8 +923,8 @@ class RendererDocx(BaseRenderer):
|
|||
return
|
||||
|
||||
if text:
|
||||
para = doc.add_paragraph(text)
|
||||
# Apply paragraph styling from style set - OPTIMIZED: pre-calculate style objects
|
||||
para = doc.add_paragraph()
|
||||
self._addMarkdownInlineRuns(para, text)
|
||||
paragraph_style = styles.get("paragraph", {})
|
||||
if paragraph_style:
|
||||
# Pre-calculate and cache style objects
|
||||
|
|
@ -1345,7 +1375,7 @@ class RendererDocx(BaseRenderer):
|
|||
|
||||
# Create table
|
||||
table = doc.add_table(rows=len(table_data), cols=len(table_data[0]))
|
||||
table.alignment = WD_TABLE_ALIGNMENT.CENTER
|
||||
table.alignment = WD_TABLE_ALIGNMENT.LEFT
|
||||
|
||||
# Add data to table
|
||||
for row_idx, row_data in enumerate(table_data):
|
||||
|
|
|
|||
|
|
@ -4,6 +4,10 @@
|
|||
PDF renderer for report generation using reportlab.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import unicodedata
|
||||
|
||||
from .documentRendererBaseTemplate import BaseRenderer
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
|
@ -11,8 +15,8 @@ import io
|
|||
import base64
|
||||
|
||||
try:
|
||||
from reportlab.lib.pagesizes import letter, A4
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Preformatted
|
||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||
from reportlab.lib.units import inch
|
||||
from reportlab.lib import colors
|
||||
|
|
@ -21,6 +25,53 @@ try:
|
|||
except ImportError:
|
||||
REPORTLAB_AVAILABLE = False
|
||||
|
||||
import re as _re_pdf
|
||||
|
||||
# A4 width in pt; margins must match SimpleDocTemplate(leftMargin/rightMargin)
|
||||
_PDF_MARGIN_LR_PT = 72.0
|
||||
_PDF_A4_WIDTH_PT = 595.27
|
||||
_PDF_CONTENT_WIDTH_PT = _PDF_A4_WIDTH_PT - (2 * _PDF_MARGIN_LR_PT)
|
||||
|
||||
|
||||
def _boxDrawingCharToAscii(ch: str) -> str:
    """Map one box-drawing character to ASCII (Courier has no glyphs for U+2500–U+257F).

    The decision is made from the character's Unicode name:

    * diagonals (U+2571–U+2573) map to "/", "\\" and "X";
    * names that only mention the horizontal axis (HORIZONTAL, LEFT, RIGHT)
      map to "-" — this includes half lines and mixed-weight lines such as
      "LIGHT LEFT AND HEAVY RIGHT";
    * names that only mention the vertical axis (VERTICAL, UP, DOWN) map to "|";
    * everything touching both axes (corners, tees, crosses, arcs) maps to "+".

    Args:
        ch: A single character.

    Returns:
        The one-character ASCII stand-in. Characters that are not box-drawing
        characters yield "+".
    """
    name = unicodedata.name(ch, "")
    if not name.startswith("BOX DRAWINGS"):
        return "+"

    words = set(name.split())
    if "DIAGONAL" in words:
        if "CROSS" in words:
            return "X"
        # U+2571 runs upper right -> lower left, i.e. a forward slash.
        return "/" if "UPPER RIGHT TO LOWER LEFT" in name else "\\"

    touchesHorizontal = bool(words & {"HORIZONTAL", "LEFT", "RIGHT"})
    touchesVertical = bool(words & {"VERTICAL", "UP", "DOWN"})
    if touchesHorizontal and not touchesVertical:
        return "-"
    if touchesVertical and not touchesHorizontal:
        return "|"
    return "+"
|
||||
|
||||
|
||||
def _normalizePdfMonospaceText(text: str) -> str:
    """Return `text` with Unicode box-drawing and block characters replaced by ASCII.

    Box-drawing characters (U+2500–U+257F) are translated one by one through
    `_boxDrawingCharToAscii`; block elements (U+2580–U+259F) become "#".
    All other characters are kept. Empty or missing input yields "".
    """
    if not text:
        return ""

    def _asciiFor(char: str) -> str:
        codePoint = ord(char)
        if 0x2500 <= codePoint <= 0x257F:
            return _boxDrawingCharToAscii(char)
        if 0x2580 <= codePoint <= 0x259F:
            return "#"
        return char

    return "".join(_asciiFor(char) for char in text)
|
||||
|
||||
|
||||
def _prepareCodeBlockPlainText(text: str) -> str:
    """Prepare code-block text for preformatted output.

    Converts CRLF and lone CR line endings to LF and expands tabs to 4-column
    tab stops. No HTML/XML escaping is applied, so spaces stay significant.
    Empty or missing input yields "".
    """
    if not text:
        return ""
    normalized = text
    # CRLF must be handled before lone CR so it collapses to a single newline.
    for lineEnding in ("\r\n", "\r"):
        normalized = normalized.replace(lineEnding, "\n")
    return normalized.expandtabs(4)
|
||||
|
||||
|
||||
class RendererPdf(BaseRenderer):
|
||||
"""Renders content to PDF format using reportlab."""
|
||||
|
||||
|
|
@ -122,15 +173,6 @@ class RendererPdf(BaseRenderer):
|
|||
|
||||
# Extract sections and metadata from standardized schema
|
||||
sections = self._extractSections(json_content)
|
||||
metadata = self._extractMetadata(json_content)
|
||||
|
||||
# Use provided title (which comes from documents[].title) as primary source
|
||||
# Fallback to metadata.title only if title parameter is empty
|
||||
document_title = title if title else metadata.get("title", "Generated Document")
|
||||
|
||||
# Make title shorter to prevent wrapping/overlapping
|
||||
if len(document_title) > 40:
|
||||
document_title = "PowerOn - Consent Agreement"
|
||||
|
||||
# Create a buffer to hold the PDF
|
||||
buffer = io.BytesIO()
|
||||
|
|
@ -145,17 +187,9 @@ class RendererPdf(BaseRenderer):
|
|||
bottomMargin=18
|
||||
)
|
||||
|
||||
# Build PDF content
|
||||
# Build PDF content (no cover page — body starts on page 1; filename still uses `title`)
|
||||
story = []
|
||||
|
||||
# Title page
|
||||
title_style = self._createTitleStyle(styles)
|
||||
story.append(Paragraph(document_title, title_style))
|
||||
story.append(Spacer(1, 50)) # Increased spacing to prevent overlap
|
||||
story.append(Paragraph(f"Generated: {self._formatTimestamp()}", self._createNormalStyle(styles)))
|
||||
story.append(Spacer(1, 30)) # Add spacing before page break
|
||||
story.append(PageBreak())
|
||||
|
||||
# Process each section (sections already extracted above)
|
||||
self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
|
||||
for i, section in enumerate(sections):
|
||||
|
|
@ -164,10 +198,9 @@ class RendererPdf(BaseRenderer):
|
|||
self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
|
||||
story.extend(section_elements)
|
||||
|
||||
# Build PDF
|
||||
doc.build(story)
|
||||
# Build PDF — retry with oversized flowables removed on LayoutError
|
||||
self._buildPdfWithOverflowGuard(doc, story, buffer)
|
||||
|
||||
# Get PDF content as base64
|
||||
buffer.seek(0)
|
||||
pdf_bytes = buffer.getvalue()
|
||||
pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
|
||||
|
|
@ -178,6 +211,42 @@ class RendererPdf(BaseRenderer):
|
|||
self.logger.error(f"Error generating PDF from JSON: {str(e)}")
|
||||
raise Exception(f"PDF generation failed: {str(e)}")
|
||||
|
||||
def _buildPdfWithOverflowGuard(self, doc, story: List[Any], buffer) -> None:
    """Build the PDF, degrading gracefully when a flowable does not fit on a page.

    Calls `doc.build()`; if it fails with a "too large on page" error, the first
    single-cell table still present in `story` is replaced by a short notice
    paragraph and the build is retried, up to 5 retries.
    NOTE(review): the replaced table is the first 1x1 table found, which is
    presumably a code block but is not guaranteed to be the flowable named in
    the error — confirm whether matching on the error text is wanted.

    Args:
        doc: Document template exposing `build(flowables)`.
        story: Flowables to render. Entries may be replaced in place by the
            notice paragraph; the list is otherwise left intact.
        buffer: Seekable output buffer the document writes to; it is reset
            before every attempt so a failed attempt leaves no partial output.

    Raises:
        Exception: The build error is re-raised when it is not an overflow
            error, when the retries are exhausted, or when no single-cell table
            is left to replace.
    """
    maxRetries = 5
    for attempt in range(maxRetries + 1):
        buffer.seek(0)
        buffer.truncate()
        try:
            # build() removes flowables from the list it is handed while laying
            # them out. Building from a copy keeps `story` complete, so a retry
            # renders the whole document again instead of only the remainder.
            doc.build(list(story))
            return
        except Exception as e:
            msg = str(e)
            if "too large on page" not in msg or attempt == maxRetries:
                raise
            self.logger.warning(f"PDF overflow (attempt {attempt + 1}): {msg} — removing oversized element and retrying")

            removed = False
            for idx, flowable in enumerate(story):
                if not (hasattr(flowable, '_cellvalues') and "Table" in repr(flowable)):
                    continue
                cells = flowable._cellvalues
                try:
                    nRows = len(cells)
                    nCols = len(cells[0]) if cells else 0
                except (TypeError, IndexError, KeyError):
                    # Unexpected cell container shape: not a candidate, keep looking.
                    continue
                if nRows == 1 and nCols == 1:
                    story[idx] = Paragraph(
                        "[Code block omitted — content too large for PDF page]",
                        self._createNormalStyle({}),
                    )
                    removed = True
                    break
            if not removed:
                # Nothing left that can be replaced; surface the layout error.
                raise
|
||||
|
||||
async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
|
||||
"""Get style set - use styles from document generation metadata if available,
|
||||
otherwise enhance default styles with AI if userPrompt provided.
|
||||
|
|
@ -269,13 +338,18 @@ class RendererPdf(BaseRenderer):
|
|||
"""Default PDF style set - used when no style instructions present."""
|
||||
return {
|
||||
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
|
||||
# Markdown #..###### — sizes must strictly decrease (H1 largest … H6 smallest).
|
||||
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12},
|
||||
"heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
|
||||
"heading2": {"font_size": 15, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 10, "space_before": 10},
|
||||
"heading3": {"font_size": 13, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
|
||||
"heading4": {"font_size": 12, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 6, "space_before": 6},
|
||||
"heading5": {"font_size": 11, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 6, "space_before": 6},
|
||||
"heading6": {"font_size": 10, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 4, "space_before": 4},
|
||||
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
|
||||
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center", "font_size": 12},
|
||||
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "left", "font_size": 12},
|
||||
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10},
|
||||
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3},
|
||||
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
|
||||
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6, "align": "left"}
|
||||
}
|
||||
|
||||
async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
|
|
@ -441,39 +515,35 @@ class RendererPdf(BaseRenderer):
|
|||
return color_value
|
||||
return default
|
||||
|
||||
|
||||
def _createTitleStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
|
||||
"""Create title style from style definitions."""
|
||||
title_style_def = styles.get("title", {})
|
||||
|
||||
# DEBUG: Show what color and spacing is being used for title
|
||||
title_color = title_style_def.get("color", "#1F4E79")
|
||||
title_space_after = title_style_def.get("space_after", 30)
|
||||
self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hexToColor(title_color)}", "PDF_RENDERER")
|
||||
self.services.utils.debugLogToFile(f"PDF TITLE SPACE_AFTER: {title_space_after}", "PDF_RENDERER")
|
||||
|
||||
return ParagraphStyle(
|
||||
'CustomTitle',
|
||||
fontSize=title_style_def.get("font_size", 20), # Reduced from 24 to 20
|
||||
spaceAfter=title_style_def.get("space_after", 30),
|
||||
alignment=self._getAlignment(title_style_def.get("align", "center")),
|
||||
textColor=self._hexToColor(title_color),
|
||||
leading=title_style_def.get("font_size", 20) * 1.4, # Add line spacing for multi-line titles
|
||||
spaceBefore=0 # Ensure no space before title
|
||||
)
|
||||
def _defaultHeadingStyleDef(self, level: int) -> Dict[str, Any]:
    """Return the built-in style definition for a heading of the given level.

    Used when `heading{N}` is missing from the styles, so a missing level never
    falls back to heading1 (that made H3 larger than H2).

    Args:
        level: Heading level. Levels 1-6 have their own font size; any other
            value gets size 10.

    Returns:
        Dict with `font_size`, `color`, `bold`, `align`, `space_after` and
        `space_before`. Spacing is `14 - level`, but never below 4.
    """
    fontSizeByLevel = {1: 18, 2: 15, 3: 13, 4: 12, 5: 11, 6: 10}
    spacing = max(4, 14 - level)
    if level <= 2:
        textColor = "#2F2F2F"
    else:
        textColor = "#4F4F4F"

    styleDef: Dict[str, Any] = {}
    styleDef["font_size"] = fontSizeByLevel.get(level, 10)
    styleDef["color"] = textColor
    styleDef["bold"] = True
    styleDef["align"] = "left"
    styleDef["space_after"] = spacing
    styleDef["space_before"] = spacing
    return styleDef
|
||||
|
||||
def _createHeadingStyle(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
    """Create a heading style for the given level from the style definitions.

    Looks up ``heading{level}`` in ``styles``. When that entry is missing or
    empty, the per-level defaults from ``_defaultHeadingStyleDef`` are used
    instead of the heading1 definition, so deeper headings never render
    larger than shallower ones.

    Args:
        styles: Mapping of style names to style-definition dicts.
        level: Heading level used for the lookup key and the style name.

    Returns:
        A ``ParagraphStyle`` named ``CustomHeading{level}``.
    """
    heading_key = f"heading{level}"
    heading_style_def = styles.get(heading_key) or self._defaultHeadingStyleDef(level)
    # A custom definition may omit font_size; fall back to the level default.
    fs = heading_style_def.get("font_size", self._defaultHeadingStyleDef(level)["font_size"])
    bold = heading_style_def.get("bold", True)
    return ParagraphStyle(
        f'CustomHeading{level}',
        fontName="Helvetica-Bold" if bold else "Helvetica",
        fontSize=fs,
        spaceAfter=heading_style_def.get("space_after", 12),
        spaceBefore=heading_style_def.get("space_before", 12),
        alignment=self._getAlignment(heading_style_def.get("align", "left")),
        textColor=self._hexToColor(heading_style_def.get("color", "#2F2F2F")),
        leading=fs * 1.35,
    )
|
||||
|
||||
def _createNormalStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
|
||||
|
|
@ -505,22 +575,6 @@ class RendererPdf(BaseRenderer):
|
|||
}
|
||||
return align_map.get(align.lower().strip(), TA_LEFT)
|
||||
|
||||
def _getTableAlignment(self, align: str) -> str:
    """Convert an alignment string to a ReportLab table alignment string.

    Args:
        align: Alignment name (``"left"``, ``"center"``, ``"right"``,
            ``"justify"``) or numeric string (``"0"``, ``"1"``, ``"2"``);
            matching is case-insensitive and ignores surrounding whitespace.

    Returns:
        ``'LEFT'``, ``'CENTER'`` or ``'RIGHT'``. Empty, non-string or unknown
        values yield ``'LEFT'``.
    """
    if not align or not isinstance(align, str):
        return 'LEFT'

    align_map = {
        "center": 'CENTER',
        "left": 'LEFT',
        "justify": 'LEFT',  # Tables don't support justify, use LEFT
        "right": 'RIGHT',
        "0": 'LEFT',  # Handle numeric strings
        "1": 'CENTER',
        "2": 'LEFT',  # Tables don't support justify, use LEFT
    }
    return align_map.get(align.lower().strip(), 'LEFT')
|
||||
|
||||
def _hexToColor(self, hex_color: str) -> colors.Color:
|
||||
"""Convert hex color to reportlab color."""
|
||||
try:
|
||||
|
|
@ -543,6 +597,65 @@ class RendererPdf(BaseRenderer):
|
|||
except:
|
||||
return colors.black
|
||||
|
||||
def _escapeReportlabXml(self, text: str) -> str:
    """Escape text for ReportLab Paragraph markup.

    Replaces the three XML-significant characters with their entities so
    literal text cannot be mistaken for paragraph markup.

    Args:
        text: Raw text; ``None`` or an empty string yields ``""``.

    Returns:
        The text with ``&``, ``<`` and ``>`` replaced by entities.
    """
    if not text:
        return ""
    # "&" must be replaced first, otherwise the ampersands of the entities
    # produced for "<" and ">" would be escaped a second time.
    return (
        text.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
    )
|
||||
|
||||
def _applyInlineMarkdownToEscapedPlain(self, text: str) -> str:
    """Escape XML, then apply bold/italic markdown to a segment without code spans.

    Backtick code spans are handled separately by the caller, so this method
    only deals with ``**bold**``, ``__bold__``, ``*italic*`` and ``_italic_``.

    Args:
        text: Raw segment; ``None`` or an empty string yields ``""``.

    Returns:
        ReportLab paragraph XML using ``<b>`` and ``<i>`` tags.
    """
    if not text:
        return ""
    # Escape first so the tags inserted below are the only markup in the result.
    s = self._escapeReportlabXml(text)
    # Bold before italic: the double markers must be consumed first.
    s = _re_pdf.sub(r"\*\*(.+?)\*\*", r"<b>\1</b>", s, flags=_re_pdf.DOTALL)
    s = _re_pdf.sub(r"__(.+?)__", r"<b>\1</b>", s, flags=_re_pdf.DOTALL)
    s = _re_pdf.sub(r"(?<!\*)\*([^*\n]+?)\*(?!\*)", r"<i>\1</i>", s)
    # The lookarounds skip underscores that touch word characters or "/",
    # e.g. those inside snake_case names and paths.
    s = _re_pdf.sub(r"(?<![\w/])_([^_\n]+?)_(?![\w/])", r"<i>\1</i>", s)
    return s
|
||||
|
||||
def _markdownInlineToReportlabXml(self, text: str) -> str:
    """Turn common markdown inline (**bold**, *italic*, `code`) into ReportLab XML.

    Backtick spans are extracted first so paths like `.../<Slug>/...` are not
    corrupted by the markdown patterns and XML escaping stays well-formed
    inside the Courier font tag.

    Args:
        text: Raw markdown text; ``None`` or an empty string yields ``""``.

    Returns:
        ReportLab paragraph XML for the whole text.
    """
    if not text:
        return ""
    text = _normalizePdfMonospaceText(text)
    out: List[str] = []
    pos = 0
    for m in _re_pdf.finditer(r"`([^`]*)`", text):
        # Text before the code span gets escaping plus bold/italic handling.
        before = text[pos:m.start()]
        out.append(self._applyInlineMarkdownToEscapedPlain(before))
        # Code content is only escaped, never interpreted as markdown.
        code = m.group(1)
        out.append(f'<font name="Courier">{self._escapeReportlabXml(code)}</font>')
        pos = m.end()
    # Tail after the last code span (or the whole text when there is none).
    out.append(self._applyInlineMarkdownToEscapedPlain(text[pos:]))
    return "".join(out)
|
||||
|
||||
def _paragraphFromInlineMarkdown(self, text: str, style: ParagraphStyle) -> Paragraph:
    """Build a Paragraph from markdown inline text using the given style.

    The text is converted with ``_markdownInlineToReportlabXml`` before it is
    handed to ``Paragraph``.
    """
    return Paragraph(self._markdownInlineToReportlabXml(text), style)
|
||||
|
||||
def _createTableCellParagraphStyle(
    self, styles: Dict[str, Any], *, header: bool, tableStyleKey: str
) -> ParagraphStyle:
    """Paragraph style for table cells (word wrap within colWidth).

    Args:
        styles: Mapping of style names to style-definition dicts.
        header: True for header cells (default 12pt, white text, bold unless
            the definition sets ``bold`` to a falsy value); False for body
            cells (default 10pt, ``#2F2F2F`` text, regular weight).
        tableStyleKey: Key in ``styles`` holding the cell definition; it is
            also part of the generated style name.

    Returns:
        A left-aligned ``ParagraphStyle`` for the cell content.
    """
    tdef = styles.get(tableStyleKey, {})
    fs = tdef.get("font_size", 12 if header else 10)
    defaultTc = "#FFFFFF" if header else "#2F2F2F"
    return ParagraphStyle(
        f"TblCell{'H' if header else 'B'}{tableStyleKey}",
        fontSize=fs,
        leading=fs * 1.25,
        alignment=TA_LEFT,
        textColor=self._hexToColor(tdef.get("text_color", defaultTc)),
        fontName="Helvetica-Bold" if header and tdef.get("bold", True) else "Helvetica",
    )
|
||||
|
||||
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||||
"""Render a single JSON section to PDF elements using AI-generated styles.
|
||||
Supports three content formats: reference, object (base64), extracted_text.
|
||||
|
|
@ -575,8 +688,10 @@ class RendererPdf(BaseRenderer):
|
|||
content = element.get("content", "")
|
||||
source = element.get("source", "")
|
||||
if content:
|
||||
source_text = f" <i>(Source: {source})</i>" if source else ""
|
||||
all_elements.append(Paragraph(f"{content}{source_text}", self._createNormalStyle(styles)))
|
||||
bodyXml = self._markdownInlineToReportlabXml(content)
|
||||
if source:
|
||||
bodyXml = f"{bodyXml} <i>(Source: {self._escapeReportlabXml(source)})</i>"
|
||||
all_elements.append(Paragraph(bodyXml, self._createNormalStyle(styles)))
|
||||
all_elements.append(Spacer(1, 6))
|
||||
continue
|
||||
|
||||
|
|
@ -618,10 +733,8 @@ class RendererPdf(BaseRenderer):
|
|||
return [Paragraph(f"[Error rendering section: {str(e)}]", self._createNormalStyle(styles))]
|
||||
|
||||
def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||||
"""Render a JSON table to PDF elements using AI-generated styles."""
|
||||
"""Render a JSON table: left-aligned, width capped to printable area, cells wrap."""
|
||||
try:
|
||||
# Handle nested content structure: element.content.headers vs element.headers
|
||||
# Extract from nested content structure
|
||||
content = table_data.get("content", {})
|
||||
if not isinstance(content, dict):
|
||||
return []
|
||||
|
|
@ -631,30 +744,43 @@ class RendererPdf(BaseRenderer):
|
|||
if not headers or not rows:
|
||||
return []
|
||||
|
||||
# Prepare table data
|
||||
table_data_list = [headers] + rows
|
||||
numCols = len(headers)
|
||||
colWidth = _PDF_CONTENT_WIDTH_PT / max(numCols, 1)
|
||||
colWidths = [colWidth] * numCols
|
||||
|
||||
# Create table
|
||||
table = Table(table_data_list)
|
||||
hdrPs = self._createTableCellParagraphStyle(styles, header=True, tableStyleKey="table_header")
|
||||
cellPs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell")
|
||||
|
||||
def _cellPara(val, ps):
|
||||
return self._paragraphFromInlineMarkdown(str(val) if val is not None else "", ps)
|
||||
|
||||
headerRow = [_cellPara(h, hdrPs) for h in headers]
|
||||
bodyRows = []
|
||||
for row in rows:
|
||||
padded = list(row) + [""] * max(0, numCols - len(row))
|
||||
padded = padded[:numCols]
|
||||
bodyRows.append([_cellPara(c, cellPs) for c in padded])
|
||||
|
||||
table_matrix = [headerRow] + bodyRows
|
||||
table = Table(table_matrix, colWidths=colWidths, repeatRows=1)
|
||||
|
||||
# Apply styling
|
||||
table_header_style = styles.get("table_header", {})
|
||||
table_cell_style = styles.get("table_cell", {})
|
||||
|
||||
table_style = [
|
||||
('BACKGROUND', (0, 0), (-1, 0), self._hexToColor(table_header_style.get("background", "#4F4F4F"))),
|
||||
('TEXTCOLOR', (0, 0), (-1, 0), self._hexToColor(table_header_style.get("text_color", "#FFFFFF"))),
|
||||
('ALIGN', (0, 0), (-1, -1), self._getTableAlignment(table_cell_style.get("align", "left"))),
|
||||
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'),
|
||||
('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)),
|
||||
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
|
||||
('BACKGROUND', (0, 1), (-1, -1), self._hexToColor(table_cell_style.get("background", "#FFFFFF"))),
|
||||
('FONTSIZE', (0, 1), (-1, -1), table_cell_style.get("font_size", 10)),
|
||||
('GRID', (0, 0), (-1, -1), 1, colors.black)
|
||||
("BACKGROUND", (0, 0), (-1, 0), self._hexToColor(table_header_style.get("background", "#4F4F4F"))),
|
||||
("BACKGROUND", (0, 1), (-1, -1), self._hexToColor(table_cell_style.get("background", "#FFFFFF"))),
|
||||
("ALIGN", (0, 0), (-1, -1), "LEFT"),
|
||||
("VALIGN", (0, 0), (-1, -1), "TOP"),
|
||||
("LEFTPADDING", (0, 0), (-1, -1), 4),
|
||||
("RIGHTPADDING", (0, 0), (-1, -1), 4),
|
||||
("TOPPADDING", (0, 0), (-1, 0), 6),
|
||||
("BOTTOMPADDING", (0, 0), (-1, 0), 8),
|
||||
("TOPPADDING", (0, 1), (-1, -1), 4),
|
||||
("BOTTOMPADDING", (0, 1), (-1, -1), 4),
|
||||
("GRID", (0, 0), (-1, -1), 0.5, colors.black),
|
||||
]
|
||||
|
||||
table.setStyle(TableStyle(table_style))
|
||||
|
||||
return [table, Spacer(1, 12)]
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -674,9 +800,16 @@ class RendererPdf(BaseRenderer):
|
|||
elements = []
|
||||
for item in items:
|
||||
if isinstance(item, str):
|
||||
elements.append(Paragraph(f"• {item}", self._createNormalStyle(styles)))
|
||||
elements.append(
|
||||
Paragraph(f"• {self._markdownInlineToReportlabXml(item)}", self._createNormalStyle(styles))
|
||||
)
|
||||
elif isinstance(item, dict) and "text" in item:
|
||||
elements.append(Paragraph(f"• {item['text']}", self._createNormalStyle(styles)))
|
||||
elements.append(
|
||||
Paragraph(
|
||||
f"• {self._markdownInlineToReportlabXml(item['text'])}",
|
||||
self._createNormalStyle(styles),
|
||||
)
|
||||
)
|
||||
|
||||
if elements:
|
||||
elements.append(Spacer(1, bullet_style_def.get("space_after", 3)))
|
||||
|
|
@ -700,7 +833,7 @@ class RendererPdf(BaseRenderer):
|
|||
if text:
|
||||
level = max(1, min(6, level))
|
||||
heading_style = self._createHeadingStyle(styles, level)
|
||||
return [Paragraph(text, heading_style)]
|
||||
return [self._paragraphFromInlineMarkdown(text, heading_style)]
|
||||
|
||||
return []
|
||||
|
||||
|
|
@ -721,7 +854,7 @@ class RendererPdf(BaseRenderer):
|
|||
text = ""
|
||||
|
||||
if text:
|
||||
return [Paragraph(text, self._createNormalStyle(styles))]
|
||||
return [self._paragraphFromInlineMarkdown(text, self._createNormalStyle(styles))]
|
||||
|
||||
return []
|
||||
|
||||
|
|
@ -741,27 +874,81 @@ class RendererPdf(BaseRenderer):
|
|||
code_style_def = styles.get("code_block", {})
|
||||
|
||||
if code:
|
||||
code = _prepareCodeBlockPlainText(code)
|
||||
code = _normalizePdfMonospaceText(code)
|
||||
elements = []
|
||||
fs = code_style_def.get("font_size", 9)
|
||||
mono = code_style_def.get("font", "Courier")
|
||||
|
||||
if language:
|
||||
lang_style = ParagraphStyle(
|
||||
'CodeLanguage',
|
||||
fontSize=code_style_def.get("font_size", 9),
|
||||
"CodeLanguage",
|
||||
fontSize=fs,
|
||||
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
|
||||
fontName='Helvetica-Bold'
|
||||
fontName="Helvetica-Bold",
|
||||
alignment=TA_LEFT,
|
||||
)
|
||||
elements.append(
|
||||
Paragraph(
|
||||
self._escapeReportlabXml(f"Code ({language}):"),
|
||||
lang_style,
|
||||
)
|
||||
)
|
||||
elements.append(Paragraph(f"Code ({language}):", lang_style))
|
||||
|
||||
code_style = ParagraphStyle(
|
||||
'CodeBlock',
|
||||
fontSize=code_style_def.get("font_size", 9),
|
||||
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
|
||||
fontName=code_style_def.get("font", "Courier"),
|
||||
backColor=self._hexToColor(code_style_def.get("background", "#F5F5F5")),
|
||||
spaceAfter=code_style_def.get("space_after", 6)
|
||||
)
|
||||
elements.append(Paragraph(code, code_style))
|
||||
approxCharWPt = max(fs * 0.52, 4.5)
|
||||
usableWidth = _PDF_CONTENT_WIDTH_PT - 16 # left+right padding
|
||||
maxLineChars = max(48, int(usableWidth / approxCharWPt))
|
||||
bg_col = self._hexToColor(code_style_def.get("background", "#F5F5F5"))
|
||||
leading = fs * 1.2
|
||||
spaceAfter = code_style_def.get("space_after", 6)
|
||||
|
||||
# Each source line may wrap to ceil(len/maxLineChars) visual lines.
|
||||
# Frame height ~740pt minus padding → keep rendered height < 600pt.
|
||||
maxVisualLinesPerChunk = max(8, int(600 / leading))
|
||||
srcLines = code.split("\n")
|
||||
chunks: List[List[str]] = []
|
||||
curChunk: List[str] = []
|
||||
curVisual = 0
|
||||
for sl in srcLines:
|
||||
wrapped = max(1, -(-len(sl) // maxLineChars)) if sl else 1
|
||||
if curVisual + wrapped > maxVisualLinesPerChunk and curChunk:
|
||||
chunks.append(curChunk)
|
||||
curChunk = []
|
||||
curVisual = 0
|
||||
curChunk.append(sl)
|
||||
curVisual += wrapped
|
||||
if curChunk:
|
||||
chunks.append(curChunk)
|
||||
|
||||
for ci, chunkLines in enumerate(chunks):
|
||||
chunkText = "\n".join(chunkLines)
|
||||
styleId = f"CodePre_{id(code_data) & 0xFFFFFFFF}_{ci}"
|
||||
codePrStyle = ParagraphStyle(
|
||||
styleId,
|
||||
fontName=mono,
|
||||
fontSize=fs,
|
||||
leading=leading,
|
||||
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
|
||||
alignment=TA_LEFT,
|
||||
leftIndent=0,
|
||||
rightIndent=0,
|
||||
)
|
||||
pf = Preformatted(chunkText, codePrStyle, dedent=0, maxLineLength=maxLineChars)
|
||||
tbl = Table([[pf]], colWidths=[_PDF_CONTENT_WIDTH_PT])
|
||||
tbl.setStyle(
|
||||
TableStyle(
|
||||
[
|
||||
("BACKGROUND", (0, 0), (-1, -1), bg_col),
|
||||
("VALIGN", (0, 0), (-1, -1), "TOP"),
|
||||
("LEFTPADDING", (0, 0), (-1, -1), 8),
|
||||
("RIGHTPADDING", (0, 0), (-1, -1), 8),
|
||||
("TOPPADDING", (0, 0), (-1, -1), 6),
|
||||
("BOTTOMPADDING", (0, 0), (-1, -1), 6),
|
||||
]
|
||||
)
|
||||
)
|
||||
tbl.spaceAfter = 0 if ci < len(chunks) - 1 else spaceAfter
|
||||
elements.append(tbl)
|
||||
return elements
|
||||
|
||||
return []
|
||||
|
|
|
|||
|
|
@ -13,6 +13,15 @@ from modules.datamodels.datamodelDocument import RenderedDocument
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Inline markdown tokenizer for slide text. Capture groups:
#   2 -> text of **bold**    4 -> text of __bold__
#   5 -> text of *italic*    6 -> text of _italic_
#   7 -> text of `code`
# Groups 1 and 3 hold the full bold match including its markers.
_PPTX_MD_INLINE_RE = re.compile(
    r"(\*\*(.+?)\*\*)"
    r"|(__(.+?)__)"
    r"|(?<!\*)\*([^*\n]+?)\*(?!\*)"
    r"|(?<![\w/])_([^_\n]+?)_(?![\w/])"
    r"|`([^`]+)`"
)
|
||||
|
||||
|
||||
class RendererPptx(BaseRenderer):
|
||||
"""Renderer for PowerPoint (.pptx) files using python-pptx library."""
|
||||
|
||||
|
|
@ -99,56 +108,75 @@ class RendererPptx(BaseRenderer):
|
|||
|
||||
for i, slide_data in enumerate(slidesData):
|
||||
slide_sections = slide_data.get("sections", [])
|
||||
slide_images = list(slide_data.get("images", [])) # Make copy so we can append
|
||||
slide_images = list(slide_data.get("images", []))
|
||||
slide_content = slide_data.get('content', '')
|
||||
hasSections = slide_sections and len(slide_sections) > 0
|
||||
hasImages = len(slide_images) > 0
|
||||
isTitleSlide = slide_data.get("_isTitleSlide", False)
|
||||
|
||||
logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - sections: {len(slide_sections)}, images: {len(slide_images)}, content: {len(slide_content)} chars")
|
||||
logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - sections: {len(slide_sections)}, images: {len(slide_images)}, content: {len(slide_content)} chars, titleSlide={isTitleSlide}")
|
||||
|
||||
# Use blank layout for all slides to avoid placeholder interference
|
||||
# Find blank layout (typically index 6, fallback to 5)
|
||||
# Title slide uses the built-in Title Slide layout (index 0)
|
||||
if isTitleSlide:
|
||||
titleLayout = prs.slide_layouts[0]
|
||||
slide = prs.slides.add_slide(titleLayout)
|
||||
try:
|
||||
titleShape = slide.shapes.title
|
||||
titleShape.text = slide_data.get("title", "")
|
||||
titleStyle = styles.get("title", {})
|
||||
tf = titleShape.text_frame
|
||||
if tf.paragraphs:
|
||||
p = tf.paragraphs[0]
|
||||
p.font.size = Pt(titleStyle.get("font_size", 36))
|
||||
p.font.bold = titleStyle.get("bold", True)
|
||||
tColor = self._getSafeColor(titleStyle.get("color", (31, 78, 121)))
|
||||
p.font.color.rgb = RGBColor(*tColor)
|
||||
except Exception as titleErr:
|
||||
logger.warning(f"Could not style title slide: {titleErr}")
|
||||
# Clear subtitle placeholder
|
||||
try:
|
||||
sub = slide.placeholders[1]
|
||||
sub.text = ""
|
||||
except (KeyError, IndexError):
|
||||
pass
|
||||
continue
|
||||
|
||||
# Content slides: use blank layout
|
||||
slideLayoutIndex = None
|
||||
for idx in [6, 5]:
|
||||
if idx < len(prs.slide_layouts):
|
||||
try:
|
||||
layout = prs.slide_layouts[idx]
|
||||
# Check if it's a blank layout (no placeholders)
|
||||
if len(layout.placeholders) == 0:
|
||||
slideLayoutIndex = idx
|
||||
break
|
||||
except (AttributeError, IndexError):
|
||||
continue
|
||||
|
||||
# If no blank layout found, use layout with fewest placeholders
|
||||
if slideLayoutIndex is None:
|
||||
min_placeholders = float('inf')
|
||||
minPh = float('inf')
|
||||
for idx in range(len(prs.slide_layouts)):
|
||||
try:
|
||||
layout = prs.slide_layouts[idx]
|
||||
placeholder_count = len(layout.placeholders) if hasattr(layout, 'placeholders') else 0
|
||||
if placeholder_count < min_placeholders:
|
||||
min_placeholders = placeholder_count
|
||||
phCount = len(layout.placeholders) if hasattr(layout, 'placeholders') else 0
|
||||
if phCount < minPh:
|
||||
minPh = phCount
|
||||
slideLayoutIndex = idx
|
||||
except:
|
||||
continue
|
||||
|
||||
# Fallback to first layout if still None
|
||||
if slideLayoutIndex is None:
|
||||
slideLayoutIndex = 0
|
||||
|
||||
slide_layout = prs.slide_layouts[slideLayoutIndex]
|
||||
slide = prs.slides.add_slide(slide_layout)
|
||||
|
||||
# Clear placeholder text instead of removing placeholders (safer approach)
|
||||
# This avoids corrupting the PPTX file structure
|
||||
try:
|
||||
for shape in slide.shapes:
|
||||
if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
|
||||
try:
|
||||
if hasattr(shape, 'text_frame'):
|
||||
shape.text_frame.clear()
|
||||
# Set text to empty string to remove "Click to add text"
|
||||
if len(shape.text_frame.paragraphs) > 0:
|
||||
shape.text_frame.paragraphs[0].text = ""
|
||||
except:
|
||||
|
|
@ -156,7 +184,7 @@ class RendererPptx(BaseRenderer):
|
|||
except Exception as placeholder_error:
|
||||
logger.warning(f"Could not clear placeholders: {str(placeholder_error)}")
|
||||
|
||||
# Add title as textbox (smaller size for slides)
|
||||
# Add title as textbox
|
||||
from pptx.util import Inches
|
||||
titleBox = slide.shapes.add_textbox(Inches(0.5), Inches(0.2), prs.slide_width - Inches(1), Inches(0.6))
|
||||
titleFrame = titleBox.text_frame
|
||||
|
|
@ -232,15 +260,14 @@ class RendererPptx(BaseRenderer):
|
|||
else:
|
||||
p.alignment = PP_ALIGN.LEFT
|
||||
|
||||
# If no slides were created, create a default slide
|
||||
# If no slides were created, create a single slide with the document title
|
||||
if not slidesData:
|
||||
slide_layout = prs.slide_layouts[0] # Title slide layout
|
||||
slide_layout = prs.slide_layouts[0]
|
||||
slide = prs.slides.add_slide(slide_layout)
|
||||
|
||||
title_shape = slide.shapes.title
|
||||
title_shape.text = title
|
||||
|
||||
# Apply title styling to default slide
|
||||
title_style = styles.get("title", {})
|
||||
if title_shape.text_frame.paragraphs[0].font:
|
||||
title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 48))
|
||||
|
|
@ -248,16 +275,12 @@ class RendererPptx(BaseRenderer):
|
|||
title_color = self._getSafeColor(title_style.get("color", (31, 78, 121)))
|
||||
title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
|
||||
|
||||
subtitle_shape = slide.placeholders[1]
|
||||
subtitle_shape.text = "Generated by PowerOn AI System"
|
||||
|
||||
# Apply subtitle styling
|
||||
paragraph_style = styles.get("paragraph", {})
|
||||
if subtitle_shape.text_frame.paragraphs[0].font:
|
||||
subtitle_shape.text_frame.paragraphs[0].font.size = Pt(paragraph_style.get("font_size", 20))
|
||||
subtitle_shape.text_frame.paragraphs[0].font.bold = paragraph_style.get("bold", False)
|
||||
paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
|
||||
subtitle_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*paragraph_color)
|
||||
# Clear subtitle placeholder instead of adding filler text
|
||||
try:
|
||||
subtitle_shape = slide.placeholders[1]
|
||||
subtitle_shape.text = ""
|
||||
except (KeyError, IndexError):
|
||||
pass
|
||||
|
||||
# Save to buffer
|
||||
buffer = io.BytesIO()
|
||||
|
|
@ -625,24 +648,23 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
sections = self._extractSections(json_content)
|
||||
metadata = self._extractMetadata(json_content)
|
||||
|
||||
# Use provided title (which comes from documents[].title) as primary source
|
||||
# Fallback to metadata.title only if title parameter is empty
|
||||
document_title = title if title else metadata.get("title", "Generated Document")
|
||||
|
||||
# Create title slide
|
||||
# Title slide (clean — just the document title, no filler text)
|
||||
slides.append({
|
||||
"title": document_title,
|
||||
"content": "Generated by PowerOn AI System\n\n" + self._formatTimestamp()
|
||||
"content": "",
|
||||
"_isTitleSlide": True,
|
||||
})
|
||||
|
||||
# Process sections into slides based on content and user intent
|
||||
slides.extend(self._createSlidesFromSections(sections, styles))
|
||||
|
||||
# If no content slides were created, create a default content slide
|
||||
if len(slides) == 1: # Only title slide
|
||||
# Content slides split by chapter headings
|
||||
contentSlides = self._createSlidesFromSections(sections, styles)
|
||||
if contentSlides:
|
||||
slides.extend(contentSlides)
|
||||
else:
|
||||
slides.append({
|
||||
"title": "Content Overview",
|
||||
"content": "No structured content found in the source documents.\n\nPlease check the source documents and try again."
|
||||
"content": ""
|
||||
})
|
||||
|
||||
return slides
|
||||
|
|
@ -941,9 +963,8 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
content = slide_data.get("content", "")
|
||||
title = slide_data.get("title", "")
|
||||
|
||||
# Check if it's a title slide (first slide)
|
||||
if not content or "Generated by PowerOn AI System" in content:
|
||||
return 0 # Title slide layout
|
||||
if not content:
|
||||
return 0
|
||||
|
||||
# Professional layout selection based on content
|
||||
if "|" in content and "-" in content:
|
||||
|
|
@ -970,67 +991,71 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
return 1 # Default to title and content layout
|
||||
|
||||
def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""Create slides from sections: each heading level 1 (chapter) creates a new slide, content accumulates until next level 1 heading."""
|
||||
"""Create slides from sections: each top-level heading creates a new slide.
|
||||
|
||||
The split level is determined dynamically: if there is exactly one H1 (the
|
||||
document title), chapters are H2; otherwise chapters are H1.
|
||||
"""
|
||||
try:
|
||||
# First pass: discover heading levels to choose the split level
|
||||
headingLevels: List[int] = []
|
||||
for section in sections:
|
||||
if section.get("content_type") == "heading":
|
||||
for el in section.get("elements", []):
|
||||
if isinstance(el, dict):
|
||||
c = el.get("content", {})
|
||||
if isinstance(c, dict):
|
||||
headingLevels.append(c.get("level", 1))
|
||||
|
||||
h1Count = headingLevels.count(1)
|
||||
h2Count = headingLevels.count(2)
|
||||
# If there's at most one H1 but multiple H2s, split on H2
|
||||
splitLevel = 2 if h1Count <= 1 and h2Count > 1 else 1
|
||||
|
||||
slides = []
|
||||
current_slide_sections = [] # Store sections (not formatted text) for proper rendering
|
||||
current_slide_title = "Content Overview"
|
||||
currentSlideSections = []
|
||||
currentSlideTitle = "Content Overview"
|
||||
|
||||
for section in sections:
|
||||
section_type = section.get("content_type", "paragraph")
|
||||
sectionType = section.get("content_type", "paragraph")
|
||||
elements = section.get("elements", [])
|
||||
|
||||
# Skip sections with no elements (unless they're headings that should create new slides)
|
||||
if not elements and section_type != "heading":
|
||||
if not elements and sectionType != "heading":
|
||||
continue
|
||||
|
||||
if section_type == "heading":
|
||||
# Extract heading level
|
||||
level = 1 # Default
|
||||
heading_text = ""
|
||||
if sectionType == "heading":
|
||||
level = 1
|
||||
headingText = ""
|
||||
for element in elements:
|
||||
if isinstance(element, dict):
|
||||
# Extract from nested content structure
|
||||
content = element.get("content", {})
|
||||
if isinstance(content, dict):
|
||||
heading_text = content.get("text", "")
|
||||
headingText = content.get("text", "")
|
||||
level = content.get("level", 1)
|
||||
elif isinstance(content, str):
|
||||
heading_text = content
|
||||
headingText = content
|
||||
level = 1
|
||||
|
||||
# Only level 1 headings (chapters) create new slides
|
||||
if level == 1:
|
||||
# If we have accumulated content, create a slide
|
||||
if current_slide_sections:
|
||||
if level <= splitLevel:
|
||||
if currentSlideSections:
|
||||
slides.append({
|
||||
"title": current_slide_title,
|
||||
"sections": current_slide_sections.copy(), # Store sections for proper rendering
|
||||
"title": currentSlideTitle,
|
||||
"sections": currentSlideSections.copy(),
|
||||
"images": []
|
||||
})
|
||||
current_slide_sections = []
|
||||
|
||||
# Start new slide with heading as title
|
||||
if heading_text:
|
||||
current_slide_title = heading_text
|
||||
else:
|
||||
# If no heading text found but this is a heading section, use section ID or default
|
||||
current_slide_title = section.get("id", "Untitled Section")
|
||||
currentSlideSections = []
|
||||
currentSlideTitle = headingText or section.get("id", "Untitled Section")
|
||||
else:
|
||||
# Level 2+ headings are added as sections to current slide
|
||||
current_slide_sections.append(section)
|
||||
elif section_type == "image":
|
||||
# Images are added to current slide (will be organized in frames)
|
||||
current_slide_sections.append(section)
|
||||
currentSlideSections.append(section)
|
||||
elif sectionType == "image":
|
||||
currentSlideSections.append(section)
|
||||
else:
|
||||
# Add section to current slide (will be rendered properly)
|
||||
current_slide_sections.append(section)
|
||||
currentSlideSections.append(section)
|
||||
|
||||
# Add final slide if there's content
|
||||
if current_slide_sections:
|
||||
if currentSlideSections:
|
||||
slides.append({
|
||||
"title": current_slide_title,
|
||||
"sections": current_slide_sections.copy(),
|
||||
"title": currentSlideTitle,
|
||||
"sections": currentSlideSections.copy(),
|
||||
"images": []
|
||||
})
|
||||
|
||||
|
|
@ -1225,14 +1250,66 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
import traceback
|
||||
logger.error(f"Traceback: {traceback.format_exc()}")
|
||||
|
||||
def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float, max_width: float = None) -> None:
|
||||
def _addMarkdownInlineRuns(self, paragraph, text: str, fontSize=None, fontColor=None, fontBold=None) -> None:
    """Parse markdown inline formatting and add Runs to a pptx paragraph.

    Every piece of text is added as an explicit Run with font properties set,
    so the paragraph never falls back to the slide-master default font.

    Args:
        paragraph: Target paragraph; its existing text is cleared first.
        text: Text that may contain **bold**, __bold__, *italic*, _italic_
            and `code` spans. ``None`` or an empty string leaves the
            paragraph empty.
        fontSize: Font size applied to every run when truthy.
        fontColor: RGB color applied to every run when truthy.
        fontBold: Bold flag for runs that are not bold markdown spans; it is
            not applied to code spans.
    """
    from pptx.util import Pt

    # Start from an empty paragraph so only the runs created below remain.
    paragraph.text = ""
    if not text:
        return

    def _applyBase(run, bold=None):
        if fontSize:
            run.font.size = fontSize
        if fontColor:
            run.font.color.rgb = fontColor
        # An explicit bold (markdown span) wins over the paragraph-wide flag.
        if bold is not None:
            run.font.bold = bold
        elif fontBold is not None:
            run.font.bold = fontBold

    def _addRun(content):
        run = paragraph.add_run()
        run.text = content
        return run

    pos = 0
    for m in _PPTX_MD_INLINE_RE.finditer(text):
        # Plain text between the previous match and this one.
        if m.start() > pos:
            _applyBase(_addRun(text[pos:m.start()]))

        boldText = m.group(2) or m.group(4)
        italicText = m.group(5) or m.group(6)
        codeText = m.group(7)
        if boldText:
            _applyBase(_addRun(boldText), bold=True)
        elif italicText:
            r = _addRun(italicText)
            r.font.italic = True
            _applyBase(r)
        elif codeText:
            r = _addRun(codeText)
            r.font.name = "Courier New"
            # Code is set at 85% of the base size, but never below 8pt.
            if fontSize and hasattr(fontSize, 'pt'):
                r.font.size = Pt(max(8, int(fontSize.pt * 0.85)))
            elif fontSize:
                r.font.size = fontSize
            if fontColor:
                r.font.color.rgb = fontColor
        pos = m.end()

    # Remaining tail (or entire string if no matches)
    if pos < len(text):
        _applyBase(_addRun(text[pos:]))
|
||||
|
||||
def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float = None, max_width: float = None) -> None:
|
||||
"""Add a PowerPoint table to slide."""
|
||||
try:
|
||||
from pptx.util import Inches, Pt
|
||||
from pptx.enum.text import PP_ALIGN
|
||||
from pptx.dml.color import RGBColor
|
||||
|
||||
# Extract from nested content structure
|
||||
content = element.get("content", {})
|
||||
if not isinstance(content, dict):
|
||||
return
|
||||
|
|
@ -1243,11 +1320,9 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
if not headers:
|
||||
return
|
||||
|
||||
# Calculate table dimensions
|
||||
num_cols = int(len(headers)) # Ensure integer
|
||||
num_rows = int(len(rows) + 1) # +1 for header row, ensure integer
|
||||
num_cols = int(len(headers))
|
||||
num_rows = int(len(rows) + 1)
|
||||
left = Inches(0.5)
|
||||
# Get presentation from stored reference or slide
|
||||
if hasattr(self, '_currentPresentation'):
|
||||
prs = self._currentPresentation
|
||||
else:
|
||||
|
|
@ -1255,7 +1330,15 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
width = max_width if max_width is not None else (prs.slide_width - Inches(1))
|
||||
row_height = Inches(0.4)
|
||||
|
||||
# Create table - ensure all parameters are proper types
|
||||
# Auto-calculate top from existing shapes when not specified
|
||||
if top is None:
|
||||
maxBottom = Inches(1.5)
|
||||
for shape in slide.shapes:
|
||||
shapeBottom = shape.top + shape.height
|
||||
if shapeBottom > maxBottom:
|
||||
maxBottom = shapeBottom
|
||||
top = maxBottom + Inches(0.15)
|
||||
|
||||
table_height = row_height * num_rows
|
||||
table_shape = slide.shapes.add_table(num_rows, num_cols, left, top, width, table_height)
|
||||
table = table_shape.table
|
||||
|
|
@ -1361,109 +1444,49 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
logger.warning(f"Error adding table to slide: {str(e)}")
|
||||
|
||||
def _addBulletListToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None:
    """Append a bullet list element to an existing slide text frame.

    Each non-blank entry of ``element["content"]["items"]`` becomes one
    level-0 paragraph prefixed with a literal bullet character. Entries
    found under an item's ``subitems`` key follow their parent as
    dash-prefixed, more deeply indented paragraphs. All text is handed to
    ``_addMarkdownInlineRuns`` with the font size and colour taken from the
    ``paragraph`` style.

    Args:
        slide: Not used by this method.
        element: List element; items may be plain values or dicts carrying
            ``text`` and, optionally, ``subitems``.
        styles: Style set; only the ``paragraph`` entry is read.
        text_frame: Text frame that receives the new paragraphs.
        font_size_multiplier: Scale applied to the style's font size; the
            resulting size is never below 10pt.

    Any failure is logged as a warning and never propagates to the caller.
    """
    try:
        from pptx.util import Pt
        from pptx.dml.color import RGBColor
        from pptx.enum.text import PP_ALIGN

        content = element.get("content", {})
        if not isinstance(content, dict):
            return

        items = content.get("items", [])
        if not items:
            return

        listStyle = styles.get("paragraph", {})
        fontSize = Pt(max(10, int(listStyle.get("font_size", 14) * font_size_multiplier)))
        fontColor = RGBColor(*self._getSafeColor(listStyle.get("color", (47, 47, 47))))

        # Spacing objects are loop-invariant, so build them once.
        itemSpacing = Pt(2)
        subSpacing = Pt(1)

        def entryText(entry) -> str:
            # Accept both {"text": ...} dicts and bare values; a missing or
            # None text counts as blank instead of crashing on .strip().
            raw = entry.get("text", "") if isinstance(entry, dict) else entry
            return "" if raw is None else str(raw)

        def addLine(prefixedText: str, spacing) -> None:
            p = text_frame.add_paragraph()
            p.level = 0
            p.alignment = PP_ALIGN.LEFT
            p.space_before = spacing
            p.space_after = spacing
            self._addMarkdownInlineRuns(p, prefixedText, fontSize=fontSize, fontColor=fontColor, fontBold=False)

        for item in items:
            itemText = entryText(item)
            if not itemText.strip():
                continue

            # Consistent bullet prefix
            addLine(f" • {itemText}", itemSpacing)

            if not isinstance(item, dict):
                continue

            # "subitems" may be present but None; treat that like an empty list
            # so one odd item does not abort the rest of the list.
            subitems = item.get("subitems") or []
            if not isinstance(subitems, (list, tuple)):
                continue

            for sub in subitems:
                subText = entryText(sub)
                if not subText.strip():
                    continue
                # Wider prefix than the parent bullet so nesting stays visible
                # even though every paragraph is level 0.
                addLine(f"     – {subText}", subSpacing)

    except Exception as e:
        logger.warning(f"Error adding bullet list to slide: {str(e)}")
|
||||
|
|
@ -1484,25 +1507,22 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
|
||||
if text:
|
||||
p = text_frame.add_paragraph()
|
||||
p.text = text
|
||||
# Headings should be level 0 (no indentation) regardless of heading level
|
||||
p.level = 0
|
||||
|
||||
heading_style = styles.get("heading", {})
|
||||
# Different font sizes for different heading levels
|
||||
if level == 1:
|
||||
base_font_size = heading_style.get("font_size", 28) # Largest for H1
|
||||
base_font_size = heading_style.get("font_size", 28)
|
||||
elif level == 2:
|
||||
base_font_size = heading_style.get("font_size", 22) # Medium for H2
|
||||
base_font_size = heading_style.get("font_size", 22)
|
||||
elif level == 3:
|
||||
base_font_size = heading_style.get("font_size", 18) # Smaller for H3
|
||||
base_font_size = heading_style.get("font_size", 18)
|
||||
else:
|
||||
base_font_size = heading_style.get("font_size", 16) # Default for H4+
|
||||
base_font_size = heading_style.get("font_size", 16)
|
||||
|
||||
calculated_size = max(12, int(base_font_size * font_size_multiplier)) # Minimum 12pt for headings
|
||||
p.font.size = Pt(calculated_size)
|
||||
p.font.bold = heading_style.get("bold", True)
|
||||
p.font.color.rgb = RGBColor(*self._getSafeColor(heading_style.get("color", (31, 78, 121))))
|
||||
calculated_size = max(12, int(base_font_size * font_size_multiplier))
|
||||
fSize = Pt(calculated_size)
|
||||
fColor = RGBColor(*self._getSafeColor(heading_style.get("color", (31, 78, 121))))
|
||||
self._addMarkdownInlineRuns(p, text, fontSize=fSize, fontColor=fColor, fontBold=True)
|
||||
# Add spacing before and after headings
|
||||
p.space_before = Pt(12 if level == 1 else 8) # More space before H1
|
||||
p.space_after = Pt(6) # Space after heading
|
||||
|
|
@ -1528,11 +1548,8 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
|
||||
if text:
|
||||
p = text_frame.add_paragraph()
|
||||
p.text = text
|
||||
# Explicitly set level to 0 for regular paragraphs (not bullets)
|
||||
p.level = 0
|
||||
|
||||
# Ensure no bullet formatting
|
||||
try:
|
||||
if hasattr(p, 'paragraph_format'):
|
||||
p.paragraph_format.bullet.type = None
|
||||
|
|
@ -1540,11 +1557,12 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
pass
|
||||
|
||||
paragraph_style = styles.get("paragraph", {})
|
||||
base_font_size = paragraph_style.get("font_size", 14) # Smaller default for better readability
|
||||
calculated_size = max(10, int(base_font_size * font_size_multiplier)) # Minimum 10pt for readability
|
||||
p.font.size = Pt(calculated_size)
|
||||
p.font.bold = paragraph_style.get("bold", False)
|
||||
p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
|
||||
base_font_size = paragraph_style.get("font_size", 14)
|
||||
calculated_size = max(10, int(base_font_size * font_size_multiplier))
|
||||
fSize = Pt(calculated_size)
|
||||
fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
|
||||
fBold = paragraph_style.get("bold", False)
|
||||
self._addMarkdownInlineRuns(p, text, fontSize=fSize, fontColor=fColor, fontBold=fBold)
|
||||
|
||||
# Add proper spacing
|
||||
p.space_before = Pt(6) # Space before paragraph
|
||||
|
|
@ -1604,261 +1622,31 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
||||
|
||||
def _renderSlideContentWithFrames(self, slide, slide_sections: List[Dict[str, Any]], slide_images: List[Dict[str, Any]], styles: Dict[str, Any], prs) -> None:
    """Render one slide's sections sequentially into a single shared textbox.

    A textbox covering the content area below the title is created and every
    section is passed, in order, to ``_renderSectionToTextFrame`` together
    with that textbox's text frame. Images supplied separately in
    ``slide_images`` are then handed to ``_addImagesToSlideInFrame`` using the
    same content rectangle.

    Args:
        slide: Slide that receives the shapes.
        slide_sections: Sections to render; ``None`` is treated as empty.
        slide_images: Standalone images for this slide; may be empty or None.
        styles: Style set forwarded to the section and image renderers.
        prs: Presentation; only ``slide_width`` and ``slide_height`` are read.

    Errors are logged and swallowed so one broken slide does not stop the
    rest of the presentation from being produced.
    """
    try:
        from pptx.util import Inches

        margin = Inches(0.5)
        contentTop = Inches(1.3)
        availableWidth = prs.slide_width - Inches(1)
        availableHeight = prs.slide_height - contentTop - Inches(0.3)

        # Create a single textbox for all non-table, non-image content
        textbox = slide.shapes.add_textbox(margin, contentTop, availableWidth, availableHeight)
        textFrame = textbox.text_frame
        textFrame.word_wrap = True
        textFrame.auto_size = None

        # NOTE(review): _renderSectionToTextFrame calls _addTableToSlide without
        # a top, and that helper derives top from the lowest existing shape.
        # This textbox already reaches to 0.3in above the slide bottom, so such
        # tables appear to be placed at the very bottom edge - verify on a
        # slide that mixes text and a table.
        for section in slide_sections or []:
            self._renderSectionToTextFrame(slide, section, styles, textFrame, font_size_multiplier=1.0)

        # Render standalone images that were passed alongside sections
        # NOTE(review): the image frame is the same rectangle as the textbox,
        # so images presumably overlap the text - confirm intended layout.
        if slide_images:
            self._addImagesToSlideInFrame(slide, slide_images, styles, margin, contentTop, availableWidth, availableHeight)

    except Exception as e:
        logger.error(f"Error rendering slide content: {str(e)}")
|
||||
|
||||
def _renderTextSectionsInFrame(self, slide, text_sections: List[Dict[str, Any]], styles: Dict[str, Any], left: float, top: float, width: float, height: float, adaptiveFontSize: bool = False) -> None:
|
||||
"""Render text sections (paragraphs, lists, headings) in a text frame."""
|
||||
|
|
@ -1935,6 +1723,14 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
except Exception as e:
|
||||
logger.warning(f"Error rendering text sections in frame: {str(e)}")
|
||||
|
||||
@staticmethod
|
||||
def _isHorizontalRule(element: Dict[str, Any]) -> bool:
|
||||
"""Detect markdown horizontal rules (---, ***, ___) that should be skipped on slides."""
|
||||
content = element.get("content", {})
|
||||
text = content.get("text", "") if isinstance(content, dict) else (content if isinstance(content, str) else "")
|
||||
stripped = text.strip()
|
||||
return bool(stripped) and all(c in "-*_ " for c in stripped) and len(stripped.replace(" ", "")) >= 3
|
||||
|
||||
def _renderSectionToTextFrame(self, slide, section: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None:
|
||||
"""Render a single section to a text frame."""
|
||||
try:
|
||||
|
|
@ -1942,7 +1738,7 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
from pptx.enum.text import PP_ALIGN
|
||||
from pptx.dml.color import RGBColor
|
||||
|
||||
section_type = section.get("content_type", "paragraph")
|
||||
sectionType = section.get("content_type", "paragraph")
|
||||
elements = section.get("elements", [])
|
||||
|
||||
if not elements:
|
||||
|
|
@ -1952,54 +1748,42 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
if not isinstance(element, dict):
|
||||
continue
|
||||
|
||||
element_type = element.get("type", "")
|
||||
if not element_type:
|
||||
element_type = section_type
|
||||
elementType = element.get("type", "") or sectionType
|
||||
|
||||
# Skip images - handled separately
|
||||
if element_type == "image":
|
||||
if elementType == "image":
|
||||
continue
|
||||
|
||||
if element_type == "bullet_list" or element_type == "list":
|
||||
# Skip horizontal rules (---, ***, ___)
|
||||
if elementType == "paragraph" and self._isHorizontalRule(element):
|
||||
continue
|
||||
|
||||
if elementType == "table":
|
||||
self._addTableToSlide(slide, element, styles)
|
||||
elif elementType in ("bullet_list", "list"):
|
||||
self._addBulletListToSlide(slide, element, styles, text_frame, font_size_multiplier)
|
||||
elif element_type == "heading":
|
||||
elif elementType == "heading":
|
||||
self._addHeadingToSlide(slide, element, styles, text_frame, font_size_multiplier)
|
||||
elif element_type == "paragraph":
|
||||
elif elementType == "paragraph":
|
||||
self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier)
|
||||
elif element_type == "code_block" or element_type == "code":
|
||||
elif elementType in ("code_block", "code"):
|
||||
self._addCodeBlockToSlide(slide, element, styles, text_frame, font_size_multiplier)
|
||||
elif element_type == "extracted_text":
|
||||
elif elementType == "extracted_text":
|
||||
content = element.get("content", "")
|
||||
source = element.get("source", "")
|
||||
if content:
|
||||
paragraph_style = styles.get("paragraph", {})
|
||||
p = text_frame.add_paragraph()
|
||||
p.text = content
|
||||
base_font_size = paragraph_style.get("font_size", 18)
|
||||
p.font.size = Pt(int(base_font_size * font_size_multiplier))
|
||||
p.font.bold = paragraph_style.get("bold", False)
|
||||
p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
|
||||
pStyle = styles.get("paragraph", {})
|
||||
fSize = Pt(max(10, int(pStyle.get("font_size", 14) * font_size_multiplier)))
|
||||
fColor = RGBColor(*self._getSafeColor(pStyle.get("color", (47, 47, 47))))
|
||||
self._addMarkdownInlineRuns(p, content, fontSize=fSize, fontColor=fColor)
|
||||
p.alignment = PP_ALIGN.LEFT
|
||||
if source:
|
||||
p.add_run(f" (Source: {source})").font.italic = True
|
||||
elif element_type == "reference":
|
||||
elif elementType == "reference":
|
||||
label = element.get("label", "Reference")
|
||||
p = text_frame.add_paragraph()
|
||||
p.text = f"[Reference: {label}]"
|
||||
p.font.italic = True
|
||||
p.alignment = PP_ALIGN.LEFT
|
||||
else:
|
||||
# Fallback to paragraph
|
||||
content = element.get("content", "")
|
||||
if isinstance(content, dict):
|
||||
text = content.get("text", "")
|
||||
elif isinstance(content, str):
|
||||
text = content
|
||||
else:
|
||||
text = ""
|
||||
|
||||
if text:
|
||||
self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier=1.0)
|
||||
self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error rendering section to text frame: {str(e)}")
|
||||
|
|
|
|||
253
tests/unit/services/test_renderer_pdf_smoke.py
Normal file
253
tests/unit/services/test_renderer_pdf_smoke.py
Normal file
|
|
@ -0,0 +1,253 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""
|
||||
Smoke test: RendererPdf with every JSON section/element shape the pipeline supports.
|
||||
|
||||
Canonical section types (datamodelJson.supportedSectionTypes): table, bullet_list, heading,
|
||||
paragraph, code_block, image.
|
||||
|
||||
PDF renderer additionally handles element types: reference, extracted_text (Phase 5D).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from modules.serviceCenter.services.serviceGeneration.renderers.rendererPdf import (
|
||||
REPORTLAB_AVAILABLE,
|
||||
RendererPdf,
|
||||
_normalizePdfMonospaceText,
|
||||
_prepareCodeBlockPlainText,
|
||||
)
|
||||
|
||||
# 1×1 transparent PNG
|
||||
_MIN_PNG_B64 = (
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="
|
||||
)
|
||||
|
||||
|
||||
def _fakeServices():
|
||||
"""RendererPdf calls services.utils.debugLogToFile; avoid None."""
|
||||
|
||||
def _noop(msg, tag=None):
|
||||
pass
|
||||
|
||||
return SimpleNamespace(utils=SimpleNamespace(debugLogToFile=_noop))
|
||||
|
||||
|
||||
def _fullDocumentJson() -> dict:
    """Build one document payload that exercises every supported section type.

    The single document contains, in order: two headings, a paragraph, a
    bullet list, a numbered list, a table, a code block, an image, and two
    paragraph sections whose elements are typed ``reference`` and
    ``extracted_text``.
    """
    docTitle = "PDF Renderer Smoke"

    def makeSection(sectionId: str, contentType: str, order: int, element: dict) -> dict:
        # Every section in this fixture carries exactly one element.
        return {
            "id": sectionId,
            "content_type": contentType,
            "order": order,
            "elements": [element],
        }

    def wrap(**content) -> dict:
        # Untyped element: the payload lives under "content".
        return {"content": content}

    paragraphText = (
        "Paragraph: **strong**, *emphasis*, __under-like bold__, "
        "_single underscores_, and `var = 1`."
    )
    codeText = (
        'def hello():\n print("<tag> & ampersand")\n return 42\n'
        "\n# tree (Unicode box drawing must not produce tofu in PDF)\n"
        "Reports/\n\u251c\u2500\u2500 2025/\n\u2502 \u2514\u2500\u2500 file.txt\n"
    )

    sections = [
        makeSection(
            "sec_h1",
            "heading",
            1,
            wrap(
                text="H1 with **bold** and a very long subtitle line that should wrap cleanly without overlapping",
                level=1,
            ),
        ),
        makeSection("sec_h2", "heading", 2, wrap(text="H2 *italic* and `inline code`", level=2)),
        makeSection("sec_para", "paragraph", 3, wrap(text=paragraphText)),
        makeSection(
            "sec_bullets",
            "bullet_list",
            4,
            wrap(
                items=["Bullet **one**", {"text": "Bullet two with *italic*"}],
                list_type="bullet",
            ),
        ),
        makeSection(
            "sec_numbered",
            "bullet_list",
            5,
            wrap(
                items=[{"text": "First numbered"}, {"text": "Second **numbered**"}],
                list_type="numbered",
            ),
        ),
        makeSection(
            "sec_table",
            "table",
            6,
            wrap(
                headers=["Col A", "Col B", "Col C"],
                rows=[
                    ["Short", "Medium length cell", "**Bold** in cell"],
                    ["R2", "Data", "`code`"],
                ],
            ),
        ),
        makeSection("sec_code", "code_block", 7, wrap(language="python", code=codeText)),
        makeSection(
            "sec_image",
            "image",
            8,
            wrap(base64Data=_MIN_PNG_B64, altText="Smoke pixel", caption="Minimal PNG (1×1)"),
        ),
        makeSection(
            "sec_reference",
            "paragraph",
            9,
            {
                "type": "reference",
                "label": "External spec",
                "documentReference": "urn:smoke:ref",
            },
        ),
        makeSection(
            "sec_extracted",
            "paragraph",
            10,
            {
                "type": "extracted_text",
                "content": "Extracted **body** with formatting.",
                "source": "fixture/source.md",
            },
        ),
    ]

    metadata = {
        "split_strategy": "single_document",
        "source_documents": [],
        "extraction_method": "smoke_test",
        "title": docTitle,
        "language": "de",
    }
    document = {
        "id": "doc_smoke",
        "title": docTitle,
        "filename": "pdf_renderer_smoke.pdf",
        "sections": sections,
    }
    return {"metadata": metadata, "documents": [document]}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_renderer_pdf_all_json_elements(tmp_path):
    """Render the full fixture document and check that a plausible PDF comes back.

    Skipped when reportlab is not installed. The rendered bytes are also
    written below ``tmp_path`` and the file size is checked.
    """
    if not REPORTLAB_AVAILABLE:
        pytest.skip("reportlab is not installed")

    rendered = await RendererPdf(services=_fakeServices()).render(
        extractedContent=_fullDocumentJson(),
        title="PDF_Renderer_Smoke",
        userPrompt=None,
        aiService=None,
    )

    # Exactly one output document, and it must look like a PDF.
    assert len(rendered) == 1
    pdfDoc = rendered[0]
    assert pdfDoc.mimeType == "application/pdf"
    assert pdfDoc.documentData[:4] == b"%PDF"
    assert pdfDoc.filename.endswith(".pdf")

    # Persist the bytes so the size check runs against a real file.
    target = tmp_path / "pdf_renderer_smoke.pdf"
    target.write_bytes(pdfDoc.documentData)
    assert target.stat().st_size > 500
|
||||
|
||||
|
||||
def test_prepare_code_block_preserves_indentation_spaces():
    """Leading spaces of code lines must survive; tabs must not remain.

    The fixture uses a four-space and a two-space indented line so that a
    regression which collapses runs of spaces is actually detectable.
    """
    raw = "def x():\n    return 1\n  two leading on line"
    prepared = _prepareCodeBlockPlainText(raw)
    assert "    return" in prepared
    assert "\t" not in _prepareCodeBlockPlainText("a\tb")
|
||||
|
||||
|
||||
def test_normalize_pdf_monospace_replaces_box_drawing():
    """Box-drawing glyphs are replaced while ordinary text is kept."""
    normalized = _normalizePdfMonospaceText("\u2500\u2502\u251c\u2514\u252c\nReports/\n")

    # Horizontal and vertical line glyphs must be gone.
    for glyph in ("\u2500", "\u2502"):
        assert glyph not in normalized
    assert "Reports/" in normalized
|
||||
|
||||
|
||||
def test_pdf_heading_font_sizes_strictly_decrease():
    """H3 must not fall back to H1 styles (previous bug: ## smaller than ###).

    Checks the default style set and the per-level default definitions; when
    reportlab is available it also checks the built heading styles, including
    a style set that has no ``heading3`` entry.
    """
    renderer = RendererPdf(services=_fakeServices())
    styles = renderer._getDefaultStyleSet()

    assert styles["heading1"]["font_size"] > styles["heading2"]["font_size"] > styles["heading3"]["font_size"]
    assert renderer._defaultHeadingStyleDef(2)["font_size"] > renderer._defaultHeadingStyleDef(3)["font_size"]

    if not REPORTLAB_AVAILABLE:
        return

    # Sizes of the actual style objects built from the full style set.
    builtSizes = [renderer._createHeadingStyle(styles, level).fontSize for level in (1, 2, 3)]
    assert builtSizes[0] > builtSizes[1] > builtSizes[2]

    # Style set without "heading3": level 3 must still come out below level 2.
    partial = {key: styles[key] for key in ("heading1", "heading2")}
    assert renderer._createHeadingStyle(partial, 3).fontSize < renderer._createHeadingStyle(partial, 2).fontSize
|
||||
|
||||
|
||||
def test_inline_code_angle_brackets_escaped_in_font_span():
    """Paths like `.../<Slug>/` must not break ReportLab XML inside Courier.

    The inline-code span has to be rendered in the Courier font and its
    angle brackets must appear as entities; a raw ``<Slug>`` would be read
    as an unknown tag by the paragraph parser.
    """
    renderer = RendererPdf(services=_fakeServices())
    xml = renderer._markdownInlineToReportlabXml("unter `Eingabe/<Slug>/` speichern")
    assert 'name="Courier"' in xml
    assert "&lt;Slug&gt;" in xml
    assert "<Slug>" not in xml
|
||||
Loading…
Reference in a new issue