Optimized the DOCX, PPTX, and XLSX renderers for performance

This commit is contained in:
ValueOn AG 2026-01-05 01:41:28 +01:00
parent 25cbaa45b8
commit ecababe600
3 changed files with 253 additions and 137 deletions

View file

@ -439,35 +439,61 @@ class RendererDocx(BaseRenderer):
# else: no borders
self.logger.debug(f"_renderJsonTable: Borders applied in {time.time() - border_start:.2f}s")
# Add headers with AI-generated styling
# Add headers with AI-generated styling - OPTIMIZED for performance
header_start = time.time()
header_row = table.rows[0]
header_style = styles["table_header"]
# Pre-calculate and cache style objects to avoid repeated parsing
bg_color_hex = header_style["background"].lstrip('#')
header_bg_rgb = RGBColor(int(bg_color_hex[0:2], 16), int(bg_color_hex[2:4], 16), int(bg_color_hex[4:6], 16))
text_color_hex = header_style["text_color"].lstrip('#')
header_text_rgb = RGBColor(int(text_color_hex[0:2], 16), int(text_color_hex[2:4], 16), int(text_color_hex[4:6], 16))
header_font_size = Pt(11)
header_align = WD_ALIGN_PARAGRAPH.CENTER if header_style["align"] == "center" else WD_ALIGN_PARAGRAPH.LEFT
header_bold = header_style["bold"]
for i, header in enumerate(headers):
if i < len(header_row.cells):
cell = header_row.cells[i]
cell.text = str(header)
# Apply background color
bg_color = header_style["background"].lstrip('#')
self._setCellBackground(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))
self._setCellBackground(cell, header_bg_rgb)
# Apply text styling
for paragraph in cell.paragraphs:
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER if header_style["align"] == "center" else WD_ALIGN_PARAGRAPH.LEFT
for run in paragraph.runs:
run.bold = header_style["bold"]
run.font.size = Pt(11)
text_color = header_style["text_color"].lstrip('#')
run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
# Apply text styling - use direct access instead of iterating
if len(cell.paragraphs) > 0:
para = cell.paragraphs[0]
para.alignment = header_align
# Use direct run access instead of iterating
if len(para.runs) > 0:
run = para.runs[0]
run.bold = header_bold
run.font.size = header_font_size
run.font.color.rgb = header_text_rgb
else:
# Create run if none exists
run = para.add_run()
run.bold = header_bold
run.font.size = header_font_size
run.font.color.rgb = header_text_rgb
self.logger.debug(f"_renderJsonTable: Headers rendered in {time.time() - header_start:.2f}s")
# Add data rows with AI-generated styling
# Add data rows with AI-generated styling - OPTIMIZED for performance
rows_start = time.time()
cell_style = styles["table_cell"]
total_cells = len(rows) * len(headers)
log_interval = max(1, total_cells // 20) # Log every 5% progress
# Pre-calculate and cache style objects to avoid repeated parsing
needsStyling = cell_style.get("text_color") != "#2F2F2F" or cell_style.get("font_size") != 10
cell_text_color_rgb = None
cell_font_size = None
if needsStyling:
text_color_hex = cell_style["text_color"].lstrip('#')
cell_text_color_rgb = RGBColor(int(text_color_hex[0:2], 16), int(text_color_hex[2:4], 16), int(text_color_hex[4:6], 16))
cell_font_size = Pt(cell_style.get("font_size", 10))
for row_idx, row_data in enumerate(rows):
if row_idx + 1 < len(table.rows):
table_row = table.rows[row_idx + 1]
@ -476,16 +502,22 @@ class RendererDocx(BaseRenderer):
cell = table_row.cells[col_idx]
cell.text = str(cell_data)
# Apply text styling - OPTIMIZED: Only style if needed
# For large tables, styling every cell can be very slow
# Check if we need to apply styling (only if style differs from default)
if cell_style.get("text_color") != "#2F2F2F" or cell_style.get("font_size") != 10:
for paragraph in cell.paragraphs:
paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
for run in paragraph.runs:
run.font.size = Pt(10)
text_color = cell_style["text_color"].lstrip('#')
run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
# Apply text styling - OPTIMIZED: Only style if needed, use direct access
if needsStyling:
# Use direct paragraph access instead of iterating
if len(cell.paragraphs) > 0:
para = cell.paragraphs[0]
para.alignment = WD_ALIGN_PARAGRAPH.LEFT
# Use direct run access instead of iterating
if len(para.runs) > 0:
run = para.runs[0]
run.font.size = cell_font_size
run.font.color.rgb = cell_text_color_rgb
else:
# Create run if none exists
run = para.add_run()
run.font.size = cell_font_size
run.font.color.rgb = cell_text_color_rgb
# Log progress for large tables
if (row_idx + 1) % log_interval == 0 or row_idx == len(rows) - 1:
@ -595,7 +627,7 @@ class RendererDocx(BaseRenderer):
def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON bullet list to DOCX using AI-generated styles."""
"""Render a JSON bullet list to DOCX using AI-generated styles - OPTIMIZED for performance."""
try:
# Extract from nested content structure
content = list_data.get("content", {})
@ -604,20 +636,38 @@ class RendererDocx(BaseRenderer):
items = content.get("items", [])
bullet_style = styles.get("bullet_list", {})
# Pre-calculate and cache style objects to avoid repeated parsing
font_size_pt = None
text_color_rgb = None
if bullet_style:
if "font_size" in bullet_style:
font_size_pt = Pt(bullet_style["font_size"])
if "color" in bullet_style:
color_hex = bullet_style["color"].lstrip('#')
text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
for item in items:
if isinstance(item, str):
para = doc.add_paragraph(item, style='List Bullet')
elif isinstance(item, dict) and "text" in item:
para = doc.add_paragraph(item["text"], style='List Bullet')
# Apply bullet list styling from style set
# Apply bullet list styling from style set - use cached objects
if bullet_style and para.runs:
for run in para.runs:
if "font_size" in bullet_style:
run.font.size = Pt(bullet_style["font_size"])
if "color" in bullet_style:
color_hex = bullet_style["color"].lstrip('#')
run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
# Use direct access instead of iterating
if len(para.runs) > 0:
run = para.runs[0]
if font_size_pt:
run.font.size = font_size_pt
if text_color_rgb:
run.font.color.rgb = text_color_rgb
else:
# Create run if none exists
run = para.add_run()
if font_size_pt:
run.font.size = font_size_pt
if text_color_rgb:
run.font.color.rgb = text_color_rgb
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
@ -670,17 +720,36 @@ class RendererDocx(BaseRenderer):
if text:
para = doc.add_paragraph(text)
# Apply paragraph styling from style set
# Apply paragraph styling from style set - OPTIMIZED: pre-calculate style objects
paragraph_style = styles.get("paragraph", {})
if paragraph_style:
for run in para.runs:
if "font_size" in paragraph_style:
run.font.size = Pt(paragraph_style["font_size"])
if "bold" in paragraph_style:
run.font.bold = paragraph_style["bold"]
if "color" in paragraph_style:
color_hex = paragraph_style["color"].lstrip('#')
run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
# Pre-calculate and cache style objects
font_size_pt = None
text_color_rgb = None
if "font_size" in paragraph_style:
font_size_pt = Pt(paragraph_style["font_size"])
if "color" in paragraph_style:
color_hex = paragraph_style["color"].lstrip('#')
text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
bold = paragraph_style.get("bold", False)
# Use direct access instead of iterating
if len(para.runs) > 0:
run = para.runs[0]
if font_size_pt:
run.font.size = font_size_pt
run.font.bold = bold
if text_color_rgb:
run.font.color.rgb = text_color_rgb
else:
# Create run if none exists
run = para.add_run()
if font_size_pt:
run.font.size = font_size_pt
run.font.bold = bold
if text_color_rgb:
run.font.color.rgb = text_color_rgb
if "align" in paragraph_style:
align = paragraph_style["align"]
if align == "center":
@ -707,16 +776,32 @@ class RendererDocx(BaseRenderer):
if code:
if language:
lang_para = doc.add_paragraph(f"Code ({language}):")
if lang_para.runs:
if len(lang_para.runs) > 0:
lang_para.runs[0].bold = True
# Pre-calculate and cache style objects
code_font_name = code_style.get("font", "Courier New")
code_font_size_pt = Pt(code_style.get("font_size", 9))
code_text_color_rgb = None
if "color" in code_style:
color_hex = code_style["color"].lstrip('#')
code_text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
code_para = doc.add_paragraph(code)
for run in code_para.runs:
run.font.name = code_style.get("font", "Courier New")
run.font.size = Pt(code_style.get("font_size", 9))
if "color" in code_style:
color_hex = code_style["color"].lstrip('#')
run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
# Use direct access instead of iterating
if len(code_para.runs) > 0:
run = code_para.runs[0]
run.font.name = code_font_name
run.font.size = code_font_size_pt
if code_text_color_rgb:
run.font.color.rgb = code_text_color_rgb
else:
# Create run if none exists
run = code_para.add_run()
run.font.name = code_font_name
run.font.size = code_font_size_pt
if code_text_color_rgb:
run.font.color.rgb = code_text_color_rgb
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")

View file

@ -1266,78 +1266,96 @@ JSON ONLY. NO OTHER TEXT."""
for col_idx in range(num_cols):
table.columns[col_idx].width = col_width_emu
# Add headers with styling
# Add headers with styling - OPTIMIZED: pre-calculate color/style objects
header_style = styles.get("table_header", {})
header_bg_color = self._getSafeColor(header_style.get("background", (31, 78, 121)))
header_text_color = self._getSafeColor(header_style.get("text_color", (255, 255, 255)))
header_font_size = header_style.get("font_size", 18)
# Pre-calculate and cache RGB color objects
header_bg_rgb = RGBColor(*header_bg_color)
header_text_rgb = RGBColor(*header_text_color)
header_font_size_pt = Pt(header_font_size)
header_bold = header_style.get("bold", True)
# Determine alignment once
align = header_style.get("align", "center")
if align == "left":
header_alignment = PP_ALIGN.LEFT
elif align == "right":
header_alignment = PP_ALIGN.RIGHT
else:
header_alignment = PP_ALIGN.CENTER
for col_idx, header in enumerate(headers):
cell = table.cell(0, col_idx)
# Clear existing text and set new text
cell.text_frame.clear()
cell.text = str(header) if header else ""
header_text = str(header) if header else ""
cell.text = header_text
# Ensure paragraph exists
if len(cell.text_frame.paragraphs) == 0:
cell.text_frame.add_paragraph()
# Apply styling
# Apply styling - use cached objects
cell.fill.solid()
cell.fill.fore_color.rgb = RGBColor(*header_bg_color)
cell.fill.fore_color.rgb = header_bg_rgb
para = cell.text_frame.paragraphs[0]
para.font.bold = header_style.get("bold", True)
para.font.size = Pt(header_font_size)
para.font.color.rgb = RGBColor(*header_text_color)
align = header_style.get("align", "center")
if align == "left":
para.alignment = PP_ALIGN.LEFT
elif align == "right":
para.alignment = PP_ALIGN.RIGHT
else:
para.alignment = PP_ALIGN.CENTER
para.font.bold = header_bold
para.font.size = header_font_size_pt
para.font.color.rgb = header_text_rgb
para.alignment = header_alignment
# Ensure text is set on paragraph
if not para.text:
para.text = str(header) if header else ""
para.text = header_text
# Add data rows with styling
# Add data rows with styling - OPTIMIZED: pre-calculate color/style objects
cell_style = styles.get("table_cell", {})
cell_bg_color = self._getSafeColor(cell_style.get("background", (255, 255, 255)))
cell_text_color = self._getSafeColor(cell_style.get("text_color", (47, 47, 47)))
cell_font_size = cell_style.get("font_size", 16)
# Pre-calculate and cache RGB color objects
cell_bg_rgb = RGBColor(*cell_bg_color)
cell_text_rgb = RGBColor(*cell_text_color)
cell_font_size_pt = Pt(cell_font_size)
cell_bold = cell_style.get("bold", False)
# Determine alignment once
align = cell_style.get("align", "left")
if align == "center":
cell_alignment = PP_ALIGN.CENTER
elif align == "right":
cell_alignment = PP_ALIGN.RIGHT
else:
cell_alignment = PP_ALIGN.LEFT
for row_idx, row_data in enumerate(rows, 1):
for col_idx, cell_data in enumerate(row_data[:num_cols]):
cell = table.cell(row_idx, col_idx)
# Clear existing text and set new text
cell.text_frame.clear()
cell.text = str(cell_data) if cell_data is not None else ""
cell_text = str(cell_data) if cell_data is not None else ""
cell.text = cell_text
# Ensure paragraph exists
if len(cell.text_frame.paragraphs) == 0:
cell.text_frame.add_paragraph()
# Apply styling
# Apply styling - use cached objects
cell.fill.solid()
cell.fill.fore_color.rgb = RGBColor(*cell_bg_color)
cell.fill.fore_color.rgb = cell_bg_rgb
para = cell.text_frame.paragraphs[0]
para.font.size = Pt(cell_font_size)
para.font.bold = cell_style.get("bold", False)
para.font.color.rgb = RGBColor(*cell_text_color)
align = cell_style.get("align", "left")
if align == "center":
para.alignment = PP_ALIGN.CENTER
elif align == "right":
para.alignment = PP_ALIGN.RIGHT
else:
para.alignment = PP_ALIGN.LEFT
para.font.size = cell_font_size_pt
para.font.bold = cell_bold
para.font.color.rgb = cell_text_rgb
para.alignment = cell_alignment
# Ensure text is set on paragraph
if not para.text:
para.text = str(cell_data) if cell_data is not None else ""
para.text = cell_text
except Exception as e:
logger.warning(f"Error adding table to slide: {str(e)}")
@ -1362,6 +1380,13 @@ JSON ONLY. NO OTHER TEXT."""
base_font_size = list_style.get("font_size", 14)
calculated_size = max(10, int(base_font_size * font_size_multiplier)) # Minimum 10pt for readability
# Pre-calculate and cache style objects to avoid repeated parsing
font_size_pt = Pt(calculated_size)
text_color = self._getSafeColor(list_style.get("color", (47, 47, 47)))
text_color_rgb = RGBColor(*text_color)
space_before_pt = Pt(2)
space_after_pt = Pt(2)
logger.debug(f"Rendering bullet list with {len(items)} items")
for idx, item in enumerate(items):
@ -1387,12 +1412,12 @@ JSON ONLY. NO OTHER TEXT."""
# Set text content
p.text = item_text
# Apply formatting first
p.font.size = Pt(calculated_size)
p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47))))
# Apply formatting - use cached objects
p.font.size = font_size_pt
p.font.color.rgb = text_color_rgb
p.alignment = PP_ALIGN.LEFT # Left align bullet lists
p.space_before = Pt(2) # Small spacing before
p.space_after = Pt(2) # Small spacing after
p.space_before = space_before_pt # Small spacing before
p.space_after = space_after_pt # Small spacing after
# In python-pptx, setting level > 0 should enable bullets automatically
# However, some versions may not support paragraph_format, so we'll use manual bullets as fallback

View file

@ -1148,60 +1148,69 @@ class RendererXlsx(BaseRenderer):
headerRow = startRow
header_style = styles.get("table_header", {})
# Add headers with formatting
# Pre-calculate and cache style objects to avoid repeated parsing
header_font_color = self._getSafeColor(header_style.get("text_color", "FF000000"))
header_font = Font(bold=header_style.get("bold", True), color=header_font_color)
header_bg_color = None
header_fill = None
if header_style.get("background"):
header_bg_color = self._getSafeColor(header_style["background"])
header_fill = PatternFill(start_color=header_bg_color, end_color=header_bg_color, fill_type="solid")
header_alignment = Alignment(
horizontal=self._getSafeAlignment(header_style.get("align", "left")),
vertical="center"
)
# Add headers with formatting - OPTIMIZED: use cached style objects
for col, header in enumerate(headers, 1):
sanitized_header = self._sanitizeCellValue(header)
cell = sheet.cell(row=headerRow, column=col, value=sanitized_header)
# Apply styling with fallbacks - don't let styling errors prevent data rendering
# Apply styling with fallbacks - use pre-calculated objects
try:
# Font styling
cell.font = Font(
bold=header_style.get("bold", True),
color=self._getSafeColor(header_style.get("text_color", "FF000000"))
)
cell.font = header_font
except Exception:
# Fallback to default font if styling fails
try:
cell.font = Font(bold=True, color=self._getSafeColor("FF000000"))
except Exception:
pass # Continue even if font fails
pass
try:
# Background color
if header_style.get("background"):
cell.fill = PatternFill(
start_color=self._getSafeColor(header_style["background"]),
end_color=self._getSafeColor(header_style["background"]),
fill_type="solid"
)
if header_fill:
cell.fill = header_fill
except Exception:
pass # Continue without background color if it fails
pass
try:
# Alignment
cell.alignment = Alignment(
horizontal=self._getSafeAlignment(header_style.get("align", "left")),
vertical="center"
)
cell.alignment = header_alignment
except Exception:
# Fallback to default alignment if it fails
try:
cell.alignment = Alignment(horizontal="left", vertical="center")
except Exception:
pass # Continue even if alignment fails
pass
try:
# Border
cell.border = thin_border
except Exception:
pass # Continue without border if it fails
pass
startRow += 1
# Add rows with formatting
# Add rows with formatting - OPTIMIZED: pre-calculate style objects
cell_style = styles.get("table_cell", {})
header_count = len(headers)
# Pre-calculate and cache style objects to avoid repeated parsing
cell_text_color = None
cell_font = None
if cell_style.get("text_color"):
cell_text_color = self._getSafeColor(cell_style["text_color"])
cell_font = Font(color=cell_text_color)
cell_alignment = Alignment(
horizontal=self._getSafeAlignment(cell_style.get("align", "left")),
vertical="center"
)
for row_data in rows:
# Handle different row formats
if isinstance(row_data, list):
@ -1223,32 +1232,25 @@ class RendererXlsx(BaseRenderer):
sanitized_value = self._sanitizeCellValue(cell_value)
cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
# Apply styling with fallbacks - don't let styling errors prevent data rendering
# Apply styling with fallbacks - use pre-calculated objects
try:
# Font styling
if cell_style.get("text_color"):
cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
if cell_font:
cell.font = cell_font
except Exception:
pass # Continue without font color if it fails
pass
try:
# Alignment
cell.alignment = Alignment(
horizontal=self._getSafeAlignment(cell_style.get("align", "left")),
vertical="center"
)
cell.alignment = cell_alignment
except Exception:
# Fallback to default alignment if it fails
try:
cell.alignment = Alignment(horizontal="left", vertical="center")
except Exception:
pass # Continue even if alignment fails
pass
try:
# Border
cell.border = thin_border
except Exception:
pass # Continue without border if it fails
pass
startRow += 1
@ -1448,28 +1450,32 @@ class RendererXlsx(BaseRenderer):
if code:
code_style = styles.get("code_block", {})
# Pre-calculate and cache style objects to avoid repeated parsing
code_font_name = code_style.get("font", "Courier New")
code_font_size = code_style.get("font_size", 10)
code_text_color = self._getSafeColor(code_style.get("color", "FF2F2F2F"))
code_font = Font(name=code_font_name, size=code_font_size, color=code_text_color)
code_bg_color = None
code_fill = None
if code_style.get("background"):
code_bg_color = self._getSafeColor(code_style["background"])
code_fill = PatternFill(start_color=code_bg_color, end_color=code_bg_color, fill_type="solid")
# Add language label if present
if language:
langCell = sheet.cell(row=startRow, column=1, value=f"Code ({language}):")
langCell.font = Font(bold=True, color=self._getSafeColor(code_style.get("color", "FF000000")))
langCell.font = Font(bold=True, color=code_text_color)
startRow += 1
# Split code into lines and add each line
# Split code into lines and add each line - use cached style objects
code_lines = code.split('\n')
for line in code_lines:
codeCell = sheet.cell(row=startRow, column=1, value=line)
codeCell.font = Font(
name=code_style.get("font", "Courier New"),
size=code_style.get("font_size", 10),
color=self._getSafeColor(code_style.get("color", "FF2F2F2F"))
)
codeCell.font = code_font
# Set background color if specified
if code_style.get("background"):
codeCell.fill = PatternFill(
start_color=self._getSafeColor(code_style["background"]),
end_color=self._getSafeColor(code_style["background"]),
fill_type="solid"
)
if code_fill:
codeCell.fill = code_fill
startRow += 1
# Add spacing after code block