From ecababe6000885b364f1b99d33d4291a81774929 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 5 Jan 2026 01:41:28 +0100 Subject: [PATCH] optimized renderers docx pptx xlsx for performance --- .../renderers/rendererDocx.py | 177 +++++++++++++----- .../renderers/rendererPptx.py | 99 ++++++---- .../renderers/rendererXlsx.py | 114 +++++------ 3 files changed, 253 insertions(+), 137 deletions(-) diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py index de08f5f1..f4ad9994 100644 --- a/modules/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -439,35 +439,61 @@ class RendererDocx(BaseRenderer): # else: no borders self.logger.debug(f"_renderJsonTable: Borders applied in {time.time() - border_start:.2f}s") - # Add headers with AI-generated styling + # Add headers with AI-generated styling - OPTIMIZED for performance header_start = time.time() header_row = table.rows[0] header_style = styles["table_header"] + + # Pre-calculate and cache style objects to avoid repeated parsing + bg_color_hex = header_style["background"].lstrip('#') + header_bg_rgb = RGBColor(int(bg_color_hex[0:2], 16), int(bg_color_hex[2:4], 16), int(bg_color_hex[4:6], 16)) + text_color_hex = header_style["text_color"].lstrip('#') + header_text_rgb = RGBColor(int(text_color_hex[0:2], 16), int(text_color_hex[2:4], 16), int(text_color_hex[4:6], 16)) + header_font_size = Pt(11) + header_align = WD_ALIGN_PARAGRAPH.CENTER if header_style["align"] == "center" else WD_ALIGN_PARAGRAPH.LEFT + header_bold = header_style["bold"] + for i, header in enumerate(headers): if i < len(header_row.cells): cell = header_row.cells[i] cell.text = str(header) # Apply background color - bg_color = header_style["background"].lstrip('#') - self._setCellBackground(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16))) + self._setCellBackground(cell, header_bg_rgb) - # Apply text styling - for paragraph in cell.paragraphs: - paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER if header_style["align"] == "center" else WD_ALIGN_PARAGRAPH.LEFT - for run in paragraph.runs: - run.bold = header_style["bold"] - run.font.size = Pt(11) - text_color = header_style["text_color"].lstrip('#') - run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16)) + # Apply text styling - use direct access instead of iterating + if len(cell.paragraphs) > 0: + para = cell.paragraphs[0] + para.alignment = header_align + # Use direct run access instead of iterating + if len(para.runs) > 0: + run = para.runs[0] + run.bold = header_bold + run.font.size = header_font_size + run.font.color.rgb = header_text_rgb + else: + # Create run if none exists + run = para.add_run() + run.bold = header_bold + run.font.size = header_font_size + run.font.color.rgb = header_text_rgb self.logger.debug(f"_renderJsonTable: Headers rendered in {time.time() - header_start:.2f}s") - # Add data rows with AI-generated styling + # Add data rows with AI-generated styling - OPTIMIZED for performance rows_start = time.time() cell_style = styles["table_cell"] total_cells = len(rows) * len(headers) log_interval = max(1, total_cells // 20) # Log every 5% progress + # Pre-calculate and cache style objects to avoid repeated parsing + needsStyling = cell_style.get("text_color") != "#2F2F2F" or cell_style.get("font_size") != 10 + cell_text_color_rgb = None + cell_font_size = None + if needsStyling: + text_color_hex = cell_style["text_color"].lstrip('#') + cell_text_color_rgb = RGBColor(int(text_color_hex[0:2], 16), int(text_color_hex[2:4], 16), int(text_color_hex[4:6], 16)) + cell_font_size = Pt(cell_style.get("font_size", 10)) + for row_idx, row_data in enumerate(rows): if row_idx + 1 < len(table.rows): table_row = table.rows[row_idx + 1] @@ -476,16 +502,22 @@ class RendererDocx(BaseRenderer): cell = table_row.cells[col_idx] cell.text = str(cell_data) - # Apply text styling - OPTIMIZED: Only style if needed - # For large tables, styling every cell can be very slow - # Check if we need to apply styling (only if style differs from default) - if cell_style.get("text_color") != "#2F2F2F" or cell_style.get("font_size") != 10: - for paragraph in cell.paragraphs: - paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT - for run in paragraph.runs: - run.font.size = Pt(10) - text_color = cell_style["text_color"].lstrip('#') - run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16)) + # Apply text styling - OPTIMIZED: Only style if needed, use direct access + if needsStyling: + # Use direct paragraph access instead of iterating + if len(cell.paragraphs) > 0: + para = cell.paragraphs[0] + para.alignment = WD_ALIGN_PARAGRAPH.LEFT + # Use direct run access instead of iterating + if len(para.runs) > 0: + run = para.runs[0] + run.font.size = cell_font_size + run.font.color.rgb = cell_text_color_rgb + else: + # Create run if none exists + run = para.add_run() + run.font.size = cell_font_size + run.font.color.rgb = cell_text_color_rgb # Log progress for large tables if (row_idx + 1) % log_interval == 0 or row_idx == len(rows) - 1: @@ -595,7 +627,7 @@ class RendererDocx(BaseRenderer): def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None: - """Render a JSON bullet list to DOCX using AI-generated styles.""" + """Render a JSON bullet list to DOCX using AI-generated styles - OPTIMIZED for performance.""" try: # Extract from nested content structure content = list_data.get("content", {}) @@ -604,20 +636,38 @@ class RendererDocx(BaseRenderer): items = content.get("items", []) bullet_style = styles.get("bullet_list", {}) + # Pre-calculate and cache style objects to avoid repeated parsing + font_size_pt = None + text_color_rgb = None + if bullet_style: + if "font_size" in bullet_style: + font_size_pt = Pt(bullet_style["font_size"]) + if "color" in bullet_style: + color_hex = bullet_style["color"].lstrip('#') + text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) + for item in items: if isinstance(item, str): para = doc.add_paragraph(item, style='List Bullet') elif isinstance(item, dict) and "text" in item: para = doc.add_paragraph(item["text"], style='List Bullet') - # Apply bullet list styling from style set + # Apply bullet list styling from style set - use cached objects if bullet_style and para.runs: - for run in para.runs: - if "font_size" in bullet_style: - run.font.size = Pt(bullet_style["font_size"]) - if "color" in bullet_style: - color_hex = bullet_style["color"].lstrip('#') - run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) + # Use direct access instead of iterating + if len(para.runs) > 0: + run = para.runs[0] + if font_size_pt: + run.font.size = font_size_pt + if text_color_rgb: + run.font.color.rgb = text_color_rgb + else: + # Create run if none exists + run = para.add_run() + if font_size_pt: + run.font.size = font_size_pt + if text_color_rgb: + run.font.color.rgb = text_color_rgb except Exception as e: self.logger.warning(f"Error rendering bullet list: {str(e)}") @@ -670,17 +720,36 @@ class RendererDocx(BaseRenderer): if text: para = doc.add_paragraph(text) - # Apply paragraph styling from style set + # Apply paragraph styling from style set - OPTIMIZED: pre-calculate style objects paragraph_style = styles.get("paragraph", {}) if paragraph_style: - for run in para.runs: - if "font_size" in paragraph_style: - run.font.size = Pt(paragraph_style["font_size"]) - if "bold" in paragraph_style: - run.font.bold = paragraph_style["bold"] - if "color" in paragraph_style: - color_hex = paragraph_style["color"].lstrip('#') - run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) + # Pre-calculate and cache style objects + font_size_pt = None + text_color_rgb = None + if "font_size" in paragraph_style: + font_size_pt = Pt(paragraph_style["font_size"]) + if "color" in paragraph_style: + color_hex = paragraph_style["color"].lstrip('#') + text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) + bold = paragraph_style.get("bold", False) + + # Use direct access instead of iterating + if len(para.runs) > 0: + run = para.runs[0] + if font_size_pt: + run.font.size = font_size_pt + run.font.bold = bold + if text_color_rgb: + run.font.color.rgb = text_color_rgb + else: + # Create run if none exists + run = para.add_run() + if font_size_pt: + run.font.size = font_size_pt + run.font.bold = bold + if text_color_rgb: + run.font.color.rgb = text_color_rgb + if "align" in paragraph_style: align = paragraph_style["align"] if align == "center": @@ -707,16 +776,32 @@ class RendererDocx(BaseRenderer): if code: if language: lang_para = doc.add_paragraph(f"Code ({language}):") - if lang_para.runs: + if len(lang_para.runs) > 0: lang_para.runs[0].bold = True + # Pre-calculate and cache style objects + code_font_name = code_style.get("font", "Courier New") + code_font_size_pt = Pt(code_style.get("font_size", 9)) + code_text_color_rgb = None + if "color" in code_style: + color_hex = code_style["color"].lstrip('#') + code_text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) + code_para = doc.add_paragraph(code) - for run in code_para.runs: - run.font.name = code_style.get("font", "Courier New") - run.font.size = Pt(code_style.get("font_size", 9)) - if "color" in code_style: - color_hex = code_style["color"].lstrip('#') - run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) + # Use direct access instead of iterating + if len(code_para.runs) > 0: + run = code_para.runs[0] + run.font.name = code_font_name + run.font.size = code_font_size_pt + if code_text_color_rgb: + run.font.color.rgb = code_text_color_rgb + else: + # Create run if none exists + run = code_para.add_run() + run.font.name = code_font_name + run.font.size = code_font_size_pt + if code_text_color_rgb: + run.font.color.rgb = code_text_color_rgb except Exception as e: self.logger.warning(f"Error rendering code block: {str(e)}") diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index 9ada961a..800b21ba 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -1266,78 +1266,96 @@ JSON ONLY. NO OTHER TEXT.""" for col_idx in range(num_cols): table.columns[col_idx].width = col_width_emu - # Add headers with styling + # Add headers with styling - OPTIMIZED: pre-calculate color/style objects header_style = styles.get("table_header", {}) header_bg_color = self._getSafeColor(header_style.get("background", (31, 78, 121))) header_text_color = self._getSafeColor(header_style.get("text_color", (255, 255, 255))) header_font_size = header_style.get("font_size", 18) + # Pre-calculate and cache RGB color objects + header_bg_rgb = RGBColor(*header_bg_color) + header_text_rgb = RGBColor(*header_text_color) + header_font_size_pt = Pt(header_font_size) + header_bold = header_style.get("bold", True) + + # Determine alignment once + align = header_style.get("align", "center") + if align == "left": + header_alignment = PP_ALIGN.LEFT + elif align == "right": + header_alignment = PP_ALIGN.RIGHT + else: + header_alignment = PP_ALIGN.CENTER + for col_idx, header in enumerate(headers): cell = table.cell(0, col_idx) # Clear existing text and set new text cell.text_frame.clear() - cell.text = str(header) if header else "" + header_text = str(header) if header else "" + cell.text = header_text # Ensure paragraph exists if len(cell.text_frame.paragraphs) == 0: cell.text_frame.add_paragraph() - # Apply styling + # Apply styling - use cached objects cell.fill.solid() - cell.fill.fore_color.rgb = RGBColor(*header_bg_color) + cell.fill.fore_color.rgb = header_bg_rgb para = cell.text_frame.paragraphs[0] - para.font.bold = header_style.get("bold", True) - para.font.size = Pt(header_font_size) - para.font.color.rgb = RGBColor(*header_text_color) - - align = header_style.get("align", "center") - if align == "left": - para.alignment = PP_ALIGN.LEFT - elif align == "right": - para.alignment = PP_ALIGN.RIGHT - else: - para.alignment = PP_ALIGN.CENTER + para.font.bold = header_bold + para.font.size = header_font_size_pt + para.font.color.rgb = header_text_rgb + para.alignment = header_alignment # Ensure text is set on paragraph if not para.text: - para.text = str(header) if header else "" + para.text = header_text - # Add data rows with styling + # Add data rows with styling - OPTIMIZED: pre-calculate color/style objects cell_style = styles.get("table_cell", {}) cell_bg_color = self._getSafeColor(cell_style.get("background", (255, 255, 255))) cell_text_color = self._getSafeColor(cell_style.get("text_color", (47, 47, 47))) cell_font_size = cell_style.get("font_size", 16) + # Pre-calculate and cache RGB color objects + cell_bg_rgb = RGBColor(*cell_bg_color) + cell_text_rgb = RGBColor(*cell_text_color) + cell_font_size_pt = Pt(cell_font_size) + cell_bold = cell_style.get("bold", False) + + # Determine alignment once + align = cell_style.get("align", "left") + if align == "center": + cell_alignment = PP_ALIGN.CENTER + elif align == "right": + cell_alignment = PP_ALIGN.RIGHT + else: + cell_alignment = PP_ALIGN.LEFT + for row_idx, row_data in enumerate(rows, 1): for col_idx, cell_data in enumerate(row_data[:num_cols]): cell = table.cell(row_idx, col_idx) # Clear existing text and set new text cell.text_frame.clear() - cell.text = str(cell_data) if cell_data is not None else "" + cell_text = str(cell_data) if cell_data is not None else "" + cell.text = cell_text # Ensure paragraph exists if len(cell.text_frame.paragraphs) == 0: cell.text_frame.add_paragraph() - # Apply styling + # Apply styling - use cached objects cell.fill.solid() - cell.fill.fore_color.rgb = RGBColor(*cell_bg_color) + cell.fill.fore_color.rgb = cell_bg_rgb para = cell.text_frame.paragraphs[0] - para.font.size = Pt(cell_font_size) - para.font.bold = cell_style.get("bold", False) - para.font.color.rgb = RGBColor(*cell_text_color) - - align = cell_style.get("align", "left") - if align == "center": - para.alignment = PP_ALIGN.CENTER - elif align == "right": - para.alignment = PP_ALIGN.RIGHT - else: - para.alignment = PP_ALIGN.LEFT + para.font.size = cell_font_size_pt + para.font.bold = cell_bold + para.font.color.rgb = cell_text_rgb + para.alignment = cell_alignment # Ensure text is set on paragraph if not para.text: - para.text = str(cell_data) if cell_data is not None else "" + para.text = cell_text except Exception as e: logger.warning(f"Error adding table to slide: {str(e)}") @@ -1362,6 +1380,13 @@ JSON ONLY. NO OTHER TEXT.""" base_font_size = list_style.get("font_size", 14) calculated_size = max(10, int(base_font_size * font_size_multiplier)) # Minimum 10pt for readability + # Pre-calculate and cache style objects to avoid repeated parsing + font_size_pt = Pt(calculated_size) + text_color = self._getSafeColor(list_style.get("color", (47, 47, 47))) + text_color_rgb = RGBColor(*text_color) + space_before_pt = Pt(2) + space_after_pt = Pt(2) + logger.debug(f"Rendering bullet list with {len(items)} items") for idx, item in enumerate(items): @@ -1387,12 +1412,12 @@ JSON ONLY. NO OTHER TEXT.""" # Set text content p.text = item_text - # Apply formatting first - p.font.size = Pt(calculated_size) - p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47)))) + # Apply formatting - use cached objects + p.font.size = font_size_pt + p.font.color.rgb = text_color_rgb p.alignment = PP_ALIGN.LEFT # Left align bullet lists - p.space_before = Pt(2) # Small spacing before - p.space_after = Pt(2) # Small spacing after + p.space_before = space_before_pt # Small spacing before + p.space_after = space_after_pt # Small spacing after # In python-pptx, setting level > 0 should enable bullets automatically # However, some versions may not support paragraph_format, so we'll use manual bullets as fallback diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py index 750f4eb0..195aa80d 100644 --- a/modules/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py @@ -1148,60 +1148,69 @@ class RendererXlsx(BaseRenderer): headerRow = startRow header_style = styles.get("table_header", {}) - # Add headers with formatting + # Pre-calculate and cache style objects to avoid repeated parsing + header_font_color = self._getSafeColor(header_style.get("text_color", "FF000000")) + header_font = Font(bold=header_style.get("bold", True), color=header_font_color) + header_bg_color = None + header_fill = None + if header_style.get("background"): + header_bg_color = self._getSafeColor(header_style["background"]) + header_fill = PatternFill(start_color=header_bg_color, end_color=header_bg_color, fill_type="solid") + header_alignment = Alignment( + horizontal=self._getSafeAlignment(header_style.get("align", "left")), + vertical="center" + ) + + # Add headers with formatting - OPTIMIZED: use cached style objects for col, header in enumerate(headers, 1): sanitized_header = self._sanitizeCellValue(header) cell = sheet.cell(row=headerRow, column=col, value=sanitized_header) - # Apply styling with fallbacks - don't let styling errors prevent data rendering + # Apply styling with fallbacks - use pre-calculated objects try: - # Font styling - cell.font = Font( - bold=header_style.get("bold", True), - color=self._getSafeColor(header_style.get("text_color", "FF000000")) - ) + cell.font = header_font except Exception: - # Fallback to default font if styling fails try: cell.font = Font(bold=True, color=self._getSafeColor("FF000000")) except Exception: - pass # Continue even if font fails + pass try: - # Background color - if header_style.get("background"): - cell.fill = PatternFill( - start_color=self._getSafeColor(header_style["background"]), - end_color=self._getSafeColor(header_style["background"]), - fill_type="solid" - ) + if header_fill: + cell.fill = header_fill except Exception: - pass # Continue without background color if it fails + pass try: - # Alignment - cell.alignment = Alignment( - horizontal=self._getSafeAlignment(header_style.get("align", "left")), - vertical="center" - ) + cell.alignment = header_alignment except Exception: - # Fallback to default alignment if it fails try: cell.alignment = Alignment(horizontal="left", vertical="center") except Exception: - pass # Continue even if alignment fails + pass try: - # Border cell.border = thin_border except Exception: - pass # Continue without border if it fails + pass startRow += 1 - # Add rows with formatting + # Add rows with formatting - OPTIMIZED: pre-calculate style objects cell_style = styles.get("table_cell", {}) header_count = len(headers) + + # Pre-calculate and cache style objects to avoid repeated parsing + cell_text_color = None + cell_font = None + if cell_style.get("text_color"): + cell_text_color = self._getSafeColor(cell_style["text_color"]) + cell_font = Font(color=cell_text_color) + cell_alignment = Alignment( + horizontal=self._getSafeAlignment(cell_style.get("align", "left")), + vertical="center" + ) + for row_data in rows: # Handle different row formats if isinstance(row_data, list): @@ -1223,32 +1232,25 @@ class RendererXlsx(BaseRenderer): sanitized_value = self._sanitizeCellValue(cell_value) cell = sheet.cell(row=startRow, column=col, value=sanitized_value) - # Apply styling with fallbacks - don't let styling errors prevent data rendering + # Apply styling with fallbacks - use pre-calculated objects try: - # Font styling - if cell_style.get("text_color"): - cell.font = Font(color=self._getSafeColor(cell_style["text_color"])) + if cell_font: + cell.font = cell_font except Exception: - pass # Continue without font color if it fails + pass try: - # Alignment - cell.alignment = Alignment( - horizontal=self._getSafeAlignment(cell_style.get("align", "left")), - vertical="center" - ) + cell.alignment = cell_alignment except Exception: - # Fallback to default alignment if it fails try: cell.alignment = Alignment(horizontal="left", vertical="center") except Exception: - pass # Continue even if alignment fails + pass try: - # Border cell.border = thin_border except Exception: - pass # Continue without border if it fails + pass startRow += 1 @@ -1448,28 +1450,32 @@ class RendererXlsx(BaseRenderer): if code: code_style = styles.get("code_block", {}) + # Pre-calculate and cache style objects to avoid repeated parsing + code_font_name = code_style.get("font", "Courier New") + code_font_size = code_style.get("font_size", 10) + code_text_color = self._getSafeColor(code_style.get("color", "FF2F2F2F")) + code_font = Font(name=code_font_name, size=code_font_size, color=code_text_color) + + code_bg_color = None + code_fill = None + if code_style.get("background"): + code_bg_color = self._getSafeColor(code_style["background"]) + code_fill = PatternFill(start_color=code_bg_color, end_color=code_bg_color, fill_type="solid") + # Add language label if present if language: langCell = sheet.cell(row=startRow, column=1, value=f"Code ({language}):") - langCell.font = Font(bold=True, color=self._getSafeColor(code_style.get("color", "FF000000"))) + langCell.font = Font(bold=True, color=code_text_color) startRow += 1 - # Split code into lines and add each line + # Split code into lines and add each line - use cached style objects code_lines = code.split('\n') for line in code_lines: codeCell = sheet.cell(row=startRow, column=1, value=line) - codeCell.font = Font( - name=code_style.get("font", "Courier New"), - size=code_style.get("font_size", 10), - color=self._getSafeColor(code_style.get("color", "FF2F2F2F")) - ) + codeCell.font = code_font # Set background color if specified - if code_style.get("background"): - codeCell.fill = PatternFill( - start_color=self._getSafeColor(code_style["background"]), - end_color=self._getSafeColor(code_style["background"]), - fill_type="solid" - ) + if code_fill: + codeCell.fill = code_fill startRow += 1 # Add spacing after code block