gateway/modules/services/serviceGeneration/renderers/rendererXlsx.py

1486 lines
69 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Excel renderer for report generation using openpyxl.
"""
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import io
import base64
from datetime import datetime, UTC, date
try:
from dateutil import parser as date_parser
DATEUTIL_AVAILABLE = True
except ImportError:
DATEUTIL_AVAILABLE = False
try:
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.table import Table, TableStyleInfo
OPENPYXL_AVAILABLE = True
except ImportError:
OPENPYXL_AVAILABLE = False
class RendererXlsx(BaseRenderer):
"""Renders content to Excel format using openpyxl."""
@classmethod
def getSupportedFormats(cls) -> List[str]:
"""Return supported Excel formats."""
return ['xlsx', 'xls', 'excel']
@classmethod
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['spreadsheet', 'workbook']
@classmethod
def getPriority(cls) -> int:
"""Return priority for Excel renderer."""
return 110
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
"""Return output style classification: Excel spreadsheets are formatted documents."""
return 'document'
@classmethod
def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
    """Return every section content type; the Excel renderer accepts them all.

    Args:
        formatName: Accepted for interface compatibility; not used.

    Returns:
        A new list with all entries of ``supportedSectionTypes``.
    """
    # Imported at call time, exactly as before.
    from modules.datamodels.datamodelJson import supportedSectionTypes
    acceptedTypes = [sectionType for sectionType in supportedSectionTypes]
    return acceptedTypes
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    """Render extracted JSON content to an Excel workbook.

    When openpyxl is not installed the call is delegated to the CSV renderer.
    When Excel generation fails, a small CSV document describing the error is
    returned instead of raising.

    Args:
        extractedContent: Document content in the standardized schema
            ({metadata: {...}, documents: [{sections: [...]}]}).
        title: Document title; also used to derive the filename when the
            first document does not carry one.
        userPrompt: Optional user prompt forwarded to style generation.
        aiService: Optional AI service forwarded to style generation.

    Returns:
        A list with exactly one RenderedDocument (xlsx on success, CSV on error).
    """
    import csv  # only needed for the error fallback

    xlsxMimeType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    # Resolve metadata once, up front, so the error path cannot fail on a
    # missing or non-dict payload.
    content = extractedContent if isinstance(extractedContent, dict) else {}
    metadata = content.get("metadata", {})
    metadataIsDict = isinstance(metadata, dict)
    documentType = metadata.get("documentType") if metadataIsDict else None
    documentMetadata = metadata if metadataIsDict else None

    try:
        if not OPENPYXL_AVAILABLE:
            # Fallback to CSV if openpyxl is not available
            from .rendererCsv import RendererCsv
            return await RendererCsv().render(extractedContent, title, userPrompt, aiService)

        excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)

        # Prefer the filename carried by the first document, else derive one from the title
        filename = None
        documents = content.get("documents", [])
        if documents and isinstance(documents[0], dict):
            filename = documents[0].get("filename")
        if not filename:
            filename = self._determineFilename(title, xlsxMimeType)

        # The generator returns base64 text; decode it to raw bytes
        if isinstance(excelContent, str):
            try:
                excelBytes = base64.b64decode(excelContent)
            except ValueError:
                # binascii.Error is a ValueError: not valid base64, keep the text as bytes
                excelBytes = excelContent.encode('utf-8')
        else:
            excelBytes = excelContent

        return [
            RenderedDocument(
                documentData=excelBytes,
                mimeType=xlsxMimeType,
                filename=filename,
                documentType=documentType,
                metadata=documentMetadata
            )
        ]
    except Exception as e:
        self.logger.error(f"Error rendering Excel: {str(e)}")
        # Build the fallback with the csv module so commas, quotes and line
        # breaks in the title or error text cannot corrupt the file.
        csvBuffer = io.StringIO()
        csvWriter = csv.writer(csvBuffer, lineterminator="\n")
        csvWriter.writerow(["Title", "Content"])
        csvWriter.writerow([f"{title}", f"Error rendering Excel report: {str(e)}"])
        fallbackContent = csvBuffer.getvalue().rstrip("\n")
        return [
            RenderedDocument(
                documentData=fallbackContent.encode('utf-8'),
                mimeType="text/csv",
                filename=self._determineFilename(title, "text/csv"),
                documentType=documentType,
                metadata=documentMetadata
            )
        ]
def _generateExcel(self, content: str, title: str) -> str:
    """Build a three-sheet workbook (Summary, Data, Analysis) from plain text.

    Args:
        content: Text content handed to the Data and Analysis sheets.
        title: Title written to the Summary sheet.

    Returns:
        The saved workbook as a base64-encoded string.

    Raises:
        Exception: When the workbook cannot be saved even after the retry,
            or when any other step fails (logged, then re-raised).
    """
    try:
        workbook = Workbook()
        # Discard the sheet openpyxl creates automatically
        workbook.remove(workbook.active)

        summarySheet = workbook.create_sheet("Summary", 0)
        dataSheet = workbook.create_sheet("Data", 1)
        analysisSheet = workbook.create_sheet("Analysis", 2)

        self._populateSummarySheet(summarySheet, title, workbook)
        self._populateDataSheet(dataSheet, content)
        self._populateAnalysisSheet(analysisSheet, content)

        # A workbook must contain at least one sheet
        if not workbook.worksheets:
            workbook.create_sheet("Sheet1")

        output = io.BytesIO()
        try:
            workbook.save(output)
            output.seek(0)
        except Exception as save_error:
            self.logger.error(f"Error saving Excel workbook: {str(save_error)}")
            try:
                # Drop sheets whose title is missing or blank, then retry once
                for candidate in list(workbook.worksheets):
                    if not (candidate.title and candidate.title.strip()):
                        workbook.remove(candidate)
                if not workbook.worksheets:
                    workbook.create_sheet("Sheet1")
                output = io.BytesIO()
                workbook.save(output)
                output.seek(0)
            except Exception as retry_error:
                self.logger.error(f"Retry save also failed: {str(retry_error)}")
                raise Exception(f"Failed to save Excel workbook: {str(save_error)}")

        return base64.b64encode(output.getvalue()).decode('utf-8')
    except Exception as e:
        self.logger.error(f"Error generating Excel: {str(e)}")
        raise
def _populateSummarySheet(self, sheet, title: str, wb: Optional["Workbook"] = None):
    """Fill the summary sheet with title, generation info and a key metric.

    The ``wb`` annotation is a string on purpose: annotations are evaluated
    when the function is defined, and ``Workbook`` does not exist when the
    optional openpyxl import failed.

    Args:
        sheet: Worksheet to write to.
        title: Text placed in A1.
        wb: Workbook the sheet belongs to; used only to check whether a
            sheet titled "Data" exists before writing a formula against it.

    Failures are logged as warnings and not raised.
    """
    try:
        # Title
        titleCell = sheet['A1']
        titleCell.value = title
        titleCell.font = Font(size=16, bold=True)
        titleCell.alignment = Alignment(horizontal='left')

        # Generation info
        sheet['A3'] = "Generated:"
        sheet['B3'] = self._formatTimestamp()
        sheet['A4'] = "Status:"
        sheet['B4'] = "Generated Successfully"

        # Key metrics placeholder
        sheet['A6'] = "Key Metrics:"
        sheet['A6'].font = Font(bold=True)
        sheet['A7'] = "Total Items:"

        # Only reference the Data sheet in a formula when it exists
        hasDataSheet = wb is not None and any(ws.title == "Data" for ws in wb.worksheets)
        if hasDataSheet:
            sheet['B7'] = "=COUNTA(Data!A:A)-1"  # non-empty cells in Data!A minus the header
        else:
            sheet['B7'] = "N/A"

        # Fixed column widths
        sheet.column_dimensions['A'].width = 20
        sheet.column_dimensions['B'].width = 30
    except Exception as e:
        self.logger.warning(f"Could not populate summary sheet: {str(e)}")
def _populateDataSheet(self, sheet, content: str):
    """Write a fixed header row followed by one sheet row per non-empty text line.

    Lines containing '|' are split into cells (empty pieces dropped, at most
    five columns); any other line goes into column A.

    Args:
        sheet: Worksheet to write to.
        content: Newline-separated text.

    Failures are logged as warnings and not raised.
    """
    try:
        columnTitles = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"]
        for colIndex, columnTitle in enumerate(columnTitles, 1):
            headerCell = sheet.cell(row=1, column=colIndex, value=columnTitle)
            headerCell.font = Font(bold=True)
            headerCell.fill = PatternFill(start_color="FFCCCCCC", end_color="FFCCCCCC", fill_type="solid")

        nextRow = 2
        for rawLine in content.split('\n'):
            text = rawLine.strip()
            if not text:
                continue
            if '|' in text:
                # Pipe-delimited line: treat as table row
                pieces = [piece.strip() for piece in text.split('|') if piece.strip()]
                for colIndex, piece in enumerate(pieces[:5], 1):
                    sheet.cell(row=nextRow, column=colIndex, value=piece)
            else:
                sheet.cell(row=nextRow, column=1, value=text)
            nextRow += 1

        # Fixed width for the five data columns
        for colIndex in range(1, 6):
            sheet.column_dimensions[get_column_letter(colIndex)].width = 20
    except Exception as e:
        self.logger.warning(f"Could not populate data sheet: {str(e)}")
def _populateAnalysisSheet(self, sheet, content: str):
    """Write simple line statistics and static recommendations to column A.

    Args:
        sheet: Worksheet to write to.
        content: Newline-separated text whose lines are counted.

    Failures are logged as warnings and not raised.
    """
    try:
        sheet['A1'] = "Analysis & Insights"
        sheet['A1'].font = Font(size=14, bold=True)

        allLines = content.split('\n')
        sheet['A3'] = "Content Analysis:"
        sheet['A3'].font = Font(bold=True)

        # Classify lines: pipe-delimited rows, list items, everything else
        tableCount = len([ln for ln in allLines if '|' in ln])
        listCount = len([ln for ln in allLines if ln.startswith(('- ', '* '))])
        textCount = len(allLines) - tableCount - listCount

        statistics = [
            f"Total Lines: {len(allLines)}",
            f"Table Rows: {tableCount}",
            f"List Items: {listCount}",
            f"Text Lines: {textCount}",
        ]
        rowNumber = 4
        for statistic in statistics:
            sheet[f'A{rowNumber}'] = statistic
            rowNumber += 1
        rowNumber += 1  # blank row before the recommendations

        sheet[f'A{rowNumber}'] = "Recommendations:"
        sheet[f'A{rowNumber}'].font = Font(bold=True)
        recommendations = ["1. Review data accuracy", "2. Consider additional analysis", "3. Update regularly"]
        for recommendation in recommendations:
            rowNumber += 1
            sheet[f'A{rowNumber}'] = recommendation

        sheet.column_dimensions['A'].width = 30
    except Exception as e:
        self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
    """Create a workbook from the structured JSON document and return it as base64.

    Steps: resolve the style set, validate the schema, create the sheets,
    populate them, save (with one retry after removing blank-titled sheets)
    and base64-encode the bytes.

    Args:
        jsonContent: Document in the standardized schema.
        title: Preferred document title.
        userPrompt: Optional prompt forwarded to style resolution.
        aiService: Optional AI service forwarded to style resolution.

    Returns:
        Base64 text of the saved workbook.

    Raises:
        Exception: Any failure, re-wrapped as "Excel generation failed: ...".
    """
    try:
        debugLog = self.services.utils.debugLogToFile
        debugLog(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER")
        debugLog(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER")

        # Styles come from metadata when present, otherwise from AI or defaults
        styles = await self._getStyleSet(jsonContent, userPrompt, aiService)

        if not self._validateJsonStructure(jsonContent):
            raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

        metadata = self._extractMetadata(jsonContent)
        # Title parameter wins; metadata title is the fallback.
        # The value is computed here but not used further in this method.
        document_title = title or metadata.get("title", "Generated Document")

        workbook = Workbook()
        sheets = self._createExcelSheets(workbook, jsonContent, styles)
        debugLog(f"EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}", "EXCEL_RENDERER")
        self._populateExcelSheets(sheets, jsonContent, styles)

        # A workbook must contain at least one sheet
        if not workbook.worksheets:
            workbook.create_sheet("Sheet1")

        output = io.BytesIO()
        try:
            workbook.save(output)
            output.seek(0)
        except Exception as save_error:
            self.logger.error(f"Error saving Excel workbook: {str(save_error)}")
            try:
                # Drop sheets whose title is missing or blank, then retry once
                for candidate in list(workbook.worksheets):
                    if not (candidate.title and candidate.title.strip()):
                        workbook.remove(candidate)
                if not workbook.worksheets:
                    workbook.create_sheet("Sheet1")
                output = io.BytesIO()
                workbook.save(output)
                output.seek(0)
            except Exception as retry_error:
                self.logger.error(f"Retry save also failed: {str(retry_error)}")
                raise Exception(f"Failed to save Excel workbook: {str(save_error)}")

        excelBytes = output.getvalue()
        debugLog(f"EXCEL BYTES LENGTH: {len(excelBytes)}", "EXCEL_RENDERER")
        try:
            excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
            debugLog(f"EXCEL BASE64 LENGTH: {len(excelBase64)}", "EXCEL_RENDERER")
        except Exception as b64_error:
            debugLog(f"BASE64 ENCODING ERROR: {b64_error}", "EXCEL_RENDERER")
            raise
        return excelBase64
    except Exception as e:
        self.logger.error(f"Error generating Excel from JSON: {str(e)}")
        raise Exception(f"Excel generation failed: {str(e)}")
async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
"""Get style set - use styles from document generation metadata if available,
otherwise enhance default styles with AI if userPrompt provided.
WICHTIG: In a dynamic scalable AI system, styling should come from document generation,
not be generated separately by renderers. Only fall back to AI if styles not provided.
Args:
extractedContent: Document content with metadata (may contain styles)
userPrompt: User's prompt (AI will detect style instructions in any language)
aiService: AI service (used only if styles not in metadata and userPrompt provided)
templateName: Name of template style set (None = default)
Returns:
Dict with style definitions for all document styles
"""
# Get default style set
defaultStyleSet = self._getDefaultStyleSet()
# FIRST: Check if styles are provided in document generation metadata (preferred approach)
if extractedContent:
metadata = extractedContent.get("metadata", {})
if isinstance(metadata, dict):
styles = metadata.get("styles")
if styles and isinstance(styles, dict):
self.logger.debug("Using styles from document generation metadata")
enhancedStyleSet = self._convertColorsFormat(styles)
return self._validateStylesContrast(enhancedStyleSet)
# FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata)
if userPrompt and aiService:
self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...")
enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
# Convert colors to Excel format after getting styles
enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet)
return self._validateStylesContrast(enhancedStyleSet)
else:
# Use default styles only
return defaultStyleSet
async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
    """Ask the AI service for styles derived from the user prompt.

    Args:
        userPrompt: Prompt used to build the style template.
        defaultStyleSet: Styles used in the template and as fallback.
        aiService: Service that answers the style request.

    Returns:
        The AI-provided styles, or ``defaultStyleSet`` when anything fails.
    """
    try:
        promptTemplate = self._createAiStyleTemplate("xlsx", userPrompt, defaultStyleSet)
        return await self._getAiStylesWithExcelColors(aiService, promptTemplate, defaultStyleSet)
    except Exception as e:
        self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles")
        return defaultStyleSet
def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
bgColor = header.get("background", "FFFFFFFF")
textColor = header.get("text_color", "FF000000")
# Normalize colors (remove # if present, ensure aRGB format)
bgColor = self._normalizeColor(bgColor)
textColor = self._normalizeColor(textColor)
# If both are white or both are dark, fix it
if bgColor.upper() == "FFFFFFFF" and textColor.upper() == "FFFFFFFF":
header["background"] = "FF4F4F4F"
header["text_color"] = "FFFFFFFF"
elif bgColor.upper() == "FF000000" and textColor.upper() == "FF000000":
header["background"] = "FF4F4F4F"
header["text_color"] = "FFFFFFFF"
else:
# Ensure colors are in correct format
header["background"] = bgColor
header["text_color"] = textColor
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
bgColor = cell.get("background", "FFFFFFFF")
textColor = cell.get("text_color", "FF000000")
# Normalize colors (remove # if present, ensure aRGB format)
bgColor = self._normalizeColor(bgColor)
textColor = self._normalizeColor(textColor)
# If both are white or both are dark, fix it
if bgColor.upper() == "FFFFFFFF" and textColor.upper() == "FFFFFFFF":
cell["background"] = "FFFFFFFF"
cell["text_color"] = "FF2F2F2F"
elif bgColor.upper() == "FF000000" and textColor.upper() == "FF000000":
cell["background"] = "FFFFFFFF"
cell["text_color"] = "FF2F2F2F"
else:
# Ensure colors are in correct format
cell["background"] = bgColor
cell["text_color"] = textColor
return styles
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._getDefaultStyleSet()
def _normalizeColor(self, colorValue: str) -> str:
"""Normalize color to aRGB format without # prefix."""
if not isinstance(colorValue, str):
return "FF000000"
# Remove # prefix if present
if colorValue.startswith('#'):
colorValue = colorValue[1:]
# Convert to uppercase for consistency
colorValue = colorValue.upper()
# Ensure aRGB format (8 characters)
if len(colorValue) == 6:
# Convert RRGGBB to AARRGGBB (add FF alpha channel)
return f"FF{colorValue}"
elif len(colorValue) == 8:
# Already aRGB format
return colorValue
else:
# Unexpected format, return default black
return "FF000000"
def _getDefaultStyleSet(self) -> Dict[str, Any]:
"""Default Excel style set - used when no style instructions present."""
return {
"title": {"font_size": 16, "color": "FF1F4E79", "bold": True, "align": "left"},
"heading": {"font_size": 14, "color": "FF2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "FF4F4F4F", "text_color": "FFFFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "FFFFFFFF", "text_color": "FF2F2F2F", "bold": False, "align": "left"},
"bullet_list": {"font_size": 11, "color": "FF2F2F2F", "indent": 2},
"paragraph": {"font_size": 11, "color": "FF2F2F2F", "bold": False, "align": "left"},
"code_block": {"font": "Courier New", "font_size": 10, "color": "FF2F2F2F", "background": "FFF5F5F5"}
}
async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper Excel color conversion."""
if not aiService:
return defaultStyles
try:
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
requestOptions = AiCallOptions()
requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
response = await aiService.callAi(request)
import json
import re
# Clean and parse JSON
result = response.content.strip() if response and response.content else ""
# Check if result is empty
if not result:
self.logger.warning("AI styling returned empty response, using defaults")
return defaultStyles
# Extract JSON from markdown if present
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
if json_match:
result = json_match.group(1).strip()
self.services.utils.debugLogToFile(f"EXTRACTED JSON FROM MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
elif result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
self.services.utils.debugLogToFile(f"CLEANED JSON FROM MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
elif result.startswith('```'):
result = re.sub(r'^```\s*', '', result)
result = re.sub(r'\s*```$', '', result)
self.services.utils.debugLogToFile(f"CLEANED JSON FROM GENERIC MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
# Try to parse JSON
try:
styles = json.loads(result)
except json.JSONDecodeError as json_error:
self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
return defaultStyles
# Convert colors to Excel aRGB format
styles = self._convertColorsFormat(styles)
return styles
except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return defaultStyles
def _getSafeAlignment(self, alignValue: Any) -> str:
"""Get safe alignment value for openpyxl. Valid values: 'left', 'general', 'distributed', 'fill', 'justify', 'center', 'right', 'centerContinuous'."""
if not alignValue:
return "left"
alignStr = str(alignValue).lower().strip()
# Map common alignment values to openpyxl values
alignmentMap = {
"left": "left",
"right": "right",
"center": "center",
"centre": "center",
"general": "general",
"distributed": "distributed",
"fill": "fill",
"justify": "justify",
"centercontinuous": "centerContinuous",
"center-continuous": "centerContinuous",
"start": "left",
"end": "right",
"middle": "center"
}
# Check direct mapping
if alignStr in alignmentMap:
return alignmentMap[alignStr]
# Check if it contains alignment keywords
if "left" in alignStr or "start" in alignStr:
return "left"
elif "right" in alignStr or "end" in alignStr:
return "right"
elif "center" in alignStr or "centre" in alignStr or "middle" in alignStr:
return "center"
# Default to left if unknown
return "left"
def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str:
"""Get a safe aRGB color value for Excel (without # prefix)."""
if not isinstance(colorValue, str):
return default
# Use the normalize function for consistency
try:
normalized = self._normalizeColor(colorValue)
return normalized
except Exception:
return default
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert hex colors to aRGB format for Excel compatibility (without # prefix)."""
try:
self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER")
for styleName, styleConfig in styles.items():
if isinstance(styleConfig, dict):
for prop, value in styleConfig.items():
if isinstance(value, str):
# Normalize color to aRGB format without # prefix
styles[styleName][prop] = self._normalizeColor(value)
return styles
except Exception as e:
self.logger.warning(f"Color conversion failed: {str(e)}")
return styles
def _createExcelSheets(self, wb: "Workbook", jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
    """Create the workbook's sheets and return them keyed by lower-case title.

    Sheet names come from ``styles["sheet_names"]`` when that is a non-empty
    list (a single string is treated as one name); otherwise they are derived
    from the document content. The ``wb`` annotation is a string so the
    module can still be imported when openpyxl is missing.

    Args:
        wb: Workbook to add sheets to; its default sheet becomes the first one.
        jsonContent: Document content used to derive sheet names.
        styles: Style set, optionally containing "sheet_names".

    Returns:
        Dict mapping lower-cased sheet title to worksheet, in creation order.
    """
    sheets = {}

    sheetNames = styles.get("sheet_names")
    if isinstance(sheetNames, str):
        # Iterating a plain string would create one sheet per character
        sheetNames = [sheetNames]
    if not isinstance(sheetNames, (list, tuple)) or not sheetNames:
        sheetNames = self._generateSheetNamesFromContent(jsonContent)
    self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheetNames}", "EXCEL_RENDERER")

    for i, sheetName in enumerate(sheetNames):
        sanitized_name = self._sanitizeSheetName(sheetName)
        if i == 0:
            # Reuse the default sheet for the first name
            sheet = wb.active
            sheet.title = sanitized_name
        else:
            sheet = wb.create_sheet(sanitized_name, i)
        # Key by the title the sheet actually carries. NOTE(review): openpyxl
        # is expected to rename duplicate titles, which keeps keys distinct
        # when two requested names differ only in case - confirm.
        sheets[sheet.title.lower()] = sheet
    return sheets
def _sanitizeSheetName(self, name: str) -> str:
"""Sanitize sheet name: remove invalid characters and ensure valid length."""
if not name:
return "Sheet"
# Remove invalid characters: [ ] : * ? / \
invalid_chars = ['[', ']', ':', '*', '?', '/', '\\']
sanitized = name
for char in invalid_chars:
sanitized = sanitized.replace(char, '')
# Remove leading/trailing spaces and apostrophes
sanitized = sanitized.strip().strip("'")
# Ensure not empty
if not sanitized:
sanitized = "Sheet"
# Excel sheet name limit is 31 characters
return sanitized[:31]
def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]:
"""Generate sheet names: each heading level 1 (chapter) creates a new tab."""
sections = self._extractSections(jsonContent)
# If no sections, create a single sheet
if not sections:
return ["Content"]
# Only heading level 1 (chapters) create new tabs
sheetNames = []
for section in sections:
if section.get("content_type") == "heading":
# Extract heading text and level from elements
elements = section.get("elements", [])
if elements and isinstance(elements, list) and len(elements) > 0:
headingElement = elements[0]
content = headingElement.get("content", {})
if isinstance(content, dict):
headingText = content.get("text", "")
level = content.get("level", 1)
elif isinstance(content, str):
headingText = content
level = 1
else:
headingText = ""
level = 1
# Only level 1 headings (chapters) create tabs
if headingText and level == 1:
sanitized_name = self._sanitizeSheetName(headingText)
# Ensure unique sheet names
if sanitized_name not in sheetNames:
sheetNames.append(sanitized_name)
else:
# Add number suffix for duplicates
counter = 1
base_name = sanitized_name[:28] # Leave room for " (1)"
while f"{base_name} ({counter})" in sheetNames:
counter += 1
sheetNames.append(f"{base_name} ({counter})"[:31])
# If no level 1 headings found, use document title
if not sheetNames:
# Use documents[].title as primary source, fallback to metadata.title
documents = jsonContent.get("documents", [])
if documents and isinstance(documents[0], dict) and documents[0].get("title"):
documentTitle = documents[0].get("title")
else:
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
sheetNames.append(self._sanitizeSheetName(documentTitle))
return sheetNames
def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
    """Distribute the sections over the sheets, one tab per chapter.

    Each level-1 heading that carries text moves on to the next sheet (the
    same condition under which a sheet name is generated for a heading); all
    following sections are written below it. Sections before the first
    chapter go to the first sheet. When a chapter is assigned the sheet that
    is already being written, writing continues below the existing rows
    instead of restarting at row 1.

    Args:
        sheets: Worksheets keyed by lower-case name, in creation order.
        jsonContent: Document in the standardized schema.
        styles: Style set forwarded to the section writer.

    Failures are logged as warnings and not raised.
    """
    try:
        sheetNames = list(sheets.keys())
        if not sheetNames:
            return

        sections = self._extractSections(jsonContent)

        currentSheetIndex = 0
        currentSheet = None
        currentRow = 1

        for section in sections:
            contentType = section.get("content_type", "paragraph")

            if contentType == "heading":
                level = 1
                headingText = ""
                elements = section.get("elements", [])
                if elements and isinstance(elements, list):
                    content = elements[0].get("content", {})
                    if isinstance(content, dict):
                        level = content.get("level", 1)
                        headingText = content.get("text", "")
                    elif isinstance(content, str):
                        headingText = content

                # Headings without text never produced a sheet name, so they
                # must not consume a sheet here either.
                if level == 1 and headingText:
                    if currentSheetIndex < len(sheetNames):
                        targetSheet = sheets[sheetNames[currentSheetIndex]]
                        currentSheetIndex += 1
                        # Restart at row 1 only on a sheet not written yet;
                        # otherwise content placed before the first chapter
                        # would be overwritten.
                        if targetSheet is not currentSheet:
                            currentSheet = targetSheet
                            currentRow = 1
                    else:
                        # More chapters than sheets: keep appending to the last sheet
                        currentSheet = sheets[sheetNames[-1]]

            # Content before any chapter heading goes to the first sheet
            if currentSheet is None:
                currentSheet = sheets[sheetNames[0]]

            currentRow = self._addSectionToSheet(currentSheet, section, styles, currentRow)
            currentRow += 1  # spacing between sections
    except Exception as e:
        self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
def _populateTableSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheetTitle: str):
    """Fill a sheet with a title and the table held by the section's first element.

    Rows may be plain lists or ``{"cells": [...]}`` objects; single cells may
    be raw values or ``{"value": ...}`` dicts. Rows are padded or truncated
    to the number of columns, which is the header count or, when there are
    no headers, the length of the longest row. The input rows are not modified.

    Args:
        sheet: Worksheet to write to.
        section: Section whose first element carries content.headers / content.rows.
        styles: Style set ("title", "table_header", "table_cell" are read).
        sheetTitle: Text written to A1.

    Failures are logged as warnings and not raised.
    """
    try:
        # Sheet title
        sheet['A1'] = sheetTitle
        title_style = styles.get("title", {})
        sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(title_style.get("color", "FF1F4E79")))
        sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style.get("align", "left")))

        # Table data from the first element (canonical JSON format)
        headers = []
        rows = []
        elements = section.get("elements", [])
        if elements and isinstance(elements, list):
            content = elements[0].get("content", {})
            if isinstance(content, dict):
                headers = content.get("headers") or []
                rows = content.get("rows") or []
            if not isinstance(headers, list):
                headers = []
            if not isinstance(rows, list):
                rows = []

        if not headers and not rows:
            sheet['A3'] = "No table data available"
            return

        # Collect rows as fresh lists together with their target sheet row.
        # Rows of unknown shape are skipped but keep their row number.
        preparedRows = []
        for row_idx, row_data in enumerate(rows, 4):
            if isinstance(row_data, list):
                cell_values = list(row_data)  # copy: padding must not touch the input
            elif isinstance(row_data, dict) and "cells" in row_data:
                cell_values = [
                    cell_obj.get("value", "") if isinstance(cell_obj, dict) else cell_obj
                    for cell_obj in (row_data.get("cells") or [])
                ]
            else:
                continue
            preparedRows.append((row_idx, cell_values))

        # Without headers the widest row defines the column count
        if headers:
            column_count = len(headers)
        else:
            column_count = max((len(values) for _, values in preparedRows), default=0)

        # Header row
        header_style = styles.get("table_header", {})
        for col, header in enumerate(headers, 1):
            cell = sheet.cell(row=3, column=col, value=header)
            if header_style.get("bold"):
                cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000")))
            if header_style.get("background"):
                headerFill = self._getSafeColor(header_style["background"])
                cell.fill = PatternFill(start_color=headerFill, end_color=headerFill, fill_type="solid")

        # Data rows
        cell_style = styles.get("table_cell", {})
        for row_idx, cell_values in preparedRows:
            if len(cell_values) < column_count:
                cell_values.extend([""] * (column_count - len(cell_values)))
            elif len(cell_values) > column_count:
                cell_values = cell_values[:column_count]

            for col_idx, cell_value in enumerate(cell_values, 1):
                actual_value = cell_value.get("value", "") if isinstance(cell_value, dict) else cell_value
                cell = sheet.cell(row=row_idx, column=col_idx, value=actual_value)
                if cell_style.get("text_color"):
                    cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))

        # Fixed width for every used column
        for col in range(1, column_count + 1):
            sheet.column_dimensions[get_column_letter(col)].width = 20
    except Exception as e:
        self.logger.warning(f"Could not populate table sheet: {str(e)}")
def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]):
    """Fill the main sheet with title, generation info, metadata, overview and all sections.

    Args:
        sheet: Worksheet to write to.
        jsonContent: Document in the standardized schema.
        styles: Style set; a partial "title" entry is completed with defaults.

    Failures are logged as warnings and not raised.
    """
    try:
        metadata = jsonContent.get("metadata", {})
        if not isinstance(metadata, dict):
            metadata = {}

        # Title: documents[].title first, metadata.title second
        documents = jsonContent.get("documents", [])
        if documents and isinstance(documents[0], dict) and documents[0].get("title"):
            documentTitle = documents[0].get("title")
        else:
            documentTitle = metadata.get("title", "Generated Report")
        sheet['A1'] = documentTitle

        # Complete the configured title style with defaults so a partial
        # style cannot raise KeyError.
        title_style = {"font_size": 16, "bold": True, "color": "FF1F4E79", "align": "left"}
        configuredTitleStyle = styles.get("title")
        if isinstance(configuredTitleStyle, dict):
            title_style.update(configuredTitleStyle)
        try:
            safe_color = self._getSafeColor(title_style["color"])
            sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
        except Exception:
            # Style values were unusable: fall back to a known-good font
            sheet['A1'].font = Font(size=16, bold=True, color="FF000000")
        sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"]))

        # Generation info
        sheet['A3'] = "Generated:"
        sheet['B3'] = self._formatTimestamp()
        sheet['A4'] = "Status:"
        sheet['B4'] = "Generated Successfully"

        # Last row used so far. Must be set even without metadata, because
        # the overview below is positioned relative to it.
        row = 5
        if metadata:
            sheet['A6'] = "Document Information:"
            sheet['A6'].font = Font(bold=True)
            row = 7
            for key, value in metadata.items():
                if key != "title":
                    sheet[f'A{row}'] = f"{str(key).title()}:"
                    sheet[f'B{row}'] = str(value)
                    row += 1

        # Content overview
        sections = self._extractSections(jsonContent)
        sheet[f'A{row + 1}'] = "Content Overview:"
        sheet[f'A{row + 1}'].font = Font(bold=True)
        row += 2
        sheet[f'A{row}'] = f"Total Sections: {len(sections)}"

        # Sections per content type, in order of first appearance
        content_types = {}
        for section in sections:
            content_type = section.get("content_type", "unknown")
            content_types[content_type] = content_types.get(content_type, 0) + 1
        for content_type, count in content_types.items():
            row += 1
            sheet[f'A{row}'] = f"{str(content_type).title()} Sections: {count}"

        # All content, separated by one empty row
        row += 2
        for section in sections:
            row = self._addSectionToSheet(sheet, section, styles, row)
            row += 1

        sheet.column_dimensions['A'].width = 20
        sheet.column_dimensions['B'].width = 30
    except Exception as e:
        self.logger.warning(f"Could not populate main sheet: {str(e)}")
def _populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]):
    """
    Fill the additional per-content-type sheets.

    For every name in sheetNames that is also a key of sheets, the sheet
    gets the title-cased name in A1 (size 16, bold) and, starting at row 3,
    the sections selected for it, separated by one empty row:
      - "tables": sections whose content_type is "table"
      - "lists":  sections whose content_type is "list"
      - "text":   sections whose content_type is "paragraph" or "heading"
      - any other name: all sections
    Columns 1-5 are then set to width 20. Any exception is logged as a
    warning and ends the method; nothing is raised to the caller.
    """
    # Section content types accepted by each well-known sheet name.
    acceptedTypesBySheet = {
        "tables": ("table",),
        "lists": ("list",),
        "text": ("paragraph", "heading"),
    }
    try:
        allSections = self._extractSections(jsonContent)
        for name in sheetNames:
            if name not in sheets:
                continue
            target = sheets[name]

            target['A1'] = name.title()
            target['A1'].font = Font(size=16, bold=True)

            acceptedTypes = acceptedTypesBySheet.get(name)
            if acceptedTypes is None:
                # Unknown sheet name: show everything.
                selected = allSections
            else:
                selected = [
                    candidate for candidate in allSections
                    if candidate.get("content_type") in acceptedTypes
                ]

            nextRow = 3
            for candidate in selected:
                # "+ 1" leaves an empty row between sections.
                nextRow = self._addSectionToSheet(target, candidate, styles, nextRow) + 1

            # Fixed width for the first five columns.
            for columnIndex in (1, 2, 3, 4, 5):
                target.column_dimensions[get_column_letter(columnIndex)].width = 20
    except Exception as error:
        self.logger.warning(f"Could not populate content type sheets: {str(error)}")
def _addSectionToSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a section to a sheet and return the next row."""
try:
# Add section title
section_title = section.get("title")
if section_title:
sheet[f'A{startRow}'] = f"# {section_title}"
sheet[f'A{startRow}'].font = Font(bold=True)
startRow += 1
# Process section based on type
section_type = section.get("content_type", "paragraph")
# Handle all section types using elements array
elements = section.get("elements", [])
for element in elements:
# Check element type, not section type (elements can have different types than section)
element_type = element.get("type", "") if isinstance(element, dict) else ""
if element_type == "table":
startRow = self._addTableToExcel(sheet, element, styles, startRow)
elif element_type == "bullet_list" or element_type == "list":
startRow = self._addListToExcel(sheet, element, styles, startRow)
elif element_type == "paragraph":
startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
elif element_type == "heading":
startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
elif element_type == "image":
startRow = self._addImageToExcel(sheet, element, styles, startRow)
elif element_type == "code_block" or element_type == "code":
startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow)
else:
# Fallback: if element_type not set, use section_type
if section_type == "table":
startRow = self._addTableToExcel(sheet, element, styles, startRow)
elif section_type == "bullet_list" or section_type == "list":
startRow = self._addListToExcel(sheet, element, styles, startRow)
elif section_type == "paragraph":
startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
elif section_type == "heading":
startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
elif section_type == "image":
startRow = self._addImageToExcel(sheet, element, styles, startRow)
elif section_type == "code_block" or section_type == "code":
startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow)
else:
startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
return startRow
except Exception as e:
self.logger.warning(f"Could not add section to sheet: {str(e)}")
return startRow + 1
def _parseDateString(self, text: str) -> Any:
"""Try to parse a string as a date/datetime. Returns datetime object if successful, None otherwise."""
if not text or not isinstance(text, str):
return None
text = text.strip()
if not text:
return None
# Common date formats to try (in order of likelihood)
date_formats = [
"%Y-%m-%d", # 2025-01-01
"%d.%m.%Y", # 01.01.2025
"%d/%m/%Y", # 01/01/2025
"%m/%d/%Y", # 01/01/2025 (US format)
"%Y-%m-%d %H:%M:%S", # 2025-01-01 12:00:00
"%d.%m.%Y %H:%M:%S", # 01.01.2025 12:00:00
"%d/%m/%Y %H:%M:%S", # 01/01/2025 12:00:00
"%Y-%m-%d %H:%M", # 2025-01-01 12:00
"%d.%m.%Y %H:%M", # 01.01.2025 12:00
"%d/%m/%Y %H:%M", # 01/01/2025 12:00
]
# Try parsing with common formats first
for date_format in date_formats:
try:
parsed_date = datetime.strptime(text, date_format)
return parsed_date
except ValueError:
continue
# If dateutil is available, use it for more flexible parsing
if DATEUTIL_AVAILABLE:
try:
parsed_date = date_parser.parse(text, dayfirst=True, yearfirst=False)
return parsed_date
except (ValueError, TypeError):
pass
return None
def _sanitizeCellValue(self, value: Any) -> Any:
"""Sanitize cell value: remove markdown, convert to string, handle None, limit length. Preserve numbers as numbers."""
if value is None:
return ""
if isinstance(value, dict):
# Extract value from dict if present
value = value.get("value", "")
if isinstance(value, (int, float)):
return value # Keep numbers as-is
# Convert to string and remove markdown formatting
text = str(value)
# Remove markdown bold (**text**)
text = text.replace("**", "")
# Remove markdown italic (*text*)
text = text.replace("*", "")
# Remove other markdown
text = text.replace("__", "").replace("_", "")
text = text.strip()
# Try to convert numeric strings to actual numbers
# This ensures Excel treats them as numbers, not strings
if text:
# Clean text for number conversion: remove common formatting characters
# but preserve the original for fallback
cleaned_for_number = text.replace("'", "").replace(",", "").replace(" ", "").strip()
# Only attempt conversion if cleaned text looks like a number
# (starts with digit, +, -, or . followed by digit)
if cleaned_for_number and (cleaned_for_number[0].isdigit() or cleaned_for_number[0] in '+-.'):
# Try integer first (more restrictive)
try:
# Check if it's a valid integer (no decimal point, no scientific notation)
if '.' not in cleaned_for_number and 'e' not in cleaned_for_number.lower() and 'E' not in cleaned_for_number:
int_value = int(cleaned_for_number)
return int_value
except (ValueError, OverflowError):
pass
# Try float if integer conversion failed
try:
float_value = float(cleaned_for_number)
# Only return as float if it's actually a number representation
# Avoid converting things like "NaN", "inf" which are valid floats but not useful
if cleaned_for_number.lower() not in ['nan', 'inf', '-inf', 'infinity', '-infinity']:
# Check for reasonable float values (not too large/small)
if abs(float_value) < 1e308: # Avoid overflow
return float_value
except (ValueError, OverflowError):
pass
# Try to convert date strings to datetime objects
# This ensures Excel treats them as dates, not strings
# Use original text (not cleaned) for date parsing
date_value = self._parseDateString(text)
if date_value is not None:
return date_value
# Excel cell value limit is 32,767 characters - truncate if necessary
if len(text) > 32767:
text = text[:32764] + "..."
return text
def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a table element to Excel sheet with proper formatting and borders."""
try:
# Extract from nested content structure
content = element.get("content", {})
if not isinstance(content, dict):
return startRow
headers = content.get("headers", [])
rows = content.get("rows", [])
# Ensure headers and rows are lists
if not isinstance(headers, list):
headers = []
if not isinstance(rows, list):
rows = []
if not headers and not rows:
return startRow
# Define border style
thin_border = Border(
left=Side(style='thin'),
right=Side(style='thin'),
top=Side(style='thin'),
bottom=Side(style='thin')
)
headerRow = startRow
header_style = styles.get("table_header", {})
# Add headers with formatting
for col, header in enumerate(headers, 1):
sanitized_header = self._sanitizeCellValue(header)
cell = sheet.cell(row=headerRow, column=col, value=sanitized_header)
# Apply styling with fallbacks - don't let styling errors prevent data rendering
try:
# Font styling
cell.font = Font(
bold=header_style.get("bold", True),
color=self._getSafeColor(header_style.get("text_color", "FF000000"))
)
except Exception:
# Fallback to default font if styling fails
try:
cell.font = Font(bold=True, color=self._getSafeColor("FF000000"))
except Exception:
pass # Continue even if font fails
try:
# Background color
if header_style.get("background"):
cell.fill = PatternFill(
start_color=self._getSafeColor(header_style["background"]),
end_color=self._getSafeColor(header_style["background"]),
fill_type="solid"
)
except Exception:
pass # Continue without background color if it fails
try:
# Alignment
cell.alignment = Alignment(
horizontal=self._getSafeAlignment(header_style.get("align", "left")),
vertical="center"
)
except Exception:
# Fallback to default alignment if it fails
try:
cell.alignment = Alignment(horizontal="left", vertical="center")
except Exception:
pass # Continue even if alignment fails
try:
# Border
cell.border = thin_border
except Exception:
pass # Continue without border if it fails
startRow += 1
# Add rows with formatting
cell_style = styles.get("table_cell", {})
header_count = len(headers)
for row_data in rows:
# Handle different row formats
if isinstance(row_data, list):
cell_values = row_data
elif isinstance(row_data, dict) and "cells" in row_data:
cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])]
else:
continue
# Validate row column count matches headers - pad or truncate if needed
if len(cell_values) < header_count:
# Pad with empty strings if row has fewer columns
cell_values.extend([""] * (header_count - len(cell_values)))
elif len(cell_values) > header_count:
# Truncate if row has more columns than headers
cell_values = cell_values[:header_count]
for col, cell_value in enumerate(cell_values, 1):
sanitized_value = self._sanitizeCellValue(cell_value)
cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
# Apply styling with fallbacks - don't let styling errors prevent data rendering
try:
# Font styling
if cell_style.get("text_color"):
cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
except Exception:
pass # Continue without font color if it fails
try:
# Alignment
cell.alignment = Alignment(
horizontal=self._getSafeAlignment(cell_style.get("align", "left")),
vertical="center"
)
except Exception:
# Fallback to default alignment if it fails
try:
cell.alignment = Alignment(horizontal="left", vertical="center")
except Exception:
pass # Continue even if alignment fails
try:
# Border
cell.border = thin_border
except Exception:
pass # Continue without border if it fails
startRow += 1
# Auto-adjust column widths
for col in range(1, len(headers) + 1):
column_letter = get_column_letter(col)
sheet.column_dimensions[column_letter].width = 20
return startRow
except Exception as e:
self.logger.warning(f"Could not add table to Excel: {str(e)}")
return startRow + 1
def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a list element to Excel sheet. Expects nested content structure."""
try:
# Extract from nested content structure
content = element.get("content", {})
if not isinstance(content, dict):
return startRow
list_items = content.get("items") or []
# Ensure list_items is a list
if not isinstance(list_items, list):
list_items = []
list_style = styles.get("bullet_list", {})
for item in list_items:
sheet.cell(row=startRow, column=1, value=f"{item}")
if list_style.get("color"):
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"]))
startRow += 1
return startRow
except Exception as e:
self.logger.warning(f"Could not add list to Excel: {str(e)}")
return startRow + 1
def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a paragraph element to Excel sheet. Expects nested content structure."""
try:
# Extract from nested content structure
content = element.get("content", {})
if isinstance(content, dict):
text = content.get("text", "")
elif isinstance(content, str):
text = content
else:
text = ""
if text:
sheet.cell(row=startRow, column=1, value=text)
paragraph_style = styles.get("paragraph", {})
if paragraph_style.get("color"):
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(paragraph_style["color"]))
startRow += 1
return startRow
except Exception as e:
self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
return startRow + 1
def _addHeadingToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a heading element to Excel sheet. Expects nested content structure."""
try:
# Extract from nested content structure
content = element.get("content", {})
if not isinstance(content, dict):
return startRow
text = content.get("text", "")
level = content.get("level", 1)
if text:
sheet.cell(row=startRow, column=1, value=text)
heading_style = styles.get("heading", {})
font_size = heading_style.get("font_size", 14)
if level > 1:
font_size = max(10, font_size - (level - 1) * 2)
sheet.cell(row=startRow, column=1).font = Font(
size=font_size,
bold=True,
color=self._getSafeColor(heading_style.get("color", "FF000000"))
)
startRow += 1
return startRow
except Exception as e:
self.logger.warning(f"Could not add heading to Excel: {str(e)}")
return startRow + 1
def _addImageToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add an image element to Excel sheet using openpyxl. Expects nested content structure."""
try:
# Extract from nested content structure
content = element.get("content", {})
if not isinstance(content, dict):
return startRow
base64Data = content.get("base64Data", "")
altText = content.get("altText", "Image")
caption = content.get("caption", "")
if not base64Data:
# No image data - add placeholder text
sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]")
return startRow + 1
try:
from openpyxl.drawing.image import Image as OpenpyxlImage
import base64
import io
# Decode base64 image data
imageBytes = base64.b64decode(base64Data)
imageStream = io.BytesIO(imageBytes)
# Create openpyxl Image
img = OpenpyxlImage(imageStream)
# Calculate max width based on Excel column width
# Excel default column width is ~64 pixels (8.43 characters at default font)
# Use multiple columns for image width (typically 8-10 columns = ~512-640 pixels)
# Standard Excel sheet width is ~1024 pixels (14.5 inches at 72 DPI)
# Use 80% of sheet width to leave margins
maxWidth = 800 # pixels (approximately 11 inches at 72 DPI, fits within page)
maxHeight = 600 # pixels (approximately 8.3 inches at 72 DPI)
# Scale image to fit within page dimensions while maintaining aspect ratio
width_scale = maxWidth / img.width if img.width > maxWidth else 1.0
height_scale = maxHeight / img.height if img.height > maxHeight else 1.0
scale = min(width_scale, height_scale, 1.0) # Don't scale up, only down
img.width = int(img.width * scale)
img.height = int(img.height * scale)
# Anchor image to cell (A column, current row)
img.anchor = f'A{startRow}'
# Add image to sheet
sheet.add_image(img)
# Calculate height needed for image (approximate)
# Excel row height is in points (1/72 inch), image height is in pixels
# Assuming 72 DPI: pixels = points
imageHeightPoints = img.height / 1.33 # Approximate conversion
sheet.row_dimensions[startRow].height = max(15, imageHeightPoints) # Min 15 points
# Add caption below image if available
if caption:
startRow += 1
sheet.cell(row=startRow, column=1, value=caption)
sheet.cell(row=startRow, column=1).font = Font(italic=True, size=10)
sheet.cell(row=startRow, column=1).alignment = Alignment(horizontal="left")
elif altText and altText != "Image":
startRow += 1
sheet.cell(row=startRow, column=1, value=f"Figure: {altText}")
sheet.cell(row=startRow, column=1).font = Font(italic=True, size=10)
return startRow + 1
except ImportError:
self.logger.error("openpyxl.drawing.image not available, cannot embed image")
errorMsg = f"[Error: Image embedding not available. Image: {altText}]"
errorCell = sheet.cell(row=startRow, column=1, value=errorMsg)
errorCell.font = Font(color="FFFF0000", italic=True) # Red color
return startRow + 1
except Exception as imgError:
self.logger.error(f"Error embedding image in Excel: {str(imgError)}")
errorMsg = f"[Error: Could not embed image '{altText}'. {str(imgError)}]"
errorCell = sheet.cell(row=startRow, column=1, value=errorMsg)
errorCell.font = Font(color="FFFF0000", italic=True) # Red color
return startRow + 1
except Exception as e:
self.logger.error(f"Error adding image to Excel: {str(e)}")
errorMsg = f"[Error: Could not process image. {str(e)}]"
errorCell = sheet.cell(row=startRow, column=1, value=errorMsg)
errorCell.font = Font(color="FFFF0000", italic=True) # Red color
return startRow + 1
def _addCodeBlockToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a code block element to Excel sheet. Expects nested content structure."""
try:
# Extract from nested content structure
content = element.get("content", {})
if not isinstance(content, dict):
return startRow
code = content.get("code", "")
language = content.get("language", "")
if code:
code_style = styles.get("code_block", {})
# Add language label if present
if language:
langCell = sheet.cell(row=startRow, column=1, value=f"Code ({language}):")
langCell.font = Font(bold=True, color=self._getSafeColor(code_style.get("color", "FF000000")))
startRow += 1
# Split code into lines and add each line
code_lines = code.split('\n')
for line in code_lines:
codeCell = sheet.cell(row=startRow, column=1, value=line)
codeCell.font = Font(
name=code_style.get("font", "Courier New"),
size=code_style.get("font_size", 10),
color=self._getSafeColor(code_style.get("color", "FF2F2F2F"))
)
# Set background color if specified
if code_style.get("background"):
codeCell.fill = PatternFill(
start_color=self._getSafeColor(code_style["background"]),
end_color=self._getSafeColor(code_style["background"]),
fill_type="solid"
)
startRow += 1
# Add spacing after code block
startRow += 1
return startRow
except Exception as e:
self.logger.warning(f"Could not add code block to Excel: {str(e)}")
return startRow + 1
def _formatTimestamp(self) -> str:
"""Format current timestamp for document generation."""
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")