908 lines
41 KiB
Python
908 lines
41 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""
|
|
Excel renderer for report generation using openpyxl.
|
|
"""
|
|
|
|
from .rendererBaseTemplate import BaseRenderer
|
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
|
from typing import Dict, Any, List
|
|
import io
|
|
import base64
|
|
from datetime import datetime, UTC
|
|
|
|
try:
|
|
from openpyxl import Workbook
|
|
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
|
from openpyxl.utils import get_column_letter
|
|
from openpyxl.worksheet.table import Table, TableStyleInfo
|
|
OPENPYXL_AVAILABLE = True
|
|
except ImportError:
|
|
OPENPYXL_AVAILABLE = False
|
|
|
|
class RendererXlsx(BaseRenderer):
|
|
"""Renders content to Excel format using openpyxl."""
|
|
|
|
@classmethod
def getSupportedFormats(cls) -> List[str]:
    """List the output format identifiers this renderer handles."""
    supportedFormats = ['xlsx', 'xls', 'excel']
    return supportedFormats
|
|
|
|
@classmethod
def getFormatAliases(cls) -> List[str]:
    """List the alternative names under which this renderer can be requested."""
    aliases = ['spreadsheet', 'workbook']
    return aliases
|
|
|
|
@classmethod
def getPriority(cls) -> int:
    """Report the fixed priority value of the Excel renderer (110)."""
    priority = 110
    return priority
|
|
|
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    """Render extracted JSON content to an Excel workbook.

    Args:
        extractedContent: Structured document JSON; an optional
            ``documents[0]["filename"]`` entry overrides the generated filename.
        title: Report title, used to derive the filename when none is supplied.
        userPrompt: Optional user prompt forwarded to the style generation.
        aiService: Optional AI service forwarded to the style generation.

    Returns:
        A list with one ``RenderedDocument``. When openpyxl is not installed the
        CSV renderer's result is returned instead. When rendering fails, the list
        holds a single small CSV document describing the error, so callers always
        receive the declared return type.
    """
    xlsxMimeType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    try:
        if not OPENPYXL_AVAILABLE:
            # Fallback to CSV if openpyxl not available
            from .rendererCsv import RendererCsv
            return await RendererCsv().render(extractedContent, title, userPrompt, aiService)

        # Generate Excel using AI-analyzed styling
        excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)

        # Prefer the filename carried by the first document, otherwise derive one from the title
        filename = None
        documents = extractedContent.get("documents", [])
        if documents and isinstance(documents[0], dict):
            filename = documents[0].get("filename")
        if not filename:
            filename = self._determineFilename(title, xlsxMimeType)

        # The generator hands back base64 text; decode it to the raw workbook bytes
        if isinstance(excelContent, str):
            try:
                excelBytes = base64.b64decode(excelContent)
            except ValueError:
                # binascii.Error is a ValueError: not base64 after all, pass the text through
                excelBytes = excelContent.encode('utf-8')
        else:
            excelBytes = excelContent

        return [
            RenderedDocument(
                documentData=excelBytes,
                mimeType=xlsxMimeType,
                filename=filename
            )
        ]

    except Exception as e:
        self.logger.error(f"Error rendering Excel: {str(e)}")
        # Return a CSV error report wrapped in the same type as the success path.
        # csv.writer quotes commas/newlines that may occur in the title or message.
        import csv
        errorBuffer = io.StringIO()
        errorWriter = csv.writer(errorBuffer, lineterminator="\n")
        errorWriter.writerow(["Title", "Content"])
        errorWriter.writerow([title, f"Error rendering Excel report: {str(e)}"])
        return [
            RenderedDocument(
                documentData=errorBuffer.getvalue().encode('utf-8'),
                mimeType="text/csv",
                filename="excel_render_error.csv"
            )
        ]
|
|
|
|
def _generateExcel(self, content: str, title: str) -> str:
    """Build a Summary/Data/Analysis workbook from plain text.

    Args:
        content: Line-oriented text passed to the Data and Analysis sheets.
        title: Title written to the Summary sheet.

    Returns:
        The saved workbook, base64-encoded as a UTF-8 string.

    Raises:
        Exception: Any error is logged and re-raised unchanged.
    """
    try:
        workbook = Workbook()

        # A new workbook starts with one default sheet; remove it so only the
        # three named sheets remain.
        workbook.remove(workbook.active)

        summarySheet, dataSheet, analysisSheet = (
            workbook.create_sheet(sheetTitle, position)
            for position, sheetTitle in enumerate(("Summary", "Data", "Analysis"))
        )

        self._populateSummarySheet(summarySheet, title)
        self._populateDataSheet(dataSheet, content)
        self._populateAnalysisSheet(analysisSheet, content)

        # Serialise in memory and hand the bytes back as base64 text
        stream = io.BytesIO()
        workbook.save(stream)
        return base64.b64encode(stream.getvalue()).decode('utf-8')

    except Exception as e:
        self.logger.error(f"Error generating Excel: {str(e)}")
        raise
|
|
|
|
def _populateSummarySheet(self, sheet, title: str):
    """Fill the summary sheet with the title, generation info and an item count.

    Failures are logged as warnings and not propagated.
    """
    try:
        # Headline
        sheet['A1'] = title
        headline = sheet['A1']
        headline.font = Font(size=16, bold=True)
        headline.alignment = Alignment(horizontal='left')

        # Generation info
        sheet['A3'] = "Generated:"
        sheet['B3'] = self._formatTimestamp()
        sheet['A4'] = "Status:"
        sheet['B4'] = "Generated Successfully"

        # Key metrics placeholder
        sheet['A6'] = "Key Metrics:"
        sheet['A6'].font = Font(bold=True)
        sheet['A7'] = "Total Items:"
        # Formula counts the non-empty cells of column A on the Data sheet, minus the header
        sheet['B7'] = "=COUNTA(Data!A:A)-1"

        # Fixed column widths
        for columnLetter, columnWidth in (('A', 20), ('B', 30)):
            sheet.column_dimensions[columnLetter].width = columnWidth

    except Exception as e:
        self.logger.warning(f"Could not populate summary sheet: {str(e)}")
|
|
|
|
def _populateDataSheet(self, sheet, content: str):
    """Write a header row followed by one sheet row per non-blank content line.

    Lines containing ``|`` are split into up to five non-empty cells; every other
    line goes into column A. Failures are logged as warnings and not propagated.
    """
    try:
        # Header row
        columnTitles = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"]
        for position, columnTitle in enumerate(columnTitles, start=1):
            headerCell = sheet.cell(row=1, column=position, value=columnTitle)
            headerCell.font = Font(bold=True)
            headerCell.fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")

        # Body rows start directly below the header
        nextRow = 2
        for rawLine in content.split('\n'):
            text = rawLine.strip()
            if not text:
                continue

            if '|' in text:
                # Pipe-separated line: keep the non-empty, trimmed cells
                values = [part for part in (piece.strip() for piece in text.split('|')) if part]
            else:
                # Regular content occupies the first column only
                values = [text]

            for position, value in enumerate(values[:5], start=1):  # Limit to 5 columns
                sheet.cell(row=nextRow, column=position, value=value)
            nextRow += 1

        # Fixed width for the five data columns
        for columnLetter in map(get_column_letter, range(1, 6)):
            sheet.column_dimensions[columnLetter].width = 20

    except Exception as e:
        self.logger.warning(f"Could not populate data sheet: {str(e)}")
|
|
|
|
def _populateAnalysisSheet(self, sheet, content: str):
    """Write simple line statistics and static recommendations to the analysis sheet.

    Failures are logged as warnings and not propagated.
    """
    try:
        # Headline
        sheet['A1'] = "Analysis & Insights"
        sheet['A1'].font = Font(size=14, bold=True)

        allLines = content.split('\n')

        sheet['A3'] = "Content Analysis:"
        sheet['A3'].font = Font(bold=True)

        # Classify lines: pipe-separated rows, "- "/"* " bullets, everything else
        tableCount = len([entry for entry in allLines if '|' in entry])
        listCount = len([entry for entry in allLines if entry.startswith('- ') or entry.startswith('* ')])
        textCount = len(allLines) - tableCount - listCount

        statistics = (
            f"Total Lines: {len(allLines)}",
            f"Table Rows: {tableCount}",
            f"List Items: {listCount}",
            f"Text Lines: {textCount}",
        )
        for rowNumber, statistic in enumerate(statistics, start=4):
            sheet[f'A{rowNumber}'] = statistic

        # One empty row separates the statistics (rows 4-7) from the recommendations
        recommendationsRow = 9
        sheet[f'A{recommendationsRow}'] = "Recommendations:"
        sheet[f'A{recommendationsRow}'].font = Font(bold=True)
        recommendations = (
            "1. Review data accuracy",
            "2. Consider additional analysis",
            "3. Update regularly",
        )
        for rowNumber, recommendation in enumerate(recommendations, start=recommendationsRow + 1):
            sheet[f'A{rowNumber}'] = recommendation

        sheet.column_dimensions['A'].width = 30

    except Exception as e:
        self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
|
|
|
|
async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
    """Generate a workbook from the structured JSON document and return it as base64.

    Args:
        jsonContent: Document JSON; must pass ``_validateJsonStructure``
            (schema ``{metadata: {...}, documents: [{sections: [...]}]}``).
        title: Accepted for interface compatibility; not read by this method.
        userPrompt: Optional prompt forwarded to ``_getStyleSet``.
        aiService: Optional AI service forwarded to ``_getStyleSet``.

    Returns:
        The saved workbook, base64-encoded as a UTF-8 string.

    Raises:
        Exception: ``"Excel generation failed: ..."`` for any failure; the original
            error is attached as ``__cause__`` so the traceback is not lost.
    """
    try:
        debugLog = self.services.utils.debugLogToFile

        # Debug output
        debugLog(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER")
        debugLog(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER")

        # Get style set: default styles, enhanced with AI if userPrompt provided
        styles = await self._getStyleSet(userPrompt, aiService)

        # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
        if not self._validateJsonStructure(jsonContent):
            raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

        # Create workbook and its sheets, then fill them
        wb = Workbook()
        sheets = self._createExcelSheets(wb, jsonContent, styles)
        debugLog(f"EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}", "EXCEL_RENDERER")
        self._populateExcelSheets(sheets, jsonContent, styles)

        # Save to an in-memory buffer
        buffer = io.BytesIO()
        wb.save(buffer)
        excelBytes = buffer.getvalue()
        debugLog(f"EXCEL BYTES LENGTH: {len(excelBytes)}", "EXCEL_RENDERER")

        # Convert to base64
        try:
            excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
            debugLog(f"EXCEL BASE64 LENGTH: {len(excelBase64)}", "EXCEL_RENDERER")
        except Exception as b64_error:
            debugLog(f"BASE64 ENCODING ERROR: {b64_error}", "EXCEL_RENDERER")
            raise

        return excelBase64

    except Exception as e:
        self.logger.error(f"Error generating Excel from JSON: {str(e)}")
        # Chain the original error so callers and logs keep the root cause
        raise Exception(f"Excel generation failed: {str(e)}") from e
|
|
|
|
async def _getStyleSet(self, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
|
|
"""Get style set - default styles, enhanced with AI if userPrompt provided.
|
|
|
|
Args:
|
|
userPrompt: User's prompt (AI will detect style instructions in any language)
|
|
aiService: AI service (used only if userPrompt provided)
|
|
templateName: Name of template style set (None = default)
|
|
|
|
Returns:
|
|
Dict with style definitions for all document styles
|
|
"""
|
|
# Get default style set
|
|
defaultStyleSet = self._getDefaultStyleSet()
|
|
|
|
# Enhance with AI if userPrompt provided (AI handles multilingual style detection)
|
|
if userPrompt and aiService:
|
|
# AI will naturally detect style instructions in any language
|
|
self.logger.info(f"Enhancing styles with AI based on user prompt...")
|
|
enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
|
|
# Convert colors to Excel format after getting styles
|
|
enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet)
|
|
return self._validateStylesContrast(enhancedStyleSet)
|
|
else:
|
|
# Use default styles only
|
|
return defaultStyleSet
|
|
|
|
async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
|
|
"""Enhance default styles with AI based on user prompt."""
|
|
try:
|
|
style_template = self._createAiStyleTemplate("xlsx", userPrompt, defaultStyleSet)
|
|
enhanced_styles = await self._getAiStylesWithExcelColors(aiService, style_template, defaultStyleSet)
|
|
return enhanced_styles
|
|
except Exception as e:
|
|
self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles")
|
|
return defaultStyleSet
|
|
|
|
def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Validate and fix contrast issues in AI-generated styles."""
|
|
try:
|
|
# Fix table header contrast
|
|
if "table_header" in styles:
|
|
header = styles["table_header"]
|
|
bgColor = header.get("background", "#FFFFFF")
|
|
textColor = header.get("text_color", "#000000")
|
|
|
|
# If both are white or both are dark, fix it
|
|
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
|
|
header["background"] = "#FF4F4F4F"
|
|
header["text_color"] = "#FFFFFFFF"
|
|
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
|
|
header["background"] = "#FF4F4F4F"
|
|
header["text_color"] = "#FFFFFFFF"
|
|
|
|
# Fix table cell contrast
|
|
if "table_cell" in styles:
|
|
cell = styles["table_cell"]
|
|
bgColor = cell.get("background", "#FFFFFF")
|
|
textColor = cell.get("text_color", "#000000")
|
|
|
|
# If both are white or both are dark, fix it
|
|
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
|
|
cell["background"] = "#FFFFFFFF"
|
|
cell["text_color"] = "#FF2F2F2F"
|
|
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
|
|
cell["background"] = "#FFFFFFFF"
|
|
cell["text_color"] = "#FF2F2F2F"
|
|
|
|
return styles
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Style validation failed: {str(e)}")
|
|
return self._getDefaultStyleSet()
|
|
|
|
def _getDefaultStyleSet(self) -> Dict[str, Any]:
|
|
"""Default Excel style set - used when no style instructions present."""
|
|
return {
|
|
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "left"},
|
|
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
|
|
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
|
|
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
|
|
"bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2},
|
|
"paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"},
|
|
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
|
|
}
|
|
|
|
async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Get AI styles with proper Excel color conversion."""
|
|
if not aiService:
|
|
return defaultStyles
|
|
|
|
try:
|
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
|
|
|
|
requestOptions = AiCallOptions()
|
|
requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
|
|
|
|
request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
|
|
response = await aiService.callAi(request)
|
|
|
|
import json
|
|
import re
|
|
|
|
# Clean and parse JSON
|
|
result = response.content.strip() if response and response.content else ""
|
|
|
|
# Check if result is empty
|
|
if not result:
|
|
self.logger.warning("AI styling returned empty response, using defaults")
|
|
return defaultStyles
|
|
|
|
# Extract JSON from markdown if present
|
|
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
|
|
if json_match:
|
|
result = json_match.group(1).strip()
|
|
self.services.utils.debugLogToFile(f"EXTRACTED JSON FROM MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
|
|
elif result.startswith('```json'):
|
|
result = re.sub(r'^```json\s*', '', result)
|
|
result = re.sub(r'\s*```$', '', result)
|
|
self.services.utils.debugLogToFile(f"CLEANED JSON FROM MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
|
|
elif result.startswith('```'):
|
|
result = re.sub(r'^```\s*', '', result)
|
|
result = re.sub(r'\s*```$', '', result)
|
|
self.services.utils.debugLogToFile(f"CLEANED JSON FROM GENERIC MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
|
|
|
|
# Try to parse JSON
|
|
try:
|
|
styles = json.loads(result)
|
|
except json.JSONDecodeError as json_error:
|
|
self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
|
|
return defaultStyles
|
|
|
|
# Convert colors to Excel aRGB format
|
|
styles = self._convertColorsFormat(styles)
|
|
|
|
return styles
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
|
return defaultStyles
|
|
|
|
def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str:
|
|
"""Get a safe aRGB color value for Excel (without # prefix)."""
|
|
if not isinstance(colorValue, str):
|
|
return default
|
|
|
|
# Remove # prefix if present
|
|
if colorValue.startswith('#'):
|
|
colorValue = colorValue[1:]
|
|
|
|
if len(colorValue) == 6:
|
|
# Convert RRGGBB to AARRGGBB
|
|
return f"FF{colorValue}"
|
|
elif len(colorValue) == 8:
|
|
# Already aRGB format
|
|
return colorValue
|
|
else:
|
|
# Unexpected format, return default
|
|
return default
|
|
|
|
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Convert hex colors to aRGB format for Excel compatibility."""
|
|
try:
|
|
self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER")
|
|
for styleName, styleConfig in styles.items():
|
|
if isinstance(styleConfig, dict):
|
|
for prop, value in styleConfig.items():
|
|
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
|
|
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel)
|
|
styles[styleName][prop] = f"FF{value[1:]}"
|
|
elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
|
|
pass # Already aRGB format
|
|
elif isinstance(value, str) and value.startswith('#'):
|
|
pass # Unexpected format, keep as is
|
|
return styles
|
|
except Exception as e:
|
|
return styles
|
|
|
|
def _createExcelSheets(self, wb: Workbook, jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Create Excel sheets based on content structure and user intent."""
|
|
sheets = {}
|
|
|
|
# Get sheet names from AI styles or generate based on content
|
|
sheetNames = styles.get("sheet_names", self._generateSheetNamesFromContent(jsonContent))
|
|
self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheetNames}", "EXCEL_RENDERER")
|
|
|
|
# Create sheets
|
|
for i, sheetName in enumerate(sheetNames):
|
|
if i == 0:
|
|
# Use the default sheet for the first sheet
|
|
sheet = wb.active
|
|
sheet.title = sheetName
|
|
else:
|
|
# Create additional sheets
|
|
sheet = wb.create_sheet(sheetName, i)
|
|
sheets[sheetName.lower()] = sheet
|
|
|
|
return sheets
|
|
|
|
def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]:
|
|
"""Generate sheet names based on actual content structure."""
|
|
sections = self._extractSections(jsonContent)
|
|
|
|
# If no sections, create a single sheet
|
|
if not sections:
|
|
return ["Content"]
|
|
|
|
# Generate sheet names based on content structure
|
|
sheetNames = []
|
|
|
|
# Check if we have multiple table sections
|
|
tableSections = [s for s in sections if s.get("content_type") == "table"]
|
|
|
|
if len(tableSections) > 1:
|
|
# Create separate sheets for each table
|
|
for i, section in enumerate(tableSections, 1):
|
|
# Try to get caption from table element first, then section title, then fallback
|
|
sectionTitle = None
|
|
elements = section.get("elements", [])
|
|
if elements and isinstance(elements, list) and len(elements) > 0:
|
|
tableElement = elements[0]
|
|
sectionTitle = tableElement.get("caption")
|
|
|
|
if not sectionTitle:
|
|
sectionTitle = section.get("title")
|
|
|
|
if not sectionTitle:
|
|
sectionTitle = f"Table {i}"
|
|
|
|
sheetNames.append(sectionTitle[:31]) # Excel sheet name limit
|
|
else:
|
|
# Single table or mixed content - create only main sheet
|
|
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
|
|
sheetNames.append(documentTitle[:31]) # Excel sheet name limit
|
|
|
|
return sheetNames
|
|
|
|
def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
|
"""Populate Excel sheets with content from JSON based on actual sheet names."""
|
|
try:
|
|
# Get the actual sheet names that were created
|
|
sheetNames = list(sheets.keys())
|
|
|
|
if not sheetNames:
|
|
return
|
|
|
|
sections = self._extractSections(jsonContent)
|
|
tableSections = [s for s in sections if s.get("content_type") == "table"]
|
|
|
|
if len(tableSections) > 1:
|
|
# Multiple tables - populate each sheet with its corresponding table
|
|
for i, section in enumerate(tableSections):
|
|
if i < len(sheetNames):
|
|
sheetName = sheetNames[i]
|
|
sheet = sheets[sheetName]
|
|
# Use the caption from table element as sheet title, or fallback to sheet name
|
|
sheetTitle = sheetName
|
|
elements = section.get("elements", [])
|
|
if elements and isinstance(elements, list) and len(elements) > 0:
|
|
tableElement = elements[0]
|
|
caption = tableElement.get("caption")
|
|
if caption:
|
|
sheetTitle = caption
|
|
self._populateTableSheet(sheet, section, styles, sheetTitle)
|
|
else:
|
|
# Single table or mixed content - populate only main sheet
|
|
firstSheetName = sheetNames[0]
|
|
self._populateMainSheet(sheets[firstSheetName], jsonContent, styles)
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
|
|
|
|
def _populateTableSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheetTitle: str):
    """Write one table section onto its own sheet.

    Layout: title in A1, header row in row 3, data rows from row 4. The table is
    read from the first entry of the section's ``elements``. Failures are logged
    as warnings and not propagated.
    """
    try:
        # Sheet title
        sheet['A1'] = sheetTitle
        titleStyle = styles.get("title", {})
        titleCell = sheet['A1']
        titleCell.font = Font(size=16, bold=True, color=self._getSafeColor(titleStyle.get("color", "FF1F4E79")))
        titleCell.alignment = Alignment(horizontal=titleStyle.get("align", "left"))

        # Table data lives in the first element (canonical JSON format)
        elements = section.get("elements", [])
        if isinstance(elements, list) and elements:
            tableData = elements[0]
            columnTitles = tableData.get("headers", [])
            dataRows = tableData.get("rows", [])
        else:
            columnTitles, dataRows = [], []

        if not (columnTitles or dataRows):
            sheet['A3'] = "No table data available"
            return

        # Header row
        headerStyle = styles.get("table_header", {})
        for columnIndex, columnTitle in enumerate(columnTitles, start=1):
            headerCell = sheet.cell(row=3, column=columnIndex, value=columnTitle)
            if headerStyle.get("bold"):
                headerCell.font = Font(bold=True, color=self._getSafeColor(headerStyle.get("text_color", "FF000000")))
            if headerStyle.get("background"):
                headerCell.fill = PatternFill(
                    start_color=self._getSafeColor(headerStyle["background"]),
                    end_color=self._getSafeColor(headerStyle["background"]),
                    fill_type="solid",
                )

        # Data rows
        bodyStyle = styles.get("table_cell", {})
        for rowNumber, values in enumerate(dataRows, start=4):
            for columnIndex, value in enumerate(values, start=1):
                bodyCell = sheet.cell(row=rowNumber, column=columnIndex, value=value)
                if bodyStyle.get("text_color"):
                    bodyCell.font = Font(color=self._getSafeColor(bodyStyle["text_color"]))

        # Fixed width for every header column
        for columnIndex in range(len(columnTitles)):
            sheet.column_dimensions[get_column_letter(columnIndex + 1)].width = 20

    except Exception as e:
        self.logger.warning(f"Could not populate table sheet: {str(e)}")
|
|
|
|
def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]):
    """Write the document overview and all sections onto the main sheet.

    Layout: title in A1, generation info in rows 3-4, metadata from row 6 (when
    present), then a content overview followed by every section.

    Robustness notes:
    * the row cursor is initialised even when there is no metadata, so the
      content is still written in that case
    * title style keys missing from an AI-provided style fall back to the
      default values instead of aborting the sheet
    * a missing or ``None`` ``content_type`` is counted as ``unknown``

    Failures are logged as warnings and not propagated.
    """
    try:
        metadata = jsonContent.get("metadata")
        if not isinstance(metadata, dict):
            metadata = {}

        # Document title
        sheet['A1'] = metadata.get("title", "Generated Report")

        # Title style with per-key defaults
        titleStyle = styles.get("title")
        if not isinstance(titleStyle, dict):
            titleStyle = {}
        try:
            sheet['A1'].font = Font(
                size=titleStyle.get("font_size", 16),
                bold=titleStyle.get("bold", True),
                color=self._getSafeColor(titleStyle.get("color", "#FF1F4E79")),
            )
            sheet['A1'].alignment = Alignment(horizontal=titleStyle.get("align", "left"))
        except Exception:
            # The supplied style values were rejected; use known-good constants
            sheet['A1'].font = Font(size=16, bold=True, color="FF000000")
            sheet['A1'].alignment = Alignment(horizontal="left")

        # Generation info
        sheet['A3'] = "Generated:"
        sheet['B3'] = self._formatTimestamp()
        sheet['A4'] = "Status:"
        sheet['B4'] = "Generated Successfully"

        # Document metadata; the cursor starts at row 7 whether or not metadata exists
        row = 7
        if metadata:
            sheet['A6'] = "Document Information:"
            sheet['A6'].font = Font(bold=True)
            for key, value in metadata.items():
                if key != "title":
                    sheet[f'A{row}'] = f"{str(key).title()}:"
                    sheet[f'B{row}'] = str(value)
                    row += 1

        # Content overview
        sections = self._extractSections(jsonContent)
        sheet[f'A{row + 1}'] = "Content Overview:"
        sheet[f'A{row + 1}'].font = Font(bold=True)

        row += 2
        sheet[f'A{row}'] = f"Total Sections: {len(sections)}"

        # Count different content types
        contentTypeCounts = {}
        for section in sections:
            contentType = str(section.get("content_type") or "unknown")
            contentTypeCounts[contentType] = contentTypeCounts.get(contentType, 0) + 1

        for contentType, count in contentTypeCounts.items():
            row += 1
            sheet[f'A{row}'] = f"{contentType.title()} Sections: {count}"

        # Add all content to this sheet
        row += 2
        for section in sections:
            row = self._addSectionToSheet(sheet, section, styles, row)
            row += 1  # Empty row between sections

        # Fixed column widths
        sheet.column_dimensions['A'].width = 20
        sheet.column_dimensions['B'].width = 30

    except Exception as e:
        self.logger.warning(f"Could not populate main sheet: {str(e)}")
|
|
|
|
def _populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]):
    """Fill per-content-type sheets.

    The sheets named ``tables``, ``lists`` and ``text`` receive only the matching
    sections; any other sheet receives all sections. Names not present in
    ``sheets`` are skipped. Failures are logged as warnings and not propagated.
    """
    # Sheet name -> content types it accepts
    acceptedTypes = {
        "tables": ("table",),
        "lists": ("list",),
        "text": ("paragraph", "heading"),
    }

    try:
        allSections = self._extractSections(jsonContent)

        for name in sheetNames:
            if name not in sheets:
                continue

            target = sheets[name]
            target['A1'] = name.title()
            target['A1'].font = Font(size=16, bold=True)

            # Filter sections by content type
            wanted = acceptedTypes.get(name)
            if wanted is None:
                selected = allSections
            else:
                selected = [entry for entry in allSections if entry.get("content_type") in wanted]

            nextRow = 3
            for entry in selected:
                # +1 leaves an empty row between sections
                nextRow = self._addSectionToSheet(target, entry, styles, nextRow) + 1

            # Fixed width for the first five columns
            for columnLetter in map(get_column_letter, range(1, 6)):
                target.column_dimensions[columnLetter].width = 20

    except Exception as e:
        self.logger.warning(f"Could not populate content type sheets: {str(e)}")
|
|
|
|
def _addSectionToSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
|
|
"""Add a section to a sheet and return the next row."""
|
|
try:
|
|
# Add section title
|
|
section_title = section.get("title")
|
|
if section_title:
|
|
sheet[f'A{startRow}'] = f"# {section_title}"
|
|
sheet[f'A{startRow}'].font = Font(bold=True)
|
|
startRow += 1
|
|
|
|
# Process section based on type
|
|
section_type = section.get("content_type", "paragraph")
|
|
|
|
# Handle all section types using elements array
|
|
elements = section.get("elements", [])
|
|
for element in elements:
|
|
if section_type == "table":
|
|
startRow = self._addTableToExcel(sheet, element, styles, startRow)
|
|
elif section_type == "bullet_list" or section_type == "list":
|
|
startRow = self._addListToExcel(sheet, element, styles, startRow)
|
|
elif section_type == "paragraph":
|
|
startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
|
|
elif section_type == "heading":
|
|
startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
|
|
elif section_type == "image":
|
|
startRow = self._addImageToExcel(sheet, element, styles, startRow)
|
|
else:
|
|
startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
|
|
|
|
return startRow
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Could not add section to sheet: {str(e)}")
|
|
return startRow + 1
|
|
|
|
def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
|
|
"""Add a table element to Excel sheet."""
|
|
try:
|
|
# In canonical JSON format, table elements have headers and rows directly
|
|
headers = element.get("headers", [])
|
|
rows = element.get("rows", [])
|
|
|
|
if not headers and not rows:
|
|
return startRow
|
|
|
|
# Add headers
|
|
header_style = styles.get("table_header", {})
|
|
for col, header in enumerate(headers, 1):
|
|
cell = sheet.cell(row=startRow, column=col, value=header)
|
|
if header_style.get("bold"):
|
|
cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000")))
|
|
if header_style.get("background"):
|
|
cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid")
|
|
|
|
startRow += 1
|
|
|
|
# Add rows
|
|
cell_style = styles.get("table_cell", {})
|
|
for row_data in rows:
|
|
for col, cell_value in enumerate(row_data, 1):
|
|
cell = sheet.cell(row=startRow, column=col, value=cell_value)
|
|
if cell_style.get("text_color"):
|
|
cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
|
|
startRow += 1
|
|
|
|
return startRow
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Could not add table to Excel: {str(e)}")
|
|
return startRow + 1
|
|
|
|
def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
|
|
"""Add a list element to Excel sheet."""
|
|
try:
|
|
list_items = element.get("items", [])
|
|
|
|
list_style = styles.get("bullet_list", {})
|
|
for item in list_items:
|
|
sheet.cell(row=startRow, column=1, value=f"• {item}")
|
|
if list_style.get("color"):
|
|
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"]))
|
|
startRow += 1
|
|
|
|
return startRow
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Could not add list to Excel: {str(e)}")
|
|
return startRow + 1
|
|
|
|
def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
|
|
"""Add a paragraph element to Excel sheet."""
|
|
try:
|
|
text = element.get("text", "")
|
|
if text:
|
|
sheet.cell(row=startRow, column=1, value=text)
|
|
|
|
paragraph_style = styles.get("paragraph", {})
|
|
if paragraph_style.get("color"):
|
|
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(paragraph_style["color"]))
|
|
|
|
startRow += 1
|
|
|
|
return startRow
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
|
|
return startRow + 1
|
|
|
|
def _addHeadingToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
    """Write the element's ``text`` as a bold heading into column A of ``startRow``.

    The font size starts at the heading style's ``font_size`` (14 when absent)
    and shrinks by 2 per level above 1, but never below 10.

    Returns:
        ``startRow + 1`` after writing; ``startRow`` unchanged when the text is
        empty. On failure a warning is logged and ``startRow + 1`` is returned.
    """
    try:
        text = element.get("text", "")
        level = element.get("level", 1)
        if not text:
            # Nothing to write, the row stays free
            return startRow

        headingCell = sheet.cell(row=startRow, column=1, value=text)

        headingStyle = styles.get("heading", {})
        fontSize = headingStyle.get("font_size", 14)
        if level > 1:
            fontSize = max(10, fontSize - (level - 1) * 2)

        headingColor = self._getSafeColor(headingStyle.get("color", "FF000000"))
        headingCell.font = Font(size=fontSize, bold=True, color=headingColor)

        return startRow + 1

    except Exception as e:
        self.logger.warning(f"Could not add heading to Excel: {str(e)}")
        return startRow + 1
|
|
|
|
def _addImageToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
|
|
"""Add an image element to Excel sheet using openpyxl."""
|
|
try:
|
|
base64Data = element.get("base64Data", "")
|
|
altText = element.get("altText", "Image")
|
|
caption = element.get("caption", "")
|
|
|
|
if not base64Data:
|
|
# No image data - add placeholder text
|
|
sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]")
|
|
return startRow + 1
|
|
|
|
try:
|
|
from openpyxl.drawing.image import Image as OpenpyxlImage
|
|
import base64
|
|
import io
|
|
|
|
# Decode base64 image data
|
|
imageBytes = base64.b64decode(base64Data)
|
|
imageStream = io.BytesIO(imageBytes)
|
|
|
|
# Create openpyxl Image
|
|
img = OpenpyxlImage(imageStream)
|
|
|
|
# Set image size (max width 6 inches, maintain aspect ratio)
|
|
maxWidth = 400 # pixels (approximately 6 inches at 72 DPI)
|
|
if img.width > maxWidth:
|
|
scale = maxWidth / img.width
|
|
img.width = maxWidth
|
|
img.height = int(img.height * scale)
|
|
|
|
# Anchor image to cell (A column, current row)
|
|
img.anchor = f'A{startRow}'
|
|
|
|
# Add image to sheet
|
|
sheet.add_image(img)
|
|
|
|
# Calculate height needed for image (approximate)
|
|
# Excel row height is in points (1/72 inch), image height is in pixels
|
|
# Assuming 72 DPI: pixels = points
|
|
imageHeightPoints = img.height / 1.33 # Approximate conversion
|
|
sheet.row_dimensions[startRow].height = max(15, imageHeightPoints) # Min 15 points
|
|
|
|
# Add caption below image if available
|
|
if caption:
|
|
startRow += 1
|
|
sheet.cell(row=startRow, column=1, value=caption)
|
|
sheet.cell(row=startRow, column=1).font = Font(italic=True, size=10)
|
|
sheet.cell(row=startRow, column=1).alignment = Alignment(horizontal="left")
|
|
elif altText and altText != "Image":
|
|
startRow += 1
|
|
sheet.cell(row=startRow, column=1, value=f"Figure: {altText}")
|
|
sheet.cell(row=startRow, column=1).font = Font(italic=True, size=10)
|
|
|
|
return startRow + 1
|
|
|
|
except ImportError:
|
|
self.logger.warning("openpyxl.drawing.image not available, using placeholder")
|
|
sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]")
|
|
return startRow + 1
|
|
except Exception as imgError:
|
|
self.logger.warning(f"Error embedding image in Excel: {str(imgError)}")
|
|
sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]")
|
|
return startRow + 1
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Could not add image to Excel: {str(e)}")
|
|
return startRow + 1
|
|
|
|
def _formatTimestamp(self) -> str:
|
|
"""Format current timestamp for document generation."""
|
|
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|