gateway/modules/services/serviceGeneration/renderers/rendererExcel.py
2025-10-12 00:51:23 +02:00

644 lines
28 KiB
Python

"""
Excel renderer for report generation using openpyxl.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
try:
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.table import Table, TableStyleInfo
OPENPYXL_AVAILABLE = True
except ImportError:
OPENPYXL_AVAILABLE = False
class RendererExcel(BaseRenderer):
    """Render structured report content to an Excel workbook using openpyxl.

    ``render`` is the public entry point. On success it returns the workbook
    as a base64-encoded string together with the XLSX MIME type; when openpyxl
    is not installed, or when rendering raises, it returns CSV text with the
    ``text/csv`` MIME type instead.
    """
@classmethod
def get_supported_formats(cls) -> List[str]:
    """List the format identifiers this renderer can produce."""
    supported_formats = ['xlsx', 'xls', 'excel']
    return supported_formats
@classmethod
def get_format_aliases(cls) -> List[str]:
    """List the alternative names under which this renderer can be requested."""
    aliases = ['spreadsheet', 'workbook']
    return aliases
@classmethod
def get_priority(cls) -> int:
    """Return the numeric priority of the Excel renderer (110)."""
    priority = 110
    return priority
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
    """Render extracted JSON content to an Excel workbook.

    Args:
        extracted_content: Structured document handed to
            ``_generate_excel_from_json`` (or to the CSV renderer).
        title: Document title forwarded to the generator; also used in the
            error fallback.
        user_prompt: Optional user prompt forwarded to the styling step.
        ai_service: Optional AI service forwarded to the styling step.

    Returns:
        A ``(content, mime_type)`` tuple. On success ``content`` is the
        base64-encoded workbook and ``mime_type`` is the XLSX MIME type.
        When openpyxl is unavailable the CSV renderer's content is returned,
        and when anything raises a two-row CSV describing the error is
        returned; both of these use ``text/csv``.
    """
    try:
        if not OPENPYXL_AVAILABLE:
            # Fallback to CSV if openpyxl not available
            from .rendererCsv import RendererCsv
            csv_renderer = RendererCsv()
            csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service)
            return csv_content, "text/csv"

        excel_content = await self._generate_excel_from_json(extracted_content, title, user_prompt, ai_service)
        return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    except Exception as e:
        self.logger.error(f"Error rendering Excel: {str(e)}")

        # Build the fallback with the csv module so that commas, quotes or
        # newlines in the title / error text are escaped instead of
        # silently corrupting the row structure.
        import csv

        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator="\n")
        writer.writerow(["Title", "Content"])
        writer.writerow([title, f"Error rendering Excel report: {str(e)}"])
        # Drop the final line terminator to keep the historical shape
        # (no trailing newline) for simple values.
        return buffer.getvalue().removesuffix("\n"), "text/csv"
def _generate_excel(self, content: str, title: str) -> str:
    """Build a Summary/Data/Analysis workbook from text and return it as base64.

    The default sheet is removed, the three sheets are created at positions
    0-2 and filled by the ``_populate_*_sheet`` helpers, and the saved
    workbook bytes are base64-encoded. Any error is logged and re-raised.
    """
    try:
        workbook = Workbook()
        # Drop the sheet openpyxl creates automatically so only ours remain.
        workbook.remove(workbook.active)

        summary, data, analysis = [
            workbook.create_sheet(sheet_title, position)
            for position, sheet_title in enumerate(("Summary", "Data", "Analysis"))
        ]

        self._populate_summary_sheet(summary, title)
        self._populate_data_sheet(data, content)
        self._populate_analysis_sheet(analysis, content)

        # Serialize in memory and hand back text-safe base64.
        stream = io.BytesIO()
        workbook.save(stream)
        return base64.b64encode(stream.getvalue()).decode('utf-8')
    except Exception as e:
        self.logger.error(f"Error generating Excel: {str(e)}")
        raise
def _populate_summary_sheet(self, sheet, title: str):
    """Fill the summary sheet: title, generation info and a key-metrics row.

    Failures are logged as warnings and not propagated.
    """
    try:
        # Title cell
        heading = sheet['A1']
        heading.value = title
        heading.font = Font(size=16, bold=True)
        heading.alignment = Alignment(horizontal='center')

        # Generation info
        sheet['A3'] = "Generated:"
        sheet['B3'] = self._format_timestamp()
        for ref, text in (('A4', "Status:"), ('B4', "Generated Successfully")):
            sheet[ref] = text

        # Key metrics placeholder
        metrics_label = sheet['A6']
        metrics_label.value = "Key Metrics:"
        metrics_label.font = Font(bold=True)
        sheet['A7'] = "Total Items:"
        # Formula counts the non-empty cells of column A on the Data sheet, minus the header.
        sheet['B7'] = "=COUNTA(Data!A:A)-1"

        # Fixed column widths
        for column, width in (('A', 20), ('B', 30)):
            sheet.column_dimensions[column].width = width
    except Exception as e:
        self.logger.warning(f"Could not populate summary sheet: {str(e)}")
def _populate_data_sheet(self, sheet, content: str):
    """Fill the data sheet with a fixed header row plus one row per content line.

    Lines containing ``|`` are treated as table rows and split into at most
    five columns; every other non-blank line is written to column A.
    Empty table cells keep their column position, and markdown separator
    rows such as ``|---|:--:|`` are skipped because they carry no data.
    Failures are logged as warnings and not propagated.

    Args:
        sheet: Worksheet to write into.
        content: Newline-separated text to import.
    """
    try:
        # Headers
        headers = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"]
        for col, header in enumerate(headers, 1):
            cell = sheet.cell(row=1, column=col, value=header)
            cell.font = Font(bold=True)
            cell.fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")

        separator_chars = {'-', ':'}
        row = 2
        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            if '|' not in line:
                # Regular content
                sheet.cell(row=row, column=1, value=line)
                row += 1
                continue

            # Remove only the outer pipes; filtering out empty cells would
            # shift the remaining values into the wrong columns.
            inner = line.removeprefix('|').removesuffix('|')
            cells = [part.strip() for part in inner.split('|')]

            # A markdown header/body separator consists solely of dashes and colons.
            if all(part and '-' in part and set(part) <= separator_chars for part in cells):
                continue

            for col, cell_data in enumerate(cells[:len(headers)], 1):
                if cell_data:
                    sheet.cell(row=row, column=col, value=cell_data)
            row += 1

        # Fixed column widths
        for col in range(1, 6):
            sheet.column_dimensions[get_column_letter(col)].width = 20
    except Exception as e:
        self.logger.warning(f"Could not populate data sheet: {str(e)}")
def _populate_analysis_sheet(self, sheet, content: str):
    """Fill the analysis sheet with simple line statistics and fixed recommendations.

    Column A receives a title, counts of total / table / list / text lines
    and three static recommendation lines. Failures are logged as warnings
    and not propagated.
    """
    try:
        # Title
        sheet['A1'] = "Analysis & Insights"
        sheet['A1'].font = Font(size=14, bold=True)

        all_lines = content.split('\n')

        # Content analysis header
        sheet['A3'] = "Content Analysis:"
        sheet['A3'].font = Font(bold=True)

        # Classify lines: anything with a pipe is a table row, '- ' / '* ' prefixes are list items.
        table_lines = len([entry for entry in all_lines if '|' in entry])
        list_lines = len([entry for entry in all_lines if entry.startswith(('- ', '* '))])
        text_lines = len(all_lines) - table_lines - list_lines

        statistics = (
            f"Total Lines: {len(all_lines)}",
            f"Table Rows: {table_lines}",
            f"List Items: {list_lines}",
            f"Text Lines: {text_lines}",
        )
        for line_no, text in enumerate(statistics, start=4):
            sheet[f'A{line_no}'] = text

        # Recommendations start after one blank row (row 8).
        sheet['A9'] = "Recommendations:"
        sheet['A9'].font = Font(bold=True)
        recommendations = (
            "1. Review data accuracy",
            "2. Consider additional analysis",
            "3. Update regularly",
        )
        for line_no, text in enumerate(recommendations, start=10):
            sheet[f'A{line_no}'] = text

        # Fixed column width
        sheet.column_dimensions['A'].width = 30
    except Exception as e:
        self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
"""Generate Excel content from structured JSON document using AI-generated styling."""
try:
# Debug output
print(f"🔍 EXCEL JSON CONTENT TYPE: {type(json_content)}")
print(f"🔍 EXCEL JSON CONTENT KEYS: {list(json_content.keys()) if isinstance(json_content, dict) else 'Not a dict'}")
# Get AI-generated styling definitions
styles = await self._get_excel_styles(user_prompt, ai_service)
# Validate JSON structure
if not isinstance(json_content, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
# Create workbook
wb = Workbook()
# Create sheets based on content
sheets = self._create_excel_sheets(wb, json_content, styles)
print(f"🔍 EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}")
# Populate sheets with content
self._populate_excel_sheets(sheets, json_content, styles)
# Save to buffer
buffer = io.BytesIO()
wb.save(buffer)
buffer.seek(0)
# Convert to base64
excel_bytes = buffer.getvalue()
print(f"🔍 EXCEL BYTES LENGTH: {len(excel_bytes)}")
try:
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
print(f"🔍 EXCEL BASE64 LENGTH: {len(excel_base64)}")
except Exception as b64_error:
print(f"🔍 BASE64 ENCODING ERROR: {b64_error}")
raise
return excel_base64
except Exception as e:
self.logger.error(f"Error generating Excel from JSON: {str(e)}")
raise Exception(f"Excel generation failed: {str(e)}")
async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
"""Get Excel styling definitions using base template AI styling."""
style_schema = {
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
"bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2},
"paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"},
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
}
style_template = self._create_ai_style_template("xlsx", user_prompt, style_schema)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_excel_styles())
# Convert colors to aRGB format and validate
styles = self._convert_colors_format(styles)
return self._validate_excel_styles_contrast(styles)
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert hex colors to aRGB format for Excel compatibility."""
try:
for style_name, style_config in styles.items():
if isinstance(style_config, dict):
for prop, value in style_config.items():
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel)
styles[style_name][prop] = f"FF{value[1:]}"
print(f"🔍 CONVERTED COLOR: {value}{styles[style_name][prop]}")
return styles
except Exception as e:
print(f"🔍 COLOR CONVERSION ERROR: {e}")
return styles
def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
bg_color = header.get("background", "#FFFFFF")
text_color = header.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
bg_color = cell.get("background", "#FFFFFF")
text_color = cell.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
return styles
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_excel_styles()
def _get_default_excel_styles(self) -> Dict[str, Any]:
"""Default Excel styles with aRGB color format."""
return {
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
"bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2},
"paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"},
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
}
def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
"""Create Excel sheets based on content structure and user intent."""
sheets = {}
# Get sheet names from AI styles or generate based on content
sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content))
print(f"🔍 EXCEL SHEET NAMES: {sheet_names}")
# Create sheets
for i, sheet_name in enumerate(sheet_names):
if i == 0:
# Use the default sheet for the first sheet
sheet = wb.active
sheet.title = sheet_name
else:
# Create additional sheets
sheet = wb.create_sheet(sheet_name, i)
sheets[sheet_name.lower()] = sheet
return sheets
def _generate_sheet_names_from_content(self, json_content: Dict[str, Any]) -> List[str]:
"""Generate sheet names based on actual content structure."""
sections = json_content.get("sections", [])
# If no sections, create a single sheet
if not sections:
return ["Content"]
# Generate sheet names based on content types
sheet_names = []
# Always start with a main content sheet
document_title = json_content.get("metadata", {}).get("title", "Document")
sheet_names.append(document_title[:31]) # Excel sheet name limit
# Add sheets based on content types found
content_types = set()
for section in sections:
content_type = section.get("content_type", "paragraph")
content_types.add(content_type)
# Create sheets for different content types if we have multiple types
if len(content_types) > 1:
if "table" in content_types:
sheet_names.append("Tables")
if "list" in content_types:
sheet_names.append("Lists")
if "paragraph" in content_types or "heading" in content_types:
sheet_names.append("Text")
# Limit to 4 sheets maximum
return sheet_names[:4]
def _populate_excel_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Populate Excel sheets with content from JSON based on actual sheet names."""
try:
# Get the actual sheet names that were created
sheet_names = list(sheets.keys())
if not sheet_names:
return
# Populate the first sheet with all content
first_sheet_name = sheet_names[0]
self._populate_main_sheet(sheets[first_sheet_name], json_content, styles)
# If we have multiple sheets, distribute content by type
if len(sheet_names) > 1:
self._populate_content_type_sheets(sheets, json_content, styles, sheet_names[1:])
except Exception as e:
self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
def _populate_main_sheet(self, sheet, json_content: Dict[str, Any], styles: Dict[str, Any]):
    """Fill the main sheet with title, generation info, metadata, overview and all sections.

    Layout: the title in A1, generation info in rows 3-4, an optional
    "Document Information" block from row 6, a "Content Overview" block with
    per-type section counts, and finally every section written through
    ``_add_section_to_sheet``. Missing title-style keys fall back to
    defaults, and a leading ``#`` on the title colour is removed because
    openpyxl rejects it. Failures are logged as warnings and not propagated.

    Args:
        sheet: Worksheet to write into.
        json_content: Document dictionary with optional ``metadata`` and
            ``sections`` entries.
        styles: Style mapping; only the ``title`` entry is read here, the
            whole mapping is forwarded for the sections.
    """
    try:
        raw_metadata = json_content.get("metadata")
        metadata = raw_metadata if isinstance(raw_metadata, dict) else {}

        # Document title
        sheet['A1'] = metadata.get("title") or "Generated Report"

        title_style = styles.get("title") or {}
        self.logger.debug("Excel title style: %s", title_style)
        sheet['A1'].font = Font(
            size=title_style.get("font_size", 16),
            bold=title_style.get("bold", True),
            color=str(title_style.get("color") or "FF1F4E79").lstrip('#'),
        )
        sheet['A1'].alignment = Alignment(horizontal=title_style.get("align", "center"))

        # Generation info
        sheet['A3'] = "Generated:"
        sheet['B3'] = self._format_timestamp()
        sheet['A4'] = "Status:"
        sheet['B4'] = "Generated Successfully"

        # Document metadata. `row` is initialized up front so the overview
        # below also works when there is no metadata block.
        row = 5
        if metadata:
            sheet['A6'] = "Document Information:"
            sheet['A6'].font = Font(bold=True)
            row = 7
            for key, value in metadata.items():
                if key != "title":
                    sheet[f'A{row}'] = f"{str(key).title()}:"
                    sheet[f'B{row}'] = str(value)
                    row += 1

        # Content overview
        sections = json_content.get("sections", [])
        sheet[f'A{row + 1}'] = "Content Overview:"
        sheet[f'A{row + 1}'].font = Font(bold=True)
        row += 2
        sheet[f'A{row}'] = f"Total Sections: {len(sections)}"

        # Count different content types
        content_types = {}
        for section in sections:
            content_type = section.get("content_type", "unknown")
            content_types[content_type] = content_types.get(content_type, 0) + 1

        for content_type, count in content_types.items():
            row += 1
            sheet[f'A{row}'] = f"{str(content_type).title()} Sections: {count}"

        # Add all content to this sheet
        row += 2
        for section in sections:
            row = self._add_section_to_sheet(sheet, section, styles, row)
            row += 1  # Empty row between sections

        # Fixed column widths
        sheet.column_dimensions['A'].width = 20
        sheet.column_dimensions['B'].width = 30
    except Exception as e:
        self.logger.warning(f"Could not populate main sheet: {str(e)}")
def _populate_content_type_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any], sheet_names: List[str]):
    """Fill the additional sheets, each with the sections matching its name.

    ``tables`` receives table sections, ``lists`` list sections and ``text``
    paragraph and heading sections; any other sheet name receives every
    section. Names missing from ``sheets`` are skipped. Failures are logged
    as warnings and not propagated.
    """
    try:
        all_sections = json_content.get("sections", [])
        # Which content types belong on which well-known sheet.
        types_for_sheet = {
            "tables": ("table",),
            "lists": ("list",),
            "text": ("paragraph", "heading"),
        }

        for name in sheet_names:
            if name not in sheets:
                continue

            target = sheets[name]
            target['A1'] = name.title()
            target['A1'].font = Font(size=16, bold=True)

            if name in types_for_sheet:
                accepted = types_for_sheet[name]
                selected = [entry for entry in all_sections if entry.get("content_type") in accepted]
            else:
                selected = all_sections

            next_row = 3
            for section in selected:
                # One empty row is left after every section.
                next_row = self._add_section_to_sheet(target, section, styles, next_row) + 1

            # Fixed column widths
            for column_index in range(1, 6):
                target.column_dimensions[get_column_letter(column_index)].width = 20
    except Exception as e:
        self.logger.warning(f"Could not populate content type sheets: {str(e)}")
def _add_section_to_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
"""Add a section to a sheet and return the next row."""
try:
# Add section title
section_title = section.get("title")
if section_title:
sheet[f'A{start_row}'] = f"# {section_title}"
sheet[f'A{start_row}'].font = Font(bold=True)
start_row += 1
# Process section elements
elements = section.get("elements", [])
content_type = section.get("content_type", "paragraph")
for element in elements:
if content_type == "table":
start_row = self._add_table_to_excel(sheet, element, styles, start_row)
elif content_type == "list":
start_row = self._add_list_to_excel(sheet, element, styles, start_row)
elif content_type == "paragraph":
start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
elif content_type == "heading":
start_row = self._add_heading_to_excel(sheet, element, styles, start_row)
else:
start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
return start_row
except Exception as e:
self.logger.warning(f"Could not add section to sheet: {str(e)}")
return start_row + 1
def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
    """Write a table element (header row plus data rows) and return the next free row.

    The header text colour is applied whether or not the header is bold, so
    a dark header background never ends up with default black text. Colours
    have a leading ``#`` removed because openpyxl rejects it. Nested values
    (dict, list, tuple, set) are written as their string form instead of
    aborting the table. No header row is emitted when there are no headers.
    On failure a warning is logged and the row after the current one is
    returned.

    Args:
        sheet: Worksheet to write into.
        element: Table element; ``element["data"]`` holds ``headers`` and ``rows``.
        styles: Style mapping; ``table_header`` and ``table_cell`` are read.
        start_row: First row to write to.
    """
    try:
        table_data = element.get("data", {})
        headers = table_data.get("headers", [])
        rows = table_data.get("rows", [])

        if not headers and not rows:
            return start_row

        def argb(value, fallback):
            """Return the colour without a leading '#', or the fallback."""
            return str(value or fallback).lstrip('#')

        def plain(value):
            """Turn container values into text; leave scalars untouched."""
            return str(value) if isinstance(value, (dict, list, tuple, set)) else value

        # Header row
        if headers:
            header_style = styles.get("table_header", {})
            header_font = Font(
                bold=bool(header_style.get("bold")),
                color=argb(header_style.get("text_color"), "FF000000"),
            )
            background = header_style.get("background")
            header_fill = None
            if background:
                fill_color = argb(background, "FFFFFFFF")
                header_fill = PatternFill(start_color=fill_color, end_color=fill_color, fill_type="solid")

            for col, header in enumerate(headers, 1):
                cell = sheet.cell(row=start_row, column=col, value=plain(header))
                cell.font = header_font
                if header_fill is not None:
                    cell.fill = header_fill
            start_row += 1

        # Data rows
        cell_style = styles.get("table_cell", {})
        text_color = cell_style.get("text_color")
        cell_font = Font(color=argb(text_color, "FF000000")) if text_color else None
        for row_data in rows:
            # A scalar row is treated as a single-cell row.
            values = row_data if isinstance(row_data, (list, tuple)) else [row_data]
            for col, cell_value in enumerate(values, 1):
                cell = sheet.cell(row=start_row, column=col, value=plain(cell_value))
                if cell_font is not None:
                    cell.font = cell_font
            start_row += 1

        return start_row
    except Exception as e:
        self.logger.warning(f"Could not add table to Excel: {str(e)}")
        return start_row + 1
def _add_list_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
    """Write each list item to column A and return the next free row.

    The ``bullet_list`` colour, if any, is applied with a leading ``#``
    removed because openpyxl rejects it. On failure a warning is logged and
    the row after the current one is returned.

    Args:
        sheet: Worksheet to write into.
        element: List element; ``element["items"]`` holds the entries.
        styles: Style mapping; ``bullet_list`` is read.
        start_row: First row to write to.
    """
    try:
        list_items = element.get("items", [])
        list_style = styles.get("bullet_list", {})

        color = list_style.get("color")
        # Build the font once; it is the same for every item.
        item_font = Font(color=str(color).lstrip('#')) if color else None

        for item in list_items:
            cell = sheet.cell(row=start_row, column=1, value=f"{item}")
            if item_font is not None:
                cell.font = item_font
            start_row += 1

        return start_row
    except Exception as e:
        self.logger.warning(f"Could not add list to Excel: {str(e)}")
        return start_row + 1
def _add_paragraph_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
    """Write a paragraph's text to column A and return the next free row.

    Elements without text write nothing and leave the row unchanged. The
    ``paragraph`` colour, if any, is applied with a leading ``#`` removed
    because openpyxl rejects it. On failure a warning is logged and the row
    after the current one is returned.

    Args:
        sheet: Worksheet to write into.
        element: Paragraph element; ``element["text"]`` holds the text.
        styles: Style mapping; ``paragraph`` is read.
        start_row: Row to write to.
    """
    try:
        text = element.get("text", "")
        if not text:
            return start_row

        cell = sheet.cell(row=start_row, column=1, value=text)
        color = styles.get("paragraph", {}).get("color")
        if color:
            cell.font = Font(color=str(color).lstrip('#'))
        return start_row + 1
    except Exception as e:
        self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
        return start_row + 1
def _add_heading_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
    """Write a heading in bold to column A and return the next free row.

    The font size starts at the ``heading`` style's ``font_size`` (14 when
    absent) and shrinks by 2 points per level below the first, but never
    under 10. A level that cannot be read as an integer is treated as 1.
    The colour has a leading ``#`` removed because openpyxl rejects it.
    Elements without text write nothing. On failure a warning is logged and
    the row after the current one is returned.

    Args:
        sheet: Worksheet to write into.
        element: Heading element with ``text`` and optional ``level``.
        styles: Style mapping; ``heading`` is read.
        start_row: Row to write to.
    """
    try:
        text = element.get("text", "")
        if not text:
            return start_row

        try:
            level = int(element.get("level", 1))
        except (TypeError, ValueError):
            level = 1

        heading_style = styles.get("heading", {})
        font_size = heading_style.get("font_size", 14)
        if level > 1:
            font_size = max(10, font_size - (level - 1) * 2)

        cell = sheet.cell(row=start_row, column=1, value=text)
        cell.font = Font(
            size=font_size,
            bold=True,
            color=str(heading_style.get("color") or "FF000000").lstrip('#'),
        )
        return start_row + 1
    except Exception as e:
        self.logger.warning(f"Could not add heading to Excel: {str(e)}")
        return start_row + 1
def _format_timestamp(self) -> str:
"""Format current timestamp for document generation."""
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")