gateway/modules/services/serviceGeneration/renderers/excel_renderer.py
2025-10-11 23:39:13 +02:00

565 lines
24 KiB
Python

"""
Excel renderer for report generation using openpyxl.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
try:
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.table import Table, TableStyleInfo
OPENPYXL_AVAILABLE = True
except ImportError:
OPENPYXL_AVAILABLE = False
class ExcelRenderer(BaseRenderer):
"""Renders content to Excel format using openpyxl."""
@classmethod
def get_supported_formats(cls) -> List[str]:
"""Return supported Excel formats."""
return ['xlsx', 'xls', 'excel']
@classmethod
def get_format_aliases(cls) -> List[str]:
"""Return format aliases."""
return ['spreadsheet', 'workbook']
@classmethod
def get_priority(cls) -> int:
"""Return priority for Excel renderer."""
return 110
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
"""Return only Excel-specific guidelines; global prompt is built centrally."""
return (
"EXCEL FORMAT GUIDELINES:\n"
"- Extract structured data from source documents into JSON format\n"
"- Focus on tabular data, lists, and structured information suitable for spreadsheets\n"
"- For tables: Extract headers and rows as separate arrays with clear column names\n"
"- For lists: Extract items with optional sub-items and metadata\n"
"- Structure content into sections with clear content types (table, list, paragraph)\n"
"- Use proper JSON structure with metadata, sections, and elements\n"
"- Ensure data is clean and ready for Excel conversion with proper formatting\n"
"OUTPUT: Return structured JSON that can be converted to Excel format."
)
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
    """Render extracted JSON content to Excel format using AI-analyzed styling.

    Args:
        extracted_content: Structured document passed on to the generator.
        title: Title forwarded to the generator and used in the error fallback.
        user_prompt: Original user request, forwarded for style selection.
        ai_service: Optional AI service forwarded for style selection.

    Returns:
        ``(content, mime_type)``. On success ``content`` is what
        ``_generate_excel_from_json`` returns, paired with the xlsx MIME type.
        When openpyxl is not importable the CSV renderer's output is returned
        as ``text/csv``. On any error a two-row CSV describing the failure is
        returned as ``text/csv``; this method does not raise.
    """
    try:
        if not OPENPYXL_AVAILABLE:
            # Fallback to CSV if openpyxl not available
            from .csv_renderer import CsvRenderer
            csv_renderer = CsvRenderer()
            csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service)
            return csv_content, "text/csv"
        # Generate Excel using AI-analyzed styling
        excel_content = await self._generate_excel_from_json(extracted_content, title, user_prompt, ai_service)
        return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    except Exception as e:
        self.logger.error(f"Error rendering Excel: {str(e)}")
        # Build the fallback with the csv module so that commas, quotes or
        # newlines in the title / error text cannot corrupt the CSV layout.
        import csv
        fallback = io.StringIO()
        writer = csv.writer(fallback, lineterminator="\n")
        writer.writerow(["Title", "Content"])
        writer.writerow([str(title), f"Error rendering Excel report: {e}"])
        # Drop the trailing row terminator to keep the historical shape
        # ("header\nrow" without a final newline).
        return fallback.getvalue().rstrip("\n"), "text/csv"
def _generate_excel(self, content: str, title: str) -> str:
    """Build a Summary / Data / Analysis workbook from plain text.

    Returns the saved workbook as a base64-encoded UTF-8 string. Any failure
    is logged and re-raised unchanged.
    """
    try:
        workbook = Workbook()
        # Discard the sheet openpyxl creates automatically; only the three
        # named sheets below should exist.
        workbook.remove(workbook.active)

        summary = workbook.create_sheet("Summary", 0)
        data = workbook.create_sheet("Data", 1)
        analysis = workbook.create_sheet("Analysis", 2)

        self._populate_summary_sheet(summary, title)
        self._populate_data_sheet(data, content)
        self._populate_analysis_sheet(analysis, content)

        # Serialise in memory, then base64-encode the raw bytes.
        stream = io.BytesIO()
        workbook.save(stream)
        return base64.b64encode(stream.getvalue()).decode('utf-8')
    except Exception as e:
        self.logger.error(f"Error generating Excel: {str(e)}")
        raise
def _populate_summary_sheet(self, sheet, title: str):
    """Write the title, generation info and a key-metric formula to ``sheet``.

    Failures are logged as warnings and swallowed, so a partially filled
    sheet is possible.
    """
    try:
        # Title cell
        heading = sheet['A1']
        heading.value = title
        heading.font = Font(size=16, bold=True)
        heading.alignment = Alignment(horizontal='center')

        # Generation info
        sheet['A3'].value = "Generated:"
        sheet['B3'].value = self._format_timestamp()
        sheet['A4'].value = "Status:"
        sheet['B4'].value = "Generated Successfully"

        # Key metrics placeholder
        metrics_label = sheet['A6']
        metrics_label.value = "Key Metrics:"
        metrics_label.font = Font(bold=True)
        sheet['A7'].value = "Total Items:"
        # Counts non-empty cells in column A of the Data sheet, minus one.
        sheet['B7'].value = "=COUNTA(Data!A:A)-1"

        # Fixed column widths
        for letter, width in (("A", 20), ("B", 30)):
            sheet.column_dimensions[letter].width = width
    except Exception as e:
        self.logger.warning(f"Could not populate summary sheet: {str(e)}")
def _populate_data_sheet(self, sheet, content: str):
    """Write a fixed header row followed by one row per non-blank content line.

    Lines containing ``|`` are split into at most five cells; any other line
    goes into column A. Failures are logged as warnings and swallowed.
    """
    try:
        column_titles = ("Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments")
        for index, caption in enumerate(column_titles, start=1):
            header_cell = sheet.cell(row=1, column=index, value=caption)
            header_cell.font = Font(bold=True)
            header_cell.fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")

        next_row = 2
        for raw_line in content.split('\n'):
            text = raw_line.strip()
            if not text:
                continue
            if '|' in text:
                # Pipe-delimited line: keep non-empty parts, at most 5 columns.
                parts = [part.strip() for part in text.split('|') if part.strip()]
                row_values = parts[:5]
            else:
                row_values = [text]
            for column, value in enumerate(row_values, start=1):
                sheet.cell(row=next_row, column=column, value=value)
            next_row += 1

        # Fixed width for the five data columns
        for letter in map(get_column_letter, range(1, 6)):
            sheet.column_dimensions[letter].width = 20
    except Exception as e:
        self.logger.warning(f"Could not populate data sheet: {str(e)}")
def _populate_analysis_sheet(self, sheet, content: str):
    """Write simple line statistics and fixed recommendations to ``sheet``.

    Failures are logged as warnings and swallowed.
    """
    try:
        banner = sheet['A1']
        banner.value = "Analysis & Insights"
        banner.font = Font(size=14, bold=True)

        lines = content.split('\n')

        section_label = sheet['A3']
        section_label.value = "Content Analysis:"
        section_label.font = Font(bold=True)

        # Classify lines: any line with a pipe counts as a table row, any
        # line starting with "- " or "* " counts as a list item.
        table_lines = len([text for text in lines if '|' in text])
        list_lines = len([text for text in lines if text.startswith(('- ', '* '))])
        text_lines = len(lines) - table_lines - list_lines

        statistics = (
            f"Total Lines: {len(lines)}",
            f"Table Rows: {table_lines}",
            f"List Items: {list_lines}",
            f"Text Lines: {text_lines}",
        )
        first_stat_row = 4
        for offset, text in enumerate(statistics):
            sheet[f'A{first_stat_row + offset}'] = text

        # One blank row separates the statistics from the recommendations.
        heading_row = first_stat_row + len(statistics) + 1
        sheet[f'A{heading_row}'] = "Recommendations:"
        sheet[f'A{heading_row}'].font = Font(bold=True)
        recommendations = (
            "1. Review data accuracy",
            "2. Consider additional analysis",
            "3. Update regularly",
        )
        for offset, text in enumerate(recommendations, start=1):
            sheet[f'A{heading_row + offset}'] = text

        sheet.column_dimensions['A'].width = 30
    except Exception as e:
        self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
"""Generate Excel content from structured JSON document using AI-generated styling."""
try:
# Get AI-generated styling definitions
styles = await self._get_excel_styles(user_prompt, ai_service)
# Validate JSON structure
if not isinstance(json_content, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
# Create workbook
wb = Workbook()
# Remove default sheet
wb.remove(wb.active)
# Create sheets based on content
sheets = self._create_excel_sheets(wb, json_content, styles)
# Populate sheets with content
self._populate_excel_sheets(sheets, json_content, styles)
# Save to buffer
buffer = io.BytesIO()
wb.save(buffer)
buffer.seek(0)
# Convert to base64
excel_bytes = buffer.getvalue()
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
return excel_base64
except Exception as e:
self.logger.error(f"Error generating Excel from JSON: {str(e)}")
raise Exception(f"Excel generation failed: {str(e)}")
async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
"""Simple AI call to get Excel styling definitions."""
if not ai_service:
return self._get_default_excel_styles()
try:
prompt = f"""
For this Excel document request: "{user_prompt}"
Provide styling definitions for Excel elements. Respond with ONLY JSON:
{{
"title": {{"font_size": 16, "color": "#1F4E79", "bold": true, "align": "center"}},
"heading": {{"font_size": 14, "color": "#2F2F2F", "bold": true, "align": "left"}},
"table_header": {{"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": true, "align": "center"}},
"table_cell": {{"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": false, "align": "left"}},
"bullet_list": {{"font_size": 11, "color": "#2F2F2F", "indent": 2}},
"paragraph": {{"font_size": 11, "color": "#2F2F2F", "bold": false, "align": "left"}},
"code_block": {{"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}}
}}
CRITICAL: Table headers must have dark background with light text, table cells must have light background with dark text for readability.
"""
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
request = AiCallRequest(prompt=prompt, context="", options=request_options)
response = await ai_service.aiObjects.call(request)
import json
import re
# Clean and parse JSON
result = response.content.strip()
if result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
elif result.startswith('```'):
result = re.sub(r'^```\s*', '', result)
result = re.sub(r'\s*```$', '', result)
styles = json.loads(result)
# Validate and fix contrast issues
styles = self._validate_excel_styles_contrast(styles)
return styles
except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return self._get_default_excel_styles()
def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
bg_color = header.get("background", "#FFFFFF")
text_color = header.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
bg_color = cell.get("background", "#FFFFFF")
text_color = cell.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
return styles
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_excel_styles()
def _get_default_excel_styles(self) -> Dict[str, Any]:
"""Default Excel styles."""
return {
"title": {"font_size": 16, "color": "#1F4E79", "bold": True, "align": "center"},
"heading": {"font_size": 14, "color": "#2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 2},
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
}
def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
"""Create Excel sheets based on content structure and user intent."""
sheets = {}
# Get sheet names from AI styles or generate based on content
sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content))
# Create sheets
for i, sheet_name in enumerate(sheet_names):
if i == 0:
sheet = wb.active
sheet.title = sheet_name
else:
sheet = wb.create_sheet(sheet_name, i)
sheets[sheet_name.lower()] = sheet
return sheets
def _generate_sheet_names_from_content(self, json_content: Dict[str, Any]) -> List[str]:
"""Generate sheet names based on actual content structure."""
sections = json_content.get("sections", [])
# If no sections, create a single sheet
if not sections:
return ["Content"]
# Generate sheet names based on content types
sheet_names = []
# Always start with a main content sheet
document_title = json_content.get("metadata", {}).get("title", "Document")
sheet_names.append(document_title[:31]) # Excel sheet name limit
# Add sheets based on content types found
content_types = set()
for section in sections:
content_type = section.get("content_type", "paragraph")
content_types.add(content_type)
# Create sheets for different content types if we have multiple types
if len(content_types) > 1:
if "table" in content_types:
sheet_names.append("Tables")
if "list" in content_types:
sheet_names.append("Lists")
if "paragraph" in content_types or "heading" in content_types:
sheet_names.append("Text")
# Limit to 4 sheets maximum
return sheet_names[:4]
def _populate_excel_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Populate Excel sheets with content from JSON based on actual sheet names."""
try:
# Get the actual sheet names that were created
sheet_names = list(sheets.keys())
if not sheet_names:
return
# Populate the first sheet with all content
first_sheet_name = sheet_names[0]
self._populate_main_sheet(sheets[first_sheet_name], json_content, styles)
# If we have multiple sheets, distribute content by type
if len(sheet_names) > 1:
self._populate_content_type_sheets(sheets, json_content, styles, sheet_names[1:])
except Exception as e:
self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
def _populate_main_sheet(self, sheet, json_content: Dict[str, Any], styles: Dict[str, Any]):
    """Populate the main sheet with document overview and all content.

    Layout: title in A1, generation info in rows 3-4, optional metadata block
    from row 6, then a content overview, then every section via
    ``_add_section_to_sheet``.

    Args:
        sheet: Worksheet to fill.
        json_content: Document dictionary (``metadata`` and ``sections``).
        styles: Style dictionary; only the ``"title"`` entry is read here and
            missing attributes fall back to built-in values.

    Failures are logged as warnings and swallowed.
    """
    try:
        metadata = json_content.get("metadata") or {}
        sections = json_content.get("sections", [])

        # openpyxl accepts only bare 6- or 8-digit hex colours, so the
        # leading '#' of the style value is stripped; anything that is still
        # not valid hex falls back to the default title colour.
        title_style = styles.get("title") or {}
        title_color = str(title_style.get("color", "1F4E79")).lstrip("#")
        if len(title_color) not in (6, 8) or not set(title_color) <= set("0123456789abcdefABCDEF"):
            title_color = "1F4E79"

        # Document title
        sheet['A1'] = metadata.get("title", "Generated Report")
        sheet['A1'].font = Font(
            size=title_style.get("font_size", 16),
            bold=title_style.get("bold", True),
            color=title_color,
        )
        sheet['A1'].alignment = Alignment(horizontal=title_style.get("align", "center"))

        # Generation info
        sheet['A3'] = "Generated:"
        sheet['B3'] = self._format_timestamp()
        sheet['A4'] = "Status:"
        sheet['B4'] = "Generated Successfully"

        # Document metadata. `row` is initialised up front so the overview
        # and the sections are still written when there is no metadata.
        row = 5
        if metadata:
            sheet['A6'] = "Document Information:"
            sheet['A6'].font = Font(bold=True)
            row = 7
            for key, value in metadata.items():
                if key != "title":
                    sheet[f'A{row}'] = f"{str(key).title()}:"
                    sheet[f'B{row}'] = str(value)
                    row += 1

        # Content overview
        sheet[f'A{row + 1}'] = "Content Overview:"
        sheet[f'A{row + 1}'].font = Font(bold=True)
        row += 2
        sheet[f'A{row}'] = f"Total Sections: {len(sections)}"

        # Count different content types
        content_types = {}
        for section in sections:
            content_type = section.get("content_type", "unknown")
            content_types[content_type] = content_types.get(content_type, 0) + 1
        for content_type, count in content_types.items():
            row += 1
            sheet[f'A{row}'] = f"{str(content_type).title()} Sections: {count}"

        # Add all content to this sheet
        row += 2
        for section in sections:
            row = self._add_section_to_sheet(sheet, section, styles, row)
            row += 1  # Empty row between sections

        # Fixed column widths
        sheet.column_dimensions['A'].width = 20
        sheet.column_dimensions['B'].width = 30
    except Exception as e:
        self.logger.warning(f"Could not populate main sheet: {str(e)}")
def _populate_content_type_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any], sheet_names: List[str]):
"""Populate additional sheets based on content types."""
try:
sections = json_content.get("sections", [])
for sheet_name in sheet_names:
if sheet_name not in sheets:
continue
sheet = sheets[sheet_name]
sheet_title = sheet_name.title()
sheet['A1'] = sheet_title
sheet['A1'].font = Font(size=16, bold=True)
row = 3
# Filter sections by content type
if sheet_name == "tables":
filtered_sections = [s for s in sections if s.get("content_type") == "table"]
elif sheet_name == "lists":
filtered_sections = [s for s in sections if s.get("content_type") == "list"]
elif sheet_name == "text":
filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]]
else:
filtered_sections = sections
for section in filtered_sections:
row = self._add_section_to_sheet(sheet, section, styles, row)
row += 1 # Empty row between sections
# Auto-adjust column widths
for col in range(1, 6):
sheet.column_dimensions[get_column_letter(col)].width = 20
except Exception as e:
self.logger.warning(f"Could not populate content type sheets: {str(e)}")
def _add_section_to_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
"""Add a section to a sheet and return the next row."""
try:
# Add section title
section_title = section.get("title")
if section_title:
sheet[f'A{start_row}'] = f"# {section_title}"
sheet[f'A{start_row}'].font = Font(bold=True)
start_row += 1
# Process section elements
elements = section.get("elements", [])
content_type = section.get("content_type", "paragraph")
for element in elements:
if content_type == "table":
start_row = self._add_table_to_excel(sheet, element, styles, start_row)
elif content_type == "list":
start_row = self._add_list_to_excel(sheet, element, styles, start_row)
elif content_type == "paragraph":
start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
elif content_type == "heading":
start_row = self._add_heading_to_excel(sheet, element, styles, start_row)
else:
start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
return start_row
except Exception as e:
self.logger.warning(f"Could not add section to sheet: {str(e)}")
return start_row + 1
def _format_timestamp(self) -> str:
"""Format current timestamp for document generation."""
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")