gateway/modules/services/serviceGeneration/renderers/excel_renderer.py
2025-10-06 15:39:25 +02:00

210 lines
8 KiB
Python

"""
Excel renderer for report generation using openpyxl.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
# openpyxl is an optional dependency. OPENPYXL_AVAILABLE records whether the
# import succeeded so that ExcelRenderer.render can fall back to CSV output
# when the package is missing.
try:
    from openpyxl import Workbook
    from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
    from openpyxl.utils import get_column_letter
    from openpyxl.worksheet.table import Table, TableStyleInfo
except ImportError:
    OPENPYXL_AVAILABLE = False
else:
    # Only reached when every import above succeeded.
    OPENPYXL_AVAILABLE = True
class ExcelRenderer(BaseRenderer):
    """Render extracted report content to an Excel workbook using openpyxl.

    The generated workbook has three sheets: "Summary", "Data" and
    "Analysis". When openpyxl is not installed, ``render`` delegates to the
    CSV renderer instead.

    NOTE(review): ``self.logger`` and ``self._format_timestamp()`` are not
    defined in this class; presumably they come from ``BaseRenderer`` —
    confirm against the base class.
    """

    # Fixed header row of the "Data" sheet. Its length is also the maximum
    # number of columns copied from each pipe-delimited content row.
    _DATA_HEADERS = (
        "Item/Category",
        "Value/Amount",
        "Percentage",
        "Source Document",
        "Notes/Comments",
    )
    _XLSX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Excel formats."""
        return ['xlsx', 'xls', 'excel']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['spreadsheet', 'workbook']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for Excel renderer."""
        return 110

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only Excel-specific guidelines; global prompt is built centrally.

        ``user_prompt`` and ``title`` are accepted for interface compatibility
        but are not used by this renderer.
        """
        return (
            "EXCEL FORMAT GUIDELINES:\n"
            "- Output one or more pipe-delimited tables with a single header row.\n"
            "- Let user intent define columns; use clear names and ISO dates.\n"
            "- Separate multiple tables by a single blank line.\n"
            "- No markdown/HTML/code fences; tables only unless user explicitly asks for notes.\n"
            "OUTPUT: Return ONLY pipe-delimited tables suitable for import."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to Excel format.

        Args:
            extracted_content: Text to place in the workbook; lines containing
                ``|`` are treated as table rows.
            title: Report title shown on the summary sheet.

        Returns:
            A ``(content, mime_type)`` tuple. On success ``content`` is the
            base64-encoded .xlsx file. If openpyxl is unavailable the CSV
            renderer's output is returned, and if rendering fails a small
            two-row CSV describing the error is returned; both use
            ``"text/csv"``.
        """
        try:
            if not OPENPYXL_AVAILABLE:
                # Fallback to CSV if openpyxl not available
                from .csv_renderer import CsvRenderer

                csv_content, _ = await CsvRenderer().render(extracted_content, title)
                return csv_content, "text/csv"

            return self._generate_excel(extracted_content, title), self._XLSX_MIME_TYPE
        except Exception as e:
            # Top-level boundary: never propagate, report the failure as CSV.
            self.logger.error(f"Error rendering Excel: {e}")
            return self._error_csv(title, e), "text/csv"

    @staticmethod
    def _error_csv(title: str, error: Exception) -> str:
        """Build the error-fallback CSV with proper quoting of both fields."""
        import csv

        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator="\n")
        writer.writerow(["Title", "Content"])
        writer.writerow([title, f"Error rendering Excel report: {error}"])
        # Drop the final line terminator so simple inputs produce exactly the
        # same text as the previous hand-built string.
        return buffer.getvalue().rstrip("\n")

    def _generate_excel(self, content: str, title: str) -> str:
        """Build the three-sheet workbook and return it base64-encoded.

        Raises:
            Exception: Any error from workbook creation or saving is logged
                and re-raised for ``render`` to handle.
        """
        try:
            wb = Workbook()
            # Remove the default sheet so only the named sheets remain.
            wb.remove(wb.active)

            summary_sheet = wb.create_sheet("Summary", 0)
            data_sheet = wb.create_sheet("Data", 1)
            analysis_sheet = wb.create_sheet("Analysis", 2)

            self._populate_summary_sheet(summary_sheet, title)
            self._populate_data_sheet(data_sheet, content)
            self._populate_analysis_sheet(analysis_sheet, content)

            buffer = io.BytesIO()
            wb.save(buffer)
            # getvalue() returns the whole buffer regardless of position.
            return base64.b64encode(buffer.getvalue()).decode('utf-8')
        except Exception as e:
            self.logger.error(f"Error generating Excel: {e}")
            raise

    @staticmethod
    def _write_text(sheet, row: int, column: int, text):
        """Write ``text`` to a cell as literal text and return the cell.

        openpyxl types a string that starts with "=" as a formula. Content
        such as "=== Heading ===" would then be saved as an invalid formula,
        so such values are forced back to the string data type.
        """
        cell = sheet.cell(row=row, column=column, value=text)
        if isinstance(text, str) and text.startswith("="):
            cell.data_type = "s"
        return cell

    @staticmethod
    def _split_table_row(line: str) -> List[str]:
        """Split a stripped pipe-delimited line into stripped cell strings.

        One leading and one trailing border pipe are removed first; empty
        interior cells are kept so later values stay in their columns.
        """
        if line.startswith("|"):
            line = line[1:]
        if line.endswith("|"):
            line = line[:-1]
        return [cell.strip() for cell in line.split("|")]

    @staticmethod
    def _is_separator_row(cells: List[str]) -> bool:
        """Return True for markdown header separators such as ``|---|:---:|``."""
        return all(
            cell.count("-") >= 3 and not cell.strip("-:")
            for cell in cells
        )

    def _populate_summary_sheet(self, sheet, title: str):
        """Populate the summary sheet with title, generation info and metrics.

        Best-effort: any failure is logged as a warning and swallowed.
        """
        try:
            # Title (kept literal even if it happens to start with "=")
            title_cell = self._write_text(sheet, 1, 1, title)
            title_cell.font = Font(size=16, bold=True)
            title_cell.alignment = Alignment(horizontal='center')

            # Generation info
            sheet['A3'] = "Generated:"
            sheet['B3'] = self._format_timestamp()
            sheet['A4'] = "Status:"
            sheet['B4'] = "Generated Successfully"

            # Key metrics
            sheet['A6'] = "Key Metrics:"
            sheet['A6'].font = Font(bold=True)
            sheet['A7'] = "Total Items:"
            # Non-empty cells in Data column A, minus the header row.
            sheet['B7'] = "=COUNTA(Data!A:A)-1"

            sheet.column_dimensions['A'].width = 20
            sheet.column_dimensions['B'].width = 30
        except Exception as e:
            self.logger.warning(f"Could not populate summary sheet: {e}")

    def _populate_data_sheet(self, sheet, content: str):
        """Populate the data sheet from ``content``.

        Row 1 holds the fixed headers. Each non-blank content line becomes one
        row: lines containing ``|`` are split into cells (at most one per
        header column), other lines go into column A. Markdown separator rows
        and rows with no non-empty cell are skipped.

        Best-effort: any failure is logged as a warning and swallowed.
        """
        try:
            header_font = Font(bold=True)
            header_fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")
            for col, header in enumerate(self._DATA_HEADERS, 1):
                header_cell = sheet.cell(row=1, column=col, value=header)
                header_cell.font = header_font
                header_cell.fill = header_fill

            max_columns = len(self._DATA_HEADERS)
            row = 2
            for raw_line in (content or "").splitlines():
                line = raw_line.strip()
                if not line:
                    continue

                if '|' not in line:
                    # Regular content
                    self._write_text(sheet, row, 1, line)
                    row += 1
                    continue

                cells = self._split_table_row(line)
                if not any(cells) or self._is_separator_row(cells):
                    continue
                for col, cell_data in enumerate(cells[:max_columns], 1):
                    # Leave empty cells unwritten so they stay truly blank
                    # (and are not counted by COUNTA on the summary sheet).
                    if cell_data:
                        self._write_text(sheet, row, col, cell_data)
                row += 1

            for col in range(1, max_columns + 1):
                sheet.column_dimensions[get_column_letter(col)].width = 20
        except Exception as e:
            self.logger.warning(f"Could not populate data sheet: {e}")

    def _populate_analysis_sheet(self, sheet, content: str):
        """Populate the analysis sheet with line statistics and fixed advice.

        Every non-blank line is classified exactly once, with the same
        precedence the data sheet uses: a line containing ``|`` is a table
        row, otherwise a line starting with ``- `` or ``* `` is a list item,
        otherwise it is text. Blank lines count only towards the total.

        Best-effort: any failure is logged as a warning and swallowed.
        """
        try:
            sheet['A1'] = "Analysis & Insights"
            sheet['A1'].font = Font(size=14, bold=True)

            sheet['A3'] = "Content Analysis:"
            sheet['A3'].font = Font(bold=True)

            lines = (content or "").splitlines()
            table_lines = list_lines = text_lines = 0
            for raw_line in lines:
                line = raw_line.strip()
                if not line:
                    continue
                if '|' in line:
                    table_lines += 1
                elif line.startswith(('- ', '* ')):
                    list_lines += 1
                else:
                    text_lines += 1

            stats = (
                f"Total Lines: {len(lines)}",
                f"Table Rows: {table_lines}",
                f"List Items: {list_lines}",
                f"Text Lines: {text_lines}",
            )
            first_stats_row = 4
            for row, text in enumerate(stats, first_stats_row):
                sheet.cell(row=row, column=1, value=text)

            # Leave one blank row between the statistics and the advice.
            row = first_stats_row + len(stats) + 1
            sheet.cell(row=row, column=1, value="Recommendations:").font = Font(bold=True)
            recommendations = (
                "1. Review data accuracy",
                "2. Consider additional analysis",
                "3. Update regularly",
            )
            for offset, text in enumerate(recommendations, 1):
                sheet.cell(row=row + offset, column=1, value=text)

            sheet.column_dimensions['A'].width = 30
        except Exception as e:
            self.logger.warning(f"Could not populate analysis sheet: {e}")