gateway/modules/services/serviceGeneration/renderers/excel_renderer.py
2025-10-03 22:40:41 +02:00

266 lines
9.5 KiB
Python

"""
Excel renderer for report generation using openpyxl.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
try:
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.table import Table, TableStyleInfo
OPENPYXL_AVAILABLE = True
except ImportError:
OPENPYXL_AVAILABLE = False
class ExcelRenderer(BaseRenderer):
    """Render extracted report content into an Excel workbook using openpyxl.

    The workbook always contains three sheets: "Summary", "Data" and
    "Analysis". The binary workbook is returned base64-encoded as a string.
    When openpyxl is missing, or rendering fails, a CSV payload is returned
    instead.

    ``self.logger`` and ``self._format_timestamp()`` are used but not defined
    in this class; presumably both are provided by ``BaseRenderer``.

    NOTE(review): 'xls' is advertised as a supported format, but the payload
    is always produced with the xlsx MIME type -- confirm callers expect that.
    """

    # Column titles written to row 1 of the "Data" sheet; the number of
    # titles also caps how many cells of a table row are written.
    _DATA_HEADERS = (
        "Item/Category",
        "Value/Amount",
        "Percentage",
        "Source Document",
        "Notes/Comments",
    )

    # Prefixes that mark a line as a list item on the "Analysis" sheet.
    _LIST_PREFIXES = ('- ', '* ')

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return the format identifiers handled by this renderer."""
        return ['xlsx', 'xls', 'excel']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return alternative names that map to this renderer."""
        return ['spreadsheet', 'workbook']

    @classmethod
    def get_priority(cls) -> int:
        """Return the priority value of this renderer."""
        return 110

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Build the Excel-specific extraction prompt.

        Args:
            user_prompt: Caller-supplied instructions, placed at the top of
                the prompt.
            title: Report title, interpolated into the prompt text.

        Returns:
            The complete prompt string.
        """
        return f"""
{user_prompt}
Generate a comprehensive Excel report with the title: "{title}"
EXCEL FORMAT REQUIREMENTS:
- Create structured data suitable for Excel spreadsheets
- Use clear column headers and organized rows
- Include multiple sheets if needed (Summary, Data, Analysis, etc.)
- Use proper data types (text, numbers, dates)
- Include formulas where appropriate
- Structure data in tables with clear headers
- Include source document information
- Add metadata and generation information
EXCEL STRUCTURE:
- Sheet 1: Summary/Overview with key metrics
- Sheet 2: Detailed data in tabular format
- Sheet 3: Analysis and insights
- Use proper column headers (A, B, C, etc.)
- Include data validation and formatting hints
- Add comments for complex data
FORMATTING RULES:
- Headers: Use bold formatting, clear column names
- Data: Organize in rows and columns, consistent formatting
- Numbers: Use proper number formatting (currency, percentages, etc.)
- Dates: Use standard date format (YYYY-MM-DD)
- Text: Left-aligned, wrap long text
- Formulas: Use Excel formula syntax (=SUM, =AVERAGE, etc.)
- Colors: Use conditional formatting for highlights
SHEET STRUCTURE:
Sheet 1 - Summary:
- Report Title
- Key Metrics (counts, totals, averages)
- Executive Summary
- Generation Date
Sheet 2 - Data:
- Column A: Item/Category
- Column B: Value/Amount
- Column C: Percentage
- Column D: Source Document
- Column E: Notes/Comments
Sheet 3 - Analysis:
- Trends and patterns
- Comparisons
- Recommendations
- Charts descriptions
OUTPUT POLICY:
- Return ONLY Excel-compatible data
- No HTML, no markdown, no code blocks
- Structured data that can be imported to Excel
- Include sheet names and structure
- Professional spreadsheet format
- Include all necessary information
CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.
Generate the complete Excel report data using the actual data from the source documents:
"""

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to Excel, falling back to CSV.

        Args:
            extracted_content: Text to place into the workbook.
            title: Report title shown on the summary sheet.

        Returns:
            A ``(payload, mime_type)`` tuple. On success the payload is the
            base64-encoded workbook with the xlsx MIME type. If openpyxl is
            not installed the payload comes from ``CsvRenderer``; if anything
            raises, the payload is a two-row CSV describing the error. Both
            fallbacks use the ``text/csv`` MIME type. This method does not
            raise.
        """
        try:
            if not OPENPYXL_AVAILABLE:
                # Imported lazily: only needed when openpyxl is unavailable.
                from .csv_renderer import CsvRenderer
                csv_content, _ = await CsvRenderer().render(extracted_content, title)
                return csv_content, "text/csv"
            workbook_b64 = self._generate_excel(extracted_content, title)
            return workbook_b64, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        except Exception as e:
            self.logger.error(f"Error rendering Excel: {str(e)}")
            return self._build_error_csv(title, str(e)), "text/csv"

    @staticmethod
    def _build_error_csv(title: str, message: str) -> str:
        """Return a two-row CSV (header + error row) with proper quoting.

        The csv module quotes fields containing commas, quotes or newlines,
        so a title or error message with such characters cannot spill into
        extra columns or rows. For plain values the output is the same text
        as direct string interpolation would give.
        """
        import csv

        out = io.StringIO()
        writer = csv.writer(out, lineterminator="\n")
        writer.writerow(["Title", "Content"])
        writer.writerow([f"{title}", f"Error rendering Excel report: {message}"])
        # Drop only the final line terminator added by the writer.
        return out.getvalue().rstrip("\n")

    def _generate_excel(self, content: str, title: str) -> str:
        """Build the three-sheet workbook and return it base64-encoded.

        Args:
            content: Text used to fill the data and analysis sheets.
            title: Report title for the summary sheet.

        Returns:
            The saved workbook bytes, base64-encoded and decoded to ``str``.

        Raises:
            Exception: Any error from workbook creation or saving is logged
                and re-raised.
        """
        try:
            workbook = Workbook()
            # Drop the sheet a new workbook starts with, so that only the
            # three named sheets remain.
            workbook.remove(workbook.active)

            summary_sheet = workbook.create_sheet("Summary", 0)
            data_sheet = workbook.create_sheet("Data", 1)
            analysis_sheet = workbook.create_sheet("Analysis", 2)

            self._populate_summary_sheet(summary_sheet, title)
            self._populate_data_sheet(data_sheet, content)
            self._populate_analysis_sheet(analysis_sheet, content)

            with io.BytesIO() as buffer:
                workbook.save(buffer)
                return base64.b64encode(buffer.getvalue()).decode('utf-8')
        except Exception as e:
            self.logger.error(f"Error generating Excel: {str(e)}")
            raise

    def _populate_summary_sheet(self, sheet, title: str):
        """Fill the summary sheet with title, generation info and a metric.

        Best-effort: any exception is logged as a warning and suppressed.

        Args:
            sheet: Worksheet to write into.
            title: Report title placed in A1.
        """
        try:
            sheet['A1'] = title
            sheet['A1'].font = Font(size=16, bold=True)
            sheet['A1'].alignment = Alignment(horizontal='center')

            sheet['A3'] = "Generated:"
            sheet['B3'] = self._format_timestamp()
            sheet['A4'] = "Status:"
            sheet['B4'] = "Generated Successfully"

            sheet['A6'] = "Key Metrics:"
            sheet['A6'].font = Font(bold=True)
            sheet['A7'] = "Total Items:"
            # Non-empty cells in column A of the Data sheet, minus the header.
            sheet['B7'] = "=COUNTA(Data!A:A)-1"

            sheet.column_dimensions['A'].width = 20
            sheet.column_dimensions['B'].width = 30
        except Exception as e:
            self.logger.warning(f"Could not populate summary sheet: {str(e)}")

    @staticmethod
    def _split_table_row(line: str) -> List[str]:
        """Split a pipe-delimited line into stripped cells.

        One leading and one trailing pipe are treated as table borders and
        removed. Empty interior cells are kept so that the remaining values
        stay in their original columns.
        """
        body = line.strip()
        if body.startswith('|'):
            body = body[1:]
        if body.endswith('|'):
            body = body[:-1]
        return [cell.strip() for cell in body.split('|')]

    @staticmethod
    def _is_separator_row(cells: List[str]) -> bool:
        """Return True for a markdown table delimiter row such as ``|---|:---:|``.

        Every cell must consist solely of '-' and ':' and contain at least
        three hyphens, so a row of single '-' placeholders is still data.
        """
        return bool(cells) and all(
            cell.count('-') >= 3 and not cell.strip('-:') for cell in cells
        )

    def _populate_data_sheet(self, sheet, content: str):
        """Fill the data sheet from the extracted content.

        Row 1 receives the styled column headers. Each non-blank content line
        then becomes one row: a line containing '|' is split into cells (at
        most one per header column), any other line is written to column A.
        Markdown delimiter rows and rows with no cell content are skipped.

        Best-effort: any exception is logged as a warning and suppressed.

        NOTE(review): openpyxl treats string values starting with '=' as
        formulas; the extraction prompt asks for formulas, so values are
        written unmodified -- confirm this is acceptable for the content
        source.

        Args:
            sheet: Worksheet to write into.
            content: Extracted text; ``None`` is treated as empty.
        """
        try:
            header_font = Font(bold=True)
            header_fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")
            for col, header in enumerate(self._DATA_HEADERS, 1):
                header_cell = sheet.cell(row=1, column=col, value=header)
                header_cell.font = header_font
                header_cell.fill = header_fill

            max_columns = len(self._DATA_HEADERS)
            next_row = 2
            for raw_line in (content or "").split('\n'):
                line = raw_line.strip()
                if not line:
                    continue

                if '|' not in line:
                    sheet.cell(row=next_row, column=1, value=line)
                    next_row += 1
                    continue

                cells = self._split_table_row(line)
                if not any(cells) or self._is_separator_row(cells):
                    continue
                for col, text in enumerate(cells[:max_columns], 1):
                    # Leave empty cells untouched so they stay truly blank.
                    if text:
                        sheet.cell(row=next_row, column=col, value=text)
                next_row += 1

            for col in range(1, max_columns + 1):
                sheet.column_dimensions[get_column_letter(col)].width = 20
        except Exception as e:
            self.logger.warning(f"Could not populate data sheet: {str(e)}")

    def _populate_analysis_sheet(self, sheet, content: str):
        """Fill the analysis sheet with line statistics and recommendations.

        Each non-blank line is stripped and counted in exactly one category:
        table row (contains '|'), list item (starts with '- ' or '* '), or
        text. Pipe lines take precedence, matching how the data sheet reads
        the same content. "Total Lines" is the sum of the three categories.

        Best-effort: any exception is logged as a warning and suppressed.

        Args:
            sheet: Worksheet to write into.
            content: Extracted text; ``None`` is treated as empty.
        """
        try:
            sheet['A1'] = "Analysis & Insights"
            sheet['A1'].font = Font(size=14, bold=True)

            table_lines = 0
            list_lines = 0
            text_lines = 0
            for raw_line in (content or "").split('\n'):
                line = raw_line.strip()
                if not line:
                    continue
                if '|' in line:
                    table_lines += 1
                elif line.startswith(self._LIST_PREFIXES):
                    list_lines += 1
                else:
                    text_lines += 1
            total_lines = table_lines + list_lines + text_lines

            sheet['A3'] = "Content Analysis:"
            sheet['A3'].font = Font(bold=True)

            row = 4
            statistics = (
                f"Total Lines: {total_lines}",
                f"Table Rows: {table_lines}",
                f"List Items: {list_lines}",
                f"Text Lines: {text_lines}",
            )
            for text in statistics:
                sheet[f'A{row}'] = text
                row += 1

            # Leave one empty row between the statistics and the next section.
            row += 1
            sheet[f'A{row}'] = "Recommendations:"
            sheet[f'A{row}'].font = Font(bold=True)
            recommendations = (
                "1. Review data accuracy",
                "2. Consider additional analysis",
                "3. Update regularly",
            )
            for text in recommendations:
                row += 1
                sheet[f'A{row}'] = text

            sheet.column_dimensions['A'].width = 30
        except Exception as e:
            self.logger.warning(f"Could not populate analysis sheet: {str(e)}")