266 lines
9.5 KiB
Python
266 lines
9.5 KiB
Python
"""
Excel renderer for report generation using openpyxl.
"""
|
|
|
|
from .base_renderer import BaseRenderer
|
|
from typing import Dict, Any, Tuple, List
|
|
import io
|
|
import base64
|
|
from datetime import datetime, UTC
|
|
|
|
try:
|
|
from openpyxl import Workbook
|
|
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
|
from openpyxl.utils import get_column_letter
|
|
from openpyxl.worksheet.table import Table, TableStyleInfo
|
|
OPENPYXL_AVAILABLE = True
|
|
except ImportError:
|
|
OPENPYXL_AVAILABLE = False
|
|
|
|
class ExcelRenderer(BaseRenderer):
    """Render extracted report content as an Excel workbook using openpyxl.

    The generated workbook always contains three sheets ("Summary", "Data"
    and "Analysis") and is returned as a base64-encoded string. When openpyxl
    is not installed, or when rendering fails, CSV text is returned instead.

    NOTE(review): ``self.logger`` and ``self._format_timestamp()`` are not
    defined in this class; presumably they are provided by ``BaseRenderer``.
    """

    # Header row of the "Data" sheet; its length also caps the number of
    # columns copied from each table row of the extracted content.
    _DATA_HEADERS = (
        "Item/Category",
        "Value/Amount",
        "Percentage",
        "Source Document",
        "Notes/Comments",
    )

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Excel formats."""
        return ['xlsx', 'xls', 'excel']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['spreadsheet', 'workbook']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for Excel renderer."""
        return 110

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Get Excel-specific extraction prompt.

        Args:
            user_prompt: The user's request; placed at the top of the prompt.
            title: Report title, quoted inside the prompt.

        Returns:
            The complete prompt text.
        """
        return f"""
{user_prompt}

Generate a comprehensive Excel report with the title: "{title}"

EXCEL FORMAT REQUIREMENTS:
- Create structured data suitable for Excel spreadsheets
- Use clear column headers and organized rows
- Include multiple sheets if needed (Summary, Data, Analysis, etc.)
- Use proper data types (text, numbers, dates)
- Include formulas where appropriate
- Structure data in tables with clear headers
- Include source document information
- Add metadata and generation information

EXCEL STRUCTURE:
- Sheet 1: Summary/Overview with key metrics
- Sheet 2: Detailed data in tabular format
- Sheet 3: Analysis and insights
- Use proper column headers (A, B, C, etc.)
- Include data validation and formatting hints
- Add comments for complex data

FORMATTING RULES:
- Headers: Use bold formatting, clear column names
- Data: Organize in rows and columns, consistent formatting
- Numbers: Use proper number formatting (currency, percentages, etc.)
- Dates: Use standard date format (YYYY-MM-DD)
- Text: Left-aligned, wrap long text
- Formulas: Use Excel formula syntax (=SUM, =AVERAGE, etc.)
- Colors: Use conditional formatting for highlights

SHEET STRUCTURE:
Sheet 1 - Summary:
- Report Title
- Key Metrics (counts, totals, averages)
- Executive Summary
- Generation Date

Sheet 2 - Data:
- Column A: Item/Category
- Column B: Value/Amount
- Column C: Percentage
- Column D: Source Document
- Column E: Notes/Comments

Sheet 3 - Analysis:
- Trends and patterns
- Comparisons
- Recommendations
- Charts descriptions

OUTPUT POLICY:
- Return ONLY Excel-compatible data
- No HTML, no markdown, no code blocks
- Structured data that can be imported to Excel
- Include sheet names and structure
- Professional spreadsheet format
- Include all necessary information

CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.

Generate the complete Excel report data using the actual data from the source documents:
"""

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to Excel format.

        Args:
            extracted_content: Text to place into the workbook.
            title: Report title shown on the "Summary" sheet.

        Returns:
            A ``(content, mime_type)`` tuple. On success ``content`` is the
            base64-encoded xlsx file. If openpyxl is not installed the CSV
            renderer's output is returned, and if anything raises a two-row
            CSV describing the error is returned; both use ``"text/csv"``.
        """
        try:
            if not OPENPYXL_AVAILABLE:
                # Fallback to CSV if openpyxl not available. Imported lazily
                # so the CSV renderer is only loaded when it is needed.
                from .csv_renderer import CsvRenderer

                csv_content, _ = await CsvRenderer().render(extracted_content, title)
                return csv_content, "text/csv"

            excel_content = self._generate_excel(extracted_content, title)
            return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

        except Exception as e:
            # Best-effort boundary: never raise to the caller, hand back a
            # small CSV error document instead.
            self.logger.error(f"Error rendering Excel: {str(e)}")
            return self._build_error_csv(title, e), "text/csv"

    @staticmethod
    def _build_error_csv(title: str, error: Exception) -> str:
        """Return the two-row CSV error document with proper CSV quoting.

        Quoting keeps the document well-formed when the title or the error
        message contains commas, quotes or line breaks. For values without
        such characters the output is the plain ``Title,Content`` header
        followed by ``<title>,Error rendering Excel report: <error>``.
        """
        import csv

        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator="\n")
        writer.writerow(["Title", "Content"])
        writer.writerow([title, f"Error rendering Excel report: {error}"])
        # Drop only the final row terminator; the result has no trailing newline.
        return buffer.getvalue().rstrip("\n")

    def _generate_excel(self, content: str, title: str) -> str:
        """Generate Excel content using openpyxl.

        Args:
            content: Extracted text used for the "Data" and "Analysis" sheets.
            title: Report title written to the "Summary" sheet.

        Returns:
            The saved workbook, base64-encoded as a ``str``.

        Raises:
            Exception: Any error raised while building or saving the workbook
                is logged and re-raised.
        """
        try:
            wb = Workbook()

            # A new Workbook starts with one default sheet; drop it so the
            # workbook contains exactly the three sheets created below.
            wb.remove(wb.active)

            summary_sheet = wb.create_sheet("Summary", 0)
            data_sheet = wb.create_sheet("Data", 1)
            analysis_sheet = wb.create_sheet("Analysis", 2)

            self._populate_summary_sheet(summary_sheet, title)
            self._populate_data_sheet(data_sheet, content)
            self._populate_analysis_sheet(analysis_sheet, content)

            # Serialize in memory, then base64-encode the raw bytes.
            buffer = io.BytesIO()
            wb.save(buffer)
            return base64.b64encode(buffer.getvalue()).decode('utf-8')

        except Exception as e:
            self.logger.error(f"Error generating Excel: {str(e)}")
            raise

    def _populate_summary_sheet(self, sheet, title: str) -> None:
        """Populate the summary sheet.

        Writes the title, generation info and a "Total Items" formula.
        Failures are logged as warnings and not propagated.
        """
        try:
            # Title
            sheet['A1'] = title
            sheet['A1'].font = Font(size=16, bold=True)
            sheet['A1'].alignment = Alignment(horizontal='center')

            # Generation info
            sheet['A3'] = "Generated:"
            sheet['B3'] = self._format_timestamp()
            sheet['A4'] = "Status:"
            sheet['B4'] = "Generated Successfully"

            # Key metrics
            sheet['A6'] = "Key Metrics:"
            sheet['A6'].font = Font(bold=True)
            sheet['A7'] = "Total Items:"
            # Non-empty cells in column A of the Data sheet, minus the header row.
            sheet['B7'] = "=COUNTA(Data!A:A)-1"

            # Fixed column widths
            sheet.column_dimensions['A'].width = 20
            sheet.column_dimensions['B'].width = 30

        except Exception as e:
            self.logger.warning(f"Could not populate summary sheet: {str(e)}")

    @staticmethod
    def _split_table_row(line: str) -> List[str]:
        """Split a pipe-delimited line into stripped cell values.

        One leading and one trailing ``|`` are treated as row borders. Empty
        interior cells are kept (as ``""``) so later values stay in their
        columns.

        Returns:
            The cell values, or an empty list when the row carries no data:
            every cell is empty, or the row is a markdown separator row such
            as ``|---|:---:|``.
        """
        inner = line.strip().removeprefix('|').removesuffix('|')
        cells = [cell.strip() for cell in inner.split('|')]

        if not any(cells):
            return []

        def is_separator(cell: str) -> bool:
            # Dashes with optional alignment colons. Three or more dashes are
            # required so placeholder values like "-" are kept as data.
            dashes = cell.strip(':')
            return len(dashes) >= 3 and set(dashes) == {'-'}

        if all(is_separator(cell) for cell in cells):
            return []
        return cells

    def _populate_data_sheet(self, sheet, content: str) -> None:
        """Populate the data sheet.

        Row 1 holds the fixed headers. Each non-blank line of ``content``
        becomes one row: pipe-delimited lines are split into at most
        ``len(_DATA_HEADERS)`` columns, any other line goes into column A.
        Markdown separator rows and pipe rows without values are skipped.
        Failures are logged as warnings and not propagated.
        """
        try:
            # Headers
            header_font = Font(bold=True)
            header_fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")
            for col, header in enumerate(self._DATA_HEADERS, 1):
                cell = sheet.cell(row=1, column=col, value=header)
                cell.font = header_font
                cell.fill = header_fill

            # NOTE(review): openpyxl stores string values that start with "="
            # as formulas. The extraction prompt asks for formulas, so values
            # are passed through as-is; reconsider if content can be untrusted.
            max_columns = len(self._DATA_HEADERS)
            row = 2
            for raw_line in content.split('\n'):
                line = raw_line.strip()
                if not line:
                    continue

                if '|' in line:
                    cells = self._split_table_row(line)
                    if not cells:
                        # Separator or empty row: do not consume a sheet row.
                        continue
                    for col, cell_data in enumerate(cells[:max_columns], 1):
                        if cell_data:
                            sheet.cell(row=row, column=col, value=cell_data)
                else:
                    # Regular content
                    sheet.cell(row=row, column=1, value=line)
                row += 1

            # Fixed column widths
            for col in range(1, max_columns + 1):
                sheet.column_dimensions[get_column_letter(col)].width = 20

        except Exception as e:
            self.logger.warning(f"Could not populate data sheet: {str(e)}")

    def _populate_analysis_sheet(self, sheet, content: str) -> None:
        """Populate the analysis sheet.

        Writes line statistics for ``content`` followed by a fixed list of
        recommendations. Blank lines are ignored, and every remaining line is
        counted in exactly one category: a table row if it contains ``|``,
        otherwise a list item if it starts with ``- `` or ``* ``, otherwise
        text. Failures are logged as warnings and not propagated.
        """
        try:
            # Title
            sheet['A1'] = "Analysis & Insights"
            sheet['A1'].font = Font(size=14, bold=True)

            sheet['A3'] = "Content Analysis:"
            sheet['A3'].font = Font(bold=True)

            # Classify stripped, non-blank lines so the counts add up to the total.
            lines = [line.strip() for line in content.split('\n')]
            lines = [line for line in lines if line]
            table_lines = list_lines = text_lines = 0
            for line in lines:
                if '|' in line:
                    table_lines += 1
                elif line.startswith(('- ', '* ')):
                    list_lines += 1
                else:
                    text_lines += 1

            row = 4
            for entry in (
                f"Total Lines: {len(lines)}",
                f"Table Rows: {table_lines}",
                f"List Items: {list_lines}",
                f"Text Lines: {text_lines}",
            ):
                sheet[f'A{row}'] = entry
                row += 1
            row += 1  # leave one empty row before the recommendations

            # Recommendations
            sheet[f'A{row}'] = "Recommendations:"
            sheet[f'A{row}'].font = Font(bold=True)
            for entry in (
                "1. Review data accuracy",
                "2. Consider additional analysis",
                "3. Update regularly",
            ):
                row += 1
                sheet[f'A{row}'] = entry

            # Fixed column width
            sheet.column_dimensions['A'].width = 30

        except Exception as e:
            self.logger.warning(f"Could not populate analysis sheet: {str(e)}")