"""
Excel renderer for report generation using openpyxl.
"""

import base64
import csv
import io
import json
import re
from collections import Counter
from datetime import datetime, UTC
from typing import Dict, Any, Tuple, List, Optional

from .base_renderer import BaseRenderer

try:
    from openpyxl import Workbook
    from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
    from openpyxl.utils import get_column_letter
    from openpyxl.worksheet.table import Table, TableStyleInfo
    OPENPYXL_AVAILABLE = True
except ImportError:
    OPENPYXL_AVAILABLE = False

# MIME type reported for generated .xlsx payloads.
_XLSX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

# Characters openpyxl rejects in worksheet titles, and the Excel title length limit.
_INVALID_SHEET_TITLE_CHARS = re.compile(r"[\\/*?:\[\]]")
_MAX_SHEET_TITLE_LENGTH = 31

# openpyxl colours are 6- or 8-digit hex strings WITHOUT a leading '#'.
_HEX_COLOR_RE = re.compile(r"[0-9A-Fa-f]{6}|[0-9A-Fa-f]{8}")
_SHORT_HEX_COLOR_RE = re.compile(r"[0-9A-Fa-f]{3}")

# Horizontal alignment values accepted by openpyxl's Alignment.
_VALID_HORIZONTAL_ALIGNMENTS = frozenset({
    "general", "left", "center", "right", "fill",
    "justify", "centerContinuous", "distributed",
})

# A markdown table separator cell such as '---', ':---' or ':---:'.
_MD_TABLE_SEPARATOR_CELL = re.compile(r":?-{3,}:?")


class ExcelRenderer(BaseRenderer):
    """Renders content to Excel format using openpyxl."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Excel formats."""
        return ['xlsx', 'xls', 'excel']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['spreadsheet', 'workbook']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for Excel renderer."""
        return 110

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only Excel-specific guidelines; global prompt is built centrally.

        ``user_prompt`` and ``title`` are accepted for interface compatibility
        but are not used when building the guideline text.
        """
        return (
            "EXCEL FORMAT GUIDELINES:\n"
            "- Extract structured data from source documents into JSON format\n"
            "- Focus on tabular data, lists, and structured information suitable for spreadsheets\n"
            "- For tables: Extract headers and rows as separate arrays with clear column names\n"
            "- For lists: Extract items with optional sub-items and metadata\n"
            "- Structure content into sections with clear content types (table, list, paragraph)\n"
            "- Use proper JSON structure with metadata, sections, and elements\n"
            "- Ensure data is clean and ready for Excel conversion with proper formatting\n"
            "OUTPUT: Return structured JSON that can be converted to Excel format."
        )

    async def render(self, extracted_content: Dict[str, Any], title: str,
                     user_prompt: Optional[str] = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to Excel format using AI-analyzed styling.

        Returns:
            A ``(content, mime_type)`` tuple. On success ``content`` is the
            base64-encoded .xlsx workbook. When openpyxl is not installed the
            CSV renderer's output is returned instead, and on any error a
            two-row CSV describing the failure is returned (both as
            ``text/csv``). This method never raises.
        """
        try:
            if not OPENPYXL_AVAILABLE:
                # Fallback to CSV if openpyxl not available
                from .csv_renderer import CsvRenderer
                csv_content, _ = await CsvRenderer().render(
                    extracted_content, title, user_prompt, ai_service
                )
                return csv_content, "text/csv"

            # Generate Excel using AI-analyzed styling
            excel_content = await self._generate_excel_from_json(
                extracted_content, title, user_prompt, ai_service
            )
            return excel_content, _XLSX_MIME_TYPE

        except Exception as e:
            self.logger.error(f"Error rendering Excel: {str(e)}")
            # Return CSV fallback
            return self._build_error_csv(title, e), "text/csv"

    @staticmethod
    def _build_error_csv(title: str, error: Exception) -> str:
        """Build the CSV error fallback, escaping commas/quotes/newlines properly."""
        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator="\n")
        writer.writerow(["Title", "Content"])
        writer.writerow([title, f"Error rendering Excel report: {str(error)}"])
        # Drop only the final line terminator so simple values keep the
        # historical "header\nrow" shape.
        return buffer.getvalue().rstrip("\n")

    @staticmethod
    def _workbook_to_base64(wb) -> str:
        """Serialize a workbook in memory and return it base64-encoded."""
        buffer = io.BytesIO()
        wb.save(buffer)
        return base64.b64encode(buffer.getvalue()).decode('utf-8')

    @staticmethod
    def _normalize_color(value: Any, default: str) -> str:
        """Convert a CSS-like hex colour to the form openpyxl accepts.

        openpyxl rejects a leading '#', so '#1F4E79' becomes '1F4E79'.
        Three-digit shorthand is expanded. Anything that is not a valid hex
        colour yields ``default``.
        """
        if isinstance(value, str):
            candidate = value.strip().lstrip("#")
            if _SHORT_HEX_COLOR_RE.fullmatch(candidate):
                candidate = "".join(ch * 2 for ch in candidate)
            if _HEX_COLOR_RE.fullmatch(candidate):
                return candidate.upper()
        return default

    @staticmethod
    def _sanitize_sheet_name(name: Any, fallback: str = "Sheet") -> str:
        """Return a worksheet title without forbidden characters, max 31 chars."""
        raw = "" if name is None else str(name)
        cleaned = _INVALID_SHEET_TITLE_CHARS.sub(" ", raw).strip()
        return cleaned[:_MAX_SHEET_TITLE_LENGTH].strip() or fallback

    def _generate_excel(self, content: str, title: str) -> str:
        """Generate a three-sheet workbook (Summary/Data/Analysis) from plain text.

        Returns the workbook base64-encoded. Errors are logged and re-raised.
        """
        try:
            wb = Workbook()
            # Remove default sheet
            wb.remove(wb.active)

            summary_sheet = wb.create_sheet("Summary", 0)
            data_sheet = wb.create_sheet("Data", 1)
            analysis_sheet = wb.create_sheet("Analysis", 2)

            self._populate_summary_sheet(summary_sheet, title)
            self._populate_data_sheet(data_sheet, content)
            self._populate_analysis_sheet(analysis_sheet, content)

            return self._workbook_to_base64(wb)

        except Exception as e:
            self.logger.error(f"Error generating Excel: {str(e)}")
            raise

    def _populate_summary_sheet(self, sheet, title: str):
        """Populate the summary sheet (best effort; failures are only logged)."""
        try:
            # Title
            sheet['A1'] = title
            sheet['A1'].font = Font(size=16, bold=True)
            sheet['A1'].alignment = Alignment(horizontal='center')

            # Generation info
            sheet['A3'] = "Generated:"
            sheet['B3'] = self._format_timestamp()
            sheet['A4'] = "Status:"
            sheet['B4'] = "Generated Successfully"

            # Key metrics placeholder
            sheet['A6'] = "Key Metrics:"
            sheet['A6'].font = Font(bold=True)
            sheet['A7'] = "Total Items:"
            # Count non-empty cells in Data sheet (minus the header row)
            sheet['B7'] = "=COUNTA(Data!A:A)-1"

            sheet.column_dimensions['A'].width = 20
            sheet.column_dimensions['B'].width = 30

        except Exception as e:
            self.logger.warning(f"Could not populate summary sheet: {str(e)}")

    def _populate_data_sheet(self, sheet, content: str):
        """Populate the data sheet from text (best effort; failures are only logged).

        Lines containing '|' are treated as table rows (at most 5 columns);
        every other non-blank line goes into column A.
        """
        try:
            headers = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"]
            header_fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")
            for col, header in enumerate(headers, 1):
                header_cell = sheet.cell(row=1, column=col, value=header)
                header_cell.font = Font(bold=True)
                header_cell.fill = header_fill

            row = 2
            for raw_line in (content or "").split('\n'):
                line = raw_line.strip()
                if not line:
                    continue

                if '|' in line:
                    # Strip only the outer pipes and keep empty inner cells so
                    # values stay in their own columns.
                    inner = line.removeprefix('|').removesuffix('|')
                    cells = [part.strip() for part in inner.split('|')]

                    # Skip markdown separator rows such as |---|:---:|
                    if all(_MD_TABLE_SEPARATOR_CELL.fullmatch(part) for part in cells):
                        continue

                    for col, cell_data in enumerate(cells[:5], 1):  # Limit to 5 columns
                        if cell_data:
                            sheet.cell(row=row, column=col, value=cell_data)
                else:
                    # Regular content
                    sheet.cell(row=row, column=1, value=line)
                row += 1

            for col in range(1, 6):
                sheet.column_dimensions[get_column_letter(col)].width = 20

        except Exception as e:
            self.logger.warning(f"Could not populate data sheet: {str(e)}")

    def _populate_analysis_sheet(self, sheet, content: str):
        """Populate the analysis sheet (best effort; failures are only logged)."""
        try:
            sheet['A1'] = "Analysis & Insights"
            sheet['A1'].font = Font(size=14, bold=True)

            sheet['A3'] = "Content Analysis:"
            sheet['A3'].font = Font(bold=True)

            # Count different types of content
            lines = (content or "").split('\n')
            table_lines = sum(1 for line in lines if '|' in line)
            list_lines = sum(1 for line in lines if line.startswith(('- ', '* ')))
            text_lines = len(lines) - table_lines - list_lines

            row = 4
            statistics = [
                f"Total Lines: {len(lines)}",
                f"Table Rows: {table_lines}",
                f"List Items: {list_lines}",
                f"Text Lines: {text_lines}",
            ]
            for text in statistics:
                sheet[f'A{row}'] = text
                row += 1

            # One blank row, then the recommendations block
            row += 1
            sheet[f'A{row}'] = "Recommendations:"
            sheet[f'A{row}'].font = Font(bold=True)
            recommendations = [
                "1. Review data accuracy",
                "2. Consider additional analysis",
                "3. Update regularly",
            ]
            for text in recommendations:
                row += 1
                sheet[f'A{row}'] = text

            sheet.column_dimensions['A'].width = 30

        except Exception as e:
            self.logger.warning(f"Could not populate analysis sheet: {str(e)}")

    async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str,
                                        user_prompt: Optional[str] = None, ai_service=None) -> str:
        """Generate Excel content from structured JSON document using AI-generated styling.

        The title stored in ``json_content["metadata"]["title"]`` wins; when it
        is missing the ``title`` argument is used instead. The caller's dict is
        not mutated.

        Returns:
            The workbook as a base64-encoded string.

        Raises:
            Exception: wrapping any failure (invalid structure, openpyxl error).
        """
        try:
            # Validate before doing any (potentially slow) AI call
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")
            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")

            styles = await self._get_excel_styles(user_prompt, ai_service)

            # Use title from JSON metadata if available, otherwise use provided title
            metadata = json_content.get("metadata")
            if not isinstance(metadata, dict):
                metadata = {}
            if title and not metadata.get("title"):
                metadata = {**metadata, "title": title}
            json_content = {**json_content, "metadata": metadata}

            wb = Workbook()
            # Remove default sheet
            wb.remove(wb.active)

            sheets = self._create_excel_sheets(wb, json_content, styles)
            self._populate_excel_sheets(sheets, json_content, styles)

            return self._workbook_to_base64(wb)

        except Exception as e:
            self.logger.error(f"Error generating Excel from JSON: {str(e)}")
            raise Exception(f"Excel generation failed: {str(e)}") from e

    async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
        """Ask the AI service for styling definitions, falling back to defaults.

        The AI answer is merged over the default styles so every style key the
        renderer reads is always present. Any failure (no service, call error,
        unparsable answer) yields the defaults.
        """
        if not ai_service:
            return self._get_default_excel_styles()

        try:
            prompt = f"""
For this Excel document request: "{user_prompt}"

Provide styling definitions for Excel elements. Respond with ONLY JSON:

{{
    "title": {{"font_size": 16, "color": "#1F4E79", "bold": true, "align": "center"}},
    "heading": {{"font_size": 14, "color": "#2F2F2F", "bold": true, "align": "left"}},
    "table_header": {{"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": true, "align": "center"}},
    "table_cell": {{"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": false, "align": "left"}},
    "bullet_list": {{"font_size": 11, "color": "#2F2F2F", "indent": 2}},
    "paragraph": {{"font_size": 11, "color": "#2F2F2F", "bold": false, "align": "left"}},
    "code_block": {{"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}}
}}

CRITICAL: Table headers must have dark background with light text, table cells must have light background with dark text for readability.
"""

            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType

            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            request = AiCallRequest(prompt=prompt, context="", options=request_options)
            response = await ai_service.aiObjects.call(request)

            # Clean and parse JSON (strip an optional markdown code fence)
            result = (response.content or "").strip()
            if result.startswith('```'):
                result = re.sub(r'^```(?:json)?\s*', '', result, flags=re.IGNORECASE)
                result = re.sub(r'\s*```$', '', result)

            styles = json.loads(result)
            if not isinstance(styles, dict):
                raise ValueError("AI styling response is not a JSON object")

            styles = self._merge_with_default_styles(styles)

            # Validate and fix contrast issues
            return self._validate_excel_styles_contrast(styles)

        except Exception as e:
            self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
            return self._get_default_excel_styles()

    def _merge_with_default_styles(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Overlay AI styles on the defaults so no expected key is missing."""
        merged = self._get_default_excel_styles()
        for key, value in styles.items():
            if isinstance(merged.get(key), dict):
                # Known style group: accept overrides only when well-formed
                if isinstance(value, dict):
                    merged[key].update(value)
            else:
                # Unknown extras (e.g. "sheet_names") are passed through
                merged[key] = value
        return merged

    def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix contrast issues in AI-generated styles.

        A table header or table cell whose background and text colour are the
        same colour is unreadable and is reset to a safe pair. Malformed
        (non-dict) entries for those keys are replaced by the default.
        Returns the default styles if validation itself fails.
        """
        try:
            defaults = self._get_default_excel_styles()
            # (safe background, safe text colour) per style group
            safe_pairs = {
                "table_header": ("#4F4F4F", "#FFFFFF"),
                "table_cell": ("#FFFFFF", "#2F2F2F"),
            }

            def color_key(value: Any) -> str:
                # Compare case-insensitively and regardless of a leading '#'
                return self._normalize_color(value, "") or str(value).strip().upper()

            for key, (safe_background, safe_text) in safe_pairs.items():
                if key not in styles:
                    continue
                style = styles[key]
                if not isinstance(style, dict):
                    styles[key] = defaults[key]
                    continue

                background = style.get("background", "#FFFFFF")
                text_color = style.get("text_color", "#000000")
                if color_key(background) == color_key(text_color):
                    style["background"] = safe_background
                    style["text_color"] = safe_text

            return styles

        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
            return self._get_default_excel_styles()

    def _get_default_excel_styles(self) -> Dict[str, Any]:
        """Default Excel styles (a fresh dict on every call)."""
        return {
            "title": {"font_size": 16, "color": "#1F4E79", "bold": True, "align": "center"},
            "heading": {"font_size": 14, "color": "#2F2F2F", "bold": True, "align": "left"},
            "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
            "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
            "bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 2},
            "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
            "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
        }

    # NOTE: "Workbook" is a string annotation on purpose: the name does not
    # exist when openpyxl is missing, and an eager annotation would make the
    # whole module fail to import instead of falling back to CSV.
    def _create_excel_sheets(self, wb: "Workbook", json_content: Dict[str, Any],
                             styles: Dict[str, Any]) -> Dict[str, Any]:
        """Create Excel sheets based on content structure and user intent.

        Sheet names come from ``styles["sheet_names"]`` when that is a
        non-empty list, otherwise they are derived from the content. Names are
        sanitized for Excel and de-duplicated case-insensitively.

        Returns:
            Mapping of lower-cased sheet name to worksheet, in creation order.
        """
        requested_names = styles.get("sheet_names")
        if not (isinstance(requested_names, list) and requested_names):
            requested_names = self._generate_sheet_names_from_content(json_content)

        sheets: Dict[str, Any] = {}
        for raw_name in requested_names:
            sheet_name = self._sanitize_sheet_name(raw_name)
            key = sheet_name.lower()
            if key in sheets:
                continue

            # wb.active is None once the default sheet has been removed, so
            # only reuse it when it really exists.
            active_sheet = wb.active if not sheets else None
            if active_sheet is not None:
                sheet = active_sheet
                sheet.title = sheet_name
            else:
                sheet = wb.create_sheet(sheet_name, len(sheets))
            sheets[key] = sheet

        return sheets

    def _generate_sheet_names_from_content(self, json_content: Dict[str, Any]) -> List[str]:
        """Generate sheet names based on actual content structure.

        Returns ``["Content"]`` when there are no sections; otherwise the
        document title followed by per-content-type sheets ("Tables", "Lists",
        "Text") when more than one content type is present. At most 4 names.
        """
        sections = json_content.get("sections") or []

        # If no sections, create a single sheet
        if not sections:
            return ["Content"]

        # Always start with a main content sheet
        metadata = json_content.get("metadata")
        document_title = metadata.get("title") if isinstance(metadata, dict) else None
        document_title = str(document_title) if document_title else "Document"
        sheet_names = [document_title[:_MAX_SHEET_TITLE_LENGTH]]  # Excel sheet name limit

        content_types = [
            section.get("content_type", "paragraph")
            for section in sections
            if isinstance(section, dict)
        ]
        distinct_types = []
        for content_type in content_types:
            if content_type not in distinct_types:
                distinct_types.append(content_type)

        # Create sheets for different content types if we have multiple types
        if len(distinct_types) > 1:
            if "table" in distinct_types:
                sheet_names.append("Tables")
            if "list" in distinct_types:
                sheet_names.append("Lists")
            if "paragraph" in distinct_types or "heading" in distinct_types:
                sheet_names.append("Text")

        # Limit to 4 sheets maximum
        return sheet_names[:4]

    def _populate_excel_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any],
                               styles: Dict[str, Any]) -> None:
        """Populate Excel sheets with content from JSON based on actual sheet names.

        The first sheet receives the overview plus all content; any further
        sheets receive content filtered by type. Failures are only logged.
        """
        try:
            sheet_names = list(sheets)
            if not sheet_names:
                return

            # Populate the first sheet with all content
            main_name, *other_names = sheet_names
            self._populate_main_sheet(sheets[main_name], json_content, styles)

            # If we have multiple sheets, distribute content by type
            if other_names:
                self._populate_content_type_sheets(sheets, json_content, styles, other_names)

        except Exception as e:
            self.logger.warning(f"Could not populate Excel sheets: {str(e)}")

    def _populate_main_sheet(self, sheet, json_content: Dict[str, Any], styles: Dict[str, Any]):
        """Populate the main sheet with document overview and all content.

        Layout: title (A1), generation info (rows 3-4), optional document
        metadata, a content overview and then every section. Best effort:
        failures are only logged.
        """
        try:
            metadata = json_content.get("metadata")
            if not isinstance(metadata, dict):
                metadata = {}

            # Document title, styled from the (possibly partial) title style
            default_title_style = self._get_default_excel_styles()["title"]
            title_style = dict(default_title_style)
            if isinstance(styles.get("title"), dict):
                title_style.update(styles["title"])

            alignment = title_style.get("align")
            if alignment not in _VALID_HORIZONTAL_ALIGNMENTS:
                alignment = default_title_style["align"]

            sheet['A1'] = metadata.get("title") or "Generated Report"
            sheet['A1'].font = Font(
                size=title_style.get("font_size", default_title_style["font_size"]),
                bold=bool(title_style.get("bold", True)),
                # openpyxl needs hex without '#'
                color=self._normalize_color(title_style.get("color"), "1F4E79"),
            )
            sheet['A1'].alignment = Alignment(horizontal=alignment)

            # Generation info
            sheet['A3'] = "Generated:"
            sheet['B3'] = self._format_timestamp()
            sheet['A4'] = "Status:"
            sheet['B4'] = "Generated Successfully"

            # Document metadata (the title is already shown in A1)
            row = 5  # last used row; must be defined even without metadata
            extra_metadata = {key: value for key, value in metadata.items() if key != "title"}
            if extra_metadata:
                sheet['A6'] = "Document Information:"
                sheet['A6'].font = Font(bold=True)
                row = 7
                for key, value in extra_metadata.items():
                    sheet[f'A{row}'] = f"{str(key).title()}:"
                    sheet[f'B{row}'] = str(value)
                    row += 1

            # Content overview
            sections = [
                section for section in (json_content.get("sections") or [])
                if isinstance(section, dict)
            ]
            sheet[f'A{row + 1}'] = "Content Overview:"
            sheet[f'A{row + 1}'].font = Font(bold=True)
            row += 2
            sheet[f'A{row}'] = f"Total Sections: {len(sections)}"

            # Count different content types (Counter keeps first-seen order)
            type_counts = Counter(
                str(section.get("content_type", "unknown")) for section in sections
            )
            for content_type, count in type_counts.items():
                row += 1
                sheet[f'A{row}'] = f"{content_type.title()} Sections: {count}"

            # Add all content to this sheet
            row += 2
            for section in sections:
                row = self._add_section_to_sheet(sheet, section, styles, row)
                row += 1  # Empty row between sections

            sheet.column_dimensions['A'].width = 20
            sheet.column_dimensions['B'].width = 30

        except Exception as e:
            self.logger.warning(f"Could not populate main sheet: {str(e)}")

    def _populate_content_type_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any],
                                      styles: Dict[str, Any], sheet_names: List[str]):
        """Populate additional sheets based on content types.

        Sheets keyed "tables", "lists" and "text" receive only the matching
        sections; any other sheet receives all sections. Failures are only
        logged.
        """
        try:
            sections = [
                section for section in (json_content.get("sections") or [])
                if isinstance(section, dict)
            ]
            # Tuples (not sets) so unhashable content_type values cannot raise
            wanted_types_by_sheet = {
                "tables": ("table",),
                "lists": ("list",),
                "text": ("paragraph", "heading"),
            }

            for sheet_name in sheet_names:
                sheet = sheets.get(sheet_name)
                if sheet is None:
                    continue

                sheet['A1'] = sheet_name.title()
                sheet['A1'].font = Font(size=16, bold=True)

                # Filter sections by content type
                wanted_types = wanted_types_by_sheet.get(sheet_name)
                if wanted_types is None:
                    selected_sections = sections
                else:
                    selected_sections = [
                        section for section in sections
                        if section.get("content_type") in wanted_types
                    ]

                row = 3
                for section in selected_sections:
                    row = self._add_section_to_sheet(sheet, section, styles, row)
                    row += 1  # Empty row between sections

                for col in range(1, 6):
                    sheet.column_dimensions[get_column_letter(col)].width = 20

        except Exception as e:
            self.logger.warning(f"Could not populate content type sheets: {str(e)}")

    def _add_section_to_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any],
                              start_row: int) -> int:
        """Add a section to a sheet and return the next free row.

        An optional section title is written first, then every element is
        handed to the writer matching the section's ``content_type``
        (unknown types are written as paragraphs). On failure a warning is
        logged and the row after the current one is returned.
        """
        # Writer method name per content type; looked up lazily so only the
        # writer that is actually needed has to exist.
        writer_names = {
            "table": "_add_table_to_excel",
            "list": "_add_list_to_excel",
            "paragraph": "_add_paragraph_to_excel",
            "heading": "_add_heading_to_excel",
        }
        try:
            section_title = section.get("title")
            if section_title:
                sheet[f'A{start_row}'] = f"# {section_title}"
                sheet[f'A{start_row}'].font = Font(bold=True)
                start_row += 1

            content_type = section.get("content_type", "paragraph")
            writer_name = "_add_paragraph_to_excel"
            if isinstance(content_type, str):
                writer_name = writer_names.get(content_type, writer_name)

            for element in section.get("elements") or []:
                start_row = getattr(self, writer_name)(sheet, element, styles, start_row)

            return start_row

        except Exception as e:
            self.logger.warning(f"Could not add section to sheet: {str(e)}")
            return start_row + 1

    def _format_timestamp(self) -> str:
        """Format current timestamp (UTC) for document generation."""
        return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")