"""Excel renderer for report generation using openpyxl."""

import base64
import csv
import io
import re
from datetime import datetime, UTC
from typing import Dict, Any, Tuple, List, Optional

from .rendererBaseTemplate import BaseRenderer

try:
    from openpyxl import Workbook
    from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
    from openpyxl.utils import get_column_letter
    from openpyxl.worksheet.table import Table, TableStyleInfo
    OPENPYXL_AVAILABLE = True
except ImportError:
    OPENPYXL_AVAILABLE = False

XLSX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

# Optional '#', then RRGGBB or AARRGGBB.
_HEX_COLOR_RE = re.compile(r"#?([0-9A-Fa-f]{8}|[0-9A-Fa-f]{6})")

# Characters openpyxl refuses in a worksheet title.
_INVALID_SHEET_TITLE_CHARS = re.compile(r"[\\*?:/\[\]]")

# Maximum worksheet title length supported by Excel.
_MAX_SHEET_TITLE_LENGTH = 31

# Values accepted by openpyxl's Alignment(horizontal=...).
_HORIZONTAL_ALIGNMENTS = (
    "general", "left", "center", "right",
    "fill", "justify", "centerContinuous", "distributed",
)


class RendererExcel(BaseRenderer):
    """Renders content to Excel format using openpyxl.

    The workbook is returned as a base64-encoded string. ``self.logger`` and
    the ``_create_ai_style_template`` / ``_get_ai_styles`` helpers are not
    defined in this module; presumably they come from ``BaseRenderer``.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Excel formats."""
        return ['xlsx', 'xls', 'excel']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['spreadsheet', 'workbook']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for Excel renderer."""
        return 110

    # ------------------------------------------------------------------
    # Small normalisation helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize_color(value: Any, default: Optional[str] = None) -> Optional[str]:
        """Return ``value`` as an 8-digit aRGB hex string without ``#``.

        openpyxl only accepts 6 or 8 hex digits and rejects a leading ``#``.
        ``#RRGGBB`` / ``RRGGBB`` gain an opaque ``FF`` alpha channel;
        ``#AARRGGBB`` / ``AARRGGBB`` only lose the ``#``. Anything else
        (non-strings, named colours, malformed hex) yields ``default``.
        """
        if isinstance(value, str):
            match = _HEX_COLOR_RE.fullmatch(value.strip())
            if match:
                digits = match.group(1).upper()
                return digits if len(digits) == 8 else f"FF{digits}"
        return default

    @staticmethod
    def _sanitize_sheet_name(name: Any, fallback: str = "Content") -> str:
        """Return a worksheet title that openpyxl and Excel accept.

        Forbidden characters are replaced by ``_``, surrounding whitespace and
        apostrophes are removed, and the result is cut to 31 characters.
        ``fallback`` is used when nothing usable remains.
        """
        text = "" if name is None else str(name)
        cleaned = _INVALID_SHEET_TITLE_CHARS.sub("_", text).strip().strip("'")
        cleaned = cleaned[:_MAX_SHEET_TITLE_LENGTH].strip()
        return cleaned or fallback

    @staticmethod
    def _get_metadata(json_content: Any) -> Dict[str, Any]:
        """Return the document's ``metadata`` dict, or ``{}`` if absent/invalid."""
        metadata = json_content.get("metadata") if isinstance(json_content, dict) else None
        return metadata if isinstance(metadata, dict) else {}

    @staticmethod
    def _get_style(styles: Any, name: str) -> Dict[str, Any]:
        """Return the style entry ``name`` as a dict, or ``{}`` if absent/invalid."""
        style = styles.get(name) if isinstance(styles, dict) else None
        return style if isinstance(style, dict) else {}

    @staticmethod
    def _to_cell_value(value: Any) -> Any:
        """Stringify container values, which openpyxl cannot store in a cell."""
        if isinstance(value, (dict, list, tuple, set)):
            return str(value)
        return value

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    async def render(self, extracted_content: Dict[str, Any], title: str,
                     user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to Excel format using AI-analyzed styling.

        Returns:
            ``(content, mime_type)``. On success ``content`` is the
            base64-encoded workbook. When openpyxl is missing the CSV
            renderer's output is returned instead, and on any error a
            two-line CSV describing the failure is returned; both use the
            ``text/csv`` mime type. This method does not raise.
        """
        try:
            if not OPENPYXL_AVAILABLE:
                # Fallback to CSV if openpyxl not available
                from .rendererCsv import RendererCsv
                csv_renderer = RendererCsv()
                csv_content, _ = await csv_renderer.render(
                    extracted_content, title, user_prompt, ai_service)
                return csv_content, "text/csv"

            excel_content = await self._generate_excel_from_json(
                extracted_content, title, user_prompt, ai_service)
            return excel_content, XLSX_MIME_TYPE

        except Exception as e:
            self.logger.error(f"Error rendering Excel: {str(e)}")
            # Build the fallback with the csv module so commas, quotes or
            # newlines in the title / error text cannot break the row layout.
            buffer = io.StringIO()
            writer = csv.writer(buffer, lineterminator="\n")
            writer.writerow(["Title", "Content"])
            writer.writerow([title, f"Error rendering Excel report: {str(e)}"])
            return buffer.getvalue().rstrip("\n"), "text/csv"

    # ------------------------------------------------------------------
    # Plain-text based generation (not used by render())
    # ------------------------------------------------------------------

    def _generate_excel(self, content: str, title: str) -> str:
        """Generate a three-sheet workbook from plain text; return it as base64.

        Raises:
            Exception: whatever workbook creation or saving raised (logged
            first, then re-raised unchanged).
        """
        try:
            wb = Workbook()
            # Remove default sheet so only the named sheets remain.
            wb.remove(wb.active)

            summary_sheet = wb.create_sheet("Summary", 0)
            data_sheet = wb.create_sheet("Data", 1)
            analysis_sheet = wb.create_sheet("Analysis", 2)

            self._populate_summary_sheet(summary_sheet, title)
            self._populate_data_sheet(data_sheet, content)
            self._populate_analysis_sheet(analysis_sheet, content)

            buffer = io.BytesIO()
            wb.save(buffer)
            return base64.b64encode(buffer.getvalue()).decode('utf-8')

        except Exception as e:
            self.logger.error(f"Error generating Excel: {str(e)}")
            raise

    def _populate_summary_sheet(self, sheet, title: str):
        """Populate the summary sheet (best effort; failures are logged)."""
        try:
            # Title
            sheet['A1'] = title
            sheet['A1'].font = Font(size=16, bold=True)
            sheet['A1'].alignment = Alignment(horizontal='center')

            # Generation info
            sheet['A3'] = "Generated:"
            sheet['B3'] = self._format_timestamp()
            sheet['A4'] = "Status:"
            sheet['B4'] = "Generated Successfully"

            # Key metrics placeholder
            sheet['A6'] = "Key Metrics:"
            sheet['A6'].font = Font(bold=True)
            sheet['A7'] = "Total Items:"
            # Non-empty cells in column A of the Data sheet, minus the header.
            sheet['B7'] = "=COUNTA(Data!A:A)-1"

            sheet.column_dimensions['A'].width = 20
            sheet.column_dimensions['B'].width = 30

        except Exception as e:
            self.logger.warning(f"Could not populate summary sheet: {str(e)}")

    def _populate_data_sheet(self, sheet, content: str):
        """Populate the data sheet from text (best effort; failures are logged).

        Lines containing ``|`` are split into at most five cells; every other
        non-blank line goes into column A.
        """
        try:
            headers = ["Item/Category", "Value/Amount", "Percentage",
                       "Source Document", "Notes/Comments"]
            header_font = Font(bold=True)
            header_fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC",
                                      fill_type="solid")
            for col, header in enumerate(headers, 1):
                cell = sheet.cell(row=1, column=col, value=header)
                cell.font = header_font
                cell.fill = header_fill

            row = 2
            for raw_line in content.split('\n'):
                line = raw_line.strip()
                if not line:
                    continue

                if '|' in line:
                    parts = [part.strip() for part in line.split('|') if part.strip()]
                    for col, cell_data in enumerate(parts[:5], 1):  # Limit to 5 columns
                        sheet.cell(row=row, column=col, value=cell_data)
                else:
                    sheet.cell(row=row, column=1, value=line)
                row += 1

            for col in range(1, 6):
                sheet.column_dimensions[get_column_letter(col)].width = 20

        except Exception as e:
            self.logger.warning(f"Could not populate data sheet: {str(e)}")

    def _populate_analysis_sheet(self, sheet, content: str):
        """Populate the analysis sheet (best effort; failures are logged)."""
        try:
            sheet['A1'] = "Analysis & Insights"
            sheet['A1'].font = Font(size=14, bold=True)

            lines = content.split('\n')
            sheet['A3'] = "Content Analysis:"
            sheet['A3'].font = Font(bold=True)

            # Count different types of content
            table_lines = sum(1 for line in lines if '|' in line)
            list_lines = sum(1 for line in lines if line.startswith(('- ', '* ')))
            text_lines = len(lines) - table_lines - list_lines

            row = 4
            for entry in (
                f"Total Lines: {len(lines)}",
                f"Table Rows: {table_lines}",
                f"List Items: {list_lines}",
                f"Text Lines: {text_lines}",
            ):
                sheet[f'A{row}'] = entry
                row += 1

            # One blank row, then the recommendations.
            row += 1
            sheet[f'A{row}'] = "Recommendations:"
            sheet[f'A{row}'].font = Font(bold=True)
            for entry in (
                "1. Review data accuracy",
                "2. Consider additional analysis",
                "3. Update regularly",
            ):
                row += 1
                sheet[f'A{row}'] = entry

            sheet.column_dimensions['A'].width = 30

        except Exception as e:
            self.logger.warning(f"Could not populate analysis sheet: {str(e)}")

    # ------------------------------------------------------------------
    # JSON based generation
    # ------------------------------------------------------------------

    async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str,
                                        user_prompt: str = None, ai_service=None) -> str:
        """Generate a base64-encoded workbook from a structured JSON document.

        ``json_content`` must be a dict with a ``sections`` entry. The title in
        ``metadata`` wins; ``title`` is used only when metadata has none.

        Raises:
            Exception: ``"Excel generation failed: ..."`` chained to the
            original error (including validation errors).
        """
        try:
            self.logger.debug("Excel JSON content type: %s", type(json_content))

            # Validate before asking the AI service for styles.
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")
            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")
            self.logger.debug("Excel JSON content keys: %s", list(json_content.keys()))

            styles = await self._get_excel_styles(user_prompt, ai_service)

            # Use title from JSON metadata if available, otherwise the provided
            # title. A shallow copy keeps the caller's dict untouched.
            metadata = self._get_metadata(json_content)
            if title and not metadata.get("title"):
                json_content = {**json_content, "metadata": {**metadata, "title": title}}

            wb = Workbook()
            sheets = self._create_excel_sheets(wb, json_content, styles)
            self.logger.debug("Excel sheets created: %s", list(sheets.keys()))

            self._populate_excel_sheets(sheets, json_content, styles)

            buffer = io.BytesIO()
            wb.save(buffer)
            excel_bytes = buffer.getvalue()
            self.logger.debug("Excel bytes length: %d", len(excel_bytes))

            return base64.b64encode(excel_bytes).decode('utf-8')

        except Exception as e:
            self.logger.error(f"Error generating Excel from JSON: {str(e)}")
            raise Exception(f"Excel generation failed: {str(e)}") from e

    async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
        """Get Excel styling definitions using base template AI styling.

        The default styles double as the schema shown to the AI. The result is
        colour-normalised and contrast-checked before being returned.
        """
        style_schema = self._get_default_excel_styles()
        style_template = self._create_ai_style_template("xlsx", user_prompt, style_schema)
        styles = await self._get_ai_styles(
            ai_service, style_template, self._get_default_excel_styles())

        styles = self._convert_colors_format(styles)
        return self._validate_excel_styles_contrast(styles)

    def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Rewrite ``#``-prefixed hex colours as aRGB strings openpyxl accepts.

        ``#RRGGBB`` becomes ``FFRRGGBB`` and ``#AARRGGBB`` becomes
        ``AARRGGBB``. Other values are left alone. ``styles`` is modified in
        place and returned; on error it is returned as far as it got.
        """
        try:
            for style_config in styles.values():
                if not isinstance(style_config, dict):
                    continue
                for prop, value in list(style_config.items()):
                    if not (isinstance(value, str) and value.startswith('#')):
                        continue
                    converted = self._normalize_color(value)
                    if converted is not None:
                        style_config[prop] = converted
                        self.logger.debug("Converted color: %s -> %s", value, converted)
            return styles
        except Exception as e:
            self.logger.warning(f"Color conversion failed: {str(e)}")
            return styles

    def _fix_contrast(self, style: Any, safe_background: str, safe_text: str) -> None:
        """Replace identical background/text colours in ``style`` in place.

        Only the RGB part is compared, so a differing alpha channel does not
        hide a clash.
        """
        if not isinstance(style, dict):
            return
        background = self._normalize_color(style.get("background"), "FFFFFFFF")
        text_color = self._normalize_color(style.get("text_color"), "FF000000")
        if background[-6:] == text_color[-6:]:
            style["background"] = safe_background
            style["text_color"] = safe_text

    def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix contrast issues in AI-generated styles.

        Table headers and table cells whose text colour equals their
        background get a readable pair instead. On any error the
        colour-normalised default styles are returned.
        """
        try:
            self._fix_contrast(styles.get("table_header"), "FF4F4F4F", "FFFFFFFF")
            self._fix_contrast(styles.get("table_cell"), "FFFFFFFF", "FF2F2F2F")
            return styles
        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
            return self._convert_colors_format(self._get_default_excel_styles())

    def _get_default_excel_styles(self) -> Dict[str, Any]:
        """Return the default Excel styles.

        Colours are written as ``#AARRGGBB``. They must pass through
        ``_convert_colors_format`` (or ``_normalize_color``) before being
        handed to openpyxl, which rejects the leading ``#``.
        """
        return {
            "title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
            "heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
            "table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF",
                             "bold": True, "align": "center"},
            "table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F",
                           "bold": False, "align": "left"},
            "bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2},
            "paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"},
            "code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F",
                           "background": "#FFF5F5F5"}
        }

    # The annotation is quoted so the class can still be defined (and the CSV
    # fallback in render() reached) when openpyxl failed to import.
    def _create_excel_sheets(self, wb: "Workbook", json_content: Dict[str, Any],
                             styles: Dict[str, Any]) -> Dict[str, Any]:
        """Create the worksheets and return them keyed by lower-cased title.

        Names come from ``styles["sheet_names"]`` when that is a non-empty
        list, otherwise they are derived from the content. Each name is
        sanitised; names that collide case-insensitively are created once.
        The workbook's default sheet is reused for the first name.
        """
        sheets: Dict[str, Any] = {}

        requested = styles.get("sheet_names") if isinstance(styles, dict) else None
        if not isinstance(requested, (list, tuple)) or not requested:
            requested = self._generate_sheet_names_from_content(json_content)
        self.logger.debug("Excel sheet names: %s", requested)

        for raw_name in requested:
            sheet_name = self._sanitize_sheet_name(raw_name)
            key = sheet_name.lower()
            if key in sheets:
                continue

            if not sheets:
                # Use the default sheet for the first sheet
                sheet = wb.active
                sheet.title = sheet_name
            else:
                sheet = wb.create_sheet(sheet_name)
            sheets[key] = sheet

        return sheets

    def _generate_sheet_names_from_content(self, json_content: Dict[str, Any]) -> List[str]:
        """Generate sheet names based on actual content structure.

        Without sections a single ``"Content"`` sheet is used. Otherwise the
        first sheet is named after the document title and, when more than one
        content type occurs, ``Tables`` / ``Lists`` / ``Text`` sheets follow.
        At most four names are returned.
        """
        sections = json_content.get("sections", [])
        if not sections:
            return ["Content"]

        document_title = self._get_metadata(json_content).get("title") or "Document"
        sheet_names = [str(document_title)[:_MAX_SHEET_TITLE_LENGTH]]

        content_types = {section.get("content_type", "paragraph") for section in sections}

        if len(content_types) > 1:
            if "table" in content_types:
                sheet_names.append("Tables")
            if "list" in content_types:
                sheet_names.append("Lists")
            if "paragraph" in content_types or "heading" in content_types:
                sheet_names.append("Text")

        return sheet_names[:4]

    def _populate_excel_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any],
                               styles: Dict[str, Any]) -> None:
        """Populate Excel sheets with content from JSON based on actual sheet names.

        The first sheet receives the overview plus all sections; any further
        sheets receive the sections matching their content type.
        """
        try:
            sheet_names = list(sheets.keys())
            if not sheet_names:
                return

            self._populate_main_sheet(sheets[sheet_names[0]], json_content, styles)

            if len(sheet_names) > 1:
                self._populate_content_type_sheets(
                    sheets, json_content, styles, sheet_names[1:])

        except Exception as e:
            self.logger.warning(f"Could not populate Excel sheets: {str(e)}")

    def _populate_main_sheet(self, sheet, json_content: Dict[str, Any], styles: Dict[str, Any]):
        """Populate the main sheet with document overview and all content.

        Best effort: failures are logged and leave the sheet partially filled.
        """
        try:
            metadata = self._get_metadata(json_content)

            # Document title
            sheet['A1'] = metadata.get("title") or "Generated Report"

            title_style = self._get_style(styles, "title")
            self.logger.debug("Excel title style: %s", title_style)
            sheet['A1'].font = Font(
                size=title_style.get("font_size", 16),
                bold=title_style.get("bold", True),
                color=self._normalize_color(title_style.get("color"), "FF1F4E79"),
            )
            horizontal = title_style.get("align", "center")
            if horizontal not in _HORIZONTAL_ALIGNMENTS:
                horizontal = "center"
            sheet['A1'].alignment = Alignment(horizontal=horizontal)

            # Generation info
            sheet['A3'] = "Generated:"
            sheet['B3'] = self._format_timestamp()
            sheet['A4'] = "Status:"
            sheet['B4'] = "Generated Successfully"

            # Document metadata. `row` is always bound, so the overview below
            # also works for documents without metadata.
            row = 5
            info_items = [(key, value) for key, value in metadata.items() if key != "title"]
            if info_items:
                sheet['A6'] = "Document Information:"
                sheet['A6'].font = Font(bold=True)
                row = 7
                for key, value in info_items:
                    sheet[f'A{row}'] = f"{str(key).title()}:"
                    sheet[f'B{row}'] = str(value)
                    row += 1

            # Content overview
            sections = json_content.get("sections", [])
            row += 1
            sheet[f'A{row}'] = "Content Overview:"
            sheet[f'A{row}'].font = Font(bold=True)
            row += 1
            sheet[f'A{row}'] = f"Total Sections: {len(sections)}"

            # Count different content types
            content_types: Dict[Any, int] = {}
            for section in sections:
                content_type = section.get("content_type", "unknown")
                content_types[content_type] = content_types.get(content_type, 0) + 1

            for content_type, count in content_types.items():
                row += 1
                sheet[f'A{row}'] = f"{str(content_type).title()} Sections: {count}"

            # Add all content to this sheet
            row += 2
            for section in sections:
                row = self._add_section_to_sheet(sheet, section, styles, row)
                row += 1  # Empty row between sections

            sheet.column_dimensions['A'].width = 20
            sheet.column_dimensions['B'].width = 30

        except Exception as e:
            self.logger.warning(f"Could not populate main sheet: {str(e)}")

    def _populate_content_type_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any],
                                      styles: Dict[str, Any], sheet_names: List[str]):
        """Populate additional sheets based on content types.

        ``sheet_names`` are keys of ``sheets`` (lower-cased titles). The keys
        ``tables``, ``lists`` and ``text`` select sections by content type;
        any other sheet receives every section.
        """
        type_filters = {
            "tables": ("table",),
            "lists": ("list",),
            "text": ("paragraph", "heading"),
        }
        try:
            sections = json_content.get("sections", [])

            for sheet_name in sheet_names:
                if sheet_name not in sheets:
                    continue

                sheet = sheets[sheet_name]
                sheet['A1'] = sheet_name.title()
                sheet['A1'].font = Font(size=16, bold=True)

                wanted = type_filters.get(sheet_name)
                if wanted is None:
                    filtered_sections = sections
                else:
                    filtered_sections = [
                        s for s in sections if s.get("content_type") in wanted
                    ]

                row = 3
                for section in filtered_sections:
                    row = self._add_section_to_sheet(sheet, section, styles, row)
                    row += 1  # Empty row between sections

                for col in range(1, 6):
                    sheet.column_dimensions[get_column_letter(col)].width = 20

        except Exception as e:
            self.logger.warning(f"Could not populate content type sheets: {str(e)}")

    def _add_section_to_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any],
                              start_row: int) -> int:
        """Add a section to a sheet and return the next free row.

        The section's ``content_type`` picks the writer for each of its
        ``elements``; unknown types are written as paragraphs.
        """
        try:
            section_title = section.get("title")
            if section_title:
                sheet[f'A{start_row}'] = f"# {section_title}"
                sheet[f'A{start_row}'].font = Font(bold=True)
                start_row += 1

            writers = {
                "table": self._add_table_to_excel,
                "list": self._add_list_to_excel,
                "paragraph": self._add_paragraph_to_excel,
                "heading": self._add_heading_to_excel,
            }
            content_type = section.get("content_type", "paragraph")
            if isinstance(content_type, str):
                writer = writers.get(content_type, self._add_paragraph_to_excel)
            else:
                writer = self._add_paragraph_to_excel

            for element in section.get("elements", []):
                start_row = writer(sheet, element, styles, start_row)

            return start_row

        except Exception as e:
            self.logger.warning(f"Could not add section to sheet: {str(e)}")
            return start_row + 1

    def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any],
                            start_row: int) -> int:
        """Add a table element to the sheet and return the next free row.

        Reads ``element["data"]["headers"]`` and ``element["data"]["rows"]``.
        """
        try:
            table_data = element.get("data") or {}
            headers = table_data.get("headers") or []
            rows = table_data.get("rows") or []

            if not headers and not rows:
                return start_row

            # Header row. The text colour is applied whether or not the
            # header is bold, so light text on a dark fill stays readable.
            header_style = self._get_style(styles, "table_header")
            header_font = Font(
                bold=bool(header_style.get("bold")),
                color=self._normalize_color(header_style.get("text_color"), "FF000000"),
            )
            header_background = self._normalize_color(header_style.get("background"))
            header_fill = None
            if header_background:
                header_fill = PatternFill(start_color=header_background,
                                          end_color=header_background,
                                          fill_type="solid")

            for col, header in enumerate(headers, 1):
                cell = sheet.cell(row=start_row, column=col,
                                  value=self._to_cell_value(header))
                cell.font = header_font
                if header_fill is not None:
                    cell.fill = header_fill
            start_row += 1

            # Data rows
            cell_style = self._get_style(styles, "table_cell")
            cell_color = self._normalize_color(cell_style.get("text_color"))
            cell_font = Font(color=cell_color) if cell_color else None

            for row_data in rows:
                # A scalar row is written as a single cell instead of being
                # iterated character by character.
                if not isinstance(row_data, (list, tuple)):
                    row_data = [row_data]
                for col, cell_value in enumerate(row_data, 1):
                    cell = sheet.cell(row=start_row, column=col,
                                      value=self._to_cell_value(cell_value))
                    if cell_font is not None:
                        cell.font = cell_font
                start_row += 1

            return start_row

        except Exception as e:
            self.logger.warning(f"Could not add table to Excel: {str(e)}")
            return start_row + 1

    def _add_list_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any],
                           start_row: int) -> int:
        """Add a list element (one bullet per row) and return the next free row."""
        try:
            list_items = element.get("items") or []
            list_color = self._normalize_color(
                self._get_style(styles, "bullet_list").get("color"))
            list_font = Font(color=list_color) if list_color else None

            for item in list_items:
                cell = sheet.cell(row=start_row, column=1, value=f"• {item}")
                if list_font is not None:
                    cell.font = list_font
                start_row += 1

            return start_row

        except Exception as e:
            self.logger.warning(f"Could not add list to Excel: {str(e)}")
            return start_row + 1

    def _add_paragraph_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any],
                                start_row: int) -> int:
        """Add a paragraph element and return the next free row.

        Elements without text leave the sheet and the row untouched.
        """
        try:
            text = element.get("text", "")
            if text:
                cell = sheet.cell(row=start_row, column=1, value=text)
                paragraph_color = self._normalize_color(
                    self._get_style(styles, "paragraph").get("color"))
                if paragraph_color:
                    cell.font = Font(color=paragraph_color)
                start_row += 1

            return start_row

        except Exception as e:
            self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
            return start_row + 1

    def _add_heading_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any],
                              start_row: int) -> int:
        """Add a heading element and return the next free row.

        The font shrinks by 2pt per heading level below the first, but never
        below 10pt.
        """
        try:
            text = element.get("text", "")
            level = element.get("level", 1)
            if not isinstance(level, int) or isinstance(level, bool):
                level = 1

            if text:
                cell = sheet.cell(row=start_row, column=1, value=text)
                heading_style = self._get_style(styles, "heading")
                font_size = heading_style.get("font_size", 14)
                if level > 1:
                    font_size = max(10, font_size - (level - 1) * 2)

                cell.font = Font(
                    size=font_size,
                    bold=True,
                    color=self._normalize_color(heading_style.get("color"), "FF000000"),
                )
                start_row += 1

            return start_row

        except Exception as e:
            self.logger.warning(f"Could not add heading to Excel: {str(e)}")
            return start_row + 1

    def _format_timestamp(self) -> str:
        """Format current timestamp for document generation."""
        return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")