# gateway/modules/services/serviceGeneration/renderers/excel_renderer.py

"""
Excel renderer for report generation using openpyxl.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC

try:
    from openpyxl import Workbook
    from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
    from openpyxl.utils import get_column_letter
    from openpyxl.worksheet.table import Table, TableStyleInfo
    OPENPYXL_AVAILABLE = True
except ImportError:
    OPENPYXL_AVAILABLE = False

class ExcelRenderer(BaseRenderer):
    """Renders content to Excel format using openpyxl."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Excel formats."""
        return ['xlsx', 'xls', 'excel']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['spreadsheet', 'workbook']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for Excel renderer."""
        return 110

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only Excel-specific guidelines; global prompt is built centrally."""
        return (
            "EXCEL FORMAT GUIDELINES:\n"
            "- Extract structured data from source documents into JSON format\n"
            "- Focus on tabular data, lists, and structured information suitable for spreadsheets\n"
            "- For tables: Extract headers and rows as separate arrays with clear column names\n"
            "- For lists: Extract items with optional sub-items and metadata\n"
            "- Structure content into sections with clear content types (table, list, paragraph)\n"
            "- Use proper JSON structure with metadata, sections, and elements\n"
            "- Ensure data is clean and ready for Excel conversion with proper formatting\n"
            "OUTPUT: Return structured JSON that can be converted to Excel format."
        )

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to Excel format using AI-analyzed styling."""
        try:
            if not OPENPYXL_AVAILABLE:
                # Fallback to CSV if openpyxl not available
                from .csv_renderer import CsvRenderer
                csv_renderer = CsvRenderer()
                csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service)
                return csv_content, "text/csv"

            # Generate Excel using AI-analyzed styling
            excel_content = await self._generate_excel_from_json(extracted_content, title, user_prompt, ai_service)

            return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

        except Exception as e:
            self.logger.error(f"Error rendering Excel: {str(e)}")
            # Return CSV fallback
            return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"

    def _generate_excel(self, content: str, title: str) -> str:
        """Build a Summary/Data/Analysis workbook from text and return it base64-encoded.

        Errors are logged and re-raised unchanged.
        """
        try:
            workbook = Workbook()

            # Discard the sheet a new workbook starts with; the three report
            # sheets below are created explicitly at fixed positions.
            workbook.remove(workbook.active)

            summary_ws = workbook.create_sheet("Summary", 0)
            data_ws = workbook.create_sheet("Data", 1)
            analysis_ws = workbook.create_sheet("Analysis", 2)

            self._populate_summary_sheet(summary_ws, title)
            self._populate_data_sheet(data_ws, content)
            self._populate_analysis_sheet(analysis_ws, content)

            # Serialize in memory, then encode the raw bytes as base64 text.
            stream = io.BytesIO()
            workbook.save(stream)
            stream.seek(0)
            return base64.b64encode(stream.getvalue()).decode('utf-8')

        except Exception as e:
            self.logger.error(f"Error generating Excel: {str(e)}")
            raise

    def _populate_summary_sheet(self, sheet, title: str):
        """Write the title, generation info and a key-metric formula to the summary sheet.

        Failures are logged as warnings and not propagated.
        """
        try:
            # Title cell: large, bold, centered.
            sheet['A1'] = title
            heading_cell = sheet['A1']
            heading_cell.font = Font(size=16, bold=True)
            heading_cell.alignment = Alignment(horizontal='center')

            # Generation info followed by the metrics caption.
            for coordinate, text in (
                ('A3', "Generated:"),
                ('B3', self._format_timestamp()),
                ('A4', "Status:"),
                ('B4', "Generated Successfully"),
                ('A6', "Key Metrics:"),
            ):
                sheet[coordinate] = text
            sheet['A6'].font = Font(bold=True)

            sheet['A7'] = "Total Items:"
            # Non-empty cells in column A of the Data sheet, minus the header row.
            sheet['B7'] = "=COUNTA(Data!A:A)-1"

            # Fixed column widths.
            for column_letter, width in (('A', 20), ('B', 30)):
                sheet.column_dimensions[column_letter].width = width

        except Exception as e:
            self.logger.warning(f"Could not populate summary sheet: {str(e)}")

    def _populate_data_sheet(self, sheet, content: str):
        """Write a styled header row and one sheet row per non-blank line of ``content``.

        Lines containing ``|`` are split into at most five cells; every other
        line goes into column 1. Failures are logged as warnings and not
        propagated.
        """
        try:
            column_titles = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"]
            for column_index, column_title in enumerate(column_titles, start=1):
                header_cell = sheet.cell(row=1, column=column_index, value=column_title)
                header_cell.font = Font(bold=True)
                header_cell.fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")

            next_row = 2
            for raw_line in content.split('\n'):
                text = raw_line.strip()
                if not text:
                    continue

                if '|' in text:
                    # Pipe-delimited table line: keep non-empty cells, first five only.
                    row_values = [part.strip() for part in text.split('|') if part.strip()][:5]
                else:
                    # Regular content goes into the first column as-is.
                    row_values = [text]

                for column_index, cell_value in enumerate(row_values, start=1):
                    sheet.cell(row=next_row, column=column_index, value=cell_value)
                # A pipe line without any non-empty cell still consumes a row.
                next_row += 1

            # Fixed width for the five data columns.
            for column_index in range(1, 6):
                sheet.column_dimensions[get_column_letter(column_index)].width = 20

        except Exception as e:
            self.logger.warning(f"Could not populate data sheet: {str(e)}")

    def _populate_analysis_sheet(self, sheet, content: str):
        """Write line-count statistics for ``content`` and fixed recommendations to the sheet.

        Failures are logged as warnings and not propagated.
        """
        try:
            sheet['A1'] = "Analysis & Insights"
            sheet['A1'].font = Font(size=14, bold=True)

            all_lines = content.split('\n')

            sheet['A3'] = "Content Analysis:"
            sheet['A3'].font = Font(bold=True)

            # Classify lines: anything with a pipe counts as a table row,
            # anything starting with "- " or "* " counts as a list item.
            pipe_total = len([entry for entry in all_lines if '|' in entry])
            bullet_total = len([entry for entry in all_lines if entry.startswith('- ') or entry.startswith('* ')])
            plain_total = len(all_lines) - pipe_total - bullet_total

            cursor = 4
            for label, amount in (
                ("Total Lines", len(all_lines)),
                ("Table Rows", pipe_total),
                ("List Items", bullet_total),
                ("Text Lines", plain_total),
            ):
                sheet[f'A{cursor}'] = f"{label}: {amount}"
                cursor += 1

            # Leave one empty row before the recommendations.
            cursor += 1
            sheet[f'A{cursor}'] = "Recommendations:"
            sheet[f'A{cursor}'].font = Font(bold=True)
            for advice in ("1. Review data accuracy", "2. Consider additional analysis", "3. Update regularly"):
                cursor += 1
                sheet[f'A{cursor}'] = advice

            sheet.column_dimensions['A'].width = 30

        except Exception as e:
            self.logger.warning(f"Could not populate analysis sheet: {str(e)}")

    async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Generate Excel content from structured JSON document using AI-generated styling."""
        try:
            # Get AI-generated styling definitions
            styles = await self._get_excel_styles(user_prompt, ai_service)

            # Validate JSON structure
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")

            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")

            # Use title from JSON metadata if available, otherwise use provided title
            document_title = json_content.get("metadata", {}).get("title", title)

            # Create workbook
            wb = Workbook()

            # Remove default sheet
            wb.remove(wb.active)

            # Create sheets based on content
            sheets = self._create_excel_sheets(wb, json_content, styles)

            # Populate sheets with content
            self._populate_excel_sheets(sheets, json_content, styles)

            # Save to buffer
            buffer = io.BytesIO()
            wb.save(buffer)
            buffer.seek(0)

            # Convert to base64
            excel_bytes = buffer.getvalue()
            excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')

            return excel_base64

        except Exception as e:
            self.logger.error(f"Error generating Excel from JSON: {str(e)}")
            raise Exception(f"Excel generation failed: {str(e)}")

    async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
        """Simple AI call to get Excel styling definitions."""
        if not ai_service:
            return self._get_default_excel_styles()

        try:
            prompt = f"""
For this Excel document request: "{user_prompt}"

Provide styling definitions for Excel elements. Respond with ONLY JSON:

{{
    "title": {{"font_size": 16, "color": "#1F4E79", "bold": true, "align": "center"}},
    "heading": {{"font_size": 14, "color": "#2F2F2F", "bold": true, "align": "left"}},
    "table_header": {{"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": true, "align": "center"}},
    "table_cell": {{"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": false, "align": "left"}},
    "bullet_list": {{"font_size": 11, "color": "#2F2F2F", "indent": 2}},
    "paragraph": {{"font_size": 11, "color": "#2F2F2F", "bold": false, "align": "left"}},
    "code_block": {{"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}}
}}

CRITICAL: Table headers must have dark background with light text, table cells must have light background with dark text for readability.
"""

            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType

            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            request = AiCallRequest(prompt=prompt, context="", options=request_options)
            response = await ai_service.aiObjects.call(request)

            import json
            import re

            # Clean and parse JSON
            result = response.content.strip()
            if result.startswith('```json'):
                result = re.sub(r'^```json\s*', '', result)
                result = re.sub(r'\s*```$', '', result)
            elif result.startswith('```'):
                result = re.sub(r'^```\s*', '', result)
                result = re.sub(r'\s*```$', '', result)

            styles = json.loads(result)

            # Validate and fix contrast issues
            styles = self._validate_excel_styles_contrast(styles)

            return styles

        except Exception as e:
            self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
            return self._get_default_excel_styles()

    def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix contrast issues in AI-generated styles."""
        try:
            # Fix table header contrast
            if "table_header" in styles:
                header = styles["table_header"]
                bg_color = header.get("background", "#FFFFFF")
                text_color = header.get("text_color", "#000000")

                # If both are white or both are dark, fix it
                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
                    header["background"] = "#4F4F4F"
                    header["text_color"] = "#FFFFFF"
                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
                    header["background"] = "#4F4F4F"
                    header["text_color"] = "#FFFFFF"

            # Fix table cell contrast
            if "table_cell" in styles:
                cell = styles["table_cell"]
                bg_color = cell.get("background", "#FFFFFF")
                text_color = cell.get("text_color", "#000000")

                # If both are white or both are dark, fix it
                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
                    cell["background"] = "#FFFFFF"
                    cell["text_color"] = "#2F2F2F"
                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
                    cell["background"] = "#FFFFFF"
                    cell["text_color"] = "#2F2F2F"

            return styles

        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
            return self._get_default_excel_styles()

    def _get_default_excel_styles(self) -> Dict[str, Any]:
        """Default Excel styles."""
        return {
            "title": {"font_size": 16, "color": "#1F4E79", "bold": True, "align": "center"},
            "heading": {"font_size": 14, "color": "#2F2F2F", "bold": True, "align": "left"},
            "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
            "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
            "bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 2},
            "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
            "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
        }

    def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
        """Create Excel sheets based on content structure and user intent."""
        sheets = {}

        # Get sheet names from AI styles or generate based on content
        sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content))

        # Create sheets
        for i, sheet_name in enumerate(sheet_names):
            if i == 0:
                sheet = wb.active
                sheet.title = sheet_name
            else:
                sheet = wb.create_sheet(sheet_name, i)
            sheets[sheet_name.lower()] = sheet

        return sheets

    def _generate_sheet_names_from_content(self, json_content: Dict[str, Any]) -> List[str]:
        """Generate sheet names based on actual content structure."""
        sections = json_content.get("sections", [])

        # If no sections, create a single sheet
        if not sections:
            return ["Content"]

        # Generate sheet names based on content types
        sheet_names = []

        # Always start with a main content sheet
        document_title = json_content.get("metadata", {}).get("title", "Document")
        sheet_names.append(document_title[:31])  # Excel sheet name limit

        # Add sheets based on content types found
        content_types = set()
        for section in sections:
            content_type = section.get("content_type", "paragraph")
            content_types.add(content_type)

        # Create sheets for different content types if we have multiple types
        if len(content_types) > 1:
            if "table" in content_types:
                sheet_names.append("Tables")
            if "list" in content_types:
                sheet_names.append("Lists")
            if "paragraph" in content_types or "heading" in content_types:
                sheet_names.append("Text")

        # Limit to 4 sheets maximum
        return sheet_names[:4]

    def _populate_excel_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Populate Excel sheets with content from JSON based on actual sheet names."""
        try:
            # Get the actual sheet names that were created
            sheet_names = list(sheets.keys())

            if not sheet_names:
                return

            # Populate the first sheet with all content
            first_sheet_name = sheet_names[0]
            self._populate_main_sheet(sheets[first_sheet_name], json_content, styles)

            # If we have multiple sheets, distribute content by type
            if len(sheet_names) > 1:
                self._populate_content_type_sheets(sheets, json_content, styles, sheet_names[1:])

        except Exception as e:
            self.logger.warning(f"Could not populate Excel sheets: {str(e)}")

    def _populate_main_sheet(self, sheet, json_content: Dict[str, Any], styles: Dict[str, Any]):
        """Populate the main sheet with document overview and all content.

        Layout: title in A1, generation info in rows 3-4, optional metadata
        block starting at row 6, a content overview (section counts per
        content type) and finally every section via ``_add_section_to_sheet``.

        Args:
            sheet: Worksheet to fill.
            json_content: Document JSON (``metadata`` and ``sections`` are read).
            styles: Style dictionary; ``styles["title"]`` is used for A1, with
                missing keys taken from the default title style.

        Failures are logged as warnings and not propagated.
        """
        try:
            metadata = json_content.get("metadata")
            if not isinstance(metadata, dict):
                metadata = {}

            # Document title
            sheet['A1'] = metadata.get("title", "Generated Report")

            # Start from the default title style so a partial or missing
            # AI-provided "title" entry cannot raise KeyError.
            title_style = dict(self._get_default_excel_styles()["title"])
            custom_title_style = styles.get("title")
            if isinstance(custom_title_style, dict):
                title_style.update(custom_title_style)

            sheet['A1'].font = Font(
                size=title_style["font_size"],
                bold=title_style["bold"],
                # openpyxl expects RGB/aRGB hex digits without a leading '#'.
                color=str(title_style["color"]).lstrip("#"),
            )
            sheet['A1'].alignment = Alignment(horizontal=title_style["align"])

            # Generation info
            sheet['A3'] = "Generated:"
            sheet['B3'] = self._format_timestamp()
            sheet['A4'] = "Status:"
            sheet['B4'] = "Generated Successfully"

            # Last used row before the overview; must be defined even when
            # there is no metadata block (overview then starts at row 6).
            row = 5

            # Document metadata
            if metadata:
                sheet['A6'] = "Document Information:"
                sheet['A6'].font = Font(bold=True)

                row = 7
                for key, value in metadata.items():
                    if key != "title":
                        sheet[f'A{row}'] = f"{str(key).title()}:"
                        sheet[f'B{row}'] = str(value)
                        row += 1

            # Content overview
            sections = json_content.get("sections", [])
            sheet[f'A{row + 1}'] = "Content Overview:"
            sheet[f'A{row + 1}'].font = Font(bold=True)

            row += 2
            sheet[f'A{row}'] = f"Total Sections: {len(sections)}"

            # Count different content types
            content_types = {}
            for section in sections:
                content_type = section.get("content_type", "unknown")
                content_types[content_type] = content_types.get(content_type, 0) + 1

            for content_type, count in content_types.items():
                row += 1
                sheet[f'A{row}'] = f"{str(content_type).title()} Sections: {count}"

            # Add all content to this sheet
            row += 2
            for section in sections:
                row = self._add_section_to_sheet(sheet, section, styles, row)
                row += 1  # Empty row between sections

            # Fixed column widths
            sheet.column_dimensions['A'].width = 20
            sheet.column_dimensions['B'].width = 30

        except Exception as e:
            self.logger.warning(f"Could not populate main sheet: {str(e)}")

    def _populate_content_type_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any], sheet_names: List[str]):
        """Populate additional sheets based on content types."""
        try:
            sections = json_content.get("sections", [])

            for sheet_name in sheet_names:
                if sheet_name not in sheets:
                    continue

                sheet = sheets[sheet_name]
                sheet_title = sheet_name.title()
                sheet['A1'] = sheet_title
                sheet['A1'].font = Font(size=16, bold=True)

                row = 3

                # Filter sections by content type
                if sheet_name == "tables":
                    filtered_sections = [s for s in sections if s.get("content_type") == "table"]
                elif sheet_name == "lists":
                    filtered_sections = [s for s in sections if s.get("content_type") == "list"]
                elif sheet_name == "text":
                    filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]]
                else:
                    filtered_sections = sections

                for section in filtered_sections:
                    row = self._add_section_to_sheet(sheet, section, styles, row)
                    row += 1  # Empty row between sections

                # Auto-adjust column widths
                for col in range(1, 6):
                    sheet.column_dimensions[get_column_letter(col)].width = 20

        except Exception as e:
            self.logger.warning(f"Could not populate content type sheets: {str(e)}")

    def _add_section_to_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
        """Add a section to a sheet and return the next row."""
        try:
            # Add section title
            section_title = section.get("title")
            if section_title:
                sheet[f'A{start_row}'] = f"# {section_title}"
                sheet[f'A{start_row}'].font = Font(bold=True)
                start_row += 1

            # Process section elements
            elements = section.get("elements", [])
            content_type = section.get("content_type", "paragraph")

            for element in elements:
                if content_type == "table":
                    start_row = self._add_table_to_excel(sheet, element, styles, start_row)
                elif content_type == "list":
                    start_row = self._add_list_to_excel(sheet, element, styles, start_row)
                elif content_type == "paragraph":
                    start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
                elif content_type == "heading":
                    start_row = self._add_heading_to_excel(sheet, element, styles, start_row)
                else:
                    start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)

            return start_row

        except Exception as e:
            self.logger.warning(f"Could not add section to sheet: {str(e)}")
            return start_row + 1

    def _format_timestamp(self) -> str:
        """Format current timestamp for document generation."""
        return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")