gateway/modules/services/serviceGeneration/renderers/rendererPptx.py

import logging
import base64
import io
from typing import Dict, Any, Optional, Tuple, List
from .rendererBaseTemplate import BaseRenderer

logger = logging.getLogger(__name__)


class RendererPptx(BaseRenderer):
    """Renderer for PowerPoint (.pptx) files using python-pptx library."""

    def __init__(self):
        """Initialise the base renderer, then record the PPTX formats and MIME type."""
        super().__init__()
        pptx_mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
        self.supported_formats = ["pptx", "ppt"]
        self.output_mime_type = pptx_mime_type

    @classmethod
    def get_supported_formats(cls) -> list:
        """Get list of supported output formats."""
        return ["pptx", "ppt"]

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: Optional[str] = None, ai_service=None) -> Tuple[str, str]:
        """
        Render content as PowerPoint presentation from JSON data.

        Args:
            extracted_content: JSON content to render as presentation
            title: Title for the presentation (used for the fallback slide and
                passed on to the JSON-to-slides parser)
            user_prompt: User prompt forwarded to the style lookup
            ai_service: AI service forwarded to the style lookup

        Returns:
            A ``(payload, mime_type)`` tuple. On success the payload is the
            base64-encoded .pptx file and the MIME type is the PresentationML
            type. On failure the payload is a plain error message and the MIME
            type is ``"text/plain"``; no exception is propagated.
        """
        try:
            # Imported lazily so that a missing python-pptx installation is
            # reported through the ImportError branch below.
            from pptx import Presentation
            from pptx.util import Inches, Pt
            from pptx.enum.text import PP_ALIGN

            # Create new presentation
            prs = Presentation()

            # Styles must be resolved BEFORE the slide size is chosen, because
            # the size is read from the style dictionary.
            styles = await self._get_pptx_styles(user_prompt, ai_service)

            # Set slide size based on user intent (default to 16:9)
            if styles.get("slide_size", "16:9") == "4:3":
                prs.slide_width = Inches(10)
                prs.slide_height = Inches(7.5)
            else:
                prs.slide_width = Inches(13.33)
                prs.slide_height = Inches(7.5)

            # Generate slides from JSON content
            slides_data = await self._parse_json_to_slides(extracted_content, title, styles)
            logger.info(f"Parsed {len(slides_data)} slides from JSON content")

            # Debug: Show first 200 chars of content
            logger.info(f"JSON content preview: {str(extracted_content)[:200]}...")

            for slide_number, slide_data in enumerate(slides_data, start=1):
                # A missing or None content value is treated as empty text.
                content_text = slide_data.get("content") or ""

                logger.info(f"Slide {slide_number}: '{slide_data.get('title', 'No title')}' - {len(content_text)} chars")
                if content_text:
                    logger.info(f"  Content preview: '{content_text[:100]}...'")
                else:
                    logger.warning(f"  ⚠️ Slide {slide_number} has NO content!")

                # Create slide with appropriate layout based on content
                slide_layout_index = self._get_slide_layout_index(slide_data, styles)
                slide = prs.slides.add_slide(prs.slide_layouts[slide_layout_index])

                # Set title (guarded: a layout without a title placeholder yields None)
                title_shape = slide.shapes.title
                if title_shape is not None:
                    slide_title = slide_data.get("title", "Slide")
                    title_shape.text = "Slide" if slide_title is None else str(slide_title)

                # Set content
                text_frame = slide.placeholders[1].text_frame
                text_frame.clear()

                # clear() leaves one empty paragraph behind; reuse it for the
                # first non-blank paragraph so no empty line leads the slide.
                reuse_first_paragraph = True

                for raw_paragraph in content_text.split('\n\n'):
                    paragraph = raw_paragraph.strip()
                    if not paragraph:
                        continue

                    if reuse_first_paragraph:
                        p = text_frame.paragraphs[0]
                        reuse_first_paragraph = False
                    else:
                        p = text_frame.add_paragraph()

                    # '##' must be tested before '#', otherwise the subheader
                    # branch can never be reached.
                    if paragraph.startswith('##'):
                        # Subheader
                        p.text = paragraph.lstrip('#').strip()
                        p.font.size = Pt(20)
                        p.font.bold = True
                    elif paragraph.startswith('#'):
                        # Header
                        p.text = paragraph.lstrip('#').strip()
                        p.font.size = Pt(24)
                        p.font.bold = True
                    elif paragraph.startswith('*') and paragraph.endswith('*'):
                        # Bold text
                        p.text = paragraph.strip('*')
                        p.font.bold = True
                    else:
                        # Regular text
                        p.text = paragraph
                        p.font.size = Pt(14)

                    p.alignment = PP_ALIGN.LEFT

            # If no slides were created, create a default slide
            if not slides_data:
                slide = prs.slides.add_slide(prs.slide_layouts[0])  # Title slide layout
                slide.shapes.title.text = title
                slide.placeholders[1].text = "Generated by PowerOn AI System"

            # Save to an in-memory buffer and encode as base64
            buffer = io.BytesIO()
            prs.save(buffer)
            pptx_bytes = buffer.getvalue()
            pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')

            logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes")
            return pptx_base64, "application/vnd.openxmlformats-officedocument.presentationml.presentation"

        except ImportError:
            logger.error("python-pptx library not installed. Install with: pip install python-pptx")
            return "python-pptx library not installed", "text/plain"
        except Exception as e:
            # Top-level boundary: report the failure as text instead of raising,
            # but keep the traceback in the log.
            logger.error(f"Error rendering PowerPoint presentation: {str(e)}", exc_info=True)
            return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"

    def _parse_content_to_slides(self, content: str, title: str) -> list:
        """
        Parse content into slide data structure.

        Args:
            content: Content to parse
            title: Presentation title

        Returns:
            List of slide data dictionaries
        """
        slides = []

        # Split content by slide markers or headers
        slide_sections = self._split_content_into_slides(content)

        for i, section in enumerate(slide_sections):
            if section.strip():
                slide_data = {
                    "title": f"Slide {i + 1}",
                    "content": section.strip()
                }

                # Extract title from content if it starts with #
                lines = section.strip().split('\n')
                if lines and lines[0].startswith('#'):
                    # Remove # symbols and clean up title
                    slide_title = lines[0].lstrip('#').strip()
                    slide_data["title"] = slide_title
                    slide_data["content"] = '\n'.join(lines[1:]).strip()
                elif lines and lines[0].strip():
                    # Use first line as title if it looks like a title
                    first_line = lines[0].strip()
                    if len(first_line) < 100 and not first_line.endswith('.'):
                        slide_data["title"] = first_line
                        slide_data["content"] = '\n'.join(lines[1:]).strip()

                slides.append(slide_data)

        return slides

    def _split_content_into_slides(self, content: str) -> list:
        """
        Split content into individual slides based on headers and structure.

        Args:
            content: Content to split

        Returns:
            List of slide content strings
        """
        import re

        # First, try to split by major headers (# or ##)
        # This is the most common case for AI-generated content
        header_pattern = r'^(#{1,2})\s+(.+)$'
        lines = content.split('\n')
        slides = []
        current_slide = []

        for line in lines:
            # Check if this line is a header
            header_match = re.match(header_pattern, line.strip())
            if header_match:
                # If we have content in current slide, save it
                if current_slide:
                    slide_content = '\n'.join(current_slide).strip()
                    if slide_content:
                        slides.append(slide_content)
                    current_slide = []

                # Start new slide with this header
                current_slide.append(line)
            else:
                # Add line to current slide
                current_slide.append(line)

        # Add the last slide
        if current_slide:
            slide_content = '\n'.join(current_slide).strip()
            if slide_content:
                slides.append(slide_content)

        # If we found slides with headers, return them
        if len(slides) > 1:
            return slides

        # Fallback: Split by double newlines
        sections = content.split('\n\n\n')
        if len(sections) > 1:
            return [s.strip() for s in sections if s.strip()]

        # Another fallback: Split by double newlines
        sections = content.split('\n\n')
        if len(sections) > 1:
            return [s.strip() for s in sections if s.strip()]

        # Last resort: return as single slide
        return [content.strip()]


    def get_output_mime_type(self) -> str:
        """Get MIME type for rendered output."""
        return self.output_mime_type

    async def _get_pptx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
        """Resolve the style dictionary used for the presentation.

        Builds a style template from the schema below, asks ``_get_ai_styles``
        for styles with the default PowerPoint styles as the fallback argument,
        and passes the result through the readability validation.

        NOTE(review): ``_create_ai_style_template`` and ``_get_ai_styles`` are
        not defined in this class - presumably inherited from BaseRenderer.
        """
        schema = {
            "title": {"font_size": 44, "color": "#1F4E79", "bold": True, "align": "center"},
            "heading": {"font_size": 32, "color": "#2F2F2F", "bold": True, "align": "left"},
            "subheading": {"font_size": 24, "color": "#4F4F4F", "bold": True, "align": "left"},
            "paragraph": {"font_size": 18, "color": "#2F2F2F", "bold": False, "align": "left"},
            "bullet_list": {"font_size": 18, "color": "#2F2F2F", "indent": 20},
            "table_header": {"font_size": 16, "color": "#FFFFFF", "bold": True, "background": "#4F4F4F"},
            "table_cell": {"font_size": 14, "color": "#2F2F2F", "bold": False, "background": "#FFFFFF"},
            "slide_size": "16:9",
            "content_per_slide": "concise"
        }

        prompt_template = self._create_ai_style_template("pptx", user_prompt, schema)
        fallback_styles = self._get_default_pptx_styles()
        resolved = await self._get_ai_styles(ai_service, prompt_template, fallback_styles)

        # Enforce PowerPoint-specific minimum font sizes before handing the styles out
        return self._validate_pptx_styles_readability(resolved)

    def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix readability issues in AI-generated styles."""
        try:
            # Ensure minimum font sizes for PowerPoint readability
            min_font_sizes = {
                "title": 36,
                "heading": 24,
                "subheading": 20,
                "paragraph": 14,
                "bullet_list": 14,
                "table_header": 12,
                "table_cell": 12
            }

            for style_name, min_size in min_font_sizes.items():
                if style_name in styles:
                    current_size = styles[style_name].get("font_size", 12)
                    if current_size < min_size:
                        styles[style_name]["font_size"] = min_size

            return styles

        except Exception as e:
            logger.warning(f"Style validation failed: {str(e)}")
            return self._get_default_pptx_styles()

    def _get_default_pptx_styles(self) -> Dict[str, Any]:
        """Default PowerPoint styles."""
        return {
            "title": {"font_size": 44, "color": "#1F4E79", "bold": True, "align": "center"},
            "heading": {"font_size": 32, "color": "#2F2F2F", "bold": True, "align": "left"},
            "subheading": {"font_size": 24, "color": "#4F4F4F", "bold": True, "align": "left"},
            "paragraph": {"font_size": 18, "color": "#2F2F2F", "bold": False, "align": "left"},
            "bullet_list": {"font_size": 18, "color": "#2F2F2F", "indent": 20},
            "table_header": {"font_size": 16, "color": "#FFFFFF", "bold": True, "background": "#4F4F4F"},
            "table_cell": {"font_size": 14, "color": "#2F2F2F", "bold": False, "background": "#FFFFFF"},
            "slide_size": "16:9",
            "content_per_slide": "concise"
        }

    async def _parse_json_to_slides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Parse JSON content into slide data structure.

        Args:
            json_content: JSON content to parse
            title: Presentation title
            styles: AI-generated styles

        Returns:
            List of slide data dictionaries
        """
        slides = []

        try:
            # Validate JSON structure
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")

            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")

            # Use title from JSON metadata if available, otherwise use provided title
            document_title = json_content.get("metadata", {}).get("title", title)

            # Create title slide
            slides.append({
                "title": document_title,
                "content": "Generated by PowerOn AI System\n\n" + self._format_timestamp()
            })

            # Process sections into slides based on content and user intent
            sections = json_content.get("sections", [])
            slides.extend(self._create_slides_from_sections(sections, styles))

            # If no content slides were created, create a default content slide
            if len(slides) == 1:  # Only title slide
                slides.append({
                    "title": "Content Overview",
                    "content": "No structured content found in the source documents.\n\nPlease check the source documents and try again."
                })

            return slides

        except Exception as e:
            logger.error(f"Error parsing JSON to slides: {str(e)}")
            # Return minimal fallback slides
            return [
                {
                    "title": title,
                    "content": "Error parsing content for presentation"
                }
            ]

    def _create_slide_from_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
        """Create a slide from a JSON section."""
        try:
            section_title = section.get("title", "Untitled Section")
            content_type = section.get("content_type", "paragraph")
            elements = section.get("elements", [])

            # Build slide content based on section type
            content_parts = []

            for element in elements:
                if content_type == "table":
                    content_parts.append(self._format_table_for_slide(element))
                elif content_type == "list":
                    content_parts.append(self._format_list_for_slide(element))
                elif content_type == "heading":
                    content_parts.append(self._format_heading_for_slide(element))
                elif content_type == "paragraph":
                    content_parts.append(self._format_paragraph_for_slide(element))
                elif content_type == "code":
                    content_parts.append(self._format_code_for_slide(element))
                else:
                    content_parts.append(self._format_paragraph_for_slide(element))

            # Combine content parts
            slide_content = "\n\n".join(filter(None, content_parts))

            return {
                "title": section_title,
                "content": slide_content
            }

        except Exception as e:
            logger.warning(f"Error creating slide from section: {str(e)}")
            return None

    def _format_table_for_slide(self, table_data: Dict[str, Any]) -> str:
        """Format table data for slide presentation."""
        try:
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])

            if not headers:
                return ""

            # Create table representation
            table_lines = []

            # Add headers
            header_line = " | ".join(str(h) for h in headers)
            table_lines.append(header_line)

            # Add separator
            separator = "-" * len(header_line)
            table_lines.append(separator)

            # Add data rows (limit based on content density)
            max_rows = 5  # Default limit
            for row in rows[:max_rows]:
                row_line = " | ".join(str(cell) for cell in row)
                table_lines.append(row_line)

            if len(rows) > max_rows:
                table_lines.append(f"... and {len(rows) - max_rows} more rows")

            return "\n".join(table_lines)

        except Exception as e:
            logger.warning(f"Error formatting table for slide: {str(e)}")
            return ""

    def _format_list_for_slide(self, list_data: Dict[str, Any]) -> str:
        """Format list data for slide presentation."""
        try:
            items = list_data.get("items", [])

            if not items:
                return ""

            # Create list representation
            list_lines = []

            for item in items:
                if isinstance(item, dict):
                    text = item.get("text", "")
                    list_lines.append(f"• {text}")

                    # Add subitems (limit to 3 for readability)
                    subitems = item.get("subitems", [])[:3]
                    for subitem in subitems:
                        if isinstance(subitem, dict):
                            list_lines.append(f"  - {subitem.get('text', '')}")
                        else:
                            list_lines.append(f"  - {subitem}")
                else:
                    list_lines.append(f"• {str(item)}")

            return "\n".join(list_lines)

        except Exception as e:
            logger.warning(f"Error formatting list for slide: {str(e)}")
            return ""

    def _format_heading_for_slide(self, heading_data: Dict[str, Any]) -> str:
        """Format heading data for slide presentation."""
        try:
            text = heading_data.get("text", "")
            level = heading_data.get("level", 1)

            if text:
                return f"{'#' * level} {text}"

            return ""

        except Exception as e:
            logger.warning(f"Error formatting heading for slide: {str(e)}")
            return ""

    def _format_paragraph_for_slide(self, paragraph_data: Dict[str, Any]) -> str:
        """Format paragraph data for slide presentation."""
        try:
            text = paragraph_data.get("text", "")

            if text:
                # Limit paragraph length based on content density
                max_length = 200  # Default limit
                if len(text) > max_length:
                    text = text[:max_length] + "..."

                return text

            return ""

        except Exception as e:
            logger.warning(f"Error formatting paragraph for slide: {str(e)}")
            return ""

    def _format_code_for_slide(self, code_data: Dict[str, Any]) -> str:
        """Format code data for slide presentation."""
        try:
            code = code_data.get("code", "")
            language = code_data.get("language", "")

            if code:
                # Limit code length based on content density
                max_length = 100  # Default limit
                if len(code) > max_length:
                    code = code[:max_length] + "..."

                if language:
                    return f"Code ({language}):\n{code}"
                else:
                    return f"Code:\n{code}"

            return ""

        except Exception as e:
            logger.warning(f"Error formatting code for slide: {str(e)}")
            return ""

    def _get_slide_layout_index(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
        """Determine the best slide layout based on content."""
        try:
            content = slide_data.get("content", "")
            title = slide_data.get("title", "")

            # Check if it's a title slide (first slide)
            if not content or "Generated by PowerOn AI System" in content:
                return 0  # Title slide layout

            # Check content type to determine layout
            if "|" in content and "-" in content:
                # Has both tables and lists - use content with caption
                return 2
            elif "|" in content:
                # Has tables - use content layout
                return 1
            elif content.count("•") > 2:
                # Has many bullet points - use content layout
                return 1
            else:
                # Default to title and content
                return 1

        except Exception as e:
            logger.warning(f"Error determining slide layout: {str(e)}")
            return 1  # Default to title and content

    def _create_slides_from_sections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Create slides from sections based on content density and user intent."""
        try:
            slides = []
            content_per_slide = styles.get("content_per_slide", "concise")

            for section in sections:
                section_slides = self._create_section_slides(section, styles, content_per_slide)
                slides.extend(section_slides)

            return slides

        except Exception as e:
            logger.warning(f"Error creating slides from sections: {str(e)}")
            return []

    def _create_section_slides(self, section: Dict[str, Any], styles: Dict[str, Any], content_per_slide: str) -> List[Dict[str, Any]]:
        """Create one or more slides from a section based on content density."""
        try:
            section_title = section.get("title", "Untitled Section")
            content_type = section.get("content_type", "paragraph")
            elements = section.get("elements", [])

            if not elements:
                return [{
                    "title": section_title,
                    "content": "No content available for this section."
                }]

            # Determine how to split content based on type and density
            if content_per_slide == "detailed" and len(elements) > 3:
                # Split large sections into multiple slides
                return self._split_section_into_multiple_slides(section_title, elements, content_type)
            else:
                # Create single slide for section
                slide_data = self._create_slide_from_section(section, styles)
                return [slide_data] if slide_data else []

        except Exception as e:
            logger.warning(f"Error creating section slides: {str(e)}")
            return []

    def _split_section_into_multiple_slides(self, section_title: str, elements: List[Dict[str, Any]], content_type: str) -> List[Dict[str, Any]]:
        """Split a large section into multiple slides."""
        try:
            slides = []
            max_elements_per_slide = 3

            for i in range(0, len(elements), max_elements_per_slide):
                slide_elements = elements[i:i + max_elements_per_slide]

                # Create slide title
                if i == 0:
                    slide_title = section_title
                else:
                    slide_title = f"{section_title} (Part {i//max_elements_per_slide + 1})"

                # Build content for this slide
                content_parts = []
                for element in slide_elements:
                    if content_type == "table":
                        content_parts.append(self._format_table_for_slide(element))
                    elif content_type == "list":
                        content_parts.append(self._format_list_for_slide(element))
                    elif content_type == "heading":
                        content_parts.append(self._format_heading_for_slide(element))
                    elif content_type == "paragraph":
                        content_parts.append(self._format_paragraph_for_slide(element))
                    elif content_type == "code":
                        content_parts.append(self._format_code_for_slide(element))
                    else:
                        content_parts.append(self._format_paragraph_for_slide(element))

                slide_content = "\n\n".join(filter(None, content_parts))

                slides.append({
                    "title": slide_title,
                    "content": slide_content
                })

            return slides

        except Exception as e:
            logger.warning(f"Error splitting section into slides: {str(e)}")
            return []

    def _format_timestamp(self) -> str:
        """Format current timestamp for presentation generation."""
        from datetime import datetime, UTC
        return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")