# gateway/modules/services/serviceGeneration/renderers/pptx_renderer.py

import logging
import base64
import io
from typing import Dict, Any, Optional, Tuple
from .base_renderer import BaseRenderer

logger = logging.getLogger(__name__)


class PptxRenderer(BaseRenderer):
    """Renderer for PowerPoint (.pptx) files using python-pptx library."""

    def __init__(self):
        """Run the base renderer initialiser, then record the formats and MIME type of this renderer."""
        super().__init__()
        # Both the modern and the legacy extension are routed to this renderer.
        self.supported_formats = ["pptx", "ppt"]
        # Office Open XML presentation MIME type, split only for line length.
        self.output_mime_type = (
            "application/vnd.openxmlformats-officedocument"
            ".presentationml.presentation"
        )

    @classmethod
    def get_supported_formats(cls) -> list:
        """Get list of supported output formats."""
        return ["pptx", "ppt"]

    async def render(self, content: str, title: str = "Generated Presentation", **kwargs) -> Tuple[str, str]:
        """
        Render content as a PowerPoint presentation.

        The content is split into slides via ``_parse_content_to_slides``. Each
        slide uses the "Title and Content" layout; its body is split into
        paragraphs on blank lines and each paragraph is formatted as a header
        (``#``), subheader (``##``), bold text (``*...*``) or regular text.
        If the content yields no slides, a single title slide is created.

        Args:
            content: Markdown-like text to render. ``None`` is treated as empty.
            title: Title used on the fallback title slide.
            **kwargs: Additional rendering options (not used by this method).

        Returns:
            Tuple ``(payload, mime_type)``. On success ``payload`` is the
            base64-encoded .pptx file and ``mime_type`` is the presentationml
            MIME type. On failure ``payload`` is an error message and
            ``mime_type`` is ``"text/plain"``; no exception is propagated.
        """
        try:
            # Imported lazily so a missing python-pptx is reported through the
            # ImportError handler below instead of breaking module import.
            from pptx import Presentation
            from pptx.util import Inches, Pt
            from pptx.enum.text import PP_ALIGN

            # Treat a missing body like an empty one so the default slide is produced.
            content = content or ""

            # Create new presentation
            prs = Presentation()

            # Set slide size (16:9)
            prs.slide_width = Inches(13.33)
            prs.slide_height = Inches(7.5)

            # Parse content into slides
            slides_data = self._parse_content_to_slides(content, title)
            logger.info("Parsed %d slides from content", len(slides_data))
            logger.debug("Content preview: '%s...'", content[:200])

            for slide_number, slide_data in enumerate(slides_data, start=1):
                slide_content = slide_data.get("content", "")
                logger.info(
                    "Slide %d: '%s' - %d chars",
                    slide_number,
                    slide_data.get("title", "No title"),
                    len(slide_content),
                )
                if slide_content:
                    logger.debug("  Content preview: '%s...'", slide_content[:100])
                else:
                    logger.warning("  ⚠️ Slide %d has NO content!", slide_number)

                # Layout 1 of the default template is "Title and Content".
                slide = prs.slides.add_slide(prs.slide_layouts[1])
                slide.shapes.title.text = slide_data.get("title", "Slide")

                # Placeholder 1 is the body; clear() leaves exactly one empty paragraph.
                text_frame = slide.placeholders[1].text_frame
                text_frame.clear()

                # Track the first *non-blank* paragraph explicitly: it must reuse
                # the paragraph left by clear(), otherwise an empty leading
                # paragraph would remain on the slide.
                is_first_paragraph = True
                for raw_paragraph in slide_content.split('\n\n'):
                    paragraph = raw_paragraph.strip()
                    if not paragraph:
                        continue

                    if is_first_paragraph:
                        p = text_frame.paragraphs[0]
                        is_first_paragraph = False
                    else:
                        p = text_frame.add_paragraph()

                    # '##' must be tested before '#', since every '##' paragraph
                    # also starts with '#'.
                    if paragraph.startswith('##'):
                        # Subheader
                        p.text = paragraph.lstrip('#').strip()
                        p.font.size = Pt(20)
                        p.font.bold = True
                    elif paragraph.startswith('#'):
                        # Header
                        p.text = paragraph.lstrip('#').strip()
                        p.font.size = Pt(24)
                        p.font.bold = True
                    elif len(paragraph) > 2 and paragraph.startswith('*') and paragraph.endswith('*'):
                        # Bold text; the length guard keeps a lone '*' or '**'
                        # from becoming an empty bold paragraph.
                        p.text = paragraph.strip('*')
                        p.font.bold = True
                    else:
                        # Regular text
                        p.text = paragraph
                        p.font.size = Pt(14)

                    p.alignment = PP_ALIGN.LEFT

            # If no slides were created, create a default slide
            if not slides_data:
                slide = prs.slides.add_slide(prs.slide_layouts[0])  # Title slide layout
                slide.shapes.title.text = title
                slide.placeholders[1].text = "Generated by PowerOn AI System"

            # Serialise the presentation in memory and encode it for transport.
            buffer = io.BytesIO()
            prs.save(buffer)
            pptx_bytes = buffer.getvalue()
            pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')

            logger.info("Successfully rendered PowerPoint presentation: %d bytes", len(pptx_bytes))
            return pptx_base64, "application/vnd.openxmlformats-officedocument.presentationml.presentation"

        except ImportError:
            logger.error("python-pptx library not installed. Install with: pip install python-pptx")
            return "python-pptx library not installed", "text/plain"
        except Exception as e:
            # Top-level boundary: callers receive a text payload instead of an
            # exception, so record the traceback here for diagnosis.
            logger.exception("Error rendering PowerPoint presentation: %s", e)
            return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"

    def _parse_content_to_slides(self, content: str, title: str) -> list:
        """
        Parse content into slide data structure.

        Args:
            content: Content to parse
            title: Presentation title

        Returns:
            List of slide data dictionaries
        """
        slides = []

        # Split content by slide markers or headers
        slide_sections = self._split_content_into_slides(content)

        for i, section in enumerate(slide_sections):
            if section.strip():
                slide_data = {
                    "title": f"Slide {i + 1}",
                    "content": section.strip()
                }

                # Extract title from content if it starts with #
                lines = section.strip().split('\n')
                if lines and lines[0].startswith('#'):
                    # Remove # symbols and clean up title
                    slide_title = lines[0].lstrip('#').strip()
                    slide_data["title"] = slide_title
                    slide_data["content"] = '\n'.join(lines[1:]).strip()
                elif lines and lines[0].strip():
                    # Use first line as title if it looks like a title
                    first_line = lines[0].strip()
                    if len(first_line) < 100 and not first_line.endswith('.'):
                        slide_data["title"] = first_line
                        slide_data["content"] = '\n'.join(lines[1:]).strip()

                slides.append(slide_data)

        return slides

    def _split_content_into_slides(self, content: str) -> list:
        """
        Split content into individual slides based on headers and structure.

        Args:
            content: Content to split

        Returns:
            List of slide content strings
        """
        import re

        # First, try to split by major headers (# or ##)
        # This is the most common case for AI-generated content
        header_pattern = r'^(#{1,2})\s+(.+)$'
        lines = content.split('\n')
        slides = []
        current_slide = []

        for line in lines:
            # Check if this line is a header
            header_match = re.match(header_pattern, line.strip())
            if header_match:
                # If we have content in current slide, save it
                if current_slide:
                    slide_content = '\n'.join(current_slide).strip()
                    if slide_content:
                        slides.append(slide_content)
                    current_slide = []

                # Start new slide with this header
                current_slide.append(line)
            else:
                # Add line to current slide
                current_slide.append(line)

        # Add the last slide
        if current_slide:
            slide_content = '\n'.join(current_slide).strip()
            if slide_content:
                slides.append(slide_content)

        # If we found slides with headers, return them
        if len(slides) > 1:
            return slides

        # Fallback: Split by double newlines
        sections = content.split('\n\n\n')
        if len(sections) > 1:
            return [s.strip() for s in sections if s.strip()]

        # Another fallback: Split by double newlines
        sections = content.split('\n\n')
        if len(sections) > 1:
            return [s.strip() for s in sections if s.strip()]

        # Last resort: return as single slide
        return [content.strip()]


    def get_output_mime_type(self) -> str:
        """Get MIME type for rendered output."""
        return self.output_mime_type

    def getExtractionPrompt(self) -> str:
        """Get extraction prompt for this renderer."""
        return "Extract content for PowerPoint presentation generation"