import base64
import io
import logging
import re
from typing import Dict, Any, Optional, Tuple

from .base_renderer import BaseRenderer

logger = logging.getLogger(__name__)

# MIME type reported for every successfully rendered presentation.
_PPTX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation"

# A level-1 or level-2 markdown header ("# Title" / "## Title") starts a new slide.
_SLIDE_HEADER_RE = re.compile(r'^(#{1,2})\s+(.+)$')


class PptxRenderer(BaseRenderer):
    """Renderer for PowerPoint (.pptx) files using python-pptx library."""

    def __init__(self):
        super().__init__()
        self.supported_formats = ["pptx", "ppt"]
        self.output_mime_type = _PPTX_MIME_TYPE

    @classmethod
    def get_supported_formats(cls) -> list:
        """Get list of supported output formats."""
        return ["pptx", "ppt"]

    async def render(self, content: str, title: str = "Generated Presentation", **kwargs) -> Tuple[str, str]:
        """
        Render content as PowerPoint presentation.

        The content is split into slides (see ``_parse_content_to_slides``);
        each slide uses the "Title and Content" layout. If no slide can be
        derived from the content, a single title slide carrying ``title`` is
        produced instead.

        Args:
            content: Content to render as presentation
            title: Title for the presentation (used only for the fallback
                title slide)
            **kwargs: Additional rendering options (currently unused)

        Returns:
            Tuple ``(payload, mime_type)``. On success the payload is the
            base64-encoded .pptx file and the MIME type is the PowerPoint
            one. On failure the payload is a human-readable error message
            and the MIME type is ``"text/plain"``; no exception is raised.
        """
        try:
            # Imported lazily so that a missing python-pptx installation is
            # reported as a plain-text result instead of breaking module import.
            from pptx import Presentation
            from pptx.util import Inches

            # Treat a missing payload like empty content -> default title slide.
            content = content or ""

            prs = Presentation()

            # Set slide size (16:9)
            prs.slide_width = Inches(13.33)
            prs.slide_height = Inches(7.5)

            slides_data = self._parse_content_to_slides(content, title)

            logger.info("Parsed %d slides from content", len(slides_data))
            logger.info("Content preview: '%s...'", content[:200])

            for slide_number, slide_data in enumerate(slides_data, start=1):
                slide_content = slide_data.get('content', '')
                logger.info(
                    "Slide %d: '%s' - %d chars",
                    slide_number,
                    slide_data.get('title', 'No title'),
                    len(slide_content),
                )
                if slide_content:
                    logger.info(" Content preview: '%s...'", slide_content[:100])
                else:
                    logger.warning(" ⚠️ Slide %d has NO content!", slide_number)

                # Layout 1 of the default template is "Title and Content".
                slide = prs.slides.add_slide(prs.slide_layouts[1])
                slide.shapes.title.text = slide_data.get("title", "Slide")
                self._fill_text_frame(slide.placeholders[1].text_frame, slide_content)

            # If no slides were created, create a default title slide
            if not slides_data:
                slide = prs.slides.add_slide(prs.slide_layouts[0])  # Title slide layout
                slide.shapes.title.text = title
                slide.placeholders[1].text = "Generated by PowerOn AI System"

            # Serialize in memory and encode as base64
            buffer = io.BytesIO()
            prs.save(buffer)
            pptx_bytes = buffer.getvalue()
            pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')

            logger.info("Successfully rendered PowerPoint presentation: %d bytes", len(pptx_bytes))
            return pptx_base64, _PPTX_MIME_TYPE

        except ImportError:
            logger.error("python-pptx library not installed. Install with: pip install python-pptx")
            return "python-pptx library not installed", "text/plain"
        except Exception as e:
            # Top-level boundary: report the failure to the caller as text,
            # but keep the traceback in the log.
            logger.exception("Error rendering PowerPoint presentation: %s", e)
            return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"

    def _fill_text_frame(self, text_frame, content_text: str) -> None:
        """
        Write slide body text into a python-pptx text frame.

        The text is split on blank lines; every non-empty chunk becomes one
        left-aligned paragraph styled according to ``_paragraph_style``.

        Args:
            text_frame: Text frame of the slide's content placeholder
            content_text: Slide body text (may be empty)
        """
        from pptx.enum.text import PP_ALIGN
        from pptx.util import Pt

        # clear() leaves exactly one empty paragraph, which is reused for the
        # first chunk; every further chunk gets a new paragraph.
        text_frame.clear()

        chunks = [chunk for chunk in content_text.split('\n\n') if chunk.strip()]
        for index, chunk in enumerate(chunks):
            p = text_frame.paragraphs[0] if index == 0 else text_frame.add_paragraph()
            text, size_pt, bold = self._paragraph_style(chunk)
            p.text = text
            if size_pt is not None:
                p.font.size = Pt(size_pt)
            if bold:
                p.font.bold = True
            p.alignment = PP_ALIGN.LEFT

    @staticmethod
    def _paragraph_style(paragraph: str) -> Tuple[str, Optional[int], bool]:
        """
        Derive display text and font settings from a markdown-like paragraph.

        Args:
            paragraph: Raw paragraph text; surrounding whitespace is ignored

        Returns:
            Tuple ``(text, size_pt, bold)``. ``size_pt`` is ``None`` when the
            font size should be left untouched; ``bold`` is ``False`` when the
            bold flag should be left untouched.
        """
        text = paragraph.strip()
        # '##' must be tested before '#': every '##' paragraph also starts with '#'.
        if text.startswith('##'):
            return text.lstrip('#').strip(), 20, True
        if text.startswith('#'):
            return text.lstrip('#').strip(), 24, True
        if text.startswith('*') and text.endswith('*'):
            return text.strip('*'), None, True
        return text, 14, False

    def _parse_content_to_slides(self, content: str, title: str) -> list:
        """
        Parse content into slide data structure.

        Each non-empty section returned by ``_split_content_into_slides``
        becomes one slide. The slide title is taken from the first line when
        that line is a ``#`` header, or when it is shorter than 100 characters
        and does not end with a period; otherwise the title is ``"Slide N"``.
        When the first line is used as title it is removed from the content.

        Args:
            content: Content to parse
            title: Presentation title (not used here; kept for interface
                compatibility)

        Returns:
            List of slide data dictionaries with keys ``"title"`` and
            ``"content"``
        """
        slides = []

        for index, section in enumerate(self._split_content_into_slides(content)):
            body = section.strip()
            if not body:
                continue

            slide_title = f"Slide {index + 1}"
            lines = body.split('\n')
            first_line = lines[0].strip()
            remainder = '\n'.join(lines[1:]).strip()

            if first_line.startswith('#'):
                # Header line: drop the '#' markers; keep the default title
                # if nothing but markers was given.
                slide_title = first_line.lstrip('#').strip() or slide_title
                body = remainder
            elif len(first_line) < 100 and not first_line.endswith('.'):
                # Short line without a trailing period looks like a title.
                slide_title = first_line
                body = remainder

            slides.append({"title": slide_title, "content": body})

        return slides

    def _split_content_into_slides(self, content: str) -> list:
        """
        Split content into individual slides based on headers and structure.

        Strategies are tried in order; the first one yielding more than one
        section wins:
          1. split before every ``#`` / ``##`` header line,
          2. split on triple newlines,
          3. split on double newlines (paragraphs),
          4. return the whole content as a single slide.

        Args:
            content: Content to split

        Returns:
            List of slide content strings
        """
        slides = []
        current_slide = []

        for line in content.split('\n'):
            # A header closes the slide collected so far (if any) and
            # becomes the first line of the next one.
            if _SLIDE_HEADER_RE.match(line.strip()) and current_slide:
                slide_content = '\n'.join(current_slide).strip()
                if slide_content:
                    slides.append(slide_content)
                current_slide = []
            current_slide.append(line)

        # Add the last slide
        if current_slide:
            slide_content = '\n'.join(current_slide).strip()
            if slide_content:
                slides.append(slide_content)

        # Header-based splitting only counts when it produced several slides
        if len(slides) > 1:
            return slides

        # Fallback: split by triple newlines, then by double newlines
        for separator in ('\n\n\n', '\n\n'):
            sections = content.split(separator)
            if len(sections) > 1:
                return [s.strip() for s in sections if s.strip()]

        # Last resort: return as single slide
        return [content.strip()]

    def get_output_mime_type(self) -> str:
        """Get MIME type for rendered output."""
        return self.output_mime_type

    def getExtractionPrompt(self) -> str:
        """Get extraction prompt for this renderer."""
        return "Extract content for PowerPoint presentation generation"