import base64
import io
import json
import logging
import re
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple

from .base_renderer import BaseRenderer

logger = logging.getLogger(__name__)

# MIME type of the Office Open XML presentation format produced by this renderer.
_PPTX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation"


class PptxRenderer(BaseRenderer):
    """Renderer for PowerPoint (.pptx) files using python-pptx library."""

    # Text placed on the generated title slide; also used to recognise that
    # slide again when a layout is chosen for it.
    _TITLE_SLIDE_MARKER = "Generated by PowerOn AI System"

    # Matches a markdown-style level 1 or level 2 header line.
    _HEADER_RE = re.compile(r'^(#{1,2})\s+(.+)$')

    def __init__(self):
        super().__init__()
        self.supported_formats = ["pptx", "ppt"]
        self.output_mime_type = _PPTX_MIME_TYPE

    @classmethod
    def get_supported_formats(cls) -> list:
        """Get list of supported output formats."""
        return ["pptx", "ppt"]

    async def render(
        self,
        extracted_content: Dict[str, Any],
        title: str,
        user_prompt: Optional[str] = None,
        ai_service=None,
    ) -> Tuple[str, str]:
        """
        Render content as PowerPoint presentation from JSON data.

        Args:
            extracted_content: JSON content to render as presentation
            title: Title for the presentation
            user_prompt: User prompt for AI styling
            ai_service: AI service for styling; defaults are used when omitted

        Returns:
            Tuple ``(payload, mime_type)``. On success the payload is the
            base64-encoded .pptx file and the MIME type is the PowerPoint
            one. On failure (python-pptx missing, or any other error) the
            payload is a human-readable error message and the MIME type is
            ``"text/plain"``; no exception is propagated.

        Note:
            Of the style definitions, only ``slide_size`` and
            ``content_per_slide`` are consumed; font sizes applied to the
            slide text are fixed in ``_fill_text_frame``.
        """
        try:
            # Imported lazily so a missing python-pptx installation is
            # reported through the ImportError handler below.
            from pptx import Presentation
            from pptx.util import Inches

            prs = Presentation()

            # Styles have to be resolved first: the slide size is read from them.
            styles = await self._get_pptx_styles(user_prompt, ai_service)

            # Set slide size based on user intent (default to 16:9)
            if styles.get("slide_size", "16:9") == "4:3":
                prs.slide_width = Inches(10)
                prs.slide_height = Inches(7.5)
            else:
                prs.slide_width = Inches(13.33)
                prs.slide_height = Inches(7.5)

            # Generate slides from JSON content
            slides_data = await self._parse_json_to_slides(extracted_content, title, styles)
            logger.info(f"Parsed {len(slides_data)} slides from JSON content")

            # Debug: Show first 200 chars of content
            logger.info(f"JSON content preview: {str(extracted_content)[:200]}...")

            for slide_number, slide_data in enumerate(slides_data, start=1):
                content_text = slide_data.get("content", "")
                logger.info(
                    f"Slide {slide_number}: '{slide_data.get('title', 'No title')}' - {len(content_text)} chars"
                )
                if content_text:
                    logger.info(f" Content preview: '{content_text[:100]}...'")
                else:
                    logger.warning(f" ⚠️ Slide {slide_number} has NO content!")

                # Create slide with appropriate layout based on content
                layout_index = self._get_slide_layout_index(slide_data, styles)
                slide = prs.slides.add_slide(prs.slide_layouts[layout_index])

                # A layout without a title placeholder yields None here.
                title_shape = slide.shapes.title
                if title_shape is not None:
                    title_shape.text = slide_data.get("title", "Slide")

                self._fill_text_frame(slide.placeholders[1].text_frame, content_text)

            # If no slides were created, create a default slide
            if not slides_data:
                slide = prs.slides.add_slide(prs.slide_layouts[0])  # Title slide layout
                slide.shapes.title.text = title
                slide.placeholders[1].text = "Generated by PowerOn AI System"

            # Save to buffer and convert to base64
            buffer = io.BytesIO()
            prs.save(buffer)
            pptx_bytes = buffer.getvalue()
            pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')

            logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes")
            return pptx_base64, _PPTX_MIME_TYPE

        except ImportError:
            logger.error("python-pptx library not installed. Install with: pip install python-pptx")
            return "python-pptx library not installed", "text/plain"
        except Exception as e:
            logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
            return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"

    def _fill_text_frame(self, text_frame, content_text: str) -> None:
        """
        Write slide content into a python-pptx text frame.

        The content is split on blank lines; each non-empty chunk becomes one
        paragraph. Lightweight markup decides the formatting: chunks starting
        with ``##`` are subheaders (20pt bold), chunks starting with ``#``
        are headers (24pt bold), chunks wrapped in ``*`` are bold, everything
        else is 14pt body text. All paragraphs are left-aligned.

        Args:
            text_frame: Text frame of the content placeholder
            content_text: Slide content produced by the ``_format_*`` helpers
        """
        from pptx.enum.text import PP_ALIGN
        from pptx.util import Pt

        text_frame.clear()

        # clear() leaves exactly one empty paragraph; reuse it for the first
        # non-empty chunk so the frame does not start with a blank paragraph.
        reuse_first_paragraph = True
        for chunk in content_text.split('\n\n'):
            text = chunk.strip()
            if not text:
                continue

            if reuse_first_paragraph:
                p = text_frame.paragraphs[0]
                reuse_first_paragraph = False
            else:
                p = text_frame.add_paragraph()

            # '##' must be tested before '#', otherwise it can never match.
            if text.startswith('##'):
                # Subheader
                p.text = text.lstrip('#').strip()
                p.font.size = Pt(20)
                p.font.bold = True
            elif text.startswith('#'):
                # Header
                p.text = text.lstrip('#').strip()
                p.font.size = Pt(24)
                p.font.bold = True
            elif text.startswith('*') and text.endswith('*'):
                # Bold text
                p.text = text.strip('*')
                p.font.bold = True
            else:
                # Regular text
                p.text = text
                p.font.size = Pt(14)

            p.alignment = PP_ALIGN.LEFT

    def _parse_content_to_slides(self, content: str, title: str) -> list:
        """
        Parse content into slide data structure.

        Args:
            content: Content to parse
            title: Presentation title (not used by the current implementation)

        Returns:
            List of slide data dictionaries with "title" and "content" keys
        """
        slides = []

        for i, section in enumerate(self._split_content_into_slides(content)):
            body = section.strip()
            if not body:
                continue

            slide_data = {"title": f"Slide {i + 1}", "content": body}
            lines = body.split('\n')
            first_line = lines[0].strip()

            if lines[0].startswith('#'):
                # Header line becomes the title, the rest stays as content
                slide_data["title"] = lines[0].lstrip('#').strip()
                slide_data["content"] = '\n'.join(lines[1:]).strip()
            elif first_line and len(first_line) < 100 and not first_line.endswith('.'):
                # A short first line without a trailing period looks like a title
                slide_data["title"] = first_line
                slide_data["content"] = '\n'.join(lines[1:]).strip()

            slides.append(slide_data)

        return slides

    def _split_content_into_slides(self, content: str) -> list:
        """
        Split content into individual slides based on headers and structure.

        Strategies are tried in order and the first one that yields more than
        one slide wins: split at ``#`` / ``##`` header lines, split at triple
        newlines, split at double newlines. Otherwise the whole content is
        returned as a single slide.

        Args:
            content: Content to split

        Returns:
            List of slide content strings
        """
        slides = []
        current_slide: List[str] = []

        def flush() -> None:
            """Store the accumulated lines as one slide if they hold any text."""
            slide_content = '\n'.join(current_slide).strip()
            if slide_content:
                slides.append(slide_content)
            current_slide.clear()

        # First, try to split by major headers (# or ##); each header line
        # closes the slide collected so far and opens a new one.
        for line in content.split('\n'):
            if self._HEADER_RE.match(line.strip()) and current_slide:
                flush()
            current_slide.append(line)
        if current_slide:
            flush()

        if len(slides) > 1:
            return slides

        # Fallback: split by triple newlines
        sections = content.split('\n\n\n')
        if len(sections) > 1:
            return [s.strip() for s in sections if s.strip()]

        # Another fallback: split by double newlines
        sections = content.split('\n\n')
        if len(sections) > 1:
            return [s.strip() for s in sections if s.strip()]

        # Last resort: return as single slide
        return [content.strip()]

    def get_output_mime_type(self) -> str:
        """Get MIME type for rendered output."""
        return self.output_mime_type

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only PowerPoint-specific guidelines; global prompt is built centrally."""
        return (
            "POWERPOINT FORMAT GUIDELINES:\n"
            "- Extract structured data from source documents into JSON format\n"
            "- Focus on presentation-ready content with clear sections and visual elements\n"
            "- For tables: Extract headers and rows as separate arrays suitable for slides\n"
            "- For lists: Extract items with optional sub-items for bullet points\n"
            "- Structure content into sections with clear content types (heading, paragraph, table, list)\n"
            "- Use proper JSON structure with metadata, sections, and elements\n"
            "- Ensure content is concise and suitable for slide presentation\n"
            "OUTPUT: Return structured JSON that can be converted to PowerPoint slides."
        )

    def _build_style_prompt(self, user_prompt: Optional[str]) -> str:
        """Build the prompt that asks the AI service for styling definitions as JSON."""
        return (
            f'\nFor this PowerPoint presentation request: "{user_prompt}"\n\n'
            "Provide styling definitions for PowerPoint elements. Respond with ONLY JSON:\n\n"
            "{\n"
            '    "title": {"font_size": 44, "color": "#1F4E79", "bold": true, "align": "center"},\n'
            '    "heading": {"font_size": 32, "color": "#2F2F2F", "bold": true, "align": "left"},\n'
            '    "subheading": {"font_size": 24, "color": "#4F4F4F", "bold": true, "align": "left"},\n'
            '    "paragraph": {"font_size": 18, "color": "#2F2F2F", "bold": false, "align": "left"},\n'
            '    "bullet_list": {"font_size": 18, "color": "#2F2F2F", "indent": 20},\n'
            '    "table_header": {"font_size": 16, "color": "#FFFFFF", "bold": true, "background": "#4F4F4F"},\n'
            '    "table_cell": {"font_size": 14, "color": "#2F2F2F", "bold": false, "background": "#FFFFFF"},\n'
            '    "slide_size": "16:9",\n'
            '    "content_per_slide": "concise"\n'
            "}\n\n"
            "CRITICAL: PowerPoint text must be large enough to read from a distance. "
            "Minimum font size should be 14pt for body text.\n"
        )

    async def _get_pptx_styles(self, user_prompt: Optional[str], ai_service=None) -> Dict[str, Any]:
        """
        Simple AI call to get PowerPoint styling definitions.

        Args:
            user_prompt: User request forwarded to the AI service
            ai_service: Service exposing ``aiObjects.call``; when falsy the
                default styles are returned without any call

        Returns:
            Style dictionary. Any failure (service error, unparsable or
            non-object JSON) is logged and answered with the default styles.
        """
        if not ai_service:
            return self._get_default_pptx_styles()

        try:
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType

            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            request = AiCallRequest(
                prompt=self._build_style_prompt(user_prompt),
                context="",
                options=request_options,
            )
            response = await ai_service.aiObjects.call(request)

            # Strip an optional markdown code fence (``` or ```json) before parsing
            result = response.content.strip()
            if result.startswith('```'):
                result = re.sub(r'^```(?:json)?\s*', '', result)
                result = re.sub(r'\s*```$', '', result)

            styles = json.loads(result)
            if not isinstance(styles, dict):
                # Callers use dict access on the styles; reject lists/scalars here
                raise ValueError("AI styling response is not a JSON object")

            # Validate font sizes for PowerPoint readability
            return self._validate_pptx_styles_readability(styles)

        except Exception as e:
            logger.warning(f"AI styling failed: {str(e)}, using defaults")
            return self._get_default_pptx_styles()

    def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate and fix readability issues in AI-generated styles.

        Raises every known element's ``font_size`` to its minimum when it is
        missing, not a number, or too small. ``styles`` is modified in place
        and returned; if validation itself fails the defaults are returned.
        """
        try:
            # Ensure minimum font sizes for PowerPoint readability
            min_font_sizes = {
                "title": 36,
                "heading": 24,
                "subheading": 20,
                "paragraph": 14,
                "bullet_list": 14,
                "table_header": 12,
                "table_cell": 12,
            }

            for style_name, min_size in min_font_sizes.items():
                style = styles.get(style_name)
                if not isinstance(style, dict):
                    # Absent or malformed entry: nothing to adjust
                    continue
                current_size = style.get("font_size", 12)
                if not isinstance(current_size, (int, float)) or current_size < min_size:
                    style["font_size"] = min_size

            return styles

        except Exception as e:
            logger.warning(f"Style validation failed: {str(e)}")
            return self._get_default_pptx_styles()

    def _get_default_pptx_styles(self) -> Dict[str, Any]:
        """Default PowerPoint styles."""
        return {
            "title": {"font_size": 44, "color": "#1F4E79", "bold": True, "align": "center"},
            "heading": {"font_size": 32, "color": "#2F2F2F", "bold": True, "align": "left"},
            "subheading": {"font_size": 24, "color": "#4F4F4F", "bold": True, "align": "left"},
            "paragraph": {"font_size": 18, "color": "#2F2F2F", "bold": False, "align": "left"},
            "bullet_list": {"font_size": 18, "color": "#2F2F2F", "indent": 20},
            "table_header": {"font_size": 16, "color": "#FFFFFF", "bold": True, "background": "#4F4F4F"},
            "table_cell": {"font_size": 14, "color": "#2F2F2F", "bold": False, "background": "#FFFFFF"},
            "slide_size": "16:9",
            "content_per_slide": "concise",
        }

    async def _parse_json_to_slides(
        self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Parse JSON content into slide data structure.

        Args:
            json_content: JSON content to parse; must be a dict with a
                "sections" field
            title: Presentation title, used when the JSON metadata has none
            styles: AI-generated styles

        Returns:
            List of slide data dictionaries, starting with a title slide. If
            the content is invalid or parsing fails, a single fallback slide
            describing the error is returned instead.
        """
        slides = []

        try:
            # Validate JSON structure
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")
            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")

            # Use title from JSON metadata if available, otherwise use provided title
            document_title = json_content.get("metadata", {}).get("title", title)

            # Create title slide
            slides.append({
                "title": document_title,
                "content": self._TITLE_SLIDE_MARKER + "\n\n" + self._format_timestamp(),
            })

            # Process sections into slides based on content and user intent
            sections = json_content.get("sections", [])
            slides.extend(self._create_slides_from_sections(sections, styles))

            # If no content slides were created, create a default content slide
            if len(slides) == 1:  # Only title slide
                slides.append({
                    "title": "Content Overview",
                    "content": "No structured content found in the source documents.\n\nPlease check the source documents and try again.",
                })

            return slides

        except Exception as e:
            logger.error(f"Error parsing JSON to slides: {str(e)}")
            # Return minimal fallback slides
            return [{"title": title, "content": "Error parsing content for presentation"}]

    def _format_element(self, content_type: str, element: Dict[str, Any]) -> str:
        """Format one section element with the formatter for its content type.

        Unknown content types are formatted as paragraphs.
        """
        formatters = {
            "table": self._format_table_for_slide,
            "list": self._format_list_for_slide,
            "heading": self._format_heading_for_slide,
            "paragraph": self._format_paragraph_for_slide,
            "code": self._format_code_for_slide,
        }
        formatter = formatters.get(content_type, self._format_paragraph_for_slide)
        return formatter(element)

    def _build_slide_content(self, content_type: str, elements: List[Dict[str, Any]]) -> str:
        """Format all elements and join the non-empty results with blank lines."""
        parts = (self._format_element(content_type, element) for element in elements)
        return "\n\n".join(filter(None, parts))

    def _create_slide_from_section(
        self, section: Dict[str, Any], styles: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Create a slide from a JSON section.

        Returns:
            Slide data dictionary, or None when the section cannot be processed.
        """
        try:
            return {
                "title": section.get("title", "Untitled Section"),
                "content": self._build_slide_content(
                    section.get("content_type", "paragraph"),
                    section.get("elements", []),
                ),
            }
        except Exception as e:
            logger.warning(f"Error creating slide from section: {str(e)}")
            return None

    def _format_table_for_slide(self, table_data: Dict[str, Any]) -> str:
        """Format table data for slide presentation.

        Produces a pipe-separated header line, a dash separator of the same
        length and at most five data rows, followed by a note about how many
        rows were left out. Returns "" when there are no headers or on error.
        """
        try:
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])

            if not headers:
                return ""

            header_line = " | ".join(str(h) for h in headers)
            table_lines = [header_line, "-" * len(header_line)]

            # Add data rows (limit based on content density)
            max_rows = 5  # Default limit
            table_lines.extend(" | ".join(str(cell) for cell in row) for row in rows[:max_rows])

            if len(rows) > max_rows:
                table_lines.append(f"... and {len(rows) - max_rows} more rows")

            return "\n".join(table_lines)

        except Exception as e:
            logger.warning(f"Error formatting table for slide: {str(e)}")
            return ""

    def _format_list_for_slide(self, list_data: Dict[str, Any]) -> str:
        """Format list data for slide presentation.

        Each item becomes a "•" bullet line. Dict items contribute their
        "text" plus up to three "subitems" rendered as "-" lines. Returns ""
        when there are no items or on error.
        """
        try:
            items = list_data.get("items", [])
            if not items:
                return ""

            list_lines = []
            for item in items:
                if not isinstance(item, dict):
                    list_lines.append(f"• {str(item)}")
                    continue

                list_lines.append(f"• {item.get('text', '')}")

                # Add subitems (limit to 3 for readability)
                for subitem in item.get("subitems", [])[:3]:
                    if isinstance(subitem, dict):
                        list_lines.append(f" - {subitem.get('text', '')}")
                    else:
                        list_lines.append(f" - {subitem}")

            return "\n".join(list_lines)

        except Exception as e:
            logger.warning(f"Error formatting list for slide: {str(e)}")
            return ""

    def _format_heading_for_slide(self, heading_data: Dict[str, Any]) -> str:
        """Format heading data for slide presentation.

        Returns the text prefixed with one "#" per heading level (default 1),
        or "" when there is no text or on error.
        """
        try:
            text = heading_data.get("text", "")
            level = heading_data.get("level", 1)

            if text:
                return f"{'#' * level} {text}"
            return ""

        except Exception as e:
            logger.warning(f"Error formatting heading for slide: {str(e)}")
            return ""

    def _format_paragraph_for_slide(self, paragraph_data: Dict[str, Any]) -> str:
        """Format paragraph data for slide presentation.

        Text longer than 200 characters is cut and suffixed with "...".
        Returns "" when there is no text or on error.
        """
        try:
            text = paragraph_data.get("text", "")
            if not text:
                return ""

            # Limit paragraph length based on content density
            max_length = 200  # Default limit
            if len(text) > max_length:
                text = text[:max_length] + "..."
            return text

        except Exception as e:
            logger.warning(f"Error formatting paragraph for slide: {str(e)}")
            return ""

    def _format_code_for_slide(self, code_data: Dict[str, Any]) -> str:
        """Format code data for slide presentation.

        Code longer than 100 characters is cut and suffixed with "...". The
        result is prefixed with "Code:" or "Code (<language>):". Returns ""
        when there is no code or on error.
        """
        try:
            code = code_data.get("code", "")
            language = code_data.get("language", "")
            if not code:
                return ""

            # Limit code length based on content density
            max_length = 100  # Default limit
            if len(code) > max_length:
                code = code[:max_length] + "..."

            if language:
                return f"Code ({language}):\n{code}"
            return f"Code:\n{code}"

        except Exception as e:
            logger.warning(f"Error formatting code for slide: {str(e)}")
            return ""

    def _get_slide_layout_index(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
        """Determine the best slide layout based on content.

        Returns:
            0 for slides without content or carrying the title-slide marker,
            2 when the content contains both "|" and "-", otherwise 1.
            On error 1 is returned.
        """
        try:
            content = slide_data.get("content", "")

            # Check if it's a title slide (first slide)
            if not content or self._TITLE_SLIDE_MARKER in content:
                return 0  # Title slide layout

            # NOTE(review): intended as "has both tables and lists", but the
            # table formatter always emits a "-" separator line, so every
            # multi-column table matches. Confirm the intended heuristic and
            # which layout index 2 refers to in the template.
            if "|" in content and "-" in content:
                return 2

            # Tables, bullet lists and plain text all use title and content
            return 1

        except Exception as e:
            logger.warning(f"Error determining slide layout: {str(e)}")
            return 1  # Default to title and content

    def _create_slides_from_sections(
        self, sections: List[Dict[str, Any]], styles: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """Create slides from sections based on content density and user intent.

        Returns an empty list when the sections cannot be processed.
        """
        try:
            content_per_slide = styles.get("content_per_slide", "concise")

            slides = []
            for section in sections:
                slides.extend(self._create_section_slides(section, styles, content_per_slide))
            return slides

        except Exception as e:
            logger.warning(f"Error creating slides from sections: {str(e)}")
            return []

    def _create_section_slides(
        self, section: Dict[str, Any], styles: Dict[str, Any], content_per_slide: str
    ) -> List[Dict[str, Any]]:
        """Create one or more slides from a section based on content density.

        A section without elements yields a placeholder slide. With
        ``content_per_slide == "detailed"`` and more than three elements the
        section is split over several slides; otherwise it becomes one slide.
        Returns an empty list on error.
        """
        try:
            section_title = section.get("title", "Untitled Section")
            content_type = section.get("content_type", "paragraph")
            elements = section.get("elements", [])

            if not elements:
                return [{
                    "title": section_title,
                    "content": "No content available for this section.",
                }]

            if content_per_slide == "detailed" and len(elements) > 3:
                # Split large sections into multiple slides
                return self._split_section_into_multiple_slides(section_title, elements, content_type)

            # Create single slide for section
            slide_data = self._create_slide_from_section(section, styles)
            return [slide_data] if slide_data else []

        except Exception as e:
            logger.warning(f"Error creating section slides: {str(e)}")
            return []

    def _split_section_into_multiple_slides(
        self, section_title: str, elements: List[Dict[str, Any]], content_type: str
    ) -> List[Dict[str, Any]]:
        """Split a large section into multiple slides.

        Elements are grouped three per slide. The first slide keeps the
        section title, later ones get a " (Part N)" suffix. Returns an empty
        list on error.
        """
        try:
            slides = []
            max_elements_per_slide = 3

            for start in range(0, len(elements), max_elements_per_slide):
                part = start // max_elements_per_slide + 1
                slide_title = section_title if part == 1 else f"{section_title} (Part {part})"
                slides.append({
                    "title": slide_title,
                    "content": self._build_slide_content(
                        content_type, elements[start:start + max_elements_per_slide]
                    ),
                })

            return slides

        except Exception as e:
            logger.warning(f"Error splitting section into slides: {str(e)}")
            return []

    def _format_timestamp(self) -> str:
        """Format current timestamp for presentation generation."""
        # timezone.utc instead of datetime.UTC: the alias only exists on Python 3.11+
        return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")