# Copyright (c) 2025 Patrick Motsch # All rights reserved. import logging import base64 import io import json import re from datetime import datetime, UTC from typing import Dict, Any, Optional, List from .documentRendererBaseTemplate import BaseRenderer from modules.datamodels.datamodelDocument import RenderedDocument logger = logging.getLogger(__name__) class RendererPptx(BaseRenderer): """Renderer for PowerPoint (.pptx) files using python-pptx library.""" def __init__(self, services=None): super().__init__(services=services) self.supportedFormats = ["pptx", "ppt"] self.outputMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation" @classmethod def getSupportedFormats(cls) -> list: """Get list of supported output formats.""" return ["pptx", "ppt"] @classmethod def getFormatAliases(cls) -> List[str]: """Return format aliases.""" return [] @classmethod def getPriority(cls) -> int: """Return priority for PowerPoint renderer.""" return 105 @classmethod def getOutputStyle(cls, formatName: Optional[str] = None) -> str: """Return output style classification: PowerPoint presentations are formatted documents.""" return 'document' @classmethod def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]: """ Return list of section content types that PowerPoint renderer accepts. PowerPoint renderer accepts all section types (presentations can contain all content types including images). """ from modules.datamodels.datamodelJson import supportedSectionTypes return list(supportedSectionTypes) async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: """ Render content as PowerPoint presentation from JSON data. Args: extractedContent: JSON content to render as presentation title: Title for the presentation userPrompt: User prompt for AI styling aiService: AI service for styling **kwargs: Additional rendering options Returns: Base64-encoded PowerPoint presentation as string """ try: # Import python-pptx from pptx import Presentation from pptx.util import Inches, Pt from pptx.enum.text import PP_ALIGN from pptx.dml.color import RGBColor import re # Get style set: use styles from metadata if available, otherwise enhance with AI styles = await self._getStyleSet(extractedContent, userPrompt, aiService) # Create new presentation prs = Presentation() # Set slide size based on user intent (default to 16:9) slide_size = styles.get("slide_size", "16:9") if slide_size == "4:3": prs.slide_width = Inches(10) prs.slide_height = Inches(7.5) else: # Default to 16:9 prs.slide_width = Inches(13.33) prs.slide_height = Inches(7.5) # Generate slides from JSON content slidesData = await self._parseJsonToSlides(extractedContent, title, styles) logger.info(f"Parsed {len(slidesData)} slides from JSON content") # Debug: Show first 200 chars of content logger.info(f"JSON content preview: {str(extractedContent)[:200]}...") # Store prs reference for image methods self._currentPresentation = prs for i, slide_data in enumerate(slidesData): slide_sections = slide_data.get("sections", []) slide_images = list(slide_data.get("images", [])) # Make copy so we can append slide_content = slide_data.get('content', '') hasSections = slide_sections and len(slide_sections) > 0 hasImages = len(slide_images) > 0 logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - sections: {len(slide_sections)}, images: {len(slide_images)}, content: {len(slide_content)} chars") # Use blank layout for all slides to avoid placeholder interference # Find blank layout (typically index 6, fallback to 5) slideLayoutIndex = None for idx in [6, 5]: if idx < len(prs.slide_layouts): try: layout = prs.slide_layouts[idx] # Check if it's a blank layout (no placeholders) if len(layout.placeholders) == 0: slideLayoutIndex = idx break except (AttributeError, IndexError): continue # If no blank layout found, use layout with fewest placeholders if slideLayoutIndex is None: min_placeholders = float('inf') for idx in range(len(prs.slide_layouts)): try: layout = prs.slide_layouts[idx] placeholder_count = len(layout.placeholders) if hasattr(layout, 'placeholders') else 0 if placeholder_count < min_placeholders: min_placeholders = placeholder_count slideLayoutIndex = idx except: continue # Fallback to first layout if still None if slideLayoutIndex is None: slideLayoutIndex = 0 slide_layout = prs.slide_layouts[slideLayoutIndex] slide = prs.slides.add_slide(slide_layout) # Clear placeholder text instead of removing placeholders (safer approach) # This avoids corrupting the PPTX file structure try: for shape in slide.shapes: if hasattr(shape, 'is_placeholder') and shape.is_placeholder: try: if hasattr(shape, 'text_frame'): shape.text_frame.clear() # Set text to empty string to remove "Click to add text" if len(shape.text_frame.paragraphs) > 0: shape.text_frame.paragraphs[0].text = "" except: pass except Exception as placeholder_error: logger.warning(f"Could not clear placeholders: {str(placeholder_error)}") # Add title as textbox (smaller size for slides) from pptx.util import Inches titleBox = slide.shapes.add_textbox(Inches(0.5), Inches(0.2), prs.slide_width - Inches(1), Inches(0.6)) titleFrame = titleBox.text_frame titleFrame.text = slide_data.get("title", "Slide") title_style = styles.get("title", {}) # Smaller title size for slides (default 32 instead of 44) title_font_size = title_style.get("font_size", 32) # Reduce further for slides (max 32pt, min 10pt for readability) title_font_size = max(10, min(title_font_size, 32)) titleFrame.paragraphs[0].font.size = Pt(title_font_size) titleFrame.paragraphs[0].font.bold = title_style.get("bold", True) title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) titleFrame.paragraphs[0].font.color.rgb = RGBColor(*title_color) titleFrame.paragraphs[0].alignment = PP_ALIGN.LEFT titleFrame.word_wrap = True # Render sections with proper PowerPoint objects (tables, lists, etc.) # Organize content into frames for better layout if hasSections: # Organize sections into content groups for frame-based layout # Images are handled within the frame rendering method self._renderSlideContentWithFrames(slide, slide_sections, slide_images, styles, prs) # Fallback: if no sections but has content text, render in textbox elif slide_content and not hasImages: # Create textbox for content (no placeholders in blank layout) from pptx.util import Inches title_height_used = Inches(1.0) # Title height for blank slides content_left = Inches(0.5) content_top = title_height_used + Inches(0.3) content_width = prs.slide_width - Inches(1) content_height = prs.slide_height - content_top - Inches(0.5) content_textbox = slide.shapes.add_textbox(content_left, content_top, content_width, content_height) text_frame = content_textbox.text_frame text_frame.word_wrap = True text_frame.auto_size = None # Split content into paragraphs paragraphs = slide_content.split('\n\n') for paragraph in paragraphs: if paragraph.strip(): p = text_frame.add_paragraph() p.text = paragraph.strip() # Apply AI-generated styling with adaptive sizing paragraph_style = styles.get("paragraph", {}) base_font_size = paragraph_style.get("font_size", 18) # Calculate adaptive font size based on content length try: total_chars = len(slide_content) chars_per_line = max(1, int(content_width / Pt(10))) lines_needed = total_chars / chars_per_line available_lines = max(1, int(content_height / Pt(14))) font_multiplier = 1.0 if available_lines > 0 and lines_needed > available_lines: font_multiplier = max(0.6, min(1.0, (available_lines / lines_needed) * 1.1)) calculated_size = max(6, int(base_font_size * font_multiplier)) # Minimum 6pt except (ZeroDivisionError, ValueError, TypeError): calculated_size = max(6, base_font_size) # Fallback to base size with minimum p.font.size = Pt(calculated_size) p.font.bold = paragraph_style.get("bold", False) paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47))) p.font.color.rgb = RGBColor(*paragraph_color) # Apply alignment align = paragraph_style.get("align", "left") if align == "center": p.alignment = PP_ALIGN.CENTER elif align == "right": p.alignment = PP_ALIGN.RIGHT else: p.alignment = PP_ALIGN.LEFT # If no slides were created, create a default slide if not slidesData: slide_layout = prs.slide_layouts[0] # Title slide layout slide = prs.slides.add_slide(slide_layout) title_shape = slide.shapes.title title_shape.text = title # Apply title styling to default slide title_style = styles.get("title", {}) if title_shape.text_frame.paragraphs[0].font: title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 48)) title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True) title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color) subtitle_shape = slide.placeholders[1] subtitle_shape.text = "Generated by PowerOn AI System" # Apply subtitle styling paragraph_style = styles.get("paragraph", {}) if subtitle_shape.text_frame.paragraphs[0].font: subtitle_shape.text_frame.paragraphs[0].font.size = Pt(paragraph_style.get("font_size", 20)) subtitle_shape.text_frame.paragraphs[0].font.bold = paragraph_style.get("bold", False) paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47))) subtitle_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*paragraph_color) # Save to buffer buffer = io.BytesIO() prs.save(buffer) buffer.seek(0) # Convert to base64 pptx_bytes = buffer.getvalue() pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8') logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes") # Determine filename from document or title documents = extractedContent.get("documents", []) if documents and isinstance(documents[0], dict): filename = documents[0].get("filename") if not filename: filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.presentationml.presentation") else: filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.presentationml.presentation") # Extract metadata for document type and other info metadata = extractedContent.get("metadata", {}) if extractedContent else {} documentType = metadata.get("documentType") if isinstance(metadata, dict) else None return [ RenderedDocument( documentData=pptx_bytes, mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation", filename=filename, documentType=documentType, metadata=metadata if isinstance(metadata, dict) else None ) ] except ImportError: logger.error("python-pptx library not installed. Install with: pip install python-pptx") fallbackContent = "python-pptx library not installed" metadata = extractedContent.get("metadata", {}) if extractedContent else {} documentType = metadata.get("documentType") if isinstance(metadata, dict) else None return [ RenderedDocument( documentData=fallbackContent.encode('utf-8'), mimeType="text/plain", filename=self._determineFilename(title, "text/plain"), documentType=documentType, metadata=metadata if isinstance(metadata, dict) else None ) ] except Exception as e: logger.error(f"Error rendering PowerPoint presentation: {str(e)}") fallbackContent = f"Error rendering PowerPoint presentation: {str(e)}" metadata = extractedContent.get("metadata", {}) if extractedContent else {} documentType = metadata.get("documentType") if isinstance(metadata, dict) else None return [ RenderedDocument( documentData=fallbackContent.encode('utf-8'), mimeType="text/plain", filename=self._determineFilename(title, "text/plain"), documentType=documentType, metadata=metadata if isinstance(metadata, dict) else None ) ] def _parseContentToSlides(self, content: str, title: str) -> list: """ Parse content into slide data structure. Args: content: Content to parse title: Presentation title Returns: List of slide data dictionaries """ slides = [] # Split content by slide markers or headers slide_sections = self._splitContentIntoSlides(content) for i, section in enumerate(slide_sections): if section.strip(): slide_data = { "title": f"Slide {i + 1}", "content": section.strip() } # Extract title from content if it starts with # lines = section.strip().split('\n') if lines and lines[0].startswith('#'): # Remove # symbols and clean up title slide_title = lines[0].lstrip('#').strip() slide_data["title"] = slide_title slide_data["content"] = '\n'.join(lines[1:]).strip() elif lines and lines[0].strip(): # Use first line as title if it looks like a title first_line = lines[0].strip() if len(first_line) < 100 and not first_line.endswith('.'): slide_data["title"] = first_line slide_data["content"] = '\n'.join(lines[1:]).strip() slides.append(slide_data) return slides def _splitContentIntoSlides(self, content: str) -> list: """ Split content into individual slides based on headers and structure. Args: content: Content to split Returns: List of slide content strings """ # re is already imported at module level # First, try to split by major headers (# or ##) # This is the most common case for AI-generated content header_pattern = r'^(#{1,2})\s+(.+)$' lines = content.split('\n') slides = [] current_slide = [] for line in lines: # Check if this line is a header header_match = re.match(header_pattern, line.strip()) if header_match: # If we have content in current slide, save it if current_slide: slide_content = '\n'.join(current_slide).strip() if slide_content: slides.append(slide_content) current_slide = [] # Start new slide with this header current_slide.append(line) else: # Add line to current slide current_slide.append(line) # Add the last slide if current_slide: slide_content = '\n'.join(current_slide).strip() if slide_content: slides.append(slide_content) # If we found slides with headers, return them if len(slides) > 1: return slides # Fallback: Split by double newlines sections = content.split('\n\n\n') if len(sections) > 1: return [s.strip() for s in sections if s.strip()] # Another fallback: Split by double newlines sections = content.split('\n\n') if len(sections) > 1: return [s.strip() for s in sections if s.strip()] # Last resort: return as single slide return [content.strip()] def getOutputMimeType(self) -> str: """Get MIME type for rendered output.""" return self.outputMimeType async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: """Get style set - use styles from document generation metadata if available, otherwise enhance default styles with AI if userPrompt provided. WICHTIG: In a dynamic scalable AI system, styling should come from document generation, not be generated separately by renderers. Only fall back to AI if styles not provided. Args: extractedContent: Document content with metadata (may contain styles) userPrompt: User's prompt (AI will detect style instructions in any language) aiService: AI service (used only if styles not in metadata and userPrompt provided) templateName: Name of template style set (None = default) Returns: Dict with style definitions for all document styles """ # Get default style set defaultStyleSet = self._getDefaultStyleSet() # FIRST: Check if styles are provided in document generation metadata (preferred approach) if extractedContent: metadata = extractedContent.get("metadata", {}) if isinstance(metadata, dict): styles = metadata.get("styles") if styles and isinstance(styles, dict): self.logger.debug("Using styles from document generation metadata") enhancedStyleSet = self._convertColorsFormat(styles) return self._validateStylesReadability(enhancedStyleSet) # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata) if userPrompt and aiService: self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...") enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService) # Colors already converted in _getAiStylesWithPptxColors return self._validateStylesReadability(enhancedStyleSet) else: # Use default styles only return defaultStyleSet async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]: """Enhance default styles with AI based on user prompt.""" try: style_template = self._createProfessionalPptxTemplate(userPrompt, defaultStyleSet) enhanced_styles = await self._getAiStylesWithPptxColors(aiService, style_template, defaultStyleSet) return enhanced_styles except Exception as e: self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles") return defaultStyleSet def _validateStylesReadability(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Validate and fix readability issues in AI-generated styles.""" try: # Ensure minimum font sizes for PowerPoint readability min_font_sizes = { "title": 36, "heading": 24, "subheading": 20, "paragraph": 14, "bullet_list": 14, "table_header": 12, "table_cell": 12 } for style_name, min_size in min_font_sizes.items(): if style_name in styles: current_size = styles[style_name].get("font_size", 12) if current_size < min_size: styles[style_name]["font_size"] = min_size return styles except Exception as e: logger.warning(f"Style validation failed: {str(e)}") return self._getDefaultStyleSet() def _getDefaultStyleSet(self) -> Dict[str, Any]: """Default PowerPoint style set - used when no style instructions present.""" return { "title": {"font_size": 32, "color": "#1B365D", "bold": True, "align": "left"}, "heading": {"font_size": 24, "color": "#1B365D", "bold": True, "align": "left"}, "subheading": {"font_size": 20, "color": "#4A90E2", "bold": True, "align": "left"}, "paragraph": {"font_size": 14, "color": "#2F2F2F", "bold": False, "align": "left"}, "bullet_list": {"font_size": 14, "color": "#2F2F2F", "indent": 20}, "table_header": {"font_size": 18, "color": "#FFFFFF", "bold": True, "background": "#1B365D"}, "table_cell": {"font_size": 16, "color": "#2F2F2F", "bold": False, "background": "#F8F9FA"}, "slide_size": "16:9", "content_per_slide": "concise", "design_theme": "corporate", "color_scheme": "professional", "background_style": "clean", "accent_colors": ["#1B365D", "#2C5F2D", "#4A90E2", "#6B7280"], "professional_grade": True, "executive_ready": True } def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str: """Create a professional PowerPoint-specific AI style template for corporate-quality slides.""" # json is already imported at module level schema_json = json.dumps(style_schema, indent=4) return f"""Customize the JSON below for professional PowerPoint slides. User Request: {userPrompt or "Create professional corporate slides"} Rules: - Use professional colors (blues, grays, deep greens) - Large, readable font sizes - High contrast - Sophisticated color palettes Return ONLY this JSON with your changes: {schema_json} JSON ONLY. NO OTHER TEXT.""" async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: """Get AI styles with proper PowerPoint color conversion. Uses base _getAiStyles for debug file writing.""" if not aiService: return default_styles try: # Use base template method which handles debug file writing enhanced_styles = await self._getAiStyles(aiService, style_template, default_styles) # Convert colors to PPTX format (RGB tuples) return self._convertColorsFormat(enhanced_styles) except Exception as e: self.logger.warning(f"AI style enhancement failed: {str(e)}, using defaults") return default_styles def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Convert hex colors to RGB format for PowerPoint compatibility.""" try: for style_name, style_config in styles.items(): if isinstance(style_config, dict): for prop, value in style_config.items(): if isinstance(value, str) and value.startswith('#'): # Convert hex to RGB tuple for PowerPoint hex_color = value.lstrip('#') if len(hex_color) == 6: r = int(hex_color[0:2], 16) g = int(hex_color[2:4], 16) b = int(hex_color[4:6], 16) styles[style_name][prop] = (r, g, b) elif len(hex_color) == 8: # aRGB format r = int(hex_color[2:4], 16) g = int(hex_color[4:6], 16) b = int(hex_color[6:8], 16) styles[style_name][prop] = (r, g, b) return styles except Exception as e: self.logger.warning(f"Color conversion failed: {str(e)}") return styles def _getSafeColor(self, color_value, default=(0, 0, 0)) -> tuple: """Get a safe RGB color tuple for PowerPoint.""" if isinstance(color_value, tuple) and len(color_value) == 3: return color_value elif isinstance(color_value, str) and color_value.startswith('#'): hex_color = color_value.lstrip('#') if len(hex_color) == 6: r = int(hex_color[0:2], 16) g = int(hex_color[2:4], 16) b = int(hex_color[4:6], 16) return (r, g, b) elif len(hex_color) == 8: # aRGB format r = int(hex_color[2:4], 16) g = int(hex_color[4:6], 16) b = int(hex_color[6:8], 16) return (r, g, b) return default async def _parseJsonToSlides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]: """ Parse JSON content into slide data structure. Args: json_content: JSON content to parse title: Presentation title styles: AI-generated styles Returns: List of slide data dictionaries """ slides = [] try: # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) if not self._validateJsonStructure(json_content): raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") # Extract sections and metadata from standardized schema sections = self._extractSections(json_content) metadata = self._extractMetadata(json_content) # Use provided title (which comes from documents[].title) as primary source # Fallback to metadata.title only if title parameter is empty document_title = title if title else metadata.get("title", "Generated Document") # Create title slide slides.append({ "title": document_title, "content": "Generated by PowerOn AI System\n\n" + self._formatTimestamp() }) # Process sections into slides based on content and user intent slides.extend(self._createSlidesFromSections(sections, styles)) # If no content slides were created, create a default content slide if len(slides) == 1: # Only title slide slides.append({ "title": "Content Overview", "content": "No structured content found in the source documents.\n\nPlease check the source documents and try again." }) return slides except Exception as e: logger.error(f"Error parsing JSON to slides: {str(e)}") # Return minimal fallback slides return [ { "title": title, "content": "Error parsing content for presentation" } ] def _createSlideFromSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]: """Create a slide from a JSON section.""" try: # Get section title from data or use default section_title = "Untitled Section" if section.get("content_type") == "heading": # Extract text from elements array - use nested content structure for element in section.get("elements", []): if isinstance(element, dict): content = element.get("content", {}) if isinstance(content, dict): text = content.get("text", "") if text: section_title = text break elif section.get("title"): section_title = section.get("title") content_type = section.get("content_type", "paragraph") elements = section.get("elements", []) # Check for three content formats from Phase 5D in elements content_parts = [] for element in elements: if not isinstance(element, dict): continue element_type = element.get("type", "") # Support three content formats from Phase 5D if element_type == "reference": # Document reference format doc_ref = element.get("documentReference", "") label = element.get("label", "Reference") content_parts.append(f"[Reference: {label}]") continue elif element_type == "extracted_text": # Extracted text format content = element.get("content", "") source = element.get("source", "") if content: source_text = f" (Source: {source})" if source else "" content_parts.append(f"{content}{source_text}") continue # Handle image sections specially if content_type == "image": # Extract image data from nested content structure images = [] for element in elements: if isinstance(element, dict): # Extract from nested content structure content = element.get("content", {}) if isinstance(content, dict): base64Data = content.get("base64Data") altText = content.get("altText", "Image") caption = content.get("caption", "") else: # Fallback to direct element fields base64Data = element.get("base64Data") altText = element.get("altText", "Image") caption = element.get("caption", "") if base64Data: images.append({ "base64Data": base64Data, "altText": altText, "caption": caption }) return { "title": section_title or (elements[0].get("content", {}).get("altText", "Image") if elements and isinstance(elements[0], dict) else "Image"), "content": "\n\n".join(content_parts) if content_parts else "", # Include reference/extracted_text if present "images": images } # Build slide content based on section type - iterate over elements and format each if not content_parts: # Only if we didn't process reference/extracted_text above for element in elements: if not isinstance(element, dict): continue element_type = element.get("type", "") # Use element type if available, otherwise fall back to section content_type if not element_type: element_type = content_type if element_type == "table": formatted = self._formatTableForSlide(element) if formatted: content_parts.append(formatted) elif element_type == "bullet_list" or element_type == "list": formatted = self._formatListForSlide(element) if formatted: content_parts.append(formatted) elif element_type == "heading": formatted = self._formatHeadingForSlide(element) if formatted: content_parts.append(formatted) elif element_type == "paragraph": formatted = self._formatParagraphForSlide(element) if formatted: content_parts.append(formatted) elif element_type == "code_block" or element_type == "code": formatted = self._formatCodeForSlide(element) if formatted: content_parts.append(formatted) else: # Fallback to paragraph formatting formatted = self._formatParagraphForSlide(element) if formatted: content_parts.append(formatted) # Combine content parts slide_content = "\n\n".join(filter(None, content_parts)) return { "title": section_title, "content": slide_content, "images": [] # No images for non-image sections } except Exception as e: logger.warning(f"Error creating slide from section: {str(e)}") return None def _formatTableForSlide(self, element: Dict[str, Any]) -> str: """Format table data for slide presentation.""" try: # Extract table data from element - handle nested content structure if not isinstance(element, dict): return "" # Extract from nested content structure content = element.get("content", {}) if not isinstance(content, dict): return "" headers = content.get("headers", []) rows = content.get("rows", []) if not headers: return "" # Create table representation table_lines = [] # Add headers header_line = " | ".join(str(h) for h in headers) table_lines.append(header_line) # Add separator separator = "-" * len(header_line) table_lines.append(separator) # Add data rows (limit based on content density) max_rows = 5 # Default limit for row in rows[:max_rows]: row_line = " | ".join(str(cell) for cell in row) table_lines.append(row_line) if len(rows) > max_rows: table_lines.append(f"... and {len(rows) - max_rows} more rows") return "\n".join(table_lines) except Exception as e: logger.warning(f"Error formatting table for slide: {str(e)}") return "" def _formatListForSlide(self, list_data: Dict[str, Any]) -> str: """Format list data for slide presentation.""" try: # Extract from nested content structure content = list_data.get("content", {}) if not isinstance(content, dict): return "" items = content.get("items", []) if not items: return "" # Create list representation list_lines = [] for item in items: if isinstance(item, dict): text = item.get("text", "") list_lines.append(f"• {text}") # Add subitems (limit to 3 for readability) subitems = item.get("subitems", [])[:3] for subitem in subitems: if isinstance(subitem, dict): list_lines.append(f" - {subitem.get('text', '')}") else: list_lines.append(f" - {subitem}") else: list_lines.append(f"• {str(item)}") return "\n".join(list_lines) except Exception as e: logger.warning(f"Error formatting list for slide: {str(e)}") return "" def _formatHeadingForSlide(self, heading_data: Dict[str, Any]) -> str: """Format heading data for slide presentation.""" try: # Extract from nested content structure content = heading_data.get("content", {}) if not isinstance(content, dict): return "" text = content.get("text", "") level = content.get("level", 1) if text: return f"{'#' * level} {text}" return "" except Exception as e: logger.warning(f"Error formatting heading for slide: {str(e)}") return "" def _formatParagraphForSlide(self, paragraph_data: Dict[str, Any]) -> str: """Format paragraph data for slide presentation.""" try: # Extract from nested content structure content = paragraph_data.get("content", {}) if isinstance(content, dict): text = content.get("text", "") elif isinstance(content, str): text = content else: text = "" if text: # Limit paragraph length based on content density max_length = 200 # Default limit if len(text) > max_length: text = text[:max_length] + "..." return text return "" except Exception as e: logger.warning(f"Error formatting paragraph for slide: {str(e)}") return "" def _formatCodeForSlide(self, code_data: Dict[str, Any]) -> str: """Format code data for slide presentation.""" try: # Extract from nested content structure content = code_data.get("content", {}) if not isinstance(content, dict): return "" code = content.get("code", "") language = content.get("language", "") if code: # Limit code length based on content density max_length = 100 # Default limit if len(code) > max_length: code = code[:max_length] + "..." if language: return f"Code ({language}):\n{code}" else: return f"Code:\n{code}" return "" except Exception as e: logger.warning(f"Error formatting code for slide: {str(e)}") return "" def _getSlideLayoutIndex(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int: """Determine the best professional slide layout based on content.""" try: content = slide_data.get("content", "") title = slide_data.get("title", "") # Check if it's a title slide (first slide) if not content or "Generated by PowerOn AI System" in content: return 0 # Title slide layout # Professional layout selection based on content if "|" in content and "-" in content: # Has both tables and lists - use content with caption for professional look return 2 elif "|" in content: # Has tables - use content layout for clean table presentation return 1 elif content.count("•") > 2: # Has many bullet points - use content layout for better readability return 1 elif len(content) > 200: # Long content - use content layout for better text flow return 1 elif title and len(title) > 20: # Long title - use title and content layout return 1 else: # Default to title and content layout for professional appearance return 1 except Exception as e: logger.warning(f"Error determining slide layout: {str(e)}") return 1 # Default to title and content layout def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]: """Create slides from sections: each heading level 1 (chapter) creates a new slide, content accumulates until next level 1 heading.""" try: slides = [] current_slide_sections = [] # Store sections (not formatted text) for proper rendering current_slide_title = "Content Overview" for section in sections: section_type = section.get("content_type", "paragraph") elements = section.get("elements", []) # Skip sections with no elements (unless they're headings that should create new slides) if not elements and section_type != "heading": continue if section_type == "heading": # Extract heading level level = 1 # Default heading_text = "" for element in elements: if isinstance(element, dict): # Extract from nested content structure content = element.get("content", {}) if isinstance(content, dict): heading_text = content.get("text", "") level = content.get("level", 1) elif isinstance(content, str): heading_text = content level = 1 # Only level 1 headings (chapters) create new slides if level == 1: # If we have accumulated content, create a slide if current_slide_sections: slides.append({ "title": current_slide_title, "sections": current_slide_sections.copy(), # Store sections for proper rendering "images": [] }) current_slide_sections = [] # Start new slide with heading as title if heading_text: current_slide_title = heading_text else: # If no heading text found but this is a heading section, use section ID or default current_slide_title = section.get("id", "Untitled Section") else: # Level 2+ headings are added as sections to current slide current_slide_sections.append(section) elif section_type == "image": # Images are added to current slide (will be organized in frames) current_slide_sections.append(section) else: # Add section to current slide (will be rendered properly) current_slide_sections.append(section) # Add final slide if there's content if current_slide_sections: slides.append({ "title": current_slide_title, "sections": current_slide_sections.copy(), "images": [] }) return slides except Exception as e: logger.warning(f"Error creating slides from sections: {str(e)}") return [] def _formatSectionContent(self, section: Dict[str, Any]) -> str: """Format section content for slide presentation.""" try: content_type = section.get("content_type", "paragraph") elements = section.get("elements", []) # Image sections return empty content (handled separately) if content_type == "image": return "" # Process each element in the section - use element type, not section type content_parts = [] for element in elements: if not isinstance(element, dict): continue element_type = element.get("type", "") # Use element type if available, otherwise fall back to section content_type if not element_type: element_type = content_type if element_type == "table": formatted = self._formatTableForSlide(element) if formatted: content_parts.append(formatted) elif element_type == "bullet_list" or element_type == "list": formatted = self._formatListForSlide(element) if formatted: content_parts.append(formatted) elif element_type == "heading": formatted = self._formatHeadingForSlide(element) if formatted: content_parts.append(formatted) elif element_type == "paragraph": formatted = self._formatParagraphForSlide(element) if formatted: content_parts.append(formatted) elif element_type == "code_block" or element_type == "code": formatted = self._formatCodeForSlide(element) if formatted: content_parts.append(formatted) else: # Fallback to paragraph formatting formatted = self._formatParagraphForSlide(element) if formatted: content_parts.append(formatted) return "\n\n".join(filter(None, content_parts)) except Exception as e: logger.warning(f"Error formatting section content: {str(e)}") return "" def _addImagesToSlide(self, slide, images: List[Dict[str, Any]], styles: Dict[str, Any]) -> None: """Add images to a PowerPoint slide.""" try: from pptx.util import Inches, Pt from pptx.enum.text import PP_ALIGN from pptx.dml.color import RGBColor import base64 import io if not images: return # Get slide dimensions from presentation if hasattr(self, '_currentPresentation'): prs = self._currentPresentation else: prs = slide.presentation slideWidth = prs.slide_width slideHeight = prs.slide_height titleHeight = Inches(1.5) # Approximate title height # Available area for images availableWidth = slideWidth - Inches(1) # Margins availableHeight = slideHeight - titleHeight - Inches(1) # Title + margins # Position images if len(images) == 1: # Single image: center it img = images[0] base64Data = img.get("base64Data") # Validate base64Data is present and not empty if not base64Data or not isinstance(base64Data, str) or len(base64Data.strip()) == 0: logger.error(f"Invalid base64Data: present={bool(base64Data)}, type={type(base64Data)}, length={len(base64Data) if base64Data else 0}") return try: imageBytes = base64.b64decode(base64Data) if len(imageBytes) == 0: logger.error("Decoded image bytes are empty") return imageStream = io.BytesIO(imageBytes) except Exception as decode_error: logger.error(f"Failed to decode base64 image data: {str(decode_error)}") return # Get image dimensions try: from PIL import Image as PILImage pilImage = PILImage.open(imageStream) imgWidth, imgHeight = pilImage.size # Scale to fit available space (max 90% of slide for better visibility) # Convert PIL pixels to PowerPoint points (1 inch = 72 points, typical screen DPI = 96) # Conversion: pixels * (72/96) = points imgWidthPoints = imgWidth * (72.0 / 96.0) imgHeightPoints = imgHeight * (72.0 / 96.0) maxWidth = availableWidth * 0.9 maxHeight = availableHeight * 0.9 scale = min(maxWidth / imgWidthPoints, maxHeight / imgHeightPoints, 1.0) finalWidth = imgWidthPoints * scale finalHeight = imgHeightPoints * scale # Center image left = (slideWidth - finalWidth) / 2 top = titleHeight + (availableHeight - finalHeight) / 2 imageStream.seek(0) except Exception: # Fallback: use default size finalWidth = Inches(6) finalHeight = Inches(4.5) left = (slideWidth - finalWidth) / 2 top = titleHeight + Inches(1) imageStream.seek(0) # Add image to slide try: slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) except Exception as add_error: # If add_picture fails, try with explicit format imageStream.seek(0) # Ensure we have valid image data if len(imageBytes) > 0: slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) else: raise Exception(f"Empty image data: {add_error}") # Add caption if available caption = img.get("caption") or img.get("altText") if caption and caption != "Image": # Add text box below image captionTop = top + finalHeight + Inches(0.2) captionBox = slide.shapes.add_textbox( Inches(1), captionTop, slideWidth - Inches(2), Inches(0.5) ) captionFrame = captionBox.text_frame captionFrame.text = caption captionFrame.paragraphs[0].font.size = Pt(12) captionFrame.paragraphs[0].font.italic = True captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER else: # Multiple images: arrange in grid cols = 2 if len(images) <= 4 else 3 rows = (len(images) + cols - 1) // cols imgWidth = (availableWidth - Inches(0.5) * (cols - 1)) / cols imgHeight = (availableHeight - Inches(0.5) * (rows - 1)) / rows for idx, img in enumerate(images): base64Data = img.get("base64Data") if base64Data: row = idx // cols col = idx % cols imageBytes = base64.b64decode(base64Data) imageStream = io.BytesIO(imageBytes) left = Inches(0.5) + col * (imgWidth + Inches(0.5)) top = titleHeight + Inches(0.5) + row * (imgHeight + Inches(0.5)) slide.shapes.add_picture(imageStream, left, top, width=imgWidth, height=imgHeight) except Exception as e: logger.error(f"Error embedding images in PPTX slide: {str(e)}") import traceback logger.error(f"Traceback: {traceback.format_exc()}") def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float, max_width: float = None) -> None: """Add a PowerPoint table to slide.""" try: from pptx.util import Inches, Pt from pptx.enum.text import PP_ALIGN from pptx.dml.color import RGBColor # Extract from nested content structure content = element.get("content", {}) if not isinstance(content, dict): return headers = content.get("headers", []) rows = content.get("rows", []) if not headers: return # Calculate table dimensions num_cols = int(len(headers)) # Ensure integer num_rows = int(len(rows) + 1) # +1 for header row, ensure integer left = Inches(0.5) # Get presentation from stored reference or slide if hasattr(self, '_currentPresentation'): prs = self._currentPresentation else: prs = slide.presentation width = max_width if max_width is not None else (prs.slide_width - Inches(1)) row_height = Inches(0.4) # Create table - ensure all parameters are proper types table_height = row_height * num_rows table_shape = slide.shapes.add_table(num_rows, num_cols, left, top, width, table_height) table = table_shape.table # Set column widths - width is in EMU, divide evenly # python-pptx expects EMU values (914400 EMU = 1 inch) col_width_emu = int(width) // num_cols # Ensure integer division for EMU for col_idx in range(num_cols): table.columns[col_idx].width = col_width_emu # Add headers with styling - OPTIMIZED: pre-calculate color/style objects header_style = styles.get("table_header", {}) header_bg_color = self._getSafeColor(header_style.get("background", (31, 78, 121))) header_text_color = self._getSafeColor(header_style.get("text_color", (255, 255, 255))) header_font_size = header_style.get("font_size", 18) # Pre-calculate and cache RGB color objects header_bg_rgb = RGBColor(*header_bg_color) header_text_rgb = RGBColor(*header_text_color) header_font_size_pt = Pt(header_font_size) header_bold = header_style.get("bold", True) # Determine alignment once align = header_style.get("align", "center") if align == "left": header_alignment = PP_ALIGN.LEFT elif align == "right": header_alignment = PP_ALIGN.RIGHT else: header_alignment = PP_ALIGN.CENTER for col_idx, header in enumerate(headers): cell = table.cell(0, col_idx) # Clear existing text and set new text cell.text_frame.clear() header_text = str(header) if header else "" cell.text = header_text # Ensure paragraph exists if len(cell.text_frame.paragraphs) == 0: cell.text_frame.add_paragraph() # Apply styling - use cached objects cell.fill.solid() cell.fill.fore_color.rgb = header_bg_rgb para = cell.text_frame.paragraphs[0] para.font.bold = header_bold para.font.size = header_font_size_pt para.font.color.rgb = header_text_rgb para.alignment = header_alignment # Ensure text is set on paragraph if not para.text: para.text = header_text # Add data rows with styling - OPTIMIZED: pre-calculate color/style objects cell_style = styles.get("table_cell", {}) cell_bg_color = self._getSafeColor(cell_style.get("background", (255, 255, 255))) cell_text_color = self._getSafeColor(cell_style.get("text_color", (47, 47, 47))) cell_font_size = cell_style.get("font_size", 16) # Pre-calculate and cache RGB color objects cell_bg_rgb = RGBColor(*cell_bg_color) cell_text_rgb = RGBColor(*cell_text_color) cell_font_size_pt = Pt(cell_font_size) cell_bold = cell_style.get("bold", False) # Determine alignment once align = cell_style.get("align", "left") if align == "center": cell_alignment = PP_ALIGN.CENTER elif align == "right": cell_alignment = PP_ALIGN.RIGHT else: cell_alignment = PP_ALIGN.LEFT for row_idx, row_data in enumerate(rows, 1): for col_idx, cell_data in enumerate(row_data[:num_cols]): cell = table.cell(row_idx, col_idx) # Clear existing text and set new text cell.text_frame.clear() cell_text = str(cell_data) if cell_data is not None else "" cell.text = cell_text # Ensure paragraph exists if len(cell.text_frame.paragraphs) == 0: cell.text_frame.add_paragraph() # Apply styling - use cached objects cell.fill.solid() cell.fill.fore_color.rgb = cell_bg_rgb para = cell.text_frame.paragraphs[0] para.font.size = cell_font_size_pt para.font.bold = cell_bold para.font.color.rgb = cell_text_rgb para.alignment = cell_alignment # Ensure text is set on paragraph if not para.text: para.text = cell_text except Exception as e: logger.warning(f"Error adding table to slide: {str(e)}") def _addBulletListToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Add bullet list to slide text frame.""" try: from pptx.util import Pt from pptx.dml.color import RGBColor from pptx.enum.text import PP_ALIGN # Extract from nested content structure content = element.get("content", {}) if not isinstance(content, dict): return items = content.get("items", []) if not items: return list_style = styles.get("bullet_list", {}) base_font_size = list_style.get("font_size", 14) calculated_size = max(10, int(base_font_size * font_size_multiplier)) # Minimum 10pt for readability # Pre-calculate and cache style objects to avoid repeated parsing font_size_pt = Pt(calculated_size) text_color = self._getSafeColor(list_style.get("color", (47, 47, 47))) text_color_rgb = RGBColor(*text_color) space_before_pt = Pt(2) space_after_pt = Pt(2) logger.debug(f"Rendering bullet list with {len(items)} items") for idx, item in enumerate(items): try: # Get text content first if isinstance(item, dict): item_text = item.get("text", "") else: item_text = str(item) # Skip empty items if not item_text or len(item_text.strip()) == 0: logger.debug(f"Skipping empty bullet item {idx}") continue # Create new paragraph for each bullet item p = text_frame.add_paragraph() # Set level to 1 for bullet points BEFORE setting text # In python-pptx, setting level > 0 should automatically enable bullets p.level = 1 # Set text content p.text = item_text # Apply formatting - use cached objects p.font.size = font_size_pt p.font.color.rgb = text_color_rgb p.alignment = PP_ALIGN.LEFT # Left align bullet lists p.space_before = space_before_pt # Small spacing before p.space_after = space_after_pt # Small spacing after # In python-pptx, setting level > 0 should enable bullets automatically # However, some versions may not support paragraph_format, so we'll use manual bullets as fallback # Always add manual bullet character to ensure visibility if not (p.text.startswith('•') or p.text.startswith('-') or p.text.startswith('*') or p.text.startswith('◦')): p.text = '• ' + p.text logger.debug(f"Added manual bullet character to item {idx}") # Set proper indentation for multiline bullets (hanging indent) # For multiline bullets: bullet at left margin, text indented, wrapped lines align with text try: # Try accessing paragraph_format - it may not exist in all python-pptx versions if hasattr(p, 'paragraph_format'): pf = p.paragraph_format # Left indent: indents the entire paragraph (bullet + text) pf.left_indent = Pt(18) # First line indent: negative value creates hanging indent # This brings the bullet back to the left while keeping text indented pf.first_line_indent = Pt(-18) # Negative to create hanging indent logger.debug(f"Set hanging indent for bullet item {idx}") else: # Try via _element if paragraph_format not available try: from pptx.util import Pt as PtUtil pPr = p._element.get_or_add_pPr() # Set left margin (indents entire paragraph) pPr.left_margin = PtUtil(18) # Set first line indent (negative for hanging indent) pPr.first_line_indent = PtUtil(-18) logger.debug(f"Set hanging indent via XML for bullet item {idx}") except Exception as xml_error: logger.debug(f"Could not set hanging indent via XML: {str(xml_error)}") # Indentation is optional, continue without it pass except Exception as indent_error: logger.debug(f"Could not set indent for item {idx}: {str(indent_error)}") # Continue without indent - bullets will still show, but multiline won't be properly indented logger.debug(f"Successfully added bullet item {idx}: '{item_text[:50]}...'") except Exception as item_error: logger.error(f"Error adding bullet item {idx}: {str(item_error)}", exc_info=True) # Continue with next item even if one fails continue logger.debug(f"Completed rendering bullet list, added {len(text_frame.paragraphs)} paragraphs") except Exception as e: logger.warning(f"Error adding bullet list to slide: {str(e)}") def _addHeadingToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Add heading to slide text frame.""" try: from pptx.util import Pt from pptx.dml.color import RGBColor # Extract from nested content structure content = element.get("content", {}) if not isinstance(content, dict): return text = content.get("text", "") level = content.get("level", 1) if text: p = text_frame.add_paragraph() p.text = text # Headings should be level 0 (no indentation) regardless of heading level p.level = 0 heading_style = styles.get("heading", {}) # Different font sizes for different heading levels if level == 1: base_font_size = heading_style.get("font_size", 28) # Largest for H1 elif level == 2: base_font_size = heading_style.get("font_size", 22) # Medium for H2 elif level == 3: base_font_size = heading_style.get("font_size", 18) # Smaller for H3 else: base_font_size = heading_style.get("font_size", 16) # Default for H4+ calculated_size = max(12, int(base_font_size * font_size_multiplier)) # Minimum 12pt for headings p.font.size = Pt(calculated_size) p.font.bold = heading_style.get("bold", True) p.font.color.rgb = RGBColor(*self._getSafeColor(heading_style.get("color", (31, 78, 121)))) # Add spacing before and after headings p.space_before = Pt(12 if level == 1 else 8) # More space before H1 p.space_after = Pt(6) # Space after heading except Exception as e: logger.warning(f"Error adding heading to slide: {str(e)}") def _addParagraphToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Add paragraph to slide text frame.""" try: from pptx.util import Pt from pptx.dml.color import RGBColor from pptx.enum.text import PP_ALIGN # Extract from nested content structure content = element.get("content", {}) if isinstance(content, dict): text = content.get("text", "") elif isinstance(content, str): text = content else: text = "" if text: p = text_frame.add_paragraph() p.text = text # Explicitly set level to 0 for regular paragraphs (not bullets) p.level = 0 # Ensure no bullet formatting try: if hasattr(p, 'paragraph_format'): p.paragraph_format.bullet.type = None except (AttributeError, TypeError): pass paragraph_style = styles.get("paragraph", {}) base_font_size = paragraph_style.get("font_size", 14) # Smaller default for better readability calculated_size = max(10, int(base_font_size * font_size_multiplier)) # Minimum 10pt for readability p.font.size = Pt(calculated_size) p.font.bold = paragraph_style.get("bold", False) p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) # Add proper spacing p.space_before = Pt(6) # Space before paragraph p.space_after = Pt(6) # Space after paragraph p.line_spacing = 1.2 # Line spacing for readability align = paragraph_style.get("align", "left") if align == "center": p.alignment = PP_ALIGN.CENTER elif align == "right": p.alignment = PP_ALIGN.RIGHT else: p.alignment = PP_ALIGN.LEFT except Exception as e: logger.warning(f"Error adding paragraph to slide: {str(e)}") def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Add code block to slide text frame.""" try: from pptx.util import Pt from pptx.dml.color import RGBColor # Extract from nested content structure content = element.get("content", {}) if not isinstance(content, dict): return code = content.get("code", "") language = content.get("language", "") if code: code_style = styles.get("code_block", {}) code_font = code_style.get("font", "Courier New") base_code_font_size = code_style.get("font_size", 9) code_font_size = max(6, int(base_code_font_size * font_size_multiplier)) # Minimum 6pt for code code_color = self._getSafeColor(code_style.get("color", (47, 47, 47))) p = text_frame.add_paragraph() if language: p.text = f"Code ({language}):" p.font.bold = True p.font.size = Pt(code_font_size) p = text_frame.add_paragraph() p.text = code p.font.name = code_font p.font.size = Pt(code_font_size) p.font.color.rgb = RGBColor(*code_color) except Exception as e: logger.warning(f"Error adding code block to slide: {str(e)}") def _formatTimestamp(self) -> str: """Format current timestamp for presentation generation.""" # datetime and UTC are already imported at module level return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") def _renderSlideContentWithFrames(self, slide, slide_sections: List[Dict[str, Any]], slide_images: List[Dict[str, Any]], styles: Dict[str, Any], prs) -> None: """ Organize slide content into frames for better layout. Groups content by type (images, bullet lists, paragraphs, tables) and renders each in appropriately sized frames. """ try: from pptx.util import Inches, Pt from pptx.enum.text import PP_ALIGN from pptx.dml.color import RGBColor # Extract images from sections first images_to_render = list(slide_images) if slide_images else [] text_sections = [] table_sections = [] for section in slide_sections: section_type = section.get("content_type", "paragraph") elements = section.get("elements", []) if not elements: # Skip empty sections continue # Extract images from all sections section_has_images = False for element in elements: if isinstance(element, dict) and element.get("type") == "image": content = element.get("content", {}) base64Data = None # Handle different content formats if isinstance(content, dict): base64Data = content.get("base64Data") altText = content.get("altText", "Image") caption = content.get("caption", "") elif isinstance(content, str): # If content is a string, it might be base64 data directly # Check if it looks like base64 if len(content) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in content[:100]): base64Data = content altText = "Image" caption = "" else: # Not base64, skip continue else: # Try to get base64Data directly from element base64Data = element.get("base64Data") altText = element.get("altText", "Image") caption = element.get("caption", "") if base64Data: images_to_render.append({ "base64Data": base64Data, "altText": altText, "caption": caption }) section_has_images = True # Skip image-only sections (they're already added to images_to_render) if section_type == "image" and section_has_images: continue # Categorize sections (excluding image elements) has_table = False non_image_elements = [] for element in elements: if isinstance(element, dict): element_type = element.get("type", "") # Skip image elements when categorizing if element_type == "image": continue if element_type == "table" or section_type == "table": has_table = True non_image_elements.append(element) # Only add sections that have non-image content if non_image_elements: if has_table: # Create a copy of section without image elements for table rendering table_section = { **section, "elements": non_image_elements } table_sections.append(table_section) else: # Create a copy of section without image elements for text rendering text_section = { **section, "elements": non_image_elements } text_sections.append(text_section) # Calculate layout dimensions title_height = Inches(1.5) available_height = prs.slide_height - title_height - Inches(0.5) # Title + margin available_width = prs.slide_width - Inches(1) # Margins margin = Inches(0.5) current_y = title_height + Inches(0.3) # Determine layout strategy based on content types has_images = len(images_to_render) > 0 has_tables = len(table_sections) > 0 has_text = len(text_sections) > 0 # Layout 1: Images + Text (horizontal split for landscape) if has_images and has_text and not has_tables: # Horizontal split: images on left, text on right (landscape format) img_width = available_width * 0.48 text_width = available_width * 0.48 img_left = margin text_left = margin + img_width + Inches(0.2) # Render images in left column (full height) if images_to_render: img_height = available_height - Inches(0.2) self._addImagesToSlideInFrame(slide, images_to_render, styles, img_left, current_y, img_width, img_height) # Render text in right column (full height, adaptive font size) if text_sections: text_height = available_height - Inches(0.2) self._renderTextSectionsInFrame(slide, text_sections, styles, text_left, current_y, text_width, text_height, adaptiveFontSize=True) # Layout 2: Tables + Text (horizontal split for landscape) elif has_tables and has_text: # Horizontal split: tables on left, text on right (landscape format) table_width = available_width * 0.48 text_width = available_width * 0.48 table_left = margin text_left = margin + table_width + Inches(0.2) # Render tables in left column (full height) table_y = current_y for table_section in table_sections: elements = table_section.get("elements", []) for element in elements: if isinstance(element, dict) and element.get("type") == "table": try: self._addTableToSlide(slide, element, styles, table_y, max_width=table_width) # Calculate actual table height content = element.get("content", {}) if isinstance(content, dict): rows = content.get("rows", []) num_rows = len(rows) + 1 # +1 for header actual_height = Inches(0.4) * num_rows table_y += actual_height + Inches(0.15) else: table_y += Inches(2) except Exception as table_error: logger.error(f"Error rendering table: {str(table_error)}") # Continue with next table break # Render text in right column (full height, adaptive font size) if text_sections: text_height = available_height - Inches(0.2) self._renderTextSectionsInFrame(slide, text_sections, styles, text_left, current_y, text_width, text_height, adaptiveFontSize=True) # Layout 3: Images + Tables + Text (horizontal split for landscape) elif has_images and has_tables and has_text: # Horizontal split: Images (left), Tables (middle), Text (right) img_width = available_width * 0.31 table_width = available_width * 0.31 text_width = available_width * 0.31 img_left = margin table_left = margin + img_width + Inches(0.15) text_left = margin + img_width + table_width + Inches(0.3) # Render images in left column (full height) if images_to_render: img_height = available_height - Inches(0.2) self._addImagesToSlideInFrame(slide, images_to_render, styles, img_left, current_y, img_width, img_height) # Render tables in middle column (full height) table_y = current_y for table_section in table_sections: elements = table_section.get("elements", []) for element in elements: if isinstance(element, dict) and element.get("type") == "table": try: self._addTableToSlide(slide, element, styles, table_y, max_width=table_width) content = element.get("content", {}) if isinstance(content, dict): rows = content.get("rows", []) num_rows = len(rows) + 1 actual_height = Inches(0.4) * num_rows table_y += actual_height + Inches(0.15) else: table_y += Inches(2) except Exception as table_error: logger.error(f"Error rendering table: {str(table_error)}") break # Render text in right column (full height, adaptive font size) if text_sections: text_height = available_height - Inches(0.2) self._renderTextSectionsInFrame(slide, text_sections, styles, text_left, current_y, text_width, text_height, adaptiveFontSize=True) # Layout 4: Images only elif has_images and not has_text and not has_tables: img_width = available_width * 0.8 img_height = available_height * 0.8 img_left = (available_width - img_width) / 2 + margin self._addImagesToSlideInFrame(slide, images_to_render, styles, img_left, current_y, img_width, img_height) # Layout 5: Text only (default, adaptive font size) elif has_text and not has_images and not has_tables: text_height = available_height - Inches(0.2) self._renderTextSectionsInFrame(slide, text_sections, styles, margin, current_y, available_width, text_height, adaptiveFontSize=True) # Layout 6: Tables only elif has_tables and not has_images and not has_text: table_height = available_height / max(len(table_sections), 1) table_width = available_width for table_section in table_sections: elements = table_section.get("elements", []) for element in elements: if isinstance(element, dict) and element.get("type") == "table": try: self._addTableToSlide(slide, element, styles, current_y, max_width=table_width) # Calculate actual table height content = element.get("content", {}) if isinstance(content, dict): rows = content.get("rows", []) num_rows = len(rows) + 1 # +1 for header actual_height = min(Inches(0.4) * num_rows, table_height) current_y += actual_height + Inches(0.2) else: current_y += table_height + Inches(0.2) except Exception as table_error: logger.error(f"Error rendering table: {str(table_error)}") # Continue with next table break except Exception as e: logger.error(f"Error rendering slide content with frames: {str(e)}") # Fallback to simple rendering try: content_shape = slide.placeholders[1] text_frame = content_shape.text_frame text_frame.clear() except (AttributeError, IndexError): from pptx.util import Inches left = Inches(0.5) top = Inches(1.5) width = prs.slide_width - Inches(1) height = prs.slide_height - top - Inches(0.5) textbox = slide.shapes.add_textbox(left, top, width, height) text_frame = textbox.text_frame text_frame.word_wrap = True # Simple fallback rendering for section in slide_sections: self._renderSectionToTextFrame(slide, section, styles, text_frame, font_size_multiplier=1.0) def _renderTextSectionsInFrame(self, slide, text_sections: List[Dict[str, Any]], styles: Dict[str, Any], left: float, top: float, width: float, height: float, adaptiveFontSize: bool = False) -> None: """Render text sections (paragraphs, lists, headings) in a text frame.""" try: from pptx.util import Inches, Pt from pptx.enum.text import PP_ALIGN from pptx.dml.color import RGBColor # Calculate total text length for adaptive font sizing total_text_length = 0 if adaptiveFontSize: for section in text_sections: elements = section.get("elements", []) for element in elements: if isinstance(element, dict): element_type = element.get("type", "") if element_type in ["paragraph", "bullet_list", "list", "heading"]: content = element.get("content", "") if isinstance(content, dict): if "text" in content: total_text_length += len(str(content["text"])) elif "items" in content: for item in content.get("items", []): total_text_length += len(str(item)) elif isinstance(content, str): total_text_length += len(content) # Calculate adaptive font size multiplier based on text length and frame size font_size_multiplier = 1.0 if adaptiveFontSize and total_text_length > 0: try: # More accurate calculation: estimate characters per line based on average character width # Average character width is approximately 0.6 * font_size in points # For 14pt font, average char width ≈ 8.4pt avg_char_width_pt = 8.4 # Approximate for 14pt font chars_per_line = max(1, int(float(width) / avg_char_width_pt)) # Estimate lines needed lines_needed = total_text_length / max(chars_per_line, 1) # Available lines based on height (line height ≈ 1.2 * font_size) line_height_pt = 16.8 # Approximate for 14pt font with 1.2 spacing available_lines = max(1, int(float(height) / line_height_pt)) if available_lines > 0 and lines_needed > available_lines: # More aggressive scaling for long texts # Calculate exact scale needed, then add 10% buffer scale_needed = available_lines / lines_needed font_size_multiplier = scale_needed * 0.9 # 10% buffer # Allow scaling down to 50% for very long texts (minimum readable) font_size_multiplier = max(0.5, min(1.0, font_size_multiplier)) elif lines_needed <= available_lines * 0.7: # If text is much shorter than available space, can use slightly larger font font_size_multiplier = min(1.1, (available_lines / lines_needed) * 0.8) except (ZeroDivisionError, ValueError, TypeError) as calc_error: logger.debug(f"Font size calculation error: {str(calc_error)}") # Fallback to default if calculation fails font_size_multiplier = 1.0 textbox = slide.shapes.add_textbox(left, top, width, height) text_frame = textbox.text_frame text_frame.word_wrap = True text_frame.auto_size = None # Disable auto-size for fixed frame # Ensure text frame can display bullets text_frame.margin_left = Pt(0) text_frame.margin_right = Pt(0) text_frame.margin_top = Pt(0) text_frame.margin_bottom = Pt(0) # Pass font size multiplier to rendering methods for section in text_sections: self._renderSectionToTextFrame(slide, section, styles, text_frame, font_size_multiplier) except Exception as e: logger.warning(f"Error rendering text sections in frame: {str(e)}") def _renderSectionToTextFrame(self, slide, section: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Render a single section to a text frame.""" try: from pptx.util import Pt from pptx.enum.text import PP_ALIGN from pptx.dml.color import RGBColor section_type = section.get("content_type", "paragraph") elements = section.get("elements", []) if not elements: return for element in elements: if not isinstance(element, dict): continue element_type = element.get("type", "") if not element_type: element_type = section_type # Skip images - handled separately if element_type == "image": continue if element_type == "bullet_list" or element_type == "list": self._addBulletListToSlide(slide, element, styles, text_frame, font_size_multiplier) elif element_type == "heading": self._addHeadingToSlide(slide, element, styles, text_frame, font_size_multiplier) elif element_type == "paragraph": self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier) elif element_type == "code_block" or element_type == "code": self._addCodeBlockToSlide(slide, element, styles, text_frame, font_size_multiplier) elif element_type == "extracted_text": content = element.get("content", "") source = element.get("source", "") if content: paragraph_style = styles.get("paragraph", {}) p = text_frame.add_paragraph() p.text = content base_font_size = paragraph_style.get("font_size", 18) p.font.size = Pt(int(base_font_size * font_size_multiplier)) p.font.bold = paragraph_style.get("bold", False) p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) p.alignment = PP_ALIGN.LEFT if source: p.add_run(f" (Source: {source})").font.italic = True elif element_type == "reference": label = element.get("label", "Reference") p = text_frame.add_paragraph() p.text = f"[Reference: {label}]" p.font.italic = True p.alignment = PP_ALIGN.LEFT else: # Fallback to paragraph content = element.get("content", "") if isinstance(content, dict): text = content.get("text", "") elif isinstance(content, str): text = content else: text = "" if text: self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier=1.0) except Exception as e: logger.warning(f"Error rendering section to text frame: {str(e)}") def _addImagesToSlideInFrame(self, slide, images: List[Dict[str, Any]], styles: Dict[str, Any], left: float, top: float, width: float, height: float) -> None: """Add images to slide within a specific frame area.""" try: from pptx.util import Inches, Pt from pptx.enum.text import PP_ALIGN import base64 import io if not images: logger.debug("No images to render in frame") return logger.info(f"Rendering {len(images)} image(s) in frame at ({left}, {top}), size ({width}, {height})") # Calculate image dimensions within frame if len(images) == 1: # Single image: fit to frame img = images[0] base64Data = img.get("base64Data") if not base64Data: logger.warning("Image has no base64Data") return # Clean base64 data (remove data URI prefix if present) if isinstance(base64Data, str): if base64Data.startswith("data:image/"): # Extract base64 from data URI base64Data = base64Data.split(",", 1)[1] # Remove any whitespace base64Data = base64Data.strip() try: # Decode base64 imageBytes = base64.b64decode(base64Data, validate=True) if len(imageBytes) == 0: logger.error("Decoded image bytes are empty") return imageStream = io.BytesIO(imageBytes) # Get image dimensions using PIL imgWidth, imgHeight = None, None try: from PIL import Image as PILImage pilImage = PILImage.open(imageStream) imgWidth, imgHeight = pilImage.size imageStream.seek(0) # Reset stream for PowerPoint # Validate image dimensions - ensure they're reasonable if imgWidth <= 1 or imgHeight <= 1: logger.warning(f"Image has invalid dimensions: {imgWidth}x{imgHeight}, using default size") imgWidth, imgHeight = 800, 600 imageStream.seek(0) elif imgWidth < 100 or imgHeight < 100: logger.warning(f"Image dimensions very small: {imgWidth}x{imgHeight}, may appear tiny") except ImportError: logger.warning("PIL not available, using default image size") imgWidth, imgHeight = 800, 600 # Default dimensions except Exception as pil_error: logger.warning(f"Error getting image dimensions with PIL: {str(pil_error)}, using default size") imgWidth, imgHeight = 800, 600 imageStream.seek(0) # Ensure we have valid dimensions if not imgWidth or not imgHeight or imgWidth <= 1 or imgHeight <= 1: logger.warning("Invalid image dimensions, using default 800x600") imgWidth, imgHeight = 800, 600 # Scale to fit frame while maintaining aspect ratio # width and height parameters are already in Inches (from pptx.util.Inches) # Convert PIL pixel dimensions to Inches (assuming 96 DPI for PIL images) imgWidthInches = Inches(imgWidth / 96.0) imgHeightInches = Inches(imgHeight / 96.0) # Calculate scale to fit within frame # Inches objects support division, result is a float try: scale_width = width / imgWidthInches if imgWidthInches > 0 else 1.0 scale_height = height / imgHeightInches if imgHeightInches > 0 else 1.0 scale = min(scale_width, scale_height, 1.0) # Don't scale up, only down finalWidth = imgWidthInches * scale finalHeight = imgHeightInches * scale # Ensure minimum size (at least 1 inch) to prevent tiny rendering minSize = Inches(1) if finalWidth < minSize or finalHeight < minSize: # Use minimum size while maintaining aspect ratio min_scale = max(minSize / imgWidthInches if imgWidthInches > 0 else 1.0, minSize / imgHeightInches if imgHeightInches > 0 else 1.0) finalWidth = max(minSize, imgWidthInches * min_scale) finalHeight = max(minSize, imgHeightInches * min_scale) # Ensure we don't exceed frame bounds if finalWidth > width: finalWidth = width finalHeight = imgHeightInches * (width / imgWidthInches) if imgWidthInches > 0 else finalHeight if finalHeight > height: finalHeight = height finalWidth = imgWidthInches * (height / imgHeightInches) if imgHeightInches > 0 else finalWidth except (ZeroDivisionError, TypeError, AttributeError) as calc_error: logger.warning(f"Error calculating image size: {str(calc_error)}, using frame size") finalWidth = width * 0.9 # Use 90% of frame width finalHeight = height * 0.9 # Use 90% of frame height # Center in frame frame_left = left + (width - finalWidth) / 2 frame_top = top + (height - finalHeight) / 2 # Add image to slide imageStream.seek(0) slide.shapes.add_picture(imageStream, frame_left, frame_top, width=finalWidth, height=finalHeight) logger.info(f"Successfully added image to slide at ({frame_left}, {frame_top}), size ({finalWidth}, {finalHeight})") # Add caption if available caption = img.get("caption") or img.get("altText") if caption and caption != "Image": captionTop = frame_top + finalHeight + Inches(0.1) captionBox = slide.shapes.add_textbox(left, captionTop, width, Inches(0.4)) captionFrame = captionBox.text_frame captionFrame.text = caption captionFrame.paragraphs[0].font.size = Pt(10) captionFrame.paragraphs[0].font.italic = True captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER except base64.binascii.Error as b64_error: logger.error(f"Invalid base64 data: {str(b64_error)}") except Exception as img_error: logger.error(f"Error adding image to frame: {str(img_error)}", exc_info=True) else: # Multiple images: grid layout cols = 2 if len(images) <= 4 else 3 rows = (len(images) + cols - 1) // cols imgWidth = (width - Inches(0.2) * (cols - 1)) / cols imgHeight = (height - Inches(0.2) * (rows - 1)) / rows for idx, img in enumerate(images): base64Data = img.get("base64Data") if not base64Data: logger.warning(f"Image {idx} has no base64Data") continue # Clean base64 data if isinstance(base64Data, str): if base64Data.startswith("data:image/"): base64Data = base64Data.split(",", 1)[1] base64Data = base64Data.strip().replace("\n", "").replace("\r", "").replace("\t", "").replace(" ", "") row = idx // cols col = idx % cols img_left = left + col * (imgWidth + Inches(0.2)) img_top = top + row * (imgHeight + Inches(0.2)) try: imageBytes = base64.b64decode(base64Data, validate=True) if len(imageBytes) == 0: logger.error(f"Decoded image {idx} bytes are empty") continue imageStream = io.BytesIO(imageBytes) # Try to get dimensions for better scaling try: from PIL import Image as PILImage pilImage = PILImage.open(imageStream) imgW, imgH = pilImage.size # Scale to fit grid cell while maintaining aspect ratio scale = min(imgWidth / (imgW * (72.0 / 96.0)), imgHeight / (imgH * (72.0 / 96.0)), 1.0) finalW = (imgW * (72.0 / 96.0)) * scale finalH = (imgH * (72.0 / 96.0)) * scale # Center in grid cell cell_left = img_left + (imgWidth - finalW) / 2 cell_top = img_top + (imgHeight - finalH) / 2 imageStream.seek(0) slide.shapes.add_picture(imageStream, cell_left, cell_top, width=finalW, height=finalH) except (ImportError, Exception): # Fallback: use grid cell size directly imageStream.seek(0) slide.shapes.add_picture(imageStream, img_left, img_top, width=imgWidth, height=imgHeight) logger.info(f"Successfully added image {idx+1}/{len(images)} to slide grid") except base64.binascii.Error as b64_error: logger.error(f"Invalid base64 data for image {idx}: {str(b64_error)}") except Exception as img_error: logger.error(f"Error adding image {idx} to frame: {str(img_error)}", exc_info=True) except Exception as e: logger.error(f"Error adding images to slide frame: {str(e)}", exc_info=True)