# Source listing metadata: 2160 lines, 106 KiB, Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
import logging
|
|
import base64
|
|
import io
|
|
import json
|
|
import re
|
|
from datetime import datetime, UTC
|
|
from typing import Dict, Any, Optional, List
|
|
from .rendererBaseTemplate import BaseRenderer
|
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class RendererPptx(BaseRenderer):
|
|
"""Renderer for PowerPoint (.pptx) files using python-pptx library."""
|
|
|
|
def __init__(self, services=None):
    """Set up the PowerPoint renderer.

    Args:
        services: Optional service container; forwarded unchanged to the
            base class initializer.
    """
    super().__init__(services=services)

    # Output formats this renderer advertises.
    pptxFormats = ["pptx", "ppt"]
    self.supportedFormats = pptxFormats

    # MIME type of an Office Open XML presentation.
    pptxMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
    self.outputMimeType = pptxMimeType
|
|
|
|
@classmethod
|
|
def getSupportedFormats(cls) -> list:
|
|
"""Get list of supported output formats."""
|
|
return ["pptx", "ppt"]
|
|
|
|
@classmethod
|
|
def getFormatAliases(cls) -> List[str]:
|
|
"""Return format aliases."""
|
|
return []
|
|
|
|
@classmethod
|
|
def getPriority(cls) -> int:
|
|
"""Return priority for PowerPoint renderer."""
|
|
return 105
|
|
|
|
@classmethod
|
|
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
|
|
"""Return output style classification: PowerPoint presentations are formatted documents."""
|
|
return 'document'
|
|
|
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    """Render structured JSON content as a PowerPoint presentation.

    One slide is created per entry returned by ``_parseJsonToSlides``. Every
    slide gets a title text box; slides that carry sections are delegated to
    ``_renderSlideContentWithFrames``, slides with only plain text (and no
    images) get a single content text box. When parsing yields no slides at
    all, a default title slide is produced instead.

    Args:
        extractedContent: JSON content to render as a presentation.
        title: Title for the presentation.
        userPrompt: User prompt, forwarded to ``_getStyleSet`` for AI styling.
        aiService: AI service, forwarded to ``_getStyleSet`` for AI styling.

    Returns:
        A single-element list with the rendered document. On success it holds
        the raw .pptx bytes; if python-pptx cannot be imported or rendering
        fails, it holds a plain-text error message instead (no exception is
        propagated for those cases).
    """
    pptxMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation"

    def _fallbackDocument(message: str) -> list:
        # Plain-text stand-in returned when the presentation cannot be built.
        metadata = extractedContent.get("metadata", {}) if extractedContent else {}
        documentType = metadata.get("documentType") if isinstance(metadata, dict) else None
        return [
            RenderedDocument(
                documentData=message.encode('utf-8'),
                mimeType="text/plain",
                filename=self._determineFilename(title, "text/plain"),
                documentType=documentType,
                metadata=metadata if isinstance(metadata, dict) else None
            )
        ]

    try:
        # python-pptx is imported lazily so a missing install degrades to the
        # text fallback below instead of breaking module import.
        from pptx import Presentation
        from pptx.util import Inches, Pt
        from pptx.enum.text import PP_ALIGN
        from pptx.dml.color import RGBColor

        # Styles come from document metadata when present, otherwise from AI/defaults.
        styles = await self._getStyleSet(extractedContent, userPrompt, aiService)

        prs = Presentation()

        # Slide size: 4:3 only when explicitly requested, otherwise 16:9.
        if styles.get("slide_size", "16:9") == "4:3":
            prs.slide_width = Inches(10)
        else:
            prs.slide_width = Inches(13.33)
        prs.slide_height = Inches(7.5)

        slidesData = await self._parseJsonToSlides(extractedContent, title, styles)
        logger.info(f"Parsed {len(slidesData)} slides from JSON content")
        logger.info(f"JSON content preview: {str(extractedContent)[:200]}...")

        # Store prs reference for image methods
        self._currentPresentation = prs

        # Pick the slide layout once; it does not depend on the slide being added.
        # Preference: a placeholder-free layout at index 6 or 5, then the layout
        # with the fewest placeholders, then index 0.
        layouts = prs.slide_layouts
        slideLayoutIndex = None
        for idx in (6, 5):
            if idx < len(layouts):
                try:
                    if len(layouts[idx].placeholders) == 0:
                        slideLayoutIndex = idx
                        break
                except (AttributeError, IndexError):
                    continue

        if slideLayoutIndex is None:
            min_placeholders = float('inf')
            for idx in range(len(layouts)):
                try:
                    layout = layouts[idx]
                    placeholder_count = len(layout.placeholders) if hasattr(layout, 'placeholders') else 0
                except Exception:
                    continue
                if placeholder_count < min_placeholders:
                    min_placeholders = placeholder_count
                    slideLayoutIndex = idx

        if slideLayoutIndex is None:
            slideLayoutIndex = 0
        slide_layout = layouts[slideLayoutIndex]

        title_style = styles.get("title", {})
        paragraph_style = styles.get("paragraph", {})

        for i, slide_data in enumerate(slidesData):
            slide_sections = slide_data.get("sections") or []
            slide_images = list(slide_data.get("images") or [])  # Copy so it can be appended to
            slide_content = slide_data.get('content') or ''
            hasSections = len(slide_sections) > 0
            hasImages = len(slide_images) > 0

            logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - sections: {len(slide_sections)}, images: {len(slide_images)}, content: {len(slide_content)} chars")

            slide = prs.slides.add_slide(slide_layout)

            # Clear placeholder text instead of removing placeholders, which
            # could corrupt the PPTX structure. Best effort: failures are
            # logged and rendering continues.
            try:
                for shape in slide.shapes:
                    if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
                        try:
                            if hasattr(shape, 'text_frame'):
                                shape.text_frame.clear()
                                # Empty string removes the "Click to add text" prompt
                                if len(shape.text_frame.paragraphs) > 0:
                                    shape.text_frame.paragraphs[0].text = ""
                        except Exception as shape_error:
                            logger.debug(f"Could not clear placeholder shape: {str(shape_error)}")
            except Exception as placeholder_error:
                logger.warning(f"Could not clear placeholders: {str(placeholder_error)}")

            # Title is drawn as a plain text box so layout placeholders do not interfere.
            titleBox = slide.shapes.add_textbox(Inches(0.5), Inches(0.2), prs.slide_width - Inches(1), Inches(0.6))
            titleFrame = titleBox.text_frame
            titleFrame.text = slide_data.get("title", "Slide")
            # Clamp title size to 10..32pt so it fits the 0.6in title box.
            title_font_size = max(10, min(title_style.get("font_size", 32), 32))
            titleParagraph = titleFrame.paragraphs[0]
            titleParagraph.font.size = Pt(title_font_size)
            titleParagraph.font.bold = title_style.get("bold", True)
            title_color = self._getSafeColor(title_style.get("color", (31, 78, 121)))
            titleParagraph.font.color.rgb = RGBColor(*title_color)
            titleParagraph.alignment = PP_ALIGN.LEFT
            titleFrame.word_wrap = True

            if hasSections:
                # Sections (tables, lists, images, ...) are laid out by the frame renderer.
                self._renderSlideContentWithFrames(slide, slide_sections, slide_images, styles, prs)

            elif slide_content and not hasImages:
                # Fallback: plain text content in a single text box below the title.
                content_left = Inches(0.5)
                content_top = Inches(1.0) + Inches(0.3)
                content_width = prs.slide_width - Inches(1)
                content_height = prs.slide_height - content_top - Inches(0.5)
                content_textbox = slide.shapes.add_textbox(content_left, content_top, content_width, content_height)
                text_frame = content_textbox.text_frame
                text_frame.word_wrap = True
                text_frame.auto_size = None

                # Shrink the font (down to 60% of base, never below 6pt) when the
                # text is estimated not to fit the box at ~10pt/char, ~14pt/line.
                base_font_size = paragraph_style.get("font_size", 18)
                try:
                    chars_per_line = max(1, int(content_width / Pt(10)))
                    lines_needed = len(slide_content) / chars_per_line
                    available_lines = max(1, int(content_height / Pt(14)))
                    font_multiplier = 1.0
                    if available_lines > 0 and lines_needed > available_lines:
                        font_multiplier = max(0.6, min(1.0, (available_lines / lines_needed) * 1.1))
                    calculated_size = max(6, int(base_font_size * font_multiplier))
                except (ZeroDivisionError, ValueError, TypeError):
                    calculated_size = max(6, base_font_size)

                paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))
                align = paragraph_style.get("align", "left")
                if align == "center":
                    paragraph_alignment = PP_ALIGN.CENTER
                elif align == "right":
                    paragraph_alignment = PP_ALIGN.RIGHT
                else:
                    paragraph_alignment = PP_ALIGN.LEFT

                # NOTE(review): paragraphs are appended after the text frame's
                # initial (empty) paragraph, as in the previous implementation.
                for paragraph in slide_content.split('\n\n'):
                    if not paragraph.strip():
                        continue
                    p = text_frame.add_paragraph()
                    p.text = paragraph.strip()
                    p.font.size = Pt(calculated_size)
                    p.font.bold = paragraph_style.get("bold", False)
                    p.font.color.rgb = RGBColor(*paragraph_color)
                    p.alignment = paragraph_alignment

        # If no slides were created, create a default title slide
        if not slidesData:
            slide = prs.slides.add_slide(prs.slide_layouts[0])  # Title slide layout

            title_shape = slide.shapes.title
            title_shape.text = title
            titleParagraph = title_shape.text_frame.paragraphs[0]
            if titleParagraph.font:
                titleParagraph.font.size = Pt(title_style.get("font_size", 48))
                titleParagraph.font.bold = title_style.get("bold", True)
                title_color = self._getSafeColor(title_style.get("color", (31, 78, 121)))
                titleParagraph.font.color.rgb = RGBColor(*title_color)

            subtitle_shape = slide.placeholders[1]
            subtitle_shape.text = "Generated by PowerOn AI System"
            subtitleParagraph = subtitle_shape.text_frame.paragraphs[0]
            if subtitleParagraph.font:
                subtitleParagraph.font.size = Pt(paragraph_style.get("font_size", 20))
                subtitleParagraph.font.bold = paragraph_style.get("bold", False)
                # Fixed: previously called the non-existent self._get_safe_color.
                paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))
                subtitleParagraph.font.color.rgb = RGBColor(*paragraph_color)

        # Serialize the presentation in memory
        buffer = io.BytesIO()
        prs.save(buffer)
        pptx_bytes = buffer.getvalue()

        logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes")

        # Filename: prefer the first document's own filename, else derive from title
        documents = extractedContent.get("documents", []) if extractedContent else []
        filename = None
        if documents and isinstance(documents[0], dict):
            filename = documents[0].get("filename")
        if not filename:
            filename = self._determineFilename(title, pptxMimeType)

        metadata = extractedContent.get("metadata", {}) if extractedContent else {}
        documentType = metadata.get("documentType") if isinstance(metadata, dict) else None

        return [
            RenderedDocument(
                documentData=pptx_bytes,
                mimeType=pptxMimeType,
                filename=filename,
                documentType=documentType,
                metadata=metadata if isinstance(metadata, dict) else None
            )
        ]

    except ImportError:
        logger.error("python-pptx library not installed. Install with: pip install python-pptx")
        return _fallbackDocument("python-pptx library not installed")

    except Exception as e:
        logger.error(f"Error rendering PowerPoint presentation: {str(e)}", exc_info=True)
        return _fallbackDocument(f"Error rendering PowerPoint presentation: {str(e)}")
|
|
|
|
def _parseContentToSlides(self, content: str, title: str) -> list:
|
|
"""
|
|
Parse content into slide data structure.
|
|
|
|
Args:
|
|
content: Content to parse
|
|
title: Presentation title
|
|
|
|
Returns:
|
|
List of slide data dictionaries
|
|
"""
|
|
slides = []
|
|
|
|
# Split content by slide markers or headers
|
|
slide_sections = self._splitContentIntoSlides(content)
|
|
|
|
for i, section in enumerate(slide_sections):
|
|
if section.strip():
|
|
slide_data = {
|
|
"title": f"Slide {i + 1}",
|
|
"content": section.strip()
|
|
}
|
|
|
|
# Extract title from content if it starts with #
|
|
lines = section.strip().split('\n')
|
|
if lines and lines[0].startswith('#'):
|
|
# Remove # symbols and clean up title
|
|
slide_title = lines[0].lstrip('#').strip()
|
|
slide_data["title"] = slide_title
|
|
slide_data["content"] = '\n'.join(lines[1:]).strip()
|
|
elif lines and lines[0].strip():
|
|
# Use first line as title if it looks like a title
|
|
first_line = lines[0].strip()
|
|
if len(first_line) < 100 and not first_line.endswith('.'):
|
|
slide_data["title"] = first_line
|
|
slide_data["content"] = '\n'.join(lines[1:]).strip()
|
|
|
|
slides.append(slide_data)
|
|
|
|
return slides
|
|
|
|
def _splitContentIntoSlides(self, content: str) -> list:
|
|
"""
|
|
Split content into individual slides based on headers and structure.
|
|
|
|
Args:
|
|
content: Content to split
|
|
|
|
Returns:
|
|
List of slide content strings
|
|
"""
|
|
# re is already imported at module level
|
|
|
|
# First, try to split by major headers (# or ##)
|
|
# This is the most common case for AI-generated content
|
|
header_pattern = r'^(#{1,2})\s+(.+)$'
|
|
lines = content.split('\n')
|
|
slides = []
|
|
current_slide = []
|
|
|
|
for line in lines:
|
|
# Check if this line is a header
|
|
header_match = re.match(header_pattern, line.strip())
|
|
if header_match:
|
|
# If we have content in current slide, save it
|
|
if current_slide:
|
|
slide_content = '\n'.join(current_slide).strip()
|
|
if slide_content:
|
|
slides.append(slide_content)
|
|
current_slide = []
|
|
|
|
# Start new slide with this header
|
|
current_slide.append(line)
|
|
else:
|
|
# Add line to current slide
|
|
current_slide.append(line)
|
|
|
|
# Add the last slide
|
|
if current_slide:
|
|
slide_content = '\n'.join(current_slide).strip()
|
|
if slide_content:
|
|
slides.append(slide_content)
|
|
|
|
# If we found slides with headers, return them
|
|
if len(slides) > 1:
|
|
return slides
|
|
|
|
# Fallback: Split by double newlines
|
|
sections = content.split('\n\n\n')
|
|
if len(sections) > 1:
|
|
return [s.strip() for s in sections if s.strip()]
|
|
|
|
# Another fallback: Split by double newlines
|
|
sections = content.split('\n\n')
|
|
if len(sections) > 1:
|
|
return [s.strip() for s in sections if s.strip()]
|
|
|
|
# Last resort: return as single slide
|
|
return [content.strip()]
|
|
|
|
|
|
def getOutputMimeType(self) -> str:
|
|
"""Get MIME type for rendered output."""
|
|
return self.outputMimeType
|
|
|
|
async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
    """Resolve the style set used for rendering.

    Resolution order:

    1. ``extractedContent["metadata"]["styles"]`` when it is a non-empty dict
       (colors converted, font sizes validated).
    2. AI-enhanced default styles when both ``userPrompt`` and ``aiService``
       are given (font sizes validated).
    3. The plain default style set.

    IMPORTANT: styling is expected to come from document generation; the AI
    path is only a fallback for when no styles are provided in the metadata.

    Args:
        extractedContent: Document content with metadata (may contain styles).
        userPrompt: User's prompt, handed to the AI enhancement step.
        aiService: AI service, used only when metadata carries no styles.
        templateName: Name of a template style set (not used by this method).

    Returns:
        Dict with style definitions.
    """
    fallbackStyles = self._getDefaultStyleSet()

    # Preferred source: styles shipped with the document metadata.
    documentMeta = extractedContent.get("metadata", {}) if extractedContent else None
    providedStyles = documentMeta.get("styles") if isinstance(documentMeta, dict) else None
    if providedStyles and isinstance(providedStyles, dict):
        # NOTE(review): self.logger is presumably supplied by the base class - confirm.
        self.logger.debug("Using styles from document generation metadata")
        convertedStyles = self._convertColorsFormat(providedStyles)
        return self._validateStylesReadability(convertedStyles)

    # Without both a prompt and an AI service there is nothing to enhance.
    if not (userPrompt and aiService):
        return fallbackStyles

    self.logger.info("Styles not in metadata, enhancing with AI based on user prompt...")
    # Color conversion already happens inside the AI enhancement helper chain.
    aiStyles = await self._enhanceStylesWithAI(userPrompt, fallbackStyles, aiService)
    return self._validateStylesReadability(aiStyles)
|
|
|
|
async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
    """Ask the AI service to customize the default styles for the user prompt.

    Builds the prompt with ``_createProfessionalPptxTemplate`` and delegates
    to ``_getAiStylesWithPptxColors``. Any exception is logged as a warning
    and ``defaultStyleSet`` is returned unchanged.
    """
    try:
        promptText = self._createProfessionalPptxTemplate(userPrompt, defaultStyleSet)
        return await self._getAiStylesWithPptxColors(aiService, promptText, defaultStyleSet)
    except Exception as e:
        self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles")
        return defaultStyleSet
|
|
|
|
def _validateStylesReadability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Validate and fix readability issues in AI-generated styles."""
|
|
try:
|
|
# Ensure minimum font sizes for PowerPoint readability
|
|
min_font_sizes = {
|
|
"title": 36,
|
|
"heading": 24,
|
|
"subheading": 20,
|
|
"paragraph": 14,
|
|
"bullet_list": 14,
|
|
"table_header": 12,
|
|
"table_cell": 12
|
|
}
|
|
|
|
for style_name, min_size in min_font_sizes.items():
|
|
if style_name in styles:
|
|
current_size = styles[style_name].get("font_size", 12)
|
|
if current_size < min_size:
|
|
styles[style_name]["font_size"] = min_size
|
|
|
|
return styles
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Style validation failed: {str(e)}")
|
|
return self._getDefaultStyleSet()
|
|
|
|
def _getDefaultStyleSet(self) -> Dict[str, Any]:
|
|
"""Default PowerPoint style set - used when no style instructions present."""
|
|
return {
|
|
"title": {"font_size": 32, "color": "#1B365D", "bold": True, "align": "left"},
|
|
"heading": {"font_size": 24, "color": "#1B365D", "bold": True, "align": "left"},
|
|
"subheading": {"font_size": 20, "color": "#4A90E2", "bold": True, "align": "left"},
|
|
"paragraph": {"font_size": 14, "color": "#2F2F2F", "bold": False, "align": "left"},
|
|
"bullet_list": {"font_size": 14, "color": "#2F2F2F", "indent": 20},
|
|
"table_header": {"font_size": 18, "color": "#FFFFFF", "bold": True, "background": "#1B365D"},
|
|
"table_cell": {"font_size": 16, "color": "#2F2F2F", "bold": False, "background": "#F8F9FA"},
|
|
"slide_size": "16:9",
|
|
"content_per_slide": "concise",
|
|
"design_theme": "corporate",
|
|
"color_scheme": "professional",
|
|
"background_style": "clean",
|
|
"accent_colors": ["#1B365D", "#2C5F2D", "#4A90E2", "#6B7280"],
|
|
"professional_grade": True,
|
|
"executive_ready": True
|
|
}
|
|
|
|
def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str:
|
|
"""Create a professional PowerPoint-specific AI style template for corporate-quality slides."""
|
|
# json is already imported at module level
|
|
schema_json = json.dumps(style_schema, indent=4)
|
|
|
|
return f"""Customize the JSON below for professional PowerPoint slides.
|
|
|
|
User Request: {userPrompt or "Create professional corporate slides"}
|
|
|
|
Rules:
|
|
- Use professional colors (blues, grays, deep greens)
|
|
- Large, readable font sizes
|
|
- High contrast
|
|
- Sophisticated color palettes
|
|
|
|
Return ONLY this JSON with your changes:
|
|
|
|
{schema_json}
|
|
|
|
JSON ONLY. NO OTHER TEXT."""
|
|
|
|
async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Get AI styles with proper PowerPoint color conversion. Uses base _getAiStyles for debug file writing."""
|
|
if not aiService:
|
|
return default_styles
|
|
|
|
try:
|
|
# Use base template method which handles debug file writing
|
|
enhanced_styles = await self._getAiStyles(aiService, style_template, default_styles)
|
|
|
|
# Convert colors to PPTX format (RGB tuples)
|
|
return self._convertColorsFormat(enhanced_styles)
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"AI style enhancement failed: {str(e)}, using defaults")
|
|
return default_styles
|
|
|
|
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Convert hex colors to RGB format for PowerPoint compatibility."""
|
|
try:
|
|
for style_name, style_config in styles.items():
|
|
if isinstance(style_config, dict):
|
|
for prop, value in style_config.items():
|
|
if isinstance(value, str) and value.startswith('#'):
|
|
# Convert hex to RGB tuple for PowerPoint
|
|
hex_color = value.lstrip('#')
|
|
if len(hex_color) == 6:
|
|
r = int(hex_color[0:2], 16)
|
|
g = int(hex_color[2:4], 16)
|
|
b = int(hex_color[4:6], 16)
|
|
styles[style_name][prop] = (r, g, b)
|
|
elif len(hex_color) == 8: # aRGB format
|
|
r = int(hex_color[2:4], 16)
|
|
g = int(hex_color[4:6], 16)
|
|
b = int(hex_color[6:8], 16)
|
|
styles[style_name][prop] = (r, g, b)
|
|
return styles
|
|
except Exception as e:
|
|
self.logger.warning(f"Color conversion failed: {str(e)}")
|
|
return styles
|
|
|
|
def _getSafeColor(self, color_value, default=(0, 0, 0)) -> tuple:
|
|
"""Get a safe RGB color tuple for PowerPoint."""
|
|
if isinstance(color_value, tuple) and len(color_value) == 3:
|
|
return color_value
|
|
elif isinstance(color_value, str) and color_value.startswith('#'):
|
|
hex_color = color_value.lstrip('#')
|
|
if len(hex_color) == 6:
|
|
r = int(hex_color[0:2], 16)
|
|
g = int(hex_color[2:4], 16)
|
|
b = int(hex_color[4:6], 16)
|
|
return (r, g, b)
|
|
elif len(hex_color) == 8: # aRGB format
|
|
r = int(hex_color[2:4], 16)
|
|
g = int(hex_color[4:6], 16)
|
|
b = int(hex_color[6:8], 16)
|
|
return (r, g, b)
|
|
return default
|
|
|
|
|
|
async def _parseJsonToSlides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Convert standardized JSON content into a list of slide dicts.

    The result always starts with a title slide. Content slides come from
    ``_createSlidesFromSections``; when that yields nothing, a single
    "Content Overview" placeholder slide is added. If validation or parsing
    fails, the error is logged and one error slide is returned.

    Args:
        json_content: JSON content to parse (schema:
            ``{metadata: {...}, documents: [{sections: [...]}]}``).
        title: Presentation title; takes precedence over ``metadata["title"]``.
        styles: Style set passed through to slide creation.

    Returns:
        List of slide data dictionaries.
    """
    try:
        if not self._validateJsonStructure(json_content):
            raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

        sections = self._extractSections(json_content)
        metadata = self._extractMetadata(json_content)

        # The title argument wins; metadata is only consulted when it is empty.
        documentTitle = title or metadata.get("title", "Generated Document")

        titleSlide = {
            "title": documentTitle,
            "content": "Generated by PowerOn AI System\n\n" + self._formatTimestamp()
        }

        contentSlides = list(self._createSlidesFromSections(sections, styles))
        if not contentSlides:
            # Nothing usable in the sections: show an explanatory slide.
            contentSlides = [{
                "title": "Content Overview",
                "content": "No structured content found in the source documents.\n\nPlease check the source documents and try again."
            }]

        return [titleSlide, *contentSlides]

    except Exception as e:
        logger.error(f"Error parsing JSON to slides: {str(e)}")
        # Minimal fallback so the caller still has a slide to render.
        errorSlide = {
            "title": title,
            "content": "Error parsing content for presentation"
        }
        return [errorSlide]
|
|
|
|
def _createSlideFromSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Create a slide from a JSON section."""
|
|
try:
|
|
# Get section title from data or use default
|
|
section_title = "Untitled Section"
|
|
if section.get("content_type") == "heading":
|
|
# Extract text from elements array - use nested content structure
|
|
for element in section.get("elements", []):
|
|
if isinstance(element, dict):
|
|
content = element.get("content", {})
|
|
if isinstance(content, dict):
|
|
text = content.get("text", "")
|
|
if text:
|
|
section_title = text
|
|
break
|
|
elif section.get("title"):
|
|
section_title = section.get("title")
|
|
|
|
content_type = section.get("content_type", "paragraph")
|
|
elements = section.get("elements", [])
|
|
|
|
# Check for three content formats from Phase 5D in elements
|
|
content_parts = []
|
|
for element in elements:
|
|
if not isinstance(element, dict):
|
|
continue
|
|
|
|
element_type = element.get("type", "")
|
|
|
|
# Support three content formats from Phase 5D
|
|
if element_type == "reference":
|
|
# Document reference format
|
|
doc_ref = element.get("documentReference", "")
|
|
label = element.get("label", "Reference")
|
|
content_parts.append(f"[Reference: {label}]")
|
|
continue
|
|
elif element_type == "extracted_text":
|
|
# Extracted text format
|
|
content = element.get("content", "")
|
|
source = element.get("source", "")
|
|
if content:
|
|
source_text = f" (Source: {source})" if source else ""
|
|
content_parts.append(f"{content}{source_text}")
|
|
continue
|
|
|
|
# Handle image sections specially
|
|
if content_type == "image":
|
|
# Extract image data from nested content structure
|
|
images = []
|
|
for element in elements:
|
|
if isinstance(element, dict):
|
|
# Extract from nested content structure
|
|
content = element.get("content", {})
|
|
if isinstance(content, dict):
|
|
base64Data = content.get("base64Data")
|
|
altText = content.get("altText", "Image")
|
|
caption = content.get("caption", "")
|
|
else:
|
|
# Fallback to direct element fields
|
|
base64Data = element.get("base64Data")
|
|
altText = element.get("altText", "Image")
|
|
caption = element.get("caption", "")
|
|
|
|
if base64Data:
|
|
images.append({
|
|
"base64Data": base64Data,
|
|
"altText": altText,
|
|
"caption": caption
|
|
})
|
|
|
|
return {
|
|
"title": section_title or (elements[0].get("content", {}).get("altText", "Image") if elements and isinstance(elements[0], dict) else "Image"),
|
|
"content": "\n\n".join(content_parts) if content_parts else "", # Include reference/extracted_text if present
|
|
"images": images
|
|
}
|
|
|
|
# Build slide content based on section type - iterate over elements and format each
|
|
if not content_parts: # Only if we didn't process reference/extracted_text above
|
|
for element in elements:
|
|
if not isinstance(element, dict):
|
|
continue
|
|
|
|
element_type = element.get("type", "")
|
|
# Use element type if available, otherwise fall back to section content_type
|
|
if not element_type:
|
|
element_type = content_type
|
|
|
|
if element_type == "table":
|
|
formatted = self._formatTableForSlide(element)
|
|
if formatted:
|
|
content_parts.append(formatted)
|
|
elif element_type == "bullet_list" or element_type == "list":
|
|
formatted = self._formatListForSlide(element)
|
|
if formatted:
|
|
content_parts.append(formatted)
|
|
elif element_type == "heading":
|
|
formatted = self._formatHeadingForSlide(element)
|
|
if formatted:
|
|
content_parts.append(formatted)
|
|
elif element_type == "paragraph":
|
|
formatted = self._formatParagraphForSlide(element)
|
|
if formatted:
|
|
content_parts.append(formatted)
|
|
elif element_type == "code_block" or element_type == "code":
|
|
formatted = self._formatCodeForSlide(element)
|
|
if formatted:
|
|
content_parts.append(formatted)
|
|
else:
|
|
# Fallback to paragraph formatting
|
|
formatted = self._formatParagraphForSlide(element)
|
|
if formatted:
|
|
content_parts.append(formatted)
|
|
|
|
# Combine content parts
|
|
slide_content = "\n\n".join(filter(None, content_parts))
|
|
|
|
return {
|
|
"title": section_title,
|
|
"content": slide_content,
|
|
"images": [] # No images for non-image sections
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error creating slide from section: {str(e)}")
|
|
return None
|
|
|
|
def _formatTableForSlide(self, element: Dict[str, Any]) -> str:
|
|
"""Format table data for slide presentation."""
|
|
try:
|
|
# Extract table data from element - handle nested content structure
|
|
if not isinstance(element, dict):
|
|
return ""
|
|
|
|
# Extract from nested content structure
|
|
content = element.get("content", {})
|
|
if not isinstance(content, dict):
|
|
return ""
|
|
|
|
headers = content.get("headers", [])
|
|
rows = content.get("rows", [])
|
|
|
|
if not headers:
|
|
return ""
|
|
|
|
# Create table representation
|
|
table_lines = []
|
|
|
|
# Add headers
|
|
header_line = " | ".join(str(h) for h in headers)
|
|
table_lines.append(header_line)
|
|
|
|
# Add separator
|
|
separator = "-" * len(header_line)
|
|
table_lines.append(separator)
|
|
|
|
# Add data rows (limit based on content density)
|
|
max_rows = 5 # Default limit
|
|
for row in rows[:max_rows]:
|
|
row_line = " | ".join(str(cell) for cell in row)
|
|
table_lines.append(row_line)
|
|
|
|
if len(rows) > max_rows:
|
|
table_lines.append(f"... and {len(rows) - max_rows} more rows")
|
|
|
|
return "\n".join(table_lines)
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error formatting table for slide: {str(e)}")
|
|
return ""
|
|
|
|
def _formatListForSlide(self, list_data: Dict[str, Any]) -> str:
|
|
"""Format list data for slide presentation."""
|
|
try:
|
|
# Extract from nested content structure
|
|
content = list_data.get("content", {})
|
|
if not isinstance(content, dict):
|
|
return ""
|
|
items = content.get("items", [])
|
|
|
|
if not items:
|
|
return ""
|
|
|
|
# Create list representation
|
|
list_lines = []
|
|
|
|
for item in items:
|
|
if isinstance(item, dict):
|
|
text = item.get("text", "")
|
|
list_lines.append(f"• {text}")
|
|
|
|
# Add subitems (limit to 3 for readability)
|
|
subitems = item.get("subitems", [])[:3]
|
|
for subitem in subitems:
|
|
if isinstance(subitem, dict):
|
|
list_lines.append(f" - {subitem.get('text', '')}")
|
|
else:
|
|
list_lines.append(f" - {subitem}")
|
|
else:
|
|
list_lines.append(f"• {str(item)}")
|
|
|
|
return "\n".join(list_lines)
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error formatting list for slide: {str(e)}")
|
|
return ""
|
|
|
|
def _formatHeadingForSlide(self, heading_data: Dict[str, Any]) -> str:
|
|
"""Format heading data for slide presentation."""
|
|
try:
|
|
# Extract from nested content structure
|
|
content = heading_data.get("content", {})
|
|
if not isinstance(content, dict):
|
|
return ""
|
|
text = content.get("text", "")
|
|
level = content.get("level", 1)
|
|
|
|
if text:
|
|
return f"{'#' * level} {text}"
|
|
|
|
return ""
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error formatting heading for slide: {str(e)}")
|
|
return ""
|
|
|
|
def _formatParagraphForSlide(self, paragraph_data: Dict[str, Any]) -> str:
|
|
"""Format paragraph data for slide presentation."""
|
|
try:
|
|
# Extract from nested content structure
|
|
content = paragraph_data.get("content", {})
|
|
if isinstance(content, dict):
|
|
text = content.get("text", "")
|
|
elif isinstance(content, str):
|
|
text = content
|
|
else:
|
|
text = ""
|
|
|
|
if text:
|
|
# Limit paragraph length based on content density
|
|
max_length = 200 # Default limit
|
|
if len(text) > max_length:
|
|
text = text[:max_length] + "..."
|
|
|
|
return text
|
|
|
|
return ""
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error formatting paragraph for slide: {str(e)}")
|
|
return ""
|
|
|
|
def _formatCodeForSlide(self, code_data: Dict[str, Any]) -> str:
|
|
"""Format code data for slide presentation."""
|
|
try:
|
|
# Extract from nested content structure
|
|
content = code_data.get("content", {})
|
|
if not isinstance(content, dict):
|
|
return ""
|
|
code = content.get("code", "")
|
|
language = content.get("language", "")
|
|
|
|
if code:
|
|
# Limit code length based on content density
|
|
max_length = 100 # Default limit
|
|
if len(code) > max_length:
|
|
code = code[:max_length] + "..."
|
|
|
|
if language:
|
|
return f"Code ({language}):\n{code}"
|
|
else:
|
|
return f"Code:\n{code}"
|
|
|
|
return ""
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error formatting code for slide: {str(e)}")
|
|
return ""
|
|
|
|
def _getSlideLayoutIndex(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
|
|
"""Determine the best professional slide layout based on content."""
|
|
try:
|
|
content = slide_data.get("content", "")
|
|
title = slide_data.get("title", "")
|
|
|
|
# Check if it's a title slide (first slide)
|
|
if not content or "Generated by PowerOn AI System" in content:
|
|
return 0 # Title slide layout
|
|
|
|
# Professional layout selection based on content
|
|
if "|" in content and "-" in content:
|
|
# Has both tables and lists - use content with caption for professional look
|
|
return 2
|
|
elif "|" in content:
|
|
# Has tables - use content layout for clean table presentation
|
|
return 1
|
|
elif content.count("•") > 2:
|
|
# Has many bullet points - use content layout for better readability
|
|
return 1
|
|
elif len(content) > 200:
|
|
# Long content - use content layout for better text flow
|
|
return 1
|
|
elif title and len(title) > 20:
|
|
# Long title - use title and content layout
|
|
return 1
|
|
else:
|
|
# Default to title and content layout for professional appearance
|
|
return 1
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error determining slide layout: {str(e)}")
|
|
return 1 # Default to title and content layout
|
|
|
|
def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
"""Create slides from sections: each heading level 1 (chapter) creates a new slide, content accumulates until next level 1 heading."""
|
|
try:
|
|
slides = []
|
|
current_slide_sections = [] # Store sections (not formatted text) for proper rendering
|
|
current_slide_title = "Content Overview"
|
|
|
|
for section in sections:
|
|
section_type = section.get("content_type", "paragraph")
|
|
elements = section.get("elements", [])
|
|
|
|
# Skip sections with no elements (unless they're headings that should create new slides)
|
|
if not elements and section_type != "heading":
|
|
continue
|
|
|
|
if section_type == "heading":
|
|
# Extract heading level
|
|
level = 1 # Default
|
|
heading_text = ""
|
|
for element in elements:
|
|
if isinstance(element, dict):
|
|
# Extract from nested content structure
|
|
content = element.get("content", {})
|
|
if isinstance(content, dict):
|
|
heading_text = content.get("text", "")
|
|
level = content.get("level", 1)
|
|
elif isinstance(content, str):
|
|
heading_text = content
|
|
level = 1
|
|
|
|
# Only level 1 headings (chapters) create new slides
|
|
if level == 1:
|
|
# If we have accumulated content, create a slide
|
|
if current_slide_sections:
|
|
slides.append({
|
|
"title": current_slide_title,
|
|
"sections": current_slide_sections.copy(), # Store sections for proper rendering
|
|
"images": []
|
|
})
|
|
current_slide_sections = []
|
|
|
|
# Start new slide with heading as title
|
|
if heading_text:
|
|
current_slide_title = heading_text
|
|
else:
|
|
# If no heading text found but this is a heading section, use section ID or default
|
|
current_slide_title = section.get("id", "Untitled Section")
|
|
else:
|
|
# Level 2+ headings are added as sections to current slide
|
|
current_slide_sections.append(section)
|
|
elif section_type == "image":
|
|
# Images are added to current slide (will be organized in frames)
|
|
current_slide_sections.append(section)
|
|
else:
|
|
# Add section to current slide (will be rendered properly)
|
|
current_slide_sections.append(section)
|
|
|
|
# Add final slide if there's content
|
|
if current_slide_sections:
|
|
slides.append({
|
|
"title": current_slide_title,
|
|
"sections": current_slide_sections.copy(),
|
|
"images": []
|
|
})
|
|
|
|
return slides
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error creating slides from sections: {str(e)}")
|
|
return []
|
|
|
|
def _formatSectionContent(self, section: Dict[str, Any]) -> str:
|
|
"""Format section content for slide presentation."""
|
|
try:
|
|
content_type = section.get("content_type", "paragraph")
|
|
elements = section.get("elements", [])
|
|
|
|
# Image sections return empty content (handled separately)
|
|
if content_type == "image":
|
|
return ""
|
|
|
|
# Process each element in the section - use element type, not section type
|
|
content_parts = []
|
|
for element in elements:
|
|
if not isinstance(element, dict):
|
|
continue
|
|
|
|
element_type = element.get("type", "")
|
|
# Use element type if available, otherwise fall back to section content_type
|
|
if not element_type:
|
|
element_type = content_type
|
|
|
|
if element_type == "table":
|
|
formatted = self._formatTableForSlide(element)
|
|
if formatted:
|
|
content_parts.append(formatted)
|
|
elif element_type == "bullet_list" or element_type == "list":
|
|
formatted = self._formatListForSlide(element)
|
|
if formatted:
|
|
content_parts.append(formatted)
|
|
elif element_type == "heading":
|
|
formatted = self._formatHeadingForSlide(element)
|
|
if formatted:
|
|
content_parts.append(formatted)
|
|
elif element_type == "paragraph":
|
|
formatted = self._formatParagraphForSlide(element)
|
|
if formatted:
|
|
content_parts.append(formatted)
|
|
elif element_type == "code_block" or element_type == "code":
|
|
formatted = self._formatCodeForSlide(element)
|
|
if formatted:
|
|
content_parts.append(formatted)
|
|
else:
|
|
# Fallback to paragraph formatting
|
|
formatted = self._formatParagraphForSlide(element)
|
|
if formatted:
|
|
content_parts.append(formatted)
|
|
|
|
return "\n\n".join(filter(None, content_parts))
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error formatting section content: {str(e)}")
|
|
return ""
|
|
|
|
def _addImagesToSlide(self, slide, images: List[Dict[str, Any]], styles: Dict[str, Any]) -> None:
    """Add images to a PowerPoint slide.

    A single image is scaled down (never up) to fit 90% of the area below
    the title, centered, and followed by an italic caption when one is
    given. Several images are placed in a 2- or 3-column grid.

    Args:
        slide: Target slide; pictures and text boxes are added to ``slide.shapes``.
        images: Dicts with ``base64Data`` and optional ``caption`` / ``altText``.
        styles: Unused here; kept for signature compatibility.

    Errors are logged, never raised.
    """
    try:
        from pptx.util import Inches, Pt
        from pptx.enum.text import PP_ALIGN
        import base64
        import io

        if not images:
            return

        # Get slide dimensions from presentation
        if hasattr(self, '_currentPresentation'):
            prs = self._currentPresentation
        else:
            # NOTE(review): python-pptx slides may not expose ``presentation``;
            # if so this raises and is logged by the outer handler - confirm.
            prs = slide.presentation
        slideWidth = prs.slide_width
        slideHeight = prs.slide_height
        titleHeight = Inches(1.5)  # Approximate title height

        # Available area for images (all values are EMU)
        availableWidth = slideWidth - Inches(1)  # Margins
        availableHeight = slideHeight - titleHeight - Inches(1)  # Title + margins

        if len(images) == 1:
            # Single image: center it
            img = images[0]
            base64Data = img.get("base64Data")
            if not isinstance(base64Data, str) or not base64Data.strip():
                logger.error(f"Invalid base64Data: present={bool(base64Data)}, type={type(base64Data)}, length={len(base64Data) if isinstance(base64Data, (str, bytes)) else 0}")
                return

            try:
                imageBytes = base64.b64decode(base64Data)
            except Exception as decode_error:
                logger.error(f"Failed to decode base64 image data: {str(decode_error)}")
                return
            if len(imageBytes) == 0:
                logger.error("Decoded image bytes are empty")
                return

            try:
                from PIL import Image as PILImage
                # A separate stream is used for measuring so the stream handed
                # to python-pptx is never touched.
                imgWidth, imgHeight = PILImage.open(io.BytesIO(imageBytes)).size

                # Slide geometry is in EMU, so the pixel size must be converted
                # to EMU as well (914400 EMU per inch, 96 px per inch assumed).
                # The previous points-based value was compared against EMU and
                # then passed as EMU, producing a microscopic picture.
                emuPerPixel = int(Inches(1)) // 96
                imgWidthEmu = imgWidth * emuPerPixel
                imgHeightEmu = imgHeight * emuPerPixel

                maxWidth = availableWidth * 0.9
                maxHeight = availableHeight * 0.9

                # Only ever shrink; small images keep their natural size.
                scale = min(maxWidth / imgWidthEmu, maxHeight / imgHeightEmu, 1.0)
                finalWidth = int(imgWidthEmu * scale)
                finalHeight = int(imgHeightEmu * scale)

                left = int((slideWidth - finalWidth) / 2)
                top = int(titleHeight + (availableHeight - finalHeight) / 2)
            except Exception:
                # Fallback: Pillow missing or image unreadable - use default size
                finalWidth = Inches(6)
                finalHeight = Inches(4.5)
                left = int((slideWidth - finalWidth) / 2)
                top = titleHeight + Inches(1)

            # A failure here propagates to the outer handler; retrying the
            # identical call (as before) could not succeed.
            slide.shapes.add_picture(io.BytesIO(imageBytes), left, top, width=finalWidth, height=finalHeight)

            # Add caption if available
            caption = img.get("caption") or img.get("altText")
            if caption and caption != "Image":
                captionTop = top + finalHeight + Inches(0.2)
                captionBox = slide.shapes.add_textbox(
                    Inches(1),
                    captionTop,
                    slideWidth - Inches(2),
                    Inches(0.5)
                )
                captionFrame = captionBox.text_frame
                captionFrame.text = caption
                captionParagraph = captionFrame.paragraphs[0]
                captionParagraph.font.size = Pt(12)
                captionParagraph.font.italic = True
                captionParagraph.alignment = PP_ALIGN.CENTER
        else:
            # Multiple images: arrange in grid (cells are filled completely,
            # so the aspect ratio of each picture is not preserved)
            cols = 2 if len(images) <= 4 else 3
            rows = (len(images) + cols - 1) // cols
            gap = Inches(0.5)

            cellWidth = int((availableWidth - gap * (cols - 1)) / cols)
            cellHeight = int((availableHeight - gap * (rows - 1)) / rows)

            for idx, img in enumerate(images):
                base64Data = img.get("base64Data")
                if not base64Data:
                    continue

                row, col = divmod(idx, cols)
                left = Inches(0.5) + col * (cellWidth + gap)
                top = titleHeight + Inches(0.5) + row * (cellHeight + gap)

                # One undecodable image must not prevent the others from being placed.
                try:
                    imageStream = io.BytesIO(base64.b64decode(base64Data))
                    slide.shapes.add_picture(imageStream, left, top, width=cellWidth, height=cellHeight)
                except Exception as image_error:
                    logger.error(f"Error embedding image {idx} in PPTX slide: {str(image_error)}")

    except Exception as e:
        logger.error(f"Error embedding images in PPTX slide: {str(e)}")
        import traceback
        logger.error(f"Traceback: {traceback.format_exc()}")
|
|
|
|
def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float, max_width: float = None) -> None:
    """Add a styled PowerPoint table to the slide.

    The table is created from ``element["content"]`` (``headers`` and
    ``rows``), placed 0.5 inch from the left edge at ``top`` and split
    into equally wide columns. Nothing is added when there are no headers.

    Args:
        slide: Target slide; the table is added to ``slide.shapes``.
        element: Table element with a nested ``content`` dict.
        styles: Style dict; ``table_header`` and ``table_cell`` entries are read.
        top: Vertical position of the table in EMU.
        max_width: Table width in EMU; defaults to slide width minus 1 inch.

    Errors are logged as warnings, never raised.
    """
    try:
        from pptx.util import Inches, Pt
        from pptx.enum.text import PP_ALIGN
        from pptx.dml.color import RGBColor

        content = element.get("content", {})
        if not isinstance(content, dict):
            return

        headers = content.get("headers", [])
        rows = content.get("rows") or []
        if not headers:
            return

        num_cols = len(headers)
        num_rows = len(rows) + 1  # +1 for header row
        left = Inches(0.5)

        if max_width is not None:
            width = max_width
        else:
            # The presentation is only needed for the default width.
            if hasattr(self, '_currentPresentation'):
                prs = self._currentPresentation
            else:
                prs = slide.presentation
            width = prs.slide_width - Inches(1)
        width = int(width)  # callers pass fractional EMU; shape geometry is integral

        row_height = Inches(0.4)
        table_height = row_height * num_rows
        table_shape = slide.shapes.add_table(num_rows, num_cols, left, int(top), width, table_height)
        table = table_shape.table

        # python-pptx expects EMU values (914400 EMU = 1 inch); divide evenly
        col_width_emu = width // num_cols
        for col_idx in range(num_cols):
            table.columns[col_idx].width = col_width_emu

        alignments = {"left": PP_ALIGN.LEFT, "center": PP_ALIGN.CENTER, "right": PP_ALIGN.RIGHT}

        def resolveStyle(style, default_bg, default_fg, default_size, default_bold, default_align):
            """Resolve a style dict once into values ready to be applied to cells."""
            align = style.get("align", default_align)
            return {
                "bg": RGBColor(*self._getSafeColor(style.get("background", default_bg))),
                "fg": RGBColor(*self._getSafeColor(style.get("text_color", default_fg))),
                "size": Pt(style.get("font_size", default_size)),
                "bold": style.get("bold", default_bold),
                "align": alignments.get(align, alignments[default_align]) if isinstance(align, str) else alignments[default_align],
            }

        def writeCell(cell, value, look):
            """Set a cell's text and apply fill, font and alignment."""
            # Assigning the text replaces the cell content and always leaves
            # exactly one paragraph to style.
            cell.text = "" if value is None else str(value)
            cell.fill.solid()
            cell.fill.fore_color.rgb = look["bg"]
            para = cell.text_frame.paragraphs[0]
            para.font.bold = look["bold"]
            para.font.size = look["size"]
            para.font.color.rgb = look["fg"]
            para.alignment = look["align"]

        # Header row. ``None`` becomes "", but falsy values such as 0 are
        # kept (they used to be blanked, unlike data cells).
        header_look = resolveStyle(styles.get("table_header", {}), (31, 78, 121), (255, 255, 255), 18, True, "center")
        for col_idx, header in enumerate(headers):
            writeCell(table.cell(0, col_idx), header, header_look)

        # Data rows; surplus cells beyond the header count are ignored.
        cell_look = resolveStyle(styles.get("table_cell", {}), (255, 255, 255), (47, 47, 47), 16, False, "left")
        for row_idx, row_data in enumerate(rows, 1):
            if not isinstance(row_data, (list, tuple)):
                # A malformed row leaves its table row empty instead of
                # aborting the remaining rows.
                continue
            for col_idx, cell_data in enumerate(row_data[:num_cols]):
                writeCell(table.cell(row_idx, col_idx), cell_data, cell_look)

    except Exception as e:
        logger.warning(f"Error adding table to slide: {str(e)}")
|
|
|
|
def _addBulletListToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None:
    """Add a bullet list to a slide text frame.

    Every non-empty item of ``element["content"]["items"]`` becomes one
    paragraph at level 1. It is prefixed with a literal "• " unless it
    already starts with a bullet-like character, and formatted with the
    ``bullet_list`` style.

    Args:
        slide: Unused; kept for signature parity with the other ``_add*`` helpers.
        element: List element with a nested ``content`` dict.
        styles: Style dict; the ``bullet_list`` entry is read.
        text_frame: python-pptx text frame receiving the paragraphs.
        font_size_multiplier: Scale factor for the font size (floor of 10pt).

    A failing item is logged and skipped; errors are never raised.
    """
    try:
        from pptx.util import Pt
        from pptx.dml.color import RGBColor
        from pptx.enum.text import PP_ALIGN

        content = element.get("content", {})
        if not isinstance(content, dict):
            return

        items = content.get("items", [])
        if not items:
            return

        list_style = styles.get("bullet_list", {})
        base_font_size = list_style.get("font_size", 14)
        calculated_size = max(10, int(base_font_size * font_size_multiplier))  # Minimum 10pt for readability

        bullet_prefixes = ('•', '-', '*', '◦')
        hanging_indent_emu = int(Pt(18))

        logger.debug(f"Rendering bullet list with {len(items)} items")

        for idx, item in enumerate(items):
            try:
                item_text = item.get("text", "") if isinstance(item, dict) else str(item)

                if not item_text or len(item_text.strip()) == 0:
                    logger.debug(f"Skipping empty bullet item {idx}")
                    continue

                p = text_frame.add_paragraph()
                p.level = 1

                # A manual bullet character guarantees a visible bullet even
                # where the frame does not inherit bullet formatting.
                if item_text.startswith(bullet_prefixes):
                    p.text = item_text
                else:
                    p.text = '• ' + item_text
                    logger.debug(f"Added manual bullet character to item {idx}")

                p.font.size = Pt(calculated_size)
                p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47))))
                p.alignment = PP_ALIGN.LEFT  # Left align bullet lists
                p.space_before = Pt(2)
                p.space_after = Pt(2)

                # Hanging indent: wrapped lines line up with the text instead
                # of the bullet. python-pptx paragraphs offer no API for this,
                # so the DrawingML ``marL`` / ``indent`` attributes (EMU) are
                # written on ``a:pPr`` directly. The previous attempt assigned
                # Python attributes on the element, which never reached the XML.
                try:
                    pPr = p._element.get_or_add_pPr()
                    pPr.set("marL", str(hanging_indent_emu))
                    pPr.set("indent", str(-hanging_indent_emu))
                    logger.debug(f"Set hanging indent via XML for bullet item {idx}")
                except Exception as indent_error:
                    # Indentation is optional, continue without it
                    logger.debug(f"Could not set indent for item {idx}: {str(indent_error)}")

                logger.debug(f"Successfully added bullet item {idx}: '{item_text[:50]}...'")

            except Exception as item_error:
                # Continue with next item even if one fails
                logger.error(f"Error adding bullet item {idx}: {str(item_error)}", exc_info=True)
                continue

        logger.debug(f"Completed rendering bullet list, added {len(text_frame.paragraphs)} paragraphs")

    except Exception as e:
        logger.warning(f"Error adding bullet list to slide: {str(e)}")
|
|
|
|
def _addHeadingToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None:
    """Append a heading paragraph to a slide text frame.

    The paragraph is always placed at indent level 0. The font size comes
    from the ``heading`` style or, when that has no ``font_size``, from a
    per-level default (28 / 22 / 18, otherwise 16). It is scaled by
    ``font_size_multiplier`` and never drops below 12pt.

    Nothing is added when ``content`` is not a dict or the text is empty.
    Errors are logged as warnings, never raised.
    """
    try:
        from pptx.util import Pt
        from pptx.dml.color import RGBColor

        payload = element.get("content", {})
        if not isinstance(payload, dict):
            return

        heading_text = payload.get("text", "")
        heading_level = payload.get("level", 1)
        if not heading_text:
            return

        para = text_frame.add_paragraph()
        para.text = heading_text
        para.level = 0  # no indentation, whatever the heading level

        style = styles.get("heading", {})

        # Default size per heading level; anything beyond H3 shares one size.
        default_size = 16
        for known_level, known_size in ((1, 28), (2, 22), (3, 18)):
            if heading_level == known_level:
                default_size = known_size
                break
        base_size = style.get("font_size", default_size)
        point_size = max(12, int(base_size * font_size_multiplier))

        para.font.size = Pt(point_size)
        para.font.bold = style.get("bold", True)
        rgb = self._getSafeColor(style.get("color", (31, 78, 121)))
        para.font.color.rgb = RGBColor(*rgb)

        # H1 gets more room above it than the lower levels.
        gap_before = 12 if heading_level == 1 else 8
        para.space_before = Pt(gap_before)
        para.space_after = Pt(6)

    except Exception as e:
        logger.warning(f"Error adding heading to slide: {str(e)}")
|
|
|
|
def _addParagraphToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None:
    """Append a plain (non-bullet) paragraph to a slide text frame.

    The text comes from ``content["text"]`` or from ``content`` itself
    when it is a string. The ``paragraph`` style supplies font size
    (scaled, minimum 10pt), weight, color and alignment.

    Nothing is added for empty text. Errors are logged as warnings, never raised.
    """
    try:
        from pptx.util import Pt
        from pptx.dml.color import RGBColor
        from pptx.enum.text import PP_ALIGN

        payload = element.get("content", {})
        if isinstance(payload, dict):
            body = payload.get("text", "")
        else:
            body = payload if isinstance(payload, str) else ""

        if not body:
            return

        para = text_frame.add_paragraph()
        para.text = body
        para.level = 0  # regular paragraphs are never indented like bullets

        # Best effort: switch bullets off where the paragraph object offers it.
        try:
            if hasattr(para, 'paragraph_format'):
                para.paragraph_format.bullet.type = None
        except (AttributeError, TypeError):
            pass

        style = styles.get("paragraph", {})
        base_size = style.get("font_size", 14)
        point_size = max(10, int(base_size * font_size_multiplier))
        para.font.size = Pt(point_size)
        para.font.bold = style.get("bold", False)
        rgb = self._getSafeColor(style.get("color", (47, 47, 47)))
        para.font.color.rgb = RGBColor(*rgb)

        # Vertical rhythm
        para.space_before = Pt(6)
        para.space_after = Pt(6)
        para.line_spacing = 1.2

        # Anything other than "center" / "right" is left-aligned.
        wanted = style.get("align", "left")
        if wanted == "center":
            chosen = PP_ALIGN.CENTER
        elif wanted == "right":
            chosen = PP_ALIGN.RIGHT
        else:
            chosen = PP_ALIGN.LEFT
        para.alignment = chosen

    except Exception as e:
        logger.warning(f"Error adding paragraph to slide: {str(e)}")
|
|
|
|
def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None:
    """Append a code block to a slide text frame.

    When a language is given, a bold "Code (<language>):" label paragraph
    precedes the code. The code paragraph uses the ``code_block`` style
    (font name, color, and a scaled size with a 6pt minimum).

    Nothing is added when ``content`` is not a dict or the code is empty.
    Errors are logged as warnings, never raised.
    """
    try:
        from pptx.util import Pt
        from pptx.dml.color import RGBColor

        payload = element.get("content", {})
        if not isinstance(payload, dict):
            return

        source_text = payload.get("code", "")
        lang = payload.get("language", "")
        if not source_text:
            return

        style = styles.get("code_block", {})
        font_name = style.get("font", "Courier New")
        base_size = style.get("font_size", 9)
        point_size = max(6, int(base_size * font_size_multiplier))
        rgb = self._getSafeColor(style.get("color", (47, 47, 47)))

        if lang:
            label = text_frame.add_paragraph()
            label.text = f"Code ({lang}):"
            label.font.bold = True
            label.font.size = Pt(point_size)

        code_para = text_frame.add_paragraph()
        code_para.text = source_text
        code_para.font.name = font_name
        code_para.font.size = Pt(point_size)
        code_para.font.color.rgb = RGBColor(*rgb)

    except Exception as e:
        logger.warning(f"Error adding code block to slide: {str(e)}")
|
|
|
|
def _formatTimestamp(self) -> str:
|
|
"""Format current timestamp for presentation generation."""
|
|
# datetime and UTC are already imported at module level
|
|
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
|
|
|
def _renderSlideContentWithFrames(self, slide, slide_sections: List[Dict[str, Any]], slide_images: List[Dict[str, Any]], styles: Dict[str, Any], prs) -> None:
    """
    Organize slide content into frames for better layout.

    Sections are split into images, table sections and text sections, and
    a column layout is chosen from what is present:

    - images + tables + text: three columns (images, tables, text)
    - images + text, tables + text, images + tables: two columns
    - a single kind of content: one frame

    Args:
        slide: Target slide.
        slide_sections: Sections assigned to this slide.
        slide_images: Already extracted images; images found in the sections are appended.
        styles: Style dict passed through to the rendering helpers.
        prs: Presentation providing ``slide_width`` / ``slide_height``.

    If the layout fails, the error is logged and all sections are rendered
    into a single text frame instead.
    """
    try:
        from pptx.util import Inches

        base64_alphabet = frozenset("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=")

        images_to_render = list(slide_images) if slide_images else []
        text_sections = []
        table_sections = []

        for section in slide_sections:
            section_type = section.get("content_type", "paragraph")
            elements = section.get("elements", [])
            if not elements:
                continue

            # Extract images from all sections
            section_has_images = False
            for element in elements:
                if not (isinstance(element, dict) and element.get("type") == "image"):
                    continue

                content = element.get("content", {})
                if isinstance(content, dict):
                    image_source = content
                elif isinstance(content, str):
                    # A bare string only counts when it looks like base64.
                    if len(content) > 100 and all(c in base64_alphabet for c in content[:100]):
                        image_source = {"base64Data": content}
                    else:
                        continue
                else:
                    # Fall back to fields stored directly on the element.
                    image_source = element

                base64Data = image_source.get("base64Data")
                if base64Data:
                    images_to_render.append({
                        "base64Data": base64Data,
                        "altText": image_source.get("altText", "Image"),
                        "caption": image_source.get("caption", "")
                    })
                    section_has_images = True

            # Image-only sections are fully covered by images_to_render.
            if section_type == "image" and section_has_images:
                continue

            # Categorize the remaining (non-image) elements.
            has_table = False
            non_image_elements = []
            for element in elements:
                if isinstance(element, dict):
                    element_type = element.get("type", "")
                    if element_type == "image":
                        continue
                    if element_type == "table" or section_type == "table":
                        has_table = True
                    non_image_elements.append(element)

            if non_image_elements:
                stripped_section = {**section, "elements": non_image_elements}
                if has_table:
                    table_sections.append(stripped_section)
                else:
                    text_sections.append(stripped_section)

        # Layout dimensions (EMU)
        title_height = Inches(1.5)
        available_height = prs.slide_height - title_height - Inches(0.5)  # Title + margin
        available_width = prs.slide_width - Inches(1)  # Margins
        margin = Inches(0.5)
        current_y = title_height + Inches(0.3)
        column_height = available_height - Inches(0.2)

        def render_tables(column_left, column_width, gap, fallback_advance, height_cap=None):
            """Stack every table element in one column, top to bottom."""
            y = current_y
            for table_section in table_sections:
                for element in table_section.get("elements", []):
                    if not (isinstance(element, dict) and element.get("type") == "table"):
                        continue
                    try:
                        shapes_before = len(slide.shapes)
                        self._addTableToSlide(slide, element, styles, int(y), max_width=int(column_width))

                        # _addTableToSlide always places the table at the left
                        # margin, so move the new shape into its column.
                        if len(slide.shapes) > shapes_before:
                            new_shape = slide.shapes[-1]
                            if getattr(new_shape, "has_table", False):
                                new_shape.left = int(column_left)

                        content = element.get("content", {})
                        if isinstance(content, dict):
                            num_rows = len(content.get("rows") or []) + 1  # +1 for header
                            used_height = Inches(0.4) * num_rows
                            if height_cap is not None:
                                used_height = min(used_height, height_cap)
                            y += used_height + gap
                        else:
                            y += fallback_advance
                    except Exception as table_error:
                        logger.error(f"Error rendering table: {str(table_error)}")
                        # Continue with next table
                        continue

        has_images = len(images_to_render) > 0
        has_tables = len(table_sections) > 0
        has_text = len(text_sections) > 0

        # Images + tables + text: three columns. This must be tested before
        # the two-column cases; it used to sit behind "tables + text" and was
        # unreachable, so images were silently dropped.
        if has_images and has_tables and has_text:
            img_width = available_width * 0.31
            table_width = available_width * 0.31
            text_width = available_width * 0.31
            img_left = margin
            table_left = margin + img_width + Inches(0.15)
            text_left = margin + img_width + table_width + Inches(0.3)

            self._addImagesToSlideInFrame(slide, images_to_render, styles, img_left, current_y, img_width, column_height)
            render_tables(table_left, table_width, Inches(0.15), Inches(2))
            self._renderTextSectionsInFrame(slide, text_sections, styles, text_left, current_y, text_width, column_height, adaptiveFontSize=True)

        # Images + text: images on the left, text on the right
        elif has_images and has_text:
            img_width = available_width * 0.48
            text_width = available_width * 0.48
            text_left = margin + img_width + Inches(0.2)

            self._addImagesToSlideInFrame(slide, images_to_render, styles, margin, current_y, img_width, column_height)
            self._renderTextSectionsInFrame(slide, text_sections, styles, text_left, current_y, text_width, column_height, adaptiveFontSize=True)

        # Tables + text: tables on the left, text on the right
        elif has_tables and has_text:
            table_width = available_width * 0.48
            text_width = available_width * 0.48
            text_left = margin + table_width + Inches(0.2)

            render_tables(margin, table_width, Inches(0.15), Inches(2))
            self._renderTextSectionsInFrame(slide, text_sections, styles, text_left, current_y, text_width, column_height, adaptiveFontSize=True)

        # Images + tables (no text): previously nothing was rendered at all
        elif has_images and has_tables:
            img_width = available_width * 0.48
            table_width = available_width * 0.48
            table_left = margin + img_width + Inches(0.2)

            self._addImagesToSlideInFrame(slide, images_to_render, styles, margin, current_y, img_width, column_height)
            render_tables(table_left, table_width, Inches(0.15), Inches(2))

        # Images only
        elif has_images:
            img_width = available_width * 0.8
            img_height = available_height * 0.8
            img_left = (available_width - img_width) / 2 + margin
            self._addImagesToSlideInFrame(slide, images_to_render, styles, img_left, current_y, img_width, img_height)

        # Text only (adaptive font size)
        elif has_text:
            self._renderTextSectionsInFrame(slide, text_sections, styles, margin, current_y, available_width, column_height, adaptiveFontSize=True)

        # Tables only: each table section gets an equal share of the height
        elif has_tables:
            table_height = available_height / max(len(table_sections), 1)
            render_tables(margin, available_width, Inches(0.2), table_height + Inches(0.2), height_cap=table_height)

    except Exception as e:
        logger.error(f"Error rendering slide content with frames: {str(e)}")

        # Fallback to simple rendering
        try:
            content_shape = slide.placeholders[1]
            text_frame = content_shape.text_frame
            text_frame.clear()
        except (AttributeError, IndexError, KeyError):
            # python-pptx looks placeholders up by idx and raises KeyError
            # when idx 1 is absent; that used to escape this handler.
            from pptx.util import Inches
            left = Inches(0.5)
            top = Inches(1.5)
            width = prs.slide_width - Inches(1)
            height = prs.slide_height - top - Inches(0.5)
            textbox = slide.shapes.add_textbox(left, top, width, height)
            text_frame = textbox.text_frame
            text_frame.word_wrap = True

        for section in slide_sections:
            self._renderSectionToTextFrame(slide, section, styles, text_frame, font_size_multiplier=1.0)
|
|
|
|
def _renderTextSectionsInFrame(self, slide, text_sections: List[Dict[str, Any]], styles: Dict[str, Any], left: float, top: float, width: float, height: float, adaptiveFontSize: bool = False) -> None:
    """Render text sections (paragraphs, lists, headings) in a fixed-size text box.

    A single text box is created at (left, top) with the given width/height and
    every section is rendered into it via ``_renderSectionToTextFrame``.

    Args:
        slide: Slide object the text box is added to.
        text_sections: Section dicts; each may carry an ``elements`` list.
        styles: Style dictionary forwarded to the element renderers.
        left, top, width, height: Frame geometry as python-pptx lengths (EMU),
            i.e. the same values that are handed to ``add_textbox``.
        adaptiveFontSize: When True, a font-size multiplier (0.5 .. 1.1) is
            estimated from the amount of text and the frame size.

    Errors are logged as warnings and never propagated.
    """
    try:
        from pptx.util import Pt

        # python-pptx lengths are English Metric Units: 12700 EMU == 1 point.
        emu_per_point = 12700.0

        # Count characters of the text-like elements for adaptive font sizing.
        total_text_length = 0
        if adaptiveFontSize:
            countable_types = ("paragraph", "bullet_list", "list", "heading")
            for section in text_sections:
                for element in section.get("elements", []):
                    if not isinstance(element, dict):
                        continue
                    if element.get("type", "") not in countable_types:
                        continue
                    content = element.get("content", "")
                    if isinstance(content, str):
                        total_text_length += len(content)
                    elif isinstance(content, dict):
                        if "text" in content:
                            total_text_length += len(str(content["text"]))
                        elif "items" in content:
                            # "items" may be present but None; treat that as empty.
                            total_text_length += sum(len(str(item)) for item in (content.get("items") or []))

        # Estimate a font-size multiplier from text length and frame size.
        font_size_multiplier = 1.0
        if adaptiveFontSize and total_text_length > 0:
            try:
                # The metrics below are in points, so the frame size must be
                # converted from EMU first; comparing raw EMU against points
                # overestimates the capacity by a factor of 12700 per axis and
                # the text would never be scaled down.
                width_pt = float(width) / emu_per_point
                height_pt = float(height) / emu_per_point

                # Average character width ~ 0.6 * font size -> 8.4pt at 14pt.
                avg_char_width_pt = 8.4
                # Line height ~ 1.2 * font size -> 16.8pt at 14pt.
                line_height_pt = 16.8

                chars_per_line = max(1, int(width_pt / avg_char_width_pt))
                lines_needed = total_text_length / chars_per_line
                available_lines = max(1, int(height_pt / line_height_pt))

                if lines_needed > available_lines:
                    # Too much text: shrink with a 10% safety buffer, but never
                    # below 50% (minimum readable size).
                    scale_needed = available_lines / lines_needed
                    font_size_multiplier = max(0.5, min(1.0, scale_needed * 0.9))
                elif lines_needed <= available_lines * 0.7:
                    # Plenty of room: allow a slightly larger font (max +10%).
                    font_size_multiplier = min(1.1, (available_lines / lines_needed) * 0.8)
            except (ZeroDivisionError, ValueError, TypeError) as calc_error:
                logger.debug(f"Font size calculation error: {str(calc_error)}")
                # Fallback to default if calculation fails
                font_size_multiplier = 1.0

        textbox = slide.shapes.add_textbox(left, top, width, height)
        text_frame = textbox.text_frame
        text_frame.word_wrap = True
        text_frame.auto_size = None  # Disable auto-size for fixed frame
        # Zero margins so the text uses the whole frame
        text_frame.margin_left = Pt(0)
        text_frame.margin_right = Pt(0)
        text_frame.margin_top = Pt(0)
        text_frame.margin_bottom = Pt(0)

        # Pass font size multiplier to rendering methods
        for section in text_sections:
            self._renderSectionToTextFrame(slide, section, styles, text_frame, font_size_multiplier)

    except Exception as e:
        logger.warning(f"Error rendering text sections in frame: {str(e)}")
|
|
|
|
def _renderSectionToTextFrame(self, slide, section: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None:
    """Render a single section to a text frame.

    Each dict element of the section is dispatched on its ``type`` (falling
    back to the section's ``content_type`` when the element has none).
    Image elements are skipped here because they are placed separately.

    Args:
        slide: Slide object, forwarded to the element renderers.
        section: Section dict with optional ``content_type`` and ``elements``.
        styles: Style dictionary; the ``paragraph`` entry is used for
            ``extracted_text`` elements.
        text_frame: Target text frame that paragraphs are appended to.
        font_size_multiplier: Factor applied to base font sizes.

    Errors are logged as warnings and never propagated.
    """
    try:
        from pptx.util import Pt
        from pptx.enum.text import PP_ALIGN
        from pptx.dml.color import RGBColor

        section_type = section.get("content_type", "paragraph")
        elements = section.get("elements", [])
        if not elements:
            return

        for element in elements:
            if not isinstance(element, dict):
                continue

            # An element without its own type inherits the section's type.
            element_type = element.get("type", "") or section_type

            # Skip images - handled separately
            if element_type == "image":
                continue

            if element_type in ("bullet_list", "list"):
                self._addBulletListToSlide(slide, element, styles, text_frame, font_size_multiplier)
            elif element_type == "heading":
                self._addHeadingToSlide(slide, element, styles, text_frame, font_size_multiplier)
            elif element_type == "paragraph":
                self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier)
            elif element_type in ("code_block", "code"):
                self._addCodeBlockToSlide(slide, element, styles, text_frame, font_size_multiplier)
            elif element_type == "extracted_text":
                content = element.get("content", "")
                # Accept the {"text": ...} shape as well; assigning a dict to
                # paragraph.text would raise and abort the rest of the section.
                if isinstance(content, dict):
                    content = content.get("text", "")
                source = element.get("source", "")
                if content:
                    paragraph_style = styles.get("paragraph", {})
                    p = text_frame.add_paragraph()
                    p.text = str(content)
                    base_font_size = paragraph_style.get("font_size", 18)
                    p.font.size = Pt(int(base_font_size * font_size_multiplier))
                    p.font.bold = paragraph_style.get("bold", False)
                    p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
                    p.alignment = PP_ALIGN.LEFT
                    if source:
                        p.add_run(f" (Source: {source})").font.italic = True
            elif element_type == "reference":
                label = element.get("label", "Reference")
                p = text_frame.add_paragraph()
                p.text = f"[Reference: {label}]"
                p.font.italic = True
                p.alignment = PP_ALIGN.LEFT
            else:
                # Unknown type: render as a paragraph if it carries any text.
                content = element.get("content", "")
                if isinstance(content, dict):
                    text = content.get("text", "")
                elif isinstance(content, str):
                    text = content
                else:
                    text = ""

                if text:
                    # Use the caller's multiplier so fallback text is scaled
                    # like every other element in the same frame.
                    self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier)

    except Exception as e:
        logger.warning(f"Error rendering section to text frame: {str(e)}")
|
|
|
|
def _addImagesToSlideInFrame(self, slide, images: List[Dict[str, Any]], styles: Dict[str, Any], left: float, top: float, width: float, height: float) -> None:
    """Add images to slide within a specific frame area.

    One image is scaled to fit the frame (aspect ratio kept, never enlarged
    beyond its native size except to reach a 1 inch minimum), centred, and
    optionally captioned. Several images are laid out in a 2- or 3-column grid
    with a 0.2 inch gap, each scaled to fit its cell.

    Args:
        slide: Slide object the pictures are added to.
        images: Image dicts; ``base64Data`` holds the payload (optionally as a
            ``data:image/...`` URI), ``caption``/``altText`` an optional caption.
        styles: Style dictionary (not used by this method).
        left, top, width, height: Frame geometry as python-pptx lengths (EMU).

    Errors are logged and never propagated; a bad image in the grid does not
    prevent the remaining images from being rendered.
    """
    try:
        import base64
        import binascii
        import io
        from pptx.util import Inches, Pt
        from pptx.enum.text import PP_ALIGN

        if not images:
            logger.debug("No images to render in frame")
            return

        logger.info(f"Rendering {len(images)} image(s) in frame at ({left}, {top}), size ({width}, {height})")

        def _decodeImageData(raw):
            """Decode a base64 payload, dropping a data-URI prefix and all whitespace."""
            if isinstance(raw, str):
                if raw.startswith("data:image/"):
                    # partition() yields "" instead of raising when the comma is missing.
                    raw = raw.partition(",")[2]
                # Line-wrapped base64 is common; validate=True rejects any whitespace.
                raw = "".join(raw.split())
            return base64.b64decode(raw, validate=True)

        if len(images) == 1:
            # Single image: fit to frame
            img = images[0]
            base64Data = img.get("base64Data")
            if not base64Data:
                logger.warning("Image has no base64Data")
                return

            try:
                imageBytes = _decodeImageData(base64Data)
                if len(imageBytes) == 0:
                    logger.error("Decoded image bytes are empty")
                    return

                imageStream = io.BytesIO(imageBytes)

                # Get image dimensions (pixels) using PIL; fall back to 800x600.
                try:
                    from PIL import Image as PILImage
                    imgWidth, imgHeight = PILImage.open(imageStream).size
                    if imgWidth <= 1 or imgHeight <= 1:
                        logger.warning(f"Image has invalid dimensions: {imgWidth}x{imgHeight}, using default size")
                        imgWidth, imgHeight = 800, 600
                    elif imgWidth < 100 or imgHeight < 100:
                        logger.warning(f"Image dimensions very small: {imgWidth}x{imgHeight}, may appear tiny")
                except ImportError:
                    logger.warning("PIL not available, using default image size")
                    imgWidth, imgHeight = 800, 600
                except Exception as pil_error:
                    logger.warning(f"Error getting image dimensions with PIL: {str(pil_error)}, using default size")
                    imgWidth, imgHeight = 800, 600

                # Native size in EMU, assuming 96 DPI. Both values are > 0
                # because the pixel dimensions are at least 2 at this point.
                nativeWidth = Inches(imgWidth / 96.0)
                nativeHeight = Inches(imgHeight / 96.0)

                try:
                    # Scale down to fit the frame; never scale up here.
                    scale = min(width / nativeWidth, height / nativeHeight, 1.0)
                    finalWidth = nativeWidth * scale
                    finalHeight = nativeHeight * scale

                    # Ensure minimum size (at least 1 inch) to prevent tiny rendering
                    minSize = Inches(1)
                    if finalWidth < minSize or finalHeight < minSize:
                        min_scale = max(minSize / nativeWidth, minSize / nativeHeight)
                        finalWidth = max(minSize, nativeWidth * min_scale)
                        finalHeight = max(minSize, nativeHeight * min_scale)

                    # The minimum-size step may overshoot; clamp back to the frame.
                    if finalWidth > width:
                        finalWidth = width
                        finalHeight = nativeHeight * (width / nativeWidth)
                    if finalHeight > height:
                        finalHeight = height
                        finalWidth = nativeWidth * (height / nativeHeight)
                except (ZeroDivisionError, TypeError, AttributeError) as calc_error:
                    logger.warning(f"Error calculating image size: {str(calc_error)}, using frame size")
                    finalWidth = width * 0.9  # Use 90% of frame width
                    finalHeight = height * 0.9  # Use 90% of frame height

                # Center in frame
                frame_left = left + (width - finalWidth) / 2
                frame_top = top + (height - finalHeight) / 2

                # Add image to slide (EMU values are integral)
                imageStream.seek(0)
                slide.shapes.add_picture(imageStream, int(frame_left), int(frame_top), width=int(finalWidth), height=int(finalHeight))
                logger.info(f"Successfully added image to slide at ({frame_left}, {frame_top}), size ({finalWidth}, {finalHeight})")

                # Add caption if available
                caption = img.get("caption") or img.get("altText")
                if caption and caption != "Image":
                    captionTop = frame_top + finalHeight + Inches(0.1)
                    captionBox = slide.shapes.add_textbox(int(left), int(captionTop), int(width), Inches(0.4))
                    captionFrame = captionBox.text_frame
                    captionFrame.text = caption
                    captionFrame.paragraphs[0].font.size = Pt(10)
                    captionFrame.paragraphs[0].font.italic = True
                    captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER
            except binascii.Error as b64_error:
                logger.error(f"Invalid base64 data: {str(b64_error)}")
            except Exception as img_error:
                logger.error(f"Error adding image to frame: {str(img_error)}", exc_info=True)
        else:
            # Multiple images: grid layout
            gap = Inches(0.2)
            cols = 2 if len(images) <= 4 else 3
            rows = (len(images) + cols - 1) // cols
            cellWidth = (width - gap * (cols - 1)) / cols
            cellHeight = (height - gap * (rows - 1)) / rows

            for idx, img in enumerate(images):
                base64Data = img.get("base64Data")
                if not base64Data:
                    logger.warning(f"Image {idx} has no base64Data")
                    continue

                row, col = divmod(idx, cols)
                img_left = left + col * (cellWidth + gap)
                img_top = top + row * (cellHeight + gap)

                try:
                    imageBytes = _decodeImageData(base64Data)
                    if len(imageBytes) == 0:
                        logger.error(f"Decoded image {idx} bytes are empty")
                        continue

                    imageStream = io.BytesIO(imageBytes)

                    try:
                        from PIL import Image as PILImage
                        imgW, imgH = PILImage.open(imageStream).size
                        # Convert pixels to EMU (96 DPI) so the image size and
                        # the cell size are compared in the same unit.
                        nativeW = Inches(imgW / 96.0)
                        nativeH = Inches(imgH / 96.0)
                        # Scale to fit grid cell while maintaining aspect ratio
                        scale = min(cellWidth / nativeW, cellHeight / nativeH, 1.0)
                        finalW = nativeW * scale
                        finalH = nativeH * scale
                        # Center in grid cell
                        cell_left = img_left + (cellWidth - finalW) / 2
                        cell_top = img_top + (cellHeight - finalH) / 2
                        imageStream.seek(0)
                        slide.shapes.add_picture(imageStream, int(cell_left), int(cell_top), width=int(finalW), height=int(finalH))
                    except Exception as size_error:
                        # PIL missing or image unreadable by PIL: stretch the
                        # picture over the grid cell instead.
                        logger.debug(f"Falling back to grid cell size for image {idx}: {str(size_error)}")
                        imageStream.seek(0)
                        slide.shapes.add_picture(imageStream, int(img_left), int(img_top), width=int(cellWidth), height=int(cellHeight))

                    logger.info(f"Successfully added image {idx+1}/{len(images)} to slide grid")
                except binascii.Error as b64_error:
                    logger.error(f"Invalid base64 data for image {idx}: {str(b64_error)}")
                except Exception as img_error:
                    logger.error(f"Error adding image {idx} to frame: {str(img_error)}", exc_info=True)

    except Exception as e:
        logger.error(f"Error adding images to slide frame: {str(e)}", exc_info=True)
|