gateway/modules/services/serviceGeneration/renderers/rendererPptx.py
2025-10-12 00:51:23 +02:00

652 lines
27 KiB
Python

import logging
import base64
import io
from typing import Dict, Any, Optional, Tuple, List
from .rendererBaseTemplate import BaseRenderer
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class RendererPptx(BaseRenderer):
"""Renderer for PowerPoint (.pptx) files using python-pptx library."""
def __init__(self):
    """Initialise the renderer and record its formats and output MIME type.

    The base-class initialiser runs first; the PowerPoint-specific
    attributes are assigned afterwards.
    """
    super().__init__()
    # Reuse the classmethod so the instance attribute and the class-level
    # answer cannot drift apart.
    self.supported_formats = self.get_supported_formats()
    self.output_mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
@classmethod
def get_supported_formats(cls) -> list:
    """Return the output format identifiers this renderer handles.

    Returns:
        A newly built list containing "pptx" and "ppt", in that order.
    """
    formats = list(("pptx", "ppt"))
    return formats
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: Optional[str] = None, ai_service=None) -> Tuple[str, str]:
    """
    Render JSON content as a PowerPoint presentation.

    Args:
        extracted_content: JSON content to render as presentation
        title: Title for the presentation; passed to the slide parser and
            used for the default slide when no slides were produced
        user_prompt: User prompt forwarded to the styling step
        ai_service: AI service forwarded to the styling step

    Returns:
        A ``(payload, mime_type)`` tuple. On success the payload is the
        base64-encoded .pptx file. On failure the payload is a plain-text
        error message and the MIME type is "text/plain"; errors are logged
        and not raised to the caller.
    """
    # Import python-pptx lazily and in its own try block, so that only a
    # genuinely missing library is reported as "not installed".
    try:
        from pptx import Presentation
        from pptx.util import Inches, Pt
        from pptx.enum.text import PP_ALIGN
    except ImportError:
        logger.error("python-pptx library not installed. Install with: pip install python-pptx")
        return "python-pptx library not installed", "text/plain"

    try:
        prs = Presentation()

        # Styles must be resolved before anything reads from them: the
        # slide size below is one of the style settings.
        styles = await self._get_pptx_styles(user_prompt, ai_service)

        # Set slide size based on the styles (default to 16:9)
        if styles.get("slide_size", "16:9") == "4:3":
            prs.slide_width = Inches(10)
            prs.slide_height = Inches(7.5)
        else:
            prs.slide_width = Inches(13.33)
            prs.slide_height = Inches(7.5)

        # Generate slide descriptions from the JSON content
        slides_data = await self._parse_json_to_slides(extracted_content, title, styles)
        logger.info(f"Parsed {len(slides_data)} slides from JSON content")
        logger.info(f"JSON content preview: {str(extracted_content)[:200]}...")

        for slide_number, slide_data in enumerate(slides_data, start=1):
            # Treat a missing or None content/title the same as an empty one.
            content_text = slide_data.get("content") or ""
            logger.info(f"Slide {slide_number}: '{slide_data.get('title', 'No title')}' - {len(content_text)} chars")
            if content_text:
                logger.info(f" Content preview: '{content_text[:100]}...'")
            else:
                logger.warning(f" ⚠️ Slide {slide_number} has NO content!")

            # Pick a layout based on the slide content and add the slide
            slide_layout = prs.slide_layouts[self._get_slide_layout_index(slide_data, styles)]
            slide = prs.slides.add_slide(slide_layout)

            # shapes.title is None when the slide has no title placeholder.
            title_shape = slide.shapes.title
            if title_shape is not None:
                title_shape.text = str(slide_data.get("title") or "Slide")

            # placeholders[...] looks up by placeholder idx (not position)
            # and raises KeyError when the layout has no such placeholder.
            try:
                content_shape = slide.placeholders[1]
            except KeyError:
                logger.warning(f"Slide {slide_number} layout has no body placeholder; content skipped")
                continue

            text_frame = content_shape.text_frame
            text_frame.clear()

            # Blank-line separated blocks become paragraphs; empty blocks
            # are dropped up front so the index below counts real ones.
            blocks = [block.strip() for block in content_text.split('\n\n')]
            blocks = [block for block in blocks if block]
            for block_index, block in enumerate(blocks):
                # clear() leaves one empty paragraph behind; reuse it for
                # the first block so the frame has no leading blank line.
                if block_index == 0:
                    p = text_frame.paragraphs[0]
                else:
                    p = text_frame.add_paragraph()

                # '##' must be tested before '#', otherwise the subheader
                # branch can never be reached.
                if block.startswith('##'):
                    # Subheader
                    p.text = block.lstrip('#').strip()
                    p.font.size = Pt(20)
                    p.font.bold = True
                elif block.startswith('#'):
                    # Header
                    p.text = block.lstrip('#').strip()
                    p.font.size = Pt(24)
                    p.font.bold = True
                elif block.startswith('*') and block.endswith('*'):
                    # Bold text
                    p.text = block.strip('*')
                    p.font.bold = True
                else:
                    # Regular text
                    p.text = block
                    p.font.size = Pt(14)
                p.alignment = PP_ALIGN.LEFT

        # If no slides were created, create a default title slide
        if not slides_data:
            slide = prs.slides.add_slide(prs.slide_layouts[0])
            slide.shapes.title.text = title
            slide.placeholders[1].text = "Generated by PowerOn AI System"

        # Serialise to memory and encode as base64 text
        buffer = io.BytesIO()
        prs.save(buffer)
        pptx_bytes = buffer.getvalue()
        pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')
        logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes")
        return pptx_base64, "application/vnd.openxmlformats-officedocument.presentationml.presentation"
    except Exception as e:
        # Top-level boundary: callers receive a text result, not an exception.
        logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
        return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"
def _parse_content_to_slides(self, content: str, title: str) -> list:
    """
    Turn free-form text into a list of slide dictionaries.

    The text is first cut into sections by ``_split_content_into_slides``.
    For each non-blank section the first line becomes the slide title when
    it starts with '#', or when it is shorter than 100 characters and does
    not end with a period; otherwise the slide is titled "Slide N" and the
    whole section is kept as content.

    Args:
        content: Content to parse
        title: Presentation title (not used by this method)

    Returns:
        List of dictionaries with "title" and "content" keys
    """
    parsed = []
    for position, raw_section in enumerate(self._split_content_into_slides(content), start=1):
        body = raw_section.strip()
        if not body:
            continue

        # Everything after the first newline is the candidate slide body.
        first_line, _, remainder = body.partition('\n')
        remainder = remainder.strip()
        candidate = first_line.strip()

        if first_line.startswith('#'):
            # Markdown-style header: drop the '#' markers from the title.
            entry = {"title": first_line.lstrip('#').strip(), "content": remainder}
        elif candidate and len(candidate) < 100 and not candidate.endswith('.'):
            # A short first line without a trailing period is used as title.
            entry = {"title": candidate, "content": remainder}
        else:
            entry = {"title": f"Slide {position}", "content": body}
        parsed.append(entry)
    return parsed
def _split_content_into_slides(self, content: str) -> list:
"""
Split content into individual slides based on headers and structure.
Args:
content: Content to split
Returns:
List of slide content strings
"""
import re
# First, try to split by major headers (# or ##)
# This is the most common case for AI-generated content
header_pattern = r'^(#{1,2})\s+(.+)$'
lines = content.split('\n')
slides = []
current_slide = []
for line in lines:
# Check if this line is a header
header_match = re.match(header_pattern, line.strip())
if header_match:
# If we have content in current slide, save it
if current_slide:
slide_content = '\n'.join(current_slide).strip()
if slide_content:
slides.append(slide_content)
current_slide = []
# Start new slide with this header
current_slide.append(line)
else:
# Add line to current slide
current_slide.append(line)
# Add the last slide
if current_slide:
slide_content = '\n'.join(current_slide).strip()
if slide_content:
slides.append(slide_content)
# If we found slides with headers, return them
if len(slides) > 1:
return slides
# Fallback: Split by double newlines
sections = content.split('\n\n\n')
if len(sections) > 1:
return [s.strip() for s in sections if s.strip()]
# Another fallback: Split by double newlines
sections = content.split('\n\n')
if len(sections) > 1:
return [s.strip() for s in sections if s.strip()]
# Last resort: return as single slide
return [content.strip()]
def get_output_mime_type(self) -> str:
    """Return the MIME type stored in this instance's ``output_mime_type``."""
    mime_type = self.output_mime_type
    return mime_type
async def _get_pptx_styles(self, user_prompt: Optional[str], ai_service=None) -> Dict[str, Any]:
    """
    Get PowerPoint styling definitions via the AI styling helpers.

    Args:
        user_prompt: User prompt forwarded to the style template builder
        ai_service: AI service forwarded to the style lookup

    Returns:
        Style dictionary after the readability validation has been applied
    """
    # The schema handed to the template builder is exactly the default
    # style set, so build it from the one helper instead of repeating the
    # literal here.
    style_schema = self._get_default_pptx_styles()
    # NOTE(review): _create_ai_style_template and _get_ai_styles are not
    # defined in this file; presumably inherited from BaseRenderer - confirm.
    style_template = self._create_ai_style_template("pptx", user_prompt, style_schema)
    # A separate defaults dict is passed so the schema object is not shared.
    styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pptx_styles())
    # Validate PowerPoint-specific requirements
    return self._validate_pptx_styles_readability(styles)
def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix readability issues in AI-generated styles."""
try:
# Ensure minimum font sizes for PowerPoint readability
min_font_sizes = {
"title": 36,
"heading": 24,
"subheading": 20,
"paragraph": 14,
"bullet_list": 14,
"table_header": 12,
"table_cell": 12
}
for style_name, min_size in min_font_sizes.items():
if style_name in styles:
current_size = styles[style_name].get("font_size", 12)
if current_size < min_size:
styles[style_name]["font_size"] = min_size
return styles
except Exception as e:
logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_pptx_styles()
def _get_default_pptx_styles(self) -> Dict[str, Any]:
"""Default PowerPoint styles."""
return {
"title": {"font_size": 44, "color": "#1F4E79", "bold": True, "align": "center"},
"heading": {"font_size": 32, "color": "#2F2F2F", "bold": True, "align": "left"},
"subheading": {"font_size": 24, "color": "#4F4F4F", "bold": True, "align": "left"},
"paragraph": {"font_size": 18, "color": "#2F2F2F", "bold": False, "align": "left"},
"bullet_list": {"font_size": 18, "color": "#2F2F2F", "indent": 20},
"table_header": {"font_size": 16, "color": "#FFFFFF", "bold": True, "background": "#4F4F4F"},
"table_cell": {"font_size": 14, "color": "#2F2F2F", "bold": False, "background": "#FFFFFF"},
"slide_size": "16:9",
"content_per_slide": "concise"
}
async def _parse_json_to_slides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Parse JSON content into slide data structure.

    The result always starts with a title slide. Section slides follow; if
    the sections produce none, a "Content Overview" placeholder slide is
    added.

    Args:
        json_content: JSON content to parse; must be a dictionary with a
            "sections" field
        title: Presentation title, used when the JSON metadata carries no
            usable title and for the error fallback slide
        styles: Styles forwarded to the section-to-slide conversion

    Returns:
        List of slide data dictionaries. On any error a single fallback
        slide is returned instead of raising.
    """
    try:
        # Validate JSON structure
        if not isinstance(json_content, dict):
            raise ValueError("JSON content must be a dictionary")
        if "sections" not in json_content:
            raise ValueError("JSON content must contain 'sections' field")

        # Prefer the metadata title, but fall back to the provided title
        # when metadata is missing, not a dictionary, or its title is
        # empty/None - a None title cannot be rendered later on.
        metadata = json_content.get("metadata")
        metadata_title = metadata.get("title") if isinstance(metadata, dict) else None
        document_title = metadata_title or title

        # Title slide
        slides = [{
            "title": document_title,
            "content": "Generated by PowerOn AI System\n\n" + self._format_timestamp()
        }]

        # A null "sections" value is treated as "no sections".
        sections = json_content.get("sections") or []
        slides.extend(self._create_slides_from_sections(sections, styles))

        # If no content slides were created, create a default content slide
        if len(slides) == 1:  # Only title slide
            slides.append({
                "title": "Content Overview",
                "content": "No structured content found in the source documents.\n\nPlease check the source documents and try again."
            })
        return slides
    except Exception as e:
        logger.error(f"Error parsing JSON to slides: {str(e)}")
        # Return minimal fallback slides
        return [
            {
                "title": title,
                "content": "Error parsing content for presentation"
            }
        ]
def _create_slide_from_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
"""Create a slide from a JSON section."""
try:
section_title = section.get("title", "Untitled Section")
content_type = section.get("content_type", "paragraph")
elements = section.get("elements", [])
# Build slide content based on section type
content_parts = []
for element in elements:
if content_type == "table":
content_parts.append(self._format_table_for_slide(element))
elif content_type == "list":
content_parts.append(self._format_list_for_slide(element))
elif content_type == "heading":
content_parts.append(self._format_heading_for_slide(element))
elif content_type == "paragraph":
content_parts.append(self._format_paragraph_for_slide(element))
elif content_type == "code":
content_parts.append(self._format_code_for_slide(element))
else:
content_parts.append(self._format_paragraph_for_slide(element))
# Combine content parts
slide_content = "\n\n".join(filter(None, content_parts))
return {
"title": section_title,
"content": slide_content
}
except Exception as e:
logger.warning(f"Error creating slide from section: {str(e)}")
return None
def _format_table_for_slide(self, table_data: Dict[str, Any]) -> str:
"""Format table data for slide presentation."""
try:
headers = table_data.get("headers", [])
rows = table_data.get("rows", [])
if not headers:
return ""
# Create table representation
table_lines = []
# Add headers
header_line = " | ".join(str(h) for h in headers)
table_lines.append(header_line)
# Add separator
separator = "-" * len(header_line)
table_lines.append(separator)
# Add data rows (limit based on content density)
max_rows = 5 # Default limit
for row in rows[:max_rows]:
row_line = " | ".join(str(cell) for cell in row)
table_lines.append(row_line)
if len(rows) > max_rows:
table_lines.append(f"... and {len(rows) - max_rows} more rows")
return "\n".join(table_lines)
except Exception as e:
logger.warning(f"Error formatting table for slide: {str(e)}")
return ""
def _format_list_for_slide(self, list_data: Dict[str, Any]) -> str:
"""Format list data for slide presentation."""
try:
items = list_data.get("items", [])
if not items:
return ""
# Create list representation
list_lines = []
for item in items:
if isinstance(item, dict):
text = item.get("text", "")
list_lines.append(f"{text}")
# Add subitems (limit to 3 for readability)
subitems = item.get("subitems", [])[:3]
for subitem in subitems:
if isinstance(subitem, dict):
list_lines.append(f" - {subitem.get('text', '')}")
else:
list_lines.append(f" - {subitem}")
else:
list_lines.append(f"{str(item)}")
return "\n".join(list_lines)
except Exception as e:
logger.warning(f"Error formatting list for slide: {str(e)}")
return ""
def _format_heading_for_slide(self, heading_data: Dict[str, Any]) -> str:
"""Format heading data for slide presentation."""
try:
text = heading_data.get("text", "")
level = heading_data.get("level", 1)
if text:
return f"{'#' * level} {text}"
return ""
except Exception as e:
logger.warning(f"Error formatting heading for slide: {str(e)}")
return ""
def _format_paragraph_for_slide(self, paragraph_data: Dict[str, Any]) -> str:
"""Format paragraph data for slide presentation."""
try:
text = paragraph_data.get("text", "")
if text:
# Limit paragraph length based on content density
max_length = 200 # Default limit
if len(text) > max_length:
text = text[:max_length] + "..."
return text
return ""
except Exception as e:
logger.warning(f"Error formatting paragraph for slide: {str(e)}")
return ""
def _format_code_for_slide(self, code_data: Dict[str, Any]) -> str:
"""Format code data for slide presentation."""
try:
code = code_data.get("code", "")
language = code_data.get("language", "")
if code:
# Limit code length based on content density
max_length = 100 # Default limit
if len(code) > max_length:
code = code[:max_length] + "..."
if language:
return f"Code ({language}):\n{code}"
else:
return f"Code:\n{code}"
return ""
except Exception as e:
logger.warning(f"Error formatting code for slide: {str(e)}")
return ""
def _get_slide_layout_index(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
"""Determine the best slide layout based on content."""
try:
content = slide_data.get("content", "")
title = slide_data.get("title", "")
# Check if it's a title slide (first slide)
if not content or "Generated by PowerOn AI System" in content:
return 0 # Title slide layout
# Check content type to determine layout
if "|" in content and "-" in content:
# Has both tables and lists - use content with caption
return 2
elif "|" in content:
# Has tables - use content layout
return 1
elif content.count("") > 2:
# Has many bullet points - use content layout
return 1
else:
# Default to title and content
return 1
except Exception as e:
logger.warning(f"Error determining slide layout: {str(e)}")
return 1 # Default to title and content
def _create_slides_from_sections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Create slides from sections based on content density and user intent."""
try:
slides = []
content_per_slide = styles.get("content_per_slide", "concise")
for section in sections:
section_slides = self._create_section_slides(section, styles, content_per_slide)
slides.extend(section_slides)
return slides
except Exception as e:
logger.warning(f"Error creating slides from sections: {str(e)}")
return []
def _create_section_slides(self, section: Dict[str, Any], styles: Dict[str, Any], content_per_slide: str) -> List[Dict[str, Any]]:
"""Create one or more slides from a section based on content density."""
try:
section_title = section.get("title", "Untitled Section")
content_type = section.get("content_type", "paragraph")
elements = section.get("elements", [])
if not elements:
return [{
"title": section_title,
"content": "No content available for this section."
}]
# Determine how to split content based on type and density
if content_per_slide == "detailed" and len(elements) > 3:
# Split large sections into multiple slides
return self._split_section_into_multiple_slides(section_title, elements, content_type)
else:
# Create single slide for section
slide_data = self._create_slide_from_section(section, styles)
return [slide_data] if slide_data else []
except Exception as e:
logger.warning(f"Error creating section slides: {str(e)}")
return []
def _split_section_into_multiple_slides(self, section_title: str, elements: List[Dict[str, Any]], content_type: str) -> List[Dict[str, Any]]:
"""Split a large section into multiple slides."""
try:
slides = []
max_elements_per_slide = 3
for i in range(0, len(elements), max_elements_per_slide):
slide_elements = elements[i:i + max_elements_per_slide]
# Create slide title
if i == 0:
slide_title = section_title
else:
slide_title = f"{section_title} (Part {i//max_elements_per_slide + 1})"
# Build content for this slide
content_parts = []
for element in slide_elements:
if content_type == "table":
content_parts.append(self._format_table_for_slide(element))
elif content_type == "list":
content_parts.append(self._format_list_for_slide(element))
elif content_type == "heading":
content_parts.append(self._format_heading_for_slide(element))
elif content_type == "paragraph":
content_parts.append(self._format_paragraph_for_slide(element))
elif content_type == "code":
content_parts.append(self._format_code_for_slide(element))
else:
content_parts.append(self._format_paragraph_for_slide(element))
slide_content = "\n\n".join(filter(None, content_parts))
slides.append({
"title": slide_title,
"content": slide_content
})
return slides
except Exception as e:
logger.warning(f"Error splitting section into slides: {str(e)}")
return []
def _format_timestamp(self) -> str:
"""Format current timestamp for presentation generation."""
from datetime import datetime, UTC
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")