# Source listing metadata: 252 lines, 9.8 KiB, Python
import base64
import io
import logging
import re
from typing import Dict, Any, Optional, Tuple

from .base_renderer import BaseRenderer
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class PptxRenderer(BaseRenderer):
    """Renderer for PowerPoint (.pptx) files using python-pptx library.

    Markdown-like text is split into slides (see
    ``_split_content_into_slides``), each slide gets a title and a body, and
    the finished presentation is returned base64-encoded.
    """

    # Formats advertised by both the instance attribute and the classmethod.
    _SUPPORTED_FORMATS = ("pptx", "ppt")

    # MIME type returned alongside a successfully rendered presentation.
    _PPTX_MIME_TYPE = (
        "application/vnd.openxmlformats-officedocument.presentationml.presentation"
    )

    # A line made of one or two '#' followed by whitespace and text starts a
    # new slide.
    _SLIDE_HEADER_RE = re.compile(r'^(#{1,2})\s+(.+)$')

    # Font sizes (in points) applied to body paragraphs.
    _HEADER_PT = 24
    _SUBHEADER_PT = 20
    _BODY_PT = 14

    def __init__(self):
        """Initialise the base renderer and publish format / MIME attributes."""
        super().__init__()
        self.supported_formats = list(self._SUPPORTED_FORMATS)
        self.output_mime_type = self._PPTX_MIME_TYPE

    @classmethod
    def get_supported_formats(cls) -> list:
        """Get list of supported output formats (a fresh list on every call)."""
        return list(cls._SUPPORTED_FORMATS)

    async def render(self, content: str, title: str = "Generated Presentation", **kwargs) -> Tuple[str, str]:
        """
        Render content as PowerPoint presentation.

        Args:
            content: Content to render as presentation. ``None`` or an empty
                string produces a single default title slide.
            title: Title for the presentation; only used on the default title
                slide that is created when no slides could be parsed.
            **kwargs: Additional rendering options (currently ignored).

        Returns:
            A ``(payload, mime_type)`` tuple. On success ``payload`` is the
            base64-encoded .pptx file and ``mime_type`` is the PowerPoint MIME
            type. On failure ``payload`` is a human-readable error message
            and ``mime_type`` is ``"text/plain"``; no exception propagates.
        """
        try:
            # Imported lazily so that a missing python-pptx installation is
            # reported through the return value instead of breaking import
            # of this module.
            from pptx import Presentation
            from pptx.util import Inches

            content = content or ""

            prs = Presentation()

            # Set slide size (16:9)
            prs.slide_width = Inches(13.33)
            prs.slide_height = Inches(7.5)

            slides_data = self._parse_content_to_slides(content, title)
            logger.info("Parsed %d slides from content", len(slides_data))
            # Previews may contain user content, so they stay at DEBUG level.
            logger.debug("Content preview: '%s...'", content[:200])

            for slide_number, slide_data in enumerate(slides_data, start=1):
                slide_content = slide_data.get("content", "")
                logger.info(
                    "Slide %d: '%s' - %d chars",
                    slide_number,
                    slide_data.get("title", "No title"),
                    len(slide_content),
                )
                if slide_content:
                    logger.debug("  Content preview: '%s...'", slide_content[:100])
                else:
                    logger.warning("  ⚠️ Slide %d has NO content!", slide_number)

                self._add_content_slide(
                    prs, slide_data.get("title", "Slide"), slide_content
                )

            # If no slides were created, create a default title slide so the
            # resulting file is never an empty presentation.
            if not slides_data:
                slide = prs.slides.add_slide(prs.slide_layouts[0])  # Title slide layout
                slide.shapes.title.text = title
                slide.placeholders[1].text = "Generated by PowerOn AI System"

            # Serialise in memory; getvalue() returns the whole buffer
            # regardless of the current stream position.
            buffer = io.BytesIO()
            prs.save(buffer)
            pptx_bytes = buffer.getvalue()
            pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')

            logger.info(
                "Successfully rendered PowerPoint presentation: %d bytes",
                len(pptx_bytes),
            )
            return pptx_base64, self._PPTX_MIME_TYPE

        except ImportError:
            logger.error("python-pptx library not installed. Install with: pip install python-pptx")
            return "python-pptx library not installed", "text/plain"
        except Exception as e:
            # Boundary handler: callers receive the failure as plain text,
            # the traceback goes to the log.
            logger.exception("Error rendering PowerPoint presentation: %s", e)
            return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"

    def _add_content_slide(self, prs, slide_title: str, content_text: str) -> None:
        """Append one title-and-body slide to *prs* and fill in its text."""
        # Layout index 1 is used as the "Title and Content" layout.
        slide = prs.slides.add_slide(prs.slide_layouts[1])
        slide.shapes.title.text = slide_title

        text_frame = slide.placeholders[1].text_frame
        text_frame.clear()
        self._fill_text_frame(text_frame, content_text)

    def _fill_text_frame(self, text_frame, content_text: str) -> None:
        """Write *content_text* into *text_frame*, one paragraph per blank-line-separated block."""
        from pptx.enum.text import PP_ALIGN
        from pptx.util import Pt

        first_written = False
        for block in content_text.split('\n\n'):
            if not block.strip():
                continue

            # The frame was cleared by the caller; its remaining paragraph is
            # reused for the first block actually written, so skipped blank
            # blocks never leave an empty paragraph at the top.
            if first_written:
                paragraph = text_frame.add_paragraph()
            else:
                paragraph = text_frame.paragraphs[0]
                first_written = True

            text, size_pt, bold = self._classify_paragraph(block)
            paragraph.text = text
            if size_pt is not None:
                paragraph.font.size = Pt(size_pt)
            if bold:
                paragraph.font.bold = True
            paragraph.alignment = PP_ALIGN.LEFT

    @classmethod
    def _classify_paragraph(cls, paragraph: str) -> Tuple[str, Optional[int], bool]:
        """Map a markdown-like block to ``(display_text, font_size_pt, bold)``.

        * ``# text``   -> header: markers removed, 24 pt, bold
        * ``## text``  (or deeper) -> subheader: markers removed, 20 pt, bold
        * ``*text*``   -> asterisks removed, bold, font size left unset
        * otherwise    -> text unchanged, 14 pt, not bold

        ``font_size_pt`` is ``None`` when no explicit size should be applied.
        """
        text = paragraph.strip()

        if text.startswith('#'):
            without_markers = text.lstrip('#')
            # The heading level is the number of leading '#'. It must be
            # derived before choosing a size: a plain startswith('#') test
            # also matches '##' and would hide the subheader case.
            level = len(text) - len(without_markers)
            size_pt = cls._HEADER_PT if level == 1 else cls._SUBHEADER_PT
            return without_markers.strip(), size_pt, True

        if text.startswith('*') and text.endswith('*'):
            return text.strip('*'), None, True

        return text, cls._BODY_PT, False

    def _parse_content_to_slides(self, content: str, title: str) -> list:
        """
        Parse content into slide data structure.

        Args:
            content: Content to parse
            title: Presentation title (not used here; kept for interface
                compatibility)

        Returns:
            List of slide data dictionaries, each with a ``"title"`` and a
            ``"content"`` string. Empty sections are dropped.
        """
        slides = []

        for position, section in enumerate(self._split_content_into_slides(content), start=1):
            body = section.strip()
            if not body:
                continue

            # Default title; numbering follows the section's position,
            # including skipped empty sections.
            slide_title = f"Slide {position}"
            first_line, _, remainder = body.partition('\n')

            if first_line.startswith('#'):
                # Markdown heading: strip the markers and use it as the title.
                # A heading with no text keeps the default title.
                slide_title = first_line.lstrip('#').strip() or slide_title
                body = remainder.strip()
            else:
                # Use the first line as title if it looks like one: short and
                # not ending like a sentence.
                candidate = first_line.strip()
                if len(candidate) < 100 and not candidate.endswith('.'):
                    slide_title = candidate
                    body = remainder.strip()

            slides.append({"title": slide_title, "content": body})

        return slides

    def _split_content_into_slides(self, content: str) -> list:
        """
        Split content into individual slides based on headers and structure.

        Strategy, in order:
          1. If any line is a ``#`` / ``##`` header, every header starts a new
             slide (text before the first header becomes its own slide).
          2. Otherwise split on triple newlines.
          3. Otherwise split on double newlines.
          4. Otherwise the whole content is a single slide.

        Args:
            content: Content to split

        Returns:
            List of non-empty, stripped slide content strings; an empty list
            for empty or whitespace-only content.
        """
        if not content or not content.strip():
            return []

        # sections[0] collects any text that precedes the first header.
        sections = [[]]
        found_header = False
        for line in content.split('\n'):
            if self._SLIDE_HEADER_RE.match(line.strip()):
                found_header = True
                sections.append([line])
            else:
                sections[-1].append(line)

        # Decide on "was a header seen", not on the number of slides: a
        # document with exactly one header is still header-structured and must
        # not be chopped up by the paragraph fallbacks below.
        if found_header:
            joined = ('\n'.join(section).strip() for section in sections)
            return [text for text in joined if text]

        # Fallback: split by triple newlines, then by double newlines.
        for separator in ('\n\n\n', '\n\n'):
            parts = content.split(separator)
            if len(parts) > 1:
                return [part.strip() for part in parts if part.strip()]

        # Last resort: return as single slide
        return [content.strip()]

    def get_output_mime_type(self) -> str:
        """Get MIME type for rendered output."""
        return self.output_mime_type

    def getExtractionPrompt(self) -> str:
        """Get extraction prompt for this renderer."""
        return "Extract content for PowerPoint presentation generation"
|