gateway/modules/services/serviceGeneration/renderers/pptx_renderer.py
2025-10-11 18:30:26 +02:00

252 lines
9.8 KiB
Python

import logging
import base64
import io
from typing import Dict, Any, Optional, Tuple
from .base_renderer import BaseRenderer
logger = logging.getLogger(__name__)
class PptxRenderer(BaseRenderer):
    """Renderer for PowerPoint (.pptx) files using the python-pptx library.

    Markdown-like text is split into slides (one per ``#``/``##`` header, or
    per blank-line separated section when the text has no headers) and every
    slide body is filled with lightly formatted paragraphs.
    """

    # Single source of truth for values the class reports in several places.
    _PPTX_FORMATS = ("pptx", "ppt")
    _PPTX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation"

    def __init__(self):
        super().__init__()
        self.supported_formats = list(self._PPTX_FORMATS)
        self.output_mime_type = self._PPTX_MIME_TYPE

    @classmethod
    def get_supported_formats(cls) -> list:
        """Get list of supported output formats (a fresh list on every call)."""
        return list(cls._PPTX_FORMATS)

    async def render(self, content: str, title: str = "Generated Presentation", **kwargs) -> Tuple[str, str]:
        """
        Render content as a PowerPoint presentation.

        Args:
            content: Markdown-like text to turn into slides. ``None`` is
                treated like an empty string.
            title: Title used for the fallback title slide when the content
                yields no slides.
            **kwargs: Additional rendering options (currently unused).

        Returns:
            Tuple ``(payload, mime_type)``. On success ``payload`` is the
            base64-encoded .pptx file and ``mime_type`` is the PowerPoint
            MIME type. On failure ``payload`` is a human-readable error
            message and ``mime_type`` is ``"text/plain"``; no exception is
            propagated to the caller.
        """
        try:
            # Imported lazily so a missing python-pptx install is reported
            # through the ImportError handler below instead of at import time.
            from pptx import Presentation
            from pptx.util import Inches

            content = content or ""

            prs = Presentation()
            # 16:9 slide size
            prs.slide_width = Inches(13.33)
            prs.slide_height = Inches(7.5)

            slides_data = self._parse_content_to_slides(content, title)
            logger.info("Parsed %d slides from content", len(slides_data))
            logger.debug("Content preview: '%s...'", content[:200])

            for slide_number, slide_data in enumerate(slides_data, start=1):
                self._add_content_slide(prs, slide_number, slide_data)

            # Nothing to show: emit a single title slide so the file is
            # still a valid, non-empty presentation.
            if not slides_data:
                slide = prs.slides.add_slide(prs.slide_layouts[0])  # Title slide layout
                slide.shapes.title.text = title
                slide.placeholders[1].text = "Generated by PowerOn AI System"

            buffer = io.BytesIO()
            prs.save(buffer)
            pptx_bytes = buffer.getvalue()
            pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')

            logger.info("Successfully rendered PowerPoint presentation: %d bytes", len(pptx_bytes))
            return pptx_base64, self._PPTX_MIME_TYPE
        except ImportError:
            logger.error("python-pptx library not installed. Install with: pip install python-pptx")
            return "python-pptx library not installed", "text/plain"
        except Exception as e:
            # logger.exception keeps the traceback, which the plain error
            # message returned to the caller does not carry.
            logger.exception("Error rendering PowerPoint presentation: %s", e)
            return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"

    def _add_content_slide(self, prs, slide_number: int, slide_data: Dict[str, Any]) -> None:
        """Append one "Title and Content" slide built from ``slide_data`` to ``prs``."""
        body = slide_data.get("content", "")
        logger.info(
            "Slide %d: '%s' - %d chars",
            slide_number, slide_data.get('title', 'No title'), len(body),
        )
        if body:
            logger.debug("  Content preview: '%s...'", body[:100])
        else:
            logger.warning("  ⚠️ Slide %d has NO content!", slide_number)

        slide = prs.slides.add_slide(prs.slide_layouts[1])  # Title and Content layout
        slide.shapes.title.text = slide_data.get("title", "Slide")

        text_frame = slide.placeholders[1].text_frame
        text_frame.clear()
        self._fill_text_frame(text_frame, body)

    def _fill_text_frame(self, text_frame, content_text: str) -> None:
        """Write blank-line separated chunks of ``content_text`` as formatted paragraphs."""
        from pptx.util import Pt
        from pptx.enum.text import PP_ALIGN

        # A cleared text frame still owns one paragraph; reuse it for the
        # first chunk that is actually written, then append new ones.
        reuse_existing = True
        for chunk in content_text.split('\n\n'):
            text = chunk.strip()
            if not text:
                continue

            if reuse_existing:
                p = text_frame.paragraphs[0]
                reuse_existing = False
            else:
                p = text_frame.add_paragraph()

            # '##' must be tested before '#', otherwise the subheader
            # branch can never be reached.
            if text.startswith('##'):
                # Subheader
                p.text = text.lstrip('#').strip()
                p.font.size = Pt(20)
                p.font.bold = True
            elif text.startswith('#'):
                # Header
                p.text = text.lstrip('#').strip()
                p.font.size = Pt(24)
                p.font.bold = True
            elif text.startswith('*') and text.endswith('*'):
                # Bold text
                p.text = text.strip('*')
                p.font.bold = True
            else:
                # Regular text
                p.text = text
                p.font.size = Pt(14)

            p.alignment = PP_ALIGN.LEFT

    def _parse_content_to_slides(self, content: str, title: str) -> list:
        """
        Parse content into slide data structure.

        Args:
            content: Content to parse
            title: Presentation title (not used for individual slides)

        Returns:
            List of dictionaries with the keys ``"title"`` and ``"content"``,
            one per non-empty section of ``content``.
        """
        slides = []
        sections = self._split_content_into_slides(content)
        for slide_number, raw_section in enumerate(sections, start=1):
            section = raw_section.strip()
            if not section:
                continue

            default_title = f"Slide {slide_number}"
            slide_data = {"title": default_title, "content": section}

            lines = section.split('\n')
            first_line = lines[0].strip()
            remainder = '\n'.join(lines[1:]).strip()

            if lines[0].startswith('#'):
                # Header line becomes the title; a bare '#' keeps the default
                # title rather than producing an empty one.
                slide_data["title"] = lines[0].lstrip('#').strip() or default_title
                slide_data["content"] = remainder
            elif first_line and len(first_line) < 100 and not first_line.endswith('.'):
                # A short first line that is not a full sentence is taken
                # to be a title.
                slide_data["title"] = first_line
                slide_data["content"] = remainder

            slides.append(slide_data)
        return slides

    def _split_content_into_slides(self, content: str) -> list:
        """
        Split content into individual slides based on headers and structure.

        Strategy, in order:
          1. Start a new slide at every ``#`` / ``##`` header line.
          2. Without headers, split on two blank lines.
          3. Otherwise split on one blank line.
          4. Otherwise return the whole content as one slide.

        Args:
            content: Content to split (``None`` is treated as empty)

        Returns:
            List of slide content strings
        """
        import re

        # Normalise Windows line endings so the blank-line fallbacks work.
        content = (content or "").replace('\r\n', '\n')
        header_pattern = re.compile(r'^(#{1,2})\s+(.+)$')

        # Group lines so that each header line opens a new group.
        groups = [[]]
        found_header = False
        for line in content.split('\n'):
            if header_pattern.match(line.strip()):
                found_header = True
                groups.append([])
            groups[-1].append(line)

        # Headers are authoritative even when there is only one of them;
        # otherwise a document with a single header would be shredded into
        # paragraph slides with an empty title slide in front.
        if found_header:
            joined = ('\n'.join(group).strip() for group in groups)
            return [text for text in joined if text]

        # Fallbacks: split on two blank lines first, then on one blank line.
        for separator in ('\n\n\n', '\n\n'):
            sections = content.split(separator)
            if len(sections) > 1:
                return [s.strip() for s in sections if s.strip()]

        # Last resort: return as single slide
        return [content.strip()]

    def get_output_mime_type(self) -> str:
        """Get MIME type for rendered output."""
        return self.output_mime_type

    def getExtractionPrompt(self) -> str:
        """Get extraction prompt for this renderer."""
        return "Extract content for PowerPoint presentation generation"