From a68dac200e3c1e2c139b5d20e127629becb5262a Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sun, 12 Oct 2025 02:18:42 +0200
Subject: [PATCH] renderers pdf and pptx tested and fixed
---
.../renderers/rendererBaseTemplate.py | 94 +++-
.../renderers/rendererPdf.py | 242 +++++++++-
.../renderers/rendererPptx.py | 457 +++++++++++++-----
test_document_processing.py | 8 +-
4 files changed, 655 insertions(+), 146 deletions(-)
diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
index d4b147a7..34c7387c 100644
--- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
+++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
@@ -197,6 +197,9 @@ class BaseRenderer(ABC):
Returns:
Dict with styling definitions
"""
+ # DEBUG: Show which renderer is calling this method
+ print(f"🔍 BASE TEMPLATE _get_ai_styles called by: {self.__class__.__name__}")
+
if not ai_service:
return default_styles
@@ -207,15 +210,16 @@ class BaseRenderer(ABC):
request_options.operationType = OperationType.GENERAL
request = AiCallRequest(prompt=style_template, context="", options=request_options)
+
+ # DEBUG: Show the actual prompt being sent to AI
+ self.logger.debug(f"AI Style Template Prompt:")
+ self.logger.debug(f"{style_template}")
+
response = await ai_service.aiObjects.call(request)
import json
import re
- # Debug output
- print(f"🔍 AI STYLING RESPONSE TYPE: {type(response)}")
- print(f"🔍 AI STYLING RESPONSE LENGTH: {len(response.content) if response and hasattr(response, 'content') and response.content else 0}")
-
# Clean and parse JSON
result = response.content.strip() if response and response.content else ""
@@ -228,25 +232,73 @@ class BaseRenderer(ABC):
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
if json_match:
result = json_match.group(1).strip()
- print(f"🔍 EXTRACTED JSON FROM MARKDOWN: {result[:100]}...")
elif result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
- print(f"🔍 CLEANED JSON FROM MARKDOWN: {result[:100]}...")
elif result.startswith('```'):
result = re.sub(r'^```\s*', '', result)
result = re.sub(r'\s*```$', '', result)
- print(f"🔍 CLEANED JSON FROM GENERIC MARKDOWN: {result[:100]}...")
# Try to parse JSON
try:
styles = json.loads(result)
- print(f"🔍 AI STYLING PARSED KEYS: {list(styles.keys()) if isinstance(styles, dict) else 'Not a dict'}")
except json.JSONDecodeError as json_error:
- print(f"🔍 AI STYLING JSON ERROR: {json_error}")
- print(f"🔍 AI STYLING RAW RESULT: {result[:200]}...")
- self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
- return default_styles
+ self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
+
+ # Use print instead of logger to avoid truncation
+ print(f"🔍 FULL AI RESPONSE THAT FAILED TO PARSE:")
+ print("=" * 100)
+ print(result)
+ print("=" * 100)
+ print(f"🔍 RESPONSE LENGTH: {len(result)} characters")
+
+ self.logger.warning(f"Raw content that failed to parse: {result}")
+
+ # Try to fix incomplete JSON by adding missing closing braces
+ open_braces = result.count('{')
+ close_braces = result.count('}')
+
+ if open_braces > close_braces:
+ # JSON is incomplete, add missing closing braces
+ missing_braces = open_braces - close_braces
+ result = result + '}' * missing_braces
+ self.logger.info(f"Added {missing_braces} missing closing brace(s)")
+ self.logger.debug(f"Fixed JSON: {result}")
+
+ # Try parsing the fixed JSON
+ try:
+ styles = json.loads(result)
+ self.logger.info("Successfully fixed incomplete JSON")
+ except json.JSONDecodeError as fix_error:
+ self.logger.warning(f"Fixed JSON still invalid: {fix_error}")
+ self.logger.warning(f"Fixed JSON content: {result}")
+ # Try to extract just the JSON part if it's embedded in text
+ json_start = result.find('{')
+ json_end = result.rfind('}')
+ if json_start != -1 and json_end != -1 and json_end > json_start:
+ json_part = result[json_start:json_end+1]
+ try:
+ styles = json.loads(json_part)
+ self.logger.info("Successfully extracted JSON from explanatory text")
+ except json.JSONDecodeError:
+ self.logger.warning("Could not extract valid JSON from response, using defaults")
+ return default_styles
+ else:
+ return default_styles
+ else:
+ # Try to extract just the JSON part if it's embedded in text
+ json_start = result.find('{')
+ json_end = result.rfind('}')
+ if json_start != -1 and json_end != -1 and json_end > json_start:
+ json_part = result[json_start:json_end+1]
+ try:
+ styles = json.loads(json_part)
+ self.logger.info("Successfully extracted JSON from explanatory text")
+ except json.JSONDecodeError:
+ self.logger.warning("Could not extract valid JSON from response, using defaults")
+ return default_styles
+ else:
+ return default_styles
# Convert colors to appropriate format
styles = self._convert_colors_format(styles)
@@ -278,8 +330,22 @@ class BaseRenderer(ABC):
"""
schema_json = json.dumps(style_schema, indent=4)
- return f"""Return this exact JSON structure with your styling customizations:
+ # DEBUG: Show the schema being sent
+ print(f"🔍 AI STYLE SCHEMA FOR {format_name.upper()}:")
+ print("=" * 80)
+ print(schema_json)
+ print("=" * 80)
+
+ return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {format_name.upper()} documents.
+
+Use this schema as a template and customize the values for professional document styling:
{schema_json}
-NO TEXT. NO EXPLANATIONS. NO MARKDOWN. NO WRAPPER OBJECTS. ONLY THE JSON ABOVE."""
\ No newline at end of file
+Requirements:
+- Return ONLY the complete JSON object (no markdown, no explanations)
+- Customize colors, fonts, and spacing for professional appearance
+- Ensure all objects are properly closed with closing braces
+- Make the styling modern and professional
+
+Return the complete JSON:"""
\ No newline at end of file
diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py
index 43c0ce6d..1c5f0739 100644
--- a/modules/services/serviceGeneration/renderers/rendererPdf.py
+++ b/modules/services/serviceGeneration/renderers/rendererPdf.py
@@ -73,6 +73,10 @@ class RendererPdf(BaseRenderer):
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
+ # Shorten over-long titles to limit wrapping, but keep the document's own title text
+ if len(document_title) > 40:
+     document_title = document_title[:37].rstrip() + "..."
+
# Create a buffer to hold the PDF
buffer = io.BytesIO()
@@ -92,14 +96,18 @@ class RendererPdf(BaseRenderer):
# Title page
title_style = self._create_title_style(styles)
story.append(Paragraph(document_title, title_style))
- story.append(Spacer(1, 20))
+ story.append(Spacer(1, 50)) # Increased spacing to prevent overlap
story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._create_normal_style(styles)))
+ story.append(Spacer(1, 30)) # Add spacing before page break
story.append(PageBreak())
# Process each section
sections = json_content.get("sections", [])
- for section in sections:
+ print(f"🔍 PDF SECTIONS TO PROCESS: {len(sections)} sections")
+ for i, section in enumerate(sections):
+ print(f"🔍 PDF SECTION {i}: type={section.get('type', 'unknown')}, id={section.get('id', 'unknown')}")
section_elements = self._render_json_section(section, styles)
+ print(f"🔍 PDF SECTION {i} ELEMENTS: {len(section_elements)} elements")
story.extend(section_elements)
# Build PDF
@@ -130,11 +138,210 @@ class RendererPdf(BaseRenderer):
}
style_template = self._create_ai_style_template("pdf", user_prompt, style_schema)
+
+ # DEBUG: Show which method is being called
+ print(f"🔍 PDF RENDERER: Calling base template _get_ai_styles")
+
+ # Use base template method like DOCX does (this works!)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pdf_styles())
+ # DEBUG: Check what we got from AI styling
+ print(f"🔍 PDF AI STYLING RESULT: {type(styles)}")
+ if styles is None:
+ print(f"🔍 PDF AI STYLING RETURNED NONE!")
+ return self._get_default_pdf_styles()
+ elif isinstance(styles, dict):
+ print(f"🔍 PDF AI STYLING KEYS: {list(styles.keys())}")
+ print(f"🔍 PDF AI STYLING CONTENT:")
+ for key, value in styles.items():
+ print(f" {key}: {value}")
+ # Check specific colors
+ print(f"🔍 PDF TITLE COLOR FROM AI: {styles.get('title', {}).get('color', 'NOT_FOUND')}")
+ print(f"🔍 PDF HEADING1 COLOR FROM AI: {styles.get('heading1', {}).get('color', 'NOT_FOUND')}")
+ print(f"🔍 PDF PARAGRAPH COLOR FROM AI: {styles.get('paragraph', {}).get('color', 'NOT_FOUND')}")
+ else:
+ print(f"🔍 PDF AI STYLING VALUE: {styles}")
+
+ # Convert colors to PDF format after getting styles
+ print(f"🔍 PDF BEFORE COLOR CONVERSION:")
+ for key, value in styles.items():
+ print(f" {key}: {value}")
+
+ styles = self._convert_colors_format(styles)
+
+ print(f"🔍 PDF AFTER COLOR CONVERSION:")
+ for key, value in styles.items():
+ print(f" {key}: {value}")
+
# Validate and fix contrast issues
return self._validate_pdf_styles_contrast(styles)
+    async def _get_ai_styles_with_pdf_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
+        """Ask the AI service for a style definition and convert its colors for PDF.
+
+        Args:
+            ai_service: Object exposing ``aiObjects.call``; when falsy, the
+                defaults are returned without any call being made.
+            style_template: Prompt text sent to the AI service.
+            default_styles: Returned whenever the reply is missing, empty, or
+                cannot be turned into a JSON object.
+
+        Returns:
+            The parsed styles passed through ``_convert_colors_format``, or
+            ``default_styles`` on any failure. Errors are logged, not raised.
+        """
+        if not ai_service:
+            return default_styles
+
+        try:
+            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
+
+            request_options = AiCallOptions()
+            request_options.operationType = OperationType.GENERAL
+            request = AiCallRequest(prompt=style_template, context="", options=request_options)
+
+            # Check if AI service is properly configured
+            if not getattr(ai_service, 'aiObjects', None):
+                self.logger.warning("AI service not properly configured, using defaults")
+                return default_styles
+
+            response = await ai_service.aiObjects.call(request)
+            if not response:
+                self.logger.warning("AI service returned no response, using defaults")
+                return default_styles
+
+            result = response.content.strip() if response.content else ""
+            if not result:
+                self.logger.warning("AI styling returned empty response, using defaults")
+                return default_styles
+
+            self.logger.debug(f"AI styling raw response: {result[:200]}...")
+
+            styles = self._parse_pdf_style_json(result)
+            if styles is None:
+                # Log through the logger rather than print(): stdout is not a debug channel.
+                self.logger.warning(f"Raw content that failed to parse: {result}")
+                self.logger.warning("Could not extract valid JSON from response, using defaults")
+                return default_styles
+
+            self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")
+
+            # Convert colors to PDF format
+            return self._convert_colors_format(styles)
+
+        except Exception as e:
+            self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
+            return default_styles
+
+    def _parse_pdf_style_json(self, text: str):
+        """Best-effort extraction of a JSON object from an AI reply.
+
+        Candidates are tried in this order, so a reply that is already valid
+        JSON is never cut down by a looser extraction step:
+
+        1. the reply with Markdown code fences removed,
+        2. the same text with missing closing braces appended (truncated reply),
+        3. the outermost ``{...}`` span (JSON wrapped in explanatory prose).
+
+        Returns:
+            The first candidate that parses to a ``dict``, or ``None``.
+        """
+        import json
+        import re
+
+        fenced = re.search(r'```json\s*\n(.*?)\n```', text, re.DOTALL)
+        if fenced:
+            text = fenced.group(1).strip()
+        elif text.startswith('```'):
+            text = re.sub(r'^```(?:json)?\s*', '', text)
+            text = re.sub(r'\s*```$', '', text)
+        text = text.strip()
+
+        candidates = [text]
+        missing_braces = text.count('{') - text.count('}')
+        if missing_braces > 0:
+            text = text + '}' * missing_braces
+            candidates.append(text)
+        json_start, json_end = text.find('{'), text.rfind('}')
+        if json_start != -1 and json_end > json_start:
+            candidates.append(text[json_start:json_end + 1])
+
+        for candidate in candidates:
+            try:
+                parsed = json.loads(candidate)
+            except json.JSONDecodeError:
+                continue
+            if isinstance(parsed, dict):
+                return parsed
+        return None
+
+    def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+        """Rewrite ``#RRGGBB`` values in nested style dicts as ``FFRRGGBB``.
+
+        Only string values inside dict-valued entries are considered. A
+        7-character ``#RRGGBB`` value is replaced in place by ``FF`` plus the six
+        hex digits (the ``#`` is dropped). 9-character ``#AARRGGBB`` values and
+        everything else are left untouched.
+
+        Returns:
+            The same ``styles`` object, mutated in place. If anything goes wrong
+            a warning is logged and ``styles`` is returned as it stands.
+        """
+        try:
+            for section_key, section in styles.items():
+                if not isinstance(section, dict):
+                    continue
+                for attr, raw in section.items():
+                    is_rgb_hex = isinstance(raw, str) and raw.startswith('#') and len(raw) == 7
+                    if is_rgb_hex:
+                        # Prefix an opaque alpha channel
+                        styles[section_key][attr] = f"FF{raw[1:]}"
+        except Exception as e:
+            self.logger.warning(f"Color conversion failed: {str(e)}")
+        return styles
+
+    def _get_safe_color(self, color_value: str, default: str = "#000000") -> str:
+        """Return a usable hex color string, or ``default`` for anything else.
+
+        Args:
+            color_value: Candidate color; expected as ``#RRGGBB`` or ``#AARRGGBB``.
+            default: Value returned when ``color_value`` is not a string, lacks
+                the leading ``#``, has another length, or has non-hex digits.
+
+        Returns:
+            ``FFRRGGBB`` (no ``#``) for a 7-character input, the input unchanged
+            for a 9-character input, otherwise ``default``.
+        """
+        if not (isinstance(color_value, str) and color_value.startswith('#')):
+            return default
+        digits = color_value[1:]
+        # Reject values such as "#GGGGGG" so the caller's default is honoured
+        if any(ch not in "0123456789abcdefABCDEF" for ch in digits):
+            return default
+        if len(digits) == 6:
+            return f"FF{digits}"
+        if len(digits) == 8:
+            return color_value
+        return default
+
def _validate_pdf_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
@@ -189,12 +396,20 @@ class RendererPdf(BaseRenderer):
"""Create title style from style definitions."""
title_style_def = styles.get("title", {})
+ # DEBUG: Show what color and spacing is being used for title
+ title_color = title_style_def.get("color", "#1F4E79")
+ title_space_after = title_style_def.get("space_after", 30)
+ print(f"🔍 PDF TITLE COLOR: {title_color} -> {self._hex_to_color(title_color)}")
+ print(f"🔍 PDF TITLE SPACE_AFTER: {title_space_after}")
+
return ParagraphStyle(
'CustomTitle',
- fontSize=title_style_def.get("font_size", 24),
+ fontSize=title_style_def.get("font_size", 20), # Reduced from 24 to 20
spaceAfter=title_style_def.get("space_after", 30),
alignment=self._get_alignment(title_style_def.get("align", "center")),
- textColor=self._hex_to_color(title_style_def.get("color", "#1F4E79"))
+ textColor=self._hex_to_color(title_color),
+ leading=title_style_def.get("font_size", 20) * 1.4, # Add line spacing for multi-line titles
+ spaceBefore=0 # Ensure no space before title
)
def _create_heading_style(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
@@ -237,10 +452,21 @@ class RendererPdf(BaseRenderer):
"""Convert hex color to reportlab color."""
try:
hex_color = hex_color.lstrip('#')
- r = int(hex_color[0:2], 16) / 255.0
- g = int(hex_color[2:4], 16) / 255.0
- b = int(hex_color[4:6], 16) / 255.0
- return colors.Color(r, g, b)
+
+ # Handle aRGB format (8 characters: FF + RGB)
+ if len(hex_color) == 8:
+ # Skip the alpha channel (first 2 characters)
+ hex_color = hex_color[2:]
+
+ # Handle RGB format (6 characters)
+ if len(hex_color) == 6:
+ r = int(hex_color[0:2], 16) / 255.0
+ g = int(hex_color[2:4], 16) / 255.0
+ b = int(hex_color[4:6], 16) / 255.0
+ return colors.Color(r, g, b)
+
+ # Fallback for other formats
+ return colors.black
except:
return colors.black
diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py
index 4dd0a07d..2ac9cd11 100644
--- a/modules/services/serviceGeneration/renderers/rendererPptx.py
+++ b/modules/services/serviceGeneration/renderers/rendererPptx.py
@@ -42,6 +42,9 @@ class RendererPptx(BaseRenderer):
from pptx.dml.color import RGBColor
import re
+ # Get AI-generated styling definitions first
+ styles = await self._get_pptx_styles(user_prompt, ai_service)
+
# Create new presentation
prs = Presentation()
@@ -54,9 +57,6 @@ class RendererPptx(BaseRenderer):
prs.slide_width = Inches(13.33)
prs.slide_height = Inches(7.5)
- # Get AI-generated styling definitions
- styles = await self._get_pptx_styles(user_prompt, ai_service)
-
# Generate slides from JSON content
slides_data = await self._parse_json_to_slides(extracted_content, title, styles)
logger.info(f"Parsed {len(slides_data)} slides from JSON content")
@@ -78,15 +78,23 @@ class RendererPptx(BaseRenderer):
slide_layout = prs.slide_layouts[slide_layout_index]
slide = prs.slides.add_slide(slide_layout)
- # Set title
+ # Set title with AI-generated styling
title_shape = slide.shapes.title
title_shape.text = slide_data.get("title", "Slide")
- # Set content
+ # Apply title styling
+ title_style = styles.get("title", {})
+ if title_shape.text_frame.paragraphs[0].font:
+ title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44))
+ title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True)
+ title_color = self._get_safe_color(title_style.get("color", (31, 78, 121)))
+ title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
+
+ # Set content with AI-generated styling
content_shape = slide.placeholders[1]
content_text = slide_data.get("content", "")
- # Format content text
+ # Format content text with AI styles
text_frame = content_shape.text_frame
text_frame.clear()
@@ -102,26 +110,47 @@ class RendererPptx(BaseRenderer):
p.text = paragraph.strip()
- # Format based on content type
+ # Apply AI-generated styling based on content type
if paragraph.startswith('#'):
# Header
p.text = paragraph.lstrip('#').strip()
- p.font.size = Pt(24)
- p.font.bold = True
+ heading_style = styles.get("heading", {})
+ p.font.size = Pt(heading_style.get("font_size", 32))
+ p.font.bold = heading_style.get("bold", True)
+ heading_color = self._get_safe_color(heading_style.get("color", (47, 47, 47)))
+ p.font.color.rgb = RGBColor(*heading_color)
elif paragraph.startswith('##'):
# Subheader
p.text = paragraph.lstrip('#').strip()
- p.font.size = Pt(20)
- p.font.bold = True
+ subheading_style = styles.get("subheading", {})
+ p.font.size = Pt(subheading_style.get("font_size", 24))
+ p.font.bold = subheading_style.get("bold", True)
+ subheading_color = self._get_safe_color(subheading_style.get("color", (79, 79, 79)))
+ p.font.color.rgb = RGBColor(*subheading_color)
elif paragraph.startswith('*') and paragraph.endswith('*'):
# Bold text
p.text = paragraph.strip('*')
+ paragraph_style = styles.get("paragraph", {})
+ p.font.size = Pt(paragraph_style.get("font_size", 18))
p.font.bold = True
+ paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
+ p.font.color.rgb = RGBColor(*paragraph_color)
else:
# Regular text
- p.font.size = Pt(14)
+ paragraph_style = styles.get("paragraph", {})
+ p.font.size = Pt(paragraph_style.get("font_size", 18))
+ p.font.bold = paragraph_style.get("bold", False)
+ paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
+ p.font.color.rgb = RGBColor(*paragraph_color)
- p.alignment = PP_ALIGN.LEFT
+ # Apply alignment (paragraph_style is unbound after a heading branch, so read styles directly)
+ align = styles.get("paragraph", {}).get("align", "left")
+ if align == "center":
+     p.alignment = PP_ALIGN.CENTER
+ elif align == "right":
+     p.alignment = PP_ALIGN.RIGHT
+ else:
+     p.alignment = PP_ALIGN.LEFT
# If no slides were created, create a default slide
if not slides_data:
@@ -131,8 +160,24 @@ class RendererPptx(BaseRenderer):
title_shape = slide.shapes.title
title_shape.text = title
+ # Apply title styling to default slide
+ title_style = styles.get("title", {})
+ if title_shape.text_frame.paragraphs[0].font:
+ title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 48))
+ title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True)
+ title_color = self._get_safe_color(title_style.get("color", (31, 78, 121)))
+ title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
+
subtitle_shape = slide.placeholders[1]
subtitle_shape.text = "Generated by PowerOn AI System"
+
+ # Apply subtitle styling
+ paragraph_style = styles.get("paragraph", {})
+ if subtitle_shape.text_frame.paragraphs[0].font:
+ subtitle_shape.text_frame.paragraphs[0].font.size = Pt(paragraph_style.get("font_size", 20))
+ subtitle_shape.text_frame.paragraphs[0].font.bold = paragraph_style.get("bold", False)
+ paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
+ subtitle_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*paragraph_color)
# Save to buffer
buffer = io.BytesIO()
@@ -261,23 +306,195 @@ class RendererPptx(BaseRenderer):
async def _get_pptx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
"""Get PowerPoint styling definitions using base template AI styling."""
style_schema = {
- "title": {"font_size": 44, "color": "#1F4E79", "bold": True, "align": "center"},
- "heading": {"font_size": 32, "color": "#2F2F2F", "bold": True, "align": "left"},
- "subheading": {"font_size": 24, "color": "#4F4F4F", "bold": True, "align": "left"},
- "paragraph": {"font_size": 18, "color": "#2F2F2F", "bold": False, "align": "left"},
- "bullet_list": {"font_size": 18, "color": "#2F2F2F", "indent": 20},
- "table_header": {"font_size": 16, "color": "#FFFFFF", "bold": True, "background": "#4F4F4F"},
- "table_cell": {"font_size": 14, "color": "#2F2F2F", "bold": False, "background": "#FFFFFF"},
+ "title": {"font_size": 52, "color": "#1B365D", "bold": True, "align": "center"},
+ "heading": {"font_size": 36, "color": "#2C5F2D", "bold": True, "align": "left"},
+ "subheading": {"font_size": 28, "color": "#4A90E2", "bold": True, "align": "left"},
+ "paragraph": {"font_size": 20, "color": "#2F2F2F", "bold": False, "align": "left"},
+ "bullet_list": {"font_size": 20, "color": "#2F2F2F", "indent": 20},
+ "table_header": {"font_size": 18, "color": "#FFFFFF", "bold": True, "background": "#1B365D"},
+ "table_cell": {"font_size": 16, "color": "#2F2F2F", "bold": False, "background": "#F8F9FA"},
"slide_size": "16:9",
- "content_per_slide": "concise"
+ "content_per_slide": "concise",
+ "design_theme": "corporate",
+ "color_scheme": "professional",
+ "background_style": "clean",
+ "accent_colors": ["#1B365D", "#2C5F2D", "#4A90E2", "#6B7280"],
+ "professional_grade": True,
+ "executive_ready": True
}
- style_template = self._create_ai_style_template("pptx", user_prompt, style_schema)
- styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pptx_styles())
+ style_template = self._create_professional_pptx_template(user_prompt, style_schema)
+ # Use our own _get_ai_styles_with_pptx_colors method to ensure proper color conversion
+ styles = await self._get_ai_styles_with_pptx_colors(ai_service, style_template, self._get_default_pptx_styles())
# Validate PowerPoint-specific requirements
return self._validate_pptx_styles_readability(styles)
+    def _create_professional_pptx_template(self, user_prompt: str, style_schema: Dict[str, Any]) -> str:
+        """Build the AI prompt that asks for a customized PowerPoint style JSON.
+
+        Args:
+            user_prompt: The user's request; a generic corporate-slides request
+                is substituted when it is empty or ``None``.
+            style_schema: Style definition embedded in the prompt as indented JSON.
+
+        Returns:
+            The complete prompt text.
+        """
+        import json
+
+        request_text = user_prompt or "Create professional corporate slides"
+        schema_text = json.dumps(style_schema, indent=4)
+
+        return f"""Customize the JSON below for professional PowerPoint slides.
+
+User Request: {request_text}
+
+Rules:
+- Use professional colors (blues, grays, deep greens)
+- Large, readable font sizes
+- High contrast
+- Sophisticated color palettes
+
+Return ONLY this JSON with your changes:
+
+{schema_text}
+
+JSON ONLY. NO OTHER TEXT."""
+
+    async def _get_ai_styles_with_pptx_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
+        """Ask the AI service for a style definition and convert its colors for PowerPoint.
+
+        Args:
+            ai_service: Object exposing ``aiObjects.call``; when falsy, the
+                defaults are returned without any call being made.
+            style_template: Prompt text sent to the AI service.
+            default_styles: Returned whenever the reply is missing, empty, or
+                cannot be turned into a JSON object.
+
+        Returns:
+            The parsed styles passed through ``_convert_colors_format``, or
+            ``default_styles`` on any failure. Errors are logged, not raised.
+        """
+        if not ai_service:
+            return default_styles
+
+        try:
+            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
+
+            request_options = AiCallOptions()
+            request_options.operationType = OperationType.GENERAL
+            request = AiCallRequest(prompt=style_template, context="", options=request_options)
+
+            # Check if AI service is properly configured
+            if not getattr(ai_service, 'aiObjects', None):
+                self.logger.warning("AI service not properly configured, using defaults")
+                return default_styles
+
+            response = await ai_service.aiObjects.call(request)
+            if not response:
+                self.logger.warning("AI service returned no response, using defaults")
+                return default_styles
+
+            result = response.content.strip() if response.content else ""
+            if not result:
+                self.logger.warning("AI styling returned empty response, using defaults")
+                return default_styles
+
+            self.logger.debug(f"AI styling raw response: {result[:200]}...")
+
+            styles = self._parse_pptx_style_json(result)
+            if styles is None:
+                self.logger.warning(f"Raw content that failed to parse: {result[:100]}...")
+                self.logger.warning("Could not extract valid JSON from response, using defaults")
+                return default_styles
+
+            self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")
+
+            # Convert colors to PowerPoint RGB format
+            return self._convert_colors_format(styles)
+
+        except Exception as e:
+            self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
+            return default_styles
+
+    def _parse_pptx_style_json(self, text: str):
+        """Best-effort extraction of a JSON object from an AI reply.
+
+        The reply is parsed as a whole first (after removing Markdown code
+        fences), so a valid nested object is never cut down. Only if that fails
+        is the outermost ``{...}`` span tried, which covers JSON wrapped in
+        explanatory prose.
+
+        Returns:
+            The first candidate that parses to a ``dict``, or ``None``.
+        """
+        import json
+        import re
+
+        fenced = re.search(r'```json\s*\n(.*?)\n```', text, re.DOTALL)
+        if fenced:
+            text = fenced.group(1).strip()
+        elif text.startswith('```'):
+            text = re.sub(r'^```(?:json)?\s*', '', text)
+            text = re.sub(r'\s*```$', '', text)
+        text = text.strip()
+
+        candidates = [text]
+        json_start, json_end = text.find('{'), text.rfind('}')
+        if json_start != -1 and json_end > json_start:
+            candidates.append(text[json_start:json_end + 1])
+
+        for candidate in candidates:
+            try:
+                parsed = json.loads(candidate)
+            except json.JSONDecodeError:
+                continue
+            if isinstance(parsed, dict):
+                return parsed
+        return None
+
+    def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+        """Convert hex color strings in nested style dicts to ``(r, g, b)`` tuples.
+
+        Only string values starting with ``#`` inside dict-valued entries are
+        considered. ``#RRGGBB`` and ``#AARRGGBB`` (alpha dropped) are replaced in
+        place by an integer tuple. A value with another length or with non-hex
+        digits is left unchanged and does not stop the remaining entries from
+        being converted.
+
+        Returns:
+            The same ``styles`` object, mutated in place. A non-dict ``styles``
+            is returned untouched after a warning is logged.
+        """
+        def hex_to_rgb(value: str):
+            """Return an (r, g, b) tuple for a hex color string, or None if malformed."""
+            digits = value.lstrip('#')
+            if len(digits) == 8:  # aRGB format: skip the alpha channel
+                digits = digits[2:]
+            if len(digits) != 6 or any(ch not in "0123456789abcdefABCDEF" for ch in digits):
+                return None
+            return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))
+
+        if not isinstance(styles, dict):
+            self.logger.warning(f"Color conversion failed: expected dict, got {type(styles).__name__}")
+            return styles
+
+        for style_config in styles.values():
+            if not isinstance(style_config, dict):
+                continue
+            for prop, value in style_config.items():
+                if isinstance(value, str) and value.startswith('#'):
+                    rgb = hex_to_rgb(value)
+                    if rgb is not None:
+                        style_config[prop] = rgb
+        return styles
+
+    def _get_safe_color(self, color_value, default=(0, 0, 0)) -> tuple:
+        """Return an ``(r, g, b)`` tuple for ``color_value``, or ``default``.
+
+        Args:
+            color_value: A 3-item tuple or list of integers in 0..255, or a hex
+                string ``#RRGGBB`` / ``#AARRGGBB`` (alpha is dropped).
+            default: Returned for any other input, including hex strings with
+                non-hex digits and sequences with out-of-range components.
+
+        Returns:
+            A 3-tuple of integers. This method does not raise on malformed input.
+        """
+        if isinstance(color_value, (tuple, list)) and len(color_value) == 3:
+            # JSON round-trips turn tuples into lists, so both forms are accepted
+            if all(isinstance(c, int) and 0 <= c <= 255 for c in color_value):
+                return tuple(color_value)
+            return default
+
+        if isinstance(color_value, str) and color_value.startswith('#'):
+            hex_color = color_value.lstrip('#')
+            if len(hex_color) == 8:  # aRGB format: skip the alpha channel
+                hex_color = hex_color[2:]
+            if len(hex_color) == 6 and all(ch in "0123456789abcdefABCDEF" for ch in hex_color):
+                return tuple(int(hex_color[i:i + 2], 16) for i in (0, 2, 4))
+
+        return default
+
def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix readability issues in AI-generated styles."""
try:
@@ -305,17 +522,23 @@ class RendererPptx(BaseRenderer):
return self._get_default_pptx_styles()
def _get_default_pptx_styles(self) -> Dict[str, Any]:
- """Default PowerPoint styles."""
+ """Default PowerPoint styles with corporate professional color scheme."""
return {
- "title": {"font_size": 44, "color": "#1F4E79", "bold": True, "align": "center"},
- "heading": {"font_size": 32, "color": "#2F2F2F", "bold": True, "align": "left"},
- "subheading": {"font_size": 24, "color": "#4F4F4F", "bold": True, "align": "left"},
- "paragraph": {"font_size": 18, "color": "#2F2F2F", "bold": False, "align": "left"},
- "bullet_list": {"font_size": 18, "color": "#2F2F2F", "indent": 20},
- "table_header": {"font_size": 16, "color": "#FFFFFF", "bold": True, "background": "#4F4F4F"},
- "table_cell": {"font_size": 14, "color": "#2F2F2F", "bold": False, "background": "#FFFFFF"},
+ "title": {"font_size": 52, "color": (27, 54, 93), "bold": True, "align": "center"},
+ "heading": {"font_size": 36, "color": (44, 95, 45), "bold": True, "align": "left"},
+ "subheading": {"font_size": 28, "color": (74, 144, 226), "bold": True, "align": "left"},
+ "paragraph": {"font_size": 20, "color": (47, 47, 47), "bold": False, "align": "left"},
+ "bullet_list": {"font_size": 20, "color": (47, 47, 47), "indent": 20},
+ "table_header": {"font_size": 18, "color": (255, 255, 255), "bold": True, "background": (27, 54, 93)},
+ "table_cell": {"font_size": 16, "color": (47, 47, 47), "bold": False, "background": (248, 249, 250)},
"slide_size": "16:9",
- "content_per_slide": "concise"
+ "content_per_slide": "concise",
+ "design_theme": "corporate",
+ "color_scheme": "professional",
+ "background_style": "clean",
+ "accent_colors": [(27, 54, 93), (44, 95, 45), (74, 144, 226), (107, 114, 128)],
+ "professional_grade": True,
+ "executive_ready": True
}
async def _parse_json_to_slides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
@@ -375,26 +598,31 @@ class RendererPptx(BaseRenderer):
def _create_slide_from_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
"""Create a slide from a JSON section."""
try:
- section_title = section.get("title", "Untitled Section")
- content_type = section.get("content_type", "paragraph")
- elements = section.get("elements", [])
+ # Get section title from data or use default
+ section_title = "Untitled Section"
+ if section.get("type") == "heading":
+ section_title = section.get("data", {}).get("text", "Untitled Section")
+ elif section.get("title"):
+ section_title = section.get("title")
+
+ content_type = section.get("type", "paragraph")
+ section_data = section.get("data", {})
# Build slide content based on section type
content_parts = []
- for element in elements:
- if content_type == "table":
- content_parts.append(self._format_table_for_slide(element))
- elif content_type == "list":
- content_parts.append(self._format_list_for_slide(element))
- elif content_type == "heading":
- content_parts.append(self._format_heading_for_slide(element))
- elif content_type == "paragraph":
- content_parts.append(self._format_paragraph_for_slide(element))
- elif content_type == "code":
- content_parts.append(self._format_code_for_slide(element))
- else:
- content_parts.append(self._format_paragraph_for_slide(element))
+ if content_type == "table":
+ content_parts.append(self._format_table_for_slide(section_data))
+ elif content_type == "list":
+ content_parts.append(self._format_list_for_slide(section_data))
+ elif content_type == "heading":
+ content_parts.append(self._format_heading_for_slide(section_data))
+ elif content_type == "paragraph":
+ content_parts.append(self._format_paragraph_for_slide(section_data))
+ elif content_type == "code":
+ content_parts.append(self._format_code_for_slide(section_data))
+ else:
+ content_parts.append(self._format_paragraph_for_slide(section_data))
# Combine content parts
slide_content = "\n\n".join(filter(None, content_parts))
@@ -533,7 +761,7 @@ class RendererPptx(BaseRenderer):
return ""
def _get_slide_layout_index(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
- """Determine the best slide layout based on content."""
+ """Determine the best professional slide layout based on content."""
try:
content = slide_data.get("content", "")
title = slide_data.get("title", "")
@@ -542,23 +770,29 @@ class RendererPptx(BaseRenderer):
if not content or "Generated by PowerOn AI System" in content:
return 0 # Title slide layout
- # Check content type to determine layout
+ # Professional layout selection based on content
if "|" in content and "-" in content:
- # Has both tables and lists - use content with caption
+ # Contains both "|" and "-" (read as table plus list; any hyphen matches) - use content with caption for professional look
return 2
elif "|" in content:
- # Has tables - use content layout
+ # Has tables - use content layout for clean table presentation
return 1
elif content.count("•") > 2:
- # Has many bullet points - use content layout
+ # Has many bullet points - use content layout for better readability
+ return 1
+ elif len(content) > 200:
+ # Long content - use content layout for better text flow
+ return 1
+ elif title and len(title) > 20:
+ # Long title - use title and content layout
return 1
else:
- # Default to title and content
+ # Default to title and content layout for professional appearance
return 1
except Exception as e:
logger.warning(f"Error determining slide layout: {str(e)}")
- return 1 # Default to title and content
+ return 1 # Default to title and content layout
def _create_slides_from_sections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Create slides from sections based on content density and user intent."""
@@ -566,9 +800,37 @@ class RendererPptx(BaseRenderer):
slides = []
content_per_slide = styles.get("content_per_slide", "concise")
+ # Group sections under the most recent heading: each heading starts a new slide and supplies its title
+ current_slide_content = []
+ current_slide_title = "Content Overview"
+
for section in sections:
- section_slides = self._create_section_slides(section, styles, content_per_slide)
- slides.extend(section_slides)
+ section_type = section.get("type", "paragraph")
+ section_data = section.get("data", {})
+
+ if section_type == "heading":
+ # If we have accumulated content, create a slide
+ if current_slide_content:
+ slides.append({
+ "title": current_slide_title,
+ "content": "\n\n".join(current_slide_content)
+ })
+ current_slide_content = []
+
+ # Start new slide with heading as title
+ current_slide_title = section_data.get("text", "Untitled Section")
+ else:
+ # Add content to current slide
+ formatted_content = self._format_section_content(section)
+ if formatted_content:
+ current_slide_content.append(formatted_content)
+
+ # Add final slide if there's content
+ if current_slide_content:
+ slides.append({
+ "title": current_slide_title,
+ "content": "\n\n".join(current_slide_content)
+ })
return slides
@@ -576,75 +838,28 @@ class RendererPptx(BaseRenderer):
logger.warning(f"Error creating slides from sections: {str(e)}")
return []
- def _create_section_slides(self, section: Dict[str, Any], styles: Dict[str, Any], content_per_slide: str) -> List[Dict[str, Any]]:
- """Create one or more slides from a section based on content density."""
+ def _format_section_content(self, section: Dict[str, Any]) -> str:
+ """Format section content for slide presentation."""
try:
- section_title = section.get("title", "Untitled Section")
- content_type = section.get("content_type", "paragraph")
- elements = section.get("elements", [])
+ content_type = section.get("type", "paragraph")
+ section_data = section.get("data", {})
- if not elements:
- return [{
- "title": section_title,
- "content": "No content available for this section."
- }]
-
- # Determine how to split content based on type and density
- if content_per_slide == "detailed" and len(elements) > 3:
- # Split large sections into multiple slides
- return self._split_section_into_multiple_slides(section_title, elements, content_type)
+ if content_type == "table":
+ return self._format_table_for_slide(section_data)
+ elif content_type == "list":
+ return self._format_list_for_slide(section_data)
+ elif content_type == "heading":
+ return self._format_heading_for_slide(section_data)
+ elif content_type == "paragraph":
+ return self._format_paragraph_for_slide(section_data)
+ elif content_type == "code":
+ return self._format_code_for_slide(section_data)
else:
- # Create single slide for section
- slide_data = self._create_slide_from_section(section, styles)
- return [slide_data] if slide_data else []
+ return self._format_paragraph_for_slide(section_data)
except Exception as e:
- logger.warning(f"Error creating section slides: {str(e)}")
- return []
-
- def _split_section_into_multiple_slides(self, section_title: str, elements: List[Dict[str, Any]], content_type: str) -> List[Dict[str, Any]]:
- """Split a large section into multiple slides."""
- try:
- slides = []
- max_elements_per_slide = 3
-
- for i in range(0, len(elements), max_elements_per_slide):
- slide_elements = elements[i:i + max_elements_per_slide]
-
- # Create slide title
- if i == 0:
- slide_title = section_title
- else:
- slide_title = f"{section_title} (Part {i//max_elements_per_slide + 1})"
-
- # Build content for this slide
- content_parts = []
- for element in slide_elements:
- if content_type == "table":
- content_parts.append(self._format_table_for_slide(element))
- elif content_type == "list":
- content_parts.append(self._format_list_for_slide(element))
- elif content_type == "heading":
- content_parts.append(self._format_heading_for_slide(element))
- elif content_type == "paragraph":
- content_parts.append(self._format_paragraph_for_slide(element))
- elif content_type == "code":
- content_parts.append(self._format_code_for_slide(element))
- else:
- content_parts.append(self._format_paragraph_for_slide(element))
-
- slide_content = "\n\n".join(filter(None, content_parts))
-
- slides.append({
- "title": slide_title,
- "content": slide_content
- })
-
- return slides
-
- except Exception as e:
- logger.warning(f"Error splitting section into slides: {str(e)}")
- return []
+ logger.warning(f"Error formatting section content: {str(e)}")
+ return ""
def _format_timestamp(self) -> str:
"""Format current timestamp for presentation generation."""
diff --git a/test_document_processing.py b/test_document_processing.py
index 6170a5c5..b89b238f 100644
--- a/test_document_processing.py
+++ b/test_document_processing.py
@@ -154,7 +154,9 @@ async def process_documents_and_generate_summary():
# userPrompt = "Analyze these documents and create a comprehensive DOCX summary document including: 1) Document types and purposes, 2) Key information and main points, 3) Important details and numbers, 4) Notable sections, 5) Overall assessment and recommendations."
- userPrompt = "Extract the table from file and produce 2 lists in excel. one list with all entries, one list only with entries that are yellow highlighted."
+ userPrompt = "Analyze these documents and create a comprehensive form for a user to fill out"
+
+ # userPrompt = "Extract the table from file and produce 2 lists in excel. one list with all entries, one list only with entries that are yellow highlighted."
# userPrompt = "Create a docx file containing a summary and the COMPLETE list from the pdf file, having one additional column with a 'x' marker for all items, which are yellow highlighted."
@@ -166,8 +168,8 @@ async def process_documents_and_generate_summary():
prompt=userPrompt,
documents=documents,
options=ai_options,
- outputFormat="xlsx",
- title="Document Analysis Summary"
+ outputFormat="pdf",
+ title="Formulaire"
)
logger.info(f"✅ End-to-end test completed successfully")