From a540729533a598f3d6cc309c4d7ee17dfa465dfa Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Mon, 29 Dec 2025 00:06:34 +0100
Subject: [PATCH] fixed rendering styles
---
.../services/serviceAi/subStructureFilling.py | 38 +-
.../renderers/rendererDocx.py | 119 ++++-
.../renderers/rendererHtml.py | 1 -
.../renderers/rendererPptx.py | 478 ++++++++++++------
.../test10_document_generation_formats.py | 25 +-
5 files changed, 456 insertions(+), 205 deletions(-)
diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py
index af1e51f6..fd4d8bcd 100644
--- a/modules/services/serviceAi/subStructureFilling.py
+++ b/modules/services/serviceAi/subStructureFilling.py
@@ -215,10 +215,16 @@ class StructureFiller:
useAiCall = section.get("useAiCall", False)
# WICHTIG: Wenn keine ContentParts vorhanden sind UND kein generationHint, kann kein AI-Call gemacht werden
- # Aber: Wenn generationHint vorhanden ist, kann AI auch ohne ContentParts generieren (z.B. Executive Summary)
+ # But: if a generationHint is present, AI SHOULD be used even when useAiCall=false is set
+ # (e.g. when the AI generated the structure but set useAiCall incorrectly)
if len(contentPartIds) == 0 and not generationHint:
useAiCall = False
logger.debug(f"Section {sectionId}: No content parts and no generation hint, setting useAiCall=False")
+ elif len(contentPartIds) == 0 and generationHint and not useAiCall:
+ # Override: If there's a generationHint but no content parts, we should use AI
+ # This handles cases where structure generation set useAiCall=false incorrectly
+ useAiCall = True
+ logger.info(f"Section {sectionId}: Overriding useAiCall=True (has generationHint but no content parts)")
elements = []
@@ -658,14 +664,28 @@ class StructureFiller:
logger.error(f"Error generating section {sectionId}: {str(e)}")
# NICHT raise - Section wird mit Fehlermeldung gerendert
else:
- # Füge extrahierten Text direkt hinzu (kein AI-Call)
- logger.debug(f"Processing section {sectionId}: Single extracted part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly")
- elements.append({
- "type": "extracted_text",
- "content": part.data,
- "source": part.metadata.get("documentId"),
- "extractionPrompt": part.metadata.get("extractionPrompt")
- })
+ # Add extracted content directly (no AI call)
+ # CRITICAL: Check part typeGroup to determine correct element type
+ if part.typeGroup == "image":
+ # Image content should be added as image element, not extracted_text
+ logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call - adding as image element")
+ elements.append({
+ "type": "image",
+ "content": {
+ "base64Data": part.data,
+ "altText": part.metadata.get("usageHint", part.label),
+ "caption": part.metadata.get("caption", "")
+ }
+ })
+ else:
+ # Text content - add as extracted_text element
+ logger.debug(f"Processing section {sectionId}: Single extracted TEXT part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly")
+ elements.append({
+ "type": "extracted_text",
+ "content": part.data,
+ "source": part.metadata.get("documentId"),
+ "extractionPrompt": part.metadata.get("extractionPrompt")
+ })
section["elements"] = elements
diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py
index 43c85c47..337811a4 100644
--- a/modules/services/serviceGeneration/renderers/rendererDocx.py
+++ b/modules/services/serviceGeneration/renderers/rendererDocx.py
@@ -335,14 +335,26 @@ class RendererDocx(BaseRenderer):
elif section_type == "heading":
self._renderJsonHeading(doc, element, styles)
elif section_type == "paragraph":
- self._renderJsonParagraph(doc, element, styles)
+ # CRITICAL: Check if this is actually an image element before rendering as paragraph
+ # Image elements might not have type set, but have base64Data in content
+ content = element.get("content", {})
+ if isinstance(content, dict) and content.get("base64Data"):
+ # This is actually an image, render it as such
+ self._renderJsonImage(doc, element, styles)
+ else:
+ self._renderJsonParagraph(doc, element, styles)
elif section_type == "code_block":
self._renderJsonCodeBlock(doc, element, styles)
elif section_type == "image":
self._renderJsonImage(doc, element, styles)
else:
- # Fallback to paragraph for unknown types
- self._renderJsonParagraph(doc, element, styles)
+ # Fallback to paragraph for unknown types, but check for image data first
+ content = element.get("content", {})
+ if isinstance(content, dict) and content.get("base64Data"):
+ # This is actually an image, render it as such
+ self._renderJsonImage(doc, element, styles)
+ else:
+ self._renderJsonParagraph(doc, element, styles)
except Exception as e:
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
@@ -517,13 +529,22 @@ class RendererDocx(BaseRenderer):
if not isinstance(content, dict):
return
items = content.get("items", [])
- bullet_style = styles["bullet_list"]
+ bullet_style = styles.get("bullet_list", {})
for item in items:
if isinstance(item, str):
para = doc.add_paragraph(item, style='List Bullet')
elif isinstance(item, dict) and "text" in item:
para = doc.add_paragraph(item["text"], style='List Bullet')
+
+ # Apply bullet list styling from style set
+ if bullet_style and para.runs:
+ for run in para.runs:
+ if "font_size" in bullet_style:
+ run.font.size = Pt(bullet_style["font_size"])
+ if "color" in bullet_style:
+ color_hex = bullet_style["color"].lstrip('#')
+ run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
@@ -540,7 +561,13 @@ class RendererDocx(BaseRenderer):
if text:
level = max(1, min(6, level))
- doc.add_heading(text, level=level)
+ # Prefer the document's "Heading 1"/"Heading 2" styles; deeper levels (3-6) reuse "Heading 1"
+ style_name = f"Heading {level}" if level <= 2 else "Heading 1"
+ try:
+ para = doc.add_paragraph(text, style=style_name)
+ except KeyError:
+ # Fallback to built-in heading if custom style doesn't exist
+ doc.add_heading(text, level=level)
except Exception as e:
self.logger.warning(f"Error rendering heading: {str(e)}")
@@ -570,6 +597,25 @@ class RendererDocx(BaseRenderer):
if text:
para = doc.add_paragraph(text)
+ # Apply paragraph styling from style set
+ paragraph_style = styles.get("paragraph", {})
+ if paragraph_style:
+ for run in para.runs:
+ if "font_size" in paragraph_style:
+ run.font.size = Pt(paragraph_style["font_size"])
+ if "bold" in paragraph_style:
+ run.font.bold = paragraph_style["bold"]
+ if "color" in paragraph_style:
+ color_hex = paragraph_style["color"].lstrip('#')
+ run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
+ if "align" in paragraph_style:
+ align = paragraph_style["align"]
+ if align == "center":
+ para.alignment = WD_ALIGN_PARAGRAPH.CENTER
+ elif align == "right":
+ para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
+ else:
+ para.alignment = WD_ALIGN_PARAGRAPH.LEFT
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
@@ -583,16 +629,21 @@ class RendererDocx(BaseRenderer):
return
code = content.get("code", "")
language = content.get("language", "")
+ code_style = styles.get("code_block", {})
if code:
if language:
lang_para = doc.add_paragraph(f"Code ({language}):")
- lang_para.runs[0].bold = True
+ if lang_para.runs:
+ lang_para.runs[0].bold = True
code_para = doc.add_paragraph(code)
for run in code_para.runs:
- run.font.name = 'Courier New'
- run.font.size = Pt(10)
+ run.font.name = code_style.get("font", "Courier New")
+ run.font.size = Pt(code_style.get("font_size", 9))
+ if "color" in code_style:
+ color_hex = code_style["color"].lstrip('#')
+ run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
@@ -602,24 +653,38 @@ class RendererDocx(BaseRenderer):
try:
# Extract from nested content structure
content = image_data.get("content", {})
- if not isinstance(content, dict):
- return
- base64_data = content.get("base64Data", "")
- alt_text = content.get("altText", "Image")
+ base64_data = ""
+ alt_text = "Image"
- if base64_data:
- try:
- image_bytes = base64.b64decode(base64_data)
- doc.add_picture(io.BytesIO(image_bytes), width=Inches(4))
-
- if alt_text:
- caption_para = doc.add_paragraph(f"Figure: {alt_text}")
- caption_para.runs[0].italic = True
- except Exception as embedError:
- # Image decoding or embedding failed
- raise Exception(f"Failed to decode or embed image: {str(embedError)}")
- else:
+ if isinstance(content, dict):
+ base64_data = content.get("base64Data", "")
+ alt_text = content.get("altText", "Image")
+ elif isinstance(content, str):
+ # Content is a bare string instead of a dict (shouldn't happen): log a warning and skip the image
+ self.logger.warning("Image content is a string, not a dict. This should not happen.")
+ return
+
+ # If base64Data not found in content, try direct element fields (fallback)
+ if not base64_data:
+ base64_data = image_data.get("base64Data", "")
+ if not alt_text or alt_text == "Image":
+ alt_text = image_data.get("altText", "Image")
+
+ # CRITICAL: never fall back to rendering base64 data as text.
+ # With no image data in content or on the element, raise so the except block below logs it.
+ if not base64_data:
raise Exception("No image data provided (base64Data is empty)")
+
+ try:
+ image_bytes = base64.b64decode(base64_data)
+ doc.add_picture(io.BytesIO(image_bytes), width=Inches(4))
+
+ if alt_text and alt_text != "Image":
+ caption_para = doc.add_paragraph(f"Figure: {alt_text}")
+ caption_para.runs[0].italic = True
+ except Exception as embedError:
+ # Image decoding or embedding failed
+ raise Exception(f"Failed to decode or embed image: {str(embedError)}")
except Exception as e:
self.logger.error(f"Error embedding image in DOCX: {str(e)}")
@@ -792,7 +857,11 @@ class RendererDocx(BaseRenderer):
if "heading2" in styleSet:
self._createStyle(doc, "Heading 2", styleSet["heading2"], WD_STYLE_TYPE.PARAGRAPH)
- # Note: List Bullet and List Number are built-in Word styles, no need to create
+ # Create Paragraph style
+ if "paragraph" in styleSet:
+ self._createStyle(doc, "Custom Paragraph", styleSet["paragraph"], WD_STYLE_TYPE.PARAGRAPH)
+
+ # Note: List Bullet and List Number are built-in Word styles, but we apply custom styling to runs
except Exception as e:
self.logger.warning(f"Could not set up document styles: {str(e)}")
diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py
index 4d7dafe0..dda2c09f 100644
--- a/modules/services/serviceGeneration/renderers/rendererHtml.py
+++ b/modules/services/serviceGeneration/renderers/rendererHtml.py
@@ -712,7 +712,6 @@ class RendererHtml(BaseRenderer):
# Bestimme MIME-Type und Extension
mimeType = element.get("mimeType", "") or (content.get("mimeType", "") if isinstance(content, dict) else "")
- if not mimeType or mimeType == "unknown":
if not mimeType or mimeType == "unknown":
# Versuche MIME-Type aus base64 zu erkennen
if base64Data.startswith("/9j/"):
diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py
index 850a59a4..f824aa62 100644
--- a/modules/services/serviceGeneration/renderers/rendererPptx.py
+++ b/modules/services/serviceGeneration/renderers/rendererPptx.py
@@ -106,11 +106,85 @@ class RendererPptx(BaseRenderer):
if hasImages:
self._addImagesToSlide(slide, slide_data.get("images", []), styles)
- # Set content with AI-generated styling (if not image-only slide)
- if slide_content or not hasImages:
+ # Render sections with proper PowerPoint objects (tables, lists, etc.)
+ slide_sections = slide_data.get("sections", [])
+ if slide_sections:
+ # Use content placeholder for structured content
content_shape = slide.placeholders[1]
+ text_frame = content_shape.text_frame
+ text_frame.clear()
- # Format content text with AI styles
+ # Track vertical position for multiple content types
+ current_y = Inches(1.5) # Start below title
+
+ for section in slide_sections:
+ section_type = section.get("content_type", "paragraph")
+ elements = section.get("elements", [])
+
+ # Handle sections without elements (e.g., headings that create slides)
+ if not elements:
+ continue
+
+ for element in elements:
+ if not isinstance(element, dict):
+ continue
+
+ # Check element type first, fall back to section type
+ element_type = element.get("type", "")
+ if not element_type:
+ element_type = section_type
+
+ if element_type == "table":
+ # Render as actual PowerPoint table
+ self._addTableToSlide(slide, element, styles, current_y)
+ current_y += Inches(2) # Space for table
+ elif element_type == "bullet_list" or element_type == "list":
+ # Render as actual PowerPoint bullet list
+ self._addBulletListToSlide(slide, element, styles, text_frame)
+ elif element_type == "heading":
+ # Render as heading in text frame
+ self._addHeadingToSlide(slide, element, styles, text_frame)
+ elif element_type == "paragraph":
+ # Render as paragraph in text frame
+ self._addParagraphToSlide(slide, element, styles, text_frame)
+ elif element_type == "code_block" or element_type == "code":
+ # Render as formatted code block
+ self._addCodeBlockToSlide(slide, element, styles, text_frame)
+ elif element_type == "extracted_text":
+ # Render extracted text as paragraph with styling
+ content = element.get("content", "")
+ source = element.get("source", "")
+ if content:
+ paragraph_style = styles.get("paragraph", {})
+ p = text_frame.add_paragraph()
+ p.text = content
+ p.font.size = Pt(paragraph_style.get("font_size", 18))
+ p.font.bold = paragraph_style.get("bold", False)
+ p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
+ if source:
+ p.add_run(f" (Source: {source})").font.italic = True
+ elif element_type == "reference":
+ # Render reference
+ label = element.get("label", "Reference")
+ p = text_frame.add_paragraph()
+ p.text = f"[Reference: {label}]"
+ p.font.italic = True
+ else:
+ # Fallback: try to render as paragraph
+ content = element.get("content", "")
+ if isinstance(content, dict):
+ text = content.get("text", "")
+ elif isinstance(content, str):
+ text = content
+ else:
+ text = ""
+
+ if text:
+ self._addParagraphToSlide(slide, element, styles, text_frame)
+
+ # Fallback: if no sections but has content text, render as before
+ elif slide_content and not hasImages:
+ content_shape = slide.placeholders[1]
text_frame = content_shape.text_frame
text_frame.clear()
@@ -126,38 +200,12 @@ class RendererPptx(BaseRenderer):
p.text = paragraph.strip()
- # Apply AI-generated styling based on content type
- if paragraph.startswith('#'):
- # Header
- p.text = paragraph.lstrip('#').strip()
- heading_style = styles.get("heading", {})
- p.font.size = Pt(heading_style.get("font_size", 32))
- p.font.bold = heading_style.get("bold", True)
- heading_color = self._getSafeColor(heading_style.get("color", (47, 47, 47)))
- p.font.color.rgb = RGBColor(*heading_color)
- elif paragraph.startswith('##'):
- # Subheader
- p.text = paragraph.lstrip('#').strip()
- subheading_style = styles.get("subheading", {})
- p.font.size = Pt(subheading_style.get("font_size", 24))
- p.font.bold = subheading_style.get("bold", True)
- subheading_color = self._getSafeColor(subheading_style.get("color", (79, 79, 79)))
- p.font.color.rgb = RGBColor(*subheading_color)
- elif paragraph.startswith('*') and paragraph.endswith('*'):
- # Bold text
- p.text = paragraph.strip('*')
- paragraph_style = styles.get("paragraph", {})
- p.font.size = Pt(paragraph_style.get("font_size", 18))
- p.font.bold = True
- paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))
- p.font.color.rgb = RGBColor(*paragraph_color)
- else:
- # Regular text
- paragraph_style = styles.get("paragraph", {})
- p.font.size = Pt(paragraph_style.get("font_size", 18))
- p.font.bold = paragraph_style.get("bold", False)
- paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))
- p.font.color.rgb = RGBColor(*paragraph_color)
+ # Apply AI-generated styling
+ paragraph_style = styles.get("paragraph", {})
+ p.font.size = Pt(paragraph_style.get("font_size", 18))
+ p.font.bold = paragraph_style.get("bold", False)
+ paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))
+ p.font.color.rgb = RGBColor(*paragraph_color)
# Apply alignment
align = paragraph_style.get("align", "left")
@@ -396,8 +444,7 @@ class RendererPptx(BaseRenderer):
if userPrompt and aiService:
self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...")
enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
- # Convert colors to PPTX format after getting styles
- enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet)
+ # Colors already converted in _getAiStylesWithPptxColors
return self._validateStylesReadability(enhancedStyleSet)
else:
# Use default styles only
@@ -481,104 +528,19 @@ Return ONLY this JSON with your changes:
JSON ONLY. NO OTHER TEXT."""
async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
- """Get AI styles with proper PowerPoint color conversion."""
+ """Get AI styles with proper PowerPoint color conversion. Uses base _getAiStyles for debug file writing."""
if not aiService:
return default_styles
try:
- from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
+ # Use base template method which handles debug file writing
+ enhanced_styles = await self._getAiStyles(aiService, style_template, default_styles)
- request_options = AiCallOptions()
- request_options.operationType = OperationTypeEnum.DATA_GENERATE
-
- request = AiCallRequest(prompt=style_template, context="", options=request_options)
-
- # Check if AI service is properly configured
- if not hasattr(aiService, 'aiObjects') or not aiService.aiObjects:
- self.logger.warning("AI service not properly configured, using defaults")
- return default_styles
-
- response = await aiService.callAi(request)
-
- # Check if response is valid
- if not response:
- self.logger.warning("AI service returned no response, using defaults")
- return default_styles
-
- # json and re are already imported at module level
-
- # Clean and parse JSON
- result = response.content.strip() if response and response.content else ""
-
- # Check if result is empty
- if not result:
- self.logger.warning("AI styling returned empty response, using defaults")
- return default_styles
-
- # Log the raw response for debugging
- self.logger.debug(f"AI styling raw response: {result[:200]}...")
-
- # Extract JSON from various formats
- json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
- if json_match:
- result = json_match.group(1).strip()
- elif result.startswith('```json'):
- result = re.sub(r'^```json\s*', '', result)
- result = re.sub(r'\s*```$', '', result)
- elif result.startswith('```'):
- result = re.sub(r'^```\s*', '', result)
- result = re.sub(r'\s*```$', '', result)
-
- # Try to extract JSON from explanatory text
- json_patterns = [
- r'\{[^{}]*"title"[^{}]*\}', # Simple JSON object
- r'\{.*?"title".*?\}', # JSON with title field
- r'\{.*?"font_size".*?\}', # JSON with font_size field
- ]
-
- for pattern in json_patterns:
- json_match = re.search(pattern, result, re.DOTALL)
- if json_match:
- result = json_match.group(0)
- break
-
- # Additional cleanup - remove any leading/trailing whitespace and newlines
- result = result.strip()
-
- # Check if result is still empty after cleanup
- if not result:
- self.logger.warning("AI styling returned empty content after cleanup, using defaults")
- return default_styles
-
- # Try to parse JSON
- try:
- styles = json.loads(result)
- self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")
- except json.JSONDecodeError as json_error:
- self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
- self.logger.warning(f"Raw content that failed to parse: {result[:100]}...")
- # Try to extract just the JSON part if it's embedded in text
- json_start = result.find('{')
- json_end = result.rfind('}')
- if json_start != -1 and json_end != -1 and json_end > json_start:
- json_part = result[json_start:json_end+1]
- try:
- styles = json.loads(json_part)
- self.logger.info("Successfully extracted JSON from explanatory text")
- self.logger.debug(f"Extracted AI styles: {list(styles.keys())}")
- except json.JSONDecodeError:
- self.logger.warning("Could not extract valid JSON from response, using defaults")
- return default_styles
- else:
- return default_styles
-
- # Convert colors to PowerPoint RGB format
- styles = self._convertColorsFormat(styles)
-
- return styles
+ # Convert colors to PPTX format (RGB tuples)
+ return self._convertColorsFormat(enhanced_styles)
except Exception as e:
- self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
+ self.logger.warning(f"AI style enhancement failed: {str(e)}, using defaults")
return default_styles
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
@@ -962,13 +924,10 @@ JSON ONLY. NO OTHER TEXT."""
return 1 # Default to title and content layout
def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
- """Create slides from sections based on content density and user intent."""
+ """Create slides from sections: each heading creates a new slide, content accumulates until next heading."""
try:
slides = []
- content_per_slide = styles.get("content_per_slide", "concise")
-
- # Group sections by type and create slides
- current_slide_content = []
+ current_slide_sections = [] # Store sections (not formatted text) for proper rendering
current_slide_title = "Content Overview"
for section in sections:
@@ -981,13 +940,13 @@ JSON ONLY. NO OTHER TEXT."""
if section_type == "heading":
# If we have accumulated content, create a slide
- if current_slide_content:
+ if current_slide_sections:
slides.append({
"title": current_slide_title,
- "content": "\n\n".join(current_slide_content),
+ "sections": current_slide_sections.copy(), # Store sections for proper rendering
"images": []
})
- current_slide_content = []
+ current_slide_sections = []
# Start new slide with heading as title
heading_found = False
@@ -1012,13 +971,13 @@ JSON ONLY. NO OTHER TEXT."""
current_slide_title = section.get("id", "Untitled Section")
elif section_type == "image":
# Create separate slide for image
- if current_slide_content:
+ if current_slide_sections:
slides.append({
"title": current_slide_title,
- "content": "\n\n".join(current_slide_content),
+ "sections": current_slide_sections.copy(),
"images": []
})
- current_slide_content = []
+ current_slide_sections = []
# Extract image data
imageData = []
@@ -1045,20 +1004,18 @@ JSON ONLY. NO OTHER TEXT."""
slides.append({
"title": section.get("title") or (imageData[0].get("altText", "Image") if imageData else "Image"),
- "content": "",
+ "sections": [],
"images": imageData
})
else:
- # Add content to current slide
- formatted_content = self._formatSectionContent(section)
- if formatted_content:
- current_slide_content.append(formatted_content)
+ # Add section to current slide (will be rendered properly)
+ current_slide_sections.append(section)
# Add final slide if there's content
- if current_slide_content:
+ if current_slide_sections:
slides.append({
"title": current_slide_title,
- "content": "\n\n".join(current_slide_content),
+ "sections": current_slide_sections.copy(),
"images": []
})
@@ -1204,24 +1161,217 @@ JSON ONLY. NO OTHER TEXT."""
except Exception as e:
logger.error(f"Error embedding images in PPTX slide: {str(e)}")
- # Add error message text box to slide
- try:
- from pptx.util import Inches, Pt
- from pptx.enum.text import PP_ALIGN
- errorMsg = f"[Error: Could not embed image(s). {str(e)}]"
- errorBox = slide.shapes.add_textbox(
- Inches(0.5),
- Inches(2),
- slideWidth - Inches(1),
- Inches(0.5)
- )
- errorFrame = errorBox.text_frame
- errorFrame.text = errorMsg
- errorFrame.paragraphs[0].font.size = Pt(12)
- errorFrame.paragraphs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color
- errorFrame.paragraphs[0].alignment = PP_ALIGN.LEFT
- except Exception as errorBoxError:
- logger.error(f"Could not add error message to slide: {str(errorBoxError)}")
+
+ def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float) -> None:
+ """Add a PowerPoint table to slide."""
+ try:
+ from pptx.util import Inches, Pt
+ from pptx.enum.text import PP_ALIGN
+ from pptx.dml.color import RGBColor
+
+ # Extract from nested content structure
+ content = element.get("content", {})
+ if not isinstance(content, dict):
+ return
+
+ headers = content.get("headers", [])
+ rows = content.get("rows", [])
+
+ if not headers:
+ return
+
+ # Calculate table dimensions
+ num_cols = len(headers)
+ num_rows = len(rows) + 1 # +1 for header row
+ left = Inches(0.5)
+ width = slide.presentation.slide_width - Inches(1)
+ row_height = Inches(0.4)
+
+ # Create table
+ table_shape = slide.shapes.add_table(num_rows, num_cols, left, top, width, row_height * num_rows)
+ table = table_shape.table
+
+ # Set column widths
+ col_width = width / num_cols
+ for col_idx in range(num_cols):
+ table.columns[col_idx].width = col_width
+
+ # Add headers with styling
+ header_style = styles.get("table_header", {})
+ header_bg_color = self._getSafeColor(header_style.get("background", (31, 78, 121)))
+ header_text_color = self._getSafeColor(header_style.get("text_color", (255, 255, 255)))
+ header_font_size = header_style.get("font_size", 18)
+
+ for col_idx, header in enumerate(headers):
+ cell = table.cell(0, col_idx)
+ cell.text = str(header)
+ cell.fill.solid()
+ cell.fill.fore_color.rgb = RGBColor(*header_bg_color)
+ cell.text_frame.paragraphs[0].font.bold = header_style.get("bold", True)
+ cell.text_frame.paragraphs[0].font.size = Pt(header_font_size)
+ cell.text_frame.paragraphs[0].font.color.rgb = RGBColor(*header_text_color)
+
+ align = header_style.get("align", "center")
+ if align == "left":
+ cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT
+ elif align == "right":
+ cell.text_frame.paragraphs[0].alignment = PP_ALIGN.RIGHT
+ else:
+ cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER
+
+ # Add data rows with styling
+ cell_style = styles.get("table_cell", {})
+ cell_bg_color = self._getSafeColor(cell_style.get("background", (255, 255, 255)))
+ cell_text_color = self._getSafeColor(cell_style.get("text_color", (47, 47, 47)))
+ cell_font_size = cell_style.get("font_size", 16)
+
+ for row_idx, row_data in enumerate(rows, 1):
+ for col_idx, cell_data in enumerate(row_data[:num_cols]):
+ cell = table.cell(row_idx, col_idx)
+ cell.text = str(cell_data)
+ cell.fill.solid()
+ cell.fill.fore_color.rgb = RGBColor(*cell_bg_color)
+ cell.text_frame.paragraphs[0].font.size = Pt(cell_font_size)
+ cell.text_frame.paragraphs[0].font.bold = cell_style.get("bold", False)
+ cell.text_frame.paragraphs[0].font.color.rgb = RGBColor(*cell_text_color)
+
+ align = cell_style.get("align", "left")
+ if align == "center":
+ cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER
+ elif align == "right":
+ cell.text_frame.paragraphs[0].alignment = PP_ALIGN.RIGHT
+ else:
+ cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT
+
+ except Exception as e:
+ logger.warning(f"Error adding table to slide: {str(e)}")
+
+ def _addBulletListToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None:
+ """Add bullet list to slide text frame."""
+ try:
+ from pptx.util import Pt
+ from pptx.dml.color import RGBColor
+
+ # Extract from nested content structure
+ content = element.get("content", {})
+ if not isinstance(content, dict):
+ return
+
+ items = content.get("items", [])
+ if not items:
+ return
+
+ list_style = styles.get("bullet_list", {})
+ for item in items:
+ p = text_frame.add_paragraph()
+ if isinstance(item, dict):
+ p.text = item.get("text", "")
+ else:
+ p.text = str(item)
+
+ p.level = 0
+ p.font.size = Pt(list_style.get("font_size", 18))
+ p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47))))
+
+ except Exception as e:
+ logger.warning(f"Error adding bullet list to slide: {str(e)}")
+
+ def _addHeadingToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None:
+ """Add heading to slide text frame."""
+ try:
+ from pptx.util import Pt
+ from pptx.dml.color import RGBColor
+
+ # Extract from nested content structure
+ content = element.get("content", {})
+ if not isinstance(content, dict):
+ return
+
+ text = content.get("text", "")
+ level = content.get("level", 1)
+
+ if text:
+ p = text_frame.add_paragraph()
+ p.text = text
+ p.level = min(level - 1, 2) # Cap the indent at 2; python-pptx itself accepts levels 0-8
+
+ heading_style = styles.get("heading", {})
+ p.font.size = Pt(heading_style.get("font_size", 32))
+ p.font.bold = heading_style.get("bold", True)
+ p.font.color.rgb = RGBColor(*self._getSafeColor(heading_style.get("color", (47, 47, 47))))
+
+ except Exception as e:
+ logger.warning(f"Error adding heading to slide: {str(e)}")
+
+ def _addParagraphToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None:
+ """Add paragraph to slide text frame."""
+ try:
+ from pptx.util import Pt
+ from pptx.dml.color import RGBColor
+ from pptx.enum.text import PP_ALIGN
+
+ # Extract from nested content structure
+ content = element.get("content", {})
+ if isinstance(content, dict):
+ text = content.get("text", "")
+ elif isinstance(content, str):
+ text = content
+ else:
+ text = ""
+
+ if text:
+ p = text_frame.add_paragraph()
+ p.text = text
+
+ paragraph_style = styles.get("paragraph", {})
+ p.font.size = Pt(paragraph_style.get("font_size", 18))
+ p.font.bold = paragraph_style.get("bold", False)
+ p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
+
+ align = paragraph_style.get("align", "left")
+ if align == "center":
+ p.alignment = PP_ALIGN.CENTER
+ elif align == "right":
+ p.alignment = PP_ALIGN.RIGHT
+ else:
+ p.alignment = PP_ALIGN.LEFT
+
+ except Exception as e:
+ logger.warning(f"Error adding paragraph to slide: {str(e)}")
+
+ def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None:
+ """Add code block to slide text frame."""
+ try:
+ from pptx.util import Pt
+ from pptx.dml.color import RGBColor
+
+ # Extract from nested content structure
+ content = element.get("content", {})
+ if not isinstance(content, dict):
+ return
+
+ code = content.get("code", "")
+ language = content.get("language", "")
+
+ if code:
+ code_style = styles.get("code_block", {})
+ code_font = code_style.get("font", "Courier New")
+ code_font_size = code_style.get("font_size", 9)
+ code_color = self._getSafeColor(code_style.get("color", (47, 47, 47)))
+
+ p = text_frame.add_paragraph()
+ if language:
+ p.text = f"Code ({language}):"
+ p.font.bold = True
+ p = text_frame.add_paragraph()
+
+ p.text = code
+ p.font.name = code_font
+ p.font.size = Pt(code_font_size)
+ p.font.color.rgb = RGBColor(*code_color)
+
+ except Exception as e:
+ logger.warning(f"Error adding code block to slide: {str(e)}")
def _formatTimestamp(self) -> str:
"""Format current timestamp for presentation generation."""
diff --git a/tests/functional/test10_document_generation_formats.py b/tests/functional/test10_document_generation_formats.py
index 941034ba..05532313 100644
--- a/tests/functional/test10_document_generation_formats.py
+++ b/tests/functional/test10_document_generation_formats.py
@@ -153,6 +153,17 @@ class DocumentGenerationFormatsTester10:
" - A table summarizing transaction details\n"
"5) A conclusion paragraph with recommendations\n\n"
"Format as a professional PDF document suitable for printing."
+ ),
+ "html": (
+ "Create a professional HTML document about 'Fuel Station Receipt Analysis' with:\n"
+ "1) A main title\n"
+ "2) An introduction paragraph explaining the receipt analysis\n"
+ "3) Extract and include the image from the attached PDF document (B2025-02c.pdf)\n"
+ "4) A section analyzing the receipt data with:\n"
+ " - Bullet points of key findings\n"
+ " - A table summarizing transaction details\n"
+ "5) A conclusion paragraph with recommendations\n\n"
+ "Format as a professional HTML document with proper styling, responsive design, and embedded CSS."
)
}
@@ -350,7 +361,8 @@ class DocumentGenerationFormatsTester10:
"pdf": ["application/pdf"],
"docx": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
"xlsx": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
- "pptx": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"]
+ "pptx": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"],
+ "html": ["text/html", "application/xhtml+xml"]
}
# Expected file extensions
@@ -358,7 +370,8 @@ class DocumentGenerationFormatsTester10:
"pdf": [".pdf"],
"docx": [".docx"],
"xlsx": [".xlsx"],
- "pptx": [".pptx"]
+ "pptx": [".pptx"],
+ "html": [".html", ".htm"]
}
formatLower = expectedFormat.lower()
@@ -398,12 +411,12 @@ class DocumentGenerationFormatsTester10:
return verification
async def testAllFormats(self) -> Dict[str, Any]:
- """Test document generation in DOCX, XLSX, PPTX, and PDF formats."""
+ """Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats."""
print("\n" + "="*80)
- print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, AND PDF FORMATS")
+ print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, PDF, AND HTML FORMATS")
print("="*80)
- formats = ["docx", "xlsx", "pptx", "pdf"]
+ formats = ["docx", "xlsx", "pptx", "pdf", "html"]
results = {}
for format in formats:
@@ -456,7 +469,7 @@ class DocumentGenerationFormatsTester10:
async def runTest(self):
"""Run the complete test."""
print("\n" + "="*80)
- print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF")
+ print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF, HTML")
print("="*80)
try: