fixed rendering styles

This commit is contained in:
ValueOn AG 2025-12-29 00:06:34 +01:00
parent 3e7c75335a
commit a540729533
5 changed files with 456 additions and 205 deletions

View file

@ -215,10 +215,16 @@ class StructureFiller:
useAiCall = section.get("useAiCall", False)
# WICHTIG: Wenn keine ContentParts vorhanden sind UND kein generationHint, kann kein AI-Call gemacht werden
# Aber: Wenn generationHint vorhanden ist, kann AI auch ohne ContentParts generieren (z.B. Executive Summary)
# Aber: Wenn generationHint vorhanden ist, SOLLTE AI verwendet werden, auch wenn useAiCall=false gesetzt ist
# (z.B. wenn AI die Struktur generiert hat, aber useAiCall falsch gesetzt wurde)
if len(contentPartIds) == 0 and not generationHint:
useAiCall = False
logger.debug(f"Section {sectionId}: No content parts and no generation hint, setting useAiCall=False")
elif len(contentPartIds) == 0 and generationHint and not useAiCall:
# Override: If there's a generationHint but no content parts, we should use AI
# This handles cases where structure generation set useAiCall=false incorrectly
useAiCall = True
logger.info(f"Section {sectionId}: Overriding useAiCall=True (has generationHint but no content parts)")
elements = []
@ -658,14 +664,28 @@ class StructureFiller:
logger.error(f"Error generating section {sectionId}: {str(e)}")
# NICHT raise - Section wird mit Fehlermeldung gerendert
else:
# Füge extrahierten Text direkt hinzu (kein AI-Call)
logger.debug(f"Processing section {sectionId}: Single extracted part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly")
elements.append({
"type": "extracted_text",
"content": part.data,
"source": part.metadata.get("documentId"),
"extractionPrompt": part.metadata.get("extractionPrompt")
})
# Füge extrahierten Content direkt hinzu (kein AI-Call)
# CRITICAL: Check part typeGroup to determine correct element type
if part.typeGroup == "image":
# Image content should be added as image element, not extracted_text
logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call - adding as image element")
elements.append({
"type": "image",
"content": {
"base64Data": part.data,
"altText": part.metadata.get("usageHint", part.label),
"caption": part.metadata.get("caption", "")
}
})
else:
# Text content - add as extracted_text element
logger.debug(f"Processing section {sectionId}: Single extracted TEXT part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly")
elements.append({
"type": "extracted_text",
"content": part.data,
"source": part.metadata.get("documentId"),
"extractionPrompt": part.metadata.get("extractionPrompt")
})
section["elements"] = elements

View file

@ -335,14 +335,26 @@ class RendererDocx(BaseRenderer):
elif section_type == "heading":
self._renderJsonHeading(doc, element, styles)
elif section_type == "paragraph":
self._renderJsonParagraph(doc, element, styles)
# CRITICAL: Check if this is actually an image element before rendering as paragraph
# Image elements might not have type set, but have base64Data in content
content = element.get("content", {})
if isinstance(content, dict) and content.get("base64Data"):
# This is actually an image, render it as such
self._renderJsonImage(doc, element, styles)
else:
self._renderJsonParagraph(doc, element, styles)
elif section_type == "code_block":
self._renderJsonCodeBlock(doc, element, styles)
elif section_type == "image":
self._renderJsonImage(doc, element, styles)
else:
# Fallback to paragraph for unknown types
self._renderJsonParagraph(doc, element, styles)
# Fallback to paragraph for unknown types, but check for image data first
content = element.get("content", {})
if isinstance(content, dict) and content.get("base64Data"):
# This is actually an image, render it as such
self._renderJsonImage(doc, element, styles)
else:
self._renderJsonParagraph(doc, element, styles)
except Exception as e:
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
@ -517,13 +529,22 @@ class RendererDocx(BaseRenderer):
if not isinstance(content, dict):
return
items = content.get("items", [])
bullet_style = styles["bullet_list"]
bullet_style = styles.get("bullet_list", {})
for item in items:
if isinstance(item, str):
para = doc.add_paragraph(item, style='List Bullet')
elif isinstance(item, dict) and "text" in item:
para = doc.add_paragraph(item["text"], style='List Bullet')
# Apply bullet list styling from style set
if bullet_style and para.runs:
for run in para.runs:
if "font_size" in bullet_style:
run.font.size = Pt(bullet_style["font_size"])
if "color" in bullet_style:
color_hex = bullet_style["color"].lstrip('#')
run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
@ -540,7 +561,13 @@ class RendererDocx(BaseRenderer):
if text:
level = max(1, min(6, level))
doc.add_heading(text, level=level)
# Use custom heading style if available, otherwise use built-in
style_name = f"Heading {level}" if level <= 2 else "Heading 1"
try:
para = doc.add_paragraph(text, style=style_name)
except KeyError:
# Fallback to built-in heading if custom style doesn't exist
doc.add_heading(text, level=level)
except Exception as e:
self.logger.warning(f"Error rendering heading: {str(e)}")
@ -570,6 +597,25 @@ class RendererDocx(BaseRenderer):
if text:
para = doc.add_paragraph(text)
# Apply paragraph styling from style set
paragraph_style = styles.get("paragraph", {})
if paragraph_style:
for run in para.runs:
if "font_size" in paragraph_style:
run.font.size = Pt(paragraph_style["font_size"])
if "bold" in paragraph_style:
run.font.bold = paragraph_style["bold"]
if "color" in paragraph_style:
color_hex = paragraph_style["color"].lstrip('#')
run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
if "align" in paragraph_style:
align = paragraph_style["align"]
if align == "center":
para.alignment = WD_ALIGN_PARAGRAPH.CENTER
elif align == "right":
para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
else:
para.alignment = WD_ALIGN_PARAGRAPH.LEFT
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
@ -583,16 +629,21 @@ class RendererDocx(BaseRenderer):
return
code = content.get("code", "")
language = content.get("language", "")
code_style = styles.get("code_block", {})
if code:
if language:
lang_para = doc.add_paragraph(f"Code ({language}):")
lang_para.runs[0].bold = True
if lang_para.runs:
lang_para.runs[0].bold = True
code_para = doc.add_paragraph(code)
for run in code_para.runs:
run.font.name = 'Courier New'
run.font.size = Pt(10)
run.font.name = code_style.get("font", "Courier New")
run.font.size = Pt(code_style.get("font_size", 9))
if "color" in code_style:
color_hex = code_style["color"].lstrip('#')
run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
@ -602,24 +653,38 @@ class RendererDocx(BaseRenderer):
try:
# Extract from nested content structure
content = image_data.get("content", {})
if not isinstance(content, dict):
return
base64_data = content.get("base64Data", "")
alt_text = content.get("altText", "Image")
base64_data = ""
alt_text = "Image"
if base64_data:
try:
image_bytes = base64.b64decode(base64_data)
doc.add_picture(io.BytesIO(image_bytes), width=Inches(4))
if alt_text:
caption_para = doc.add_paragraph(f"Figure: {alt_text}")
caption_para.runs[0].italic = True
except Exception as embedError:
# Image decoding or embedding failed
raise Exception(f"Failed to decode or embed image: {str(embedError)}")
else:
if isinstance(content, dict):
base64_data = content.get("base64Data", "")
alt_text = content.get("altText", "Image")
elif isinstance(content, str):
# Content might be base64 string directly (shouldn't happen, but handle it)
self.logger.warning("Image content is a string, not a dict. This should not happen.")
return
# If base64Data not found in content, try direct element fields (fallback)
if not base64_data:
base64_data = image_data.get("base64Data", "")
if not alt_text or alt_text == "Image":
alt_text = image_data.get("altText", "Image")
# CRITICAL: Ensure we don't render base64 data as text
# If base64_data looks like it might be rendered elsewhere, skip it
if not base64_data:
raise Exception("No image data provided (base64Data is empty)")
try:
image_bytes = base64.b64decode(base64_data)
doc.add_picture(io.BytesIO(image_bytes), width=Inches(4))
if alt_text and alt_text != "Image":
caption_para = doc.add_paragraph(f"Figure: {alt_text}")
caption_para.runs[0].italic = True
except Exception as embedError:
# Image decoding or embedding failed
raise Exception(f"Failed to decode or embed image: {str(embedError)}")
except Exception as e:
self.logger.error(f"Error embedding image in DOCX: {str(e)}")
@ -792,7 +857,11 @@ class RendererDocx(BaseRenderer):
if "heading2" in styleSet:
self._createStyle(doc, "Heading 2", styleSet["heading2"], WD_STYLE_TYPE.PARAGRAPH)
# Note: List Bullet and List Number are built-in Word styles, no need to create
# Create Paragraph style
if "paragraph" in styleSet:
self._createStyle(doc, "Custom Paragraph", styleSet["paragraph"], WD_STYLE_TYPE.PARAGRAPH)
# Note: List Bullet and List Number are built-in Word styles, but we apply custom styling to runs
except Exception as e:
self.logger.warning(f"Could not set up document styles: {str(e)}")

View file

@ -712,7 +712,6 @@ class RendererHtml(BaseRenderer):
# Bestimme MIME-Type und Extension
mimeType = element.get("mimeType", "") or (content.get("mimeType", "") if isinstance(content, dict) else "")
if not mimeType or mimeType == "unknown":
if not mimeType or mimeType == "unknown":
# Versuche MIME-Type aus base64 zu erkennen
if base64Data.startswith("/9j/"):

View file

@ -106,11 +106,85 @@ class RendererPptx(BaseRenderer):
if hasImages:
self._addImagesToSlide(slide, slide_data.get("images", []), styles)
# Set content with AI-generated styling (if not image-only slide)
if slide_content or not hasImages:
# Render sections with proper PowerPoint objects (tables, lists, etc.)
slide_sections = slide_data.get("sections", [])
if slide_sections:
# Use content placeholder for structured content
content_shape = slide.placeholders[1]
text_frame = content_shape.text_frame
text_frame.clear()
# Format content text with AI styles
# Track vertical position for multiple content types
current_y = Inches(1.5) # Start below title
for section in slide_sections:
section_type = section.get("content_type", "paragraph")
elements = section.get("elements", [])
# Handle sections without elements (e.g., headings that create slides)
if not elements:
continue
for element in elements:
if not isinstance(element, dict):
continue
# Check element type first, fall back to section type
element_type = element.get("type", "")
if not element_type:
element_type = section_type
if element_type == "table":
# Render as actual PowerPoint table
self._addTableToSlide(slide, element, styles, current_y)
current_y += Inches(2) # Space for table
elif element_type == "bullet_list" or element_type == "list":
# Render as actual PowerPoint bullet list
self._addBulletListToSlide(slide, element, styles, text_frame)
elif element_type == "heading":
# Render as heading in text frame
self._addHeadingToSlide(slide, element, styles, text_frame)
elif element_type == "paragraph":
# Render as paragraph in text frame
self._addParagraphToSlide(slide, element, styles, text_frame)
elif element_type == "code_block" or element_type == "code":
# Render as formatted code block
self._addCodeBlockToSlide(slide, element, styles, text_frame)
elif element_type == "extracted_text":
# Render extracted text as paragraph with styling
content = element.get("content", "")
source = element.get("source", "")
if content:
paragraph_style = styles.get("paragraph", {})
p = text_frame.add_paragraph()
p.text = content
p.font.size = Pt(paragraph_style.get("font_size", 18))
p.font.bold = paragraph_style.get("bold", False)
p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
if source:
p.add_run(f" (Source: {source})").font.italic = True
elif element_type == "reference":
# Render reference
label = element.get("label", "Reference")
p = text_frame.add_paragraph()
p.text = f"[Reference: {label}]"
p.font.italic = True
else:
# Fallback: try to render as paragraph
content = element.get("content", "")
if isinstance(content, dict):
text = content.get("text", "")
elif isinstance(content, str):
text = content
else:
text = ""
if text:
self._addParagraphToSlide(slide, element, styles, text_frame)
# Fallback: if no sections but has content text, render as before
elif slide_content and not hasImages:
content_shape = slide.placeholders[1]
text_frame = content_shape.text_frame
text_frame.clear()
@ -126,38 +200,12 @@ class RendererPptx(BaseRenderer):
p.text = paragraph.strip()
# Apply AI-generated styling based on content type
if paragraph.startswith('#'):
# Header
p.text = paragraph.lstrip('#').strip()
heading_style = styles.get("heading", {})
p.font.size = Pt(heading_style.get("font_size", 32))
p.font.bold = heading_style.get("bold", True)
heading_color = self._getSafeColor(heading_style.get("color", (47, 47, 47)))
p.font.color.rgb = RGBColor(*heading_color)
elif paragraph.startswith('##'):
# Subheader
p.text = paragraph.lstrip('#').strip()
subheading_style = styles.get("subheading", {})
p.font.size = Pt(subheading_style.get("font_size", 24))
p.font.bold = subheading_style.get("bold", True)
subheading_color = self._getSafeColor(subheading_style.get("color", (79, 79, 79)))
p.font.color.rgb = RGBColor(*subheading_color)
elif paragraph.startswith('*') and paragraph.endswith('*'):
# Bold text
p.text = paragraph.strip('*')
paragraph_style = styles.get("paragraph", {})
p.font.size = Pt(paragraph_style.get("font_size", 18))
p.font.bold = True
paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))
p.font.color.rgb = RGBColor(*paragraph_color)
else:
# Regular text
paragraph_style = styles.get("paragraph", {})
p.font.size = Pt(paragraph_style.get("font_size", 18))
p.font.bold = paragraph_style.get("bold", False)
paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))
p.font.color.rgb = RGBColor(*paragraph_color)
# Apply AI-generated styling
paragraph_style = styles.get("paragraph", {})
p.font.size = Pt(paragraph_style.get("font_size", 18))
p.font.bold = paragraph_style.get("bold", False)
paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))
p.font.color.rgb = RGBColor(*paragraph_color)
# Apply alignment
align = paragraph_style.get("align", "left")
@ -396,8 +444,7 @@ class RendererPptx(BaseRenderer):
if userPrompt and aiService:
self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...")
enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
# Convert colors to PPTX format after getting styles
enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet)
# Colors already converted in _getAiStylesWithPptxColors
return self._validateStylesReadability(enhancedStyleSet)
else:
# Use default styles only
@ -481,104 +528,19 @@ Return ONLY this JSON with your changes:
JSON ONLY. NO OTHER TEXT."""
async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper PowerPoint color conversion."""
"""Get AI styles with proper PowerPoint color conversion. Uses base _getAiStyles for debug file writing."""
if not aiService:
return default_styles
try:
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
# Use base template method which handles debug file writing
enhanced_styles = await self._getAiStyles(aiService, style_template, default_styles)
request_options = AiCallOptions()
request_options.operationType = OperationTypeEnum.DATA_GENERATE
request = AiCallRequest(prompt=style_template, context="", options=request_options)
# Check if AI service is properly configured
if not hasattr(aiService, 'aiObjects') or not aiService.aiObjects:
self.logger.warning("AI service not properly configured, using defaults")
return default_styles
response = await aiService.callAi(request)
# Check if response is valid
if not response:
self.logger.warning("AI service returned no response, using defaults")
return default_styles
# json and re are already imported at module level
# Clean and parse JSON
result = response.content.strip() if response and response.content else ""
# Check if result is empty
if not result:
self.logger.warning("AI styling returned empty response, using defaults")
return default_styles
# Log the raw response for debugging
self.logger.debug(f"AI styling raw response: {result[:200]}...")
# Extract JSON from various formats
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
if json_match:
result = json_match.group(1).strip()
elif result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
elif result.startswith('```'):
result = re.sub(r'^```\s*', '', result)
result = re.sub(r'\s*```$', '', result)
# Try to extract JSON from explanatory text
json_patterns = [
r'\{[^{}]*"title"[^{}]*\}', # Simple JSON object
r'\{.*?"title".*?\}', # JSON with title field
r'\{.*?"font_size".*?\}', # JSON with font_size field
]
for pattern in json_patterns:
json_match = re.search(pattern, result, re.DOTALL)
if json_match:
result = json_match.group(0)
break
# Additional cleanup - remove any leading/trailing whitespace and newlines
result = result.strip()
# Check if result is still empty after cleanup
if not result:
self.logger.warning("AI styling returned empty content after cleanup, using defaults")
return default_styles
# Try to parse JSON
try:
styles = json.loads(result)
self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")
except json.JSONDecodeError as json_error:
self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
self.logger.warning(f"Raw content that failed to parse: {result[:100]}...")
# Try to extract just the JSON part if it's embedded in text
json_start = result.find('{')
json_end = result.rfind('}')
if json_start != -1 and json_end != -1 and json_end > json_start:
json_part = result[json_start:json_end+1]
try:
styles = json.loads(json_part)
self.logger.info("Successfully extracted JSON from explanatory text")
self.logger.debug(f"Extracted AI styles: {list(styles.keys())}")
except json.JSONDecodeError:
self.logger.warning("Could not extract valid JSON from response, using defaults")
return default_styles
else:
return default_styles
# Convert colors to PowerPoint RGB format
styles = self._convertColorsFormat(styles)
return styles
# Convert colors to PPTX format (RGB tuples)
return self._convertColorsFormat(enhanced_styles)
except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
self.logger.warning(f"AI style enhancement failed: {str(e)}, using defaults")
return default_styles
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
@ -962,13 +924,10 @@ JSON ONLY. NO OTHER TEXT."""
return 1 # Default to title and content layout
def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Create slides from sections based on content density and user intent."""
"""Create slides from sections: each heading creates a new slide, content accumulates until next heading."""
try:
slides = []
content_per_slide = styles.get("content_per_slide", "concise")
# Group sections by type and create slides
current_slide_content = []
current_slide_sections = [] # Store sections (not formatted text) for proper rendering
current_slide_title = "Content Overview"
for section in sections:
@ -981,13 +940,13 @@ JSON ONLY. NO OTHER TEXT."""
if section_type == "heading":
# If we have accumulated content, create a slide
if current_slide_content:
if current_slide_sections:
slides.append({
"title": current_slide_title,
"content": "\n\n".join(current_slide_content),
"sections": current_slide_sections.copy(), # Store sections for proper rendering
"images": []
})
current_slide_content = []
current_slide_sections = []
# Start new slide with heading as title
heading_found = False
@ -1012,13 +971,13 @@ JSON ONLY. NO OTHER TEXT."""
current_slide_title = section.get("id", "Untitled Section")
elif section_type == "image":
# Create separate slide for image
if current_slide_content:
if current_slide_sections:
slides.append({
"title": current_slide_title,
"content": "\n\n".join(current_slide_content),
"sections": current_slide_sections.copy(),
"images": []
})
current_slide_content = []
current_slide_sections = []
# Extract image data
imageData = []
@ -1045,20 +1004,18 @@ JSON ONLY. NO OTHER TEXT."""
slides.append({
"title": section.get("title") or (imageData[0].get("altText", "Image") if imageData else "Image"),
"content": "",
"sections": [],
"images": imageData
})
else:
# Add content to current slide
formatted_content = self._formatSectionContent(section)
if formatted_content:
current_slide_content.append(formatted_content)
# Add section to current slide (will be rendered properly)
current_slide_sections.append(section)
# Add final slide if there's content
if current_slide_content:
if current_slide_sections:
slides.append({
"title": current_slide_title,
"content": "\n\n".join(current_slide_content),
"sections": current_slide_sections.copy(),
"images": []
})
@ -1204,24 +1161,217 @@ JSON ONLY. NO OTHER TEXT."""
except Exception as e:
logger.error(f"Error embedding images in PPTX slide: {str(e)}")
# Add error message text box to slide
try:
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
errorMsg = f"[Error: Could not embed image(s). {str(e)}]"
errorBox = slide.shapes.add_textbox(
Inches(0.5),
Inches(2),
slideWidth - Inches(1),
Inches(0.5)
)
errorFrame = errorBox.text_frame
errorFrame.text = errorMsg
errorFrame.paragraphs[0].font.size = Pt(12)
errorFrame.paragraphs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color
errorFrame.paragraphs[0].alignment = PP_ALIGN.LEFT
except Exception as errorBoxError:
logger.error(f"Could not add error message to slide: {str(errorBoxError)}")
def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float) -> None:
    """Add a native PowerPoint table built from a table element to the slide.

    Args:
        slide: Slide that receives the table shape.
        element: Element whose ``content`` dict carries ``headers`` (list of
            column titles) and ``rows`` (list of row sequences). Nothing is
            drawn when ``content`` is not a dict or ``headers`` is empty.
        styles: Style set; the ``table_header`` and ``table_cell`` entries are
            read for background, text_color, font_size, bold and align.
        top: Vertical position of the table, passed straight to ``add_table``
            (callers in this file pass an ``Inches(...)`` length).

    Any failure is logged as a warning and swallowed so that a broken table
    does not abort rendering of the rest of the slide.
    """
    try:
        from pptx.util import Inches, Pt
        from pptx.enum.text import PP_ALIGN
        from pptx.dml.color import RGBColor

        # Extract from nested content structure
        content = element.get("content", {})
        if not isinstance(content, dict):
            return
        headers = content.get("headers", [])
        rows = content.get("rows", [])
        if not headers:
            return

        # Calculate table dimensions
        num_cols = len(headers)
        num_rows = len(rows) + 1  # +1 for header row
        left = Inches(0.5)

        # A python-pptx Slide exposes no ``presentation`` attribute, so the
        # original ``slide.presentation.slide_width`` raised and no table was
        # ever drawn. Keep honouring such an attribute if a wrapper provides
        # one, otherwise resolve the owning presentation through the slide part.
        presentation = getattr(slide, "presentation", None)
        if presentation is None:
            presentation = slide.part.package.presentation_part.presentation
        width = presentation.slide_width - Inches(1)
        row_height = Inches(0.4)

        # Create table
        table_shape = slide.shapes.add_table(num_rows, num_cols, left, top, width, row_height * num_rows)
        table = table_shape.table

        # Column widths are EMU values and must be integers; true division
        # produced a float that python-pptx rejects, so use floor division.
        col_width = width // num_cols
        for col_idx in range(num_cols):
            table.columns[col_idx].width = col_width

        # Add headers with styling
        header_style = styles.get("table_header", {})
        header_bg_color = self._getSafeColor(header_style.get("background", (31, 78, 121)))
        header_text_color = self._getSafeColor(header_style.get("text_color", (255, 255, 255)))
        header_font_size = header_style.get("font_size", 18)
        for col_idx, header in enumerate(headers):
            cell = table.cell(0, col_idx)
            cell.text = str(header)
            cell.fill.solid()
            cell.fill.fore_color.rgb = RGBColor(*header_bg_color)
            header_para = cell.text_frame.paragraphs[0]
            header_para.font.bold = header_style.get("bold", True)
            header_para.font.size = Pt(header_font_size)
            header_para.font.color.rgb = RGBColor(*header_text_color)
            # Header cells default to centered text
            align = header_style.get("align", "center")
            if align == "left":
                header_para.alignment = PP_ALIGN.LEFT
            elif align == "right":
                header_para.alignment = PP_ALIGN.RIGHT
            else:
                header_para.alignment = PP_ALIGN.CENTER

        # Add data rows with styling
        cell_style = styles.get("table_cell", {})
        cell_bg_color = self._getSafeColor(cell_style.get("background", (255, 255, 255)))
        cell_text_color = self._getSafeColor(cell_style.get("text_color", (47, 47, 47)))
        cell_font_size = cell_style.get("font_size", 16)
        for row_idx, row_data in enumerate(rows, 1):
            # Values beyond the header count are dropped; short rows leave
            # the remaining cells empty.
            for col_idx, cell_data in enumerate(row_data[:num_cols]):
                cell = table.cell(row_idx, col_idx)
                cell.text = str(cell_data)
                cell.fill.solid()
                cell.fill.fore_color.rgb = RGBColor(*cell_bg_color)
                cell_para = cell.text_frame.paragraphs[0]
                cell_para.font.size = Pt(cell_font_size)
                cell_para.font.bold = cell_style.get("bold", False)
                cell_para.font.color.rgb = RGBColor(*cell_text_color)
                # Data cells default to left-aligned text
                align = cell_style.get("align", "left")
                if align == "center":
                    cell_para.alignment = PP_ALIGN.CENTER
                elif align == "right":
                    cell_para.alignment = PP_ALIGN.RIGHT
                else:
                    cell_para.alignment = PP_ALIGN.LEFT
    except Exception as e:
        logger.warning(f"Error adding table to slide: {str(e)}")
def _addBulletListToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None:
    """Append every item of a bullet-list element to the text frame.

    Args:
        slide: Slide being rendered (not used by this method).
        element: Element whose ``content`` dict holds an ``items`` list; each
            item is either a dict with a ``text`` key or any value that is
            stringified.
        styles: Style set; the ``bullet_list`` entry supplies font_size and color.
        text_frame: Text frame that receives one new paragraph per item.

    Errors are logged as warnings and not propagated.
    """
    try:
        from pptx.util import Pt
        from pptx.dml.color import RGBColor

        # The list lives inside the nested content structure
        payload = element.get("content", {})
        if not isinstance(payload, dict):
            return
        entries = payload.get("items", [])
        if not entries:
            return

        bulletStyle = styles.get("bullet_list", {})
        for entry in entries:
            bullet = text_frame.add_paragraph()
            bullet.text = entry.get("text", "") if isinstance(entry, dict) else str(entry)
            bullet.level = 0  # all items are rendered at the top indent level
            bullet.font.size = Pt(bulletStyle.get("font_size", 18))
            safeColor = self._getSafeColor(bulletStyle.get("color", (47, 47, 47)))
            bullet.font.color.rgb = RGBColor(*safeColor)
    except Exception as e:
        logger.warning(f"Error adding bullet list to slide: {str(e)}")
def _addHeadingToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None:
    """Append a heading element as a styled paragraph to the text frame.

    Args:
        slide: Slide being rendered (not used by this method).
        element: Element whose ``content`` dict holds ``text`` and an optional
            ``level`` (defaults to 1).
        styles: Style set; the ``heading`` entry supplies font_size, bold and color.
        text_frame: Text frame that receives the new paragraph.

    Nothing is added when ``content`` is not a dict or ``text`` is empty.
    Errors are logged as warnings and not propagated.
    """
    try:
        from pptx.util import Pt
        from pptx.dml.color import RGBColor

        # Extract from nested content structure
        content = element.get("content", {})
        if not isinstance(content, dict):
            return
        text = content.get("text", "")
        level = content.get("level", 1)
        if text:
            # A missing or non-numeric level falls back to a top-level heading
            # instead of aborting the styling below.
            try:
                level = int(level)
            except (TypeError, ValueError):
                level = 1

            p = text_frame.add_paragraph()
            p.text = text
            # Heading level N maps to indent level N-1, capped at 2 as before.
            # The lower bound is new: level <= 0 used to yield a negative
            # indent, which python-pptx rejects, leaving the heading unstyled.
            p.level = max(0, min(level - 1, 2))
            heading_style = styles.get("heading", {})
            p.font.size = Pt(heading_style.get("font_size", 32))
            p.font.bold = heading_style.get("bold", True)
            p.font.color.rgb = RGBColor(*self._getSafeColor(heading_style.get("color", (47, 47, 47))))
    except Exception as e:
        logger.warning(f"Error adding heading to slide: {str(e)}")
def _addParagraphToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None:
    """Append a paragraph element as a styled paragraph to the text frame.

    Args:
        slide: Slide being rendered (not used by this method).
        element: Element whose ``content`` is either a dict with a ``text``
            key or a plain string; any other type is treated as empty.
        styles: Style set; the ``paragraph`` entry supplies font_size, bold,
            color and align.
        text_frame: Text frame that receives the new paragraph.

    Nothing is added for empty text. Errors are logged as warnings and not
    propagated.
    """
    try:
        from pptx.util import Pt
        from pptx.dml.color import RGBColor
        from pptx.enum.text import PP_ALIGN

        # Resolve the text from the nested content structure
        payload = element.get("content", {})
        body = ""
        if isinstance(payload, dict):
            body = payload.get("text", "")
        elif isinstance(payload, str):
            body = payload
        if not body:
            return

        para = text_frame.add_paragraph()
        para.text = body

        paraStyle = styles.get("paragraph", {})
        para.font.size = Pt(paraStyle.get("font_size", 18))
        para.font.bold = paraStyle.get("bold", False)
        safeColor = self._getSafeColor(paraStyle.get("color", (47, 47, 47)))
        para.font.color.rgb = RGBColor(*safeColor)

        # Anything other than "center" or "right" is rendered left-aligned
        requested = paraStyle.get("align", "left")
        if requested == "center":
            chosen = PP_ALIGN.CENTER
        elif requested == "right":
            chosen = PP_ALIGN.RIGHT
        else:
            chosen = PP_ALIGN.LEFT
        para.alignment = chosen
    except Exception as e:
        logger.warning(f"Error adding paragraph to slide: {str(e)}")
def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None:
    """Append a code-block element to the text frame.

    Args:
        slide: Slide being rendered (not used by this method).
        element: Element whose ``content`` dict holds ``code`` and an optional
            ``language``.
        styles: Style set; the ``code_block`` entry supplies font, font_size
            and color.
        text_frame: Text frame that receives the new paragraph(s).

    When a language is given, a bold "Code (<language>):" label paragraph is
    written first and the code follows in its own paragraph. Nothing is added
    when ``content`` is not a dict or ``code`` is empty. Errors are logged as
    warnings and not propagated.
    """
    try:
        from pptx.util import Pt
        from pptx.dml.color import RGBColor

        # The code lives inside the nested content structure
        payload = element.get("content", {})
        if not isinstance(payload, dict):
            return
        source = payload.get("code", "")
        lang = payload.get("language", "")
        if not source:
            return

        blockStyle = styles.get("code_block", {})
        fontName = blockStyle.get("font", "Courier New")
        fontSize = blockStyle.get("font_size", 9)
        fontColor = self._getSafeColor(blockStyle.get("color", (47, 47, 47)))

        target = text_frame.add_paragraph()
        if lang:
            # The first paragraph becomes the label; the code gets a fresh one
            target.text = f"Code ({lang}):"
            target.font.bold = True
            target = text_frame.add_paragraph()

        target.text = source
        target.font.name = fontName
        target.font.size = Pt(fontSize)
        target.font.color.rgb = RGBColor(*fontColor)
    except Exception as e:
        logger.warning(f"Error adding code block to slide: {str(e)}")
def _formatTimestamp(self) -> str:
"""Format current timestamp for presentation generation."""

View file

@ -153,6 +153,17 @@ class DocumentGenerationFormatsTester10:
" - A table summarizing transaction details\n"
"5) A conclusion paragraph with recommendations\n\n"
"Format as a professional PDF document suitable for printing."
),
"html": (
"Create a professional HTML document about 'Fuel Station Receipt Analysis' with:\n"
"1) A main title\n"
"2) An introduction paragraph explaining the receipt analysis\n"
"3) Extract and include the image from the attached PDF document (B2025-02c.pdf)\n"
"4) A section analyzing the receipt data with:\n"
" - Bullet points of key findings\n"
" - A table summarizing transaction details\n"
"5) A conclusion paragraph with recommendations\n\n"
"Format as a professional HTML document with proper styling, responsive design, and embedded CSS."
)
}
@ -350,7 +361,8 @@ class DocumentGenerationFormatsTester10:
"pdf": ["application/pdf"],
"docx": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
"xlsx": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
"pptx": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"]
"pptx": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"],
"html": ["text/html", "application/xhtml+xml"]
}
# Expected file extensions
@ -358,7 +370,8 @@ class DocumentGenerationFormatsTester10:
"pdf": [".pdf"],
"docx": [".docx"],
"xlsx": [".xlsx"],
"pptx": [".pptx"]
"pptx": [".pptx"],
"html": [".html", ".htm"]
}
formatLower = expectedFormat.lower()
@ -398,12 +411,12 @@ class DocumentGenerationFormatsTester10:
return verification
async def testAllFormats(self) -> Dict[str, Any]:
"""Test document generation in DOCX, XLSX, PPTX, and PDF formats."""
"""Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats."""
print("\n" + "="*80)
print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, AND PDF FORMATS")
print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, PDF, AND HTML FORMATS")
print("="*80)
formats = ["docx", "xlsx", "pptx", "pdf"]
formats = ["docx", "xlsx", "pptx", "pdf", "html"]
results = {}
for format in formats:
@ -456,7 +469,7 @@ class DocumentGenerationFormatsTester10:
async def runTest(self):
"""Run the complete test."""
print("\n" + "="*80)
print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF")
print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF, HTML")
print("="*80)
try: