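Fix generation-to-renderer handoff: require detailed generationHint for chapters without content parts, use a blank layout for image-only PPTX slides, and normalize XLSX colors to aRGB with a save retry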

This commit is contained in:
ValueOn AG 2025-12-29 02:09:33 +01:00
parent 25d2908d48
commit bcbaf41f4f
3 changed files with 222 additions and 74 deletions

View file

@ -160,6 +160,13 @@ IMPORTANT - CHAPTER INDEPENDENCE:
- One chapter does NOT have information about another chapter
- Each chapter must provide its own context and be understandable alone
CRITICAL - CHAPTERS WITHOUT CONTENT PARTS:
- If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
- Include: what to generate, what information to include, purpose, specific details
- Without content parts, AI relies ENTIRELY on generationHint
- GOOD: "Create [specific content] with [details]. Include [information]. Purpose: [explanation]."
- BAD: "Create title" or "Add section" (too vague)
For each chapter:
- chapter id
- level (1, 2, 3, etc.)
@ -171,6 +178,7 @@ For each chapter:
}}
}}
- generationHint: Description of the content (must be self-contained with all necessary context)
* If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
OUTPUT FORMAT: {outputFormat}
@ -197,6 +205,15 @@ RETURN JSON:
}},
"generationHint": "Create introduction section",
"sections": []
}},
{{
"id": "chapter_2",
"level": 1,
"title": "Main Title",
"contentPartIds": [],
"contentPartInstructions": {{}},
"generationHint": "Create [specific content description] with [formatting details]. Include [required information]. Purpose: [explanation of what this chapter provides].",
"sections": []
}}
]
}}]

View file

@ -78,12 +78,24 @@ class RendererPptx(BaseRenderer):
slide_images = list(slide_data.get("images", [])) # Make copy so we can append
slide_content = slide_data.get('content', '')
hasSections = slide_sections and len(slide_sections) > 0
hasImages = len(slide_images) > 0
logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - sections: {len(slide_sections)}, images: {len(slide_images)}, content: {len(slide_content)} chars")
# Determine layout: first slide (i==0) uses title slide layout, others use title+content
# Determine layout: first slide (i==0) uses title slide layout
# For image-only slides, use blank layout to avoid placeholder interference
# Otherwise use title+content layout
if i == 0:
slideLayoutIndex = 0 # Title slide layout
elif hasImages and not hasSections and not slide_content:
# Image-only slide: use blank layout (typically index 6, fallback to 5 if not available)
try:
slideLayoutIndex = 6 # Blank layout
# Verify layout exists, fallback if not
if slideLayoutIndex >= len(prs.slide_layouts):
slideLayoutIndex = 5 # Alternative blank layout
except (AttributeError, IndexError):
slideLayoutIndex = 1 # Fallback to title+content
else:
slideLayoutIndex = 1 # Title and content layout
@ -91,18 +103,32 @@ class RendererPptx(BaseRenderer):
slide = prs.slides.add_slide(slide_layout)
# Set title with AI-generated styling
title_shape = slide.shapes.title
title_shape.text = slide_data.get("title", "Slide")
# Apply title styling - LEFT ALIGNED by default
title_style = styles.get("title", {})
if title_shape.text_frame.paragraphs[0].font:
title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44))
title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True)
# For blank layouts, add title as textbox since there's no title placeholder
try:
title_shape = slide.shapes.title
title_shape.text = slide_data.get("title", "Slide")
# Apply title styling - LEFT ALIGNED by default
title_style = styles.get("title", {})
if title_shape.text_frame.paragraphs[0].font:
title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44))
title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True)
title_color = self._getSafeColor(title_style.get("color", (31, 78, 121)))
title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
# Set left alignment for title
title_shape.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT
except AttributeError:
# Blank layout has no title placeholder - add title as textbox
from pptx.util import Inches
titleBox = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), prs.slide_width - Inches(1), Inches(0.8))
titleFrame = titleBox.text_frame
titleFrame.text = slide_data.get("title", "Slide")
title_style = styles.get("title", {})
titleFrame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44))
titleFrame.paragraphs[0].font.bold = title_style.get("bold", True)
title_color = self._getSafeColor(title_style.get("color", (31, 78, 121)))
title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
# Set left alignment for title
title_shape.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT
titleFrame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
titleFrame.paragraphs[0].alignment = PP_ALIGN.LEFT
# Render sections with proper PowerPoint objects (tables, lists, etc.)
if hasSections:
@ -229,6 +255,7 @@ class RendererPptx(BaseRenderer):
self._addParagraphToSlide(slide, element, styles, text_frame)
# Handle images after processing sections (images may have been extracted from sections)
# Update hasImages in case images were added during section processing
hasImages = len(slide_images) > 0
if hasImages:
self._addImagesToSlide(slide, slide_images, styles)
@ -1138,9 +1165,20 @@ JSON ONLY. NO OTHER TEXT."""
# Single image: center it
img = images[0]
base64Data = img.get("base64Data")
if base64Data:
imageBytes = base64.b64decode(base64Data)
imageStream = io.BytesIO(imageBytes)
# Validate base64Data is present and not empty
if base64Data and isinstance(base64Data, str) and len(base64Data.strip()) > 0:
try:
imageBytes = base64.b64decode(base64Data)
if len(imageBytes) == 0:
logger.error("Decoded image bytes are empty")
return
imageStream = io.BytesIO(imageBytes)
except Exception as decode_error:
logger.error(f"Failed to decode base64 image data: {str(decode_error)}")
return
else:
logger.error(f"Invalid base64Data: present={bool(base64Data)}, type={type(base64Data)}, length={len(base64Data) if base64Data else 0}")
return
# Get image dimensions
try:
@ -1175,7 +1213,16 @@ JSON ONLY. NO OTHER TEXT."""
imageStream.seek(0)
# Add image to slide
slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight)
try:
slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight)
except Exception as add_error:
# If add_picture fails, try with explicit format
imageStream.seek(0)
# Ensure we have valid image data
if len(imageBytes) > 0:
slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight)
else:
raise Exception(f"Empty image data: {add_error}")
# Add caption if available
caption = img.get("caption") or img.get("altText")
@ -1217,6 +1264,8 @@ JSON ONLY. NO OTHER TEXT."""
except Exception as e:
logger.error(f"Error embedding images in PPTX slide: {str(e)}")
import traceback
logger.error(f"Traceback: {traceback.format_exc()}")
def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float) -> None:
"""Add a PowerPoint table to slide."""

View file

@ -113,14 +113,37 @@ class RendererXlsx(BaseRenderer):
analysisSheet = wb.create_sheet("Analysis", 2)
# Add content to sheets
self._populateSummarySheet(summarySheet, title)
self._populateSummarySheet(summarySheet, title, wb)
self._populateDataSheet(dataSheet, content)
self._populateAnalysisSheet(analysisSheet, content)
# Save to buffer
# Ensure workbook has at least one sheet (Excel requirement)
if len(wb.worksheets) == 0:
wb.create_sheet("Sheet1")
# Save to buffer with error handling
buffer = io.BytesIO()
wb.save(buffer)
buffer.seek(0)
try:
wb.save(buffer)
buffer.seek(0)
except Exception as save_error:
self.logger.error(f"Error saving Excel workbook: {str(save_error)}")
# Try to fix common issues and retry
try:
# Remove any invalid sheet names or empty sheets
for sheet in list(wb.worksheets):
if not sheet.title or len(sheet.title.strip()) == 0:
wb.remove(sheet)
# Ensure at least one sheet exists
if len(wb.worksheets) == 0:
wb.create_sheet("Sheet1")
# Retry save
buffer = io.BytesIO()
wb.save(buffer)
buffer.seek(0)
except Exception as retry_error:
self.logger.error(f"Retry save also failed: {str(retry_error)}")
raise Exception(f"Failed to save Excel workbook: {str(save_error)}")
# Convert to base64
excelBytes = buffer.getvalue()
@ -132,7 +155,7 @@ class RendererXlsx(BaseRenderer):
self.logger.error(f"Error generating Excel: {str(e)}")
raise
def _populateSummarySheet(self, sheet, title: str):
def _populateSummarySheet(self, sheet, title: str, wb: Workbook = None):
"""Populate the summary sheet."""
try:
# Title
@ -150,7 +173,11 @@ class RendererXlsx(BaseRenderer):
sheet['A6'] = "Key Metrics:"
sheet['A6'].font = Font(bold=True)
sheet['A7'] = "Total Items:"
sheet['B7'] = "=COUNTA(Data!A:A)-1" # Count non-empty cells in Data sheet
# Only add formula if Data sheet exists (check workbook sheets)
if wb and "Data" in [s.title for s in wb.worksheets]:
sheet['B7'] = "=COUNTA(Data!A:A)-1" # Count non-empty cells in Data sheet
else:
sheet['B7'] = "N/A" # Data sheet not available
# Auto-adjust column widths
sheet.column_dimensions['A'].width = 20
@ -167,7 +194,7 @@ class RendererXlsx(BaseRenderer):
for col, header in enumerate(headers, 1):
cell = sheet.cell(row=1, column=col, value=header)
cell.font = Font(bold=True)
cell.fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")
cell.fill = PatternFill(start_color="FFCCCCCC", end_color="FFCCCCCC", fill_type="solid")
# Process content
lines = content.split('\n')
@ -271,10 +298,33 @@ class RendererXlsx(BaseRenderer):
# Populate sheets with content
self._populateExcelSheets(sheets, jsonContent, styles)
# Save to buffer
# Ensure workbook has at least one sheet (Excel requirement)
if len(wb.worksheets) == 0:
wb.create_sheet("Sheet1")
# Save to buffer with error handling
buffer = io.BytesIO()
wb.save(buffer)
buffer.seek(0)
try:
wb.save(buffer)
buffer.seek(0)
except Exception as save_error:
self.logger.error(f"Error saving Excel workbook: {str(save_error)}")
# Try to fix common issues and retry
try:
# Remove any invalid sheet names or empty sheets
for sheet in list(wb.worksheets):
if not sheet.title or len(sheet.title.strip()) == 0:
wb.remove(sheet)
# Ensure at least one sheet exists
if len(wb.worksheets) == 0:
wb.create_sheet("Sheet1")
# Retry save
buffer = io.BytesIO()
wb.save(buffer)
buffer.seek(0)
except Exception as retry_error:
self.logger.error(f"Retry save also failed: {str(retry_error)}")
raise Exception(f"Failed to save Excel workbook: {str(save_error)}")
# Convert to base64
excelBytes = buffer.getvalue()
@ -348,30 +398,46 @@ class RendererXlsx(BaseRenderer):
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
bgColor = header.get("background", "#FFFFFF")
textColor = header.get("text_color", "#000000")
bgColor = header.get("background", "FFFFFFFF")
textColor = header.get("text_color", "FF000000")
# Normalize colors (remove # if present, ensure aRGB format)
bgColor = self._normalizeColor(bgColor)
textColor = self._normalizeColor(textColor)
# If both are white or both are dark, fix it
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
header["background"] = "#FF4F4F4F"
header["text_color"] = "#FFFFFFFF"
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
header["background"] = "#FF4F4F4F"
header["text_color"] = "#FFFFFFFF"
if bgColor.upper() == "FFFFFFFF" and textColor.upper() == "FFFFFFFF":
header["background"] = "FF4F4F4F"
header["text_color"] = "FFFFFFFF"
elif bgColor.upper() == "FF000000" and textColor.upper() == "FF000000":
header["background"] = "FF4F4F4F"
header["text_color"] = "FFFFFFFF"
else:
# Ensure colors are in correct format
header["background"] = bgColor
header["text_color"] = textColor
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
bgColor = cell.get("background", "#FFFFFF")
textColor = cell.get("text_color", "#000000")
bgColor = cell.get("background", "FFFFFFFF")
textColor = cell.get("text_color", "FF000000")
# Normalize colors (remove # if present, ensure aRGB format)
bgColor = self._normalizeColor(bgColor)
textColor = self._normalizeColor(textColor)
# If both are white or both are dark, fix it
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
cell["background"] = "#FFFFFFFF"
cell["text_color"] = "#FF2F2F2F"
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
cell["background"] = "#FFFFFFFF"
cell["text_color"] = "#FF2F2F2F"
if bgColor.upper() == "FFFFFFFF" and textColor.upper() == "FFFFFFFF":
cell["background"] = "FFFFFFFF"
cell["text_color"] = "FF2F2F2F"
elif bgColor.upper() == "FF000000" and textColor.upper() == "FF000000":
cell["background"] = "FFFFFFFF"
cell["text_color"] = "FF2F2F2F"
else:
# Ensure colors are in correct format
cell["background"] = bgColor
cell["text_color"] = textColor
return styles
@ -379,16 +445,39 @@ class RendererXlsx(BaseRenderer):
self.logger.warning(f"Style validation failed: {str(e)}")
return self._getDefaultStyleSet()
def _normalizeColor(self, colorValue: str) -> str:
"""Normalize color to aRGB format without # prefix."""
if not isinstance(colorValue, str):
return "FF000000"
# Remove # prefix if present
if colorValue.startswith('#'):
colorValue = colorValue[1:]
# Convert to uppercase for consistency
colorValue = colorValue.upper()
# Ensure aRGB format (8 characters)
if len(colorValue) == 6:
# Convert RRGGBB to AARRGGBB (add FF alpha channel)
return f"FF{colorValue}"
elif len(colorValue) == 8:
# Already aRGB format
return colorValue
else:
# Unexpected format, return default black
return "FF000000"
def _getDefaultStyleSet(self) -> Dict[str, Any]:
"""Default Excel style set - used when no style instructions present."""
return {
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "left"},
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
"bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2},
"paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"},
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
"title": {"font_size": 16, "color": "FF1F4E79", "bold": True, "align": "left"},
"heading": {"font_size": 14, "color": "FF2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "FF4F4F4F", "text_color": "FFFFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "FFFFFFFF", "text_color": "FF2F2F2F", "bold": False, "align": "left"},
"bullet_list": {"font_size": 11, "color": "FF2F2F2F", "indent": 2},
"paragraph": {"font_size": 11, "color": "FF2F2F2F", "bold": False, "align": "left"},
"code_block": {"font": "Courier New", "font_size": 10, "color": "FF2F2F2F", "background": "FFF5F5F5"}
}
async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
@ -450,37 +539,26 @@ class RendererXlsx(BaseRenderer):
"""Get a safe aRGB color value for Excel (without # prefix)."""
if not isinstance(colorValue, str):
return default
# Remove # prefix if present
if colorValue.startswith('#'):
colorValue = colorValue[1:]
if len(colorValue) == 6:
# Convert RRGGBB to AARRGGBB
return f"FF{colorValue}"
elif len(colorValue) == 8:
# Already aRGB format
return colorValue
else:
# Unexpected format, return default
# Use the normalize function for consistency
try:
normalized = self._normalizeColor(colorValue)
return normalized
except Exception:
return default
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert hex colors to aRGB format for Excel compatibility."""
"""Convert hex colors to aRGB format for Excel compatibility (without # prefix)."""
try:
self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER")
for styleName, styleConfig in styles.items():
if isinstance(styleConfig, dict):
for prop, value in styleConfig.items():
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel)
styles[styleName][prop] = f"FF{value[1:]}"
elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
pass # Already aRGB format
elif isinstance(value, str) and value.startswith('#'):
pass # Unexpected format, keep as is
if isinstance(value, str):
# Normalize color to aRGB format without # prefix
styles[styleName][prop] = self._normalizeColor(value)
return styles
except Exception as e:
self.logger.warning(f"Color conversion failed: {str(e)}")
return styles
def _createExcelSheets(self, wb: Workbook, jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
@ -835,13 +913,13 @@ class RendererXlsx(BaseRenderer):
self.logger.warning(f"Could not add section to sheet: {str(e)}")
return startRow + 1
def _sanitizeCellValue(self, value: Any) -> str:
"""Sanitize cell value: remove markdown, convert to string, handle None."""
def _sanitizeCellValue(self, value: Any) -> Any:
"""Sanitize cell value: remove markdown, convert to string, handle None, limit length."""
if value is None:
return ""
if isinstance(value, dict):
# Extract value from dict if present
return str(value.get("value", ""))
value = value.get("value", "")
if isinstance(value, (int, float)):
return value # Keep numbers as-is
# Convert to string and remove markdown formatting
@ -852,7 +930,11 @@ class RendererXlsx(BaseRenderer):
text = text.replace("*", "")
# Remove other markdown
text = text.replace("__", "").replace("_", "")
return text.strip()
text = text.strip()
# Excel cell value limit is 32,767 characters - truncate if necessary
if len(text) > 32767:
text = text[:32764] + "..."
return text
def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a table element to Excel sheet with proper formatting and borders."""