fixed renderers and progress tracking for generation part

This commit is contained in:
ValueOn AG 2025-12-29 22:21:17 +01:00
parent bc2dd6687d
commit 911bcffcd7
5 changed files with 1905 additions and 757 deletions

File diff suppressed because it is too large Load diff

View file

@ -160,18 +160,30 @@ IMPORTANT - CHAPTER INDEPENDENCE:
- One chapter does NOT have information about another chapter
- Each chapter must provide its own context and be understandable alone
CRITICAL - CONTENT ASSIGNMENT TO CHAPTERS:
- You MUST assign available ContentParts to chapters using contentPartIds
- Based on the user request, determine which content should be used in which chapter
- If the user request mentions specific content, assign the corresponding ContentPart to the appropriate chapter
- Chapters WITHOUT contentPartIds can only generate generic content, NOT document-specific analysis
- To include document content analysis, chapters MUST have contentPartIds assigned
- Review the user request carefully to match ContentParts to chapters based on context and purpose
CRITICAL - CHAPTERS WITHOUT CONTENT PARTS:
- If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
- Include: what to generate, what information to include, purpose, specific details
- Without content parts, AI relies ENTIRELY on generationHint
- GOOD: "Create [specific content] with [details]. Include [information]. Purpose: [explanation]."
- BAD: "Create title" or "Add section" (too vague)
- Without content parts, AI relies ENTIRELY on generationHint and CANNOT analyze document content
IMPORTANT - FORMATTING:
- Formatting (fonts, colors, layouts, styles) is handled AUTOMATICALLY by the renderer
- Do NOT specify formatting details in generationHint unless it's content-specific (e.g., "pie chart with 3 segments")
- Focus on CONTENT and STRUCTURE, not visual formatting
- The renderer will apply appropriate styling based on the output format ({outputFormat})
For each chapter:
- chapter id
- level (1, 2, 3, etc.)
- title
- contentPartIds: [List of ContentPart IDs]
- contentPartIds: [List of ContentPart IDs] - ASSIGN content based on user request and chapter purpose
- contentPartInstructions: {{
"partId": {{
"instruction": "How content should be structured"
@ -179,6 +191,7 @@ For each chapter:
}}
- generationHint: Description of the content (must be self-contained with all necessary context)
* If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
* Focus on content and structure, NOT formatting details
OUTPUT FORMAT: {outputFormat}

File diff suppressed because it is too large Load diff

View file

@ -535,6 +535,45 @@ class RendererXlsx(BaseRenderer):
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return defaultStyles
def _getSafeAlignment(self, alignValue: Any) -> str:
"""Get safe alignment value for openpyxl. Valid values: 'left', 'general', 'distributed', 'fill', 'justify', 'center', 'right', 'centerContinuous'."""
if not alignValue:
return "left"
alignStr = str(alignValue).lower().strip()
# Map common alignment values to openpyxl values
alignmentMap = {
"left": "left",
"right": "right",
"center": "center",
"centre": "center",
"general": "general",
"distributed": "distributed",
"fill": "fill",
"justify": "justify",
"centercontinuous": "centerContinuous",
"center-continuous": "centerContinuous",
"start": "left",
"end": "right",
"middle": "center"
}
# Check direct mapping
if alignStr in alignmentMap:
return alignmentMap[alignStr]
# Check if it contains alignment keywords
if "left" in alignStr or "start" in alignStr:
return "left"
elif "right" in alignStr or "end" in alignStr:
return "right"
elif "center" in alignStr or "centre" in alignStr or "middle" in alignStr:
return "center"
# Default to left if unknown
return "left"
def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str:
"""Get a safe aRGB color value for Excel (without # prefix)."""
if not isinstance(colorValue, str):
@ -603,30 +642,34 @@ class RendererXlsx(BaseRenderer):
return sanitized[:31]
def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]:
"""Generate sheet names: each heading section creates a new tab."""
"""Generate sheet names: each heading level 1 (chapter) creates a new tab."""
sections = self._extractSections(jsonContent)
# If no sections, create a single sheet
if not sections:
return ["Content"]
# Simple logic: each heading section creates a new tab
# Only heading level 1 (chapters) create new tabs
sheetNames = []
for section in sections:
if section.get("content_type") == "heading":
# Extract heading text from elements
# Extract heading text and level from elements
elements = section.get("elements", [])
if elements and isinstance(elements, list) and len(elements) > 0:
headingElement = elements[0]
content = headingElement.get("content", {})
if isinstance(content, dict):
headingText = content.get("text", "")
level = content.get("level", 1)
elif isinstance(content, str):
headingText = content
level = 1
else:
headingText = ""
level = 1
if headingText:
# Only level 1 headings (chapters) create tabs
if headingText and level == 1:
sanitized_name = self._sanitizeSheetName(headingText)
# Ensure unique sheet names
if sanitized_name not in sheetNames:
@ -639,7 +682,7 @@ class RendererXlsx(BaseRenderer):
counter += 1
sheetNames.append(f"{base_name} ({counter})"[:31])
# If no headings found, use document title
# If no level 1 headings found, use document title
if not sheetNames:
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
sheetNames.append(self._sanitizeSheetName(documentTitle))
@ -647,7 +690,7 @@ class RendererXlsx(BaseRenderer):
return sheetNames
def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Populate Excel sheets: each heading creates a new tab, all following content goes in that tab."""
"""Populate Excel sheets: each heading level 1 (chapter) creates a new tab, all following content goes in that tab."""
try:
# Get the actual sheet names that were created (keys are lowercase)
sheetNames = list(sheets.keys())
@ -657,7 +700,7 @@ class RendererXlsx(BaseRenderer):
sections = self._extractSections(jsonContent)
# Simple logic: iterate through sections, each heading creates a new tab
# Only heading level 1 (chapters) create new tabs
currentSheetIndex = 0
currentSheet = None
currentRow = 1
@ -665,17 +708,28 @@ class RendererXlsx(BaseRenderer):
for section in sections:
contentType = section.get("content_type", "paragraph")
# Heading section: switch to next sheet
# Heading section: check if it's level 1 (chapter) to switch to next sheet
if contentType == "heading":
if currentSheetIndex < len(sheetNames):
sheetName = sheetNames[currentSheetIndex]
currentSheet = sheets[sheetName] # sheets dict uses lowercase keys
currentSheetIndex += 1
currentRow = 1 # Start at row 1 for new sheet
else:
# More headings than sheets - use last sheet
if sheetNames:
currentSheet = sheets[sheetNames[-1]]
# Extract level from heading element
elements = section.get("elements", [])
level = 1 # Default
if elements and isinstance(elements, list) and len(elements) > 0:
headingElement = elements[0]
content = headingElement.get("content", {})
if isinstance(content, dict):
level = content.get("level", 1)
# Only level 1 headings (chapters) create new tabs
if level == 1:
if currentSheetIndex < len(sheetNames):
sheetName = sheetNames[currentSheetIndex]
currentSheet = sheets[sheetName] # sheets dict uses lowercase keys
currentSheetIndex += 1
currentRow = 1 # Start at row 1 for new sheet
else:
# More headings than sheets - use last sheet
if sheetNames:
currentSheet = sheets[sheetNames[-1]]
# Render content in current sheet (or first sheet if no headings yet)
if currentSheet is None and sheetNames:
@ -695,7 +749,7 @@ class RendererXlsx(BaseRenderer):
sheet['A1'] = sheetTitle
title_style = styles.get("title", {})
sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(title_style.get("color", "FF1F4E79")))
sheet['A1'].alignment = Alignment(horizontal=title_style.get("align", "left"))
sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style.get("align", "left")))
# Get table data from elements (canonical JSON format)
elements = section.get("elements", [])
@ -707,8 +761,13 @@ class RendererXlsx(BaseRenderer):
headers = []
rows = []
else:
headers = content.get("headers", [])
rows = content.get("rows", [])
headers = content.get("headers") or []
rows = content.get("rows") or []
# Ensure headers and rows are lists
if not isinstance(headers, list):
headers = []
if not isinstance(rows, list):
rows = []
else:
headers = []
rows = []
@ -770,11 +829,11 @@ class RendererXlsx(BaseRenderer):
try:
safe_color = self._getSafeColor(title_style["color"])
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"]))
except Exception as font_error:
# Try with a safe color
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color="FF000000")
sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"]))
# Generation info
sheet['A3'] = "Generated:"
@ -892,6 +951,8 @@ class RendererXlsx(BaseRenderer):
startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
elif element_type == "image":
startRow = self._addImageToExcel(sheet, element, styles, startRow)
elif element_type == "code_block" or element_type == "code":
startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow)
else:
# Fallback: if element_type not set, use section_type
if section_type == "table":
@ -904,6 +965,8 @@ class RendererXlsx(BaseRenderer):
startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
elif section_type == "image":
startRow = self._addImageToExcel(sheet, element, styles, startRow)
elif section_type == "code_block" or section_type == "code":
startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow)
else:
startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
@ -943,9 +1006,16 @@ class RendererXlsx(BaseRenderer):
content = element.get("content", {})
if not isinstance(content, dict):
return startRow
headers = content.get("headers", [])
rows = content.get("rows", [])
# Ensure headers and rows are lists
if not isinstance(headers, list):
headers = []
if not isinstance(rows, list):
rows = []
if not headers and not rows:
return startRow
@ -965,60 +1035,95 @@ class RendererXlsx(BaseRenderer):
sanitized_header = self._sanitizeCellValue(header)
cell = sheet.cell(row=headerRow, column=col, value=sanitized_header)
# Font styling
cell.font = Font(
bold=header_style.get("bold", True),
color=self._getSafeColor(header_style.get("text_color", "FF000000"))
)
# Background color
if header_style.get("background"):
cell.fill = PatternFill(
start_color=self._getSafeColor(header_style["background"]),
end_color=self._getSafeColor(header_style["background"]),
fill_type="solid"
# Apply styling with fallbacks - don't let styling errors prevent data rendering
try:
# Font styling
cell.font = Font(
bold=header_style.get("bold", True),
color=self._getSafeColor(header_style.get("text_color", "FF000000"))
)
except Exception:
# Fallback to default font if styling fails
try:
cell.font = Font(bold=True, color=self._getSafeColor("FF000000"))
except Exception:
pass # Continue even if font fails
# Alignment
cell.alignment = Alignment(
horizontal=header_style.get("align", "left"),
vertical="center"
)
try:
# Background color
if header_style.get("background"):
cell.fill = PatternFill(
start_color=self._getSafeColor(header_style["background"]),
end_color=self._getSafeColor(header_style["background"]),
fill_type="solid"
)
except Exception:
pass # Continue without background color if it fails
# Border
cell.border = thin_border
try:
# Alignment
cell.alignment = Alignment(
horizontal=self._getSafeAlignment(header_style.get("align", "left")),
vertical="center"
)
except Exception:
# Fallback to default alignment if it fails
try:
cell.alignment = Alignment(horizontal="left", vertical="center")
except Exception:
pass # Continue even if alignment fails
try:
# Border
cell.border = thin_border
except Exception:
pass # Continue without border if it fails
startRow += 1
# Add rows with formatting
cell_style = styles.get("table_cell", {})
for row_data in rows:
# Handle different row formats
if isinstance(row_data, list):
cell_values = row_data
elif isinstance(row_data, dict) and "cells" in row_data:
cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])]
else:
continue
for col, cell_value in enumerate(cell_values, 1):
sanitized_value = self._sanitizeCellValue(cell_value)
cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
# Handle different row formats
if isinstance(row_data, list):
cell_values = row_data
elif isinstance(row_data, dict) and "cells" in row_data:
cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])]
else:
continue
# Font styling
if cell_style.get("text_color"):
cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
for col, cell_value in enumerate(cell_values, 1):
sanitized_value = self._sanitizeCellValue(cell_value)
cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
# Apply styling with fallbacks - don't let styling errors prevent data rendering
try:
# Font styling
if cell_style.get("text_color"):
cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
except Exception:
pass # Continue without font color if it fails
try:
# Alignment
cell.alignment = Alignment(
horizontal=self._getSafeAlignment(cell_style.get("align", "left")),
vertical="center"
)
except Exception:
# Fallback to default alignment if it fails
try:
cell.alignment = Alignment(horizontal="left", vertical="center")
except Exception:
pass # Continue even if alignment fails
try:
# Border
cell.border = thin_border
except Exception:
pass # Continue without border if it fails
# Alignment
cell.alignment = Alignment(
horizontal=cell_style.get("align", "left"),
vertical="center"
)
# Border
cell.border = thin_border
startRow += 1
startRow += 1
# Auto-adjust column widths
for col in range(1, len(headers) + 1):
@ -1038,7 +1143,10 @@ class RendererXlsx(BaseRenderer):
content = element.get("content", {})
if not isinstance(content, dict):
return startRow
list_items = content.get("items", [])
list_items = content.get("items") or []
# Ensure list_items is a list
if not isinstance(list_items, list):
list_items = []
list_style = styles.get("bullet_list", {})
for item in list_items:
@ -1199,6 +1307,52 @@ class RendererXlsx(BaseRenderer):
errorCell = sheet.cell(row=startRow, column=1, value=errorMsg)
errorCell.font = Font(color="FFFF0000", italic=True) # Red color
return startRow + 1
def _addCodeBlockToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a code block element to Excel sheet. Expects nested content structure."""
try:
# Extract from nested content structure
content = element.get("content", {})
if not isinstance(content, dict):
return startRow
code = content.get("code", "")
language = content.get("language", "")
if code:
code_style = styles.get("code_block", {})
# Add language label if present
if language:
langCell = sheet.cell(row=startRow, column=1, value=f"Code ({language}):")
langCell.font = Font(bold=True, color=self._getSafeColor(code_style.get("color", "FF000000")))
startRow += 1
# Split code into lines and add each line
code_lines = code.split('\n')
for line in code_lines:
codeCell = sheet.cell(row=startRow, column=1, value=line)
codeCell.font = Font(
name=code_style.get("font", "Courier New"),
size=code_style.get("font_size", 10),
color=self._getSafeColor(code_style.get("color", "FF2F2F2F"))
)
# Set background color if specified
if code_style.get("background"):
codeCell.fill = PatternFill(
start_color=self._getSafeColor(code_style["background"]),
end_color=self._getSafeColor(code_style["background"]),
fill_type="solid"
)
startRow += 1
# Add spacing after code block
startRow += 1
return startRow
except Exception as e:
self.logger.warning(f"Could not add code block to Excel: {str(e)}")
return startRow + 1
def _formatTimestamp(self) -> str:
"""Format current timestamp for document generation."""

View file

@ -413,10 +413,12 @@ class DocumentGenerationFormatsTester10:
async def testAllFormats(self) -> Dict[str, Any]:
"""Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats."""
print("\n" + "="*80)
print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, PDF, AND HTML FORMATS")
print("TESTING DOCUMENT GENERATION IN HTML FORMAT")
print("="*80)
formats = ["docx", "xlsx", "pptx", "pdf", "html"]
# Only test HTML format
formats = ["html"]
# formats = ["docx", "xlsx", "pptx", "pdf", "html"] # Commented out other formats
results = {}
for format in formats:
@ -469,7 +471,7 @@ class DocumentGenerationFormatsTester10:
async def runTest(self):
"""Run the complete test."""
print("\n" + "="*80)
print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF, HTML")
print("DOCUMENT GENERATION FORMATS TEST 10 - HTML ONLY")
print("="*80)
try: