fixed renderers and progress tracking for generation part

This commit is contained in:
ValueOn AG 2025-12-29 22:21:17 +01:00
parent bc2dd6687d
commit 911bcffcd7
5 changed files with 1905 additions and 757 deletions

File diff suppressed because it is too large Load diff

View file

@ -160,18 +160,30 @@ IMPORTANT - CHAPTER INDEPENDENCE:
- One chapter does NOT have information about another chapter - One chapter does NOT have information about another chapter
- Each chapter must provide its own context and be understandable alone - Each chapter must provide its own context and be understandable alone
CRITICAL - CONTENT ASSIGNMENT TO CHAPTERS:
- You MUST assign available ContentParts to chapters using contentPartIds
- Based on the user request, determine which content should be used in which chapter
- If the user request mentions specific content, assign the corresponding ContentPart to the appropriate chapter
- Chapters WITHOUT contentPartIds can only generate generic content, NOT document-specific analysis
- To include document content analysis, chapters MUST have contentPartIds assigned
- Review the user request carefully to match ContentParts to chapters based on context and purpose
CRITICAL - CHAPTERS WITHOUT CONTENT PARTS: CRITICAL - CHAPTERS WITHOUT CONTENT PARTS:
- If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch - If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
- Include: what to generate, what information to include, purpose, specific details - Include: what to generate, what information to include, purpose, specific details
- Without content parts, AI relies ENTIRELY on generationHint - Without content parts, AI relies ENTIRELY on generationHint and CANNOT analyze document content
- GOOD: "Create [specific content] with [details]. Include [information]. Purpose: [explanation]."
- BAD: "Create title" or "Add section" (too vague) IMPORTANT - FORMATTING:
- Formatting (fonts, colors, layouts, styles) is handled AUTOMATICALLY by the renderer
- Do NOT specify formatting details in generationHint unless it's content-specific (e.g., "pie chart with 3 segments")
- Focus on CONTENT and STRUCTURE, not visual formatting
- The renderer will apply appropriate styling based on the output format ({outputFormat})
For each chapter: For each chapter:
- chapter id - chapter id
- level (1, 2, 3, etc.) - level (1, 2, 3, etc.)
- title - title
- contentPartIds: [List of ContentPart IDs] - contentPartIds: [List of ContentPart IDs] - ASSIGN content based on user request and chapter purpose
- contentPartInstructions: {{ - contentPartInstructions: {{
"partId": {{ "partId": {{
"instruction": "How content should be structured" "instruction": "How content should be structured"
@ -179,6 +191,7 @@ For each chapter:
}} }}
- generationHint: Description of the content (must be self-contained with all necessary context) - generationHint: Description of the content (must be self-contained with all necessary context)
* If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch * If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
* Focus on content and structure, NOT formatting details
OUTPUT FORMAT: {outputFormat} OUTPUT FORMAT: {outputFormat}

File diff suppressed because it is too large Load diff

View file

@ -535,6 +535,45 @@ class RendererXlsx(BaseRenderer):
self.logger.warning(f"AI styling failed: {str(e)}, using defaults") self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return defaultStyles return defaultStyles
def _getSafeAlignment(self, alignValue: Any) -> str:
"""Get safe alignment value for openpyxl. Valid values: 'left', 'general', 'distributed', 'fill', 'justify', 'center', 'right', 'centerContinuous'."""
if not alignValue:
return "left"
alignStr = str(alignValue).lower().strip()
# Map common alignment values to openpyxl values
alignmentMap = {
"left": "left",
"right": "right",
"center": "center",
"centre": "center",
"general": "general",
"distributed": "distributed",
"fill": "fill",
"justify": "justify",
"centercontinuous": "centerContinuous",
"center-continuous": "centerContinuous",
"start": "left",
"end": "right",
"middle": "center"
}
# Check direct mapping
if alignStr in alignmentMap:
return alignmentMap[alignStr]
# Check if it contains alignment keywords
if "left" in alignStr or "start" in alignStr:
return "left"
elif "right" in alignStr or "end" in alignStr:
return "right"
elif "center" in alignStr or "centre" in alignStr or "middle" in alignStr:
return "center"
# Default to left if unknown
return "left"
def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str: def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str:
"""Get a safe aRGB color value for Excel (without # prefix).""" """Get a safe aRGB color value for Excel (without # prefix)."""
if not isinstance(colorValue, str): if not isinstance(colorValue, str):
@ -603,30 +642,34 @@ class RendererXlsx(BaseRenderer):
return sanitized[:31] return sanitized[:31]
def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]: def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]:
"""Generate sheet names: each heading section creates a new tab.""" """Generate sheet names: each heading level 1 (chapter) creates a new tab."""
sections = self._extractSections(jsonContent) sections = self._extractSections(jsonContent)
# If no sections, create a single sheet # If no sections, create a single sheet
if not sections: if not sections:
return ["Content"] return ["Content"]
# Simple logic: each heading section creates a new tab # Only heading level 1 (chapters) create new tabs
sheetNames = [] sheetNames = []
for section in sections: for section in sections:
if section.get("content_type") == "heading": if section.get("content_type") == "heading":
# Extract heading text from elements # Extract heading text and level from elements
elements = section.get("elements", []) elements = section.get("elements", [])
if elements and isinstance(elements, list) and len(elements) > 0: if elements and isinstance(elements, list) and len(elements) > 0:
headingElement = elements[0] headingElement = elements[0]
content = headingElement.get("content", {}) content = headingElement.get("content", {})
if isinstance(content, dict): if isinstance(content, dict):
headingText = content.get("text", "") headingText = content.get("text", "")
level = content.get("level", 1)
elif isinstance(content, str): elif isinstance(content, str):
headingText = content headingText = content
level = 1
else: else:
headingText = "" headingText = ""
level = 1
if headingText: # Only level 1 headings (chapters) create tabs
if headingText and level == 1:
sanitized_name = self._sanitizeSheetName(headingText) sanitized_name = self._sanitizeSheetName(headingText)
# Ensure unique sheet names # Ensure unique sheet names
if sanitized_name not in sheetNames: if sanitized_name not in sheetNames:
@ -639,7 +682,7 @@ class RendererXlsx(BaseRenderer):
counter += 1 counter += 1
sheetNames.append(f"{base_name} ({counter})"[:31]) sheetNames.append(f"{base_name} ({counter})"[:31])
# If no headings found, use document title # If no level 1 headings found, use document title
if not sheetNames: if not sheetNames:
documentTitle = jsonContent.get("metadata", {}).get("title", "Document") documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
sheetNames.append(self._sanitizeSheetName(documentTitle)) sheetNames.append(self._sanitizeSheetName(documentTitle))
@ -647,7 +690,7 @@ class RendererXlsx(BaseRenderer):
return sheetNames return sheetNames
def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None: def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Populate Excel sheets: each heading creates a new tab, all following content goes in that tab.""" """Populate Excel sheets: each heading level 1 (chapter) creates a new tab, all following content goes in that tab."""
try: try:
# Get the actual sheet names that were created (keys are lowercase) # Get the actual sheet names that were created (keys are lowercase)
sheetNames = list(sheets.keys()) sheetNames = list(sheets.keys())
@ -657,7 +700,7 @@ class RendererXlsx(BaseRenderer):
sections = self._extractSections(jsonContent) sections = self._extractSections(jsonContent)
# Simple logic: iterate through sections, each heading creates a new tab # Only heading level 1 (chapters) create new tabs
currentSheetIndex = 0 currentSheetIndex = 0
currentSheet = None currentSheet = None
currentRow = 1 currentRow = 1
@ -665,17 +708,28 @@ class RendererXlsx(BaseRenderer):
for section in sections: for section in sections:
contentType = section.get("content_type", "paragraph") contentType = section.get("content_type", "paragraph")
# Heading section: switch to next sheet # Heading section: check if it's level 1 (chapter) to switch to next sheet
if contentType == "heading": if contentType == "heading":
if currentSheetIndex < len(sheetNames): # Extract level from heading element
sheetName = sheetNames[currentSheetIndex] elements = section.get("elements", [])
currentSheet = sheets[sheetName] # sheets dict uses lowercase keys level = 1 # Default
currentSheetIndex += 1 if elements and isinstance(elements, list) and len(elements) > 0:
currentRow = 1 # Start at row 1 for new sheet headingElement = elements[0]
else: content = headingElement.get("content", {})
# More headings than sheets - use last sheet if isinstance(content, dict):
if sheetNames: level = content.get("level", 1)
currentSheet = sheets[sheetNames[-1]]
# Only level 1 headings (chapters) create new tabs
if level == 1:
if currentSheetIndex < len(sheetNames):
sheetName = sheetNames[currentSheetIndex]
currentSheet = sheets[sheetName] # sheets dict uses lowercase keys
currentSheetIndex += 1
currentRow = 1 # Start at row 1 for new sheet
else:
# More headings than sheets - use last sheet
if sheetNames:
currentSheet = sheets[sheetNames[-1]]
# Render content in current sheet (or first sheet if no headings yet) # Render content in current sheet (or first sheet if no headings yet)
if currentSheet is None and sheetNames: if currentSheet is None and sheetNames:
@ -695,7 +749,7 @@ class RendererXlsx(BaseRenderer):
sheet['A1'] = sheetTitle sheet['A1'] = sheetTitle
title_style = styles.get("title", {}) title_style = styles.get("title", {})
sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(title_style.get("color", "FF1F4E79"))) sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(title_style.get("color", "FF1F4E79")))
sheet['A1'].alignment = Alignment(horizontal=title_style.get("align", "left")) sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style.get("align", "left")))
# Get table data from elements (canonical JSON format) # Get table data from elements (canonical JSON format)
elements = section.get("elements", []) elements = section.get("elements", [])
@ -707,8 +761,13 @@ class RendererXlsx(BaseRenderer):
headers = [] headers = []
rows = [] rows = []
else: else:
headers = content.get("headers", []) headers = content.get("headers") or []
rows = content.get("rows", []) rows = content.get("rows") or []
# Ensure headers and rows are lists
if not isinstance(headers, list):
headers = []
if not isinstance(rows, list):
rows = []
else: else:
headers = [] headers = []
rows = [] rows = []
@ -770,11 +829,11 @@ class RendererXlsx(BaseRenderer):
try: try:
safe_color = self._getSafeColor(title_style["color"]) safe_color = self._getSafeColor(title_style["color"])
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color) sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
sheet['A1'].alignment = Alignment(horizontal=title_style["align"]) sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"]))
except Exception as font_error: except Exception as font_error:
# Try with a safe color # Try with a safe color
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color="FF000000") sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color="FF000000")
sheet['A1'].alignment = Alignment(horizontal=title_style["align"]) sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"]))
# Generation info # Generation info
sheet['A3'] = "Generated:" sheet['A3'] = "Generated:"
@ -892,6 +951,8 @@ class RendererXlsx(BaseRenderer):
startRow = self._addHeadingToExcel(sheet, element, styles, startRow) startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
elif element_type == "image": elif element_type == "image":
startRow = self._addImageToExcel(sheet, element, styles, startRow) startRow = self._addImageToExcel(sheet, element, styles, startRow)
elif element_type == "code_block" or element_type == "code":
startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow)
else: else:
# Fallback: if element_type not set, use section_type # Fallback: if element_type not set, use section_type
if section_type == "table": if section_type == "table":
@ -904,6 +965,8 @@ class RendererXlsx(BaseRenderer):
startRow = self._addHeadingToExcel(sheet, element, styles, startRow) startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
elif section_type == "image": elif section_type == "image":
startRow = self._addImageToExcel(sheet, element, styles, startRow) startRow = self._addImageToExcel(sheet, element, styles, startRow)
elif section_type == "code_block" or section_type == "code":
startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow)
else: else:
startRow = self._addParagraphToExcel(sheet, element, styles, startRow) startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
@ -943,9 +1006,16 @@ class RendererXlsx(BaseRenderer):
content = element.get("content", {}) content = element.get("content", {})
if not isinstance(content, dict): if not isinstance(content, dict):
return startRow return startRow
headers = content.get("headers", []) headers = content.get("headers", [])
rows = content.get("rows", []) rows = content.get("rows", [])
# Ensure headers and rows are lists
if not isinstance(headers, list):
headers = []
if not isinstance(rows, list):
rows = []
if not headers and not rows: if not headers and not rows:
return startRow return startRow
@ -965,60 +1035,95 @@ class RendererXlsx(BaseRenderer):
sanitized_header = self._sanitizeCellValue(header) sanitized_header = self._sanitizeCellValue(header)
cell = sheet.cell(row=headerRow, column=col, value=sanitized_header) cell = sheet.cell(row=headerRow, column=col, value=sanitized_header)
# Font styling # Apply styling with fallbacks - don't let styling errors prevent data rendering
cell.font = Font( try:
bold=header_style.get("bold", True), # Font styling
color=self._getSafeColor(header_style.get("text_color", "FF000000")) cell.font = Font(
) bold=header_style.get("bold", True),
color=self._getSafeColor(header_style.get("text_color", "FF000000"))
# Background color
if header_style.get("background"):
cell.fill = PatternFill(
start_color=self._getSafeColor(header_style["background"]),
end_color=self._getSafeColor(header_style["background"]),
fill_type="solid"
) )
except Exception:
# Fallback to default font if styling fails
try:
cell.font = Font(bold=True, color=self._getSafeColor("FF000000"))
except Exception:
pass # Continue even if font fails
# Alignment try:
cell.alignment = Alignment( # Background color
horizontal=header_style.get("align", "left"), if header_style.get("background"):
vertical="center" cell.fill = PatternFill(
) start_color=self._getSafeColor(header_style["background"]),
end_color=self._getSafeColor(header_style["background"]),
fill_type="solid"
)
except Exception:
pass # Continue without background color if it fails
# Border try:
cell.border = thin_border # Alignment
cell.alignment = Alignment(
horizontal=self._getSafeAlignment(header_style.get("align", "left")),
vertical="center"
)
except Exception:
# Fallback to default alignment if it fails
try:
cell.alignment = Alignment(horizontal="left", vertical="center")
except Exception:
pass # Continue even if alignment fails
try:
# Border
cell.border = thin_border
except Exception:
pass # Continue without border if it fails
startRow += 1 startRow += 1
# Add rows with formatting # Add rows with formatting
cell_style = styles.get("table_cell", {}) cell_style = styles.get("table_cell", {})
for row_data in rows: for row_data in rows:
# Handle different row formats # Handle different row formats
if isinstance(row_data, list): if isinstance(row_data, list):
cell_values = row_data cell_values = row_data
elif isinstance(row_data, dict) and "cells" in row_data: elif isinstance(row_data, dict) and "cells" in row_data:
cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])] cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])]
else: else:
continue continue
for col, cell_value in enumerate(cell_values, 1):
sanitized_value = self._sanitizeCellValue(cell_value)
cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
# Font styling for col, cell_value in enumerate(cell_values, 1):
if cell_style.get("text_color"): sanitized_value = self._sanitizeCellValue(cell_value)
cell.font = Font(color=self._getSafeColor(cell_style["text_color"])) cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
# Apply styling with fallbacks - don't let styling errors prevent data rendering
try:
# Font styling
if cell_style.get("text_color"):
cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
except Exception:
pass # Continue without font color if it fails
try:
# Alignment
cell.alignment = Alignment(
horizontal=self._getSafeAlignment(cell_style.get("align", "left")),
vertical="center"
)
except Exception:
# Fallback to default alignment if it fails
try:
cell.alignment = Alignment(horizontal="left", vertical="center")
except Exception:
pass # Continue even if alignment fails
try:
# Border
cell.border = thin_border
except Exception:
pass # Continue without border if it fails
# Alignment startRow += 1
cell.alignment = Alignment(
horizontal=cell_style.get("align", "left"),
vertical="center"
)
# Border
cell.border = thin_border
startRow += 1
# Auto-adjust column widths # Auto-adjust column widths
for col in range(1, len(headers) + 1): for col in range(1, len(headers) + 1):
@ -1038,7 +1143,10 @@ class RendererXlsx(BaseRenderer):
content = element.get("content", {}) content = element.get("content", {})
if not isinstance(content, dict): if not isinstance(content, dict):
return startRow return startRow
list_items = content.get("items", []) list_items = content.get("items") or []
# Ensure list_items is a list
if not isinstance(list_items, list):
list_items = []
list_style = styles.get("bullet_list", {}) list_style = styles.get("bullet_list", {})
for item in list_items: for item in list_items:
@ -1199,6 +1307,52 @@ class RendererXlsx(BaseRenderer):
errorCell = sheet.cell(row=startRow, column=1, value=errorMsg) errorCell = sheet.cell(row=startRow, column=1, value=errorMsg)
errorCell.font = Font(color="FFFF0000", italic=True) # Red color errorCell.font = Font(color="FFFF0000", italic=True) # Red color
return startRow + 1 return startRow + 1
def _addCodeBlockToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a code block element to Excel sheet. Expects nested content structure."""
try:
# Extract from nested content structure
content = element.get("content", {})
if not isinstance(content, dict):
return startRow
code = content.get("code", "")
language = content.get("language", "")
if code:
code_style = styles.get("code_block", {})
# Add language label if present
if language:
langCell = sheet.cell(row=startRow, column=1, value=f"Code ({language}):")
langCell.font = Font(bold=True, color=self._getSafeColor(code_style.get("color", "FF000000")))
startRow += 1
# Split code into lines and add each line
code_lines = code.split('\n')
for line in code_lines:
codeCell = sheet.cell(row=startRow, column=1, value=line)
codeCell.font = Font(
name=code_style.get("font", "Courier New"),
size=code_style.get("font_size", 10),
color=self._getSafeColor(code_style.get("color", "FF2F2F2F"))
)
# Set background color if specified
if code_style.get("background"):
codeCell.fill = PatternFill(
start_color=self._getSafeColor(code_style["background"]),
end_color=self._getSafeColor(code_style["background"]),
fill_type="solid"
)
startRow += 1
# Add spacing after code block
startRow += 1
return startRow
except Exception as e:
self.logger.warning(f"Could not add code block to Excel: {str(e)}")
return startRow + 1
def _formatTimestamp(self) -> str: def _formatTimestamp(self) -> str:
"""Format current timestamp for document generation.""" """Format current timestamp for document generation."""

View file

@ -413,10 +413,12 @@ class DocumentGenerationFormatsTester10:
async def testAllFormats(self) -> Dict[str, Any]: async def testAllFormats(self) -> Dict[str, Any]:
"""Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats.""" """Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats."""
print("\n" + "="*80) print("\n" + "="*80)
print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, PDF, AND HTML FORMATS") print("TESTING DOCUMENT GENERATION IN HTML FORMAT")
print("="*80) print("="*80)
formats = ["docx", "xlsx", "pptx", "pdf", "html"] # Only test HTML format
formats = ["html"]
# formats = ["docx", "xlsx", "pptx", "pdf", "html"] # Commented out other formats
results = {} results = {}
for format in formats: for format in formats:
@ -469,7 +471,7 @@ class DocumentGenerationFormatsTester10:
async def runTest(self): async def runTest(self):
"""Run the complete test.""" """Run the complete test."""
print("\n" + "="*80) print("\n" + "="*80)
print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF, HTML") print("DOCUMENT GENERATION FORMATS TEST 10 - HTML ONLY")
print("="*80) print("="*80)
try: try: