fixed renderers and progress tracking for generation part
This commit is contained in:
parent
bc2dd6687d
commit
911bcffcd7
5 changed files with 1905 additions and 757 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -160,18 +160,30 @@ IMPORTANT - CHAPTER INDEPENDENCE:
|
|||
- One chapter does NOT have information about another chapter
|
||||
- Each chapter must provide its own context and be understandable alone
|
||||
|
||||
CRITICAL - CONTENT ASSIGNMENT TO CHAPTERS:
|
||||
- You MUST assign available ContentParts to chapters using contentPartIds
|
||||
- Based on the user request, determine which content should be used in which chapter
|
||||
- If the user request mentions specific content, assign the corresponding ContentPart to the appropriate chapter
|
||||
- Chapters WITHOUT contentPartIds can only generate generic content, NOT document-specific analysis
|
||||
- To include document content analysis, chapters MUST have contentPartIds assigned
|
||||
- Review the user request carefully to match ContentParts to chapters based on context and purpose
|
||||
|
||||
CRITICAL - CHAPTERS WITHOUT CONTENT PARTS:
|
||||
- If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
|
||||
- Include: what to generate, what information to include, purpose, specific details
|
||||
- Without content parts, AI relies ENTIRELY on generationHint
|
||||
- GOOD: "Create [specific content] with [details]. Include [information]. Purpose: [explanation]."
|
||||
- BAD: "Create title" or "Add section" (too vague)
|
||||
- Without content parts, AI relies ENTIRELY on generationHint and CANNOT analyze document content
|
||||
|
||||
IMPORTANT - FORMATTING:
|
||||
- Formatting (fonts, colors, layouts, styles) is handled AUTOMATICALLY by the renderer
|
||||
- Do NOT specify formatting details in generationHint unless it's content-specific (e.g., "pie chart with 3 segments")
|
||||
- Focus on CONTENT and STRUCTURE, not visual formatting
|
||||
- The renderer will apply appropriate styling based on the output format ({outputFormat})
|
||||
|
||||
For each chapter:
|
||||
- chapter id
|
||||
- level (1, 2, 3, etc.)
|
||||
- title
|
||||
- contentPartIds: [List of ContentPart IDs]
|
||||
- contentPartIds: [List of ContentPart IDs] - ASSIGN content based on user request and chapter purpose
|
||||
- contentPartInstructions: {{
|
||||
"partId": {{
|
||||
"instruction": "How content should be structured"
|
||||
|
|
@ -179,6 +191,7 @@ For each chapter:
|
|||
}}
|
||||
- generationHint: Description of the content (must be self-contained with all necessary context)
|
||||
* If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
|
||||
* Focus on content and structure, NOT formatting details
|
||||
|
||||
OUTPUT FORMAT: {outputFormat}
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -535,6 +535,45 @@ class RendererXlsx(BaseRenderer):
|
|||
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
||||
return defaultStyles
|
||||
|
||||
def _getSafeAlignment(self, alignValue: Any) -> str:
|
||||
"""Get safe alignment value for openpyxl. Valid values: 'left', 'general', 'distributed', 'fill', 'justify', 'center', 'right', 'centerContinuous'."""
|
||||
if not alignValue:
|
||||
return "left"
|
||||
|
||||
alignStr = str(alignValue).lower().strip()
|
||||
|
||||
# Map common alignment values to openpyxl values
|
||||
alignmentMap = {
|
||||
"left": "left",
|
||||
"right": "right",
|
||||
"center": "center",
|
||||
"centre": "center",
|
||||
"general": "general",
|
||||
"distributed": "distributed",
|
||||
"fill": "fill",
|
||||
"justify": "justify",
|
||||
"centercontinuous": "centerContinuous",
|
||||
"center-continuous": "centerContinuous",
|
||||
"start": "left",
|
||||
"end": "right",
|
||||
"middle": "center"
|
||||
}
|
||||
|
||||
# Check direct mapping
|
||||
if alignStr in alignmentMap:
|
||||
return alignmentMap[alignStr]
|
||||
|
||||
# Check if it contains alignment keywords
|
||||
if "left" in alignStr or "start" in alignStr:
|
||||
return "left"
|
||||
elif "right" in alignStr or "end" in alignStr:
|
||||
return "right"
|
||||
elif "center" in alignStr or "centre" in alignStr or "middle" in alignStr:
|
||||
return "center"
|
||||
|
||||
# Default to left if unknown
|
||||
return "left"
|
||||
|
||||
def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str:
|
||||
"""Get a safe aRGB color value for Excel (without # prefix)."""
|
||||
if not isinstance(colorValue, str):
|
||||
|
|
@ -603,30 +642,34 @@ class RendererXlsx(BaseRenderer):
|
|||
return sanitized[:31]
|
||||
|
||||
def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]:
|
||||
"""Generate sheet names: each heading section creates a new tab."""
|
||||
"""Generate sheet names: each heading level 1 (chapter) creates a new tab."""
|
||||
sections = self._extractSections(jsonContent)
|
||||
|
||||
# If no sections, create a single sheet
|
||||
if not sections:
|
||||
return ["Content"]
|
||||
|
||||
# Simple logic: each heading section creates a new tab
|
||||
# Only heading level 1 (chapters) create new tabs
|
||||
sheetNames = []
|
||||
for section in sections:
|
||||
if section.get("content_type") == "heading":
|
||||
# Extract heading text from elements
|
||||
# Extract heading text and level from elements
|
||||
elements = section.get("elements", [])
|
||||
if elements and isinstance(elements, list) and len(elements) > 0:
|
||||
headingElement = elements[0]
|
||||
content = headingElement.get("content", {})
|
||||
if isinstance(content, dict):
|
||||
headingText = content.get("text", "")
|
||||
level = content.get("level", 1)
|
||||
elif isinstance(content, str):
|
||||
headingText = content
|
||||
level = 1
|
||||
else:
|
||||
headingText = ""
|
||||
level = 1
|
||||
|
||||
if headingText:
|
||||
# Only level 1 headings (chapters) create tabs
|
||||
if headingText and level == 1:
|
||||
sanitized_name = self._sanitizeSheetName(headingText)
|
||||
# Ensure unique sheet names
|
||||
if sanitized_name not in sheetNames:
|
||||
|
|
@ -639,7 +682,7 @@ class RendererXlsx(BaseRenderer):
|
|||
counter += 1
|
||||
sheetNames.append(f"{base_name} ({counter})"[:31])
|
||||
|
||||
# If no headings found, use document title
|
||||
# If no level 1 headings found, use document title
|
||||
if not sheetNames:
|
||||
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
|
||||
sheetNames.append(self._sanitizeSheetName(documentTitle))
|
||||
|
|
@ -647,7 +690,7 @@ class RendererXlsx(BaseRenderer):
|
|||
return sheetNames
|
||||
|
||||
def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||
"""Populate Excel sheets: each heading creates a new tab, all following content goes in that tab."""
|
||||
"""Populate Excel sheets: each heading level 1 (chapter) creates a new tab, all following content goes in that tab."""
|
||||
try:
|
||||
# Get the actual sheet names that were created (keys are lowercase)
|
||||
sheetNames = list(sheets.keys())
|
||||
|
|
@ -657,7 +700,7 @@ class RendererXlsx(BaseRenderer):
|
|||
|
||||
sections = self._extractSections(jsonContent)
|
||||
|
||||
# Simple logic: iterate through sections, each heading creates a new tab
|
||||
# Only heading level 1 (chapters) create new tabs
|
||||
currentSheetIndex = 0
|
||||
currentSheet = None
|
||||
currentRow = 1
|
||||
|
|
@ -665,17 +708,28 @@ class RendererXlsx(BaseRenderer):
|
|||
for section in sections:
|
||||
contentType = section.get("content_type", "paragraph")
|
||||
|
||||
# Heading section: switch to next sheet
|
||||
# Heading section: check if it's level 1 (chapter) to switch to next sheet
|
||||
if contentType == "heading":
|
||||
if currentSheetIndex < len(sheetNames):
|
||||
sheetName = sheetNames[currentSheetIndex]
|
||||
currentSheet = sheets[sheetName] # sheets dict uses lowercase keys
|
||||
currentSheetIndex += 1
|
||||
currentRow = 1 # Start at row 1 for new sheet
|
||||
else:
|
||||
# More headings than sheets - use last sheet
|
||||
if sheetNames:
|
||||
currentSheet = sheets[sheetNames[-1]]
|
||||
# Extract level from heading element
|
||||
elements = section.get("elements", [])
|
||||
level = 1 # Default
|
||||
if elements and isinstance(elements, list) and len(elements) > 0:
|
||||
headingElement = elements[0]
|
||||
content = headingElement.get("content", {})
|
||||
if isinstance(content, dict):
|
||||
level = content.get("level", 1)
|
||||
|
||||
# Only level 1 headings (chapters) create new tabs
|
||||
if level == 1:
|
||||
if currentSheetIndex < len(sheetNames):
|
||||
sheetName = sheetNames[currentSheetIndex]
|
||||
currentSheet = sheets[sheetName] # sheets dict uses lowercase keys
|
||||
currentSheetIndex += 1
|
||||
currentRow = 1 # Start at row 1 for new sheet
|
||||
else:
|
||||
# More headings than sheets - use last sheet
|
||||
if sheetNames:
|
||||
currentSheet = sheets[sheetNames[-1]]
|
||||
|
||||
# Render content in current sheet (or first sheet if no headings yet)
|
||||
if currentSheet is None and sheetNames:
|
||||
|
|
@ -695,7 +749,7 @@ class RendererXlsx(BaseRenderer):
|
|||
sheet['A1'] = sheetTitle
|
||||
title_style = styles.get("title", {})
|
||||
sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(title_style.get("color", "FF1F4E79")))
|
||||
sheet['A1'].alignment = Alignment(horizontal=title_style.get("align", "left"))
|
||||
sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style.get("align", "left")))
|
||||
|
||||
# Get table data from elements (canonical JSON format)
|
||||
elements = section.get("elements", [])
|
||||
|
|
@ -707,8 +761,13 @@ class RendererXlsx(BaseRenderer):
|
|||
headers = []
|
||||
rows = []
|
||||
else:
|
||||
headers = content.get("headers", [])
|
||||
rows = content.get("rows", [])
|
||||
headers = content.get("headers") or []
|
||||
rows = content.get("rows") or []
|
||||
# Ensure headers and rows are lists
|
||||
if not isinstance(headers, list):
|
||||
headers = []
|
||||
if not isinstance(rows, list):
|
||||
rows = []
|
||||
else:
|
||||
headers = []
|
||||
rows = []
|
||||
|
|
@ -770,11 +829,11 @@ class RendererXlsx(BaseRenderer):
|
|||
try:
|
||||
safe_color = self._getSafeColor(title_style["color"])
|
||||
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
|
||||
sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
|
||||
sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"]))
|
||||
except Exception as font_error:
|
||||
# Try with a safe color
|
||||
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color="FF000000")
|
||||
sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
|
||||
sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"]))
|
||||
|
||||
# Generation info
|
||||
sheet['A3'] = "Generated:"
|
||||
|
|
@ -892,6 +951,8 @@ class RendererXlsx(BaseRenderer):
|
|||
startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
|
||||
elif element_type == "image":
|
||||
startRow = self._addImageToExcel(sheet, element, styles, startRow)
|
||||
elif element_type == "code_block" or element_type == "code":
|
||||
startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow)
|
||||
else:
|
||||
# Fallback: if element_type not set, use section_type
|
||||
if section_type == "table":
|
||||
|
|
@ -904,6 +965,8 @@ class RendererXlsx(BaseRenderer):
|
|||
startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
|
||||
elif section_type == "image":
|
||||
startRow = self._addImageToExcel(sheet, element, styles, startRow)
|
||||
elif section_type == "code_block" or section_type == "code":
|
||||
startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow)
|
||||
else:
|
||||
startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
|
||||
|
||||
|
|
@ -943,9 +1006,16 @@ class RendererXlsx(BaseRenderer):
|
|||
content = element.get("content", {})
|
||||
if not isinstance(content, dict):
|
||||
return startRow
|
||||
|
||||
headers = content.get("headers", [])
|
||||
rows = content.get("rows", [])
|
||||
|
||||
# Ensure headers and rows are lists
|
||||
if not isinstance(headers, list):
|
||||
headers = []
|
||||
if not isinstance(rows, list):
|
||||
rows = []
|
||||
|
||||
if not headers and not rows:
|
||||
return startRow
|
||||
|
||||
|
|
@ -965,60 +1035,95 @@ class RendererXlsx(BaseRenderer):
|
|||
sanitized_header = self._sanitizeCellValue(header)
|
||||
cell = sheet.cell(row=headerRow, column=col, value=sanitized_header)
|
||||
|
||||
# Font styling
|
||||
cell.font = Font(
|
||||
bold=header_style.get("bold", True),
|
||||
color=self._getSafeColor(header_style.get("text_color", "FF000000"))
|
||||
)
|
||||
|
||||
# Background color
|
||||
if header_style.get("background"):
|
||||
cell.fill = PatternFill(
|
||||
start_color=self._getSafeColor(header_style["background"]),
|
||||
end_color=self._getSafeColor(header_style["background"]),
|
||||
fill_type="solid"
|
||||
# Apply styling with fallbacks - don't let styling errors prevent data rendering
|
||||
try:
|
||||
# Font styling
|
||||
cell.font = Font(
|
||||
bold=header_style.get("bold", True),
|
||||
color=self._getSafeColor(header_style.get("text_color", "FF000000"))
|
||||
)
|
||||
except Exception:
|
||||
# Fallback to default font if styling fails
|
||||
try:
|
||||
cell.font = Font(bold=True, color=self._getSafeColor("FF000000"))
|
||||
except Exception:
|
||||
pass # Continue even if font fails
|
||||
|
||||
# Alignment
|
||||
cell.alignment = Alignment(
|
||||
horizontal=header_style.get("align", "left"),
|
||||
vertical="center"
|
||||
)
|
||||
try:
|
||||
# Background color
|
||||
if header_style.get("background"):
|
||||
cell.fill = PatternFill(
|
||||
start_color=self._getSafeColor(header_style["background"]),
|
||||
end_color=self._getSafeColor(header_style["background"]),
|
||||
fill_type="solid"
|
||||
)
|
||||
except Exception:
|
||||
pass # Continue without background color if it fails
|
||||
|
||||
# Border
|
||||
cell.border = thin_border
|
||||
try:
|
||||
# Alignment
|
||||
cell.alignment = Alignment(
|
||||
horizontal=self._getSafeAlignment(header_style.get("align", "left")),
|
||||
vertical="center"
|
||||
)
|
||||
except Exception:
|
||||
# Fallback to default alignment if it fails
|
||||
try:
|
||||
cell.alignment = Alignment(horizontal="left", vertical="center")
|
||||
except Exception:
|
||||
pass # Continue even if alignment fails
|
||||
|
||||
try:
|
||||
# Border
|
||||
cell.border = thin_border
|
||||
except Exception:
|
||||
pass # Continue without border if it fails
|
||||
|
||||
startRow += 1
|
||||
|
||||
# Add rows with formatting
|
||||
cell_style = styles.get("table_cell", {})
|
||||
for row_data in rows:
|
||||
# Handle different row formats
|
||||
if isinstance(row_data, list):
|
||||
cell_values = row_data
|
||||
elif isinstance(row_data, dict) and "cells" in row_data:
|
||||
cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])]
|
||||
else:
|
||||
continue
|
||||
|
||||
for col, cell_value in enumerate(cell_values, 1):
|
||||
sanitized_value = self._sanitizeCellValue(cell_value)
|
||||
cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
|
||||
# Handle different row formats
|
||||
if isinstance(row_data, list):
|
||||
cell_values = row_data
|
||||
elif isinstance(row_data, dict) and "cells" in row_data:
|
||||
cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])]
|
||||
else:
|
||||
continue
|
||||
|
||||
# Font styling
|
||||
if cell_style.get("text_color"):
|
||||
cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
|
||||
for col, cell_value in enumerate(cell_values, 1):
|
||||
sanitized_value = self._sanitizeCellValue(cell_value)
|
||||
cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
|
||||
|
||||
# Apply styling with fallbacks - don't let styling errors prevent data rendering
|
||||
try:
|
||||
# Font styling
|
||||
if cell_style.get("text_color"):
|
||||
cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
|
||||
except Exception:
|
||||
pass # Continue without font color if it fails
|
||||
|
||||
try:
|
||||
# Alignment
|
||||
cell.alignment = Alignment(
|
||||
horizontal=self._getSafeAlignment(cell_style.get("align", "left")),
|
||||
vertical="center"
|
||||
)
|
||||
except Exception:
|
||||
# Fallback to default alignment if it fails
|
||||
try:
|
||||
cell.alignment = Alignment(horizontal="left", vertical="center")
|
||||
except Exception:
|
||||
pass # Continue even if alignment fails
|
||||
|
||||
try:
|
||||
# Border
|
||||
cell.border = thin_border
|
||||
except Exception:
|
||||
pass # Continue without border if it fails
|
||||
|
||||
# Alignment
|
||||
cell.alignment = Alignment(
|
||||
horizontal=cell_style.get("align", "left"),
|
||||
vertical="center"
|
||||
)
|
||||
|
||||
# Border
|
||||
cell.border = thin_border
|
||||
|
||||
startRow += 1
|
||||
startRow += 1
|
||||
|
||||
# Auto-adjust column widths
|
||||
for col in range(1, len(headers) + 1):
|
||||
|
|
@ -1038,7 +1143,10 @@ class RendererXlsx(BaseRenderer):
|
|||
content = element.get("content", {})
|
||||
if not isinstance(content, dict):
|
||||
return startRow
|
||||
list_items = content.get("items", [])
|
||||
list_items = content.get("items") or []
|
||||
# Ensure list_items is a list
|
||||
if not isinstance(list_items, list):
|
||||
list_items = []
|
||||
|
||||
list_style = styles.get("bullet_list", {})
|
||||
for item in list_items:
|
||||
|
|
@ -1199,6 +1307,52 @@ class RendererXlsx(BaseRenderer):
|
|||
errorCell = sheet.cell(row=startRow, column=1, value=errorMsg)
|
||||
errorCell.font = Font(color="FFFF0000", italic=True) # Red color
|
||||
return startRow + 1
|
||||
|
||||
def _addCodeBlockToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
|
||||
"""Add a code block element to Excel sheet. Expects nested content structure."""
|
||||
try:
|
||||
# Extract from nested content structure
|
||||
content = element.get("content", {})
|
||||
if not isinstance(content, dict):
|
||||
return startRow
|
||||
code = content.get("code", "")
|
||||
language = content.get("language", "")
|
||||
|
||||
if code:
|
||||
code_style = styles.get("code_block", {})
|
||||
|
||||
# Add language label if present
|
||||
if language:
|
||||
langCell = sheet.cell(row=startRow, column=1, value=f"Code ({language}):")
|
||||
langCell.font = Font(bold=True, color=self._getSafeColor(code_style.get("color", "FF000000")))
|
||||
startRow += 1
|
||||
|
||||
# Split code into lines and add each line
|
||||
code_lines = code.split('\n')
|
||||
for line in code_lines:
|
||||
codeCell = sheet.cell(row=startRow, column=1, value=line)
|
||||
codeCell.font = Font(
|
||||
name=code_style.get("font", "Courier New"),
|
||||
size=code_style.get("font_size", 10),
|
||||
color=self._getSafeColor(code_style.get("color", "FF2F2F2F"))
|
||||
)
|
||||
# Set background color if specified
|
||||
if code_style.get("background"):
|
||||
codeCell.fill = PatternFill(
|
||||
start_color=self._getSafeColor(code_style["background"]),
|
||||
end_color=self._getSafeColor(code_style["background"]),
|
||||
fill_type="solid"
|
||||
)
|
||||
startRow += 1
|
||||
|
||||
# Add spacing after code block
|
||||
startRow += 1
|
||||
|
||||
return startRow
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not add code block to Excel: {str(e)}")
|
||||
return startRow + 1
|
||||
|
||||
def _formatTimestamp(self) -> str:
|
||||
"""Format current timestamp for document generation."""
|
||||
|
|
|
|||
|
|
@ -413,10 +413,12 @@ class DocumentGenerationFormatsTester10:
|
|||
async def testAllFormats(self) -> Dict[str, Any]:
|
||||
"""Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats."""
|
||||
print("\n" + "="*80)
|
||||
print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, PDF, AND HTML FORMATS")
|
||||
print("TESTING DOCUMENT GENERATION IN HTML FORMAT")
|
||||
print("="*80)
|
||||
|
||||
formats = ["docx", "xlsx", "pptx", "pdf", "html"]
|
||||
# Only test HTML format
|
||||
formats = ["html"]
|
||||
# formats = ["docx", "xlsx", "pptx", "pdf", "html"] # Commented out other formats
|
||||
results = {}
|
||||
|
||||
for format in formats:
|
||||
|
|
@ -469,7 +471,7 @@ class DocumentGenerationFormatsTester10:
|
|||
async def runTest(self):
|
||||
"""Run the complete test."""
|
||||
print("\n" + "="*80)
|
||||
print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF, HTML")
|
||||
print("DOCUMENT GENERATION FORMATS TEST 10 - HTML ONLY")
|
||||
print("="*80)
|
||||
|
||||
try:
|
||||
|
|
|
|||
Loading…
Reference in a new issue