fixed renderers and progress tracking for generation part

2025-12-29 22:21:17 +01:00 · 2025-12-29 22:21:17 +01:00 · 911bcffcd7
commit 911bcffcd7
parent bc2dd6687d
5 changed files with 1905 additions and 757 deletions
--- a/modules/services/serviceAi/subStructureFilling.py
+++ b/modules/services/serviceAi/subStructureFilling.py
--- a/modules/services/serviceAi/subStructureGeneration.py
+++ b/modules/services/serviceAi/subStructureGeneration.py
@ -160,18 +160,30 @@ IMPORTANT - CHAPTER INDEPENDENCE:
 - One chapter does NOT have information about another chapter
 - Each chapter must provide its own context and be understandable alone

+CRITICAL - CONTENT ASSIGNMENT TO CHAPTERS:
+- You MUST assign available ContentParts to chapters using contentPartIds
+- Based on the user request, determine which content should be used in which chapter
+- If the user request mentions specific content, assign the corresponding ContentPart to the appropriate chapter
+- Chapters WITHOUT contentPartIds can only generate generic content, NOT document-specific analysis
+- To include document content analysis, chapters MUST have contentPartIds assigned
+- Review the user request carefully to match ContentParts to chapters based on context and purpose
+
 CRITICAL - CHAPTERS WITHOUT CONTENT PARTS:
 - If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
 - Include: what to generate, what information to include, purpose, specific details
- Without content parts, AI relies ENTIRELY on generationHint
- GOOD: "Create [specific content] with [details]. Include [information]. Purpose: [explanation]."
- BAD: "Create title" or "Add section" (too vague)
+- Without content parts, AI relies ENTIRELY on generationHint and CANNOT analyze document content
+
+IMPORTANT - FORMATTING:
+- Formatting (fonts, colors, layouts, styles) is handled AUTOMATICALLY by the renderer
+- Do NOT specify formatting details in generationHint unless it's content-specific (e.g., "pie chart with 3 segments")
+- Focus on CONTENT and STRUCTURE, not visual formatting
+- The renderer will apply appropriate styling based on the output format ({outputFormat})

 For each chapter:
 - chapter id
 - level (1, 2, 3, etc.)
 - title
- contentPartIds: [List of ContentPart IDs]
+- contentPartIds: [List of ContentPart IDs] - ASSIGN content based on user request and chapter purpose
 - contentPartInstructions: {{
    "partId": {{
        "instruction": "How content should be structured"
@ -179,6 +191,7 @@ For each chapter:
 }}
 - generationHint: Description of the content (must be self-contained with all necessary context)
  * If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
+  * Focus on content and structure, NOT formatting details

 OUTPUT FORMAT: {outputFormat}

--- a/modules/services/serviceGeneration/renderers/rendererPptx.py
+++ b/modules/services/serviceGeneration/renderers/rendererPptx.py
--- a/modules/services/serviceGeneration/renderers/rendererXlsx.py
+++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py
@ -535,6 +535,45 @@ class RendererXlsx(BaseRenderer):
            self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
            return defaultStyles

+    def _getSafeAlignment(self, alignValue: Any) -> str:
+        """Get safe alignment value for openpyxl. Valid values: 'left', 'general', 'distributed', 'fill', 'justify', 'center', 'right', 'centerContinuous'."""
+        if not alignValue:
+            return "left"
+        
+        alignStr = str(alignValue).lower().strip()
+        
+        # Map common alignment values to openpyxl values
+        alignmentMap = {
+            "left": "left",
+            "right": "right",
+            "center": "center",
+            "centre": "center",
+            "general": "general",
+            "distributed": "distributed",
+            "fill": "fill",
+            "justify": "justify",
+            "centercontinuous": "centerContinuous",
+            "center-continuous": "centerContinuous",
+            "start": "left",
+            "end": "right",
+            "middle": "center"
+        }
+        
+        # Check direct mapping
+        if alignStr in alignmentMap:
+            return alignmentMap[alignStr]
+        
+        # Check if it contains alignment keywords
+        if "left" in alignStr or "start" in alignStr:
+            return "left"
+        elif "right" in alignStr or "end" in alignStr:
+            return "right"
+        elif "center" in alignStr or "centre" in alignStr or "middle" in alignStr:
+            return "center"
+        
+        # Default to left if unknown
+        return "left"
+    
    def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str:
        """Get a safe aRGB color value for Excel (without # prefix)."""
        if not isinstance(colorValue, str):
@ -603,30 +642,34 @@ class RendererXlsx(BaseRenderer):
        return sanitized[:31]
    
    def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]:
-        """Generate sheet names: each heading section creates a new tab."""
+        """Generate sheet names: each heading level 1 (chapter) creates a new tab."""
        sections = self._extractSections(jsonContent)
        
        # If no sections, create a single sheet
        if not sections:
            return ["Content"]
        
-        # Simple logic: each heading section creates a new tab
+        # Only heading level 1 (chapters) create new tabs
        sheetNames = []
        for section in sections:
            if section.get("content_type") == "heading":
-                # Extract heading text from elements
+                # Extract heading text and level from elements
                elements = section.get("elements", [])
                if elements and isinstance(elements, list) and len(elements) > 0:
                    headingElement = elements[0]
                    content = headingElement.get("content", {})
                    if isinstance(content, dict):
                        headingText = content.get("text", "")
+                        level = content.get("level", 1)
                    elif isinstance(content, str):
                        headingText = content
+                        level = 1
                    else:
                        headingText = ""
+                        level = 1
                    
-                    if headingText:
+                    # Only level 1 headings (chapters) create tabs
+                    if headingText and level == 1:
                        sanitized_name = self._sanitizeSheetName(headingText)
                        # Ensure unique sheet names
                        if sanitized_name not in sheetNames:
@ -639,7 +682,7 @@ class RendererXlsx(BaseRenderer):
                                counter += 1
                            sheetNames.append(f"{base_name} ({counter})"[:31])
        
-        # If no headings found, use document title
+        # If no level 1 headings found, use document title
        if not sheetNames:
            documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
            sheetNames.append(self._sanitizeSheetName(documentTitle))
@ -647,7 +690,7 @@ class RendererXlsx(BaseRenderer):
        return sheetNames
    
    def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
-        """Populate Excel sheets: each heading creates a new tab, all following content goes in that tab."""
+        """Populate Excel sheets: each heading level 1 (chapter) creates a new tab, all following content goes in that tab."""
        try:
            # Get the actual sheet names that were created (keys are lowercase)
            sheetNames = list(sheets.keys())
@ -657,7 +700,7 @@ class RendererXlsx(BaseRenderer):
            
            sections = self._extractSections(jsonContent)
            
-            # Simple logic: iterate through sections, each heading creates a new tab
+            # Only heading level 1 (chapters) create new tabs
            currentSheetIndex = 0
            currentSheet = None
            currentRow = 1
@ -665,17 +708,28 @@ class RendererXlsx(BaseRenderer):
            for section in sections:
                contentType = section.get("content_type", "paragraph")
                
-                # Heading section: switch to next sheet
+                # Heading section: check if it's level 1 (chapter) to switch to next sheet
                if contentType == "heading":
-                    if currentSheetIndex < len(sheetNames):
-                        sheetName = sheetNames[currentSheetIndex]
-                        currentSheet = sheets[sheetName]  # sheets dict uses lowercase keys
-                        currentSheetIndex += 1
-                        currentRow = 1  # Start at row 1 for new sheet
-                    else:
-                        # More headings than sheets - use last sheet
-                        if sheetNames:
-                            currentSheet = sheets[sheetNames[-1]]
+                    # Extract level from heading element
+                    elements = section.get("elements", [])
+                    level = 1  # Default
+                    if elements and isinstance(elements, list) and len(elements) > 0:
+                        headingElement = elements[0]
+                        content = headingElement.get("content", {})
+                        if isinstance(content, dict):
+                            level = content.get("level", 1)
+                    
+                    # Only level 1 headings (chapters) create new tabs
+                    if level == 1:
+                        if currentSheetIndex < len(sheetNames):
+                            sheetName = sheetNames[currentSheetIndex]
+                            currentSheet = sheets[sheetName]  # sheets dict uses lowercase keys
+                            currentSheetIndex += 1
+                            currentRow = 1  # Start at row 1 for new sheet
+                        else:
+                            # More headings than sheets - use last sheet
+                            if sheetNames:
+                                currentSheet = sheets[sheetNames[-1]]
                
                # Render content in current sheet (or first sheet if no headings yet)
                if currentSheet is None and sheetNames:
@ -695,7 +749,7 @@ class RendererXlsx(BaseRenderer):
            sheet['A1'] = sheetTitle
            title_style = styles.get("title", {})
            sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(title_style.get("color", "FF1F4E79")))
-            sheet['A1'].alignment = Alignment(horizontal=title_style.get("align", "left"))
+            sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style.get("align", "left")))
            
            # Get table data from elements (canonical JSON format)
            elements = section.get("elements", [])
@ -707,8 +761,13 @@ class RendererXlsx(BaseRenderer):
                    headers = []
                    rows = []
                else:
-                    headers = content.get("headers", [])
-                    rows = content.get("rows", [])
+                    headers = content.get("headers") or []
+                    rows = content.get("rows") or []
+                    # Ensure headers and rows are lists
+                    if not isinstance(headers, list):
+                        headers = []
+                    if not isinstance(rows, list):
+                        rows = []
            else:
                headers = []
                rows = []
@ -770,11 +829,11 @@ class RendererXlsx(BaseRenderer):
            try:
                safe_color = self._getSafeColor(title_style["color"])
                sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
-                sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
+                sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"]))
            except Exception as font_error:
                # Try with a safe color
                sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color="FF000000")
-                sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
+                sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"]))
            
            # Generation info
            sheet['A3'] = "Generated:"
@ -892,6 +951,8 @@ class RendererXlsx(BaseRenderer):
                    startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
                elif element_type == "image":
                    startRow = self._addImageToExcel(sheet, element, styles, startRow)
+                elif element_type == "code_block" or element_type == "code":
+                    startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow)
                else:
                    # Fallback: if element_type not set, use section_type
                    if section_type == "table":
@ -904,6 +965,8 @@ class RendererXlsx(BaseRenderer):
                        startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
                    elif section_type == "image":
                        startRow = self._addImageToExcel(sheet, element, styles, startRow)
+                    elif section_type == "code_block" or section_type == "code":
+                        startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow)
                    else:
                        startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
            
@ -943,9 +1006,16 @@ class RendererXlsx(BaseRenderer):
            content = element.get("content", {})
            if not isinstance(content, dict):
                return startRow
+            
            headers = content.get("headers", [])
            rows = content.get("rows", [])
            
+            # Ensure headers and rows are lists
+            if not isinstance(headers, list):
+                headers = []
+            if not isinstance(rows, list):
+                rows = []
+            
            if not headers and not rows:
                return startRow
            
@ -965,60 +1035,95 @@ class RendererXlsx(BaseRenderer):
                sanitized_header = self._sanitizeCellValue(header)
                cell = sheet.cell(row=headerRow, column=col, value=sanitized_header)
                
-                # Font styling
-                cell.font = Font(
-                    bold=header_style.get("bold", True),
-                    color=self._getSafeColor(header_style.get("text_color", "FF000000"))
-                )
-                
-                # Background color
-                if header_style.get("background"):
-                    cell.fill = PatternFill(
-                        start_color=self._getSafeColor(header_style["background"]),
-                        end_color=self._getSafeColor(header_style["background"]),
-                        fill_type="solid"
+                # Apply styling with fallbacks - don't let styling errors prevent data rendering
+                try:
+                    # Font styling
+                    cell.font = Font(
+                        bold=header_style.get("bold", True),
+                        color=self._getSafeColor(header_style.get("text_color", "FF000000"))
                    )
+                except Exception:
+                    # Fallback to default font if styling fails
+                    try:
+                        cell.font = Font(bold=True, color=self._getSafeColor("FF000000"))
+                    except Exception:
+                        pass  # Continue even if font fails
                
-                # Alignment
-                cell.alignment = Alignment(
-                    horizontal=header_style.get("align", "left"),
-                    vertical="center"
-                )
+                try:
+                    # Background color
+                    if header_style.get("background"):
+                        cell.fill = PatternFill(
+                            start_color=self._getSafeColor(header_style["background"]),
+                            end_color=self._getSafeColor(header_style["background"]),
+                            fill_type="solid"
+                        )
+                except Exception:
+                    pass  # Continue without background color if it fails
                
-                # Border
-                cell.border = thin_border
+                try:
+                    # Alignment
+                    cell.alignment = Alignment(
+                        horizontal=self._getSafeAlignment(header_style.get("align", "left")),
+                        vertical="center"
+                    )
+                except Exception:
+                    # Fallback to default alignment if it fails
+                    try:
+                        cell.alignment = Alignment(horizontal="left", vertical="center")
+                    except Exception:
+                        pass  # Continue even if alignment fails
+                
+                try:
+                    # Border
+                    cell.border = thin_border
+                except Exception:
+                    pass  # Continue without border if it fails
            
            startRow += 1
            
            # Add rows with formatting
            cell_style = styles.get("table_cell", {})
            for row_data in rows:
-                # Handle different row formats
-                if isinstance(row_data, list):
-                    cell_values = row_data
-                elif isinstance(row_data, dict) and "cells" in row_data:
-                    cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])]
-                else:
-                    continue
-                
-                for col, cell_value in enumerate(cell_values, 1):
-                    sanitized_value = self._sanitizeCellValue(cell_value)
-                    cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
+                    # Handle different row formats
+                    if isinstance(row_data, list):
+                        cell_values = row_data
+                    elif isinstance(row_data, dict) and "cells" in row_data:
+                        cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])]
+                    else:
+                        continue
                    
-                    # Font styling
-                    if cell_style.get("text_color"):
-                        cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
+                    for col, cell_value in enumerate(cell_values, 1):
+                        sanitized_value = self._sanitizeCellValue(cell_value)
+                        cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
+                        
+                        # Apply styling with fallbacks - don't let styling errors prevent data rendering
+                        try:
+                            # Font styling
+                            if cell_style.get("text_color"):
+                                cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
+                        except Exception:
+                            pass  # Continue without font color if it fails
+                        
+                        try:
+                            # Alignment
+                            cell.alignment = Alignment(
+                                horizontal=self._getSafeAlignment(cell_style.get("align", "left")),
+                                vertical="center"
+                            )
+                        except Exception:
+                            # Fallback to default alignment if it fails
+                            try:
+                                cell.alignment = Alignment(horizontal="left", vertical="center")
+                            except Exception:
+                                pass  # Continue even if alignment fails
+                        
+                        try:
+                            # Border
+                            cell.border = thin_border
+                        except Exception:
+                            pass  # Continue without border if it fails
                    
-                    # Alignment
-                    cell.alignment = Alignment(
-                        horizontal=cell_style.get("align", "left"),
-                        vertical="center"
-                    )
-                    
-                    # Border
-                    cell.border = thin_border
-                
-                startRow += 1
+                    startRow += 1
            
            # Auto-adjust column widths
            for col in range(1, len(headers) + 1):
@ -1038,7 +1143,10 @@ class RendererXlsx(BaseRenderer):
            content = element.get("content", {})
            if not isinstance(content, dict):
                return startRow
-            list_items = content.get("items", [])
+            list_items = content.get("items") or []
+            # Ensure list_items is a list
+            if not isinstance(list_items, list):
+                list_items = []
            
            list_style = styles.get("bullet_list", {})
            for item in list_items:
@ -1199,6 +1307,52 @@ class RendererXlsx(BaseRenderer):
            errorCell = sheet.cell(row=startRow, column=1, value=errorMsg)
            errorCell.font = Font(color="FFFF0000", italic=True)  # Red color
            return startRow + 1
+    
+    def _addCodeBlockToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
+        """Add a code block element to Excel sheet. Expects nested content structure."""
+        try:
+            # Extract from nested content structure
+            content = element.get("content", {})
+            if not isinstance(content, dict):
+                return startRow
+            code = content.get("code", "")
+            language = content.get("language", "")
+            
+            if code:
+                code_style = styles.get("code_block", {})
+                
+                # Add language label if present
+                if language:
+                    langCell = sheet.cell(row=startRow, column=1, value=f"Code ({language}):")
+                    langCell.font = Font(bold=True, color=self._getSafeColor(code_style.get("color", "FF000000")))
+                    startRow += 1
+                
+                # Split code into lines and add each line
+                code_lines = code.split('\n')
+                for line in code_lines:
+                    codeCell = sheet.cell(row=startRow, column=1, value=line)
+                    codeCell.font = Font(
+                        name=code_style.get("font", "Courier New"),
+                        size=code_style.get("font_size", 10),
+                        color=self._getSafeColor(code_style.get("color", "FF2F2F2F"))
+                    )
+                    # Set background color if specified
+                    if code_style.get("background"):
+                        codeCell.fill = PatternFill(
+                            start_color=self._getSafeColor(code_style["background"]),
+                            end_color=self._getSafeColor(code_style["background"]),
+                            fill_type="solid"
+                        )
+                    startRow += 1
+                
+                # Add spacing after code block
+                startRow += 1
+            
+            return startRow
+            
+        except Exception as e:
+            self.logger.warning(f"Could not add code block to Excel: {str(e)}")
+            return startRow + 1

    def _formatTimestamp(self) -> str:
        """Format current timestamp for document generation."""
--- a/tests/functional/test10_document_generation_formats.py
+++ b/tests/functional/test10_document_generation_formats.py
@ -413,10 +413,12 @@ class DocumentGenerationFormatsTester10:
    async def testAllFormats(self) -> Dict[str, Any]:
        """Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats."""
        print("\n" + "="*80)
-        print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, PDF, AND HTML FORMATS")
+        print("TESTING DOCUMENT GENERATION IN HTML FORMAT")
        print("="*80)
        
-        formats = ["docx", "xlsx", "pptx", "pdf", "html"]
+        # Only test HTML format
+        formats = ["html"]
+        # formats = ["docx", "xlsx", "pptx", "pdf", "html"]  # Commented out other formats
        results = {}
        
        for format in formats:
@ -469,7 +471,7 @@ class DocumentGenerationFormatsTester10:
    async def runTest(self):
        """Run the complete test."""
        print("\n" + "="*80)
-        print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF, HTML")
+        print("DOCUMENT GENERATION FORMATS TEST 10 - HTML ONLY")
        print("="*80)
        
        try: