# gateway/modules/services/serviceGeneration/renderers/rendererDocx.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
DOCX renderer for report generation using python-docx.
"""

from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import io
import base64
import re
import csv

try:
    from docx import Document
    from docx.shared import Inches, Pt, RGBColor
    from docx.enum.text import WD_ALIGN_PARAGRAPH
    from docx.enum.table import WD_TABLE_ALIGNMENT
    DOCX_AVAILABLE = True
except ImportError:
    DOCX_AVAILABLE = False

class RendererDocx(BaseRenderer):
    """Renders content to DOCX format using python-docx."""

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported DOCX formats."""
        return ['docx', 'doc']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['word', 'document']

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for DOCX renderer."""
        return 115

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: Word documents are formatted documents."""
        return 'document'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        List the section content types this renderer accepts.

        Every entry of ``supportedSectionTypes`` is accepted; ``formatName`` is
        not consulted. A new list is built on each call.
        """
        # Imported at call time, as in the original implementation.
        from modules.datamodels.datamodelJson import supportedSectionTypes as allSectionTypes
        return [*allSectionTypes]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: Optional[str] = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to a DOCX document.

        Args:
            extractedContent: Structured content. Its "metadata" and "documents"
                entries are read here; the whole object is handed to the DOCX
                generator.
            title: Document title; also used to derive a filename when the first
                document entry does not name one.
            userPrompt: Optional user prompt forwarded to the generator.
            aiService: Optional AI service forwarded to the generator.

        Returns:
            A single-element list. On success it holds the DOCX bytes. When
            python-docx is not importable, the HTML renderer's result is returned
            instead. On any error it holds a text/plain document describing the
            failure.
        """
        docxMimeType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")

        # Resolved once, outside the try block, so the error path below cannot
        # fail again on missing or non-dict content.
        contentDict = extractedContent if isinstance(extractedContent, dict) else {}
        metadata = contentDict.get("metadata", {})
        documentType = metadata.get("documentType") if isinstance(metadata, dict) else None
        safeMetadata = metadata if isinstance(metadata, dict) else None

        try:
            if not DOCX_AVAILABLE:
                # Fallback to HTML if python-docx not available
                from .rendererHtml import RendererHtml
                htmlRenderer = RendererHtml()
                return await htmlRenderer.render(extractedContent, title, userPrompt, aiService)

            # Generate DOCX using AI-analyzed styling
            docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)

            # Prefer the filename of the first document entry; otherwise derive
            # one from the title.
            documents = contentDict.get("documents", [])
            firstDocument = documents[0] if isinstance(documents, (list, tuple)) and documents else None
            filename = firstDocument.get("filename") if isinstance(firstDocument, dict) else None
            if not filename:
                filename = self._determineFilename(title, docxMimeType)

            # The generator returns base64 text; decode it to raw bytes.
            if isinstance(docx_content, str):
                try:
                    docx_bytes = base64.b64decode(docx_content)
                except ValueError:
                    # Not valid base64 (binascii.Error is a ValueError): keep the text as-is.
                    docx_bytes = docx_content.encode('utf-8')
            else:
                docx_bytes = docx_content

            return [
                RenderedDocument(
                    documentData=docx_bytes,
                    mimeType=docxMimeType,
                    filename=filename,
                    documentType=documentType,
                    metadata=safeMetadata
                )
            ]

        except Exception as e:
            self.logger.error(f"Error rendering DOCX: {str(e)}", exc_info=True)
            # Return minimal fallback
            fallbackContent = f"DOCX Generation Error: {str(e)}"
            return [
                RenderedDocument(
                    documentData=fallbackContent.encode('utf-8'),
                    mimeType="text/plain",
                    filename=self._determineFilename(title, "text/plain"),
                    documentType=documentType,
                    metadata=safeMetadata
                )
            ]

    async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: Optional[str] = None, aiService=None) -> str:
        """Generate DOCX content from a structured JSON document.

        Args:
            json_content: Content in the standardized schema
                ``{metadata: {...}, documents: [{sections: [...]}]}``.
            title: Preferred document title; ``metadata["title"]`` is used only
                when this is empty.
            userPrompt: Optional prompt forwarded to the style lookup.
            aiService: Optional AI service forwarded to the style lookup.

        Returns:
            The generated .docx file, base64-encoded as a UTF-8 string.

        Raises:
            Exception: Wraps any failure (including schema validation errors);
                the original exception is attached as ``__cause__``.
        """
        import time
        start_time = time.time()
        try:
            self.logger.debug("_generateDocxFromJson: Starting document generation")

            # Validate JSON structure first (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
            # so invalid input fails before the style lookup, which may call the AI service.
            if not self._validateJsonStructure(json_content):
                raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

            # Create new document
            doc = Document()
            self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s")

            # Get style set: use styles from metadata if available, otherwise enhance with AI
            style_start = time.time()
            self.logger.debug("_generateDocxFromJson: About to get style set")
            styleSet = await self._getStyleSet(json_content, userPrompt, aiService)
            self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s")

            # Setup basic document styles and create all styles from style set
            setup_start = time.time()
            self.logger.debug("_generateDocxFromJson: Setting up document styles")
            self._setupBasicDocumentStyles(doc)
            self._setupDocumentStyles(doc, styleSet)
            self.logger.debug(f"_generateDocxFromJson: Document styles setup in {time.time() - setup_start:.2f}s")

            # Extract sections and metadata from standardized schema
            extract_start = time.time()
            self.logger.debug("_generateDocxFromJson: Extracting sections and metadata")
            sections = self._extractSections(json_content)
            metadata = self._extractMetadata(json_content)
            self.logger.debug(f"_generateDocxFromJson: Extracted {len(sections)} sections in {time.time() - extract_start:.2f}s")

            # Use provided title (which comes from documents[].title) as primary source
            # Fallback to metadata.title only if title parameter is empty
            document_title = title if title else metadata.get("title", "Generated Document")

            # Add document title using Title style
            if document_title:
                doc.add_paragraph(document_title, style='Title')

            # Process each section in order
            render_start = time.time()
            sectionCount = len(sections)
            self.logger.debug(f"_generateDocxFromJson: Starting to render {sectionCount} sections")
            for idx, section in enumerate(sections):
                section_start = time.time()
                self.logger.debug(f"_generateDocxFromJson: Rendering section {idx + 1}/{sectionCount}")
                self._renderJsonSection(doc, section, styleSet)
                self.logger.debug(f"_generateDocxFromJson: Section {idx + 1} rendered in {time.time() - section_start:.2f}s")
            self.logger.debug(f"_generateDocxFromJson: All sections rendered in {time.time() - render_start:.2f}s")

            # Save to an in-memory buffer
            save_start = time.time()
            self.logger.debug("_generateDocxFromJson: Starting to save document to buffer")
            buffer = io.BytesIO()
            doc.save(buffer)
            self.logger.debug(f"_generateDocxFromJson: Document saved to buffer in {time.time() - save_start:.2f}s")

            # Convert to base64
            encode_start = time.time()
            self.logger.debug("_generateDocxFromJson: Converting to base64")
            docx_bytes = buffer.getvalue()
            docx_base64 = base64.b64encode(docx_bytes).decode('utf-8')
            self.logger.debug(f"_generateDocxFromJson: Converted to base64 in {time.time() - encode_start:.2f}s (document size: {len(docx_bytes)} bytes)")

            total_time = time.time() - start_time
            self.logger.info(f"_generateDocxFromJson: Document generation completed in {total_time:.2f}s")
            return docx_base64

        except Exception as e:
            self.logger.error(f"Error generating DOCX from JSON: {str(e)}")
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"DOCX generation failed: {str(e)}") from e

    async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
        """Get style set - use styles from document generation metadata if available,
        otherwise enhance default styles with AI if userPrompt provided.

        WICHTIG: In a dynamic scalable AI system, styling should come from document generation,
        not be generated separately by renderers. Only fall back to AI if styles not provided.

        Args:
            extractedContent: Document content with metadata (may contain styles)
            userPrompt: User's prompt (AI will detect style instructions in any language)
            aiService: AI service (used only if styles not in metadata and userPrompt provided)
            templateName: Name of template style set (None = default)

        Returns:
            Dict with style definitions for all document styles
        """
        # Get default style set
        if templateName == "corporate":
            defaultStyleSet = self._getCorporateStyleSet()
        elif templateName == "minimal":
            defaultStyleSet = self._getMinimalStyleSet()
        else:
            defaultStyleSet = self._getDefaultStyleSet()

        # FIRST: Check if styles are provided in document generation metadata (preferred approach)
        if extractedContent:
            metadata = extractedContent.get("metadata", {})
            if isinstance(metadata, dict):
                styles = metadata.get("styles")
                if styles and isinstance(styles, dict):
                    self.logger.debug("Using styles from document generation metadata")
                    return self._validateStylesContrast(styles)

        # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata)
        if userPrompt and aiService:
            self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...")
            enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
            return self._validateStylesContrast(enhancedStyleSet)
        else:
            # Use default styles only
            return defaultStyleSet

    async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
        """Ask the AI service to adapt the default styles to the user prompt.

        Any failure is logged as a warning and ``defaultStyleSet`` is returned
        unchanged.
        """
        try:
            aiTemplate = self._createAiStyleTemplate("docx", userPrompt, defaultStyleSet)
            return await self._getAiStyles(aiService, aiTemplate, defaultStyleSet)
        except Exception as exc:
            self.logger.warning(f"AI style enhancement failed: {str(exc)}, using default styles")
        return defaultStyleSet

    def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix contrast issues in AI-generated styles."""
        try:
            # Fix table header contrast
            if "table_header" in styles:
                header = styles["table_header"]
                bg_color = header.get("background", "#FFFFFF")
                text_color = header.get("text_color", "#000000")

                # If both are white or both are dark, fix it
                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
                    header["background"] = "#4F4F4F"
                    header["text_color"] = "#FFFFFF"
                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
                    header["background"] = "#4F4F4F"
                    header["text_color"] = "#FFFFFF"

            # Fix table cell contrast
            if "table_cell" in styles:
                cell = styles["table_cell"]
                bg_color = cell.get("background", "#FFFFFF")
                text_color = cell.get("text_color", "#000000")

                # If both are white or both are dark, fix it
                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
                    cell["background"] = "#FFFFFF"
                    cell["text_color"] = "#2F2F2F"
                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
                    cell["background"] = "#FFFFFF"
                    cell["text_color"] = "#2F2F2F"

            return styles

        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
            return self._getDefaultStyleSet()

    def _getDefaultStyleSet(self) -> Dict[str, Any]:
        """Default DOCX style set - used when no style instructions present."""
        return {
            "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
            "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left"},
            "heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left"},
            "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
            "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
            "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
            "table_border": {"style": "horizontal_only", "color": "#000000", "thickness": "thin"},
            "bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 20},
            "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
        }

    def _setupBasicDocumentStyles(self, doc: Document) -> None:
        """Apply the base font (Calibri, 11pt) to the document's 'Normal' style.

        Failures are logged as warnings and otherwise ignored.
        """
        try:
            # Set default font on the 'Normal' style
            normalFont = doc.styles['Normal'].font
            normalFont.name = 'Calibri'
            normalFont.size = Pt(11)
        except Exception as exc:
            self.logger.warning(f"Could not set up basic document styles: {str(exc)}")


    def _clearTemplateContent(self, doc: Document) -> None:
        """Empty a template document while keeping its styles.

        Paragraphs stay in the document but have their content cleared; tables
        are detached from their parent element. Failures are logged as warnings.
        """
        try:
            # Paragraphs are kept in place; only their content is cleared.
            existingParagraphs = list(doc.paragraphs)
            for para in existingParagraphs:
                para.clear()

            # Tables are removed entirely from the XML tree.
            existingTables = list(doc.tables)
            for tbl in existingTables:
                tblElement = tbl._element
                tblElement.getparent().remove(tblElement)

        except Exception as exc:
            self.logger.warning(f"Could not clear template content: {str(exc)}")

    def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a single JSON section to DOCX using AI-generated styles.
        Supports three content formats: reference, object (base64), extracted_text.
        """
        try:
            section_type = section.get("content_type", "paragraph")
            elements = section.get("elements", [])

            # If no elements, skip this section (it has no content to render)
            if not elements:
                return

            # Process each element in the section
            for element in elements:
                # Skip non-dict elements (e.g., int, str, etc.)
                if not isinstance(element, dict):
                    continue
                element_type = element.get("type", "")

                # Support three content formats from Phase 5D
                if element_type == "reference":
                    # Document reference format
                    doc_ref = element.get("documentReference", "")
                    label = element.get("label", "Reference")
                    para = doc.add_paragraph(f"[Reference: {label}]")
                    para.runs[0].italic = True
                    continue
                elif element_type == "extracted_text":
                    # Extracted text format - render as paragraph
                    content = element.get("content", "")
                    source = element.get("source", "")
                    if content:
                        para = doc.add_paragraph(content)
                        if source:
                            para.add_run(f" (Source: {source})").italic = True
                    continue

                # Check element type, not section type (elements can have different types than section)
                if element_type == "table":
                    self._renderJsonTable(doc, element, styles)
                elif element_type == "bullet_list":
                    self._renderJsonBulletList(doc, element, styles)
                elif element_type == "heading":
                    self._renderJsonHeading(doc, element, styles)
                elif element_type == "paragraph":
                    self._renderJsonParagraph(doc, element, styles)
                elif element_type == "code_block":
                    self._renderJsonCodeBlock(doc, element, styles)
                elif element_type == "image":
                    self._renderJsonImage(doc, element, styles)
                else:
                    # Fallback: if element_type not set, use section_type
                    if section_type == "table":
                        self._renderJsonTable(doc, element, styles)
                    elif section_type == "bullet_list":
                        self._renderJsonBulletList(doc, element, styles)
                    elif section_type == "heading":
                        self._renderJsonHeading(doc, element, styles)
                    elif section_type == "paragraph":
                        # CRITICAL: Check if this is actually an image element before rendering as paragraph
                        # Image elements might not have type set, but have base64Data in content
                        content = element.get("content", {})
                        if isinstance(content, dict) and content.get("base64Data"):
                            # This is actually an image, render it as such
                            self._renderJsonImage(doc, element, styles)
                        else:
                            self._renderJsonParagraph(doc, element, styles)
                    elif section_type == "code_block":
                        self._renderJsonCodeBlock(doc, element, styles)
                    elif section_type == "image":
                        self._renderJsonImage(doc, element, styles)
                    else:
                        # Fallback to paragraph for unknown types, but check for image data first
                        content = element.get("content", {})
                        if isinstance(content, dict) and content.get("base64Data"):
                            # This is actually an image, render it as such
                            self._renderJsonImage(doc, element, styles)
                        else:
                            self._renderJsonParagraph(doc, element, styles)

        except Exception as e:
            self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
            # Add error paragraph as fallback
            error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]")

    def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """
        Render a JSON table to DOCX using AI-generated styles.

        PERFORMANCE OPTIMIZATION: Uses direct XML manipulation via lxml instead of
        python-docx high-level API. This bypasses the slow cell.text assignment
        which creates multiple XML operations per cell.

        The key insight: python-docx's cell.text setter is slow because it:
        1. Clears existing content (XML manipulation)
        2. Creates a new paragraph element
        3. Creates a new run element
        4. Sets text value

        By building the XML directly, we achieve 100-1000x faster performance.
        """
        import time
        table_start = time.time()
        try:
            # Extract from nested content structure
            content = table_data.get("content", {})
            if not isinstance(content, dict):
                return
            headers = content.get("headers", [])
            rows = content.get("rows", [])

            if not headers or not rows:
                return

            totalRows = len(rows)
            totalCols = len(headers)
            totalCells = totalRows * totalCols

            self.logger.debug(f"_renderJsonTable: Starting FAST table render - {totalRows} rows x {totalCols} columns = {totalCells} cells")

            # Use fast XML-based table rendering
            self._renderTableFastXml(doc, headers, rows, styles)

            total_time = time.time() - table_start
            rate = totalCells / total_time if total_time > 0 else 0
            self.logger.info(f"_renderJsonTable: Table completed in {total_time:.2f}s ({totalRows} rows x {totalCols} cols = {totalCells} cells) - Rate: {rate:.0f} cells/s")

        except Exception as e:
            self.logger.error(f"Error rendering table: {str(e)}", exc_info=True)

    def _renderTableFastXml(self, doc: Document, headers: List[str], rows: List[List[Any]], styles: Dict[str, Any]) -> None:
        """
        Build a table as raw WordprocessingML and insert it into the document body.

        The table gets auto width, centred alignment, borders chosen by
        ``styles["table_border"]["style"]`` (default "grid"), cell margins, one
        grid column per header, a header row and one row per entry in ``rows``.

        Args:
            doc: Target python-docx document.
            headers: Column headers; each is converted to text (None becomes '').
            rows: Data rows; cells are converted to text (None becomes '') and
                short rows are padded with empty cells up to the header count.
            styles: Style set; only the "table_border" entry is read here.

        The table is inserted before the body's trailing ``w:sectPr`` (when one
        exists) so it keeps its position relative to paragraphs added before
        and after it.
        """
        import time
        from docx.oxml.shared import OxmlElement, qn

        create_start = time.time()
        columnCount = len(headers)

        # Get the document body element
        body = doc._body._body

        # Create table element
        tbl = OxmlElement('w:tbl')

        # Add table properties
        tblPr = OxmlElement('w:tblPr')

        # Table width - auto
        tblW = OxmlElement('w:tblW')
        tblW.set(qn('w:type'), 'auto')
        tblW.set(qn('w:w'), '0')
        tblPr.append(tblW)

        # Center alignment
        jc = OxmlElement('w:jc')
        jc.set(qn('w:val'), 'center')
        tblPr.append(jc)

        # Apply table borders directly (works without template styles).
        # A missing or non-dict "table_border" entry falls back to the grid style.
        borderConfig = styles.get("table_border")
        borderStyle = borderConfig.get("style", "grid") if isinstance(borderConfig, dict) else "grid"
        tblPr.append(self._createTableBordersXml(borderStyle))

        # Table cell margins for better readability
        tblCellMar = OxmlElement('w:tblCellMar')
        for side in ['top', 'left', 'bottom', 'right']:
            margin = OxmlElement(f'w:{side}')
            margin.set(qn('w:w'), '80')  # 80 twips = ~4pt padding
            margin.set(qn('w:type'), 'dxa')
            tblCellMar.append(margin)
        tblPr.append(tblCellMar)

        tbl.append(tblPr)

        # Create table grid (column definitions)
        tblGrid = OxmlElement('w:tblGrid')
        for _ in range(columnCount):
            tblGrid.append(OxmlElement('w:gridCol'))
        tbl.append(tblGrid)

        self.logger.debug(f"_renderTableFastXml: Table structure created in {time.time() - create_start:.3f}s")

        # Build all rows using fast XML
        rows_start = time.time()

        # Header row - converted to text like the data cells, so numeric
        # headers (e.g. years) do not break row construction.
        headerTexts = [str(header) if header is not None else '' for header in headers]
        tbl.append(self._createTableRowXml(headerTexts, isHeader=True))

        header_time = time.time() - rows_start
        self.logger.debug(f"_renderTableFastXml: Header row created in {header_time:.3f}s")

        # Data rows - batch process for performance
        data_start = time.time()
        rowCount = len(rows)
        progressStep = rowCount // 10  # only used when rowCount > 100, so never zero there

        for idx, rowData in enumerate(rows):
            # Convert all cells to strings
            cellTexts = [str(cell) if cell is not None else '' for cell in rowData]
            # Pad short rows up to the header count (no-op for full rows)
            cellTexts.extend([''] * (columnCount - len(cellTexts)))

            tbl.append(self._createTableRowXml(cellTexts, isHeader=False))

            # Log progress every 10%
            if rowCount > 100 and (idx + 1) % progressStep == 0:
                elapsed = time.time() - data_start
                rate = (idx + 1) * columnCount / elapsed if elapsed > 0 else 0
                self.logger.debug(f"_renderTableFastXml: Progress {((idx + 1) / rowCount * 100):.0f}% ({idx + 1}/{rowCount} rows) - Rate: {rate:.0f} cells/s")

        data_time = time.time() - data_start

        # Insert the table before the section properties: w:sectPr has to stay
        # the last child of w:body, and paragraphs added later are inserted
        # before it as well, which keeps the document order intact.
        sectPr = body.find(qn('w:sectPr'))
        if sectPr is not None:
            sectPr.addprevious(tbl)
        else:
            body.append(tbl)

        total_time = time.time() - create_start
        totalCells = (rowCount + 1) * columnCount
        rate = totalCells / total_time if total_time > 0 else 0

        self.logger.debug(f"_renderTableFastXml: All rows created in {data_time:.2f}s, total: {total_time:.2f}s, rate: {rate:.0f} cells/s")

    def _createTableBordersXml(self, borderStyle: str, borderColor: str = '404040', borderSize: str = '4') -> Any:
        """
        Create the ``w:tblBorders`` XML element for a border style.

        Supported styles:
        - 'grid': single-line borders on all outer edges and between all cells
        - 'horizontal_only': top, bottom and inside-horizontal lines; left,
          right and inside-vertical borders are explicitly set to 'nil'
        - anything else (including 'none'): outer frame only, no inside lines

        Args:
            borderStyle: One of the style names above.
            borderColor: Hex RGB colour of visible borders; a leading '#' is
                dropped. Defaults to dark gray.
            borderSize: Line width in eighths of a point ('4' = 0.5pt).

        Returns:
            The populated ``w:tblBorders`` element, with children emitted in
            the order top, left, bottom, right, insideH, insideV.
        """
        from docx.oxml.shared import OxmlElement, qn

        # Child order expected for tblBorders; every branch emits in this order.
        schemaOrder = ('top', 'left', 'bottom', 'right', 'insideH', 'insideV')

        if borderStyle == "grid":
            visibleEdges = set(schemaOrder)
            markOthersNil = False
        elif borderStyle == "horizontal_only":
            visibleEdges = {'top', 'bottom', 'insideH'}
            markOthersNil = True  # vertical borders are switched off explicitly
        else:
            # Minimal - just outer border
            visibleEdges = {'top', 'left', 'bottom', 'right'}
            markOthersNil = False

        colorValue = str(borderColor).lstrip('#')
        sizeValue = str(borderSize)

        tblBorders = OxmlElement('w:tblBorders')
        for edge in schemaOrder:
            if edge in visibleEdges:
                border = OxmlElement(f'w:{edge}')
                border.set(qn('w:val'), 'single')
                border.set(qn('w:sz'), sizeValue)
                border.set(qn('w:space'), '0')
                border.set(qn('w:color'), colorValue)
            elif markOthersNil:
                border = OxmlElement(f'w:{edge}')
                border.set(qn('w:val'), 'nil')
            else:
                continue
            tblBorders.append(border)

        return tblBorders

    def _createTableRowXml(self, cells: List[str], isHeader: bool = False) -> Any:
        """
        Create a ``w:tr`` table row XML element with one cell per entry.

        This is the core fast path: the row XML is built directly instead of
        going through python-docx's cell.text assignment.

        Args:
            cells: Cell values. Non-string values are converted with ``str``;
                None becomes an empty cell.
            isHeader: When True the row is marked as a header row
                (``w:tblHeader``) and cells get a blue fill with bold white text.

        Returns:
            The ``w:tr`` element.

        Text handling per cell: characters that are not allowed in XML are
        removed, line breaks become ``w:br`` elements, and ``xml:space="preserve"``
        is set on text with leading or trailing whitespace.
        """
        from docx.oxml.shared import OxmlElement, qn

        xmlSpaceAttr = '{http://www.w3.org/XML/1998/namespace}space'
        # Control characters, surrogates and non-characters that lxml refuses
        # to store as element text (tab, LF and CR are allowed and kept).
        invalidXmlChars = r'[\x00-\x08\x0b\x0c\x0e-\x1f\ud800-\udfff\ufffe\uffff]'

        tr = OxmlElement('w:tr')

        # Row properties for header
        if isHeader:
            trPr = OxmlElement('w:trPr')
            trPr.append(OxmlElement('w:tblHeader'))
            tr.append(trPr)

        for cellValue in cells:
            cellText = '' if cellValue is None else str(cellValue)
            cellText = re.sub(invalidXmlChars, '', cellText)
            # Normalise line endings so each line maps to one text element.
            cellText = cellText.replace('\r\n', '\n').replace('\r', '\n')

            # Create cell
            tc = OxmlElement('w:tc')

            # Cell properties
            tcPr = OxmlElement('w:tcPr')
            tcW = OxmlElement('w:tcW')
            tcW.set(qn('w:type'), 'auto')
            tcW.set(qn('w:w'), '0')
            tcPr.append(tcW)

            # Header cell styling - blue background
            if isHeader:
                shd = OxmlElement('w:shd')
                shd.set(qn('w:val'), 'clear')
                shd.set(qn('w:color'), 'auto')
                shd.set(qn('w:fill'), '4472C4')  # Professional blue
                tcPr.append(shd)

            tc.append(tcPr)

            # Paragraph with a single run holding the text
            p = OxmlElement('w:p')
            r = OxmlElement('w:r')

            # Header text styling - bold and white (run properties come first in the run)
            if isHeader:
                rPr = OxmlElement('w:rPr')
                rPr.append(OxmlElement('w:b'))
                color = OxmlElement('w:color')
                color.set(qn('w:val'), 'FFFFFF')
                rPr.append(color)
                r.append(rPr)

            # One w:t per line, separated by w:br; an empty cell still gets
            # one empty w:t.
            for lineIdx, lineText in enumerate(cellText.split('\n')):
                if lineIdx:
                    r.append(OxmlElement('w:br'))
                t = OxmlElement('w:t')
                # Preserve leading/trailing whitespace
                if lineText != lineText.strip():
                    t.set(xmlSpaceAttr, 'preserve')
                t.text = lineText
                r.append(t)

            p.append(r)
            tc.append(p)
            tr.append(tc)

        return tr

    def _applyHorizontalBordersOnly(self, table) -> None:
        """Apply only horizontal borders to the table (no vertical borders).

        Top, bottom and inside-horizontal borders become single black 0.5pt
        lines; left, right and inside-vertical borders are set to 'none'. An
        existing ``w:tblBorders`` element is replaced in place. Failures are
        logged as warnings and otherwise ignored.

        Args:
            table: Table object exposing its XML element as ``_element``.
        """
        try:
            from docx.oxml.shared import OxmlElement, qn

            # Get table properties, creating them if the table has none
            tbl_pr = table._element.find(qn('w:tblPr'))
            if tbl_pr is None:
                tbl_pr = OxmlElement('w:tblPr')
                table._element.insert(0, tbl_pr)

            # (edge, visible) pairs, listed in the child order expected for tblBorders.
            edges = (
                ('top', True),
                ('left', False),
                ('bottom', True),
                ('right', False),
                ('insideH', True),
                ('insideV', False),
            )

            # Create new borders element
            tbl_borders = OxmlElement('w:tblBorders')
            for edgeName, visible in edges:
                border = OxmlElement(f'w:{edgeName}')
                if visible:
                    border.set(qn('w:val'), 'single')
                    border.set(qn('w:sz'), '4')
                    border.set(qn('w:space'), '0')
                    border.set(qn('w:color'), '000000')
                else:
                    border.set(qn('w:val'), 'none')
                tbl_borders.append(border)

            existing_borders = tbl_pr.find(qn('w:tblBorders'))
            if existing_borders is not None:
                # Swap in place so the element keeps its position in tblPr.
                tbl_pr.replace(existing_borders, tbl_borders)
            else:
                # tblBorders has to precede these siblings inside tblPr.
                laterSiblings = {
                    qn(tag) for tag in (
                        'w:shd', 'w:tblLayout', 'w:tblCellMar', 'w:tblLook',
                        'w:tblCaption', 'w:tblDescription', 'w:tblPrChange',
                    )
                }
                successor = next((child for child in tbl_pr if child.tag in laterSiblings), None)
                if successor is not None:
                    successor.addprevious(tbl_borders)
                else:
                    tbl_pr.append(tbl_borders)

        except Exception as e:
            self.logger.warning(f"Could not apply horizontal borders: {str(e)}")

    def _setCellBackground(self, cell, color: RGBColor) -> None:
        """Fill a table cell with a solid background colour.

        Any ``w:shd`` element already present in the cell properties is
        replaced by a new one.

        Args:
            cell: Table cell whose underlying ``_element`` is edited in place.
            color: Colour value that unpacks into red, green and blue integer
                components.

        Returns:
            None. Failures are logged as warnings and never raised.
        """
        try:
            from docx.oxml.shared import OxmlElement, qn

            # Locate the cell properties, adding them at the front if absent
            element = cell._element
            cell_props = element.find(qn('w:tcPr'))
            if cell_props is None:
                cell_props = OxmlElement('w:tcPr')
                element.insert(0, cell_props)

            # Drop the previous shading so only one w:shd remains
            stale = cell_props.find(qn('w:shd'))
            if stale is not None:
                cell_props.remove(stale)

            fill = OxmlElement('w:shd')
            fill.set(qn('w:val'), 'clear')
            fill.set(qn('w:color'), 'auto')
            # Unpack the three channels and format each as two lowercase hex digits
            r, g, b = color
            fill.set(qn('w:fill'), ''.join(format(channel, '02x') for channel in (r, g, b)))
            cell_props.append(fill)

        except Exception as e:
            self.logger.warning(f"Could not set cell background: {str(e)}")

    def _setCellBackgroundFast(self, cell, hex_color: str) -> None:
        """
        Fill a table cell with a background colour given as a ready-made hex string.

        Variant of the colour-object based setter for callers that already hold
        the fill value as text, so no per-cell colour formatting is needed.

        Args:
            cell: Table cell whose underlying ``_element`` is edited in place.
            hex_color: Value written verbatim into the ``w:fill`` attribute.

        Returns:
            None. Failures are logged as warnings and never raised.
        """
        try:
            from docx.oxml.shared import OxmlElement, qn

            # Locate the cell properties, adding them at the front if absent
            element = cell._element
            cell_props = element.find(qn('w:tcPr'))
            if cell_props is None:
                cell_props = OxmlElement('w:tcPr')
                element.insert(0, cell_props)

            # Drop the previous shading so only one w:shd remains
            stale = cell_props.find(qn('w:shd'))
            if stale is not None:
                cell_props.remove(stale)

            # Build the replacement shading element attribute by attribute
            fill = OxmlElement('w:shd')
            for attribute, value in (('w:val', 'clear'), ('w:color', 'auto'), ('w:fill', hex_color)):
                fill.set(qn(attribute), value)
            cell_props.append(fill)

        except Exception as e:
            self.logger.warning(f"Could not set cell background: {str(e)}")


    def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON bullet list to DOCX using AI-generated styles.

        Reads ``list_data["content"]["items"]``; each item is either a plain
        string or a dict with a ``"text"`` key and becomes one 'List Bullet'
        paragraph. Items of any other shape are skipped. Font size and colour
        from ``styles["bullet_list"]`` are applied to the first run of each
        paragraph.

        Args:
            doc: Document that receives the paragraphs.
            list_data: Section element holding the nested ``content`` dict.
            styles: Style set; only the ``"bullet_list"`` entry is read.

        Returns:
            None. Failures are logged as warnings and never raised.
        """
        try:
            # Extract from nested content structure
            content = list_data.get("content", {})
            if not isinstance(content, dict):
                return
            # Treat a missing or null item list as empty
            items = content.get("items") or []
            bullet_style = styles.get("bullet_list", {})

            # Parse the style values once instead of once per item
            font_size_pt = None
            text_color_rgb = None
            if bullet_style:
                if "font_size" in bullet_style:
                    font_size_pt = Pt(bullet_style["font_size"])
                if "color" in bullet_style:
                    color_hex = bullet_style["color"].lstrip('#')
                    text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))

            for item in items:
                if isinstance(item, str):
                    item_text = item
                elif isinstance(item, dict) and "text" in item:
                    item_text = item["text"]
                else:
                    # Unsupported item shape: skip it. Previously this left
                    # `para` unbound (aborting the whole list) or pointing at
                    # the previous item's paragraph.
                    continue

                para = doc.add_paragraph(item_text, style='List Bullet')

                # Style the first run, if the paragraph has one
                runs = para.runs
                if runs:
                    run = runs[0]
                    if font_size_pt:
                        run.font.size = font_size_pt
                    if text_color_rgb:
                        run.font.color.rgb = text_color_rgb

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")

    def _renderJsonHeading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON heading to DOCX using AI-generated styles.

        Reads ``text`` and ``level`` from ``heading_data["content"]`` and adds
        a paragraph styled ``"Heading <level>"``. The level is coerced to an
        integer (falling back to 1 when that is not possible) and clamped to
        the range 1-6, so every level keeps its own heading style instead of
        levels above 2 being rendered as "Heading 1".

        Args:
            doc: Document that receives the heading.
            heading_data: Section element holding the nested ``content`` dict.
            styles: Style set; not read by this method.

        Returns:
            None. Failures are logged as warnings and never raised.
        """
        try:
            # Extract from nested content structure
            content = heading_data.get("content", {})
            if not isinstance(content, dict):
                return
            text = content.get("text", "")
            if not text:
                return

            # The level may arrive as a string or null; never drop the heading for that
            try:
                level = int(content.get("level", 1))
            except (TypeError, ValueError):
                level = 1
            level = max(1, min(6, level))

            style_name = f"Heading {level}"
            try:
                doc.add_paragraph(text, style=style_name)
            except KeyError:
                # Named style is not defined in this document: use add_heading instead
                doc.add_heading(text, level=level)

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")

    def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON paragraph to DOCX using AI-generated styles.

        The text comes from ``paragraph_data["content"]``, which may be a dict
        with a ``"text"`` key or a plain string. Text that looks like base64
        image data is replaced by a red error note instead of being printed.
        Otherwise the text is added as a paragraph and ``styles["paragraph"]``
        (font size, bold, colour, alignment) is applied to its first run.

        Args:
            doc: Document that receives the paragraph.
            paragraph_data: Section element holding the content.
            styles: Style set; only the ``"paragraph"`` entry is read.

        Returns:
            None. Failures are logged as warnings and never raised.
        """
        try:
            # Extract from nested content structure
            payload = paragraph_data.get("content", {})
            if isinstance(payload, dict):
                text = payload.get("text", "")
            else:
                text = payload if isinstance(payload, str) else ""

            if not text:
                return

            # CRITICAL: never print base64 image data as body text.
            # JPEG payloads start with /9j/, PNG payloads with iVBORw0KGgo;
            # otherwise a long prefix made only of base64 characters is suspicious.
            base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
            looks_like_base64 = (
                text.startswith("/9j/")
                or text.startswith("iVBORw0KGgo")
                or (len(text) > 100 and all(ch in base64_alphabet for ch in text[:100]))
            )
            if looks_like_base64:
                self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(text)})")
                notice = doc.add_paragraph("[Error: Image data found in text content - image embedding may have failed]")
                if notice.runs:
                    notice.runs[0].font.color.rgb = RGBColor(255, 0, 0)  # Red color for error
                return

            para = doc.add_paragraph(text)
            paragraph_style = styles.get("paragraph", {})
            if not paragraph_style:
                return

            # Resolve the style values before touching the run
            size = Pt(paragraph_style["font_size"]) if "font_size" in paragraph_style else None
            rgb = None
            if "color" in paragraph_style:
                hex_digits = paragraph_style["color"].lstrip('#')
                rgb = RGBColor(int(hex_digits[0:2], 16), int(hex_digits[2:4], 16), int(hex_digits[4:6], 16))
            bold = paragraph_style.get("bold", False)

            # Style the first run, creating one when the paragraph has none
            existing_runs = para.runs
            run = existing_runs[0] if len(existing_runs) > 0 else para.add_run()
            if size:
                run.font.size = size
            run.font.bold = bold
            if rgb:
                run.font.color.rgb = rgb

            if "align" in paragraph_style:
                align = paragraph_style["align"]
                # Anything other than "center" or "right" is left-aligned
                para.alignment = (
                    WD_ALIGN_PARAGRAPH.CENTER if align == "center"
                    else WD_ALIGN_PARAGRAPH.RIGHT if align == "right"
                    else WD_ALIGN_PARAGRAPH.LEFT
                )

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")

    def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON code block to DOCX using AI-generated styles.

        Reads ``code`` and ``language`` from ``code_data["content"]``. When a
        language is given, a bold "Code (<language>):" label paragraph comes
        first. The code itself is added as one paragraph whose first run gets
        the font name, size and optional colour from ``styles["code_block"]``
        (defaults: "Courier New", 9 pt).

        Args:
            doc: Document that receives the paragraphs.
            code_data: Section element holding the nested ``content`` dict.
            styles: Style set; only the ``"code_block"`` entry is read.

        Returns:
            None. Failures are logged as warnings and never raised.
        """
        try:
            # Extract from nested content structure
            payload = code_data.get("content", {})
            if not isinstance(payload, dict):
                return
            code = payload.get("code", "")
            language = payload.get("language", "")
            code_style = styles.get("code_block", {})

            if not code:
                return

            if language:
                label = doc.add_paragraph(f"Code ({language}):")
                if len(label.runs) > 0:
                    label.runs[0].bold = True

            # Resolve the style values before creating the code paragraph
            font_name = code_style.get("font", "Courier New")
            font_size = Pt(code_style.get("font_size", 9))
            font_color = None
            if "color" in code_style:
                hex_digits = code_style["color"].lstrip('#')
                font_color = RGBColor(int(hex_digits[0:2], 16), int(hex_digits[2:4], 16), int(hex_digits[4:6], 16))

            code_para = doc.add_paragraph(code)
            # Style the first run, creating one when the paragraph has none
            existing_runs = code_para.runs
            run = existing_runs[0] if len(existing_runs) > 0 else code_para.add_run()
            run.font.name = font_name
            run.font.size = font_size
            if font_color:
                run.font.color.rgb = font_color

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")

    def _renderJsonImage(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON image to DOCX.

        The base64 payload, alt text and caption are read from
        ``image_data["content"]`` and, when the payload is missing there, from
        the top level of ``image_data``. The decoded image is inserted scaled
        down (never up) to fit 6.5 x 9.0 inches, assuming 96 pixels per inch;
        if the size cannot be determined it is inserted 6.0 inches wide. A
        caption paragraph in italics follows when a caption or a meaningful
        alt text is available.

        On any failure an error is logged and a red error paragraph naming the
        image is added to the document instead; nothing is raised.

        Args:
            doc: Document that receives the picture and caption.
            image_data: Section element describing the image.
            styles: Style set; not read by this method.
        """
        # Bound before the try block so the error handler can always name the image
        alt_text = "Image"
        try:
            # Extract from nested content structure
            content = image_data.get("content", {})
            base64_data = ""
            caption = ""

            if isinstance(content, dict):
                base64_data = content.get("base64Data", "")
                alt_text = content.get("altText", "Image")
                caption = content.get("caption", "")
            elif isinstance(content, str):
                # Content might be base64 string directly (shouldn't happen, but handle it)
                self.logger.warning("Image content is a string, not a dict. This should not happen.")
                return

            # If base64Data not found in content, try direct element fields (fallback)
            if not base64_data:
                base64_data = image_data.get("base64Data", "")
                if not alt_text or alt_text == "Image":
                    alt_text = image_data.get("altText", "Image")
                if not caption:
                    caption = image_data.get("caption", "")

            if not base64_data:
                raise Exception("No image data provided (base64Data is empty)")

            try:
                image_bytes = base64.b64decode(base64_data)
                image_stream = io.BytesIO(image_bytes)

                # Get image dimensions to calculate proper size
                try:
                    from PIL import Image as PILImage
                    pil_image = PILImage.open(image_stream)
                    img_width_px, img_height_px = pil_image.size

                    # Usable area of the page: 6.5 inches wide, 9.0 inches
                    # high (leaves room for text above/below)
                    max_width_inches = 6.5
                    max_height_inches = 9.0

                    # Convert pixels to inches assuming 96 pixels per inch
                    img_width_inches = img_width_px / 96.0
                    img_height_inches = img_height_px / 96.0

                    # Scale down to fit both limits; never scale up
                    width_scale = max_width_inches / img_width_inches if img_width_inches > max_width_inches else 1.0
                    height_scale = max_height_inches / img_height_inches if img_height_inches > max_height_inches else 1.0
                    scale = min(width_scale, height_scale, 1.0)

                    # Only the width is passed on to add_picture
                    final_width = img_width_inches * scale

                    # Rewind: PIL has consumed part of the stream
                    image_stream.seek(0)
                    doc.add_picture(image_stream, width=Inches(final_width))
                except Exception:
                    # Fallback: use conservative default size if PIL fails
                    image_stream.seek(0)
                    doc.add_picture(image_stream, width=Inches(6.0))

                # Use caption from section if available, otherwise use alt_text
                if caption:
                    caption_text = caption
                elif alt_text and alt_text != "Image":
                    # Only use alt_text if it doesn't look like a usageHint
                    if "Render as visual element:" in alt_text:
                        # Extract filename from usageHint if possible
                        parts = alt_text.split("Render as visual element:")
                        if len(parts) > 1:
                            filename = parts[1].strip()
                            caption_text = f"Figure: {filename}"
                        else:
                            caption_text = alt_text
                    else:
                        caption_text = f"Figure: {alt_text}"
                else:
                    caption_text = None

                if caption_text:
                    caption_para = doc.add_paragraph(caption_text)
                    if caption_para.runs:
                        caption_para.runs[0].italic = True
            except Exception as embedError:
                # Image decoding or embedding failed; keep the cause attached
                raise Exception(f"Failed to decode or embed image: {str(embedError)}") from embedError

        except Exception as e:
            self.logger.error(f"Error embedding image in DOCX: {str(e)}")
            # Prefer the alt text resolved above (it may come from the nested
            # content); fall back to the top-level field as before.
            image_label = alt_text
            if not image_label or image_label == "Image":
                image_label = image_data.get('altText', 'Image') if isinstance(image_data, dict) else "Image"
            errorMsg = f"[Error: Could not embed image '{image_label}'. {str(e)}]"
            errorPara = doc.add_paragraph(errorMsg)
            if errorPara.runs:
                errorPara.runs[0].font.color.rgb = RGBColor(255, 0, 0)  # Red color for error

    def _extractStructureFromPrompt(self, userPrompt: str, title: str) -> Dict[str, Any]:
        """Extract document structure from user prompt."""
        structure = {
            'title': title,
            'sections': [],
            'format': 'standard'
        }

        if not userPrompt:
            return structure

        # Extract title from prompt if not provided
        if not title or title == "Generated Document":
            # Look for "create a ... document" or "generate a ... report"
            title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower())
            if title_match:
                structure['title'] = title_match.group(1).strip().title()

        # Extract sections from numbered lists in prompt
        section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
        sections = re.findall(section_pattern, userPrompt)

        for num, section_text in sections:
            structure['sections'].append({
                'number': int(num),
                'title': section_text.strip(),
                'level': 2  # H2 level
            })

        # If no numbered sections found, try to extract from "including:" patterns
        if not structure['sections']:
            including_match = re.search(r'including:\s*(.+?)(?:\.|$)', userPrompt, re.DOTALL)
            if including_match:
                including_text = including_match.group(1)
                # Split by common separators
                parts = re.split(r'[,;]\s*', including_text)
                for i, part in enumerate(parts, 1):
                    part = part.strip()
                    if part:
                        structure['sections'].append({
                            'number': i,
                            'title': part,
                            'level': 2
                        })

        # If still no sections, extract from any list-like patterns
        if not structure['sections']:
            # Look for bullet points or dashes
            bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)'
            bullets = re.findall(bullet_pattern, userPrompt)
            for i, bullet in enumerate(bullets, 1):
                bullet = bullet.strip()
                if bullet and len(bullet) > 3:
                    structure['sections'].append({
                        'number': i,
                        'title': bullet,
                        'level': 2
                    })

        # If still no sections, extract from sentence structure
        if not structure['sections']:
            # Split prompt into sentences and use as sections
            sentences = re.split(r'[.!?]\s+', userPrompt)
            for i, sentence in enumerate(sentences[:5], 1):  # Max 5 sections
                sentence = sentence.strip()
                if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')):
                    structure['sections'].append({
                        'number': i,
                        'title': sentence[:50] + "..." if len(sentence) > 50 else sentence,
                        'level': 2
                    })

        # Final fallback: create sections from prompt keywords
        if not structure['sections']:
            # Extract key action words from prompt
            action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate']
            found_actions = []
            for action in action_words:
                if action in userPrompt.lower():
                    found_actions.append(action.title())

            if found_actions:
                for i, action in enumerate(found_actions[:3], 1):
                    structure['sections'].append({
                        'number': i,
                        'title': f"{action} Document Content",
                        'level': 2
                    })
            else:
                # Last resort: generic but meaningful sections
                structure['sections'] = [
                    {'number': 1, 'title': 'Document Analysis', 'level': 2},
                    {'number': 2, 'title': 'Key Information', 'level': 2},
                    {'number': 3, 'title': 'Summary and Conclusions', 'level': 2}
                ]

        return structure

    def _generateFromStructure(self, doc, content: str, structure: Dict[str, Any]):
        """Generate DOCX content based on extracted structure."""
        # Add sections based on prompt structure
        for section in structure['sections']:
            # Add section heading
            doc.add_heading(f"{section['number']}) {section['title']}", level=section['level'])

            # Add AI-generated content for this section
            # Try to extract relevant content for this section from the AI response
            section_content = self._extractSectionContent(content, section['title'])

            if section_content:
                doc.add_paragraph(section_content)
            else:
                # If no specific content found, add a note
                doc.add_paragraph(f"Content for {section['title']} based on document analysis.")

            # Add some spacing
            doc.add_paragraph()

        # Add the complete AI-generated content as additional analysis
        if content and content.strip():
            doc.add_heading("Complete Analysis", level=1)
            doc.add_paragraph(content)

    def _extractSectionContent(self, content: str, section_title: str) -> str:
        """Extract relevant content for a specific section from AI response."""
        if not content or not section_title:
            return ""

        # Look for content that matches the section title
        section_keywords = section_title.lower().split()

        # Split content into paragraphs
        paragraphs = content.split('\n\n')

        relevant_paragraphs = []
        for paragraph in paragraphs:
            paragraph_lower = paragraph.lower()
            # Check if paragraph contains keywords from section title
            if any(keyword in paragraph_lower for keyword in section_keywords if len(keyword) > 3):
                relevant_paragraphs.append(paragraph.strip())

        if relevant_paragraphs:
            return '\n\n'.join(relevant_paragraphs[:2])  # Max 2 paragraphs per section

        return ""

    def _setupDocumentStyles(self, doc: Document, styleSet: Dict[str, Any]) -> None:
        """Register the paragraph styles described by ``styleSet`` in the document.

        Meant to run BEFORE rendering so the styles exist when content refers
        to them. Style-set keys map to document style names as follows:
        ``title`` -> "Title", ``heading1`` -> "Heading 1", ``heading2`` ->
        "Heading 2", ``paragraph`` -> "Custom Paragraph". Keys that are absent
        are skipped.

        Args:
            doc: Document whose style collection is updated.
            styleSet: Style configuration keyed as described above.

        Returns:
            None. Failures are logged as warnings and never raised.
        """
        try:
            from docx.enum.style import WD_STYLE_TYPE

            # (style-set key, document style name), processed in this order.
            # List Bullet and List Number are not created here; their
            # renderers style the runs directly.
            style_targets = (
                ("title", "Title"),
                ("heading1", "Heading 1"),
                ("heading2", "Heading 2"),
                ("paragraph", "Custom Paragraph"),
            )
            for config_key, style_name in style_targets:
                if config_key in styleSet:
                    self._createStyle(doc, style_name, styleSet[config_key], WD_STYLE_TYPE.PARAGRAPH)

        except Exception as e:
            self.logger.warning(f"Could not set up document styles: {str(e)}")

    def _createStyle(self, doc: Document, styleName: str, styleConfig: Dict[str, Any], styleType) -> None:
        """Create the named style, or update it when it already exists.

        A style that is not yet in the document is added with the given type
        and based on 'Normal'. The optional config keys ``font_size``,
        ``bold``, ``color`` (hex string, leading '#' allowed), ``font`` and
        ``align`` are then applied; ``align`` values other than "center" and
        "right" mean left alignment.

        Args:
            doc: Document whose style collection is modified.
            styleName: Name of the style to create or update.
            styleConfig: Formatting options as described above.
            styleType: Style type passed to ``add_style`` for new styles.

        Returns:
            None. Failures are logged as warnings and never raised.
        """
        try:
            from docx.enum.style import WD_STYLE_TYPE

            # Reuse the style when present, otherwise derive a new one from Normal
            try:
                target_style = doc.styles[styleName]
            except KeyError:
                target_style = doc.styles.add_style(styleName, styleType)
                target_style.base_style = doc.styles['Normal']

            # Font settings
            style_font = target_style.font
            if "font_size" in styleConfig:
                style_font.size = Pt(styleConfig["font_size"])
            if "bold" in styleConfig:
                style_font.bold = styleConfig["bold"]
            if "color" in styleConfig:
                hex_digits = styleConfig["color"].lstrip('#')
                red = int(hex_digits[0:2], 16)
                green = int(hex_digits[2:4], 16)
                blue = int(hex_digits[4:6], 16)
                style_font.color.rgb = RGBColor(red, green, blue)
            if "font" in styleConfig:
                style_font.name = styleConfig["font"]

            # Paragraph alignment
            if "align" in styleConfig:
                requested = styleConfig["align"]
                target_style.paragraph_format.alignment = (
                    WD_ALIGN_PARAGRAPH.CENTER if requested == "center"
                    else WD_ALIGN_PARAGRAPH.RIGHT if requested == "right"
                    else WD_ALIGN_PARAGRAPH.LEFT
                )

        except Exception as e:
            self.logger.warning(f"Could not create style '{styleName}': {str(e)}")

    def _processSection(self, doc, lines: list):
        """Process a section of content into DOCX elements."""
        for line in lines:
            if not line.strip():
                continue

            # Check for tables (lines with |)
            if '|' in line and not line.startswith('|'):
                # This might be part of a table, process as table
                table_data = self._extractTableData(lines)
                if table_data:
                    self._addTable(doc, table_data)
                    return

            # Check for lists
            if line.startswith('- ') or line.startswith('* '):
                # This is a list item
                doc.add_paragraph(line[2:], style='List Bullet')
            elif line.startswith(('1. ', '2. ', '3. ', '4. ', '5. ')):
                # This is a numbered list item
                doc.add_paragraph(line[3:], style='List Number')
            else:
                # Regular paragraph
                doc.add_paragraph(line)

    def _extractTableData(self, lines: list) -> list:
        """Extract table data from lines."""
        table_data = []
        in_table = False

        for line in lines:
            if '|' in line:
                if not in_table:
                    in_table = True
                # Split by | and clean up
                cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                if cells:
                    table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break

        return table_data if len(table_data) > 1 else []

    def _addTable(self, doc, table_data: list):
        """Add a centred table filled with ``table_data`` to the document.

        The table gets one row per entry of ``table_data`` and as many columns
        as the widest row, so rows longer than the first one no longer lose
        their trailing cells. Shorter rows leave their remaining cells empty.
        The header row is then styled via ``_styleTable``.

        Args:
            doc: Document that receives the table.
            table_data: List of rows, each a list of cell strings. Nothing is
                added when it is empty.

        Returns:
            None. Failures are logged as warnings and never raised.
        """
        try:
            if not table_data:
                return

            # Size the grid to the widest row rather than to the first row
            column_count = max(len(row_data) for row_data in table_data)
            table = doc.add_table(rows=len(table_data), cols=column_count)
            table.alignment = WD_TABLE_ALIGNMENT.CENTER

            # Fill the table; the cell list is fetched once per row instead of
            # being looked up again for every single cell
            for row, row_data in zip(table.rows, table_data):
                cells = row.cells
                for cell, cell_data in zip(cells, row_data):
                    cell.text = cell_data

            # Style the table
            self._styleTable(table)

        except Exception as e:
            self.logger.warning(f"Could not add table: {str(e)}")

    def _styleTable(self, table):
        """Apply styling to the table."""
        try:
            # Style header row
            if len(table.rows) > 0:
                header_cells = table.rows[0].cells
                for cell in header_cells:
                    for paragraph in cell.paragraphs:
                        for run in paragraph.runs:
                            run.bold = True
        except Exception as e:
            self.logger.warning(f"Could not style table: {str(e)}")

    def _processTableRow(self, doc, line: str):
        """Process a table row and add it to the document."""
        if not line.strip():
            return

        # Split by pipe separator
        parts = [part.strip() for part in line.split('|')]

        if len(parts) >= 2:
            # This is a table row - create a table if it doesn't exist
            if not hasattr(self, '_current_table') or self._current_table is None:
                # Create new table
                self._current_table = doc.add_table(rows=1, cols=len(parts))
                self._current_table.style = 'Table Grid'

                # Add header row
                for i, part in enumerate(parts):
                    if i < len(self._current_table.rows[0].cells):
                        cell = self._current_table.rows[0].cells[i]
                        cell.text = part
                        # Make header bold
                        for paragraph in cell.paragraphs:
                            for run in paragraph.runs:
                                run.bold = True
            else:
                # Add data row to existing table
                row = self._current_table.add_row()
                for i, part in enumerate(parts):
                    if i < len(row.cells):
                        row.cells[i].text = part
        else:
            # Not a table row, treat as regular text
            doc.add_paragraph(line)

    def _cleanAiContent(self, content: str) -> str:
        """Clean AI-generated content by removing debug information and duplicates."""
        if not content:
            return ""

        # Remove debug information
        lines = content.split('\n')
        clean_lines = []

        for line in lines:
            # Skip debug lines and separators
            if (line.startswith('[Skipped ') or
                line.startswith('=== DOCUMENT:') or
                line.startswith('---') or
                line.startswith('FILENAME:') or
                line.strip() == '' or
                line.strip() == '---'):
                continue
            clean_lines.append(line)

        # Join lines and remove duplicate content
        clean_content = '\n'.join(clean_lines)

        # Remove duplicate sections by keeping only the first occurrence
        sections = clean_content.split('\n\n')
        seen_sections = set()
        unique_sections = []

        for section in sections:
            section_key = section.strip()[:50]  # Use first 50 chars as key
            if section_key not in seen_sections and section.strip():
                seen_sections.add(section_key)
                unique_sections.append(section)

        return '\n\n'.join(unique_sections)

    def _processTables(self, doc, content: str) -> str:
        """
        Process tables in the content (both CSV and pipe-separated) and convert them to Word tables.
        Returns the content with tables replaced by placeholders.
        """
        # csv is already imported at module level

        lines = content.split('\n')
        processed_lines = []
        i = 0

        while i < len(lines):
            line = lines[i].strip()

            # Check if this line looks like a table (contains pipes or commas with multiple fields)
            is_pipe_table = '|' in line and len(line.split('|')) >= 2
            is_csv_table = ',' in line and len(line.split(',')) >= 2

            if is_pipe_table or is_csv_table:
                # Collect consecutive table lines
                table_lines = []
                j = i

                # Determine separator and collect lines
                separator = '|' if is_pipe_table else ','
                while j < len(lines):
                    current_line = lines[j].strip()
                    if separator in current_line and len(current_line.split(separator)) >= 2:
                        table_lines.append(current_line)
                        j += 1
                    else:
                        break

                if len(table_lines) >= 2:  # At least header + 1 data row
                    # Create Word table
                    try:
                        if separator == '|':
                            # Process pipe-separated table
                            rows = []
                            for table_line in table_lines:
                                # Split by pipe and clean up
                                cells = [cell.strip() for cell in table_line.split('|')]
                                rows.append(cells)
                        else:
                            # Process CSV table
                            csv_content = '\n'.join(table_lines)
                            csv_reader = csv.reader(io.StringIO(csv_content))
                            rows = list(csv_reader)

                        if rows and len(rows[0]) > 0:
                            # Create Word table
                            table = doc.add_table(rows=len(rows), cols=len(rows[0]))
                            table.style = 'Table Grid'

                            # Populate table
                            for row_idx, row_data in enumerate(rows):
                                for col_idx, cell_data in enumerate(row_data):
                                    if col_idx < len(table.rows[row_idx].cells):
                                        table.rows[row_idx].cells[col_idx].text = cell_data.strip()

                                # Make header row bold
                                if row_idx == 0:
                                    for cell in table.rows[row_idx].cells:
                                        for paragraph in cell.paragraphs:
                                            for run in paragraph.runs:
                                                run.bold = True

                            # Add placeholder to mark where table was inserted
                            processed_lines.append(f"[TABLE_INSERTED_{len(processed_lines)}]")

                            # Skip the table lines
                            i = j
                            continue
                    except Exception as e:
                        # If table parsing fails, treat as regular text
                        pass

            processed_lines.append(line)
            i += 1

        return '\n'.join(processed_lines)

    def _parseAndFormatContent(self, doc, content: str, title: str):
        """Parse AI-generated content in standardized format and apply proper DOCX formatting."""
        if not content:
            return

        # Process tables and replace them with placeholders
        content = self._processTables(doc, content)

        # Parse content line by line in exact sequence
        lines = content.split('\n')

        for line in lines:
            line = line.strip()
            if not line:
                # Empty line - add paragraph break
                doc.add_paragraph()
                continue

            # Skip table placeholders (already processed)
            if line.startswith('[TABLE_INSERTED_'):
                continue

            # Check if this is a Markdown heading (# ## ###)
            if line.startswith('#'):
                level = len(line) - len(line.lstrip('#'))
                heading_text = line.lstrip('# ').strip()
                doc.add_heading(heading_text, level=min(level, 3))

            # Check if this is a numbered heading (1) Title, 2) Title, etc.)
            elif re.match(r'^\d+\)\s+.+', line):
                heading_text = re.sub(r'^\d+\)\s+', '', line)
                doc.add_heading(heading_text, level=1)

            # Check if this is a Markdown list item
            elif line.startswith('- ') or re.match(r'^\d+\.\s+', line):
                bullet_text = re.sub(r'^[-•]\s+|\d+\.\s+', '', line)
                self._add_bullet_point(doc, bullet_text)

            # Check if this is a code block
            elif line.startswith('```'):
                if not line.endswith('```'):
                    # Start of code block - collect until end
                    code_lines = [line]
                    continue
                else:
                    # End of code block
                    if 'code_lines' in locals():
                        code_lines.append(line)
                        code_text = '\n'.join(code_lines)
                        para = doc.add_paragraph()
                        run = para.add_run(code_text)
                        run.font.name = 'Courier New'
                        del code_lines

            # Regular paragraph
            else:
                self._addParagraphToDoc(doc, line)

    def _addParagraphToDoc(self, doc, text: str):
        """Add a paragraph to the document with proper formatting."""
        if not text.strip():
            return

        # Check for Markdown formatting (**bold**, *italic*)
        para = doc.add_paragraph()

        # Split by bold markers
        parts = text.split('**')
        for i, part in enumerate(parts):
            if i % 2 == 0:
                # Regular text - check for italic
                italic_parts = part.split('*')
                for j, italic_part in enumerate(italic_parts):
                    if j % 2 == 0:
                        # Regular text
                        if italic_part:
                            para.add_run(italic_part)
                    else:
                        # Italic text
                        if italic_part:
                            run = para.add_run(italic_part)
                            run.italic = True
            else:
                # Bold text
                if part:
                    run = para.add_run(part)
                    run.bold = True