gateway/modules/services/serviceGeneration/renderers/rendererDocx.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
DOCX renderer for report generation using python-docx.
"""

from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import io
import base64
import re
import csv

try:
    from docx import Document
    from docx.shared import Inches, Pt, RGBColor
    from docx.enum.text import WD_ALIGN_PARAGRAPH
    from docx.enum.table import WD_TABLE_ALIGNMENT
    DOCX_AVAILABLE = True
except ImportError:
    DOCX_AVAILABLE = False

class RendererDocx(BaseRenderer):
    """Renders content to DOCX format using python-docx."""

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported DOCX formats."""
        return ['docx', 'doc']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['word', 'document']

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for DOCX renderer."""
        return 115

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: Word documents are formatted documents."""
        return 'document'

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: Optional[str] = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to DOCX format using AI-analyzed styling.

        Args:
            extractedContent: Structured content. ``metadata`` and ``documents`` are read
                here; the whole structure is handed to ``_generateDocxFromJson``.
            title: Document title; also used to derive the filename when the first
                ``documents`` entry does not carry one.
            userPrompt: Optional user prompt, forwarded to the generator.
            aiService: Optional AI service, forwarded to the generator.

        Returns:
            On success, a list with one ``RenderedDocument`` holding the DOCX bytes.
            When python-docx is not importable, whatever the HTML renderer returns.
            On any error, a list with one plain-text ``RenderedDocument`` describing
            the failure (the exception is logged, not raised).
        """
        docxMimeType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")

        # Resolve metadata once, defensively, because both the success path and the
        # error fallback need it and the fallback must never raise on odd input.
        content = extractedContent if isinstance(extractedContent, dict) else {}
        rawMetadata = content.get("metadata", {})
        metadata = rawMetadata if isinstance(rawMetadata, dict) else None
        documentType = metadata.get("documentType") if metadata is not None else None

        try:
            if not DOCX_AVAILABLE:
                # Fallback to HTML if python-docx not available
                from .rendererHtml import RendererHtml
                htmlRenderer = RendererHtml()
                return await htmlRenderer.render(extractedContent, title, userPrompt, aiService)

            # Generate DOCX using AI-analyzed styling
            docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)

            # Prefer the filename of the first document entry; a missing or malformed
            # ``documents`` value falls back to a title-derived name instead of
            # discarding the DOCX that was just generated.
            filename = None
            documents = content.get("documents", [])
            if isinstance(documents, (list, tuple)) and documents and isinstance(documents[0], dict):
                filename = documents[0].get("filename")
            if not filename:
                filename = self._determineFilename(title, docxMimeType)

            # The generator returns base64 text; decode it to raw bytes
            if isinstance(docx_content, str):
                try:
                    docx_bytes = base64.b64decode(docx_content)
                except Exception:
                    docx_bytes = docx_content.encode('utf-8')
            else:
                docx_bytes = docx_content

            return [
                RenderedDocument(
                    documentData=docx_bytes,
                    mimeType=docxMimeType,
                    filename=filename,
                    documentType=documentType,
                    metadata=metadata
                )
            ]

        except Exception as e:
            self.logger.error(f"Error rendering DOCX: {str(e)}")
            # Return minimal fallback
            fallbackContent = f"DOCX Generation Error: {str(e)}"
            return [
                RenderedDocument(
                    documentData=fallbackContent.encode('utf-8'),
                    mimeType="text/plain",
                    filename=self._determineFilename(title, "text/plain"),
                    documentType=documentType,
                    metadata=metadata
                )
            ]

    async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: Optional[str] = None, aiService=None) -> str:
        """Generate DOCX content from a structured JSON document.

        Args:
            json_content: Content in the standardized schema
                ``{metadata: {...}, documents: [{sections: [...]}]}``.
            title: Primary document title; ``metadata["title"]`` is used only when
                this is empty.
            userPrompt: Optional user prompt, passed to the style lookup.
            aiService: Optional AI service, passed to the style lookup.

        Returns:
            The saved DOCX file, base64-encoded as a UTF-8 string.

        Raises:
            Exception: Wraps any failure (including schema validation errors) with a
                "DOCX generation failed" message; the original error is chained as
                ``__cause__``.
        """
        try:
            # Validate first so malformed input fails before the style lookup,
            # which may call out to the AI service.
            if not self._validateJsonStructure(json_content):
                raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

            # Create new document
            doc = Document()

            # Get style set: use styles from metadata if available, otherwise enhance with AI
            styleSet = await self._getStyleSet(json_content, userPrompt, aiService)

            # Setup basic document styles and create all styles from style set
            self._setupBasicDocumentStyles(doc)
            self._setupDocumentStyles(doc, styleSet)

            # Extract sections and metadata from standardized schema
            sections = self._extractSections(json_content)
            metadata = self._extractMetadata(json_content)
            if not isinstance(metadata, dict):
                # NOTE(review): _extractMetadata is defined elsewhere; guard in case it yields a non-dict
                metadata = {}

            # Use provided title (which comes from documents[].title) as primary source
            # Fallback to metadata.title only if title parameter is empty
            document_title = title or metadata.get("title", "Generated Document")

            # Add document title using Title style
            if document_title:
                doc.add_paragraph(document_title, style='Title')

            # Process each section in order
            for section in sections:
                self._renderJsonSection(doc, section, styleSet)

            # Serialize to memory and hand back as base64 text
            with io.BytesIO() as buffer:
                doc.save(buffer)
                docx_bytes = buffer.getvalue()
            return base64.b64encode(docx_bytes).decode('utf-8')

        except Exception as e:
            self.logger.error(f"Error generating DOCX from JSON: {str(e)}")
            raise Exception(f"DOCX generation failed: {str(e)}") from e

    async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
        """Get style set - use styles from document generation metadata if available,
        otherwise enhance default styles with AI if userPrompt provided.

        WICHTIG: In a dynamic scalable AI system, styling should come from document generation,
        not be generated separately by renderers. Only fall back to AI if styles not provided.

        Args:
            extractedContent: Document content with metadata (may contain styles)
            userPrompt: User's prompt (AI will detect style instructions in any language)
            aiService: AI service (used only if styles not in metadata and userPrompt provided)
            templateName: Name of template style set (None = default)

        Returns:
            Dict with style definitions for all document styles
        """
        # Get default style set
        if templateName == "corporate":
            defaultStyleSet = self._getCorporateStyleSet()
        elif templateName == "minimal":
            defaultStyleSet = self._getMinimalStyleSet()
        else:
            defaultStyleSet = self._getDefaultStyleSet()

        # FIRST: Check if styles are provided in document generation metadata (preferred approach)
        if extractedContent:
            metadata = extractedContent.get("metadata", {})
            if isinstance(metadata, dict):
                styles = metadata.get("styles")
                if styles and isinstance(styles, dict):
                    self.logger.debug("Using styles from document generation metadata")
                    return self._validateStylesContrast(styles)

        # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata)
        if userPrompt and aiService:
            self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...")
            enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
            return self._validateStylesContrast(enhancedStyleSet)
        else:
            # Use default styles only
            return defaultStyleSet

    async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
        """Ask the AI service for styles derived from the user prompt.

        Builds a "docx" style template from the prompt and the default styles and
        passes it to ``_getAiStyles``. If anything fails, a warning is logged and
        ``defaultStyleSet`` is returned as-is.
        """
        try:
            template = self._createAiStyleTemplate("docx", userPrompt, defaultStyleSet)
            return await self._getAiStyles(aiService, template, defaultStyleSet)
        except Exception as err:
            self.logger.warning(f"AI style enhancement failed: {str(err)}, using default styles")
            return defaultStyleSet

    def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix contrast issues in AI-generated styles."""
        try:
            # Fix table header contrast
            if "table_header" in styles:
                header = styles["table_header"]
                bg_color = header.get("background", "#FFFFFF")
                text_color = header.get("text_color", "#000000")

                # If both are white or both are dark, fix it
                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
                    header["background"] = "#4F4F4F"
                    header["text_color"] = "#FFFFFF"
                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
                    header["background"] = "#4F4F4F"
                    header["text_color"] = "#FFFFFF"

            # Fix table cell contrast
            if "table_cell" in styles:
                cell = styles["table_cell"]
                bg_color = cell.get("background", "#FFFFFF")
                text_color = cell.get("text_color", "#000000")

                # If both are white or both are dark, fix it
                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
                    cell["background"] = "#FFFFFF"
                    cell["text_color"] = "#2F2F2F"
                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
                    cell["background"] = "#FFFFFF"
                    cell["text_color"] = "#2F2F2F"

            return styles

        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
            return self._getDefaultStyleSet()

    def _getDefaultStyleSet(self) -> Dict[str, Any]:
        """Default DOCX style set - used when no style instructions present."""
        return {
            "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
            "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left"},
            "heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left"},
            "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
            "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
            "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
            "table_border": {"style": "horizontal_only", "color": "#000000", "thickness": "thin"},
            "bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 20},
            "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
        }

    def _setupBasicDocumentStyles(self, doc: Document) -> None:
        """Apply the base font (Calibri, 11 pt) to the document's 'Normal' style.

        Failures are logged as warnings and do not propagate.
        """
        try:
            normalFont = doc.styles['Normal'].font
            normalFont.name = 'Calibri'
            normalFont.size = Pt(11)
        except Exception as err:
            self.logger.warning(f"Could not set up basic document styles: {str(err)}")


    def _clearTemplateContent(self, doc: Document) -> None:
        """Empty a template document while leaving its styles in place.

        Each paragraph returned by ``doc.paragraphs`` is cleared (the paragraph itself
        stays in the document), and each table returned by ``doc.tables`` is detached
        from its parent element. Failures are logged as warnings and do not propagate.
        """
        try:
            # Work on snapshots so the document can be modified while looping
            existingParagraphs = list(doc.paragraphs)
            for para in existingParagraphs:
                para.clear()

            existingTables = list(doc.tables)
            for tbl in existingTables:
                tblElement = tbl._element
                tblElement.getparent().remove(tblElement)

        except Exception as err:
            self.logger.warning(f"Could not clear template content: {str(err)}")

    def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a single JSON section to DOCX using the given styles.

        Every element of ``section["elements"]`` is rendered in order:

        * ``reference`` elements become an italic ``[Reference: <label>]`` paragraph.
        * ``extracted_text`` elements become a paragraph, with an italic source suffix
          when a source is given.
        * All other elements are dispatched on the element's ``type``. If that is not
          a known type, the section's ``content_type`` decides. Paragraph and unknown
          types are rendered as an image when the element content carries
          ``base64Data``, otherwise as a paragraph.

        Sections without elements are skipped. If rendering fails, a warning is logged
        and an error paragraph is added; elements after the failing one are not rendered.

        Args:
            doc: Target document.
            section: Section dict with ``content_type``, ``elements`` and optionally ``id``.
            styles: Style set passed through to the element renderers.
        """
        try:
            section_type = section.get("content_type", "paragraph")
            elements = section.get("elements", [])

            # If no elements, skip this section (it has no content to render)
            if not elements:
                return

            # One lookup table serves both the element-type and the section-type dispatch
            renderers = {
                "table": self._renderJsonTable,
                "bullet_list": self._renderJsonBulletList,
                "heading": self._renderJsonHeading,
                "paragraph": self._renderJsonParagraph,
                "code_block": self._renderJsonCodeBlock,
                "image": self._renderJsonImage,
            }

            for element in elements:
                element_type = element.get("type", "")

                if element_type == "reference":
                    # Document reference format
                    label = element.get("label", "Reference")
                    para = doc.add_paragraph(f"[Reference: {label}]")
                    para.runs[0].italic = True
                    continue

                if element_type == "extracted_text":
                    # Extracted text format - render as paragraph
                    content = element.get("content", "")
                    source = element.get("source", "")
                    if content:
                        para = doc.add_paragraph(content)
                        if source:
                            para.add_run(f" (Source: {source})").italic = True
                    continue

                # Element type wins over section type (elements can differ from their section).
                # The isinstance checks keep unhashable JSON values out of the dict lookup.
                renderer = renderers.get(element_type) if isinstance(element_type, str) else None

                # Fallback to the section type. "paragraph" is excluded on purpose so it
                # goes through the image-payload check below.
                if renderer is None and isinstance(section_type, str) and section_type != "paragraph":
                    renderer = renderers.get(section_type)

                if renderer is None:
                    # Image elements might not have a type set but carry base64Data in
                    # their content; never render that payload as paragraph text.
                    content = element.get("content", {})
                    if isinstance(content, dict) and content.get("base64Data"):
                        renderer = self._renderJsonImage
                    else:
                        renderer = self._renderJsonParagraph

                renderer(doc, element, styles)

        except Exception as e:
            # The section itself may be the malformed part, so read its id defensively;
            # otherwise this handler would raise and abort the whole document.
            sectionId = section.get('id', 'unknown') if isinstance(section, dict) else 'unknown'
            self.logger.warning(f"Error rendering section {sectionId}: {str(e)}")
            # Add error paragraph as fallback
            doc.add_paragraph(f"[Error rendering section: {str(e)}]")

    def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON table to DOCX using the given styles.

        The table is read from ``table_data["content"]`` (``headers`` and ``rows``).
        Nothing is rendered when the content is not a dict or when headers or rows
        are empty.

        All styling is resolved before the table is created. ``table_border``,
        ``table_header`` and ``table_cell`` entries missing from ``styles`` (or missing
        individual fields) are filled from the default style set, and unparsable
        colours fall back to the default colour. A partial style set therefore no
        longer leaves an empty table in the document.

        Args:
            doc: Target document.
            table_data: Table element with a nested ``content`` dict.
            styles: Style set; may be partial.

        Failures are logged as warnings and do not propagate.
        """
        try:
            # Extract from nested content structure
            content = table_data.get("content", {})
            if not isinstance(content, dict):
                return
            headers = content.get("headers", [])
            rows = content.get("rows", [])

            if not headers or not rows:
                return

            defaults = self._getDefaultStyleSet()

            def resolveStyle(styleKey: str) -> Dict[str, Any]:
                # Default fields first, then whatever the supplied style set overrides
                merged = dict(defaults[styleKey])
                provided = styles.get(styleKey) if isinstance(styles, dict) else None
                if isinstance(provided, dict):
                    merged.update(provided)
                return merged

            resolved = {key: resolveStyle(key) for key in ("table_border", "table_header", "table_cell")}

            def parseColor(styleKey: str, field: str) -> RGBColor:
                # Try the supplied '#RRGGBB' value first, then the default for the same field
                for candidate in (resolved[styleKey].get(field), defaults[styleKey].get(field)):
                    try:
                        digits = candidate.lstrip('#')
                        return RGBColor(int(digits[0:2], 16), int(digits[2:4], 16), int(digits[4:6], 16))
                    except (AttributeError, TypeError, ValueError):
                        continue
                raise ValueError(f"No usable colour for {styleKey}.{field}")

            border_style = resolved["table_border"]["style"]
            header_style = resolved["table_header"]
            header_background = parseColor("table_header", "background")
            header_text_color = parseColor("table_header", "text_color")
            cell_text_color = parseColor("table_cell", "text_color")
            header_alignment = WD_ALIGN_PARAGRAPH.CENTER if header_style["align"] == "center" else WD_ALIGN_PARAGRAPH.LEFT

            # Create table (one extra row for the headers)
            table = doc.add_table(rows=len(rows) + 1, cols=len(headers))
            table.alignment = WD_TABLE_ALIGNMENT.CENTER

            # Apply table borders; any other border style means no borders
            if border_style == "horizontal_only":
                self._applyHorizontalBordersOnly(table)
            elif border_style == "grid":
                table.style = 'Table Grid'

            table_rows = list(table.rows)

            # Header row; ``cells`` is read once per row instead of once per cell
            for cell, header in zip(table_rows[0].cells, headers):
                cell.text = str(header)
                self._setCellBackground(cell, header_background)
                for paragraph in cell.paragraphs:
                    paragraph.alignment = header_alignment
                    for run in paragraph.runs:
                        run.bold = header_style["bold"]
                        run.font.size = Pt(11)
                        run.font.color.rgb = header_text_color

            # Data rows; zip() drops values beyond the table's column count
            for table_row, row_data in zip(table_rows[1:], rows):
                for cell, cell_data in zip(table_row.cells, row_data):
                    cell.text = str(cell_data)
                    for paragraph in cell.paragraphs:
                        paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
                        for run in paragraph.runs:
                            run.font.size = Pt(10)
                            run.font.color.rgb = cell_text_color

        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")

    def _applyHorizontalBordersOnly(self, table) -> None:
        """Give the table horizontal rules only.

        Replaces any existing ``w:tblBorders`` element with one where the top, bottom
        and inside-horizontal borders are single black lines (size 4) and the left,
        right and inside-vertical borders are set to none. Failures are logged as
        warnings and do not propagate.
        """
        try:
            from docx.oxml.shared import OxmlElement, qn

            # Locate the table properties, creating them when absent
            tableProps = table._element.find(qn('w:tblPr'))
            if tableProps is None:
                tableProps = OxmlElement('w:tblPr')
                table._element.insert(0, tableProps)

            # Drop whatever border definition is already there
            staleBorders = tableProps.find(qn('w:tblBorders'))
            if staleBorders is not None:
                tableProps.remove(staleBorders)

            # Attribute sets for a drawn edge and a suppressed edge
            visibleLine = (('w:val', 'single'), ('w:sz', '4'), ('w:space', '0'), ('w:color', '000000'))
            hiddenLine = (('w:val', 'none'),)
            edgeSpecs = (
                ('w:top', visibleLine),
                ('w:bottom', visibleLine),
                ('w:left', hiddenLine),
                ('w:right', hiddenLine),
                ('w:insideH', visibleLine),
                ('w:insideV', hiddenLine),
            )

            borders = OxmlElement('w:tblBorders')
            for edgeTag, attributes in edgeSpecs:
                edge = OxmlElement(edgeTag)
                for attributeName, attributeValue in attributes:
                    edge.set(qn(attributeName), attributeValue)
                borders.append(edge)

            tableProps.append(borders)

        except Exception as err:
            self.logger.warning(f"Could not apply horizontal borders: {str(err)}")

    def _setCellBackground(self, cell, color: RGBColor) -> None:
        """Shade a table cell with a solid fill colour.

        Any existing ``w:shd`` element in the cell properties is replaced. Failures
        are logged as warnings and do not propagate.

        Args:
            cell: Table cell whose underlying ``_element`` is modified.
            color: Three-component colour, unpacked as (red, green, blue).
        """
        try:
            from docx.oxml.shared import OxmlElement, qn

            # Locate the cell properties, creating them when absent
            cellElement = cell._element
            cellProps = cellElement.find(qn('w:tcPr'))
            if cellProps is None:
                cellProps = OxmlElement('w:tcPr')
                cellElement.insert(0, cellProps)

            # Drop the previous shading so only one w:shd remains
            previousShading = cellProps.find(qn('w:shd'))
            if previousShading is not None:
                cellProps.remove(previousShading)

            fill = OxmlElement('w:shd')
            for attributeName, attributeValue in (('w:val', 'clear'), ('w:color', 'auto')):
                fill.set(qn(attributeName), attributeValue)
            # The fill is written as six lowercase hex digits
            red, green, blue = color
            fill.set(qn('w:fill'), f"{red:02x}{green:02x}{blue:02x}")
            cellProps.append(fill)

        except Exception as err:
            self.logger.warning(f"Could not set cell background: {str(err)}")


    def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON bullet list to DOCX using the given styles.

        Items are read from ``list_data["content"]["items"]``. An item is rendered
        when it is a string or a dict with a ``"text"`` key; items of any other shape
        are skipped. Each rendered item becomes a 'List Bullet' paragraph whose runs
        receive the ``font_size`` and ``color`` of the ``bullet_list`` style, when present.

        Args:
            doc: Target document.
            list_data: List element with a nested ``content`` dict.
            styles: Style set; the ``bullet_list`` entry is optional.

        Failures are logged as warnings and do not propagate.
        """
        try:
            # Extract from nested content structure
            content = list_data.get("content", {})
            if not isinstance(content, dict):
                return
            items = content.get("items", [])
            bullet_style = styles.get("bullet_list", {})

            for item in items:
                if isinstance(item, str):
                    text = item
                elif isinstance(item, dict) and "text" in item:
                    text = item["text"]
                else:
                    # Skip unsupported shapes so they neither abort the list nor
                    # cause the previous bullet to be styled a second time.
                    continue

                para = doc.add_paragraph(text, style='List Bullet')

                # Apply bullet list styling from style set
                if bullet_style and para.runs:
                    for run in para.runs:
                        if "font_size" in bullet_style:
                            run.font.size = Pt(bullet_style["font_size"])
                        if "color" in bullet_style:
                            color_hex = bullet_style["color"].lstrip('#')
                            run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")

    def _renderJsonHeading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON heading to DOCX.

        Text and level are read from ``heading_data["content"]``. The level is coerced
        to an integer (falling back to 1 when that is not possible) and clamped to
        1..6. The paragraph uses the style ``"Heading <level>"``, so deeper headings
        keep their place in the hierarchy instead of all being rendered as
        "Heading 1". Headings without text are skipped.

        Args:
            doc: Target document.
            heading_data: Heading element with a nested ``content`` dict.
            styles: Style set (not consulted here; the heading styles live in the document).

        Failures are logged as warnings and do not propagate.
        """
        try:
            # Extract from nested content structure
            content = heading_data.get("content", {})
            if not isinstance(content, dict):
                return
            text = content.get("text", "")
            if not text:
                return

            # JSON producers may deliver the level as "2" or 2.0; a bad value must
            # not cost us the heading.
            try:
                level = int(content.get("level", 1))
            except (TypeError, ValueError):
                level = 1
            level = max(1, min(6, level))

            style_name = f"Heading {level}"
            try:
                doc.add_paragraph(text, style=style_name)
            except KeyError:
                # Style lookup by name failed; let python-docx resolve the heading itself
                doc.add_heading(text, level=level)

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")

    def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON paragraph to DOCX using the given styles.

        The text comes from ``content["text"]`` when the content is a dict, or from
        the content itself when it is a string. Text that looks like base64 image data
        is not rendered; a red error note is added instead. Otherwise the paragraph is
        added and the ``paragraph`` style (font size, bold, colour, alignment) is
        applied when present. Failures are logged as warnings and do not propagate.
        """
        try:
            # Extract from nested content structure
            content = paragraph_data.get("content", {})
            text = ""
            if isinstance(content, dict):
                text = content.get("text", "")
            elif isinstance(content, str):
                text = content

            if not text:
                return

            # CRITICAL: never dump base64 image data into the document as text.
            # JPEG payloads start with /9j/, PNG payloads with iVBORw0KGgo; a long
            # text whose first 100 characters are all base64 characters is also treated as such.
            looksLikeBase64 = text.startswith(("/9j/", "iVBORw0KGgo")) or (
                len(text) > 100
                and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in text[:100])
            )
            if looksLikeBase64:
                self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(text)})")
                notice = doc.add_paragraph("[Error: Image data found in text content - image embedding may have failed]")
                if notice.runs:
                    notice.runs[0].font.color.rgb = RGBColor(255, 0, 0)  # Red marks the error
                return

            para = doc.add_paragraph(text)

            # Apply paragraph styling from style set
            paragraph_style = styles.get("paragraph", {})
            if not paragraph_style:
                return

            for run in para.runs:
                runFont = run.font
                if "font_size" in paragraph_style:
                    runFont.size = Pt(paragraph_style["font_size"])
                if "bold" in paragraph_style:
                    runFont.bold = paragraph_style["bold"]
                if "color" in paragraph_style:
                    hexDigits = paragraph_style["color"].lstrip('#')
                    runFont.color.rgb = RGBColor(int(hexDigits[0:2], 16), int(hexDigits[2:4], 16), int(hexDigits[4:6], 16))

            if "align" in paragraph_style:
                align = paragraph_style["align"]
                # Anything other than "center" or "right" is left-aligned
                para.alignment = (
                    WD_ALIGN_PARAGRAPH.CENTER if align == "center"
                    else WD_ALIGN_PARAGRAPH.RIGHT if align == "right"
                    else WD_ALIGN_PARAGRAPH.LEFT
                )

        except Exception as err:
            self.logger.warning(f"Error rendering paragraph: {str(err)}")

    def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON code block to DOCX using the given styles.

        Code and language are read from ``code_data["content"]``. When a language is
        given, a bold "Code (<language>):" line precedes the code. The code paragraph
        uses the ``code_block`` style's font, size and colour; font and size default
        to "Courier New" and 9 pt when the style does not define them. Blocks without
        code are skipped. Failures are logged as warnings and do not propagate.
        """
        try:
            # Extract from nested content structure
            payload = code_data.get("content", {})
            if not isinstance(payload, dict):
                return
            sourceCode = payload.get("code", "")
            languageName = payload.get("language", "")
            blockStyle = styles.get("code_block", {})

            if not sourceCode:
                return

            if languageName:
                captionPara = doc.add_paragraph(f"Code ({languageName}):")
                captionRuns = captionPara.runs
                if captionRuns:
                    captionRuns[0].bold = True

            codePara = doc.add_paragraph(sourceCode)
            for run in codePara.runs:
                runFont = run.font
                runFont.name = blockStyle.get("font", "Courier New")
                runFont.size = Pt(blockStyle.get("font_size", 9))
                if "color" in blockStyle:
                    hexDigits = blockStyle["color"].lstrip('#')
                    runFont.color.rgb = RGBColor(int(hexDigits[0:2], 16), int(hexDigits[2:4], 16), int(hexDigits[4:6], 16))

        except Exception as err:
            self.logger.warning(f"Error rendering code block: {str(err)}")

    def _renderJsonImage(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON image to DOCX."""
        try:
            # Extract from nested content structure
            content = image_data.get("content", {})
            base64_data = ""
            alt_text = "Image"
            caption = ""

            if isinstance(content, dict):
                base64_data = content.get("base64Data", "")
                alt_text = content.get("altText", "Image")
                caption = content.get("caption", "")
            elif isinstance(content, str):
                # Content might be base64 string directly (shouldn't happen, but handle it)
                self.logger.warning("Image content is a string, not a dict. This should not happen.")
                return

            # If base64Data not found in content, try direct element fields (fallback)
            if not base64_data:
                base64_data = image_data.get("base64Data", "")
                if not alt_text or alt_text == "Image":
                    alt_text = image_data.get("altText", "Image")
                if not caption:
                    caption = image_data.get("caption", "")

            # CRITICAL: Ensure we don't render base64 data as text
            # If base64_data looks like it might be rendered elsewhere, skip it
            if not base64_data:
                raise Exception("No image data provided (base64Data is empty)")

            try:
                image_bytes = base64.b64decode(base64_data)
                image_stream = io.BytesIO(image_bytes)

                # Get image dimensions to calculate proper size
                try:
                    from PIL import Image as PILImage
                    pil_image = PILImage.open(image_stream)
                    img_width_px, img_height_px = pil_image.size

                    # DOCX page width is typically 8.5 inches, usable width ~6.5 inches with margins
                    # Standard margins: 1 inch left/right, so usable width = 6.5 inches
                    max_width_inches = 6.5
                    max_height_inches = 9.0  # Leave room for text above/below

                    # Calculate scale factor to fit within page dimensions
                    # Convert pixels to inches (assuming 96 DPI for modern displays, but images may vary)
                    # Use conservative estimate: 1 inch = 96 pixels
                    img_width_inches = img_width_px / 96.0
                    img_height_inches = img_height_px / 96.0

                    # Calculate scale to fit
                    width_scale = max_width_inches / img_width_inches if img_width_inches > max_width_inches else 1.0
                    height_scale = max_height_inches / img_height_inches if img_height_inches > max_height_inches else 1.0
                    scale = min(width_scale, height_scale, 1.0)  # Don't scale up, only down

                    final_width = img_width_inches * scale
                    final_height = img_height_inches * scale

                    # Reset stream for docx
                    image_stream.seek(0)
                    doc.add_picture(image_stream, width=Inches(final_width))
                except Exception:
                    # Fallback: use conservative default size if PIL fails
                    image_stream.seek(0)
                    doc.add_picture(image_stream, width=Inches(6.0))

                # Use caption from section if available, otherwise use alt_text
                if caption:
                    caption_text = caption
                elif alt_text and alt_text != "Image":
                    # Only use alt_text if it doesn't look like a usageHint
                    if "Render as visual element:" in alt_text:
                        # Extract filename from usageHint if possible
                        parts = alt_text.split("Render as visual element:")
                        if len(parts) > 1:
                            filename = parts[1].strip()
                            caption_text = f"Figure: {filename}"
                        else:
                            caption_text = alt_text
                    else:
                        caption_text = f"Figure: {alt_text}"
                else:
                    caption_text = None

                if caption_text:
                    caption_para = doc.add_paragraph(caption_text)
                    caption_para.runs[0].italic = True
            except Exception as embedError:
                # Image decoding or embedding failed
                raise Exception(f"Failed to decode or embed image: {str(embedError)}")

        except Exception as e:
            self.logger.error(f"Error embedding image in DOCX: {str(e)}")
            errorMsg = f"[Error: Could not embed image '{image_data.get('altText', 'Image')}'. {str(e)}]"
            errorPara = doc.add_paragraph(errorMsg)
            if errorPara.runs:
                errorPara.runs[0].font.color.rgb = RGBColor(255, 0, 0)  # Red color for error

    def _extractStructureFromPrompt(self, userPrompt: str, title: str) -> Dict[str, Any]:
        """Extract document structure from user prompt."""
        structure = {
            'title': title,
            'sections': [],
            'format': 'standard'
        }

        if not userPrompt:
            return structure

        # Extract title from prompt if not provided
        if not title or title == "Generated Document":
            # Look for "create a ... document" or "generate a ... report"
            title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower())
            if title_match:
                structure['title'] = title_match.group(1).strip().title()

        # Extract sections from numbered lists in prompt
        section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
        sections = re.findall(section_pattern, userPrompt)

        for num, section_text in sections:
            structure['sections'].append({
                'number': int(num),
                'title': section_text.strip(),
                'level': 2  # H2 level
            })

        # If no numbered sections found, try to extract from "including:" patterns
        if not structure['sections']:
            including_match = re.search(r'including:\s*(.+?)(?:\.|$)', userPrompt, re.DOTALL)
            if including_match:
                including_text = including_match.group(1)
                # Split by common separators
                parts = re.split(r'[,;]\s*', including_text)
                for i, part in enumerate(parts, 1):
                    part = part.strip()
                    if part:
                        structure['sections'].append({
                            'number': i,
                            'title': part,
                            'level': 2
                        })

        # If still no sections, extract from any list-like patterns
        if not structure['sections']:
            # Look for bullet points or dashes
            bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)'
            bullets = re.findall(bullet_pattern, userPrompt)
            for i, bullet in enumerate(bullets, 1):
                bullet = bullet.strip()
                if bullet and len(bullet) > 3:
                    structure['sections'].append({
                        'number': i,
                        'title': bullet,
                        'level': 2
                    })

        # If still no sections, extract from sentence structure
        if not structure['sections']:
            # Split prompt into sentences and use as sections
            sentences = re.split(r'[.!?]\s+', userPrompt)
            for i, sentence in enumerate(sentences[:5], 1):  # Max 5 sections
                sentence = sentence.strip()
                if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')):
                    structure['sections'].append({
                        'number': i,
                        'title': sentence[:50] + "..." if len(sentence) > 50 else sentence,
                        'level': 2
                    })

        # Final fallback: create sections from prompt keywords
        if not structure['sections']:
            # Extract key action words from prompt
            action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate']
            found_actions = []
            for action in action_words:
                if action in userPrompt.lower():
                    found_actions.append(action.title())

            if found_actions:
                for i, action in enumerate(found_actions[:3], 1):
                    structure['sections'].append({
                        'number': i,
                        'title': f"{action} Document Content",
                        'level': 2
                    })
            else:
                # Last resort: generic but meaningful sections
                structure['sections'] = [
                    {'number': 1, 'title': 'Document Analysis', 'level': 2},
                    {'number': 2, 'title': 'Key Information', 'level': 2},
                    {'number': 3, 'title': 'Summary and Conclusions', 'level': 2}
                ]

        return structure

    def _generateFromStructure(self, doc, content: str, structure: Dict[str, Any]):
        """Generate DOCX content based on extracted structure."""
        # Add sections based on prompt structure
        for section in structure['sections']:
            # Add section heading
            doc.add_heading(f"{section['number']}) {section['title']}", level=section['level'])

            # Add AI-generated content for this section
            # Try to extract relevant content for this section from the AI response
            section_content = self._extractSectionContent(content, section['title'])

            if section_content:
                doc.add_paragraph(section_content)
            else:
                # If no specific content found, add a note
                doc.add_paragraph(f"Content for {section['title']} based on document analysis.")

            # Add some spacing
            doc.add_paragraph()

        # Add the complete AI-generated content as additional analysis
        if content and content.strip():
            doc.add_heading("Complete Analysis", level=1)
            doc.add_paragraph(content)

    def _extractSectionContent(self, content: str, section_title: str) -> str:
        """Extract relevant content for a specific section from AI response."""
        if not content or not section_title:
            return ""

        # Look for content that matches the section title
        section_keywords = section_title.lower().split()

        # Split content into paragraphs
        paragraphs = content.split('\n\n')

        relevant_paragraphs = []
        for paragraph in paragraphs:
            paragraph_lower = paragraph.lower()
            # Check if paragraph contains keywords from section title
            if any(keyword in paragraph_lower for keyword in section_keywords if len(keyword) > 3):
                relevant_paragraphs.append(paragraph.strip())

        if relevant_paragraphs:
            return '\n\n'.join(relevant_paragraphs[:2])  # Max 2 paragraphs per section

        return ""

    def _setupDocumentStyles(self, doc: Document, styleSet: Dict[str, Any]) -> None:
        """Register the paragraph styles described by ``styleSet`` in ``doc``.

        Intended to run before rendering so the styles exist when content is
        added. Only keys present in ``styleSet`` are processed; any failure is
        logged as a warning and otherwise ignored.
        """
        try:
            from docx.enum.style import WD_STYLE_TYPE

            # style-set key -> Word style name, in creation order
            style_targets = (
                ("title", "Title"),
                ("heading1", "Heading 1"),
                ("heading2", "Heading 2"),
                ("paragraph", "Custom Paragraph"),
            )
            for config_key, word_style_name in style_targets:
                if config_key in styleSet:
                    self._createStyle(doc, word_style_name, styleSet[config_key], WD_STYLE_TYPE.PARAGRAPH)

            # Note: List Bullet and List Number are built-in Word styles, but we apply custom styling to runs

        except Exception as e:
            self.logger.warning(f"Could not set up document styles: {str(e)}")

    def _createStyle(self, doc: Document, styleName: str, styleConfig: Dict[str, Any], styleType) -> None:
        """Create or update a style in the document styles collection."""
        try:
            from docx.enum.style import WD_STYLE_TYPE

            # Try to get existing style, or create new one
            try:
                doc_style = doc.styles[styleName]
            except KeyError:
                # Create new style based on Normal
                doc_style = doc.styles.add_style(styleName, styleType)
                # Base it on Normal style
                doc_style.base_style = doc.styles['Normal']

            # Apply font configuration
            font = doc_style.font
            if "font_size" in styleConfig:
                font.size = Pt(styleConfig["font_size"])
            if "bold" in styleConfig:
                font.bold = styleConfig["bold"]
            if "color" in styleConfig:
                color_hex = styleConfig["color"].lstrip('#')
                font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
            if "font" in styleConfig:
                font.name = styleConfig["font"]

            # Set paragraph formatting for alignment
            if "align" in styleConfig:
                para_format = doc_style.paragraph_format
                align = styleConfig["align"]
                if align == "center":
                    para_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
                elif align == "right":
                    para_format.alignment = WD_ALIGN_PARAGRAPH.RIGHT
                else:
                    para_format.alignment = WD_ALIGN_PARAGRAPH.LEFT

        except Exception as e:
            self.logger.warning(f"Could not create style '{styleName}': {str(e)}")

    def _processSection(self, doc, lines: list):
        """Process a section of content into DOCX elements."""
        for line in lines:
            if not line.strip():
                continue

            # Check for tables (lines with |)
            if '|' in line and not line.startswith('|'):
                # This might be part of a table, process as table
                table_data = self._extractTableData(lines)
                if table_data:
                    self._addTable(doc, table_data)
                    return

            # Check for lists
            if line.startswith('- ') or line.startswith('* '):
                # This is a list item
                doc.add_paragraph(line[2:], style='List Bullet')
            elif line.startswith(('1. ', '2. ', '3. ', '4. ', '5. ')):
                # This is a numbered list item
                doc.add_paragraph(line[3:], style='List Number')
            else:
                # Regular paragraph
                doc.add_paragraph(line)

    def _extractTableData(self, lines: list) -> list:
        """Extract table data from lines."""
        table_data = []
        in_table = False

        for line in lines:
            if '|' in line:
                if not in_table:
                    in_table = True
                # Split by | and clean up
                cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                if cells:
                    table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break

        return table_data if len(table_data) > 1 else []

    def _addTable(self, doc, table_data: list):
        """Add a table to the document."""
        try:
            if not table_data:
                return

            # Create table
            table = doc.add_table(rows=len(table_data), cols=len(table_data[0]))
            table.alignment = WD_TABLE_ALIGNMENT.CENTER

            # Add data to table
            for row_idx, row_data in enumerate(table_data):
                for col_idx, cell_data in enumerate(row_data):
                    if col_idx < len(table.rows[row_idx].cells):
                        table.rows[row_idx].cells[col_idx].text = cell_data

            # Style the table
            self._styleTable(table)

        except Exception as e:
            self.logger.warning(f"Could not add table: {str(e)}")

    def _styleTable(self, table):
        """Apply styling to the table."""
        try:
            # Style header row
            if len(table.rows) > 0:
                header_cells = table.rows[0].cells
                for cell in header_cells:
                    for paragraph in cell.paragraphs:
                        for run in paragraph.runs:
                            run.bold = True
        except Exception as e:
            self.logger.warning(f"Could not style table: {str(e)}")

    def _processTableRow(self, doc, line: str):
        """Process a table row and add it to the document."""
        if not line.strip():
            return

        # Split by pipe separator
        parts = [part.strip() for part in line.split('|')]

        if len(parts) >= 2:
            # This is a table row - create a table if it doesn't exist
            if not hasattr(self, '_current_table') or self._current_table is None:
                # Create new table
                self._current_table = doc.add_table(rows=1, cols=len(parts))
                self._current_table.style = 'Table Grid'

                # Add header row
                for i, part in enumerate(parts):
                    if i < len(self._current_table.rows[0].cells):
                        cell = self._current_table.rows[0].cells[i]
                        cell.text = part
                        # Make header bold
                        for paragraph in cell.paragraphs:
                            for run in paragraph.runs:
                                run.bold = True
            else:
                # Add data row to existing table
                row = self._current_table.add_row()
                for i, part in enumerate(parts):
                    if i < len(row.cells):
                        row.cells[i].text = part
        else:
            # Not a table row, treat as regular text
            doc.add_paragraph(line)

    def _cleanAiContent(self, content: str) -> str:
        """Clean AI-generated content by removing debug information and duplicates."""
        if not content:
            return ""

        # Remove debug information
        lines = content.split('\n')
        clean_lines = []

        for line in lines:
            # Skip debug lines and separators
            if (line.startswith('[Skipped ') or
                line.startswith('=== DOCUMENT:') or
                line.startswith('---') or
                line.startswith('FILENAME:') or
                line.strip() == '' or
                line.strip() == '---'):
                continue
            clean_lines.append(line)

        # Join lines and remove duplicate content
        clean_content = '\n'.join(clean_lines)

        # Remove duplicate sections by keeping only the first occurrence
        sections = clean_content.split('\n\n')
        seen_sections = set()
        unique_sections = []

        for section in sections:
            section_key = section.strip()[:50]  # Use first 50 chars as key
            if section_key not in seen_sections and section.strip():
                seen_sections.add(section_key)
                unique_sections.append(section)

        return '\n\n'.join(unique_sections)

    def _processTables(self, doc, content: str) -> str:
        """
        Process tables in the content (both CSV and pipe-separated) and convert them to Word tables.
        Returns the content with tables replaced by placeholders.
        """
        # csv is already imported at module level

        lines = content.split('\n')
        processed_lines = []
        i = 0

        while i < len(lines):
            line = lines[i].strip()

            # Check if this line looks like a table (contains pipes or commas with multiple fields)
            is_pipe_table = '|' in line and len(line.split('|')) >= 2
            is_csv_table = ',' in line and len(line.split(',')) >= 2

            if is_pipe_table or is_csv_table:
                # Collect consecutive table lines
                table_lines = []
                j = i

                # Determine separator and collect lines
                separator = '|' if is_pipe_table else ','
                while j < len(lines):
                    current_line = lines[j].strip()
                    if separator in current_line and len(current_line.split(separator)) >= 2:
                        table_lines.append(current_line)
                        j += 1
                    else:
                        break

                if len(table_lines) >= 2:  # At least header + 1 data row
                    # Create Word table
                    try:
                        if separator == '|':
                            # Process pipe-separated table
                            rows = []
                            for table_line in table_lines:
                                # Split by pipe and clean up
                                cells = [cell.strip() for cell in table_line.split('|')]
                                rows.append(cells)
                        else:
                            # Process CSV table
                            csv_content = '\n'.join(table_lines)
                            csv_reader = csv.reader(io.StringIO(csv_content))
                            rows = list(csv_reader)

                        if rows and len(rows[0]) > 0:
                            # Create Word table
                            table = doc.add_table(rows=len(rows), cols=len(rows[0]))
                            table.style = 'Table Grid'

                            # Populate table
                            for row_idx, row_data in enumerate(rows):
                                for col_idx, cell_data in enumerate(row_data):
                                    if col_idx < len(table.rows[row_idx].cells):
                                        table.rows[row_idx].cells[col_idx].text = cell_data.strip()

                                # Make header row bold
                                if row_idx == 0:
                                    for cell in table.rows[row_idx].cells:
                                        for paragraph in cell.paragraphs:
                                            for run in paragraph.runs:
                                                run.bold = True

                            # Add placeholder to mark where table was inserted
                            processed_lines.append(f"[TABLE_INSERTED_{len(processed_lines)}]")

                            # Skip the table lines
                            i = j
                            continue
                    except Exception as e:
                        # If table parsing fails, treat as regular text
                        pass

            processed_lines.append(line)
            i += 1

        return '\n'.join(processed_lines)

    def _parseAndFormatContent(self, doc, content: str, title: str):
        """Parse AI-generated content in standardized format and apply proper DOCX formatting."""
        if not content:
            return

        # Process tables and replace them with placeholders
        content = self._processTables(doc, content)

        # Parse content line by line in exact sequence
        lines = content.split('\n')

        for line in lines:
            line = line.strip()
            if not line:
                # Empty line - add paragraph break
                doc.add_paragraph()
                continue

            # Skip table placeholders (already processed)
            if line.startswith('[TABLE_INSERTED_'):
                continue

            # Check if this is a Markdown heading (# ## ###)
            if line.startswith('#'):
                level = len(line) - len(line.lstrip('#'))
                heading_text = line.lstrip('# ').strip()
                doc.add_heading(heading_text, level=min(level, 3))

            # Check if this is a numbered heading (1) Title, 2) Title, etc.)
            elif re.match(r'^\d+\)\s+.+', line):
                heading_text = re.sub(r'^\d+\)\s+', '', line)
                doc.add_heading(heading_text, level=1)

            # Check if this is a Markdown list item
            elif line.startswith('- ') or re.match(r'^\d+\.\s+', line):
                bullet_text = re.sub(r'^[-•]\s+|\d+\.\s+', '', line)
                self._add_bullet_point(doc, bullet_text)

            # Check if this is a code block
            elif line.startswith('```'):
                if not line.endswith('```'):
                    # Start of code block - collect until end
                    code_lines = [line]
                    continue
                else:
                    # End of code block
                    if 'code_lines' in locals():
                        code_lines.append(line)
                        code_text = '\n'.join(code_lines)
                        para = doc.add_paragraph()
                        run = para.add_run(code_text)
                        run.font.name = 'Courier New'
                        del code_lines

            # Regular paragraph
            else:
                self._addParagraphToDoc(doc, line)

    def _addParagraphToDoc(self, doc, text: str):
        """Add a paragraph to the document with proper formatting."""
        if not text.strip():
            return

        # Check for Markdown formatting (**bold**, *italic*)
        para = doc.add_paragraph()

        # Split by bold markers
        parts = text.split('**')
        for i, part in enumerate(parts):
            if i % 2 == 0:
                # Regular text - check for italic
                italic_parts = part.split('*')
                for j, italic_part in enumerate(italic_parts):
                    if j % 2 == 0:
                        # Regular text
                        if italic_part:
                            para.add_run(italic_part)
                    else:
                        # Italic text
                        if italic_part:
                            run = para.add_run(italic_part)
                            run.italic = True
            else:
                # Bold text
                if part:
                    run = para.add_run(part)
                    run.bold = True