# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
HTML renderer for report generation.
"""

from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional

class RendererHtml(BaseRenderer):
    """Renders content to HTML format with format-specific extraction."""
    
    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported HTML formats."""
        return ['html', 'htm']
    
    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['web', 'webpage']
    
    @classmethod
    def getPriority(cls) -> int:
        """Return priority for HTML renderer."""
        return 100
    
    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: HTML web pages are rendered documents."""
        return 'document'
    
    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        List the section content types the HTML renderer accepts.

        Every entry of supportedSectionTypes is accepted (HTML pages can carry all
        content types, images included). A fresh list is returned on each call.

        Args:
            formatName: Accepted for interface compatibility; the result does not depend on it.
        """
        # Imported at call time, exactly where it is needed
        from modules.datamodels.datamodelJson import supportedSectionTypes as allSectionTypes
        return [*allSectionTypes]
    
    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """
        Render the content as an HTML page plus one separate file per embedded image.

        The page is generated first; when images were extracted, the page is passed
        through _replaceImageDataUris together with the extracted images.

        Returns:
            [HTML document, image1, image2, ...]. Images with an empty base64 payload,
            or whose creation raises, are left out (the latter is logged as a warning).
        """
        from base64 import b64decode

        # Collect the images up front and keep them on the instance for later retrieval
        extractedImages = self._extractImages(extractedContent)
        self._renderedImages = extractedImages

        pageHtml = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
        if extractedImages:
            pageHtml = self._replaceImageDataUris(pageHtml, extractedImages)

        # Prefer the first document's own filename; otherwise derive one from the title
        docList = extractedContent.get("documents", [])
        firstDoc = docList[0] if docList else None
        pageFilename = firstDoc.get("filename") if isinstance(firstDoc, dict) else None
        if not pageFilename:
            pageFilename = self._determineFilename(title, "text/html")

        # Metadata is only forwarded when it really is a dict
        docMetadata = extractedContent.get("metadata", {}) if extractedContent else {}
        metadataIsDict = isinstance(docMetadata, dict)

        htmlDocument = RenderedDocument(
            documentData=pageHtml.encode('utf-8'),
            mimeType="text/html",
            filename=pageFilename,
            documentType=docMetadata.get("documentType") if metadataIsDict else None,
            metadata=docMetadata if metadataIsDict else None
        )
        outputDocs = [htmlDocument]

        # Every decodable image becomes its own document after the HTML page
        for imageInfo in extractedImages:
            payload = imageInfo.get("base64Data", "")
            imageName = imageInfo.get("filename", f"image_{len(outputDocs)}.png")
            imageMime = imageInfo.get("mimeType", "image/png")
            if not payload:
                continue
            try:
                rawBytes = b64decode(payload)
                outputDocs.append(
                    RenderedDocument(documentData=rawBytes, mimeType=imageMime, filename=imageName)
                )
                self.logger.debug(f"Added image file: {imageName} ({len(rawBytes)} bytes)")
            except Exception as e:
                self.logger.warning(f"Error creating image file {imageName}: {str(e)}")

        return outputDocs
    
    async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
        """Generate HTML content from structured JSON document using AI-generated styling."""
        try:
            # Get style set: use styles from metadata if available, otherwise enhance with AI
            styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
            
            # Validate JSON structure
            if not self._validateJsonStructure(jsonContent):
                raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")
            
            # Extract sections and metadata from standardized schema
            sections = self._extractSections(jsonContent)
            metadata = self._extractMetadata(jsonContent)
            
            # Use provided title (which comes from documents[].title) as primary source
            # Fallback to metadata.title only if title parameter is empty
            documentTitle = title if title else metadata.get("title", "Generated Document")
            
            # Build HTML document
            htmlParts = []
            
            # HTML document structure
            htmlParts.append('<!DOCTYPE html>')
            htmlParts.append('<html lang="en">')
            htmlParts.append('<head>')
            htmlParts.append('<meta charset="UTF-8">')
            htmlParts.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">')
            htmlParts.append(f'<title>{documentTitle}</title>')
            htmlParts.append('<style>')
            htmlParts.append(self._generateCssStyles(styles))
            htmlParts.append('</style>')
            htmlParts.append('</head>')
            htmlParts.append('<body>')
            
            # Document header
            htmlParts.append(f'<header><h1 class="document-title">{documentTitle}</h1></header>')
            
            # Main content
            htmlParts.append('<main>')
            
            # Process each section
            for section in sections:
                sectionHtml = self._renderJsonSection(section, styles)
                if sectionHtml:
                    htmlParts.append(sectionHtml)
            
            htmlParts.append('</main>')
            
            # Footer
            htmlParts.append('<footer>')
            htmlParts.append(f'<p class="generated-info">Generated: {self._formatTimestamp()}</p>')
            htmlParts.append('</footer>')
            
            htmlParts.append('</body>')
            htmlParts.append('</html>')
            
            return '\n'.join(htmlParts)
            
        except Exception as e:
            self.logger.error(f"Error generating HTML from JSON: {str(e)}")
            raise Exception(f"HTML generation failed: {str(e)}")
    
    async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
        """Get style set - use styles from document generation metadata if available,
        otherwise enhance default styles with AI if userPrompt provided.
        
        WICHTIG: In a dynamic scalable AI system, styling should come from document generation,
        not be generated separately by renderers. Only fall back to AI if styles not provided.
        
        Args:
            extractedContent: Document content with metadata (may contain styles)
            userPrompt: User's prompt (AI will detect style instructions in any language)
            aiService: AI service (used only if styles not in metadata and userPrompt provided)
            templateName: Name of template style set (None = default)
            
        Returns:
            Dict with style definitions for all document styles
        """
        # Get default style set
        defaultStyleSet = self._getDefaultStyleSet()
        
        # FIRST: Check if styles are provided in document generation metadata (preferred approach)
        if extractedContent:
            metadata = extractedContent.get("metadata", {})
            if isinstance(metadata, dict):
                styles = metadata.get("styles")
                if styles and isinstance(styles, dict):
                    self.logger.debug("Using styles from document generation metadata")
                    return self._validateStylesContrast(styles)
        
        # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata)
        if userPrompt and aiService:
            self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...")
            enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
            return self._validateStylesContrast(enhancedStyleSet)
        else:
            # Use default styles only
            return defaultStyleSet
    
    async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
        """Enhance default styles with AI based on user prompt."""
        try:
            style_template = self._createAiStyleTemplate("html", userPrompt, defaultStyleSet)
            enhanced_styles = await self._getAiStyles(aiService, style_template, defaultStyleSet)
            return enhanced_styles
        except Exception as e:
            self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles")
            return defaultStyleSet
    
    def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix contrast issues in AI-generated styles."""
        try:
            # Fix table header contrast
            if "table_header" in styles:
                header = styles["table_header"]
                bgColor = header.get("background", "#FFFFFF")
                textColor = header.get("color", "#000000")
                
                # If both are white or both are dark, fix it
                if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
                    header["background"] = "#4F4F4F"
                    header["color"] = "#FFFFFF"
                elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
                    header["background"] = "#4F4F4F"
                    header["color"] = "#FFFFFF"
            
            # Fix table cell contrast
            if "table_cell" in styles:
                cell = styles["table_cell"]
                bgColor = cell.get("background", "#FFFFFF")
                textColor = cell.get("color", "#000000")
                
                # If both are white or both are dark, fix it
                if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
                    cell["background"] = "#FFFFFF"
                    cell["color"] = "#2F2F2F"
                elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
                    cell["background"] = "#FFFFFF"
                    cell["color"] = "#2F2F2F"
            
            return styles
            
        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
            return self._getDefaultStyleSet()
    
    def _getDefaultStyleSet(self) -> Dict[str, Any]:
        """Default HTML style set - used when no style instructions present."""
        return {
            "title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
            "heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
            "heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
            "paragraph": {"font_size": "1em", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "margin": "0 0 1em 0", "line_height": "1.6"},
            "table": {"border": "1px solid #ddd", "border_collapse": "collapse", "width": "100%", "margin": "1em 0"},
            "table_header": {"background": "#4F4F4F", "color": "#FFFFFF", "font_weight": "bold", "text_align": "center", "padding": "12px"},
            "table_cell": {"background": "#FFFFFF", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "padding": "8px", "border": "1px solid #ddd"},
            "bullet_list": {"font_size": "1em", "color": "#2F2F2F", "margin": "0 0 1em 0", "padding_left": "20px"},
            "code_block": {"font_family": "Courier New, monospace", "font_size": "0.9em", "color": "#2F2F2F", "background": "#F5F5F5", "padding": "1em", "border": "1px solid #ddd", "border_radius": "4px", "margin": "1em 0"},
            "image": {"max_width": "100%", "height": "auto", "margin": "1em 0", "border_radius": "4px"},
            "body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
        }
    
    
    def _generateCssStyles(self, styles: Dict[str, Any]) -> str:
        """Generate CSS from style definitions."""
        css_parts = []
        
        # Body styles
        body_style = styles.get("body", {})
        css_parts.append("body {")
        for property_name, value in body_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")
        
        # Document title
        title_style = styles.get("title", {})
        css_parts.append(".document-title {")
        for property_name, value in title_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")
        
        # Headings
        for heading_level in ["heading1", "heading2"]:
            heading_style = styles.get(heading_level, {})
            css_class = f"h{heading_level[-1]}"
            css_parts.append(f"{css_class} {{")
            for property_name, value in heading_style.items():
                css_property = property_name.replace("_", "-")
                css_parts.append(f"  {css_property}: {value};")
            css_parts.append("}")
        
        # Paragraphs
        paragraph_style = styles.get("paragraph", {})
        css_parts.append("p {")
        for property_name, value in paragraph_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")
        
        # Tables
        table_style = styles.get("table", {})
        css_parts.append("table {")
        for property_name, value in table_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")
        
        # Table headers
        table_header_style = styles.get("table_header", {})
        css_parts.append("th {")
        for property_name, value in table_header_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")
        
        # Table cells
        table_cell_style = styles.get("table_cell", {})
        css_parts.append("td {")
        for property_name, value in table_cell_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")
        
        # Lists
        bullet_list_style = styles.get("bullet_list", {})
        css_parts.append("ul {")
        for property_name, value in bullet_list_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")
        
        # Code blocks
        code_block_style = styles.get("code_block", {})
        css_parts.append("pre {")
        for property_name, value in code_block_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")
        
        # Images
        image_style = styles.get("image", {})
        css_parts.append("img {")
        for property_name, value in image_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")
        
        # Generated info
        css_parts.append(".generated-info {")
        css_parts.append("  font-size: 0.9em;")
        css_parts.append("  color: #666;")
        css_parts.append("  text-align: center;")
        css_parts.append("  margin-top: 2em;")
        css_parts.append("  padding-top: 1em;")
        css_parts.append("  border-top: 1px solid #ddd;")
        css_parts.append("}")
        
        return '\n'.join(css_parts)
    
    def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a single JSON section to HTML using AI-generated styles.
        Supports three content formats: reference, object (base64), extracted_text.
        WICHTIG: Respektiert sectionType (content_type) für korrekte Rendering-Logik.
        """
        try:
            sectionType = self._getSectionType(section)
            sectionData = self._getSectionData(section)
            
            # WICHTIG: Respektiere sectionType (content_type) ZUERST, dann process elements entsprechend
            # Process elements according to section's content_type, not just element types
            
            if sectionType == "table":
                # Work directly with elements like other renderers
                if isinstance(sectionData, list) and sectionData:
                    element = sectionData[0] if isinstance(sectionData[0], dict) else {}
                    return self._renderJsonTable(element, styles)
                return ""
            elif sectionType == "bullet_list":
                # Work directly with elements like other renderers
                if isinstance(sectionData, list) and sectionData:
                    element = sectionData[0] if isinstance(sectionData[0], dict) else {}
                    return self._renderJsonBulletList(element, styles)
                return ""
            elif sectionType == "heading":
                # Work directly with elements like other renderers
                if isinstance(sectionData, list) and sectionData:
                    element = sectionData[0] if isinstance(sectionData[0], dict) else {}
                    return self._renderJsonHeading(element, styles)
                return ""
            elif sectionType == "paragraph":
                # Process paragraph elements, including extracted_text
                if isinstance(sectionData, list):
                    htmlParts = []
                    for element in sectionData:
                        element_type = element.get("type", "") if isinstance(element, dict) else ""
                        
                        if element_type == "reference":
                            doc_ref = element.get("documentReference", "")
                            label = element.get("label", "Reference")
                            htmlParts.append(f'<p class="reference"><em>[Reference: {label}]</em></p>')
                        elif element_type == "extracted_text":
                            content = element.get("content", "")
                            source = element.get("source", "")
                            if content:
                                source_text = f' <small><em>(Source: {source})</em></small>' if source else ''
                                htmlParts.append(f'<p>{content}{source_text}</p>')
                        elif isinstance(element, dict):
                            # Regular paragraph element - extract from nested content structure (standard JSON format)
                            content = element.get("content", {})
                            if isinstance(content, dict):
                                text = content.get("text", "")
                            elif isinstance(content, str):
                                text = content
                            else:
                                text = ""
                            
                            if text:
                                htmlParts.append(f'<p>{text}</p>')
                        elif isinstance(element, str):
                            htmlParts.append(f'<p>{element}</p>')
                    
                    if htmlParts:
                        return '\n'.join(htmlParts)
                # If sectionData is not a list, treat it as a dict
                if isinstance(sectionData, dict):
                    return self._renderJsonParagraph(sectionData, styles)
                return ""
            elif sectionType == "code_block":
                # Work directly with elements like other renderers
                if isinstance(sectionData, list) and sectionData:
                    element = sectionData[0] if isinstance(sectionData[0], dict) else {}
                    return self._renderJsonCodeBlock(element, styles)
                return ""
            elif sectionType == "image":
                # Work directly with elements like other renderers
                if isinstance(sectionData, list) and sectionData:
                    element = sectionData[0] if isinstance(sectionData[0], dict) else {}
                    return self._renderJsonImage(element, styles)
                return ""
            else:
                # Fallback: Check for special element types first
                if isinstance(sectionData, list):
                    htmlParts = []
                    for element in sectionData:
                        element_type = element.get("type", "") if isinstance(element, dict) else ""
                        
                        if element_type == "reference":
                            doc_ref = element.get("documentReference", "")
                            label = element.get("label", "Reference")
                            htmlParts.append(f'<p class="reference"><em>[Reference: {label}]</em></p>')
                        elif element_type == "extracted_text":
                            content = element.get("content", "")
                            source = element.get("source", "")
                            if content:
                                source_text = f' <small><em>(Source: {source})</em></small>' if source else ''
                                htmlParts.append(f'<p>{content}{source_text}</p>')
                    
                    if htmlParts:
                        return '\n'.join(htmlParts)
                # Fallback to paragraph for unknown types
                if isinstance(sectionData, dict):
                    return self._renderJsonParagraph(sectionData, styles)
                return ""
                
        except Exception as e:
            self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
            return f'<div class="error">[Error rendering section: {str(e)}]</div>'
    
    def _renderJsonTable(self, tableData: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON table to HTML using AI-generated styles."""
        try:
            # Extract from nested content structure: element.content.{headers, rows}
            content = tableData.get("content", {})
            if not isinstance(content, dict):
                return ""
            headers = content.get("headers", [])
            rows = content.get("rows", [])
            
            if not headers or not rows:
                return ""
            
            htmlParts = ['<table>']
            
            # Table header
            htmlParts.append('<thead><tr>')
            for header in headers:
                htmlParts.append(f'<th>{header}</th>')
            htmlParts.append('</tr></thead>')
            
            # Table body
            htmlParts.append('<tbody>')
            for row in rows:
                htmlParts.append('<tr>')
                for cellData in row:
                    htmlParts.append(f'<td>{cellData}</td>')
                htmlParts.append('</tr>')
            htmlParts.append('</tbody>')
            
            htmlParts.append('</table>')
            return '\n'.join(htmlParts)
            
        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return ""
    
    def _renderJsonBulletList(self, listData: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON bullet list to HTML using AI-generated styles."""
        try:
            # Extract from nested content structure: element.content.{items}
            content = listData.get("content", {})
            if not isinstance(content, dict):
                return ""
            items = content.get("items", [])
            
            if not items:
                return ""
            
            htmlParts = ['<ul>']
            for item in items:
                if isinstance(item, str):
                    htmlParts.append(f'<li>{item}</li>')
                elif isinstance(item, dict) and "text" in item:
                    htmlParts.append(f'<li>{item["text"]}</li>')
            htmlParts.append('</ul>')
            
            return '\n'.join(htmlParts)
            
        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return ""
    
    def _renderJsonHeading(self, headingData: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON heading to HTML using AI-generated styles."""
        try:
            # Extract from nested content structure: element.content.{text, level}
            content = headingData.get("content", {})
            if not isinstance(content, dict):
                return ""
            text = content.get("text", "")
            level = content.get("level", 1)
            
            if text:
                level = max(1, min(6, level))
                return f'<h{level}>{text}</h{level}>'
            
            return ""
            
        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return ""
    
    def _renderJsonParagraph(self, paragraphData: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON paragraph to HTML using AI-generated styles."""
        try:
            # Normalize inputs - paragraphData is typically a list of elements from _getSectionData
            if isinstance(paragraphData, list):
                # Extract text from all paragraph elements (expects nested content structure)
                texts = []
                for el in paragraphData:
                    if isinstance(el, dict):
                        content = el.get("content", {})
                        if isinstance(content, dict):
                            text = content.get("text", "")
                        elif isinstance(content, str):
                            text = content
                        else:
                            text = ""
                        if text:
                            texts.append(text)
                    elif isinstance(el, str):
                        texts.append(el)
                if texts:
                    # Join multiple paragraphs with <p> tags
                    return '\n'.join(f'<p>{text}</p>' for text in texts)
                return ""
            elif isinstance(paragraphData, str):
                return f'<p>{paragraphData}</p>'
            elif isinstance(paragraphData, dict):
                # Handle nested content structure: element.content vs element.text
                # Extract from nested content structure
                content = paragraphData.get("content", {})
                if isinstance(content, dict):
                    text = content.get("text", "")
                elif isinstance(content, str):
                    text = content
                else:
                    text = ""
                if text:
                    return f'<p>{text}</p>'
                return ""
            else:
                return ""
            
        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return ""
    
    def _renderJsonCodeBlock(self, codeData: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON code block to HTML using AI-generated styles."""
        try:
            # Extract from nested content structure: element.content.{code, language}
            content = codeData.get("content", {})
            if not isinstance(content, dict):
                return ""
            code = content.get("code", "")
            language = content.get("language", "")
            
            if code:
                if language:
                    return f'<pre><code class="language-{language}">{code}</code></pre>'
                else:
                    return f'<pre><code>{code}</code></pre>'
            
            return ""
            
        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return ""
    
    def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON image to HTML with placeholder for later replacement. Expects nested content structure."""
        try:
            import html
            # Extract from nested content structure (standard JSON format)
            content = imageData.get("content", {})
            if not isinstance(content, dict):
                return ""
            
            base64Data = content.get("base64Data", "")
            altText = content.get("altText", "Image")
            caption = content.get("caption", "")
            
            # Escape HTML in altText and caption to prevent injection
            altTextEscaped = html.escape(str(altText))
            captionEscaped = html.escape(str(caption)) if caption else ""
            
            if base64Data:
                # Use data URI as placeholder - will be replaced with file path in _replaceImageDataUris
                # Include a marker so we can find and replace it
                imageMarker = f"<!--IMAGE_MARKER:{len(base64Data)}:{altTextEscaped[:50]}-->"
                # Add max-width and max-height to ensure image fits within page dimensions
                # Typical page width is ~800-1200px, height varies but we limit to 600px for readability
                imgTag = f'<img src="data:image/png;base64,{base64Data}" alt="{altTextEscaped}" style="max-width: 100%; max-height: 600px; width: auto; height: auto;">'
                
                if captionEscaped:
                    return f'{imageMarker}<figure>{imgTag}<figcaption>{captionEscaped}</figcaption></figure>'
                else:
                    return f'{imageMarker}{imgTag}'
            
            return ""
            
        except Exception as e:
            self.logger.error(f"Error embedding image in HTML: {str(e)}")
            altText = imageData.get("altText", "Image")
            errorMsg = html.escape(f"[Error: Could not embed image '{altText}'. {str(e)}]")
            return f'<div class="error" style="color: red; padding: 10px; border: 1px solid red;">{errorMsg}</div>'
    
    def _extractImages(self, jsonContent: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Extract all images from JSON structure.
        
        Returns:
            List of image data dictionaries with base64Data, altText, caption, sectionId
        """
        images = []
        
        try:
            # Extract from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
            documents = jsonContent.get("documents", [])
            if not documents or not isinstance(documents, list):
                return images
            
            for doc in documents:
                if not isinstance(doc, dict):
                    continue
                sections = doc.get("sections", [])
                for section in sections:
                    if section.get("content_type") == "image":
                        elements = section.get("elements", [])
                        for element in elements:
                            # Extract from nested content structure
                            content = element.get("content", {})
                            base64Data = ""
                            
                            if isinstance(content, dict):
                                base64Data = content.get("base64Data", "")
                            elif isinstance(content, str):
                                # Content might be base64 string directly (shouldn't happen)
                                pass
                            
                            # If base64Data not found in content, try direct element fields (fallback)
                            if not base64Data:
                                base64Data = element.get("base64Data", "")
                            
                            # If base64Data still not found, try extracting from url data URI
                            if not base64Data:
                                url = element.get("url", "") or (content.get("url", "") if isinstance(content, dict) else "")
                                if url and isinstance(url, str) and url.startswith("data:image/"):
                                    # Extract base64 from data URI: data:image/png;base64,<base64>
                                    import re
                                    match = re.match(r'data:image/[^;]+;base64,(.+)', url)
                                    if match:
                                        base64Data = match.group(1)
                            
                            if base64Data:
                                sectionId = section.get("id", "unknown")
                                
                                # Bestimme MIME-Type und Extension
                                mimeType = element.get("mimeType", "") or (content.get("mimeType", "") if isinstance(content, dict) else "")
                                if not mimeType or mimeType == "unknown":
                                    # Versuche MIME-Type aus base64 zu erkennen
                                    if base64Data.startswith("/9j/"):
                                        mimeType = "image/jpeg"
                                    elif base64Data.startswith("iVBORw0KGgo"):
                                        mimeType = "image/png"
                                    else:
                                        mimeType = "image/png"  # Default
                                
                                # Bestimme Extension basierend auf MIME-Type
                                extension = "png"
                                if mimeType == "image/jpeg" or mimeType == "image/jpg":
                                    extension = "jpg"
                                elif mimeType == "image/png":
                                    extension = "png"
                                elif mimeType == "image/gif":
                                    extension = "gif"
                                elif mimeType == "image/webp":
                                    extension = "webp"
                                
                                # Generate filename from section ID
                                filename = f"{sectionId}.{extension}"
                                # Clean filename (remove invalid characters)
                                filename = "".join(c if c.isalnum() or c in "._-" else "_" for c in filename)
                                
                                images.append({
                                    "base64Data": base64Data,
                                    "altText": element.get("altText", "Image"),
                                    "caption": element.get("caption"),
                                    "sectionId": sectionId,
                                    "filename": filename,
                                    "mimeType": mimeType
                                })
                                self.logger.debug(f"Extracted image from section {sectionId}: {filename}")
            
            self.logger.info(f"Extracted {len(images)} image(s) from JSON structure")
            return images
            
        except Exception as e:
            self.logger.warning(f"Error extracting images: {str(e)}")
            return []
    
    def _replaceImageDataUris(self, htmlContent: str, images: List[Dict[str, Any]]) -> str:
        """
        Replace base64 data URIs in HTML with relative file paths.
        
        Args:
            htmlContent: HTML content with data URIs
            images: List of image data dictionaries
            
        Returns:
            HTML content with relative file paths
        """
        try:
            import base64
            import re
            
            # Find entire img tags with data URIs and replace them
            # Pattern: <img src="data:image/[type];base64,<base64>" [other attributes]>
            imgTagPattern = r'<img\s+src="data:image/[^"]+"[^>]*>'
            
            def replaceImgTag(match):
                imgTag = match.group(0)
                
                # Extract base64 data from the img tag
                base64Match = re.search(r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)', imgTag)
                if not base64Match:
                    return imgTag  # Return original if no base64 found
                
                base64Data = base64Match.group(1)
                
                # Find matching image in images list
                matchingImage = None
                for img in images:
                    imgBase64 = img.get("base64Data", "")
                    # Vergleiche base64-Daten (kann unterschiedliche Längen haben durch Padding)
                    if imgBase64 == base64Data or imgBase64.startswith(base64Data[:100]) or base64Data.startswith(imgBase64[:100]):
                        matchingImage = img
                        break
                
                if matchingImage:
                    import html
                    # Use filename from image data (generated from section ID)
                    filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png")
                    
                    # Extract existing alt text or use from matchingImage
                    altMatch = re.search(r'alt="([^"]*)"', imgTag)
                    existingAlt = altMatch.group(1) if altMatch else ""
                    altText = html.escape(str(matchingImage.get("altText", existingAlt or "Image")))
                    caption = html.escape(str(matchingImage.get("caption", ""))) if matchingImage.get("caption") else ""
                    
                    # Create new img tag with filename
                    imgTag = f'<img src="{filename}" alt="{altText}">'
                    
                    if caption:
                        return f'<figure>{imgTag}<figcaption>{caption}</figcaption></figure>'
                    else:
                        return imgTag
                else:
                    # Keep original if no match found
                    return match.group(0)
            
            # Replace all img tags with data URIs (auch IMAGE_MARKER Kommentare entfernen)
            updatedHtml = re.sub(imgTagPattern, replaceImgTag, htmlContent)
            # Entferne IMAGE_MARKER Kommentare die übrig geblieben sind
            updatedHtml = re.sub(r'<!--IMAGE_MARKER:[^>]+-->', '', updatedHtml)
            
            return updatedHtml
            
        except Exception as e:
            self.logger.warning(f"Error replacing image data URIs: {str(e)}")
            return htmlContent  # Return original if replacement fails
    
    def getRenderedImages(self) -> List[Dict[str, Any]]:
        """
        Get images that were extracted during rendering.
        Returns list of image dicts with base64Data, altText, caption, and filename.
        """
        if not hasattr(self, '_renderedImages'):
            return []
        return self._renderedImages