gateway/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Base renderer class for all format renderers.
"""

from abc import ABC, abstractmethod
from typing import Dict, Any, List, Tuple, Optional
from modules.datamodels.datamodelJson import supportedSectionTypes
from modules.datamodels.datamodelDocument import RenderedDocument
import json
import logging
import re
from datetime import datetime, UTC
import base64
import io
from PIL import Image
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum

logger = logging.getLogger(__name__)

class BaseRenderer(ABC):
    """Base class for all format renderers."""

    def __init__(self, services=None):
        self.logger = logger
        self.services = services  # Add services attribute

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """
        Return list of supported format names for this renderer.
        Override this method in subclasses to specify supported formats.
        """
        return []

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """
        Return list of format aliases for this renderer.
        Override this method in subclasses to specify format aliases.
        """
        return []

    @classmethod
    def getPriority(cls) -> int:
        """
        Return priority for this renderer (higher number = higher priority).
        Used when multiple renderers support the same format.
        """
        return 0

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """
        Return the output style classification for this renderer.
        Returns: 'code', 'document', 'image', or other (e.g., 'video' for future use)
        Override this method in subclasses to specify the output style.

        Args:
            formatName: Optional format name (e.g., 'txt', 'js', 'csv') - useful for renderers
                       that handle multiple formats with different styles (e.g., RendererText)
        """
        return 'document'  # Default to document style

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that this renderer accepts.
        This allows renderers to declare which section types they can process.

        Default implementation returns all supported section types.
        Override this method in subclasses to restrict accepted types.

        Args:
            formatName: Optional format name (e.g., 'txt', 'js', 'csv') - useful for renderers
                       that handle multiple formats with different accepted types (e.g., RendererText)

        Returns:
            List of accepted section content types (e.g., ["table", "paragraph", "heading"])
            Valid types: "table", "bullet_list", "heading", "paragraph", "code_block", "image"
        """
        # Default: accept all section types
        return list(supportedSectionTypes)

    @abstractmethod
    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """
        Render extracted JSON content to multiple documents.
        Each renderer must implement this method.
        Can return 1..n documents (e.g., HTML + images).

        Args:
            extractedContent: Structured JSON content with sections and metadata (contains single document)
            title: Report title
            userPrompt: Original user prompt for context
            aiService: AI service instance for additional processing

        Returns:
            List of RenderedDocument objects.
            First document is the main document, additional documents are supporting files (e.g., images).
            Even if only one document is returned, it must be wrapped in a list.
        """
        pass

    def _determineFilename(self, title: str, mimeType: str) -> str:
        """Determine filename from title and mimeType."""
        import re
        # Get extension from mimeType
        extensionMap = {
            "text/html": "html",
            "application/pdf": "pdf",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
            "text/plain": "txt",
            "text/markdown": "md",
            "application/json": "json",
            "text/csv": "csv"
        }
        extension = extensionMap.get(mimeType, "txt")

        # Sanitize title for filename
        sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", title)
        sanitized = re.sub(r"_+", "_", sanitized).strip("_")
        if not sanitized:
            sanitized = "document"

        return f"{sanitized}.{extension}"

    def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
        Phase 5: Supports multiple documents - extracts all sections from all documents.
        """
        if "documents" not in reportData:
            raise ValueError("Report data must follow standardized schema with 'documents' array")

        documents = reportData.get("documents", [])
        if not isinstance(documents, list) or len(documents) == 0:
            raise ValueError("Standardized schema must contain at least one document in 'documents' array")

        # Phase 5: Extract sections from ALL documents
        all_sections = []
        for doc in documents:
            if isinstance(doc, dict) and "sections" in doc:
                sections = doc.get("sections", [])
                if isinstance(sections, list):
                    all_sections.extend(sections)

        if not all_sections:
            raise ValueError("No sections found in any document")

        return all_sections

    def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract metadata from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
        """
        if "metadata" not in reportData:
            raise ValueError("Report data must follow standardized schema with 'metadata' field")

        metadata = reportData.get("metadata", {})
        if not isinstance(metadata, dict):
            raise ValueError("Metadata in standardized schema must be a dictionary")

        return metadata

    def _getTitle(self, reportData: Dict[str, Any], fallbackTitle: str) -> str:
        """Get title from report data or use fallback."""
        metadata = reportData.get('metadata', {})
        return metadata.get('title', fallbackTitle)

    def _validateJsonStructure(self, jsonContent: Dict[str, Any]) -> bool:
        """
        Validate that JSON content follows standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
        """
        if not isinstance(jsonContent, dict):
            return False

        # Validate metadata field exists
        if "metadata" not in jsonContent:
            return False

        if not isinstance(jsonContent.get("metadata"), dict):
            return False

        # Validate documents array exists and is not empty
        if "documents" not in jsonContent:
            return False

        documents = jsonContent.get("documents", [])
        if not isinstance(documents, list) or len(documents) == 0:
            return False

        # Validate first document has sections
        firstDoc = documents[0]
        if not isinstance(firstDoc, dict) or "sections" not in firstDoc:
            return False

        sections = firstDoc.get("sections", [])
        if not isinstance(sections, list):
            return False

        # Validate each section has content_type and elements
        for section in sections:
            if not isinstance(section, dict):
                return False
            if "content_type" not in section or "elements" not in section:
                return False

        return True

    def _getSectionType(self, section: Dict[str, Any]) -> str:
        """Get the type of a section; default to 'paragraph' for non-dict inputs."""
        if isinstance(section, dict):
            return section.get("content_type", "paragraph")
        # If section is a list or any other type, treat as paragraph elements
        return "paragraph"

    def _getSectionData(self, section: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Get the elements of a section; if a list is provided directly, return it."""
        if isinstance(section, dict):
            return section.get("elements", [])
        if isinstance(section, list):
            return section
        return []

    def _getSectionId(self, section: Dict[str, Any]) -> str:
        """Get the ID of a section (if available)."""
        if isinstance(section, dict):
            return section.get("id", "unknown")
        return "unknown"

    def _validateImageData(self, base64Data: str, altText: str) -> bool:
        """Validate image data."""
        if not base64Data:
            self.logger.warning("Image section has no base64 data")
            return False

        if not altText:
            self.logger.warning("Image section has no alt text")
            return False

        # Basic base64 validation
        try:
            base64.b64decode(base64Data, validate=True)
            return True
        except Exception as e:
            self.logger.warning(f"Invalid base64 image data: {str(e)}")
            return False

    def _getImageDimensions(self, base64Data: str) -> Tuple[int, int]:
        """
        Get image dimensions from base64 data.
        This is a helper method that format-specific renderers can use.
        """
        try:
            # Decode base64 data
            imageData = base64.b64decode(base64Data)
            image = Image.open(io.BytesIO(imageData))

            return image.size  # Returns (width, height)

        except Exception as e:
            self.logger.warning(f"Could not determine image dimensions: {str(e)}")
            return (0, 0)

    def _resizeImageIfNeeded(self, base64Data: str, maxWidth: int = 800, maxHeight: int = 600) -> str:
        """
        Resize image if it exceeds maximum dimensions.
        Returns the resized image as base64 string.
        """
        try:
            # Decode base64 data
            imageData = base64.b64decode(base64Data)
            image = Image.open(io.BytesIO(imageData))

            # Check if resizing is needed
            width, height = image.size
            if width <= maxWidth and height <= maxHeight:
                return base64Data  # No resizing needed

            # Calculate new dimensions maintaining aspect ratio
            ratio = min(maxWidth / width, maxHeight / height)
            newWidth = int(width * ratio)
            newHeight = int(height * ratio)

            # Resize image
            resizedImage = image.resize((newWidth, newHeight), Image.Resampling.LANCZOS)

            # Convert back to base64
            buffer = io.BytesIO()
            resizedImage.save(buffer, format=image.format or 'PNG')
            resizedData = buffer.getvalue()

            return base64.b64encode(resizedData).decode('utf-8')

        except Exception as e:
            self.logger.warning(f"Could not resize image: {str(e)}")
            return base64Data  # Return original if resize fails

    def _getSupportedSectionTypes(self) -> List[str]:
        """Return list of supported section types (from unified schema)."""
        return supportedSectionTypes

    def _isValidSectionType(self, sectionType: str) -> bool:
        """Check if a section type is valid."""
        return sectionType in self._getSupportedSectionTypes()

    def _formatTimestamp(self, timestamp: str = None) -> str:
        """Format timestamp for display."""
        if timestamp:
            return timestamp
        return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

    # ===== GENERIC AI STYLING HELPERS =====

    async def _getAiStyles(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generic AI styling method that can be used by all renderers.

        Args:
            aiService: AI service instance
            styleTemplate: Format-specific style template
            defaultStyles: Default styles to fall back to

        Returns:
            Dict with styling definitions
        """
        # DEBUG: Show which renderer is calling this method

        if not aiService:
            return defaultStyles

        try:

            requestOptions = AiCallOptions()
            requestOptions.operationType = OperationTypeEnum.DATA_GENERATE

            request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)

            # DEBUG: Show the actual prompt being sent to AI
            self.logger.debug(f"AI Style Template Prompt:")
            self.logger.debug(f"{styleTemplate}")

            response = await aiService.callAi(request)

            # Save styling prompt and response to debug (fire and forget - don't block on slow file I/O)
            # The writeDebugFile calls os.listdir() which can be slow with many files
            # Run in background thread to avoid blocking rendering
            import threading
            def _writeDebugFiles():
                try:
                    self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
                    self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
                except Exception:
                    pass  # Silently fail - debug writing should never block rendering

            threading.Thread(target=_writeDebugFiles, daemon=True).start()

            # Clean and parse JSON
            result = response.content.strip() if response and response.content else ""

            # Check if result is empty
            if not result:
                self.logger.warning("AI styling returned empty response, using defaults")
                return defaultStyles

            # Extract JSON from markdown if present
            jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
            if jsonMatch:
                result = jsonMatch.group(1).strip()
            elif result.startswith('```json'):
                result = re.sub(r'^```json\s*', '', result)
                result = re.sub(r'\s*```$', '', result)
            elif result.startswith('```'):
                result = re.sub(r'^```\s*', '', result)
                result = re.sub(r'\s*```$', '', result)

            # Try to parse JSON
            try:
                styles = json.loads(result)
            except json.JSONDecodeError as jsonError:
                self.logger.warning(f"AI styling returned invalid JSON: {jsonError}")

                # Use print instead of logger to avoid truncation
                self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "RENDERER")
                self.services.utils.debugLogToFile(f"RESPONSE LENGTH: {len(result)} characters", "RENDERER")

                self.logger.warning(f"Raw content that failed to parse: {result}")

                # Try to fix incomplete JSON by adding missing closing braces
                openBraces = result.count('{')
                closeBraces = result.count('}')

                if openBraces > closeBraces:
                    # JSON is incomplete, add missing closing braces
                    missingBraces = openBraces - closeBraces
                    result = result + '}' * missingBraces
                    self.logger.info(f"Added {missingBraces} missing closing brace(s)")
                    self.logger.debug(f"Fixed JSON: {result}")

                    # Try parsing the fixed JSON
                    try:
                        styles = json.loads(result)
                        self.logger.info("Successfully fixed incomplete JSON")
                    except json.JSONDecodeError as fixError:
                        self.logger.warning(f"Fixed JSON still invalid: {fixError}")
                        self.logger.warning(f"Fixed JSON content: {result}")
                        # Try to extract just the JSON part if it's embedded in text
                        jsonStart = result.find('{')
                        jsonEnd = result.rfind('}')
                        if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart:
                            jsonPart = result[jsonStart:jsonEnd+1]
                            try:
                                styles = json.loads(jsonPart)
                                self.logger.info("Successfully extracted JSON from explanatory text")
                            except json.JSONDecodeError:
                                self.logger.warning("Could not extract valid JSON from response, using defaults")
                                return defaultStyles
                        else:
                            return defaultStyles
                else:
                    # Try to extract just the JSON part if it's embedded in text
                    jsonStart = result.find('{')
                    jsonEnd = result.rfind('}')
                    if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart:
                        jsonPart = result[jsonStart:jsonEnd+1]
                        try:
                            styles = json.loads(jsonPart)
                            self.logger.info("Successfully extracted JSON from explanatory text")
                        except json.JSONDecodeError:
                            self.logger.warning("Could not extract valid JSON from response, using defaults")
                            return defaultStyles
                    else:
                        return defaultStyles

            # Convert colors to appropriate format
            styles = self._convertColorsFormat(styles)

            return styles

        except Exception as e:
            self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
            return defaultStyles

    def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert colors to appropriate format based on renderer type.
        Override this method in subclasses for format-specific color handling.
        """
        return styles

    def _createAiStyleTemplate(self, formatName: str, userPrompt: str, styleSchema: Dict[str, Any]) -> str:
        """
        Create a standardized AI style template for any format.

        Args:
            formatName: Name of the format (e.g., "docx", "xlsx", "pptx")
            userPrompt: User's original prompt
            styleSchema: Format-specific style schema

        Returns:
            Formatted prompt string
        """
        schemaJson = json.dumps(styleSchema, indent=4)

        # DEBUG: Show the schema being sent

        return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {formatName.upper()} documents.

User request: {userPrompt}

Use this schema as a template:
{schemaJson}

Requirements:
- Return ONLY the complete JSON object (no markdown, no explanations)
- If the user request contains style/formatting/design instructions (in any language), customize the styling accordingly (adapt styles and add styles if needed)
- If the user request has NO style instructions, return the default schema values unchanged
- Ensure all objects are properly closed with closing braces
- Only modify styles if style instructions are present in the user request

Return the complete JSON:"""