# gateway/modules/serviceCenter/services/serviceGeneration/renderers/rendererImage.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Image renderer for report generation using AI image generation.
"""

from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import logging
import base64

logger = logging.getLogger(__name__)

class RendererImage(BaseRenderer):
    """Renders content to image format using AI image generation."""

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported image formats."""
        return ['png', 'jpg', 'jpeg', 'image']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['img', 'picture', 'photo', 'graphic']

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for image renderer."""
        return 90

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: Images are visual media."""
        return 'image'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that Image renderer accepts.
        Image renderer only accepts image sections (images are generated from image sections).
        """
        return ["image"]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to image format using AI image generation."""
        try:
            # Generate AI image from content
            imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)

            # Determine filename from document or title
            documents = extractedContent.get("documents", [])
            if documents and isinstance(documents[0], dict):
                filename = documents[0].get("filename")
                if not filename:
                    filename = self._determineFilename(title, "image/png")
            else:
                filename = self._determineFilename(title, "image/png")

            # Convert image content to bytes (base64 string or bytes)
            if isinstance(imageContent, str):
                try:
                    imageBytes = base64.b64decode(imageContent)
                except Exception:
                    imageBytes = imageContent.encode('utf-8')
            else:
                imageBytes = imageContent

            # Extract metadata for document type and other info
            metadata = extractedContent.get("metadata", {}) if extractedContent else {}
            documentType = metadata.get("documentType") if isinstance(metadata, dict) else None

            return [
                RenderedDocument(
                    documentData=imageBytes,
                    mimeType="image/png",
                    filename=filename,
                    documentType=documentType,
                    metadata=metadata if isinstance(metadata, dict) else None
                )
            ]

        except Exception as e:
            self.logger.error(f"Error rendering image: {str(e)}")
            # Re-raise the exception instead of using fallback
            raise Exception(f"Image rendering failed: {str(e)}")

    async def _generateAiImage(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
        """Generate AI image from extracted content."""
        try:
            if not aiService:
                raise ValueError("AI service is required for image generation")

            # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
            if not self._validateJsonStructure(extractedContent):
                raise ValueError("Extracted content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

            # Extract metadata from standardized schema
            metadata = self._extractMetadata(extractedContent)

            # Use provided title (which comes from documents[].title) as primary source
            # Fallback to metadata.title only if title parameter is empty
            documentTitle = title if title else metadata.get("title", "Generated Document")

            # Create AI prompt for image generation
            imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService)

            # Save image generation prompt to debug
            aiService.services.utils.writeDebugFile(imagePrompt, "image_generation_prompt")

            # Format prompt as JSON with image generation parameters
            from modules.datamodels.datamodelAi import AiCallPromptImage, AiCallOptions, OperationTypeEnum
            import json

            promptModel = AiCallPromptImage(
                prompt=imagePrompt,
                size="1024x1024",
                quality="standard",
                style="vivid"
            )
            promptJson = promptModel.model_dump_json(exclude_none=True, indent=2)

            # Use unified callAiContent method
            options = AiCallOptions(
                operationType=OperationTypeEnum.IMAGE_GENERATE,
                resultFormat="base64"
            )

            # Use unified callAiContent method
            imageResponse = await aiService.callAiContent(
                prompt=promptJson,
                options=options,
                outputFormat="base64"
            )

            # Save image generation response to debug
            aiService.services.utils.writeDebugFile(str(imageResponse.content), "image_generation_response")

            # Extract base64 image data from AiResponse
            # AiResponse.documents contains DocumentData objects
            if imageResponse.documents and len(imageResponse.documents) > 0:
                imageData = imageResponse.documents[0].documentData
                if imageData:
                    return imageData

            # Fallback: check content field (might be base64 string)
            if imageResponse.content:
                return imageResponse.content

            raise ValueError("No image data returned from AI")

        except Exception as e:
            self.logger.error(f"Error generating AI image: {str(e)}")
            raise Exception(f"AI image generation failed: {str(e)}")

    async def _createImageGeneratePrompt(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
        """Create a detailed prompt for AI image generation based on the content."""
        try:
            # Start with base prompt
            promptParts = []

            # Add user's original intent if available
            if userPrompt:
                sanitized_prompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
                promptParts.append(f"User Request: {sanitized_prompt}")

            # Add document title
            promptParts.append(f"Document Title: {title}")

            # Analyze content and create visual description
            sections = self._extractSections(extractedContent)
            contentDescription = self._analyzeContentForVisualDescription(sections)

            if contentDescription:
                promptParts.append(f"Content to Visualize: {contentDescription}")

            # Add style guidance
            styleGuidance = self._getStyleGuidanceFromContent(extractedContent, userPrompt)
            if styleGuidance:
                promptParts.append(f"Visual Style: {styleGuidance}")

            # Combine all parts
            fullPrompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(promptParts)

            # Add technical requirements
            fullPrompt += "\n\nTechnical Requirements:"
            fullPrompt += "\n- High quality, professional appearance"
            fullPrompt += "\n- Clear, readable text if any text is included"
            fullPrompt += "\n- Appropriate colors and layout"
            fullPrompt += "\n- Suitable for business/professional use"

            # Truncate prompt if it exceeds DALL-E's 4000 character limit
            if len(fullPrompt) > 4000:
                # Use AI to compress the prompt intelligently
                compressedPrompt = await self._compressPromptWithAi(fullPrompt, aiService)
                if compressedPrompt and len(compressedPrompt) <= 4000:
                    return compressedPrompt

                # Fallback to minimal prompt if AI compression fails or is still too long
                minimalPrompt = f"Create a professional image representing: {title}"
                if userPrompt:
                    sanitized_prompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
                    minimalPrompt += f" - {sanitized_prompt}"

                # If even the minimal prompt is too long, truncate it
                if len(minimalPrompt) > 4000:
                    minimalPrompt = minimalPrompt[:3997] + "..."

                return minimalPrompt

            return fullPrompt

        except Exception as e:
            self.logger.warning(f"Error creating image prompt: {str(e)}")
            # Fallback to simple prompt
            return f"Create a professional image representing: {title}"

    async def _compressPromptWithAi(self, longPrompt: str, aiService=None) -> str:
        """Use AI to intelligently compress a long prompt while preserving key information."""
        try:
            if not aiService:
                return None

            compressionPrompt = f"""
You are an expert at creating concise, effective prompts for AI image generation.

The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information.

Original prompt ({len(longPrompt)} characters):
{longPrompt}

Please create a compressed version that:
1. Keeps the most important visual elements and requirements
2. Maintains the core intent and style guidance
3. Preserves technical requirements
4. Stays under 4000 characters
5. Is optimized for DALL-E image generation

Return only the compressed prompt, no explanations.
"""

            # Use AI to compress the prompt - call the AI service correctly
            # The ai_service has an aiObjects attribute that contains the actual AI interface
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum

            request = AiCallRequest(
                prompt=compressionPrompt,
                options=AiCallOptions(
                    operationType=OperationTypeEnum.DATA_GENERATE,
                    maxTokens=None,  # Let the model use its full context length
                    temperature=0.3  # Lower temperature for more consistent compression
                )
            )

            response = await aiService.callAi(request)
            compressed = response.content.strip()

            # Validate the compressed prompt
            if compressed and len(compressed) <= 4000 and len(compressed) > 50:
                self.logger.info(f"Successfully compressed prompt from {len(longPrompt)} to {len(compressed)} characters")
                return compressed
            else:
                self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
                return None

        except Exception as e:
            self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
            return None

    def _analyzeContentForVisualDescription(self, sections: List[Dict[str, Any]]) -> str:
        """Analyze content sections and create a visual description for AI."""
        try:
            descriptions = []

            for section in sections:
                sectionType = self._getSectionType(section)
                sectionData = self._getSectionData(section)

                if sectionType == "table":
                    headers = sectionData.get("headers", [])
                    rows = sectionData.get("rows", [])
                    if headers and rows:
                        descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}")

                elif sectionType == "bullet_list":
                    items = sectionData.get("items", [])
                    if items:
                        descriptions.append(f"List with {len(items)} items")

                elif sectionType == "heading":
                    text = sectionData.get("text", "")
                    level = sectionData.get("level", 1)
                    if text:
                        descriptions.append(f"Heading {level}: {text}")

                elif sectionType == "paragraph":
                    text = sectionData.get("text", "")
                    if text and len(text) > 10:  # Only include substantial paragraphs
                        # Truncate long text
                        truncated = text[:100] + "..." if len(text) > 100 else text
                        descriptions.append(f"Text content: {truncated}")

                elif sectionType == "code_block":
                    code = sectionData.get("code", "")
                    language = sectionData.get("language", "")
                    if code:
                        descriptions.append(f"Code block ({language}): {code[:50]}...")

            return "; ".join(descriptions) if descriptions else "General document content"

        except Exception as e:
            self.logger.warning(f"Error analyzing content: {str(e)}")
            return "Document content"

    def _getStyleGuidanceFromContent(self, extractedContent: Dict[str, Any], userPrompt: str = None) -> str:
        """Determine visual style guidance based on content and user prompt."""
        try:
            styleElements = []

            # Analyze user prompt for style hints
            if userPrompt:
                promptLower = userPrompt.lower()

                if any(word in promptLower for word in ["modern", "contemporary", "sleek"]):
                    styleElements.append("modern, clean design")
                elif any(word in promptLower for word in ["classic", "traditional", "formal"]):
                    styleElements.append("classic, formal design")
                elif any(word in promptLower for word in ["creative", "artistic", "colorful"]):
                    styleElements.append("creative, artistic design")
                elif any(word in promptLower for word in ["corporate", "business", "professional"]):
                    styleElements.append("corporate, professional design")

            # Analyze content type for additional style hints
            sections = self._extractSections(extractedContent)
            hasTables = any(self._getSectionType(s) == "table" for s in sections)
            hasLists = any(self._getSectionType(s) == "bullet_list" for s in sections)
            hasCode = any(self._getSectionType(s) == "code_block" for s in sections)

            if hasTables:
                styleElements.append("data-focused layout")
            if hasLists:
                styleElements.append("organized, structured presentation")
            if hasCode:
                styleElements.append("technical, developer-friendly")

            # Default style if no specific guidance
            if not styleElements:
                styleElements.append("professional, clean design")

            return ", ".join(styleElements)

        except Exception as e:
            self.logger.warning(f"Error determining style guidance: {str(e)}")
            return "professional design"