# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
Image renderer for report generation using AI image generation.
"""

from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import logging
import base64

logger = logging.getLogger(__name__)


class RendererImage(BaseRenderer):
    """Renders content to image format using AI image generation."""

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported image formats."""
        return ['png', 'jpg', 'jpeg', 'image']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['img', 'picture', 'photo', 'graphic']

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for image renderer."""
        return 90

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: Images are visual media."""
        return 'image'

    async def render(self, extractedContent: Dict[str, Any], title: str,
                     userPrompt: Optional[str] = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to image format using AI image generation.

        Args:
            extractedContent: Standardized schema dict ({metadata: {...}, documents: [...]}).
            title: Document title used for the prompt and filename fallback.
            userPrompt: Optional original user request, folded into the image prompt.
            aiService: Service object providing the AI calls (required downstream).

        Returns:
            A single-element list with the rendered PNG document.

        Raises:
            Exception: Wraps any failure; original cause is chained via ``from``.
        """
        try:
            # Generate AI image from content
            imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)

            # Determine filename from the first document entry, falling back to the title
            documents = extractedContent.get("documents", [])
            if documents and isinstance(documents[0], dict):
                filename = documents[0].get("filename")
                if not filename:
                    filename = self._determineFilename(title, "image/png")
            else:
                filename = self._determineFilename(title, "image/png")

            # Convert image content to bytes: a str is assumed to be base64;
            # if decoding fails, fall back to raw UTF-8 bytes (best effort).
            if isinstance(imageContent, str):
                try:
                    imageBytes = base64.b64decode(imageContent)
                except Exception:
                    imageBytes = imageContent.encode('utf-8')
            else:
                imageBytes = imageContent

            # Extract metadata for document type and other info
            metadata = extractedContent.get("metadata", {}) if extractedContent else {}
            documentType = metadata.get("documentType") if isinstance(metadata, dict) else None

            return [
                RenderedDocument(
                    documentData=imageBytes,
                    mimeType="image/png",
                    filename=filename,
                    documentType=documentType,
                    metadata=metadata if isinstance(metadata, dict) else None
                )
            ]

        except Exception as e:
            self.logger.error(f"Error rendering image: {str(e)}")
            # Re-raise (no silent fallback); chain the cause so the traceback survives
            raise Exception(f"Image rendering failed: {str(e)}") from e

    async def _generateAiImage(self, extractedContent: Dict[str, Any], title: str,
                               userPrompt: Optional[str] = None, aiService=None) -> Any:
        """Generate an AI image from extracted content.

        Returns the raw image payload from the AI service: either base64 text
        (``AiResponse.content``) or the first document's ``documentData``
        (presumably bytes — the caller handles both; TODO confirm against
        DocumentData).

        Raises:
            ValueError: If no AI service is given, the schema is invalid, or
                the AI returned no image data.
            Exception: Wraps any other failure, with the cause chained.
        """
        try:
            if not aiService:
                raise ValueError("AI service is required for image generation")

            # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
            if not self._validateJsonStructure(extractedContent):
                raise ValueError("Extracted content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

            # Extract metadata from standardized schema
            metadata = self._extractMetadata(extractedContent)

            # Use provided title (which comes from documents[].title) as primary source
            # Fallback to metadata.title only if title parameter is empty
            documentTitle = title if title else metadata.get("title", "Generated Document")

            # Create AI prompt for image generation
            imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService)

            # Save image generation prompt to debug
            aiService.services.utils.writeDebugFile(imagePrompt, "image_generation_prompt")

            # Format prompt as JSON with image generation parameters
            from modules.datamodels.datamodelAi import AiCallPromptImage, AiCallOptions, OperationTypeEnum

            promptModel = AiCallPromptImage(
                prompt=imagePrompt,
                size="1024x1024",
                quality="standard",
                style="vivid"
            )
            promptJson = promptModel.model_dump_json(exclude_none=True, indent=2)

            options = AiCallOptions(
                operationType=OperationTypeEnum.IMAGE_GENERATE,
                resultFormat="base64"
            )

            # Use unified callAiContent method
            imageResponse = await aiService.callAiContent(
                prompt=promptJson,
                options=options,
                outputFormat="base64"
            )

            # Save image generation response to debug
            aiService.services.utils.writeDebugFile(str(imageResponse.content), "image_generation_response")

            # Extract base64 image data from AiResponse
            # AiResponse.documents contains DocumentData objects
            if imageResponse.documents and len(imageResponse.documents) > 0:
                imageData = imageResponse.documents[0].documentData
                if imageData:
                    return imageData

            # Fallback: check content field (might be base64 string)
            if imageResponse.content:
                return imageResponse.content

            raise ValueError("No image data returned from AI")

        except Exception as e:
            self.logger.error(f"Error generating AI image: {str(e)}")
            # Chain the cause so the underlying traceback is preserved
            raise Exception(f"AI image generation failed: {str(e)}") from e

    async def _createImageGeneratePrompt(self, extractedContent: Dict[str, Any], title: str,
                                         userPrompt: Optional[str] = None, aiService=None) -> str:
        """Create a detailed prompt for AI image generation based on the content.

        Assembles user intent, title, a visual description of the sections and
        style guidance, then enforces DALL-E's 4000-character prompt limit —
        first via AI compression, then via a minimal fallback prompt.
        Never raises: any failure degrades to a simple title-only prompt.
        """
        try:
            # Start with base prompt
            promptParts = []

            # Add user's original intent if available (sanitized when the service is present)
            if userPrompt:
                sanitized_prompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
                promptParts.append(f"User Request: {sanitized_prompt}")

            # Add document title
            promptParts.append(f"Document Title: {title}")

            # Analyze content and create visual description
            sections = self._extractSections(extractedContent)
            contentDescription = self._analyzeContentForVisualDescription(sections)
            if contentDescription:
                promptParts.append(f"Content to Visualize: {contentDescription}")

            # Add style guidance
            styleGuidance = self._getStyleGuidanceFromContent(extractedContent, userPrompt)
            if styleGuidance:
                promptParts.append(f"Visual Style: {styleGuidance}")

            # Combine all parts
            fullPrompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(promptParts)

            # Add technical requirements
            fullPrompt += "\n\nTechnical Requirements:"
            fullPrompt += "\n- High quality, professional appearance"
            fullPrompt += "\n- Clear, readable text if any text is included"
            fullPrompt += "\n- Appropriate colors and layout"
            fullPrompt += "\n- Suitable for business/professional use"

            # Truncate prompt if it exceeds DALL-E's 4000 character limit
            if len(fullPrompt) > 4000:
                # Use AI to compress the prompt intelligently
                compressedPrompt = await self._compressPromptWithAi(fullPrompt, aiService)
                if compressedPrompt and len(compressedPrompt) <= 4000:
                    return compressedPrompt

                # Fallback to minimal prompt if AI compression fails or is still too long
                minimalPrompt = f"Create a professional image representing: {title}"
                if userPrompt:
                    sanitized_prompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
                    minimalPrompt += f" - {sanitized_prompt}"

                # If even the minimal prompt is too long, truncate it
                if len(minimalPrompt) > 4000:
                    minimalPrompt = minimalPrompt[:3997] + "..."

                return minimalPrompt

            return fullPrompt

        except Exception as e:
            self.logger.warning(f"Error creating image prompt: {str(e)}")
            # Fallback to simple prompt
            return f"Create a professional image representing: {title}"

    async def _compressPromptWithAi(self, longPrompt: str, aiService=None) -> Optional[str]:
        """Use AI to intelligently compress a long prompt while preserving key information.

        Returns the compressed prompt, or None when no service is available,
        the result is invalid (empty, >4000 chars, or suspiciously short),
        or any error occurs.
        """
        try:
            if not aiService:
                return None

            compressionPrompt = f"""
You are an expert at creating concise, effective prompts for AI image generation.

The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information.

Original prompt ({len(longPrompt)} characters):
{longPrompt}

Please create a compressed version that:
1. Keeps the most important visual elements and requirements
2. Maintains the core intent and style guidance
3. Preserves technical requirements
4. Stays under 4000 characters
5. Is optimized for DALL-E image generation

Return only the compressed prompt, no explanations.
"""

            # Use AI to compress the prompt - call the AI service correctly
            # The ai_service has an aiObjects attribute that contains the actual AI interface
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum

            request = AiCallRequest(
                prompt=compressionPrompt,
                options=AiCallOptions(
                    operationType=OperationTypeEnum.DATA_GENERATE,
                    maxTokens=None,  # Let the model use its full context length
                    temperature=0.3  # Lower temperature for more consistent compression
                )
            )
            response = await aiService.callAi(request)

            # Guard against a None content field before stripping
            compressed = response.content.strip() if response.content else None

            # Validate the compressed prompt
            if compressed and len(compressed) <= 4000 and len(compressed) > 50:
                self.logger.info(f"Successfully compressed prompt from {len(longPrompt)} to {len(compressed)} characters")
                return compressed
            else:
                self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
                return None

        except Exception as e:
            self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
            return None

    def _analyzeContentForVisualDescription(self, sections: List[Dict[str, Any]]) -> str:
        """Analyze content sections and create a visual description for AI.

        Produces a ';'-joined summary of tables, lists, headings, substantial
        paragraphs and code blocks. Never raises: falls back to a generic
        description on error.
        """
        try:
            descriptions = []

            for section in sections:
                sectionType = self._getSectionType(section)
                sectionData = self._getSectionData(section)

                if sectionType == "table":
                    headers = sectionData.get("headers", [])
                    rows = sectionData.get("rows", [])
                    if headers and rows:
                        descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}")
                elif sectionType == "bullet_list":
                    items = sectionData.get("items", [])
                    if items:
                        descriptions.append(f"List with {len(items)} items")
                elif sectionType == "heading":
                    text = sectionData.get("text", "")
                    level = sectionData.get("level", 1)
                    if text:
                        descriptions.append(f"Heading {level}: {text}")
                elif sectionType == "paragraph":
                    text = sectionData.get("text", "")
                    if text and len(text) > 10:  # Only include substantial paragraphs
                        # Truncate long text
                        truncated = text[:100] + "..." if len(text) > 100 else text
                        descriptions.append(f"Text content: {truncated}")
                elif sectionType == "code_block":
                    code = sectionData.get("code", "")
                    language = sectionData.get("language", "")
                    if code:
                        descriptions.append(f"Code block ({language}): {code[:50]}...")

            return "; ".join(descriptions) if descriptions else "General document content"

        except Exception as e:
            self.logger.warning(f"Error analyzing content: {str(e)}")
            return "Document content"

    def _getStyleGuidanceFromContent(self, extractedContent: Dict[str, Any],
                                     userPrompt: Optional[str] = None) -> str:
        """Determine visual style guidance based on content and user prompt.

        Matches style keywords in the user prompt (first match wins) and adds
        hints derived from the section types present. Never raises: falls back
        to "professional design" on error.
        """
        try:
            styleElements = []

            # Analyze user prompt for style hints
            if userPrompt:
                promptLower = userPrompt.lower()
                if any(word in promptLower for word in ["modern", "contemporary", "sleek"]):
                    styleElements.append("modern, clean design")
                elif any(word in promptLower for word in ["classic", "traditional", "formal"]):
                    styleElements.append("classic, formal design")
                elif any(word in promptLower for word in ["creative", "artistic", "colorful"]):
                    styleElements.append("creative, artistic design")
                elif any(word in promptLower for word in ["corporate", "business", "professional"]):
                    styleElements.append("corporate, professional design")

            # Analyze content type for additional style hints
            sections = self._extractSections(extractedContent)
            hasTables = any(self._getSectionType(s) == "table" for s in sections)
            hasLists = any(self._getSectionType(s) == "bullet_list" for s in sections)
            hasCode = any(self._getSectionType(s) == "code_block" for s in sections)

            if hasTables:
                styleElements.append("data-focused layout")
            if hasLists:
                styleElements.append("organized, structured presentation")
            if hasCode:
                styleElements.append("technical, developer-friendly")

            # Default style if no specific guidance
            if not styleElements:
                styleElements.append("professional, clean design")

            return ", ".join(styleElements)

        except Exception as e:
            self.logger.warning(f"Error determining style guidance: {str(e)}")
            return "professional design"