gateway/modules/serviceCenter/services/serviceGeneration/renderers/rendererImage.py
2026-03-06 14:03:18 +01:00

355 lines
16 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Image renderer for report generation using AI image generation.
"""
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import logging
import base64
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class RendererImage(BaseRenderer):
    """Renders content to image format using AI image generation.

    The renderer builds a text prompt from the extracted document content,
    asks the supplied AI service to generate an image for it and wraps the
    result in a single ``RenderedDocument`` with MIME type ``image/png``.

    Several helpers used here (``_validateJsonStructure``, ``_extractMetadata``,
    ``_extractSections``, ``_getSectionType``, ``_getSectionData``,
    ``_determineFilename`` and ``self.logger``) are not defined in this class;
    they are presumably inherited from ``BaseRenderer``.
    """

    # Maximum prompt length sent to image generation (DALL-E's character limit).
    _MAX_PROMPT_LENGTH = 4000
    # A compressed prompt must be longer than this to be considered usable.
    _MIN_COMPRESSED_PROMPT_LENGTH = 50
    # Keyword groups checked in order against the user prompt; first match wins.
    _STYLE_KEYWORDS = (
        (("modern", "contemporary", "sleek"), "modern, clean design"),
        (("classic", "traditional", "formal"), "classic, formal design"),
        (("creative", "artistic", "colorful"), "creative, artistic design"),
        (("corporate", "business", "professional"), "corporate, professional design"),
    )

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported image formats."""
        return ['png', 'jpg', 'jpeg', 'image']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['img', 'picture', 'photo', 'graphic']

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for image renderer."""
        return 90

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: Images are visual media.

        ``formatName`` is accepted for interface compatibility and is not used.
        """
        return 'image'

    @classmethod
    def getAcceptedSectionTypes(cls, formatName: Optional[str] = None) -> List[str]:
        """
        Return list of section content types that Image renderer accepts.

        Image renderer only accepts image sections (images are generated from
        image sections). ``formatName`` is accepted for interface compatibility
        and is not used.
        """
        return ["image"]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: Optional[str] = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to image format using AI image generation.

        Args:
            extractedContent: Document content in the standardized schema
                ``{metadata: {...}, documents: [{sections: [...]}]}``.
            title: Document title, used for the prompt and as filename fallback.
            userPrompt: Optional original user request.
            aiService: AI service used to generate the image (required).

        Returns:
            A list with exactly one ``RenderedDocument`` of MIME type ``image/png``.

        Raises:
            Exception: With message ``"Image rendering failed: ..."`` on any
                failure; the original error is chained as ``__cause__``.
        """
        try:
            # Generate AI image from content
            imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)

            # Prefer the filename of the first document, fall back to one derived from the title
            documents = extractedContent.get("documents", [])
            filename = None
            if documents and isinstance(documents[0], dict):
                filename = documents[0].get("filename")
            if not filename:
                filename = self._determineFilename(title, "image/png")

            # Convert image content to bytes (base64 string or bytes)
            imageBytes = self._decodeImageContent(imageContent)

            # Extract metadata for document type and other info
            metadata = extractedContent.get("metadata", {}) if extractedContent else {}
            if not isinstance(metadata, dict):
                metadata = None
            documentType = metadata.get("documentType") if metadata is not None else None

            return [
                RenderedDocument(
                    documentData=imageBytes,
                    mimeType="image/png",
                    filename=filename,
                    documentType=documentType,
                    metadata=metadata
                )
            ]
        except Exception as e:
            self.logger.error(f"Error rendering image: {str(e)}")
            # Re-raise instead of using a fallback; keep the original error as cause
            raise Exception(f"Image rendering failed: {str(e)}") from e

    @staticmethod
    def _decodeImageContent(imageContent: Any) -> Any:
        """Convert generated image content to raw bytes.

        Non-string content is returned unchanged. Strings are treated as
        base64; a leading ``data:...;base64,`` header is removed first because
        the lenient decoder would otherwise decode the header letters together
        with the payload. If decoding fails, the original string is returned
        UTF-8 encoded.
        """
        if not isinstance(imageContent, str):
            return imageContent

        payload = imageContent
        header, separator, body = imageContent.partition(",")
        if separator and header.lower().startswith("data:") and header.lower().endswith(";base64"):
            payload = body

        try:
            return base64.b64decode(payload)
        except ValueError:
            # binascii.Error (bad padding) is a ValueError subclass; non-ASCII input raises ValueError
            return imageContent.encode('utf-8')

    async def _generateAiImage(self, extractedContent: Dict[str, Any], title: str, userPrompt: Optional[str] = None, aiService=None) -> str:
        """Generate AI image from extracted content.

        Args:
            extractedContent: Document content in the standardized schema.
            title: Preferred document title; ``metadata.title`` is used only
                when this is empty.
            userPrompt: Optional original user request.
            aiService: AI service providing ``callAiContent`` and debug utilities.

        Returns:
            The image data of the first returned document, or the response
            content as fallback. NOTE(review): ``documentData`` is passed
            through as-is and may not be a ``str``; ``render`` handles both
            strings and non-strings.

        Raises:
            Exception: With message ``"AI image generation failed: ..."`` when
                the service is missing, the content fails validation, the call
                fails or no image data is returned.
        """
        try:
            if not aiService:
                raise ValueError("AI service is required for image generation")

            # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
            if not self._validateJsonStructure(extractedContent):
                raise ValueError("Extracted content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

            # Extract metadata from standardized schema
            metadata = self._extractMetadata(extractedContent)

            # Use provided title (which comes from documents[].title) as primary source
            # Fallback to metadata.title only if title parameter is empty
            documentTitle = title if title else metadata.get("title", "Generated Document")

            # Create AI prompt for image generation
            imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService)

            # Save image generation prompt to debug
            aiService.services.utils.writeDebugFile(imagePrompt, "image_generation_prompt")

            # Format prompt as JSON with image generation parameters
            from modules.datamodels.datamodelAi import AiCallPromptImage, AiCallOptions, OperationTypeEnum
            promptModel = AiCallPromptImage(
                prompt=imagePrompt,
                size="1024x1024",
                quality="standard",
                style="vivid"
            )
            promptJson = promptModel.model_dump_json(exclude_none=True, indent=2)

            # Use unified callAiContent method
            options = AiCallOptions(
                operationType=OperationTypeEnum.IMAGE_GENERATE,
                resultFormat="base64"
            )
            imageResponse = await aiService.callAiContent(
                prompt=promptJson,
                options=options,
                outputFormat="base64"
            )

            # Save image generation response to debug
            aiService.services.utils.writeDebugFile(str(imageResponse.content), "image_generation_response")

            # Extract base64 image data from AiResponse
            # AiResponse.documents contains DocumentData objects
            responseDocuments = imageResponse.documents
            if responseDocuments:
                imageData = responseDocuments[0].documentData
                if imageData:
                    return imageData

            # Fallback: check content field (might be base64 string)
            if imageResponse.content:
                return imageResponse.content

            raise ValueError("No image data returned from AI")
        except Exception as e:
            self.logger.error(f"Error generating AI image: {str(e)}")
            raise Exception(f"AI image generation failed: {str(e)}") from e

    async def _createImageGeneratePrompt(self, extractedContent: Dict[str, Any], title: str, userPrompt: Optional[str] = None, aiService=None) -> str:
        """Create a detailed prompt for AI image generation based on the content.

        The prompt combines the (sanitized) user request, the document title, a
        summary of the sections and style guidance. If it exceeds the prompt
        length limit it is compressed via AI; if that fails, a minimal prompt
        made of title and user request is used, truncated if necessary.

        Returns:
            The prompt text. On any error a simple title-only prompt is
            returned instead of raising.
        """
        try:
            limit = self._MAX_PROMPT_LENGTH

            # Sanitize the user's request once; it is reused by the minimal fallback prompt
            sanitizedPrompt = None
            if userPrompt:
                sanitizedPrompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt

            promptParts = []

            # Add user's original intent if available
            if userPrompt:
                promptParts.append(f"User Request: {sanitizedPrompt}")

            # Add document title
            promptParts.append(f"Document Title: {title}")

            # Analyze content and create visual description
            sections = self._extractSections(extractedContent)
            contentDescription = self._analyzeContentForVisualDescription(sections)
            if contentDescription:
                promptParts.append(f"Content to Visualize: {contentDescription}")

            # Add style guidance
            styleGuidance = self._getStyleGuidanceFromContent(extractedContent, userPrompt)
            if styleGuidance:
                promptParts.append(f"Visual Style: {styleGuidance}")

            # Combine all parts and append the technical requirements
            fullPrompt = (
                "Create a professional, informative image that visualizes the following content:\n\n"
                + "\n\n".join(promptParts)
                + "\n\nTechnical Requirements:"
                + "\n- High quality, professional appearance"
                + "\n- Clear, readable text if any text is included"
                + "\n- Appropriate colors and layout"
                + "\n- Suitable for business/professional use"
            )

            if len(fullPrompt) <= limit:
                return fullPrompt

            # Prompt exceeds the limit: use AI to compress it intelligently
            compressedPrompt = await self._compressPromptWithAi(fullPrompt, aiService)
            if compressedPrompt and len(compressedPrompt) <= limit:
                return compressedPrompt

            # Fallback to minimal prompt if AI compression fails or is still too long
            minimalPrompt = f"Create a professional image representing: {title}"
            if userPrompt:
                minimalPrompt += f" - {sanitizedPrompt}"

            # If even the minimal prompt is too long, truncate it (3 chars reserved for "...")
            if len(minimalPrompt) > limit:
                minimalPrompt = minimalPrompt[:limit - 3] + "..."
            return minimalPrompt
        except Exception as e:
            self.logger.warning(f"Error creating image prompt: {str(e)}")
            # Fallback to simple prompt
            return f"Create a professional image representing: {title}"

    async def _compressPromptWithAi(self, longPrompt: str, aiService=None) -> Optional[str]:
        """Use AI to intelligently compress a long prompt while preserving key information.

        Returns:
            The compressed prompt when it is longer than the minimum usable
            length and within the prompt length limit; ``None`` when no AI
            service is given, the result is unusable or the call fails.
        """
        try:
            if not aiService:
                return None

            limit = self._MAX_PROMPT_LENGTH
            compressionPrompt = f"""
You are an expert at creating concise, effective prompts for AI image generation.
The following prompt is too long for DALL-E ({limit} character limit) and needs to be compressed to under {limit} characters while preserving the most important visual information.
Original prompt ({len(longPrompt)} characters):
{longPrompt}
Please create a compressed version that:
1. Keeps the most important visual elements and requirements
2. Maintains the core intent and style guidance
3. Preserves technical requirements
4. Stays under {limit} characters
5. Is optimized for DALL-E image generation
Return only the compressed prompt, no explanations.
"""
            # Use AI to compress the prompt via the service's generic call interface
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
            request = AiCallRequest(
                prompt=compressionPrompt,
                options=AiCallOptions(
                    operationType=OperationTypeEnum.DATA_GENERATE,
                    maxTokens=None,  # Let the model use its full context length
                    temperature=0.3  # Lower temperature for more consistent compression
                )
            )
            response = await aiService.callAi(request)
            # Treat a missing content field like an empty result
            compressed = (response.content or "").strip()

            # Validate the compressed prompt
            if self._MIN_COMPRESSED_PROMPT_LENGTH < len(compressed) <= limit:
                self.logger.info(f"Successfully compressed prompt from {len(longPrompt)} to {len(compressed)} characters")
                return compressed

            self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
            return None
        except Exception as e:
            self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
            return None

    @staticmethod
    def _truncateText(text: str, limit: int) -> str:
        """Return ``text`` cut to ``limit`` characters, with "..." appended only if it was cut."""
        return text[:limit] + "..." if len(text) > limit else text

    def _analyzeContentForVisualDescription(self, sections: List[Dict[str, Any]]) -> str:
        """Analyze content sections and create a visual description for AI.

        Tables, bullet lists, headings, paragraphs and code blocks each
        contribute one short phrase; other section types are ignored. Sections
        whose data is not a dict are skipped so that one malformed section does
        not discard the descriptions of the others.

        Returns:
            The phrases joined by "; ", ``"General document content"`` when
            nothing was described, or ``"Document content"`` on error.
        """
        try:
            descriptions = []
            for section in sections:
                sectionType = self._getSectionType(section)
                sectionData = self._getSectionData(section)
                if not isinstance(sectionData, dict):
                    continue

                if sectionType == "table":
                    headers = sectionData.get("headers", [])
                    rows = sectionData.get("rows", [])
                    if headers and rows:
                        # Headers may be non-strings (e.g. numbers); stringify before joining
                        headerNames = ', '.join(str(header) for header in headers)
                        descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {headerNames}")
                elif sectionType == "bullet_list":
                    items = sectionData.get("items", [])
                    if items:
                        descriptions.append(f"List with {len(items)} items")
                elif sectionType == "heading":
                    text = sectionData.get("text", "")
                    level = sectionData.get("level", 1)
                    if text:
                        descriptions.append(f"Heading {level}: {text}")
                elif sectionType == "paragraph":
                    text = sectionData.get("text", "")
                    # Only include substantial paragraphs
                    if isinstance(text, str) and len(text) > 10:
                        descriptions.append(f"Text content: {self._truncateText(text, 100)}")
                elif sectionType == "code_block":
                    code = sectionData.get("code", "")
                    language = sectionData.get("language", "")
                    if isinstance(code, str) and code:
                        descriptions.append(f"Code block ({language}): {self._truncateText(code, 50)}")

            return "; ".join(descriptions) if descriptions else "General document content"
        except Exception as e:
            self.logger.warning(f"Error analyzing content: {str(e)}")
            return "Document content"

    def _getStyleGuidanceFromContent(self, extractedContent: Dict[str, Any], userPrompt: Optional[str] = None) -> str:
        """Determine visual style guidance based on content and user prompt.

        The user prompt contributes at most one style: the first keyword group
        with a keyword found at the start of a word (so "modernized" matches
        "modern", but "informal" does not match "formal"). Tables, bullet lists
        and code blocks in the content each add a layout hint.

        Returns:
            Comma-separated style hints, ``"professional, clean design"`` when
            none apply, or ``"professional design"`` on error.
        """
        # Function-scope import, matching this file's use of local imports
        import re

        try:
            styleElements = []

            # Analyze user prompt for style hints
            if userPrompt:
                promptLower = userPrompt.lower()
                for keywords, styleDescription in self._STYLE_KEYWORDS:
                    pattern = r"\b(?:" + "|".join(re.escape(keyword) for keyword in keywords) + ")"
                    if re.search(pattern, promptLower):
                        styleElements.append(styleDescription)
                        break

            # Analyze content type for additional style hints; collect the types
            # once so that a non-reusable iterable of sections is handled correctly
            sectionTypes = [self._getSectionType(section) for section in self._extractSections(extractedContent)]
            if "table" in sectionTypes:
                styleElements.append("data-focused layout")
            if "bullet_list" in sectionTypes:
                styleElements.append("organized, structured presentation")
            if "code_block" in sectionTypes:
                styleElements.append("technical, developer-friendly")

            # Default style if no specific guidance
            if not styleElements:
                styleElements.append("professional, clean design")

            return ", ".join(styleElements)
        except Exception as e:
            self.logger.warning(f"Error determining style guidance: {str(e)}")
            return "professional design"