# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Image renderer for report generation using AI image generation.
"""
|
|
|
|
import base64
import logging
from typing import Any, Dict, List, Optional

from modules.datamodels.datamodelDocument import RenderedDocument

from .rendererBaseTemplate import BaseRenderer
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
class RendererImage(BaseRenderer):
    """Renders content to image format using AI image generation.

    The renderer turns the standardized extracted-content structure
    (``{metadata: {...}, documents: [{sections: [...]}]}``) into a text prompt,
    sends that prompt to the supplied AI service and wraps the returned image
    in a ``RenderedDocument``.

    NOTE(review): ``self.logger`` and the helpers ``_validateJsonStructure``,
    ``_extractMetadata``, ``_extractSections``, ``_getSectionType``,
    ``_getSectionData`` and ``_determineFilename`` are not defined in this
    class; they are presumably inherited from ``BaseRenderer`` - confirm.
    """

    # Upper bound on the prompt length sent to the image model. The comments and
    # prompt texts of this class refer to it as DALL-E's 4000 character limit.
    _MAX_PROMPT_LENGTH = 4000

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported image formats."""
        return ['png', 'jpg', 'jpeg', 'image']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['img', 'picture', 'photo', 'graphic']

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for image renderer."""
        return 90

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: Optional[str] = None, aiService=None) -> List[RenderedDocument]:
        """Render extracted JSON content to image format using AI image generation.

        Args:
            extractedContent: Content in the standardized schema.
            title: Fallback title, also used to derive a filename when the first
                document does not carry one.
            userPrompt: Optional original user request, forwarded to the prompt builder.
            aiService: AI service used for image generation (required).

        Returns:
            A single-element list with the rendered PNG document.

        Raises:
            Exception: "Image rendering failed: ..." wrapping any underlying
                error (the original error is chained as ``__cause__``).
        """
        try:
            # Generate AI image from content
            imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)

            # Determine filename from the first document, falling back to the title
            documents = extractedContent.get("documents", []) if extractedContent else []
            filename = None
            if isinstance(documents, list) and documents and isinstance(documents[0], dict):
                filename = documents[0].get("filename")
            if not filename:
                filename = self._determineFilename(title, "image/png")

            # Convert image content to bytes (base64 string or bytes)
            imageBytes = self._decodeImageContent(imageContent)

            # Extract metadata for document type and other info
            metadata = extractedContent.get("metadata", {}) if extractedContent else {}
            metadataIsDict = isinstance(metadata, dict)
            documentType = metadata.get("documentType") if metadataIsDict else None

            return [
                RenderedDocument(
                    documentData=imageBytes,
                    mimeType="image/png",
                    filename=filename,
                    documentType=documentType,
                    metadata=metadata if metadataIsDict else None
                )
            ]

        except Exception as e:
            self.logger.error(f"Error rendering image: {str(e)}")
            # Re-raise instead of using a fallback; chain the cause so the
            # original traceback is not lost.
            raise Exception(f"Image rendering failed: {str(e)}") from e

    def _decodeImageContent(self, imageContent: Any) -> Any:
        """Convert the image payload returned by the AI service to bytes.

        Non-string payloads are returned unchanged. String payloads are treated
        as base64; a leading ``data:<mime>;base64,`` header is removed first,
        because the lenient decoder would otherwise silently drop ``:``, ``;``
        and ``,`` and decode the header letters as if they were image data.
        If decoding fails, the UTF-8 bytes of the original string are returned
        (the pre-existing fallback), now with a warning in the log.
        """
        if not isinstance(imageContent, str):
            return imageContent

        payload = imageContent
        header, separator, body = imageContent.partition(",")
        headerNormalized = header.strip().lower()
        if separator and headerNormalized.startswith("data:") and headerNormalized.endswith(";base64"):
            payload = body

        try:
            return base64.b64decode(payload)
        except ValueError:
            # binascii.Error (bad padding) is a ValueError subclass.
            self.logger.warning("Image content is not valid base64; using raw UTF-8 bytes")
            return imageContent.encode('utf-8')

    async def _generateAiImage(self, extractedContent: Dict[str, Any], title: str, userPrompt: Optional[str] = None, aiService=None) -> Any:
        """Generate AI image from extracted content.

        Returns:
            The image payload carried by the AI response: ``documentData`` of
            the first response document if present, otherwise the response
            ``content``. ``render`` accepts either a base64 string or bytes.

        Raises:
            Exception: "AI image generation failed: ..." wrapping any underlying
                error, including a missing AI service, a schema violation or an
                empty AI response (the original error is chained).
        """
        try:
            if not aiService:
                raise ValueError("AI service is required for image generation")

            # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
            if not self._validateJsonStructure(extractedContent):
                raise ValueError("Extracted content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

            # Extract metadata from standardized schema
            metadata = self._extractMetadata(extractedContent)

            # Use title from JSON metadata if available, otherwise the provided
            # title. `or` also covers a present-but-empty/None metadata title.
            documentTitle = metadata.get("title") or title

            # Create AI prompt for image generation
            imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService)

            # Save image generation prompt to debug
            aiService.services.utils.writeDebugFile(imagePrompt, "image_generation_prompt")

            # Imported locally, as in the original code.
            from modules.datamodels.datamodelAi import AiCallPromptImage, AiCallOptions, OperationTypeEnum

            # Format prompt as JSON with image generation parameters
            promptModel = AiCallPromptImage(
                prompt=imagePrompt,
                size="1024x1024",
                quality="standard",
                style="vivid"
            )
            promptJson = promptModel.model_dump_json(exclude_none=True, indent=2)

            options = AiCallOptions(
                operationType=OperationTypeEnum.IMAGE_GENERATE,
                resultFormat="base64"
            )

            # Use unified callAiContent method
            imageResponse = await aiService.callAiContent(
                prompt=promptJson,
                options=options,
                outputFormat="base64"
            )

            # Save image generation response to debug
            aiService.services.utils.writeDebugFile(str(imageResponse.content), "image_generation_response")

            # Prefer the first returned document's data ...
            if imageResponse.documents:
                imageData = imageResponse.documents[0].documentData
                if imageData:
                    return imageData

            # ... and fall back to the content field (might be a base64 string)
            if imageResponse.content:
                return imageResponse.content

            raise ValueError("No image data returned from AI")

        except Exception as e:
            self.logger.error(f"Error generating AI image: {str(e)}")
            raise Exception(f"AI image generation failed: {str(e)}") from e

    @classmethod
    def _truncatePrompt(cls, prompt: str) -> str:
        """Cut ``prompt`` to the maximum prompt length, marking the cut with '...'."""
        limit = cls._MAX_PROMPT_LENGTH
        if len(prompt) > limit:
            return prompt[:limit - 3] + "..."
        return prompt

    async def _createImageGeneratePrompt(self, extractedContent: Dict[str, Any], title: str, userPrompt: Optional[str] = None, aiService=None) -> str:
        """Create a detailed prompt for AI image generation based on the content.

        The prompt combines the (sanitized) user request, the document title, a
        description of the sections and style guidance. If it exceeds the
        maximum prompt length, AI compression is attempted; if that fails, a
        minimal prompt built from title and user request is returned, truncated
        if necessary. Any error yields a simple title-only prompt.
        """
        try:
            limit = self._MAX_PROMPT_LENGTH

            # Sanitize the user's request once; it is needed for both the full
            # and the minimal prompt.
            sanitizedPrompt = None
            if userPrompt:
                sanitizedPrompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt

            promptParts = []

            # Add user's original intent if available
            if userPrompt:
                promptParts.append(f"User Request: {sanitizedPrompt}")

            # Add document title
            promptParts.append(f"Document Title: {title}")

            # Analyze content and create visual description
            sections = self._extractSections(extractedContent)
            contentDescription = self._analyzeContentForVisualDescription(sections)
            if contentDescription:
                promptParts.append(f"Content to Visualize: {contentDescription}")

            # Add style guidance
            styleGuidance = self._getStyleGuidanceFromContent(extractedContent, userPrompt)
            if styleGuidance:
                promptParts.append(f"Visual Style: {styleGuidance}")

            # Combine all parts and append the technical requirements
            fullPrompt = "".join([
                "Create a professional, informative image that visualizes the following content:\n\n",
                "\n\n".join(promptParts),
                "\n\nTechnical Requirements:",
                "\n- High quality, professional appearance",
                "\n- Clear, readable text if any text is included",
                "\n- Appropriate colors and layout",
                "\n- Suitable for business/professional use",
            ])

            if len(fullPrompt) <= limit:
                return fullPrompt

            # Prompt exceeds the limit: use AI to compress it intelligently
            compressedPrompt = await self._compressPromptWithAi(fullPrompt, aiService)
            if compressedPrompt and len(compressedPrompt) <= limit:
                return compressedPrompt

            # Fallback to minimal prompt if AI compression fails or is still too long
            minimalPrompt = f"Create a professional image representing: {title}"
            if userPrompt:
                minimalPrompt += f" - {sanitizedPrompt}"

            # If even the minimal prompt is too long, truncate it
            return self._truncatePrompt(minimalPrompt)

        except Exception as e:
            self.logger.warning(f"Error creating image prompt: {str(e)}")
            # Fallback to simple prompt, still respecting the length limit
            return self._truncatePrompt(f"Create a professional image representing: {title}")

    async def _compressPromptWithAi(self, longPrompt: str, aiService=None) -> Optional[str]:
        """Use AI to intelligently compress a long prompt while preserving key information.

        Returns:
            The compressed prompt if it is longer than 50 characters and within
            the maximum prompt length; ``None`` if no AI service was given, the
            result is unusable, or any error occurred (best effort, logged as
            warning).
        """
        try:
            if not aiService:
                return None

            limit = self._MAX_PROMPT_LENGTH

            compressionPrompt = (
                "\n"
                "You are an expert at creating concise, effective prompts for AI image generation.\n"
                "\n"
                f"The following prompt is too long for DALL-E ({limit} character limit) and needs to be compressed to under {limit} characters while preserving the most important visual information.\n"
                "\n"
                f"Original prompt ({len(longPrompt)} characters):\n"
                f"{longPrompt}\n"
                "\n"
                "Please create a compressed version that:\n"
                "1. Keeps the most important visual elements and requirements\n"
                "2. Maintains the core intent and style guidance\n"
                "3. Preserves technical requirements\n"
                f"4. Stays under {limit} characters\n"
                "5. Is optimized for DALL-E image generation\n"
                "\n"
                "Return only the compressed prompt, no explanations.\n"
            )

            # Imported locally, as in the original code.
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum

            request = AiCallRequest(
                prompt=compressionPrompt,
                options=AiCallOptions(
                    operationType=OperationTypeEnum.DATA_GENERATE,
                    maxTokens=None,  # Let the model use its full context length
                    temperature=0.3  # Lower temperature for more consistent compression
                )
            )

            response = await aiService.callAi(request)
            # Treat a missing content field like an empty answer.
            compressed = (response.content or "").strip()

            # Validate the compressed prompt: non-trivial and within the limit
            if 50 < len(compressed) <= limit:
                self.logger.info(f"Successfully compressed prompt from {len(longPrompt)} to {len(compressed)} characters")
                return compressed

            self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
            return None

        except Exception as e:
            self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
            return None

    def _analyzeContentForVisualDescription(self, sections: List[Dict[str, Any]]) -> str:
        """Analyze content sections and create a visual description for AI.

        Produces one short phrase per table, bullet list, heading, paragraph and
        code block section, joined with "; ". Sections whose data is not a dict
        are skipped. Returns "General document content" when nothing could be
        described and "Document content" if the analysis itself fails.
        """
        try:
            descriptions = []

            for section in sections:
                sectionType = self._getSectionType(section)
                sectionData = self._getSectionData(section)

                # Every branch below reads the data via .get(); skip anything
                # else so one odd section does not discard all descriptions.
                if not isinstance(sectionData, dict):
                    continue

                if sectionType == "table":
                    headers = sectionData.get("headers", [])
                    rows = sectionData.get("rows", [])
                    if headers and rows:
                        # str() so numeric/None header cells do not break the join
                        headerText = ', '.join(str(header) for header in headers)
                        descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {headerText}")

                elif sectionType == "bullet_list":
                    items = sectionData.get("items", [])
                    if items:
                        descriptions.append(f"List with {len(items)} items")

                elif sectionType == "heading":
                    text = sectionData.get("text", "")
                    level = sectionData.get("level", 1)
                    if text:
                        descriptions.append(f"Heading {level}: {text}")

                elif sectionType == "paragraph":
                    text = sectionData.get("text", "")
                    if text and len(text) > 10:  # Only include substantial paragraphs
                        # Truncate long text
                        truncated = text[:100] + "..." if len(text) > 100 else text
                        descriptions.append(f"Text content: {truncated}")

                elif sectionType == "code_block":
                    code = sectionData.get("code", "")
                    language = sectionData.get("language", "")
                    if code:
                        # Only mark the snippet as shortened when it really was cut
                        snippet = code[:50] + "..." if len(code) > 50 else code
                        descriptions.append(f"Code block ({language}): {snippet}")

            return "; ".join(descriptions) if descriptions else "General document content"

        except Exception as e:
            self.logger.warning(f"Error analyzing content: {str(e)}")
            return "Document content"

    def _getStyleGuidanceFromContent(self, extractedContent: Dict[str, Any], userPrompt: Optional[str] = None) -> str:
        """Determine visual style guidance based on content and user prompt.

        At most one style is derived from keywords in the user prompt (first
        matching group wins); further hints are added for tables, bullet lists
        and code blocks found in the content. Returns a comma-separated string,
        "professional, clean design" when nothing matched, and
        "professional design" if the analysis itself fails.
        """
        try:
            styleElements = []

            # Analyze user prompt for style hints (substring match, first group wins)
            if userPrompt:
                promptLower = userPrompt.lower()
                keywordStyles = (
                    (("modern", "contemporary", "sleek"), "modern, clean design"),
                    (("classic", "traditional", "formal"), "classic, formal design"),
                    (("creative", "artistic", "colorful"), "creative, artistic design"),
                    (("corporate", "business", "professional"), "corporate, professional design"),
                )
                for keywords, style in keywordStyles:
                    if any(word in promptLower for word in keywords):
                        styleElements.append(style)
                        break

            # Analyze content type for additional style hints; classify each
            # section once instead of once per checked type.
            sections = self._extractSections(extractedContent)
            sectionTypes = [self._getSectionType(section) for section in sections]

            if "table" in sectionTypes:
                styleElements.append("data-focused layout")
            if "bullet_list" in sectionTypes:
                styleElements.append("organized, structured presentation")
            if "code_block" in sectionTypes:
                styleElements.append("technical, developer-friendly")

            # Default style if no specific guidance
            if not styleElements:
                styleElements.append("professional, clean design")

            return ", ".join(styleElements)

        except Exception as e:
            self.logger.warning(f"Error determining style guidance: {str(e)}")
            return "professional design"
|