# gateway/modules/services/serviceGeneration/renderers/rendererImage.py

# 295 lines
# 14 KiB
# Python

"""
Image renderer for report generation using AI image generation.
"""
import base64
import logging
from typing import Any, Dict, List, Optional, Tuple

from .rendererBaseTemplate import BaseRenderer
logger = logging.getLogger(__name__)
class RendererImage(BaseRenderer):
"""Renders content to image format using AI image generation."""
@classmethod
def get_supported_formats(cls) -> List[str]:
    """List the output format identifiers this renderer declares support for.

    Returns:
        A freshly built list: 'png', 'jpg', 'jpeg' and 'image'.
    """
    supported_formats = ['png', 'jpg', 'jpeg', 'image']
    return supported_formats
@classmethod
def get_format_aliases(cls) -> List[str]:
    """List the alternative names under which this renderer can be requested.

    Returns:
        A freshly built list: 'img', 'picture', 'photo' and 'graphic'.
    """
    alias_names = ['img', 'picture', 'photo', 'graphic']
    return alias_names
@classmethod
def get_priority(cls) -> int:
    """Report the priority value assigned to the image renderer.

    Returns:
        The constant 90.
    """
    renderer_priority = 90
    return renderer_priority
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: Optional[str] = None, ai_service=None) -> Tuple[str, str]:
    """Render extracted JSON content to an image via AI image generation.

    Args:
        extracted_content: Structured document content; forwarded unchanged
            to ``_generate_ai_image``.
        title: Title forwarded to ``_generate_ai_image``.
        user_prompt: Optional original user request, forwarded unchanged.
        ai_service: Service object forwarded to ``_generate_ai_image``.

    Returns:
        A ``(image_content, mime_type)`` tuple where ``image_content`` is the
        value produced by ``_generate_ai_image`` and ``mime_type`` is always
        ``"image/png"``.

    Raises:
        Exception: With the message ``"Image rendering failed: <reason>"``
            when generation fails. The original error is attached as
            ``__cause__`` so the root failure stays visible in tracebacks.
    """
    try:
        image_content = await self._generate_ai_image(extracted_content, title, user_prompt, ai_service)
    except Exception as e:
        self.logger.error(f"Error rendering image: {e}")
        # No fallback image is produced: the failure is surfaced to the
        # caller, explicitly chained to the underlying error.
        raise Exception(f"Image rendering failed: {e}") from e
    return image_content, "image/png"
async def _generate_ai_image(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
"""Generate AI image from extracted content."""
try:
if not ai_service:
raise ValueError("AI service is required for image generation")
# Validate JSON structure
if not isinstance(extracted_content, dict):
raise ValueError("Extracted content must be a dictionary")
if "sections" not in extracted_content:
raise ValueError("Extracted content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = extracted_content.get("metadata", {}).get("title", title)
# Create AI prompt for image generation
image_prompt = await self._create_image_generation_prompt(extracted_content, document_title, user_prompt, ai_service)
# Save image generation prompt to debug
try:
from modules.shared.debugLogger import writeDebugFile
writeDebugFile(image_prompt, "rendererImageGenerationPrompt")
except Exception:
pass
# Generate image using AI
image_result = await ai_service.aiObjects.generateImage(
prompt=image_prompt,
size="1024x1024",
quality="standard",
style="vivid"
)
# Save image generation response to debug
try:
from modules.shared.debugLogger import writeDebugFile
writeDebugFile(str(image_result), "rendererImageGenerationResponse")
except Exception:
pass
# Extract base64 image data from result
if image_result and image_result.get("success", False):
image_data = image_result.get("image_data", "")
if image_data:
return image_data
else:
raise ValueError("No image data returned from AI")
else:
error_msg = image_result.get("error", "Unknown error") if image_result else "No result"
raise ValueError(f"AI image generation failed: {error_msg}")
except Exception as e:
self.logger.error(f"Error generating AI image: {str(e)}")
raise Exception(f"AI image generation failed: {str(e)}")
async def _create_image_generation_prompt(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
"""Create a detailed prompt for AI image generation based on the content."""
try:
# Start with base prompt
prompt_parts = []
# Add user's original intent if available
if user_prompt:
prompt_parts.append(f"User Request: {ai_service.sanitizePromptContent(user_prompt, 'userinput')}")
# Add document title
prompt_parts.append(f"Document Title: {title}")
# Analyze content and create visual description
sections = extracted_content.get("sections", [])
content_description = self._analyze_content_for_visual_description(sections)
if content_description:
prompt_parts.append(f"Content to Visualize: {content_description}")
# Add style guidance
style_guidance = self._get_style_guidance_from_content(extracted_content, user_prompt)
if style_guidance:
prompt_parts.append(f"Visual Style: {style_guidance}")
# Combine all parts
full_prompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(prompt_parts)
# Add technical requirements
full_prompt += "\n\nTechnical Requirements:"
full_prompt += "\n- High quality, professional appearance"
full_prompt += "\n- Clear, readable text if any text is included"
full_prompt += "\n- Appropriate colors and layout"
full_prompt += "\n- Suitable for business/professional use"
# Truncate prompt if it exceeds DALL-E's 4000 character limit
if len(full_prompt) > 4000:
# Use AI to compress the prompt intelligently
compressed_prompt = await self._compress_prompt_with_ai(full_prompt, ai_service)
if compressed_prompt and len(compressed_prompt) <= 4000:
return compressed_prompt
# Fallback to minimal prompt if AI compression fails or is still too long
minimal_prompt = f"Create a professional image representing: {title}"
if user_prompt:
minimal_prompt += f" - {ai_service.sanitizePromptContent(user_prompt, 'userinput')}"
# If even the minimal prompt is too long, truncate it
if len(minimal_prompt) > 4000:
minimal_prompt = minimal_prompt[:3997] + "..."
return minimal_prompt
return full_prompt
except Exception as e:
self.logger.warning(f"Error creating image prompt: {str(e)}")
# Fallback to simple prompt
return f"Create a professional image representing: {title}"
async def _compress_prompt_with_ai(self, long_prompt: str, ai_service=None) -> str:
"""Use AI to intelligently compress a long prompt while preserving key information."""
try:
if not ai_service:
return None
compression_prompt = f"""
You are an expert at creating concise, effective prompts for AI image generation.
The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information.
Original prompt ({len(long_prompt)} characters):
{long_prompt}
Please create a compressed version that:
1. Keeps the most important visual elements and requirements
2. Maintains the core intent and style guidance
3. Preserves technical requirements
4. Stays under 4000 characters
5. Is optimized for DALL-E image generation
Return only the compressed prompt, no explanations.
"""
# Use AI to compress the prompt - call the AI service correctly
# The ai_service has an aiObjects attribute that contains the actual AI interface
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
request = AiCallRequest(
prompt=compression_prompt,
options=AiCallOptions(
operationType=OperationType.GENERAL,
maxTokens=2000,
temperature=0.3 # Lower temperature for more consistent compression
)
)
response = await ai_service.aiObjects.call(request)
compressed = response.content.strip()
# Validate the compressed prompt
if compressed and len(compressed) <= 4000 and len(compressed) > 50:
self.logger.info(f"Successfully compressed prompt from {len(long_prompt)} to {len(compressed)} characters")
return compressed
else:
self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
return None
except Exception as e:
self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
return None
def _analyze_content_for_visual_description(self, sections: List[Dict[str, Any]]) -> str:
"""Analyze content sections and create a visual description for AI."""
try:
descriptions = []
for section in sections:
section_type = self._get_section_type(section)
section_data = self._get_section_data(section)
if section_type == "table":
headers = section_data.get("headers", [])
rows = section_data.get("rows", [])
if headers and rows:
descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}")
elif section_type == "bullet_list":
items = section_data.get("items", [])
if items:
descriptions.append(f"List with {len(items)} items")
elif section_type == "heading":
text = section_data.get("text", "")
level = section_data.get("level", 1)
if text:
descriptions.append(f"Heading {level}: {text}")
elif section_type == "paragraph":
text = section_data.get("text", "")
if text and len(text) > 10: # Only include substantial paragraphs
# Truncate long text
truncated = text[:100] + "..." if len(text) > 100 else text
descriptions.append(f"Text content: {truncated}")
elif section_type == "code_block":
code = section_data.get("code", "")
language = section_data.get("language", "")
if code:
descriptions.append(f"Code block ({language}): {code[:50]}...")
return "; ".join(descriptions) if descriptions else "General document content"
except Exception as e:
self.logger.warning(f"Error analyzing content: {str(e)}")
return "Document content"
def _get_style_guidance_from_content(self, extracted_content: Dict[str, Any], user_prompt: str = None) -> str:
"""Determine visual style guidance based on content and user prompt."""
try:
style_elements = []
# Analyze user prompt for style hints
if user_prompt:
prompt_lower = user_prompt.lower()
if any(word in prompt_lower for word in ["modern", "contemporary", "sleek"]):
style_elements.append("modern, clean design")
elif any(word in prompt_lower for word in ["classic", "traditional", "formal"]):
style_elements.append("classic, formal design")
elif any(word in prompt_lower for word in ["creative", "artistic", "colorful"]):
style_elements.append("creative, artistic design")
elif any(word in prompt_lower for word in ["corporate", "business", "professional"]):
style_elements.append("corporate, professional design")
# Analyze content type for additional style hints
sections = extracted_content.get("sections", [])
has_tables = any(self._get_section_type(s) == "table" for s in sections)
has_lists = any(self._get_section_type(s) == "bullet_list" for s in sections)
has_code = any(self._get_section_type(s) == "code_block" for s in sections)
if has_tables:
style_elements.append("data-focused layout")
if has_lists:
style_elements.append("organized, structured presentation")
if has_code:
style_elements.append("technical, developer-friendly")
# Default style if no specific guidance
if not style_elements:
style_elements.append("professional, clean design")
return ", ".join(style_elements)
except Exception as e:
self.logger.warning(f"Error determining style guidance: {str(e)}")
return "professional design"