# gateway/modules/services/serviceGeneration/renderers/rendererImage.py
# 2025-10-24 23:57:17 +02:00
#
# 286 lines
# 13 KiB
# Python

"""
Image renderer for report generation using AI image generation.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import logging
# Module-level logger named after this module.
# NOTE(review): the renderer class in this file logs through self.logger, which
# is not defined here — presumably supplied by BaseRenderer; confirm.
logger = logging.getLogger(__name__)
class RendererImage(BaseRenderer):
    """Renders content to image format using AI image generation.

    The renderer turns the extracted document (title, sections, optional user
    request) into a text prompt, asks the AI service for an image and returns
    the image payload reported by that service.

    NOTE(review): ``self.logger``, ``self._get_section_type`` and
    ``self._get_section_data`` are not defined in this class — presumably
    inherited from ``BaseRenderer``; confirm.
    """

    # Maximum prompt length sent to the image model (DALL-E's 4000 character
    # limit). Kept in one place so every length check agrees.
    MAX_PROMPT_LENGTH = 4000

    # A compressed prompt must be strictly longer than this to be accepted;
    # anything shorter is treated as a failed compression.
    MIN_COMPRESSED_PROMPT_LENGTH = 50

    # Number of characters of paragraph text / code kept in the description.
    _PARAGRAPH_PREVIEW_LENGTH = 100
    _CODE_PREVIEW_LENGTH = 50

    # Keyword groups looked up (in this order) in the lower-cased user prompt.
    # Only the first group with a hit contributes a style phrase.
    _STYLE_KEYWORDS = (
        (("modern", "contemporary", "sleek"), "modern, clean design"),
        (("classic", "traditional", "formal"), "classic, formal design"),
        (("creative", "artistic", "colorful"), "creative, artistic design"),
        (("corporate", "business", "professional"), "corporate, professional design"),
    )

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported image formats."""
        return ['png', 'jpg', 'jpeg', 'image']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['img', 'picture', 'photo', 'graphic']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for image renderer."""
        return 90

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to image format using AI image generation.

        Args:
            extracted_content: Document dictionary; must contain a "sections" entry.
            title: Title used when the document metadata does not provide one.
            user_prompt: Optional original user request, woven into the prompt.
            ai_service: Service object used for image generation; required.

        Returns:
            A tuple ``(image_data, "image/png")`` where ``image_data`` is the
            payload returned by the AI service.

        Raises:
            Exception: If image generation fails. There is deliberately no
                fallback image; the original error is chained as the cause.
        """
        try:
            image_content = await self._generate_ai_image(extracted_content, title, user_prompt, ai_service)
        except Exception as e:
            self.logger.error(f"Error rendering image: {str(e)}")
            # Re-raise instead of using a fallback, keeping the cause attached.
            raise Exception(f"Image rendering failed: {str(e)}") from e
        return image_content, "image/png"

    async def _generate_ai_image(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Generate AI image from extracted content.

        Validates the input, builds the prompt, calls
        ``ai_service.aiObjects.generateImage`` and returns the ``image_data``
        entry of its result. Prompt and raw response are written through
        ``ai_service.services.utils.writeDebugFile``.

        Args:
            extracted_content: Document dictionary with a "sections" entry and
                an optional "metadata" dictionary holding a "title".
            title: Fallback title when the metadata has no usable title.
            user_prompt: Optional original user request.
            ai_service: Service object; required.

        Returns:
            The image data reported by the AI service (the result's
            "image_data" value; presumably base64 — confirm with the service).

        Raises:
            Exception: On invalid input or any failure of the AI call, with
                the underlying error chained as the cause.
        """
        try:
            if not ai_service:
                raise ValueError("AI service is required for image generation")

            # Validate JSON structure
            if not isinstance(extracted_content, dict):
                raise ValueError("Extracted content must be a dictionary")
            if "sections" not in extracted_content:
                raise ValueError("Extracted content must contain 'sections' field")

            # Prefer the metadata title, but tolerate a missing/None metadata
            # entry and an empty title by falling back to the provided title.
            metadata = extracted_content.get("metadata")
            metadata_title = metadata.get("title") if isinstance(metadata, dict) else None
            document_title = metadata_title or title

            image_prompt = await self._create_imageGenerate_prompt(
                extracted_content, document_title, user_prompt, ai_service
            )
            ai_service.services.utils.writeDebugFile(image_prompt, "image_generation_prompt")

            image_result = await ai_service.aiObjects.generateImage(
                prompt=image_prompt,
                size="1024x1024",
                quality="standard",
                style="vivid",
            )
            ai_service.services.utils.writeDebugFile(str(image_result), "image_generation_response")

            # The messages below carry no "AI image generation failed" prefix:
            # the handler underneath adds it exactly once.
            if not image_result:
                raise ValueError("No result")
            if not image_result.get("success", False):
                raise ValueError(str(image_result.get("error", "Unknown error")))

            image_data = image_result.get("image_data", "")
            if not image_data:
                raise ValueError("No image data returned from AI")
            return image_data
        except Exception as e:
            self.logger.error(f"Error generating AI image: {str(e)}")
            raise Exception(f"AI image generation failed: {str(e)}") from e

    async def _create_imageGenerate_prompt(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Create a detailed prompt for AI image generation based on the content.

        The prompt combines the sanitized user request, the document title, a
        description of the sections and style guidance. If it exceeds
        ``MAX_PROMPT_LENGTH`` it is compressed via the AI service; if that
        fails, a minimal title-based prompt (truncated if needed) is used.

        Args:
            extracted_content: Document dictionary; its "sections" are described.
            title: Document title to put into the prompt.
            user_prompt: Optional user request. It is only included after
                passing through ``ai_service.sanitizePromptContent``; without
                an ``ai_service`` it is left out rather than used unsanitized.
            ai_service: Service object used for sanitizing and compression.

        Returns:
            A prompt string of at most ``MAX_PROMPT_LENGTH`` characters. This
            method does not raise; on error it returns a title-only prompt.
        """
        try:
            # Sanitize once and reuse the result for both the full and the
            # minimal prompt.
            sanitized_request = None
            if user_prompt and ai_service:
                sanitized_request = ai_service.sanitizePromptContent(user_prompt, 'userinput')

            prompt_parts = []
            if sanitized_request:
                prompt_parts.append(f"User Request: {sanitized_request}")
            prompt_parts.append(f"Document Title: {title}")

            sections = extracted_content.get("sections") or []
            content_description = self._analyze_content_for_visual_description(sections)
            if content_description:
                prompt_parts.append(f"Content to Visualize: {content_description}")

            style_guidance = self._get_style_guidance_from_content(extracted_content, user_prompt)
            if style_guidance:
                prompt_parts.append(f"Visual Style: {style_guidance}")

            full_prompt = "".join([
                "Create a professional, informative image that visualizes the following content:\n\n",
                "\n\n".join(prompt_parts),
                "\n\nTechnical Requirements:",
                "\n- High quality, professional appearance",
                "\n- Clear, readable text if any text is included",
                "\n- Appropriate colors and layout",
                "\n- Suitable for business/professional use",
            ])
            if len(full_prompt) <= self.MAX_PROMPT_LENGTH:
                return full_prompt

            # Too long for the image model: try an AI-made summary first.
            compressed_prompt = await self._compress_prompt_with_ai(full_prompt, ai_service)
            if compressed_prompt and len(compressed_prompt) <= self.MAX_PROMPT_LENGTH:
                return compressed_prompt

            # Compression failed or is still too long: fall back to a minimal prompt.
            minimal_prompt = f"Create a professional image representing: {title}"
            if sanitized_request:
                minimal_prompt += f" - {sanitized_request}"
            return self._truncate_prompt(minimal_prompt)
        except Exception as e:
            self.logger.warning(f"Error creating image prompt: {str(e)}")
            # Fallback to simple prompt, still honoring the length limit.
            return self._truncate_prompt(f"Create a professional image representing: {title}")

    def _truncate_prompt(self, prompt: str) -> str:
        """Cut ``prompt`` to ``MAX_PROMPT_LENGTH`` characters, ending in "..." when cut."""
        limit = self.MAX_PROMPT_LENGTH
        if len(prompt) <= limit:
            return prompt
        return prompt[:limit - 3] + "..."

    async def _compress_prompt_with_ai(self, long_prompt: str, ai_service=None) -> str:
        """Use AI to intelligently compress a long prompt while preserving key information.

        Args:
            long_prompt: The prompt that exceeds the length limit.
            ai_service: Service object whose ``aiObjects.call`` performs the request.

        Returns:
            The compressed prompt, or ``None`` when no service is given, the
            call fails, or the result is not longer than
            ``MIN_COMPRESSED_PROMPT_LENGTH`` / exceeds ``MAX_PROMPT_LENGTH``.
            This method does not raise.
        """
        try:
            if not ai_service:
                return None

            limit = self.MAX_PROMPT_LENGTH
            # Leading/trailing empty entries keep the newline at both ends of
            # the request text.
            compression_prompt = "\n".join([
                "",
                "You are an expert at creating concise, effective prompts for AI image generation.",
                f"The following prompt is too long for DALL-E ({limit} character limit) and needs to be "
                f"compressed to under {limit} characters while preserving the most important visual information.",
                f"Original prompt ({len(long_prompt)} characters):",
                long_prompt,
                "Please create a compressed version that:",
                "1. Keeps the most important visual elements and requirements",
                "2. Maintains the core intent and style guidance",
                "3. Preserves technical requirements",
                f"4. Stays under {limit} characters",
                "5. Is optimized for DALL-E image generation",
                "Return only the compressed prompt, no explanations.",
                "",
            ])

            # Imported here as in the original layout of this method; the
            # request is sent through ai_service.aiObjects.
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
            request = AiCallRequest(
                prompt=compression_prompt,
                options=AiCallOptions(
                    operationType=OperationTypeEnum.DATA_GENERATE,
                    maxTokens=None,  # Let the model use its full context length
                    temperature=0.3,  # Lower temperature for more consistent compression
                ),
            )
            response = await ai_service.aiObjects.call(request)
            # An empty/None content counts as a failed compression.
            compressed = (response.content or "").strip()

            if self.MIN_COMPRESSED_PROMPT_LENGTH < len(compressed) <= limit:
                self.logger.info(f"Successfully compressed prompt from {len(long_prompt)} to {len(compressed)} characters")
                return compressed

            self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed)} chars")
            return None
        except Exception as e:
            self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
            return None

    def _analyze_content_for_visual_description(self, sections: List[Dict[str, Any]]) -> str:
        """Analyze content sections and create a visual description for AI.

        Each section is described independently, so one malformed section is
        skipped (with a warning) instead of discarding every description.

        Args:
            sections: Section dictionaries of the extracted document.

        Returns:
            The section descriptions joined with "; ", "General document
            content" when nothing could be described, or "Document content"
            when the sections cannot be iterated at all.
        """
        try:
            descriptions = []
            for section in sections or []:
                try:
                    description = self._describe_section(section)
                except Exception as e:
                    self.logger.warning(f"Error analyzing content: {str(e)}")
                    continue
                if description:
                    descriptions.append(description)
            return "; ".join(descriptions) if descriptions else "General document content"
        except Exception as e:
            self.logger.warning(f"Error analyzing content: {str(e)}")
            return "Document content"

    def _describe_section(self, section: Dict[str, Any]) -> str:
        """Return a one-line description of a single section, or None if it has none."""
        section_type = self._get_section_type(section)
        section_data = self._get_section_data(section)

        if section_type == "table":
            headers = section_data.get("headers", [])
            rows = section_data.get("rows", [])
            if headers and rows:
                # Headers may be numbers or other non-strings; join needs text.
                header_names = ", ".join(str(header) for header in headers)
                return f"Data table with {len(headers)} columns and {len(rows)} rows: {header_names}"
        elif section_type == "bullet_list":
            items = section_data.get("items", [])
            if items:
                return f"List with {len(items)} items"
        elif section_type == "heading":
            text = section_data.get("text", "")
            level = section_data.get("level", 1)
            if text:
                return f"Heading {level}: {text}"
        elif section_type == "paragraph":
            text = section_data.get("text", "")
            # Only include substantial paragraphs, shortened to a preview.
            if text and len(text) > 10:
                preview = self._shorten(text, self._PARAGRAPH_PREVIEW_LENGTH)
                return f"Text content: {preview}"
        elif section_type == "code_block":
            code = section_data.get("code", "")
            language = section_data.get("language", "")
            if code:
                label = f"Code block ({language})" if language else "Code block"
                return f"{label}: {self._shorten(code, self._CODE_PREVIEW_LENGTH)}"
        return None

    @staticmethod
    def _shorten(text: str, max_length: int) -> str:
        """Return ``text`` unchanged if short enough, else its first ``max_length`` chars plus "..."."""
        if len(text) > max_length:
            return text[:max_length] + "..."
        return text

    def _get_style_guidance_from_content(self, extracted_content: Dict[str, Any], user_prompt: str = None) -> str:
        """Determine visual style guidance based on content and user prompt.

        Args:
            extracted_content: Document dictionary; its "sections" are inspected
                for tables, bullet lists and code blocks.
            user_prompt: Optional user request scanned for style keywords
                (plain substring match on the lower-cased text).

        Returns:
            Comma-separated style phrases; "professional, clean design" when no
            hint applies, or "professional design" if the analysis fails.
        """
        try:
            style_elements = []

            # At most one style phrase comes from the user prompt: the first
            # keyword group that matches.
            if user_prompt:
                prompt_lower = user_prompt.lower()
                for keywords, style in self._STYLE_KEYWORDS:
                    if any(word in prompt_lower for word in keywords):
                        style_elements.append(style)
                        break

            # Resolve each section's type once, then test membership.
            sections = extracted_content.get("sections") or []
            section_types = [self._get_section_type(section) for section in sections]
            if "table" in section_types:
                style_elements.append("data-focused layout")
            if "bullet_list" in section_types:
                style_elements.append("organized, structured presentation")
            if "code_block" in section_types:
                style_elements.append("technical, developer-friendly")

            # Default style if no specific guidance
            if not style_elements:
                style_elements.append("professional, clean design")
            return ", ".join(style_elements)
        except Exception as e:
            self.logger.warning(f"Error determining style guidance: {str(e)}")
            return "professional design"