renders image generation tested and fixed - all renderers ready
This commit is contained in:
parent
df15f54f4b
commit
dedee0ecda
4 changed files with 491 additions and 6 deletions
|
|
@ -188,4 +188,83 @@ class AiOpenai:
|
|||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during image analysis: {str(e)}", exc_info=True)
|
||||
return f"[Error during image analysis: {str(e)}]"
|
||||
return f"[Error during image analysis: {str(e)}]"
|
||||
|
||||
async def generateImage(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid") -> Dict[str, Any]:
    """
    Generate an image using DALL-E 3.

    Args:
        prompt: The text prompt for image generation
        size: Image size (1024x1024, 1792x1024, or 1024x1792)
        quality: Image quality (standard or hd)
        style: Image style (vivid or natural)

    Returns:
        Dictionary describing the outcome. On success it contains
        "success": True, the base64-encoded image under "image_data", and
        the "size", "quality" and "style" that were requested. On any
        failure it contains "success": False and an "error" message; this
        method does not raise.
    """
    try:
        logger.debug(f"Starting image generation with prompt: '{prompt[:100]}...'")

        # DALL-E 3 API endpoint
        dalle_url = "https://api.openai.com/v1/images/generations"

        payload = {
            "model": "dall-e-3",
            "prompt": prompt,
            "size": size,
            "quality": quality,
            "style": style,
            "n": 1,
            "response_format": "b64_json"  # Get base64 data directly instead of URLs
        }

        request_headers = {
            "Authorization": f"Bearer {self.apiKey}",
            "Content-Type": "application/json"
        }

        # Use a separate client for DALL-E API calls. The context manager
        # guarantees the client is closed even when the request raises
        # (timeout, connection error), which a manual aclose() after the
        # call does not.
        async with httpx.AsyncClient(timeout=120.0, headers=request_headers) as dalle_client:
            response = await dalle_client.post(dalle_url, json=payload)

        if response.status_code != 200:
            logger.error(f"DALL-E API error: {response.status_code} - {response.text}")
            return {
                "success": False,
                "error": f"DALL-E API error: {response.status_code} - {response.text}"
            }

        responseJson = response.json()

        # Treat a missing/empty "data" list and a missing "b64_json" entry
        # the same way: there is no image to return.
        image_data = None
        if "data" in responseJson and len(responseJson["data"]) > 0:
            image_data = responseJson["data"][0].get("b64_json")

        if not image_data:
            logger.error("No image data in DALL-E response")
            return {
                "success": False,
                "error": "No image data in DALL-E response"
            }

        logger.info(f"Successfully generated image: {len(image_data)} characters")
        return {
            "success": True,
            "image_data": image_data,
            "size": size,
            "quality": quality,
            "style": style
        }

    except Exception as e:
        # Boundary handler: callers expect a result dict, never an exception.
        logger.error(f"Error during image generation: {str(e)}", exc_info=True)
        return {
            "success": False,
            "error": f"Error during image generation: {str(e)}"
        }
|
||||
|
|
@ -140,6 +140,109 @@ class BaseRenderer(ABC):
|
|||
alt_text = section_data.get("altText", "Image")
|
||||
return base64_data, alt_text
|
||||
|
||||
def _render_image_section(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any:
|
||||
"""
|
||||
Render an image section. This is a base implementation that should be overridden
|
||||
by format-specific renderers.
|
||||
|
||||
Args:
|
||||
section: Image section data
|
||||
styles: Optional styling information
|
||||
|
||||
Returns:
|
||||
Format-specific image representation
|
||||
"""
|
||||
section_data = self._get_section_data(section)
|
||||
base64_data, alt_text = self._extract_image_data(section_data)
|
||||
|
||||
# Base implementation returns a simple dict
|
||||
# Format-specific renderers should override this method
|
||||
return {
|
||||
"type": "image",
|
||||
"base64Data": base64_data,
|
||||
"altText": alt_text,
|
||||
"width": section_data.get("width", None),
|
||||
"height": section_data.get("height", None),
|
||||
"caption": section_data.get("caption", "")
|
||||
}
|
||||
|
||||
def _validate_image_data(self, base64_data: str, alt_text: str) -> bool:
|
||||
"""Validate image data."""
|
||||
if not base64_data:
|
||||
self.logger.warning("Image section has no base64 data")
|
||||
return False
|
||||
|
||||
if not alt_text:
|
||||
self.logger.warning("Image section has no alt text")
|
||||
return False
|
||||
|
||||
# Basic base64 validation
|
||||
try:
|
||||
import base64
|
||||
base64.b64decode(base64_data, validate=True)
|
||||
return True
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Invalid base64 image data: {str(e)}")
|
||||
return False
|
||||
|
||||
def _get_image_dimensions(self, base64_data: str) -> Tuple[int, int]:
|
||||
"""
|
||||
Get image dimensions from base64 data.
|
||||
This is a helper method that format-specific renderers can use.
|
||||
"""
|
||||
try:
|
||||
import base64
|
||||
from PIL import Image
|
||||
import io
|
||||
|
||||
# Decode base64 data
|
||||
image_data = base64.b64decode(base64_data)
|
||||
image = Image.open(io.BytesIO(image_data))
|
||||
|
||||
return image.size # Returns (width, height)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not determine image dimensions: {str(e)}")
|
||||
return (0, 0)
|
||||
|
||||
def _resize_image_if_needed(self, base64_data: str, max_width: int = 800, max_height: int = 600) -> str:
|
||||
"""
|
||||
Resize image if it exceeds maximum dimensions.
|
||||
Returns the resized image as base64 string.
|
||||
"""
|
||||
try:
|
||||
import base64
|
||||
from PIL import Image
|
||||
import io
|
||||
|
||||
# Decode base64 data
|
||||
image_data = base64.b64decode(base64_data)
|
||||
image = Image.open(io.BytesIO(image_data))
|
||||
|
||||
# Check if resizing is needed
|
||||
width, height = image.size
|
||||
if width <= max_width and height <= max_height:
|
||||
return base64_data # No resizing needed
|
||||
|
||||
# Calculate new dimensions maintaining aspect ratio
|
||||
ratio = min(max_width / width, max_height / height)
|
||||
new_width = int(width * ratio)
|
||||
new_height = int(height * ratio)
|
||||
|
||||
# Resize image
|
||||
resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
|
||||
|
||||
# Convert back to base64
|
||||
buffer = io.BytesIO()
|
||||
resized_image.save(buffer, format=image.format or 'PNG')
|
||||
resized_data = buffer.getvalue()
|
||||
|
||||
return base64.b64encode(resized_data).decode('utf-8')
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not resize image: {str(e)}")
|
||||
return base64_data # Return original if resize fails
|
||||
|
||||
def _get_supported_section_types(self) -> List[str]:
|
||||
"""Return list of supported section types."""
|
||||
return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"]
|
||||
|
|
@ -170,7 +273,19 @@ class BaseRenderer(ABC):
|
|||
return {"type": "code_block", "code": code, "language": language}
|
||||
elif section_type == "image":
|
||||
base64_data, alt_text = self._extract_image_data(section_data)
|
||||
return {"type": "image", "base64Data": base64_data, "altText": alt_text}
|
||||
# Validate image data
|
||||
if self._validate_image_data(base64_data, alt_text):
|
||||
return {
|
||||
"type": "image",
|
||||
"base64Data": base64_data,
|
||||
"altText": alt_text,
|
||||
"width": section_data.get("width"),
|
||||
"height": section_data.get("height"),
|
||||
"caption": section_data.get("caption", "")
|
||||
}
|
||||
else:
|
||||
# Return placeholder if image data is invalid
|
||||
return {"type": "paragraph", "text": f"[Image: {alt_text}]"}
|
||||
else:
|
||||
# Fallback to paragraph
|
||||
text = self._extract_paragraph_text(section_data)
|
||||
|
|
|
|||
281
modules/services/serviceGeneration/renderers/rendererImage.py
Normal file
281
modules/services/serviceGeneration/renderers/rendererImage.py
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
"""
|
||||
Image renderer for report generation using AI image generation.
|
||||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
import base64
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class RendererImage(BaseRenderer):
    """Renders content to image format using AI image generation.

    The extracted document content is summarized into a text prompt, which
    is passed to the image generation call of the supplied AI service. The
    result is returned as a base64 string.
    """

    # Maximum prompt length accepted by DALL-E, in characters.
    _MAX_PROMPT_LENGTH = 4000

    # Ordered (keywords, style) pairs. Only the first group that has a
    # keyword occurring in the user prompt contributes a style.
    _STYLE_KEYWORDS = (
        (("modern", "contemporary", "sleek"), "modern, clean design"),
        (("classic", "traditional", "formal"), "classic, formal design"),
        (("creative", "artistic", "colorful"), "creative, artistic design"),
        (("corporate", "business", "professional"), "corporate, professional design"),
    )

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported image formats."""
        return ['png', 'jpg', 'jpeg', 'image']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['img', 'picture', 'photo', 'graphic']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for image renderer."""
        return 90

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to image format using AI image generation.

        Args:
            extracted_content: Document content; must be a dict with a "sections" entry
            title: Fallback title when the content metadata has none
            user_prompt: Optional original user request, used for the prompt and style hints
            ai_service: Service object exposing the AI interface as ``aiObjects``

        Returns:
            Tuple of (base64 image data, "image/png").

        Raises:
            Exception: If image generation fails; the original error is chained as the cause.
        """
        try:
            # Generate AI image from content
            image_content = await self._generate_ai_image(extracted_content, title, user_prompt, ai_service)
            return image_content, "image/png"

        except Exception as e:
            self.logger.error(f"Error rendering image: {str(e)}")
            # Re-raise instead of using a fallback; keep the cause for debugging
            raise Exception(f"Image rendering failed: {str(e)}") from e

    async def _generate_ai_image(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Generate AI image from extracted content.

        Returns:
            The base64 image data reported by the AI service.

        Raises:
            Exception: On missing service, malformed content, or a failed or
                empty generation result; the original error is chained as the cause.
        """
        try:
            if not ai_service:
                raise ValueError("AI service is required for image generation")

            # Validate JSON structure
            if not isinstance(extracted_content, dict):
                raise ValueError("Extracted content must be a dictionary")

            if "sections" not in extracted_content:
                raise ValueError("Extracted content must contain 'sections' field")

            # Use title from JSON metadata if available, otherwise use provided title.
            # Metadata that is missing, None, or not a dict must not abort generation.
            metadata = extracted_content.get("metadata")
            document_title = title
            if isinstance(metadata, dict):
                document_title = metadata.get("title") or title

            # Create AI prompt for image generation
            image_prompt = await self._create_image_generation_prompt(extracted_content, document_title, user_prompt, ai_service)

            # Generate image using AI
            image_result = await ai_service.aiObjects.generateImage(
                prompt=image_prompt,
                size="1024x1024",
                quality="standard",
                style="vivid"
            )

            # Extract base64 image data from result
            if not (image_result and image_result.get("success", False)):
                error_msg = image_result.get("error", "Unknown error") if image_result else "No result"
                raise ValueError(f"AI image generation failed: {error_msg}")

            image_data = image_result.get("image_data", "")
            if not image_data:
                raise ValueError("No image data returned from AI")
            return image_data

        except Exception as e:
            self.logger.error(f"Error generating AI image: {str(e)}")
            raise Exception(f"AI image generation failed: {str(e)}") from e

    def _build_minimal_prompt(self, title: str, user_prompt: str = None) -> str:
        """Build the short fallback prompt, truncated to the prompt length limit."""
        minimal_prompt = f"Create a professional image representing: {title}"
        if user_prompt:
            minimal_prompt += f" - {user_prompt}"

        # If even the minimal prompt is too long, truncate it
        limit = self._MAX_PROMPT_LENGTH
        if len(minimal_prompt) > limit:
            minimal_prompt = minimal_prompt[:limit - 3] + "..."
        return minimal_prompt

    async def _create_image_generation_prompt(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Create a detailed prompt for AI image generation based on the content.

        The prompt combines the user request, the title, a description of
        the sections and style guidance. If it exceeds the length limit it
        is compressed via the AI service; if that fails, a minimal prompt
        built from title and user request is used. This method does not
        raise: any error falls back to a title-only prompt.
        """
        try:
            # Start with base prompt
            prompt_parts = []

            # Add user's original intent if available
            if user_prompt:
                prompt_parts.append(f"User Request: {user_prompt}")

            # Add document title
            prompt_parts.append(f"Document Title: {title}")

            # Analyze content and create visual description
            sections = extracted_content.get("sections", [])
            content_description = self._analyze_content_for_visual_description(sections)
            if content_description:
                prompt_parts.append(f"Content to Visualize: {content_description}")

            # Add style guidance
            style_guidance = self._get_style_guidance_from_content(extracted_content, user_prompt)
            if style_guidance:
                prompt_parts.append(f"Visual Style: {style_guidance}")

            # Combine all parts
            full_prompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(prompt_parts)

            # Add technical requirements
            full_prompt += "\n\nTechnical Requirements:"
            full_prompt += "\n- High quality, professional appearance"
            full_prompt += "\n- Clear, readable text if any text is included"
            full_prompt += "\n- Appropriate colors and layout"
            full_prompt += "\n- Suitable for business/professional use"

            if len(full_prompt) <= self._MAX_PROMPT_LENGTH:
                return full_prompt

            # Prompt exceeds DALL-E's limit: use AI to compress it intelligently
            compressed_prompt = await self._compress_prompt_with_ai(full_prompt, ai_service)
            if compressed_prompt and len(compressed_prompt) <= self._MAX_PROMPT_LENGTH:
                return compressed_prompt

            # Fallback to minimal prompt if AI compression fails or is still too long
            return self._build_minimal_prompt(title, user_prompt)

        except Exception as e:
            self.logger.warning(f"Error creating image prompt: {str(e)}")
            # Fallback to simple prompt (length-limited like every other path)
            return self._build_minimal_prompt(title)

    async def _compress_prompt_with_ai(self, long_prompt: str, ai_service=None) -> str:
        """Use AI to intelligently compress a long prompt while preserving key information.

        Returns:
            The compressed prompt, or None when no AI service is given, the
            call fails, or the result is empty, not longer than 50
            characters, or still over the length limit.
        """
        try:
            if not ai_service:
                return None

            limit = self._MAX_PROMPT_LENGTH
            compression_prompt = "\n".join([
                "",
                "You are an expert at creating concise, effective prompts for AI image generation.",
                "",
                f"The following prompt is too long for DALL-E ({limit} character limit) and needs to be compressed to under {limit} characters while preserving the most important visual information.",
                "",
                f"Original prompt ({len(long_prompt)} characters):",
                long_prompt,
                "",
                "Please create a compressed version that:",
                "1. Keeps the most important visual elements and requirements",
                "2. Maintains the core intent and style guidance",
                "3. Preserves technical requirements",
                f"4. Stays under {limit} characters",
                "5. Is optimized for DALL-E image generation",
                "",
                "Return only the compressed prompt, no explanations.",
                "",
            ])

            # Use AI to compress the prompt - call the AI service correctly
            # The ai_service has an aiObjects attribute that contains the actual AI interface
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType

            request = AiCallRequest(
                prompt=compression_prompt,
                options=AiCallOptions(
                    operationType=OperationType.GENERAL,
                    maxTokens=2000,
                    temperature=0.3  # Lower temperature for more consistent compression
                )
            )

            response = await ai_service.aiObjects.call(request)
            # An empty/None content is reported as an invalid result below
            compressed = (response.content or "").strip()

            # Validate the compressed prompt
            if compressed and 50 < len(compressed) <= limit:
                self.logger.info(f"Successfully compressed prompt from {len(long_prompt)} to {len(compressed)} characters")
                return compressed

            self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
            return None

        except Exception as e:
            self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
            return None

    def _analyze_content_for_visual_description(self, sections: List[Dict[str, Any]]) -> str:
        """Analyze content sections and create a visual description for AI.

        Returns:
            The per-section descriptions joined with "; ", "General document
            content" when no section yields a description, or "Document
            content" when the analysis raises.
        """
        try:
            descriptions = []

            for section in sections:
                section_type = self._get_section_type(section)
                section_data = self._get_section_data(section)

                if section_type == "table":
                    headers = section_data.get("headers", [])
                    rows = section_data.get("rows", [])
                    if headers and rows:
                        # Headers may be numbers or other non-string values;
                        # joining them raw would raise and discard the whole description.
                        header_names = ", ".join(str(header) for header in headers)
                        descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {header_names}")

                elif section_type == "bullet_list":
                    items = section_data.get("items", [])
                    if items:
                        descriptions.append(f"List with {len(items)} items")

                elif section_type == "heading":
                    text = section_data.get("text", "")
                    level = section_data.get("level", 1)
                    if text:
                        descriptions.append(f"Heading {level}: {text}")

                elif section_type == "paragraph":
                    text = section_data.get("text", "")
                    if text and len(text) > 10:  # Only include substantial paragraphs
                        # Truncate long text
                        truncated = text[:100] + "..." if len(text) > 100 else text
                        descriptions.append(f"Text content: {truncated}")

                elif section_type == "code_block":
                    code = section_data.get("code", "")
                    language = section_data.get("language", "")
                    if code:
                        # Mark the snippet with an ellipsis only when it was actually cut
                        snippet = code[:50] + "..." if len(code) > 50 else code
                        descriptions.append(f"Code block ({language}): {snippet}")

            return "; ".join(descriptions) if descriptions else "General document content"

        except Exception as e:
            self.logger.warning(f"Error analyzing content: {str(e)}")
            return "Document content"

    def _get_style_guidance_from_content(self, extracted_content: Dict[str, Any], user_prompt: str = None) -> str:
        """Determine visual style guidance based on content and user prompt.

        Returns:
            A comma-separated list of style hints; "professional, clean
            design" when nothing specific applies, or "professional design"
            when the analysis raises.
        """
        try:
            style_elements = []

            # Analyze user prompt for style hints; only the first matching group counts
            if user_prompt:
                prompt_lower = user_prompt.lower()
                for keywords, style in self._STYLE_KEYWORDS:
                    if any(word in prompt_lower for word in keywords):
                        style_elements.append(style)
                        break

            # Analyze content type for additional style hints
            sections = extracted_content.get("sections") or []
            section_types = [self._get_section_type(s) for s in sections]

            if "table" in section_types:
                style_elements.append("data-focused layout")
            if "bullet_list" in section_types:
                style_elements.append("organized, structured presentation")
            if "code_block" in section_types:
                style_elements.append("technical, developer-friendly")

            # Default style if no specific guidance
            if not style_elements:
                style_elements.append("professional, clean design")

            return ", ".join(style_elements)

        except Exception as e:
            self.logger.warning(f"Error determining style guidance: {str(e)}")
            return "professional design"
|
||||
|
|
@ -154,7 +154,7 @@ async def process_documents_and_generate_summary():
|
|||
|
||||
# userPrompt = "Analyze these documents and create a comprehensive DOCX summary document including: 1) Document types and purposes, 2) Key information and main points, 3) Important details and numbers, 4) Notable sections, 5) Overall assessment and recommendations."
|
||||
|
||||
userPrompt = "Analyze these documents and create a comprehensive form for a user to fill out"
|
||||
userPrompt = "Analyze these documents and create a fitting image for the content"
|
||||
|
||||
# userPrompt = "Extract the table from file and produce 2 lists in excel. one list with all entries, one list only with entries that are yellow highlighted."
|
||||
|
||||
|
|
@ -168,7 +168,7 @@ async def process_documents_and_generate_summary():
|
|||
prompt=userPrompt,
|
||||
documents=documents,
|
||||
options=ai_options,
|
||||
outputFormat="html",
|
||||
outputFormat="txt",
|
||||
title="Formulaire"
|
||||
)
|
||||
|
||||
|
|
@ -272,13 +272,17 @@ async def process_documents_and_generate_summary():
|
|||
file_ext = '.pptx'
|
||||
elif 'markdown' in doc_mime.lower() or 'md' in doc_mime.lower():
|
||||
file_ext = '.md'
|
||||
elif 'png' in doc_mime.lower() or 'image' in doc_mime.lower():
|
||||
file_ext = '.png'
|
||||
elif 'jpg' in doc_mime.lower() or 'jpeg' in doc_mime.lower():
|
||||
file_ext = '.jpg'
|
||||
else:
|
||||
logger.warning(f"⚠️ Unknown MIME type: {doc_mime}, using .bin")
|
||||
|
||||
# Also check filename for hints
|
||||
if doc_name and '.' in doc_name:
|
||||
name_ext = '.' + doc_name.split('.')[-1].lower()
|
||||
if name_ext in ['.docx', '.pdf', '.txt', '.html', '.json', '.csv', '.xlsx', '.pptx', '.md']:
|
||||
if name_ext in ['.docx', '.pdf', '.txt', '.html', '.json', '.csv', '.xlsx', '.pptx', '.md', '.png', '.jpg', '.jpeg']:
|
||||
file_ext = name_ext
|
||||
logger.info(f"📄 Using extension from filename: {file_ext}")
|
||||
|
||||
|
|
@ -293,8 +297,14 @@ async def process_documents_and_generate_summary():
|
|||
with open(output_path, 'w', encoding='utf-8') as f:
|
||||
f.write(doc_data)
|
||||
logger.info(f"✅ Document saved as text: {output_path} ({len(doc_data)} characters)")
|
||||
elif file_ext in ['.png', '.jpg', '.jpeg']:
|
||||
# Image formats - decode from base64
|
||||
doc_bytes = base64.b64decode(doc_data)
|
||||
with open(output_path, 'wb') as f:
|
||||
f.write(doc_bytes)
|
||||
logger.info(f"✅ Image saved: {output_path} ({len(doc_bytes)} bytes)")
|
||||
else:
|
||||
# Binary formats - decode from base64
|
||||
# Other binary formats - decode from base64
|
||||
doc_bytes = base64.b64decode(doc_data)
|
||||
with open(output_path, 'wb') as f:
|
||||
f.write(doc_bytes)
|
||||
|
|
|
|||
Loading…
Reference in a new issue