diff --git a/modules/connectors/connectorAiOpenai.py b/modules/connectors/connectorAiOpenai.py index 4a9f4888..692fe422 100644 --- a/modules/connectors/connectorAiOpenai.py +++ b/modules/connectors/connectorAiOpenai.py @@ -188,4 +188,83 @@ class AiOpenai: except Exception as e: logger.error(f"Error during image analysis: {str(e)}", exc_info=True) - return f"[Error during image analysis: {str(e)}]" \ No newline at end of file + return f"[Error during image analysis: {str(e)}]" + + async def generateImage(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid") -> Dict[str, Any]: + """ + Generate an image using DALL-E 3. + + Args: + prompt: The text prompt for image generation + size: Image size (1024x1024, 1792x1024, or 1024x1792) + quality: Image quality (standard or hd) + style: Image style (vivid or natural) + + Returns: + Dictionary with success status and image data + """ + try: + logger.debug(f"Starting image generation with prompt: '{prompt[:100]}...'") + + # DALL-E 3 API endpoint + dalle_url = "https://api.openai.com/v1/images/generations" + + payload = { + "model": "dall-e-3", + "prompt": prompt, + "size": size, + "quality": quality, + "style": style, + "n": 1, + "response_format": "b64_json" # Get base64 data directly instead of URLs + } + + # Create a separate client for DALL-E API calls + dalle_client = httpx.AsyncClient( + timeout=120.0, + headers={ + "Authorization": f"Bearer {self.apiKey}", + "Content-Type": "application/json" + } + ) + + response = await dalle_client.post( + dalle_url, + json=payload + ) + + await dalle_client.aclose() + + if response.status_code != 200: + logger.error(f"DALL-E API error: {response.status_code} - {response.text}") + return { + "success": False, + "error": f"DALL-E API error: {response.status_code} - {response.text}" + } + + responseJson = response.json() + + if "data" in responseJson and len(responseJson["data"]) > 0: + image_data = responseJson["data"][0]["b64_json"] + + 
logger.info(f"Successfully generated image: {len(image_data)} characters") + return { + "success": True, + "image_data": image_data, + "size": size, + "quality": quality, + "style": style + } + else: + logger.error("No image data in DALL-E response") + return { + "success": False, + "error": "No image data in DALL-E response" + } + + except Exception as e: + logger.error(f"Error during image generation: {str(e)}", exc_info=True) + return { + "success": False, + "error": f"Error during image generation: {str(e)}" + } \ No newline at end of file diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index 34c7387c..4c6b7001 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -140,6 +140,109 @@ class BaseRenderer(ABC): alt_text = section_data.get("altText", "Image") return base64_data, alt_text + def _render_image_section(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any: + """ + Render an image section. This is a base implementation that should be overridden + by format-specific renderers. 
+ + Args: + section: Image section data + styles: Optional styling information + + Returns: + Format-specific image representation + """ + section_data = self._get_section_data(section) + base64_data, alt_text = self._extract_image_data(section_data) + + # Base implementation returns a simple dict + # Format-specific renderers should override this method + return { + "type": "image", + "base64Data": base64_data, + "altText": alt_text, + "width": section_data.get("width", None), + "height": section_data.get("height", None), + "caption": section_data.get("caption", "") + } + + def _validate_image_data(self, base64_data: str, alt_text: str) -> bool: + """Validate image data.""" + if not base64_data: + self.logger.warning("Image section has no base64 data") + return False + + if not alt_text: + self.logger.warning("Image section has no alt text") + return False + + # Basic base64 validation + try: + import base64 + base64.b64decode(base64_data, validate=True) + return True + except Exception as e: + self.logger.warning(f"Invalid base64 image data: {str(e)}") + return False + + def _get_image_dimensions(self, base64_data: str) -> Tuple[int, int]: + """ + Get image dimensions from base64 data. + This is a helper method that format-specific renderers can use. + """ + try: + import base64 + from PIL import Image + import io + + # Decode base64 data + image_data = base64.b64decode(base64_data) + image = Image.open(io.BytesIO(image_data)) + + return image.size # Returns (width, height) + + except Exception as e: + self.logger.warning(f"Could not determine image dimensions: {str(e)}") + return (0, 0) + + def _resize_image_if_needed(self, base64_data: str, max_width: int = 800, max_height: int = 600) -> str: + """ + Resize image if it exceeds maximum dimensions. + Returns the resized image as base64 string. 
+ """ + try: + import base64 + from PIL import Image + import io + + # Decode base64 data + image_data = base64.b64decode(base64_data) + image = Image.open(io.BytesIO(image_data)) + + # Check if resizing is needed + width, height = image.size + if width <= max_width and height <= max_height: + return base64_data # No resizing needed + + # Calculate new dimensions maintaining aspect ratio + ratio = min(max_width / width, max_height / height) + new_width = int(width * ratio) + new_height = int(height * ratio) + + # Resize image + resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS) + + # Convert back to base64 + buffer = io.BytesIO() + resized_image.save(buffer, format=image.format or 'PNG') + resized_data = buffer.getvalue() + + return base64.b64encode(resized_data).decode('utf-8') + + except Exception as e: + self.logger.warning(f"Could not resize image: {str(e)}") + return base64_data # Return original if resize fails + def _get_supported_section_types(self) -> List[str]: """Return list of supported section types.""" return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"] @@ -170,7 +273,19 @@ class BaseRenderer(ABC): return {"type": "code_block", "code": code, "language": language} elif section_type == "image": base64_data, alt_text = self._extract_image_data(section_data) - return {"type": "image", "base64Data": base64_data, "altText": alt_text} + # Validate image data + if self._validate_image_data(base64_data, alt_text): + return { + "type": "image", + "base64Data": base64_data, + "altText": alt_text, + "width": section_data.get("width"), + "height": section_data.get("height"), + "caption": section_data.get("caption", "") + } + else: + # Return placeholder if image data is invalid + return {"type": "paragraph", "text": f"[Image: {alt_text}]"} else: # Fallback to paragraph text = self._extract_paragraph_text(section_data) diff --git a/modules/services/serviceGeneration/renderers/rendererImage.py 
"""
Image renderer for report generation using AI image generation.
"""

from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Optional, Tuple, List
import base64
import logging

logger = logging.getLogger(__name__)

class RendererImage(BaseRenderer):
    """Renders content to image format using AI image generation (DALL-E)."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported image formats."""
        return ['png', 'jpg', 'jpeg', 'image']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['img', 'picture', 'photo', 'graphic']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for image renderer."""
        return 90

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """
        Render extracted JSON content to image format using AI image generation.

        Args:
            extracted_content: Structured document content (must contain "sections")
            title: Document title used in the generation prompt
            user_prompt: Original user request, used to steer the visual style
            ai_service: Service exposing aiObjects.generateImage / aiObjects.call

        Returns:
            Tuple of (base64-encoded image data, "image/png")

        Raises:
            Exception: When image generation fails (no silent fallback by design).
        """
        try:
            image_content = await self._generate_ai_image(extracted_content, title, user_prompt, ai_service)
            return image_content, "image/png"
        except Exception as e:
            self.logger.error(f"Error rendering image: {str(e)}")
            # Re-raise instead of using a fallback; chain the cause so the
            # original traceback is preserved for callers/logs.
            raise Exception(f"Image rendering failed: {str(e)}") from e

    async def _generate_ai_image(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Generate an AI image from extracted content and return its base64 payload."""
        try:
            if not ai_service:
                raise ValueError("AI service is required for image generation")

            # Validate JSON structure before building a prompt from it.
            if not isinstance(extracted_content, dict):
                raise ValueError("Extracted content must be a dictionary")
            if "sections" not in extracted_content:
                raise ValueError("Extracted content must contain 'sections' field")

            # Prefer the title embedded in the JSON metadata over the caller's.
            document_title = extracted_content.get("metadata", {}).get("title", title)

            image_prompt = await self._create_image_generation_prompt(extracted_content, document_title, user_prompt, ai_service)

            # Generate the image via the AI service's DALL-E integration.
            image_result = await ai_service.aiObjects.generateImage(
                prompt=image_prompt,
                size="1024x1024",
                quality="standard",
                style="vivid"
            )

            # generateImage reports failures in the result dict, not by raising.
            if image_result and image_result.get("success", False):
                image_data = image_result.get("image_data", "")
                if image_data:
                    return image_data
                raise ValueError("No image data returned from AI")

            error_msg = image_result.get("error", "Unknown error") if image_result else "No result"
            raise ValueError(f"AI image generation failed: {error_msg}")

        except Exception as e:
            self.logger.error(f"Error generating AI image: {str(e)}")
            # Chain the cause (previous version dropped it).
            raise Exception(f"AI image generation failed: {str(e)}") from e

    async def _create_image_generation_prompt(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """
        Create a detailed prompt for AI image generation based on the content.

        Builds the prompt from the user's request, the document title, a
        textual summary of the sections, and style guidance, then enforces
        DALL-E's 4000-character prompt limit (AI compression first, minimal
        prompt as fallback). Never raises — falls back to a simple prompt.
        """
        try:
            prompt_parts = []

            # Add user's original intent if available
            if user_prompt:
                prompt_parts.append(f"User Request: {user_prompt}")

            prompt_parts.append(f"Document Title: {title}")

            # Summarize sections into a visual description for the model.
            sections = extracted_content.get("sections", [])
            content_description = self._analyze_content_for_visual_description(sections)
            if content_description:
                prompt_parts.append(f"Content to Visualize: {content_description}")

            style_guidance = self._get_style_guidance_from_content(extracted_content, user_prompt)
            if style_guidance:
                prompt_parts.append(f"Visual Style: {style_guidance}")

            full_prompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(prompt_parts)

            full_prompt += "\n\nTechnical Requirements:"
            full_prompt += "\n- High quality, professional appearance"
            full_prompt += "\n- Clear, readable text if any text is included"
            full_prompt += "\n- Appropriate colors and layout"
            full_prompt += "\n- Suitable for business/professional use"

            # Enforce DALL-E's 4000-character prompt limit.
            if len(full_prompt) > 4000:
                # First attempt: let the AI compress the prompt intelligently.
                compressed_prompt = await self._compress_prompt_with_ai(full_prompt, ai_service)
                if compressed_prompt and len(compressed_prompt) <= 4000:
                    return compressed_prompt

                # Fallback: a minimal prompt built from title + user request.
                minimal_prompt = f"Create a professional image representing: {title}"
                if user_prompt:
                    minimal_prompt += f" - {user_prompt}"

                # Last resort: hard-truncate even the minimal prompt.
                if len(minimal_prompt) > 4000:
                    minimal_prompt = minimal_prompt[:3997] + "..."

                return minimal_prompt

            return full_prompt

        except Exception as e:
            self.logger.warning(f"Error creating image prompt: {str(e)}")
            # Fallback to simple prompt
            return f"Create a professional image representing: {title}"

    async def _compress_prompt_with_ai(self, long_prompt: str, ai_service=None) -> Optional[str]:
        """
        Use AI to intelligently compress a long prompt while preserving key
        information. Returns the compressed prompt, or None when no service
        is available, compression fails, or the result is invalid.
        """
        try:
            if not ai_service:
                return None

            compression_prompt = f"""
You are an expert at creating concise, effective prompts for AI image generation.

The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information.

Original prompt ({len(long_prompt)} characters):
{long_prompt}

Please create a compressed version that:
1. Keeps the most important visual elements and requirements
2. Maintains the core intent and style guidance
3. Preserves technical requirements
4. Stays under 4000 characters
5. Is optimized for DALL-E image generation

Return only the compressed prompt, no explanations.
"""

            # Call the AI service through its aiObjects interface.
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType

            request = AiCallRequest(
                prompt=compression_prompt,
                options=AiCallOptions(
                    operationType=OperationType.GENERAL,
                    maxTokens=2000,
                    temperature=0.3  # Lower temperature for more consistent compression
                )
            )

            response = await ai_service.aiObjects.call(request)
            compressed = response.content.strip()

            # Reject empty, over-long, or implausibly short results.
            if compressed and 50 < len(compressed) <= 4000:
                self.logger.info(f"Successfully compressed prompt from {len(long_prompt)} to {len(compressed)} characters")
                return compressed

            self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
            return None

        except Exception as e:
            self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
            return None

    def _analyze_content_for_visual_description(self, sections: List[Dict[str, Any]]) -> str:
        """Analyze content sections and create a visual description for AI."""
        try:
            descriptions = []

            for section in sections:
                section_type = self._get_section_type(section)
                section_data = self._get_section_data(section)

                if section_type == "table":
                    headers = section_data.get("headers", [])
                    rows = section_data.get("rows", [])
                    if headers and rows:
                        descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}")

                elif section_type == "bullet_list":
                    items = section_data.get("items", [])
                    if items:
                        descriptions.append(f"List with {len(items)} items")

                elif section_type == "heading":
                    text = section_data.get("text", "")
                    level = section_data.get("level", 1)
                    if text:
                        descriptions.append(f"Heading {level}: {text}")

                elif section_type == "paragraph":
                    text = section_data.get("text", "")
                    if text and len(text) > 10:  # Only include substantial paragraphs
                        truncated = text[:100] + "..." if len(text) > 100 else text
                        descriptions.append(f"Text content: {truncated}")

                elif section_type == "code_block":
                    code = section_data.get("code", "")
                    language = section_data.get("language", "")
                    if code:
                        # Only add an ellipsis when the snippet was actually cut.
                        snippet = code[:50] + "..." if len(code) > 50 else code
                        descriptions.append(f"Code block ({language}): {snippet}")

            return "; ".join(descriptions) if descriptions else "General document content"

        except Exception as e:
            self.logger.warning(f"Error analyzing content: {str(e)}")
            return "Document content"

    def _get_style_guidance_from_content(self, extracted_content: Dict[str, Any], user_prompt: str = None) -> str:
        """Determine visual style guidance based on content and user prompt."""
        try:
            style_elements = []

            # Keyword hints in the user prompt pick a style family.
            if user_prompt:
                prompt_lower = user_prompt.lower()

                if any(word in prompt_lower for word in ["modern", "contemporary", "sleek"]):
                    style_elements.append("modern, clean design")
                elif any(word in prompt_lower for word in ["classic", "traditional", "formal"]):
                    style_elements.append("classic, formal design")
                elif any(word in prompt_lower for word in ["creative", "artistic", "colorful"]):
                    style_elements.append("creative, artistic design")
                elif any(word in prompt_lower for word in ["corporate", "business", "professional"]):
                    style_elements.append("corporate, professional design")

            # Content structure adds further hints.
            sections = extracted_content.get("sections", [])
            has_tables = any(self._get_section_type(s) == "table" for s in sections)
            has_lists = any(self._get_section_type(s) == "bullet_list" for s in sections)
            has_code = any(self._get_section_type(s) == "code_block" for s in sections)

            if has_tables:
                style_elements.append("data-focused layout")
            if has_lists:
                style_elements.append("organized, structured presentation")
            if has_code:
                style_elements.append("technical, developer-friendly")

            # Default style if no specific guidance
            if not style_elements:
                style_elements.append("professional, clean design")

            return ", ".join(style_elements)

        except Exception as e:
            self.logger.warning(f"Error determining style guidance: {str(e)}")
            return "professional design"
@@ -168,7 +168,7 @@ async def process_documents_and_generate_summary(): prompt=userPrompt, documents=documents, options=ai_options, - outputFormat="html", + outputFormat="txt", title="Formulaire" ) @@ -272,13 +272,17 @@ async def process_documents_and_generate_summary(): file_ext = '.pptx' elif 'markdown' in doc_mime.lower() or 'md' in doc_mime.lower(): file_ext = '.md' + elif 'png' in doc_mime.lower() or 'image' in doc_mime.lower(): + file_ext = '.png' + elif 'jpg' in doc_mime.lower() or 'jpeg' in doc_mime.lower(): + file_ext = '.jpg' else: logger.warning(f"⚠️ Unknown MIME type: {doc_mime}, using .bin") # Also check filename for hints if doc_name and '.' in doc_name: name_ext = '.' + doc_name.split('.')[-1].lower() - if name_ext in ['.docx', '.pdf', '.txt', '.html', '.json', '.csv', '.xlsx', '.pptx', '.md']: + if name_ext in ['.docx', '.pdf', '.txt', '.html', '.json', '.csv', '.xlsx', '.pptx', '.md', '.png', '.jpg', '.jpeg']: file_ext = name_ext logger.info(f"📄 Using extension from filename: {file_ext}") @@ -293,8 +297,14 @@ async def process_documents_and_generate_summary(): with open(output_path, 'w', encoding='utf-8') as f: f.write(doc_data) logger.info(f"✅ Document saved as text: {output_path} ({len(doc_data)} characters)") + elif file_ext in ['.png', '.jpg', '.jpeg']: + # Image formats - decode from base64 + doc_bytes = base64.b64decode(doc_data) + with open(output_path, 'wb') as f: + f.write(doc_bytes) + logger.info(f"✅ Image saved: {output_path} ({len(doc_bytes)} bytes)") else: - # Binary formats - decode from base64 + # Other binary formats - decode from base64 doc_bytes = base64.b64decode(doc_data) with open(output_path, 'wb') as f: f.write(doc_bytes)