diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py index af1e51f6..fd4d8bcd 100644 --- a/modules/services/serviceAi/subStructureFilling.py +++ b/modules/services/serviceAi/subStructureFilling.py @@ -215,10 +215,16 @@ class StructureFiller: useAiCall = section.get("useAiCall", False) # WICHTIG: Wenn keine ContentParts vorhanden sind UND kein generationHint, kann kein AI-Call gemacht werden - # Aber: Wenn generationHint vorhanden ist, kann AI auch ohne ContentParts generieren (z.B. Executive Summary) + # Aber: Wenn generationHint vorhanden ist, SOLLTE AI verwendet werden, auch wenn useAiCall=false gesetzt ist + # (z.B. wenn AI die Struktur generiert hat, aber useAiCall falsch gesetzt wurde) if len(contentPartIds) == 0 and not generationHint: useAiCall = False logger.debug(f"Section {sectionId}: No content parts and no generation hint, setting useAiCall=False") + elif len(contentPartIds) == 0 and generationHint and not useAiCall: + # Override: If there's a generationHint but no content parts, we should use AI + # This handles cases where structure generation set useAiCall=false incorrectly + useAiCall = True + logger.info(f"Section {sectionId}: Overriding useAiCall=True (has generationHint but no content parts)") elements = [] @@ -658,14 +664,28 @@ class StructureFiller: logger.error(f"Error generating section {sectionId}: {str(e)}") # NICHT raise - Section wird mit Fehlermeldung gerendert else: - # Füge extrahierten Text direkt hinzu (kein AI-Call) - logger.debug(f"Processing section {sectionId}: Single extracted part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly") - elements.append({ - "type": "extracted_text", - "content": part.data, - "source": part.metadata.get("documentId"), - "extractionPrompt": part.metadata.get("extractionPrompt") - }) + # Füge extrahierten Content direkt hinzu (kein AI-Call) + # CRITICAL: Check part typeGroup to determine correct element type + if part.typeGroup == "image": + # Image content should be added as image element, not extracted_text + logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call - adding as image element") + elements.append({ + "type": "image", + "content": { + "base64Data": part.data, + "altText": part.metadata.get("usageHint", part.label), + "caption": part.metadata.get("caption", "") + } + }) + else: + # Text content - add as extracted_text element + logger.debug(f"Processing section {sectionId}: Single extracted TEXT part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly") + elements.append({ + "type": "extracted_text", + "content": part.data, + "source": part.metadata.get("documentId"), + "extractionPrompt": part.metadata.get("extractionPrompt") + }) section["elements"] = elements diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py index 43c85c47..337811a4 100644 --- a/modules/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -335,14 +335,26 @@ class RendererDocx(BaseRenderer): elif section_type == "heading": self._renderJsonHeading(doc, element, styles) elif section_type == "paragraph": - self._renderJsonParagraph(doc, element, styles) + # CRITICAL: Check if this is actually an image element before rendering as paragraph + # Image elements might not have type set, but have base64Data in content + content = element.get("content", {}) + if isinstance(content, dict) and content.get("base64Data"): + # This is actually an image, render it as such + self._renderJsonImage(doc, element, styles) + else: + self._renderJsonParagraph(doc, element, styles) elif section_type == "code_block": self._renderJsonCodeBlock(doc, element, styles) elif section_type == "image": self._renderJsonImage(doc, element, styles) else: - # Fallback to paragraph for unknown types - self._renderJsonParagraph(doc, element, styles) + # Fallback to paragraph for unknown types, but check for image data first + content = element.get("content", {}) + if isinstance(content, dict) and content.get("base64Data"): + # This is actually an image, render it as such + self._renderJsonImage(doc, element, styles) + else: + self._renderJsonParagraph(doc, element, styles) except Exception as e: self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}") @@ -517,13 +529,22 @@ class RendererDocx(BaseRenderer): if not isinstance(content, dict): return items = content.get("items", []) - bullet_style = styles["bullet_list"] + bullet_style = styles.get("bullet_list", {}) for item in items: if isinstance(item, str): para = doc.add_paragraph(item, style='List Bullet') elif isinstance(item, dict) and "text" in item: para = doc.add_paragraph(item["text"], style='List Bullet') + + # Apply bullet list styling from style set + if bullet_style and para.runs: + for run in para.runs: + if "font_size" in bullet_style: + run.font.size = Pt(bullet_style["font_size"]) + if "color" in bullet_style: + color_hex = bullet_style["color"].lstrip('#') + run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) except Exception as e: self.logger.warning(f"Error rendering bullet list: {str(e)}") @@ -540,7 +561,13 @@ class RendererDocx(BaseRenderer): if text: level = max(1, min(6, level)) - doc.add_heading(text, level=level) + # Use custom heading style if available, otherwise use built-in + style_name = f"Heading {level}" if level <= 2 else "Heading 1" + try: + para = doc.add_paragraph(text, style=style_name) + except KeyError: + # Fallback to built-in heading if custom style doesn't exist + doc.add_heading(text, level=level) except Exception as e: self.logger.warning(f"Error rendering heading: {str(e)}") @@ -570,6 +597,25 @@ class RendererDocx(BaseRenderer): if text: para = doc.add_paragraph(text) + # Apply paragraph styling from style set + paragraph_style = styles.get("paragraph", {}) + if paragraph_style: + for run in para.runs: + if "font_size" in paragraph_style: + run.font.size = Pt(paragraph_style["font_size"]) + if "bold" in paragraph_style: + run.font.bold = paragraph_style["bold"] + if "color" in paragraph_style: + color_hex = paragraph_style["color"].lstrip('#') + run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) + if "align" in paragraph_style: + align = paragraph_style["align"] + if align == "center": + para.alignment = WD_ALIGN_PARAGRAPH.CENTER + elif align == "right": + para.alignment = WD_ALIGN_PARAGRAPH.RIGHT + else: + para.alignment = WD_ALIGN_PARAGRAPH.LEFT except Exception as e: self.logger.warning(f"Error rendering paragraph: {str(e)}") @@ -583,16 +629,21 @@ class RendererDocx(BaseRenderer): return code = content.get("code", "") language = content.get("language", "") + code_style = styles.get("code_block", {}) if code: if language: lang_para = doc.add_paragraph(f"Code ({language}):") - lang_para.runs[0].bold = True + if lang_para.runs: + lang_para.runs[0].bold = True code_para = doc.add_paragraph(code) for run in code_para.runs: - run.font.name = 'Courier New' - run.font.size = Pt(10) + run.font.name = code_style.get("font", "Courier New") + run.font.size = Pt(code_style.get("font_size", 9)) + if "color" in code_style: + color_hex = code_style["color"].lstrip('#') + run.font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) except Exception as e: self.logger.warning(f"Error rendering code block: {str(e)}") @@ -602,24 +653,38 @@ class RendererDocx(BaseRenderer): try: # Extract from nested content structure content = image_data.get("content", {}) - if not isinstance(content, dict): - return - base64_data = content.get("base64Data", "") - alt_text = content.get("altText", "Image") + base64_data = "" + alt_text = "Image" - if base64_data: - try: - image_bytes = base64.b64decode(base64_data) - doc.add_picture(io.BytesIO(image_bytes), width=Inches(4)) - - if alt_text: - caption_para = doc.add_paragraph(f"Figure: {alt_text}") - caption_para.runs[0].italic = True - except Exception as embedError: - # Image decoding or embedding failed - raise Exception(f"Failed to decode or embed image: {str(embedError)}") - else: + if isinstance(content, dict): + base64_data = content.get("base64Data", "") + alt_text = content.get("altText", "Image") + elif isinstance(content, str): + # Content might be base64 string directly (shouldn't happen, but handle it) + self.logger.warning("Image content is a string, not a dict. This should not happen.") + return + + # If base64Data not found in content, try direct element fields (fallback) + if not base64_data: + base64_data = image_data.get("base64Data", "") + if not alt_text or alt_text == "Image": + alt_text = image_data.get("altText", "Image") + + # CRITICAL: Ensure we don't render base64 data as text + # If base64_data looks like it might be rendered elsewhere, skip it + if not base64_data: raise Exception("No image data provided (base64Data is empty)") + + try: + image_bytes = base64.b64decode(base64_data) + doc.add_picture(io.BytesIO(image_bytes), width=Inches(4)) + + if alt_text and alt_text != "Image": + caption_para = doc.add_paragraph(f"Figure: {alt_text}") + caption_para.runs[0].italic = True + except Exception as embedError: + # Image decoding or embedding failed + raise Exception(f"Failed to decode or embed image: {str(embedError)}") except Exception as e: self.logger.error(f"Error embedding image in DOCX: {str(e)}") @@ -792,7 +857,11 @@ class RendererDocx(BaseRenderer): if "heading2" in styleSet: self._createStyle(doc, "Heading 2", styleSet["heading2"], WD_STYLE_TYPE.PARAGRAPH) - # Note: List Bullet and List Number are built-in Word styles, no need to create + # Create Paragraph style + if "paragraph" in styleSet: + self._createStyle(doc, "Custom Paragraph", styleSet["paragraph"], WD_STYLE_TYPE.PARAGRAPH) + + # Note: List Bullet and List Number are built-in Word styles, but we apply custom styling to runs except Exception as e: self.logger.warning(f"Could not set up document styles: {str(e)}") diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 4d7dafe0..dda2c09f 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -712,7 +712,6 @@ class RendererHtml(BaseRenderer): # Bestimme MIME-Type und Extension mimeType = element.get("mimeType", "") or (content.get("mimeType", "") if isinstance(content, dict) else "") - if not mimeType or mimeType == "unknown": if not mimeType or mimeType == "unknown": # Versuche MIME-Type aus base64 zu erkennen if base64Data.startswith("/9j/"): diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index 850a59a4..f824aa62 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -106,11 +106,85 @@ class RendererPptx(BaseRenderer): if hasImages: self._addImagesToSlide(slide, slide_data.get("images", []), styles) - # Set content with AI-generated styling (if not image-only slide) - if slide_content or not hasImages: + # Render sections with proper PowerPoint objects (tables, lists, etc.) + slide_sections = slide_data.get("sections", []) + if slide_sections: + # Use content placeholder for structured content content_shape = slide.placeholders[1] + text_frame = content_shape.text_frame + text_frame.clear() - # Format content text with AI styles + # Track vertical position for multiple content types + current_y = Inches(1.5) # Start below title + + for section in slide_sections: + section_type = section.get("content_type", "paragraph") + elements = section.get("elements", []) + + # Handle sections without elements (e.g., headings that create slides) + if not elements: + continue + + for element in elements: + if not isinstance(element, dict): + continue + + # Check element type first, fall back to section type + element_type = element.get("type", "") + if not element_type: + element_type = section_type + + if element_type == "table": + # Render as actual PowerPoint table + self._addTableToSlide(slide, element, styles, current_y) + current_y += Inches(2) # Space for table + elif element_type == "bullet_list" or element_type == "list": + # Render as actual PowerPoint bullet list + self._addBulletListToSlide(slide, element, styles, text_frame) + elif element_type == "heading": + # Render as heading in text frame + self._addHeadingToSlide(slide, element, styles, text_frame) + elif element_type == "paragraph": + # Render as paragraph in text frame + self._addParagraphToSlide(slide, element, styles, text_frame) + elif element_type == "code_block" or element_type == "code": + # Render as formatted code block + self._addCodeBlockToSlide(slide, element, styles, text_frame) + elif element_type == "extracted_text": + # Render extracted text as paragraph with styling + content = element.get("content", "") + source = element.get("source", "") + if content: + paragraph_style = styles.get("paragraph", {}) + p = text_frame.add_paragraph() + p.text = content + p.font.size = Pt(paragraph_style.get("font_size", 18)) + p.font.bold = paragraph_style.get("bold", False) + p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) + if source: + p.add_run(f" (Source: {source})").font.italic = True + elif element_type == "reference": + # Render reference + label = element.get("label", "Reference") + p = text_frame.add_paragraph() + p.text = f"[Reference: {label}]" + p.font.italic = True + else: + # Fallback: try to render as paragraph + content = element.get("content", "") + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" + + if text: + self._addParagraphToSlide(slide, element, styles, text_frame) + + # Fallback: if no sections but has content text, render as before + elif slide_content and not hasImages: + content_shape = slide.placeholders[1] text_frame = content_shape.text_frame text_frame.clear() @@ -126,38 +200,12 @@ class RendererPptx(BaseRenderer): p.text = paragraph.strip() - # Apply AI-generated styling based on content type - if paragraph.startswith('#'): - # Header - p.text = paragraph.lstrip('#').strip() - heading_style = styles.get("heading", {}) - p.font.size = Pt(heading_style.get("font_size", 32)) - p.font.bold = heading_style.get("bold", True) - heading_color = self._getSafeColor(heading_style.get("color", (47, 47, 47))) - p.font.color.rgb = RGBColor(*heading_color) - elif paragraph.startswith('##'): - # Subheader - p.text = paragraph.lstrip('#').strip() - subheading_style = styles.get("subheading", {}) - p.font.size = Pt(subheading_style.get("font_size", 24)) - p.font.bold = subheading_style.get("bold", True) - subheading_color = self._getSafeColor(subheading_style.get("color", (79, 79, 79))) - p.font.color.rgb = RGBColor(*subheading_color) - elif paragraph.startswith('*') and paragraph.endswith('*'): - # Bold text - p.text = paragraph.strip('*') - paragraph_style = styles.get("paragraph", {}) - p.font.size = Pt(paragraph_style.get("font_size", 18)) - p.font.bold = True - paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47))) - p.font.color.rgb = RGBColor(*paragraph_color) - else: - # Regular text - paragraph_style = styles.get("paragraph", {}) - p.font.size = Pt(paragraph_style.get("font_size", 18)) - p.font.bold = paragraph_style.get("bold", False) - paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47))) - p.font.color.rgb = RGBColor(*paragraph_color) + # Apply AI-generated styling + paragraph_style = styles.get("paragraph", {}) + p.font.size = Pt(paragraph_style.get("font_size", 18)) + p.font.bold = paragraph_style.get("bold", False) + paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47))) + p.font.color.rgb = RGBColor(*paragraph_color) # Apply alignment align = paragraph_style.get("align", "left") @@ -396,8 +444,7 @@ class RendererPptx(BaseRenderer): if userPrompt and aiService: self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...") enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService) - # Convert colors to PPTX format after getting styles - enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet) + # Colors already converted in _getAiStylesWithPptxColors return self._validateStylesReadability(enhancedStyleSet) else: # Use default styles only @@ -481,104 +528,19 @@ Return ONLY this JSON with your changes: JSON ONLY. NO OTHER TEXT.""" async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: - """Get AI styles with proper PowerPoint color conversion.""" + """Get AI styles with proper PowerPoint color conversion. Uses base _getAiStyles for debug file writing.""" if not aiService: return default_styles try: - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum + # Use base template method which handles debug file writing + enhanced_styles = await self._getAiStyles(aiService, style_template, default_styles) - request_options = AiCallOptions() - request_options.operationType = OperationTypeEnum.DATA_GENERATE - - request = AiCallRequest(prompt=style_template, context="", options=request_options) - - # Check if AI service is properly configured - if not hasattr(aiService, 'aiObjects') or not aiService.aiObjects: - self.logger.warning("AI service not properly configured, using defaults") - return default_styles - - response = await aiService.callAi(request) - - # Check if response is valid - if not response: - self.logger.warning("AI service returned no response, using defaults") - return default_styles - - # json and re are already imported at module level - - # Clean and parse JSON - result = response.content.strip() if response and response.content else "" - - # Check if result is empty - if not result: - self.logger.warning("AI styling returned empty response, using defaults") - return default_styles - - # Log the raw response for debugging - self.logger.debug(f"AI styling raw response: {result[:200]}...") - - # Extract JSON from various formats - json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) - if json_match: - result = json_match.group(1).strip() - elif result.startswith('```json'): - result = re.sub(r'^```json\s*', '', result) - result = re.sub(r'\s*```$', '', result) - elif result.startswith('```'): - result = re.sub(r'^```\s*', '', result) - result = re.sub(r'\s*```$', '', result) - - # Try to extract JSON from explanatory text - json_patterns = [ - r'\{[^{}]*"title"[^{}]*\}', # Simple JSON object - r'\{.*?"title".*?\}', # JSON with title field - r'\{.*?"font_size".*?\}', # JSON with font_size field - ] - - for pattern in json_patterns: - json_match = re.search(pattern, result, re.DOTALL) - if json_match: - result = json_match.group(0) - break - - # Additional cleanup - remove any leading/trailing whitespace and newlines - result = result.strip() - - # Check if result is still empty after cleanup - if not result: - self.logger.warning("AI styling returned empty content after cleanup, using defaults") - return default_styles - - # Try to parse JSON - try: - styles = json.loads(result) - self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}") - except json.JSONDecodeError as json_error: - self.logger.warning(f"AI styling returned invalid JSON: {json_error}") - self.logger.warning(f"Raw content that failed to parse: {result[:100]}...") - # Try to extract just the JSON part if it's embedded in text - json_start = result.find('{') - json_end = result.rfind('}') - if json_start != -1 and json_end != -1 and json_end > json_start: - json_part = result[json_start:json_end+1] - try: - styles = json.loads(json_part) - self.logger.info("Successfully extracted JSON from explanatory text") - self.logger.debug(f"Extracted AI styles: {list(styles.keys())}") - except json.JSONDecodeError: - self.logger.warning("Could not extract valid JSON from response, using defaults") - return default_styles - else: - return default_styles - - # Convert colors to PowerPoint RGB format - styles = self._convertColorsFormat(styles) - - return styles + # Convert colors to PPTX format (RGB tuples) + return self._convertColorsFormat(enhanced_styles) except Exception as e: - self.logger.warning(f"AI styling failed: {str(e)}, using defaults") + self.logger.warning(f"AI style enhancement failed: {str(e)}, using defaults") return default_styles def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]: @@ -962,13 +924,10 @@ JSON ONLY. NO OTHER TEXT.""" return 1 # Default to title and content layout def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]: - """Create slides from sections based on content density and user intent.""" + """Create slides from sections: each heading creates a new slide, content accumulates until next heading.""" try: slides = [] - content_per_slide = styles.get("content_per_slide", "concise") - - # Group sections by type and create slides - current_slide_content = [] + current_slide_sections = [] # Store sections (not formatted text) for proper rendering current_slide_title = "Content Overview" for section in sections: @@ -981,13 +940,13 @@ JSON ONLY. NO OTHER TEXT.""" if section_type == "heading": # If we have accumulated content, create a slide - if current_slide_content: + if current_slide_sections: slides.append({ "title": current_slide_title, - "content": "\n\n".join(current_slide_content), + "sections": current_slide_sections.copy(), # Store sections for proper rendering "images": [] }) - current_slide_content = [] + current_slide_sections = [] # Start new slide with heading as title heading_found = False @@ -1012,13 +971,13 @@ JSON ONLY. NO OTHER TEXT.""" current_slide_title = section.get("id", "Untitled Section") elif section_type == "image": # Create separate slide for image - if current_slide_content: + if current_slide_sections: slides.append({ "title": current_slide_title, - "content": "\n\n".join(current_slide_content), + "sections": current_slide_sections.copy(), "images": [] }) - current_slide_content = [] + current_slide_sections = [] # Extract image data imageData = [] @@ -1045,20 +1004,18 @@ JSON ONLY. NO OTHER TEXT.""" slides.append({ "title": section.get("title") or (imageData[0].get("altText", "Image") if imageData else "Image"), - "content": "", + "sections": [], "images": imageData }) else: - # Add content to current slide - formatted_content = self._formatSectionContent(section) - if formatted_content: - current_slide_content.append(formatted_content) + # Add section to current slide (will be rendered properly) + current_slide_sections.append(section) # Add final slide if there's content - if current_slide_content: + if current_slide_sections: slides.append({ "title": current_slide_title, - "content": "\n\n".join(current_slide_content), + "sections": current_slide_sections.copy(), "images": [] }) @@ -1204,24 +1161,217 @@ JSON ONLY. NO OTHER TEXT.""" except Exception as e: logger.error(f"Error embedding images in PPTX slide: {str(e)}") - # Add error message text box to slide - try: - from pptx.util import Inches, Pt - from pptx.enum.text import PP_ALIGN - errorMsg = f"[Error: Could not embed image(s). {str(e)}]" - errorBox = slide.shapes.add_textbox( - Inches(0.5), - Inches(2), - slideWidth - Inches(1), - Inches(0.5) - ) - errorFrame = errorBox.text_frame - errorFrame.text = errorMsg - errorFrame.paragraphs[0].font.size = Pt(12) - errorFrame.paragraphs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color - errorFrame.paragraphs[0].alignment = PP_ALIGN.LEFT - except Exception as errorBoxError: - logger.error(f"Could not add error message to slide: {str(errorBoxError)}") + + def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float) -> None: + """Add a PowerPoint table to slide.""" + try: + from pptx.util import Inches, Pt + from pptx.enum.text import PP_ALIGN + from pptx.dml.color import RGBColor + + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return + + headers = content.get("headers", []) + rows = content.get("rows", []) + + if not headers: + return + + # Calculate table dimensions + num_cols = len(headers) + num_rows = len(rows) + 1 # +1 for header row + left = Inches(0.5) + width = slide.presentation.slide_width - Inches(1) + row_height = Inches(0.4) + + # Create table + table_shape = slide.shapes.add_table(num_rows, num_cols, left, top, width, row_height * num_rows) + table = table_shape.table + + # Set column widths + col_width = width / num_cols + for col_idx in range(num_cols): + table.columns[col_idx].width = col_width + + # Add headers with styling + header_style = styles.get("table_header", {}) + header_bg_color = self._getSafeColor(header_style.get("background", (31, 78, 121))) + header_text_color = self._getSafeColor(header_style.get("text_color", (255, 255, 255))) + header_font_size = header_style.get("font_size", 18) + + for col_idx, header in enumerate(headers): + cell = table.cell(0, col_idx) + cell.text = str(header) + cell.fill.solid() + cell.fill.fore_color.rgb = RGBColor(*header_bg_color) + cell.text_frame.paragraphs[0].font.bold = header_style.get("bold", True) + cell.text_frame.paragraphs[0].font.size = Pt(header_font_size) + cell.text_frame.paragraphs[0].font.color.rgb = RGBColor(*header_text_color) + + align = header_style.get("align", "center") + if align == "left": + cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT + elif align == "right": + cell.text_frame.paragraphs[0].alignment = PP_ALIGN.RIGHT + else: + cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER + + # Add data rows with styling + cell_style = styles.get("table_cell", {}) + cell_bg_color = self._getSafeColor(cell_style.get("background", (255, 255, 255))) + cell_text_color = self._getSafeColor(cell_style.get("text_color", (47, 47, 47))) + cell_font_size = cell_style.get("font_size", 16) + + for row_idx, row_data in enumerate(rows, 1): + for col_idx, cell_data in enumerate(row_data[:num_cols]): + cell = table.cell(row_idx, col_idx) + cell.text = str(cell_data) + cell.fill.solid() + cell.fill.fore_color.rgb = RGBColor(*cell_bg_color) + cell.text_frame.paragraphs[0].font.size = Pt(cell_font_size) + cell.text_frame.paragraphs[0].font.bold = cell_style.get("bold", False) + cell.text_frame.paragraphs[0].font.color.rgb = RGBColor(*cell_text_color) + + align = cell_style.get("align", "left") + if align == "center": + cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER + elif align == "right": + cell.text_frame.paragraphs[0].alignment = PP_ALIGN.RIGHT + else: + cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT + + except Exception as e: + logger.warning(f"Error adding table to slide: {str(e)}") + + def _addBulletListToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + """Add bullet list to slide text frame.""" + try: + from pptx.util import Pt + from pptx.dml.color import RGBColor + + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return + + items = content.get("items", []) + if not items: + return + + list_style = styles.get("bullet_list", {}) + for item in items: + p = text_frame.add_paragraph() + if isinstance(item, dict): + p.text = item.get("text", "") + else: + p.text = str(item) + + p.level = 0 + p.font.size = Pt(list_style.get("font_size", 18)) + p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47)))) + + except Exception as e: + logger.warning(f"Error adding bullet list to slide: {str(e)}") + + def _addHeadingToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + """Add heading to slide text frame.""" + try: + from pptx.util import Pt + from pptx.dml.color import RGBColor + + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return + + text = content.get("text", "") + level = content.get("level", 1) + + if text: + p = text_frame.add_paragraph() + p.text = text + p.level = min(level - 1, 2) # PowerPoint supports 0-2 levels + + heading_style = styles.get("heading", {}) + p.font.size = Pt(heading_style.get("font_size", 32)) + p.font.bold = heading_style.get("bold", True) + p.font.color.rgb = RGBColor(*self._getSafeColor(heading_style.get("color", (47, 47, 47)))) + + except Exception as e: + logger.warning(f"Error adding heading to slide: {str(e)}") + + def _addParagraphToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + """Add paragraph to slide text frame.""" + try: + from pptx.util import Pt + from pptx.dml.color import RGBColor + from pptx.enum.text import PP_ALIGN + + # Extract from nested content structure + content = element.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" + + if text: + p = text_frame.add_paragraph() + p.text = text + + paragraph_style = styles.get("paragraph", {}) + p.font.size = Pt(paragraph_style.get("font_size", 18)) + p.font.bold = paragraph_style.get("bold", False) + p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) + + align = paragraph_style.get("align", "left") + if align == "center": + p.alignment = PP_ALIGN.CENTER + elif align == "right": + p.alignment = PP_ALIGN.RIGHT + else: + p.alignment = PP_ALIGN.LEFT + + except Exception as e: + logger.warning(f"Error adding paragraph to slide: {str(e)}") + + def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + """Add code block to slide text frame.""" + try: + from pptx.util import Pt + from pptx.dml.color import RGBColor + + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return + + code = content.get("code", "") + language = content.get("language", "") + + if code: + code_style = styles.get("code_block", {}) + code_font = code_style.get("font", "Courier New") + code_font_size = code_style.get("font_size", 9) + code_color = self._getSafeColor(code_style.get("color", (47, 47, 47))) + + p = text_frame.add_paragraph() + if language: + p.text = f"Code ({language}):" + p.font.bold = True + p = text_frame.add_paragraph() + + p.text = code + p.font.name = code_font + p.font.size = Pt(code_font_size) + p.font.color.rgb = RGBColor(*code_color) + + except Exception as e: + logger.warning(f"Error adding code block to slide: {str(e)}") def _formatTimestamp(self) -> str: """Format current timestamp for presentation generation.""" diff --git a/tests/functional/test10_document_generation_formats.py b/tests/functional/test10_document_generation_formats.py index 941034ba..05532313 100644 --- a/tests/functional/test10_document_generation_formats.py +++ b/tests/functional/test10_document_generation_formats.py @@ -153,6 +153,17 @@ class DocumentGenerationFormatsTester10: " - A table summarizing transaction details\n" "5) A conclusion paragraph with recommendations\n\n" "Format as a professional PDF document suitable for printing." + ), + "html": ( + "Create a professional HTML document about 'Fuel Station Receipt Analysis' with:\n" + "1) A main title\n" + "2) An introduction paragraph explaining the receipt analysis\n" + "3) Extract and include the image from the attached PDF document (B2025-02c.pdf)\n" + "4) A section analyzing the receipt data with:\n" + " - Bullet points of key findings\n" + " - A table summarizing transaction details\n" + "5) A conclusion paragraph with recommendations\n\n" + "Format as a professional HTML document with proper styling, responsive design, and embedded CSS." ) } @@ -350,7 +361,8 @@ class DocumentGenerationFormatsTester10: "pdf": ["application/pdf"], "docx": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"], "xlsx": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"], - "pptx": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] + "pptx": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"], + "html": ["text/html", "application/xhtml+xml"] } # Expected file extensions @@ -358,7 +370,8 @@ class DocumentGenerationFormatsTester10: "pdf": [".pdf"], "docx": [".docx"], "xlsx": [".xlsx"], - "pptx": [".pptx"] + "pptx": [".pptx"], + "html": [".html", ".htm"] } formatLower = expectedFormat.lower() @@ -398,12 +411,12 @@ class DocumentGenerationFormatsTester10: return verification async def testAllFormats(self) -> Dict[str, Any]: - """Test document generation in DOCX, XLSX, PPTX, and PDF formats.""" + """Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats.""" print("\n" + "="*80) - print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, AND PDF FORMATS") + print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, PDF, AND HTML FORMATS") print("="*80) - formats = ["docx", "xlsx", "pptx", "pdf"] + formats = ["docx", "xlsx", "pptx", "pdf", "html"] results = {} for format in formats: @@ -456,7 +469,7 @@ class DocumentGenerationFormatsTester10: async def runTest(self): """Run the complete test.""" print("\n" + "="*80) - print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF") + print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF, HTML") print("="*80) try: