diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py
index f42bdb63..b2e256a4 100644
--- a/modules/aicore/aicorePluginOpenai.py
+++ b/modules/aicore/aicorePluginOpenai.py
@@ -59,14 +59,14 @@ class AiOpenai(BaseConnectorAi):
contextLength=128000,
costPer1kTokensInput=0.03,
costPer1kTokensOutput=0.06,
- speedRating=7, # Good speed for complex tasks
- qualityRating=9, # High quality
+ speedRating=8, # Good speed for complex tasks
+ qualityRating=10, # High quality
# capabilities removed (not used in business logic)
functionCall=self.callAiBasic,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.ADVANCED,
operationTypes=createOperationTypeRatings(
- (OperationTypeEnum.PLAN, 8),
+ (OperationTypeEnum.PLAN, 9),
(OperationTypeEnum.DATA_ANALYSE, 10),
(OperationTypeEnum.DATA_GENERATE, 10),
(OperationTypeEnum.DATA_EXTRACT, 7)
diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py
index fd4d8bcd..e0cdfc53 100644
--- a/modules/services/serviceAi/subStructureFilling.py
+++ b/modules/services/serviceAi/subStructureFilling.py
@@ -818,6 +818,11 @@ GENERATION HINT: {generationHint}
NOTE: Chapter already has a heading section. Do NOT generate a heading for the chapter title.
+IMPORTANT - SECTION INDEPENDENCE:
+- Each section is independent and self-contained
+- One section does NOT have information about another section
+- Each section must provide its own context and be understandable alone
+
AVAILABLE CONTENT PARTS:
{contentPartsIndex}
@@ -827,6 +832,7 @@ useAiCall RULES:
- useAiCall: true ONLY if ContentPart Format is "extracted" AND transformation needed
- useAiCall: false if Format is "object" or "reference" (direct insertion)
- useAiCall: false if Format is "extracted" AND simple "include full text" instruction
+- useAiCall: true if NO ContentPartIds provided (content must be generated from scratch); Sections without ContentParts MUST have a clear, detailed generationHint explaining what content to generate
RETURN JSON:
{{
@@ -850,6 +856,7 @@ EXAMPLES (all content types):
- code_block: {{"id": "s5", "content_type": "code_block", "contentPartIds": ["extracted_1"], "generationHint": "Format code", "useAiCall": true, "elements": []}}
- image: {{"id": "s6", "content_type": "image", "contentPartIds": ["obj_1"], "generationHint": "Display image", "useAiCall": false, "elements": []}}
- reference: {{"id": "s7", "content_type": "paragraph", "contentPartIds": ["ref_1"], "generationHint": "Reference", "useAiCall": false, "elements": []}}
+- NO CONTENT PARTS (generate from scratch): {{"id": "s8", "content_type": "paragraph", "contentPartIds": [], "generationHint": "Write a detailed professional paragraph explaining [specific topic or purpose]. Include [key points to cover]. Address [important aspects]. Conclude with [summary or recommendations].", "useAiCall": true, "elements": []}}
CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON.
"""
@@ -985,13 +992,19 @@ CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside th
## AVAILABLE CONTENT FOR THIS SECTION
{contentPartsText if contentPartsText else "(No content parts specified for this section)"}
+## IMPORTANT - SECTION INDEPENDENCE:
+- This section is independent and self-contained
+- You do NOT have information about other sections' content
+- Provide all necessary context within this section
+- Context above is for logical flow only, NOT for content dependencies
+
## INSTRUCTIONS
1. Generate content for section "{sectionId}" based on the generation hint above
2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data)
3. For table content_type: Create a single table with headers and rows from all ContentParts
4. For bullet_list content_type: Create a single list with items from all ContentParts
5. Format appropriately based on content_type ({contentType})
-6. Ensure the generated content fits logically between previous and following sections
+6. Ensure the generated content is self-contained and understandable independently
7. Return ONLY a JSON object with an "elements" array
8. Each element should match the content_type: {contentType}
@@ -1026,12 +1039,18 @@ CRITICAL: "content" MUST always be an object (never a string). Return ONLY valid
## AVAILABLE CONTENT FOR THIS SECTION
{contentPartsText if contentPartsText else "(No content parts specified for this section)"}
+## IMPORTANT - SECTION INDEPENDENCE:
+- This section is independent and self-contained
+- You do NOT have information about other sections' content
+- Provide all necessary context within this section
+- Context above is for logical flow only, NOT for content dependencies
+
## INSTRUCTIONS
1. Generate content for section "{sectionId}" based on the generation hint above
2. Use the available content parts to populate this section
3. For images: Use data URI format (data:image/[type];base64,[data]) when embedding base64 image data
4. For extracted text: Format appropriately based on content_type ({contentType})
-5. Ensure the generated content fits logically between previous and following sections
+5. Ensure the generated content is self-contained and understandable independently
6. Return ONLY a JSON object with an "elements" array
7. Each element should match the content_type: {contentType}
diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py
index b8db20a1..a11fba62 100644
--- a/modules/services/serviceAi/subStructureGeneration.py
+++ b/modules/services/serviceAi/subStructureGeneration.py
@@ -145,25 +145,32 @@ class StructureGenerator:
if not contentPartsIndex:
contentPartsIndex = "\n(No content parts available)"
- prompt = f"""USER REQUEST:
+ prompt = f"""USER REQUEST (for context):
+```
{userPrompt}
+```
AVAILABLE CONTENT PARTS:
{contentPartsIndex}
-TASK: Generiere Chapter-Struktur für die zu generierenden Dokumente.
+TASK: Generate Chapter Structure for the documents to be generated.
-Für jedes Chapter:
+IMPORTANT - CHAPTER INDEPENDENCE:
+- Each chapter is independent and self-contained
+- One chapter does NOT have information about another chapter
+- Each chapter must provide its own context and be understandable alone
+
+For each chapter:
- chapter id
- level (1, 2, 3, etc.)
- title
-- contentPartIds: [Liste von ContentPart-IDs]
+- contentPartIds: [List of ContentPart IDs]
- contentPartInstructions: {{
"partId": {{
- "instruction": "Wie Content strukturiert werden soll"
+ "instruction": "How content should be structured"
}}
}}
-- generationHint: Beschreibung des Inhalts
+- generationHint: Description of the content (must be self-contained with all necessary context)
OUTPUT FORMAT: {outputFormat}
diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
index ee16c5a4..e582ddff 100644
--- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
+++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
@@ -450,16 +450,34 @@ class BaseRenderer(ABC):
code, language = self._extractCodeBlockData(sectionData)
return {"content_type": "code_block", "code": code, "language": language}
elif sectionType == "image":
- base64Data, altText = self._extractImageData(sectionData)
+ # Extract image data - preserve nested content structure
+ if isinstance(sectionData, list) and sectionData:
+ # Get first element from elements array
+ element = sectionData[0] if isinstance(sectionData[0], dict) else {}
+ elif isinstance(sectionData, dict):
+ element = sectionData
+ else:
+ return {"content_type": "paragraph", "text": "[Image: Invalid data]"}
+
+ # Extract from nested content structure (standard JSON format)
+ content = element.get("content", {})
+ if not isinstance(content, dict):
+ return {"content_type": "paragraph", "text": "[Image: Invalid content]"}
+
+ base64Data = content.get("base64Data", "")
+ altText = content.get("altText", "Image")
+ caption = content.get("caption", "")
+
# Validate image data
if self._validateImageData(base64Data, altText):
+ # Return nested structure matching standard JSON format
return {
- "content_type": "image",
- "base64Data": base64Data,
- "altText": altText,
- "width": sectionData.get("width") if isinstance(sectionData, dict) else None,
- "height": sectionData.get("height") if isinstance(sectionData, dict) else None,
- "caption": sectionData.get("caption", "") if isinstance(sectionData, dict) else ""
+ "content_type": "image",
+ "content": {
+ "base64Data": base64Data,
+ "altText": altText,
+ "caption": caption
+ }
}
else:
# Return placeholder if image data is invalid
diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py
index 337811a4..c7363918 100644
--- a/modules/services/serviceGeneration/renderers/rendererDocx.py
+++ b/modules/services/serviceGeneration/renderers/rendererDocx.py
@@ -677,7 +677,40 @@ class RendererDocx(BaseRenderer):
try:
image_bytes = base64.b64decode(base64_data)
- doc.add_picture(io.BytesIO(image_bytes), width=Inches(4))
+ image_stream = io.BytesIO(image_bytes)
+
+ # Get image dimensions to calculate proper size
+ try:
+ from PIL import Image as PILImage
+ pil_image = PILImage.open(image_stream)
+ img_width_px, img_height_px = pil_image.size
+
+ # DOCX page width is typically 8.5 inches, usable width ~6.5 inches with margins
+ # Standard margins: 1 inch left/right, so usable width = 6.5 inches
+ max_width_inches = 6.5
+ max_height_inches = 9.0 # Leave room for text above/below
+
+ # Calculate scale factor to fit within page dimensions
+ # Convert pixels to inches (assuming 96 DPI for modern displays, but images may vary)
+ # Use conservative estimate: 1 inch = 96 pixels
+ img_width_inches = img_width_px / 96.0
+ img_height_inches = img_height_px / 96.0
+
+ # Calculate scale to fit
+ width_scale = max_width_inches / img_width_inches if img_width_inches > max_width_inches else 1.0
+ height_scale = max_height_inches / img_height_inches if img_height_inches > max_height_inches else 1.0
+ scale = min(width_scale, height_scale, 1.0) # Don't scale up, only down
+
+ final_width = img_width_inches * scale
+ final_height = img_height_inches * scale
+
+ # Reset stream for docx
+ image_stream.seek(0)
+ doc.add_picture(image_stream, width=Inches(final_width))
+ except Exception:
+ # Fallback: if size detection or the sized insert above raised, retry the insert at a fixed 6.0-inch width
+ image_stream.seek(0)
+ doc.add_picture(image_stream, width=Inches(6.0))
if alt_text and alt_text != "Image":
caption_para = doc.add_paragraph(f"Figure: {alt_text}")
diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py
index dda2c09f..04e7e543 100644
--- a/modules/services/serviceGeneration/renderers/rendererHtml.py
+++ b/modules/services/serviceGeneration/renderers/rendererHtml.py
@@ -417,8 +417,15 @@ class RendererHtml(BaseRenderer):
source_text = f' (Source: {source})' if source else ''
htmlParts.append(f'
{content}{source_text}
')
elif isinstance(element, dict):
- # Regular paragraph element
- text = element.get("text", element.get("content", ""))
+ # Regular paragraph element - extract from nested content structure (standard JSON format)
+ content = element.get("content", {})
+ if isinstance(content, dict):
+ text = content.get("text", "")
+ elif isinstance(content, str):
+ text = content
+ else:
+ text = ""
+
if text:
htmlParts.append(f'
{text}
')
elif isinstance(element, str):
@@ -629,10 +636,11 @@ class RendererHtml(BaseRenderer):
"""Render a JSON image to HTML with placeholder for later replacement. Expects nested content structure."""
try:
import html
- # Extract from nested content structure
+ # Extract from nested content structure (standard JSON format)
content = imageData.get("content", {})
if not isinstance(content, dict):
return ""
+
base64Data = content.get("base64Data", "")
altText = content.get("altText", "Image")
caption = content.get("caption", "")
@@ -645,7 +653,9 @@ class RendererHtml(BaseRenderer):
# Use data URI as placeholder - will be replaced with file path in _replaceImageDataUris
# Include a marker so we can find and replace it
imageMarker = f""
- imgTag = f''
+ # Add max-width and max-height to ensure image fits within page dimensions
+ # Typical page width is ~800-1200px, height varies but we limit to 600px for readability
+ imgTag = f''
if captionEscaped:
return f'{imageMarker}{imgTag}{captionEscaped}'
diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py
index a6583a33..f1c3f7fa 100644
--- a/modules/services/serviceGeneration/renderers/rendererPdf.py
+++ b/modules/services/serviceGeneration/renderers/rendererPdf.py
@@ -839,11 +839,18 @@ class RendererPdf(BaseRenderer):
availableWidth = 430.0 # Slightly smaller than frame width for safety
availableHeight = 730.0 # Slightly smaller than frame height for safety
- # Convert original image size from pixels to points (assuming 72 DPI)
- # If image DPI is different, PIL will provide correct size
- # For safety, use a conservative conversion
- imgWidthPoints = originalWidth * (inch / 72) # Convert to inches, then to points
- imgHeightPoints = originalHeight * (inch / 72)
+ # Convert original image size from pixels to points
+ # PIL provides size in pixels, need to convert to points
+ # Standard conversion: 1 inch = 72 points, typical screen DPI = 96 pixels/inch
+ # So: pixels * (72/96) = points, or pixels * 0.75 = points
+ # But for images, we should use the image's actual DPI if available
+ dpi = pilImage.info.get('dpi', (96, 96))[0] # Default to 96 DPI if not specified
+ if dpi <= 0:
+ dpi = 96 # Fallback to 96 DPI
+
+ # Convert pixels to points: 1 point = 1/72 inch, so pixels * (72/dpi) = points
+ imgWidthPoints = originalWidth * (72.0 / dpi)
+ imgHeightPoints = originalHeight * (72.0 / dpi)
# Scale to fit within available page dimensions while maintaining aspect ratio
widthScale = availableWidth / imgWidthPoints if imgWidthPoints > 0 else 1.0
diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py
index f824aa62..2fc93892 100644
--- a/modules/services/serviceGeneration/renderers/rendererPptx.py
+++ b/modules/services/serviceGeneration/renderers/rendererPptx.py
@@ -74,19 +74,19 @@ class RendererPptx(BaseRenderer):
self._currentPresentation = prs
for i, slide_data in enumerate(slidesData):
- logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars")
- # Debug: Show slide content preview
+ slide_sections = slide_data.get("sections", [])
+ slide_images = list(slide_data.get("images", [])) # Make copy so we can append
slide_content = slide_data.get('content', '')
- if slide_content:
- logger.info(f" Content preview: '{slide_content[:100]}...'")
+ hasSections = slide_sections and len(slide_sections) > 0
+
+ logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - sections: {len(slide_sections)}, images: {len(slide_images)}, content: {len(slide_content)} chars")
+
+ # Determine layout: first slide (i==0) uses title slide layout, others use title+content
+ if i == 0:
+ slideLayoutIndex = 0 # Title slide layout
else:
- logger.warning(f" ⚠️ Slide {i+1} has NO content!")
+ slideLayoutIndex = 1 # Title and content layout
- # Check if slide has images
- hasImages = slide_data.get("images") and len(slide_data.get("images", [])) > 0
-
- # Create slide with appropriate layout based on content
- slideLayoutIndex = self._getSlideLayoutIndex(slide_data, styles)
slide_layout = prs.slide_layouts[slideLayoutIndex]
slide = prs.slides.add_slide(slide_layout)
@@ -94,25 +94,33 @@ class RendererPptx(BaseRenderer):
title_shape = slide.shapes.title
title_shape.text = slide_data.get("title", "Slide")
- # Apply title styling
+ # Apply title styling - LEFT ALIGNED by default
title_style = styles.get("title", {})
if title_shape.text_frame.paragraphs[0].font:
title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44))
title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True)
title_color = self._getSafeColor(title_style.get("color", (31, 78, 121)))
title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
-
- # Handle images first (if present)
- if hasImages:
- self._addImagesToSlide(slide, slide_data.get("images", []), styles)
+ # Set left alignment for title
+ title_shape.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT
# Render sections with proper PowerPoint objects (tables, lists, etc.)
- slide_sections = slide_data.get("sections", [])
- if slide_sections:
- # Use content placeholder for structured content
- content_shape = slide.placeholders[1]
- text_frame = content_shape.text_frame
- text_frame.clear()
+ if hasSections:
+ # Use content placeholder for structured content (only if layout has placeholder[1])
+ try:
+ content_shape = slide.placeholders[1]
+ text_frame = content_shape.text_frame
+ text_frame.clear()
+ except (AttributeError, IndexError):
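+ # Layout might not have placeholder[1], create textbox instead (NOTE(review): python-pptx documents KeyError for a missing placeholder idx - confirm the except clause above covers it)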
+ from pptx.util import Inches
+ left = Inches(0.5)
+ top = Inches(1.5)
+ width = prs.slide_width - Inches(1)
+ height = prs.slide_height - top - Inches(0.5)
+ textbox = slide.shapes.add_textbox(left, top, width, height)
+ text_frame = textbox.text_frame
+ text_frame.word_wrap = True
# Track vertical position for multiple content types
current_y = Inches(1.5) # Start below title
@@ -121,6 +129,22 @@ class RendererPptx(BaseRenderer):
section_type = section.get("content_type", "paragraph")
elements = section.get("elements", [])
+ # Check if section has image content_type
+ if section_type == "image":
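+ # Collect this section's images into slide_images; only elements with an explicit "type" of "image" are taken (NOTE(review): no section_type fallback here, unlike the element loop below - confirm)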
+ for element in elements:
+ if isinstance(element, dict) and element.get("type") == "image":
+ content = element.get("content", {})
+ if isinstance(content, dict):
+ base64Data = content.get("base64Data")
+ if base64Data:
+ slide_images.append({
+ "base64Data": base64Data,
+ "altText": content.get("altText", "Image"),
+ "caption": content.get("caption", "")
+ })
+ continue # Skip rendering image sections as text
+
# Handle sections without elements (e.g., headings that create slides)
if not elements:
continue
@@ -134,53 +158,80 @@ class RendererPptx(BaseRenderer):
if not element_type:
element_type = section_type
+ # Image elements are collected into slide_images (added later via _addImagesToSlide) and are not rendered as text
+ if element_type == "image":
+ content = element.get("content", {})
+ if isinstance(content, dict):
+ base64Data = content.get("base64Data")
+ if base64Data:
+ slide_images.append({
+ "base64Data": base64Data,
+ "altText": content.get("altText", "Image"),
+ "caption": content.get("caption", "")
+ })
+ continue
+
if element_type == "table":
# Render as actual PowerPoint table
self._addTableToSlide(slide, element, styles, current_y)
current_y += Inches(2) # Space for table
elif element_type == "bullet_list" or element_type == "list":
# Render as actual PowerPoint bullet list
- self._addBulletListToSlide(slide, element, styles, text_frame)
+ if text_frame:
+ self._addBulletListToSlide(slide, element, styles, text_frame)
elif element_type == "heading":
# Render as heading in text frame
- self._addHeadingToSlide(slide, element, styles, text_frame)
+ if text_frame:
+ self._addHeadingToSlide(slide, element, styles, text_frame)
elif element_type == "paragraph":
# Render as paragraph in text frame
- self._addParagraphToSlide(slide, element, styles, text_frame)
+ if text_frame:
+ self._addParagraphToSlide(slide, element, styles, text_frame)
elif element_type == "code_block" or element_type == "code":
# Render as formatted code block
- self._addCodeBlockToSlide(slide, element, styles, text_frame)
+ if text_frame:
+ self._addCodeBlockToSlide(slide, element, styles, text_frame)
elif element_type == "extracted_text":
# Render extracted text as paragraph with styling
- content = element.get("content", "")
- source = element.get("source", "")
- if content:
- paragraph_style = styles.get("paragraph", {})
- p = text_frame.add_paragraph()
- p.text = content
- p.font.size = Pt(paragraph_style.get("font_size", 18))
- p.font.bold = paragraph_style.get("bold", False)
- p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
- if source:
- p.add_run(f" (Source: {source})").font.italic = True
+ if text_frame:
+ content = element.get("content", "")
+ source = element.get("source", "")
+ if content:
+ paragraph_style = styles.get("paragraph", {})
+ p = text_frame.add_paragraph()
+ p.text = content
+ p.font.size = Pt(paragraph_style.get("font_size", 18))
+ p.font.bold = paragraph_style.get("bold", False)
+ p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
+ p.alignment = PP_ALIGN.LEFT # Left align by default
+ if source:
+ p.add_run(f" (Source: {source})").font.italic = True
elif element_type == "reference":
# Render reference
- label = element.get("label", "Reference")
- p = text_frame.add_paragraph()
- p.text = f"[Reference: {label}]"
- p.font.italic = True
+ if text_frame:
+ label = element.get("label", "Reference")
+ p = text_frame.add_paragraph()
+ p.text = f"[Reference: {label}]"
+ p.font.italic = True
+ p.alignment = PP_ALIGN.LEFT
else:
# Fallback: try to render as paragraph
- content = element.get("content", "")
- if isinstance(content, dict):
- text = content.get("text", "")
- elif isinstance(content, str):
- text = content
- else:
- text = ""
-
- if text:
- self._addParagraphToSlide(slide, element, styles, text_frame)
+ if text_frame:
+ content = element.get("content", "")
+ if isinstance(content, dict):
+ text = content.get("text", "")
+ elif isinstance(content, str):
+ text = content
+ else:
+ text = ""
+
+ if text:
+ self._addParagraphToSlide(slide, element, styles, text_frame)
+
+ # Handle images after processing sections (images may have been extracted from sections)
+ hasImages = len(slide_images) > 0
+ if hasImages:
+ self._addImagesToSlide(slide, slide_images, styles)
# Fallback: if no sections but has content text, render as before
elif slide_content and not hasImages:
@@ -1097,13 +1148,18 @@ JSON ONLY. NO OTHER TEXT."""
pilImage = PILImage.open(imageStream)
imgWidth, imgHeight = pilImage.size
- # Scale to fit available space (max 80% of slide)
- maxWidth = availableWidth * 0.8
- maxHeight = availableHeight * 0.8
+ # Scale to fit available space (max 90% of slide for better visibility)
+ # Convert PIL pixels to PowerPoint points (1 inch = 72 points, typical screen DPI = 96)
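+ # Conversion: pixels * (72/96) = points (NOTE(review): assumes availableWidth/availableHeight and slideWidth are also in points, not EMU - confirm)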
+ imgWidthPoints = imgWidth * (72.0 / 96.0)
+ imgHeightPoints = imgHeight * (72.0 / 96.0)
- scale = min(maxWidth / imgWidth, maxHeight / imgHeight, 1.0)
- finalWidth = imgWidth * scale
- finalHeight = imgHeight * scale
+ maxWidth = availableWidth * 0.9
+ maxHeight = availableHeight * 0.9
+
+ scale = min(maxWidth / imgWidthPoints, maxHeight / imgHeightPoints, 1.0)
+ finalWidth = imgWidthPoints * scale
+ finalHeight = imgHeightPoints * scale
# Center image
left = (slideWidth - finalWidth) / 2
@@ -1184,7 +1240,12 @@ JSON ONLY. NO OTHER TEXT."""
num_cols = len(headers)
num_rows = len(rows) + 1 # +1 for header row
left = Inches(0.5)
- width = slide.presentation.slide_width - Inches(1)
+ # Get presentation from stored reference or slide
+ if hasattr(self, '_currentPresentation'):
+ prs = self._currentPresentation
+ else:
+ prs = slide.presentation
+ width = prs.slide_width - Inches(1)
row_height = Inches(0.4)
# Create table
@@ -1251,6 +1312,7 @@ JSON ONLY. NO OTHER TEXT."""
try:
from pptx.util import Pt
from pptx.dml.color import RGBColor
+ from pptx.enum.text import PP_ALIGN
# Extract from nested content structure
content = element.get("content", {})
@@ -1272,6 +1334,16 @@ JSON ONLY. NO OTHER TEXT."""
p.level = 0
p.font.size = Pt(list_style.get("font_size", 18))
p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47))))
+ p.alignment = PP_ALIGN.LEFT # Left align bullet lists
+ p.space_before = Pt(6)
+ # Enable bullet points - set bullet type to enable bullets
+ try:
+ from pptx.enum.text import MSO_AUTO_NUMBER
+ p.paragraph_format.bullet.type = MSO_AUTO_NUMBER.BULLET
+ except (ImportError, AttributeError):
+ # Fallback: rely on whatever bullet formatting the target text frame already provides (NOTE(review): not guaranteed for plain text boxes - confirm)
+ # Just ensure level is set (already done above)
+ pass
except Exception as e:
logger.warning(f"Error adding bullet list to slide: {str(e)}")
diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py
index 3ff49788..1051e7bf 100644
--- a/modules/services/serviceGeneration/renderers/rendererXlsx.py
+++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py
@@ -1057,12 +1057,21 @@ class RendererXlsx(BaseRenderer):
# Create openpyxl Image
img = OpenpyxlImage(imageStream)
- # Set image size (max width 6 inches, maintain aspect ratio)
- maxWidth = 400 # pixels (approximately 6 inches at 72 DPI)
- if img.width > maxWidth:
- scale = maxWidth / img.width
- img.width = maxWidth
- img.height = int(img.height * scale)
+ # Calculate max width based on Excel column width
+ # Excel default column width is ~64 pixels (8.43 characters at default font)
+ # Use multiple columns for image width (typically 8-10 columns = ~512-640 pixels)
+ # Standard Excel sheet width is ~1024 pixels (about 14.2 inches at 72 DPI)
+ # Use 80% of sheet width to leave margins
+ maxWidth = 800 # pixels (approximately 11 inches at 72 DPI, fits within page)
+ maxHeight = 600 # pixels (approximately 8.3 inches at 72 DPI)
+
+ # Scale image to fit within page dimensions while maintaining aspect ratio
+ width_scale = maxWidth / img.width if img.width > maxWidth else 1.0
+ height_scale = maxHeight / img.height if img.height > maxHeight else 1.0
+ scale = min(width_scale, height_scale, 1.0) # Don't scale up, only down
+
+ img.width = int(img.width * scale)
+ img.height = int(img.height * scale)
# Anchor image to cell (A column, current row)
img.anchor = f'A{startRow}'
diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py
index 1eb453ee..36673ed0 100644
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@@ -81,30 +81,61 @@ class ContentValidator:
if section.get("content_type") == "table":
if elements and isinstance(elements, list) and len(elements) > 0:
tableElement = elements[0]
- sectionSummary["caption"] = tableElement.get("caption")
- headers = tableElement.get("headers", [])
- rows = tableElement.get("rows", [])
- sectionSummary["columnCount"] = len(headers)
- sectionSummary["rowCount"] = len(rows)
- sectionSummary["headers"] = headers # Include headers for context
+ content = tableElement.get("content", {})
+ if isinstance(content, dict):
+ headers = content.get("headers", [])
+ rows = content.get("rows", [])
+ else:
+ headers = tableElement.get("headers", [])
+ rows = tableElement.get("rows", [])
+ if headers:
+ sectionSummary["columnCount"] = len(headers)
+ sectionSummary["headers"] = headers # Include headers for context
+ if rows:
+ sectionSummary["rowCount"] = len(rows)
+ sectionSummary["caption"] = tableElement.get("caption") or (content.get("caption") if isinstance(content, dict) else None)
- # For lists: extract item count
- elif section.get("content_type") == "list":
+ # For lists and bullet_lists: extract item count
+ elif section.get("content_type") in ["list", "bullet_list"]:
if elements and isinstance(elements, list) and len(elements) > 0:
listElement = elements[0]
- items = listElement.get("items", [])
- sectionSummary["itemCount"] = len(items)
+ content = listElement.get("content", {})
+ if isinstance(content, dict):
+ items = content.get("items", [])
+ else:
+ items = listElement.get("items", [])
+ if items:
+ sectionSummary["itemCount"] = len(items)
- # For paragraphs/headings: extract text preview
+ # For paragraphs/headings: extract text statistics (no preview for security)
elif section.get("content_type") in ["paragraph", "heading"]:
if elements and isinstance(elements, list) and len(elements) > 0:
textElement = elements[0]
- text = textElement.get("text", "")
+ content = textElement.get("content", {})
+ if isinstance(content, dict):
+ text = content.get("text", "")
+ else:
+ text = textElement.get("text", "")
if text:
- sectionSummary["textPreview"] = text[:100] + ("..." if len(text) > 100 else "")
- # Also check for textPreview directly in section (for web crawl results)
- if section.get("textPreview"):
- sectionSummary["textPreview"] = section.get("textPreview")
+ sectionSummary["textLength"] = len(text)
+ sectionSummary["wordCount"] = len(text.split())
+ # Also check for text length if available directly in section
+ if section.get("textLength"):
+ sectionSummary["textLength"] = section.get("textLength")
+
+ # For code blocks: extract code statistics (no preview for security)
+ elif section.get("content_type") == "code_block":
+ if elements and isinstance(elements, list) and len(elements) > 0:
+ codeElement = elements[0]
+ content = codeElement.get("content", {})
+ if isinstance(content, dict):
+ code = content.get("code", "")
+ language = content.get("language", "")
+ if code:
+ sectionSummary["codeLength"] = len(code)
+ sectionSummary["codeLineCount"] = code.count('\n') + 1
+ if language:
+ sectionSummary["language"] = language
# Wenn contentPartIds vorhanden sind, aber keine elements: Füge ContentParts-Metadaten hinzu
contentPartIds = section.get("contentPartIds", [])
@@ -120,8 +151,30 @@ class ContentValidator:
# Include any additional fields from section (generic approach)
# This ensures all action-specific fields are preserved
+ # BUT exclude type-specific KPIs that don't belong to this content_type
+ contentType = section.get("content_type", "")
+ # Define KPIs that are ONLY valid for specific types
+ typeExclusiveKpis = {
+ "table": ["columnCount", "rowCount", "headers"], # Only for tables
+ "bullet_list": ["itemCount"], # Only for bullet_list
+ "list": ["itemCount"] # Only for list
+ }
+ excludedKpis = []
+ for kpiType, kpiFields in typeExclusiveKpis.items():
+ if kpiType != contentType:
+ excludedKpis.extend(kpiFields)
+
for key, value in section.items():
- if key not in sectionSummary and key not in ["elements"]: # Skip elements as they're processed separately
+ if key not in sectionSummary and key not in ["elements"] and key not in excludedKpis:
+ # Don't copy type-specific KPIs if they're 0/empty and we didn't extract them ourselves
+ # This prevents copying columnCount: 0, rowCount: 0, headers: [] from structure generation phase
+ if key in ["columnCount", "rowCount", "headers", "itemCount"]:
+ # Skip if it's 0/empty - we'll only include KPIs we extracted from elements
+ if isinstance(value, int) and value == 0:
+ continue
+ if isinstance(value, list) and len(value) == 0:
+ continue
+
# Include simple types (str, int, float, bool, list of primitives)
if isinstance(value, (str, int, float, bool)) or (isinstance(value, list) and len(value) <= 10):
sectionSummary[key] = value
@@ -146,12 +199,60 @@ class ContentValidator:
if section.get("content_type") == "table":
if elements and isinstance(elements, list) and len(elements) > 0:
tableElement = elements[0]
- sectionSummary["caption"] = tableElement.get("caption")
- headers = tableElement.get("headers", [])
- rows = tableElement.get("rows", [])
- sectionSummary["columnCount"] = len(headers)
- sectionSummary["rowCount"] = len(rows)
- sectionSummary["headers"] = headers
+ content = tableElement.get("content", {})
+ if isinstance(content, dict):
+ headers = content.get("headers", [])
+ rows = content.get("rows", [])
+ else:
+ headers = tableElement.get("headers", [])
+ rows = tableElement.get("rows", [])
+ if headers:
+ sectionSummary["columnCount"] = len(headers)
+ sectionSummary["headers"] = headers
+ if rows:
+ sectionSummary["rowCount"] = len(rows)
+ sectionSummary["caption"] = tableElement.get("caption") or (content.get("caption") if isinstance(content, dict) else None)
+
+ # For lists and bullet_lists: extract item count
+ elif section.get("content_type") in ["list", "bullet_list"]:
+ if elements and isinstance(elements, list) and len(elements) > 0:
+ listElement = elements[0]
+ content = listElement.get("content", {})
+ if isinstance(content, dict):
+ items = content.get("items", [])
+ else:
+ items = listElement.get("items", [])
+ if items:
+ sectionSummary["itemCount"] = len(items)
+
+ # For paragraphs/headings: extract text statistics (no preview for security)
+ elif section.get("content_type") in ["paragraph", "heading"]:
+ if elements and isinstance(elements, list) and len(elements) > 0:
+ textElement = elements[0]
+ content = textElement.get("content", {})
+ if isinstance(content, dict):
+ text = content.get("text", "")
+ else:
+ text = textElement.get("text", "")
+ if text:
+ sectionSummary["textLength"] = len(text)
+ sectionSummary["wordCount"] = len(text.split())
+ if section.get("textLength"):
+ sectionSummary["textLength"] = section.get("textLength")
+
+ # For code blocks: extract code statistics (no preview for security)
+ elif section.get("content_type") == "code_block":
+ if elements and isinstance(elements, list) and len(elements) > 0:
+ codeElement = elements[0]
+ content = codeElement.get("content", {})
+ if isinstance(content, dict):
+ code = content.get("code", "")
+ language = content.get("language", "")
+ if code:
+ sectionSummary["codeLength"] = len(code)
+ sectionSummary["codeLineCount"] = code.count('\n') + 1
+ if language:
+ sectionSummary["language"] = language
# Wenn contentPartIds vorhanden sind, aber keine elements: Füge ContentParts-Metadaten hinzu
contentPartIds = section.get("contentPartIds", [])
@@ -166,8 +267,30 @@ class ContentValidator:
sectionSummary["note"] = "ContentParts referenced but metadata not available"
# Include any additional fields from section (generic approach)
+ # BUT exclude type-specific KPIs that don't belong to this content_type
+ contentType = section.get("content_type", "")
+ # Define KPIs that are ONLY valid for specific types
+ typeExclusiveKpis = {
+ "table": ["columnCount", "rowCount", "headers"], # Only for tables
+ "bullet_list": ["itemCount"], # Only for bullet_list
+ "list": ["itemCount"] # Only for list
+ }
+ excludedKpis = []
+ for kpiType, kpiFields in typeExclusiveKpis.items():
+ if kpiType != contentType:
+ excludedKpis.extend(kpiFields)
+
for key, value in section.items():
- if key not in sectionSummary and key not in ["elements"]: # Skip elements as they're processed separately
+ if key not in sectionSummary and key not in ["elements"] and key not in excludedKpis:
+ # Don't copy type-specific KPIs if they're 0/empty and we didn't extract them ourselves
+ # This prevents copying columnCount: 0, rowCount: 0, headers: [] from structure generation phase
+ if key in ["columnCount", "rowCount", "headers", "itemCount"]:
+ # Skip if it's 0/empty - we'll only include KPIs we extracted from elements
+ if isinstance(value, int) and value == 0:
+ continue
+ if isinstance(value, list) and len(value) == 0:
+ continue
+
# Include simple types (str, int, float, bool, list of primitives)
if isinstance(value, (str, int, float, bool)) or (isinstance(value, list) and len(value) <= 10):
sectionSummary[key] = value
@@ -533,10 +656,11 @@ CRITICAL: Validate ONLY metadata/structure. Documents may be binary (PDF, DOCX,
VALIDATION RULES:
1. METADATA ONLY: Use jsonStructure (sections, contentPartIds, content_type, statistics) and contentPreview (dataType, contentLength, looksLikeRenderedContent) for validation. These are METADATA indicators, NOT actual content.
2. FORMAT VALIDATION: Check mimeType/format metadata only. Do NOT inspect content to determine format. Format mismatch = wrong_format gap.
-3. CONTENT EXISTENCE: Use contentPreview.looksLikeRenderedContent=true to confirm content exists. Use jsonStructure.content_type to confirm data types exist (e.g., "image" section = image exists). Do NOT validate content quality, accuracy, or completeness of actual data values.
-4. STRUCTURE VALIDATION: Use jsonStructure.sections, statistics (counts, rowCount, columnCount) as evidence. Trust structure metadata over format claims.
+3. CONTENT EXISTENCE: Use contentPreview.looksLikeRenderedContent=true to confirm content exists. Use jsonStructure.content_type to confirm data types exist (e.g., "image" section = image exists, "bullet_list" section = bullet list exists, "table" section = table exists). If a section with a content_type exists, the content has been delivered. Do NOT assume content was AI-generated vs extracted - if the section exists, it was delivered.
+4. STRUCTURE VALIDATION: Use jsonStructure.sections, statistics (counts, rowCount, columnCount, itemCount) as evidence. Trust structure metadata over format claims. Only check KPIs if they are present (missing KPIs mean elements not yet populated, not that content is missing).
5. PROCESS VALIDATION: Use ACTION HISTORY for process-oriented criteria (e.g., "search performed", "extraction done").
6. ONE CRITERION PER EVALUATION: Evaluate each criterion independently. Do not mention other criteria.
+7. NO ASSUMPTIONS: Do NOT assume content was AI-generated vs extracted. If a section exists with content_type, the content was delivered. Only validate what is present in the metadata.
VALIDATION STEPS:
- Check ACTION HISTORY for process-oriented criteria