enhanced document generation with images
parent b2196bc6a3 · commit 982932d2f5
39 changed files with 6236 additions and 442 deletions
@@ -19,12 +19,14 @@ supportedSectionTypes: List[str] = [
 ]

 # Canonical JSON template used for AI generation (documents array + sections)
-# Rendering pipelines can select the first document and read its sections.
+# This template is used for STRUCTURE generation - sections have empty elements arrays.
+# For content generation, elements arrays will be populated later.
 jsonTemplateDocument: str = """{
     "metadata": {
         "split_strategy": "single_document",
         "source_documents": [],
-        "extraction_method": "ai_generation"
+        "extraction_method": "ai_generation",
+        "title": "{{DOCUMENT_TITLE}}"
     },
     "documents": [
         {
@@ -33,56 +35,77 @@ jsonTemplateDocument: str = """{
             "filename": "document.json",
             "sections": [
                 {
-                    "id": "section_heading_example",
+                    "id": "section_heading_main_title",
                     "content_type": "heading",
-                    "elements": [
-                        {"level": 1, "text": "Heading Text"}
-                    ],
-                    "order": 0
+                    "complexity": "simple",
+                    "generation_hint": "Main document title heading",
+                    "order": 1,
+                    "elements": []
                 },
                 {
-                    "id": "section_paragraph_example",
+                    "id": "section_paragraph_introduction",
                     "content_type": "paragraph",
-                    "elements": [
-                        {"text": "Paragraph text content"}
-                    ],
-                    "order": 0
+                    "complexity": "simple",
+                    "generation_hint": "Introduction paragraph",
+                    "order": 2,
+                    "elements": []
+                },
+                {
+                    "id": "section_heading_section_1",
+                    "content_type": "heading",
+                    "complexity": "simple",
+                    "generation_hint": "Section heading for topic 1",
+                    "order": 3,
+                    "elements": []
+                },
+                {
+                    "id": "section_paragraph_section_1",
+                    "content_type": "paragraph",
+                    "complexity": "simple",
+                    "generation_hint": "Content paragraph for section 1",
+                    "order": 4,
+                    "elements": []
                 },
                 {
                     "id": "section_bullet_list_example",
                     "content_type": "bullet_list",
-                    "elements": [
-                        {
-                            "items": ["Item 1", "Item 2"]
-                        }
-                    ],
-                    "order": 0
+                    "complexity": "simple",
+                    "generation_hint": "Bullet list items",
+                    "order": 5,
+                    "elements": []
+                },
+                {
+                    "id": "section_image_example",
+                    "content_type": "image",
+                    "complexity": "complex",
+                    "generation_hint": "Illustration for document",
+                    "image_prompt": "A detailed description for image generation",
+                    "order": 6,
+                    "elements": []
                 },
                 {
                     "id": "section_table_example",
                     "content_type": "table",
-                    "elements": [
-                        {
-                            "headers": ["Column 1", "Column 2"],
-                            "rows": [
-                                ["Row 1 Col 1", "Row 1 Col 2"],
-                                ["Row 2 Col 1", "Row 2 Col 2"]
-                            ],
-                            "caption": "Table caption"
-                        }
-                    ],
-                    "order": 0
+                    "complexity": "simple",
+                    "generation_hint": "Data table with relevant information",
+                    "order": 7,
+                    "elements": []
                 },
                 {
                     "id": "section_code_example",
                     "content_type": "code_block",
-                    "elements": [
-                        {
-                            "code": "function example() { return true; }",
-                            "language": "javascript"
-                        }
-                    ],
-                    "order": 0
+                    "complexity": "simple",
+                    "generation_hint": "Code example or snippet",
+                    "order": 8,
+                    "elements": []
+                },
+                {
+                    "id": "section_paragraph_conclusion",
+                    "content_type": "paragraph",
+                    "complexity": "simple",
+                    "generation_hint": "Conclusion paragraph",
+                    "order": 9,
+                    "elements": []
                 }
             ]
         }
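The template above now drives a two-phase pipeline: structure generation emits sections with empty elements arrays, and a later content pass fills them in. Below is a minimal sketch of that fill step, assuming a hypothetical generateElementsForSection callback (not part of this commit) that maps a section's content_type and generation_hint to concrete elements:

import json

def fillTemplateSections(templateJson: str, generateElementsForSection) -> dict:
    """Populate the empty elements arrays of a structure template.

    generateElementsForSection is a hypothetical callback taking a section
    dict (content_type, generation_hint, image_prompt, ...) and returning
    the elements list for that section.
    """
    document = json.loads(templateJson)
    for doc in document.get("documents", []):
        # Process sections in their declared order so the fill is deterministic
        for section in sorted(doc.get("sections", []), key=lambda s: s.get("order", 0)):
            if not section.get("elements"):
                section["elements"] = generateElementsForSection(section)
    return document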
@@ -19,9 +19,9 @@ class WorkflowActionParameter(BaseModel):
    name: str = Field(description="Parameter name")
    type: str = Field(description="Python type as string: 'str', 'int', 'bool', 'List[str]', etc.")
    frontendType: FrontendType = Field(description="UI rendering type (from global FrontendType enum)")
-   frontendOptions: Optional[Union[str, List[Dict[str, Any]]]] = Field(
+   frontendOptions: Optional[Union[str, List[str]]] = Field(
        None,
-       description="Options for select/multiselect/custom types. String reference (e.g., 'user.connection') or static list. For custom types, this is automatically set to the API endpoint."
+       description="Options for select/multiselect/custom types. String reference (e.g., 'user.connection') or list of strings (e.g., ['txt', 'json']). For custom types, this is automatically set to the API endpoint."
    )
    required: bool = Field(False, description="Whether parameter is required")
    default: Optional[Any] = Field(None, description="Default value")
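A hedged usage sketch of the narrowed field type. Field names come from the model above; the SELECT enum member is an assumption about FrontendType, not something this diff shows:

fileTypeParam = WorkflowActionParameter(
    name="fileType",
    type="str",
    frontendType=FrontendType.SELECT,   # assumed enum member, for illustration
    frontendOptions=["txt", "json"],    # previously List[Dict[str, Any]], now plain strings
    required=True,
    default="txt",
)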
@@ -57,6 +57,9 @@ class Services:
        from modules.interfaces.interfaceDbComponentObjects import getInterface as getComponentInterface
        self.interfaceDbComponent = getComponentInterface(user)

+       # Expose RBAC directly on services for convenience
+       self.rbac = self.interfaceDbApp.rbac if self.interfaceDbApp else None
+
        # Initialize service packages

        from .serviceExtraction.mainServiceExtraction import ExtractionService
@@ -1206,37 +1206,74 @@ If no trackable items can be identified, return: {{"kpis": []}}
        else:
            content_for_generation = None

-       self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
-       from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
-
-       generation_prompt = await buildGenerationPrompt(
-           outputFormat, prompt, title, content_for_generation, None, self.services
-       )
-
-       promptArgs = {
-           "outputFormat": outputFormat,
-           "userPrompt": prompt,
-           "title": title,
-           "extracted_content": content_for_generation,
-           "services": self.services
-       }
-
-       self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
-       # Extract user prompt from promptArgs for task completion analysis
-       userPrompt = None
-       if promptArgs:
-           userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
-
-       # Track generation progress - the looping function will update with byte progress
-       generated_json = await self._callAiWithLooping(
-           generation_prompt,
-           options,
-           "document_generation",
-           buildGenerationPrompt,
-           promptArgs,
-           aiOperationId,
-           userPrompt=userPrompt
-       )
+       # Detect if this is a section generation prompt (not full document generation)
+       # Section prompts contain "SECTION TO GENERATE" marker
+       isSectionGeneration = "SECTION TO GENERATE" in prompt or "CRITICAL: Return ONLY a JSON object with an \"elements\" array" in prompt
+
+       if isSectionGeneration:
+           # For section generation, use the prompt directly without wrapping
+           # Section prompts are already complete and should not be wrapped in document generation template
+           logger.debug("Detected section generation prompt - skipping document generation wrapper")
+           generation_prompt = prompt
+
+           # Call AI directly without looping (sections are simple, single-call)
+           self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for section generation")
+           request = AiCallRequest(
+               prompt=generation_prompt,
+               context="",
+               options=options
+           )
+           response = await self.callAi(request)
+           generated_json = response.content if response and response.content else ""
+
+           # For section generation, return the raw JSON content directly
+           # No rendering needed - sections are just JSON elements
+           self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Section content generated")
+           self.services.chat.progressLogFinish(aiOperationId, True)
+
+           metadata = AiResponseMetadata(
+               title=title or "Section Content",
+               operationType=opType.value if opType else None
+           )
+
+           return AiResponse(
+               content=generated_json,
+               metadata=metadata,
+               documents=[]
+           )
+       else:
+           # Full document generation - use the wrapper
+           self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
+           from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
+
+           generation_prompt = await buildGenerationPrompt(
+               outputFormat, prompt, title, content_for_generation, None, self.services
+           )
+
+           promptArgs = {
+               "outputFormat": outputFormat,
+               "userPrompt": prompt,
+               "title": title,
+               "extracted_content": content_for_generation,
+               "services": self.services
+           }
+
+           self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
+           # Extract user prompt from promptArgs for task completion analysis
+           userPrompt = None
+           if promptArgs:
+               userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
+
+           # Track generation progress - the looping function will update with byte progress
+           generated_json = await self._callAiWithLooping(
+               generation_prompt,
+               options,
+               "document_generation",
+               buildGenerationPrompt,
+               promptArgs,
+               aiOperationId,
+               userPrompt=userPrompt
+           )

        # Calculate final size for completion message
        finalSize = len(generated_json.encode('utf-8')) if generated_json else 0
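The branch point above is purely lexical: it keys on two literal markers that the section prompt builder embeds. Isolated as a runnable sketch, with the marker strings copied from the diff:

def isSectionGenerationPrompt(prompt: str) -> bool:
    # Mirrors the detection above: section prompts embed one of two literal markers.
    return (
        "SECTION TO GENERATE" in prompt
        or 'CRITICAL: Return ONLY a JSON object with an "elements" array' in prompt
    )

assert isSectionGenerationPrompt("...\nSECTION TO GENERATE\n...") is True
assert isSectionGenerationPrompt("Write a full report about X") is False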
@@ -1291,7 +1328,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
            from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
            generationService = GenerationService(self.services)
            self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
-           rendered_content, mime_type = await generationService.renderReport(
+           rendered_content, mime_type, _images = await generationService.renderReport(
                generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
            )
            self.services.chat.progressLogFinish(renderOperationId, True)
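Every caller of renderReport must now unpack three values; this call site discards the images as _images. A hedged sketch of the updated contract from a caller's side (names reused from the diff, the wrapper function itself is illustrative):

async def renderAndCollectImages(generationService, generated_data, outputFormat, title, prompt, aiService):
    # renderReport now returns a 3-tuple; older call sites unpacked only two values.
    rendered_content, mime_type, images = await generationService.renderReport(
        generated_data, outputFormat, title, prompt, aiService
    )
    for img in images:
        # Each image dict carries base64Data, altText, caption, sectionId, filename
        print(img.get("filename"), img.get("altText"))
    return rendered_content, mime_type, images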
@@ -82,14 +82,62 @@ class GenerationService:
            documentData = doc_data['content']
            mimeType = doc_data['mimeType']

-           # Convert document data to string content
-           content = convertDocumentDataToString(documentData, getFileExtension(documentName))
+           # Handle binary data (images, PDFs, Office docs) differently from text
+           # Check if this is a binary MIME type
+           binaryMimeTypes = {
+               "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+               "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+               "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+               "application/pdf",
+               "image/png", "image/jpeg", "image/jpg", "image/gif", "image/webp", "image/bmp", "image/svg+xml",
+           }

-           # Skip empty or minimal content
-           minimalContentPatterns = ['{}', '[]', 'null', '""', "''"]
-           if not content or content.strip() == "" or content.strip() in minimalContentPatterns:
-               logger.warning(f"Empty or minimal content for document {documentName}, skipping")
-               continue
+           isBinaryMimeType = mimeType in binaryMimeTypes
+           base64encoded = False
+           content = None
+
+           if isBinaryMimeType:
+               # For binary data, handle bytes vs base64 string vs regular string
+               if isinstance(documentData, bytes):
+                   # Already bytes - encode to base64 string for storage
+                   import base64
+                   content = base64.b64encode(documentData).decode('utf-8')
+                   base64encoded = True
+               elif isinstance(documentData, str):
+                   # Check if it's already valid base64
+                   import base64
+                   try:
+                       # Try to decode to verify it's base64
+                       base64.b64decode(documentData, validate=True)
+                       # Valid base64 - use as is
+                       content = documentData
+                       base64encoded = True
+                   except Exception:
+                       # Not valid base64 - might be raw string, try encoding
+                       try:
+                           content = base64.b64encode(documentData.encode('utf-8')).decode('utf-8')
+                           base64encoded = True
+                       except Exception:
+                           logger.warning(f"Could not process binary data for {documentName}, skipping")
+                           continue
+               else:
+                   # Other types - convert to string then base64
+                   import base64
+                   try:
+                       content = base64.b64encode(str(documentData).encode('utf-8')).decode('utf-8')
+                       base64encoded = True
+                   except Exception:
+                       logger.warning(f"Could not encode binary data for {documentName}, skipping")
+                       continue
+           else:
+               # Text data - convert to string
+               content = convertDocumentDataToString(documentData, getFileExtension(documentName))
+
+               # Skip empty or minimal content
+               minimalContentPatterns = ['{}', '[]', 'null', '""', "''"]
+               if not content or content.strip() == "" or content.strip() in minimalContentPatterns:
+                   logger.warning(f"Empty or minimal content for document {documentName}, skipping")
+                   continue

            # Normalize file extension based on mime type if missing or incorrect
            try:
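The branch above normalizes three input shapes (bytes, base64 string, raw string) into one base64 string. Condensed into a standalone sketch:

import base64

def toBase64Content(documentData) -> str:
    """Sketch of the normalization above: always end up with a base64 string."""
    if isinstance(documentData, bytes):
        return base64.b64encode(documentData).decode('utf-8')
    if isinstance(documentData, str):
        try:
            # validate=True rejects non-alphabet characters instead of ignoring them
            base64.b64decode(documentData, validate=True)
            return documentData  # already base64
        except Exception:
            return base64.b64encode(documentData.encode('utf-8')).decode('utf-8')
    return base64.b64encode(str(documentData).encode('utf-8')).decode('utf-8')

One caveat worth noting: base64.b64decode(..., validate=True) accepts any alphabet-only string whose length is a multiple of 4, so short plain-text payloads can be misclassified as already-encoded base64.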
@@ -102,6 +150,13 @@ class GenerationService:
                    "text/markdown": ".md",
                    "text/plain": ".txt",
                    "application/json": ".json",
+                   "image/png": ".png",
+                   "image/jpeg": ".jpg",
+                   "image/jpg": ".jpg",
+                   "image/gif": ".gif",
+                   "image/webp": ".webp",
+                   "image/bmp": ".bmp",
+                   "image/svg+xml": ".svg",
                }
                expectedExt = mime_to_ext.get(mimeType)
                if expectedExt:
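The extension normalization these entries feed is straightforward; a sketch under the assumption that the mapping is consulted as shown (the helper name is illustrative, not from the diff):

import os

MIME_TO_EXT = {
    "text/markdown": ".md",
    "text/plain": ".txt",
    "application/json": ".json",
    "image/png": ".png",
    "image/jpeg": ".jpg",
}

def normalizeExtension(filename: str, mimeType: str) -> str:
    """Sketch: force the file extension to match the MIME type."""
    expectedExt = MIME_TO_EXT.get(mimeType)
    if not expectedExt:
        return filename
    root, ext = os.path.splitext(filename)
    return filename if ext.lower() == expectedExt else root + expectedExt

assert normalizeExtension("chart.txt", "image/png") == "chart.png"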
@@ -114,20 +169,6 @@ class GenerationService:
            except Exception:
                pass

-           # Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
-           base64encoded = False
-           try:
-               binaryMimeTypes = {
-                   "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-                   "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-                   "application/vnd.openxmlformats-officedocument.presentationml.presentation",
-                   "application/pdf",
-               }
-               if isinstance(documentData, str) and mimeType in binaryMimeTypes:
-                   base64encoded = True
-           except Exception:
-               base64encoded = False
-
            # Create document with file in one step using interfaces directly
            document = self._createDocument(
                fileName=documentName,
@@ -278,7 +319,7 @@ class GenerationService:
            'workflowId': 'unknown'
        }

-   async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str]:
+   async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str, List[Dict[str, Any]]]:
        """
        Render extracted JSON content to the specified output format.
        Always uses unified "documents" array format.
@@ -291,7 +332,8 @@ class GenerationService:
            aiService: AI service instance for generation prompt creation

        Returns:
-           tuple: (rendered_content, mime_type)
+           tuple: (rendered_content, mime_type, images_list)
+           images_list: List of image dicts with base64Data, altText, caption, etc.
        """
        try:
            # Validate JSON input
@@ -311,12 +353,10 @@ class GenerationService:
            if "sections" not in single_doc:
                raise ValueError("Document must contain 'sections' field")

-           # Create content for single document renderer
-           contentToRender = {
-               "sections": single_doc["sections"],
-               "metadata": extractedContent.get("metadata", {}),
-               "continuation": extractedContent.get("continuation", None)
-           }
+           # Pass standardized schema to renderer (maintains architecture)
+           # Renderer should extract sections from documents array according to standardized schema
+           # Standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
+           contentToRender = extractedContent  # Pass full standardized schema

            # Get the appropriate renderer for the format
            renderer = self._getFormatRenderer(outputFormat)
@@ -324,9 +364,15 @@ class GenerationService:
                raise ValueError(f"Unsupported output format: {outputFormat}")

            # Render the JSON content directly (AI generation handled by main service)
+           # Renderer receives standardized schema and extracts what it needs
            renderedContent, mimeType = await renderer.render(contentToRender, title, userPrompt, aiService)

-           return renderedContent, mimeType
+           # Get images from renderer if available
+           images = []
+           if hasattr(renderer, 'getRenderedImages'):
+               images = renderer.getRenderedImages()
+
+           return renderedContent, mimeType, images

        except Exception as e:
            logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
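The hasattr probe keeps the renderer base class unchanged: only renderers that actually extract images (currently the HTML renderer, below) need to implement getRenderedImages. The duck-typed contract, as a sketch:

def collectImages(renderer):
    """Renderers that extract images expose getRenderedImages();
    all others silently yield an empty list."""
    if hasattr(renderer, 'getRenderedImages'):
        return renderer.getRenderedImages()
    return []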
@@ -353,14 +399,21 @@ class GenerationService:
    def _getFormatRenderer(self, output_format: str):
        """Get the appropriate renderer for the specified format using auto-discovery."""
        try:
-           from .renderers.registry import getRenderer
+           from .renderers.registry import getRenderer, getSupportedFormats
            renderer = getRenderer(output_format, services=self.services)

            if renderer:
                return renderer

+           # Log available formats for debugging
+           availableFormats = getSupportedFormats()
+           logger.error(
+               f"No renderer found for format '{output_format}'. "
+               f"Available formats: {availableFormats}"
+           )
+
            # Fallback to text renderer if no specific renderer found
-           logger.warning(f"No renderer found for format {output_format}, falling back to text")
+           logger.warning(f"Falling back to text renderer for format {output_format}")
            fallbackRenderer = getRenderer('text', services=self.services)
            if fallbackRenderer:
                return fallbackRenderer
@@ -370,4 +423,6 @@ class GenerationService:

        except Exception as e:
            logger.error(f"Error getting renderer for {output_format}: {str(e)}")
+           import traceback
+           logger.debug(traceback.format_exc())
            return None
@@ -66,12 +66,34 @@ class BaseRenderer(ABC):
        pass

    def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
-       """Extract sections from report data."""
-       return reportData.get('sections', [])
+       """
+       Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
+       """
+       if "documents" not in reportData:
+           raise ValueError("Report data must follow standardized schema with 'documents' array")
+
+       documents = reportData.get("documents", [])
+       if not isinstance(documents, list) or len(documents) == 0:
+           raise ValueError("Standardized schema must contain at least one document in 'documents' array")
+
+       firstDoc = documents[0]
+       if not isinstance(firstDoc, dict) or "sections" not in firstDoc:
+           raise ValueError("Document in standardized schema must contain 'sections' field")
+
+       return firstDoc.get("sections", [])

    def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]:
-       """Extract metadata from report data."""
-       return reportData.get('metadata', {})
+       """
+       Extract metadata from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
+       """
+       if "metadata" not in reportData:
+           raise ValueError("Report data must follow standardized schema with 'metadata' field")
+
+       metadata = reportData.get("metadata", {})
+       if not isinstance(metadata, dict):
+           raise ValueError("Metadata in standardized schema must be a dictionary")
+
+       return metadata

    def _getTitle(self, reportData: Dict[str, Any], fallbackTitle: str) -> str:
        """Get title from report data or use fallback."""
@@ -79,14 +101,33 @@ class BaseRenderer(ABC):
        return metadata.get('title', fallbackTitle)

    def _validateJsonStructure(self, jsonContent: Dict[str, Any]) -> bool:
-       """Validate that JSON content has the expected structure."""
+       """
+       Validate that JSON content follows standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
+       """
        if not isinstance(jsonContent, dict):
            return False

-       if "sections" not in jsonContent:
+       # Validate metadata field exists
+       if "metadata" not in jsonContent:
            return False

-       sections = jsonContent.get("sections", [])
+       if not isinstance(jsonContent.get("metadata"), dict):
+           return False
+
+       # Validate documents array exists and is not empty
+       if "documents" not in jsonContent:
+           return False
+
+       documents = jsonContent.get("documents", [])
+       if not isinstance(documents, list) or len(documents) == 0:
+           return False
+
+       # Validate first document has sections
+       firstDoc = documents[0]
+       if not isinstance(firstDoc, dict) or "sections" not in firstDoc:
+           return False
+
+       sections = firstDoc.get("sections", [])
        if not isinstance(sections, list):
            return False
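With the stricter helpers above, the pre-commit flat shape ({"sections": [...]}) is now rejected everywhere. Illustrative inputs and the expected results, where renderer stands in for any concrete BaseRenderer subclass:

validSchema = {
    "metadata": {"title": "Quarterly Report"},
    "documents": [
        {"filename": "document.json", "sections": [
            {"id": "s1", "content_type": "heading", "order": 1, "elements": []}
        ]}
    ],
}

legacySchema = {"sections": []}  # pre-commit shape, now rejected

# renderer._validateJsonStructure(validSchema)   -> True
# renderer._validateJsonStructure(legacySchema)  -> False
# renderer._extractSections(legacySchema)        -> raises ValueError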
@@ -41,15 +41,16 @@ class RendererCsv(BaseRenderer):
    async def _generateCsvFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
        """Generate CSV content from structured JSON document."""
        try:
-           # Validate JSON structure
-           if not isinstance(jsonContent, dict):
-               raise ValueError("JSON content must be a dictionary")
+           # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
+           if not self._validateJsonStructure(jsonContent):
+               raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

-           if "sections" not in jsonContent:
-               raise ValueError("JSON content must contain 'sections' field")
+           # Extract sections and metadata from standardized schema
+           sections = self._extractSections(jsonContent)
+           metadata = self._extractMetadata(jsonContent)

            # Use title from JSON metadata if available, otherwise use provided title
-           documentTitle = jsonContent.get("metadata", {}).get("title", title)
+           documentTitle = metadata.get("title", title)

            # Generate CSV content
            csvRows = []
@@ -60,7 +61,6 @@ class RendererCsv(BaseRenderer):
            csvRows.append([])  # Empty row

            # Process each section in order
-           sections = jsonContent.get("sections", [])
            for section in sections:
                sectionCsv = self._renderJsonSectionToCsv(section)
                if sectionCsv:
@@ -71,22 +71,22 @@ class RendererDocx(BaseRenderer):
            self._setupBasicDocumentStyles(doc)
            self._setupDocumentStyles(doc, styleSet)

-           # Validate JSON structure
-           if not isinstance(json_content, dict):
-               raise ValueError("JSON content must be a dictionary")
+           # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
+           if not self._validateJsonStructure(json_content):
+               raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

-           if "sections" not in json_content:
-               raise ValueError("JSON content must contain 'sections' field")
+           # Extract sections and metadata from standardized schema
+           sections = self._extractSections(json_content)
+           metadata = self._extractMetadata(json_content)

            # Use title from JSON metadata if available, otherwise use provided title
-           document_title = json_content.get("metadata", {}).get("title", title)
+           document_title = metadata.get("title", title)

            # Add document title using Title style
            if document_title:
                doc.add_paragraph(document_title, style='Title')

            # Process each section in order
-           sections = json_content.get("sections", [])
            for section in sections:
                self._renderJsonSection(doc, section, styleSet)
@@ -28,14 +28,25 @@ class RendererHtml(BaseRenderer):
    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
        """Render extracted JSON content to HTML format using AI-analyzed styling."""
        try:
+           # Extract images first
+           images = self._extractImages(extractedContent)
+
+           # Store images in instance for later retrieval
+           self._renderedImages = images
+
            # Generate HTML using AI-analyzed styling
            htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
+
+           # Replace base64 data URIs with relative file paths if images exist
+           if images:
+               htmlContent = self._replaceImageDataUris(htmlContent, images)
+
            return htmlContent, "text/html"

        except Exception as e:
            self.logger.error(f"Error rendering HTML: {str(e)}")
            # Return minimal HTML fallback
+           self._renderedImages = []  # Initialize empty list on error
            return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"

    async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
@@ -45,14 +56,15 @@ class RendererHtml(BaseRenderer):
            styles = await self._getStyleSet(userPrompt, aiService)

            # Validate JSON structure
-           if not isinstance(jsonContent, dict):
-               raise ValueError("JSON content must be a dictionary")
+           if not self._validateJsonStructure(jsonContent):
+               raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

-           if "sections" not in jsonContent:
-               raise ValueError("JSON content must contain 'sections' field")
+           # Extract sections and metadata from standardized schema
+           sections = self._extractSections(jsonContent)
+           metadata = self._extractMetadata(jsonContent)

            # Use title from JSON metadata if available, otherwise use provided title
-           documentTitle = jsonContent.get("metadata", {}).get("title", title)
+           documentTitle = metadata.get("title", title)

            # Build HTML document
            htmlParts = []
@@ -77,7 +89,6 @@ class RendererHtml(BaseRenderer):
            htmlParts.append('<main>')

            # Process each section
-           sections = jsonContent.get("sections", [])
            for section in sections:
                sectionHtml = self._renderJsonSection(section, styles)
                if sectionHtml:
@@ -377,12 +388,15 @@ class RendererHtml(BaseRenderer):
    def _renderJsonHeading(self, headingData: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON heading to HTML using AI-generated styles."""
        try:
-           # Normalize non-dict inputs
-           if isinstance(headingData, str):
+           # Normalize inputs - headingData is typically a list of elements from _getSectionData
+           if isinstance(headingData, list):
+               # Extract first element from elements array
+               if headingData and len(headingData) > 0:
+                   headingData = headingData[0] if isinstance(headingData[0], dict) else {}
+               else:
+                   return ""
+           elif isinstance(headingData, str):
                headingData = {"text": headingData, "level": 2}
-           elif isinstance(headingData, list):
-               # Render a list as bullet list under a default heading label
-               return self._renderJsonBulletList({"items": headingData}, styles)
            elif not isinstance(headingData, dict):
                return ""
@@ -402,21 +416,28 @@ class RendererHtml(BaseRenderer):
    def _renderJsonParagraph(self, paragraphData: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON paragraph to HTML using AI-generated styles."""
        try:
-           # Normalize non-dict inputs
-           if isinstance(paragraphData, str):
-               paragraphData = {"text": paragraphData}
-           elif isinstance(paragraphData, list):
-               # Treat list as bullet list paragraph
-               return self._renderJsonBulletList({"items": paragraphData}, styles)
-           elif not isinstance(paragraphData, dict):
-               return ""
-
-           text = paragraphData.get("text", "")
-
-           if text:
-               return f'<p>{text}</p>'
-
-           return ""
-
+           # Normalize inputs - paragraphData is typically a list of elements from _getSectionData
+           if isinstance(paragraphData, list):
+               # Extract text from all paragraph elements
+               texts = []
+               for el in paragraphData:
+                   if isinstance(el, dict) and "text" in el:
+                       texts.append(el["text"])
+                   elif isinstance(el, str):
+                       texts.append(el)
+               if texts:
+                   # Join multiple paragraphs with <p> tags
+                   return '\n'.join(f'<p>{text}</p>' for text in texts)
+               return ""
+           elif isinstance(paragraphData, str):
+               return f'<p>{paragraphData}</p>'
+           elif isinstance(paragraphData, dict):
+               text = paragraphData.get("text", "")
+               if text:
+                   return f'<p>{text}</p>'
+               return ""
+           else:
+               return ""
        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
@@ -441,16 +462,145 @@ class RendererHtml(BaseRenderer):
            return ""

    def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str:
-       """Render a JSON image to HTML."""
+       """Render a JSON image to HTML with placeholder for later replacement."""
        try:
            base64Data = imageData.get("base64Data", "")
            altText = imageData.get("altText", "Image")
+           caption = imageData.get("caption", "")

            if base64Data:
-               return f'<img src="data:image/png;base64,{base64Data}" alt="{altText}">'
+               # Use data URI as placeholder - will be replaced with file path in _replaceImageDataUris
+               # Include a marker so we can find and replace it
+               imageMarker = f"<!--IMAGE_MARKER:{len(base64Data)}:{altText[:50]}-->"
+               imgTag = f'<img src="data:image/png;base64,{base64Data}" alt="{altText}">'
+
+               if caption:
+                   return f'{imageMarker}<figure>{imgTag}<figcaption>{caption}</figcaption></figure>'
+               else:
+                   return f'{imageMarker}{imgTag}'
+
            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering image: {str(e)}")
            return f'<div class="error">[Image: {imageData.get("altText", "Image")}]</div>'

+   def _extractImages(self, jsonContent: Dict[str, Any]) -> List[Dict[str, Any]]:
+       """
+       Extract all images from JSON structure.
+
+       Returns:
+           List of image data dictionaries with base64Data, altText, caption, sectionId
+       """
+       images = []
+
+       try:
+           # Extract from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
+           documents = jsonContent.get("documents", [])
+           if not documents or not isinstance(documents, list):
+               return images
+
+           for doc in documents:
+               if not isinstance(doc, dict):
+                   continue
+               sections = doc.get("sections", [])
+               for section in sections:
+                   if section.get("content_type") == "image":
+                       elements = section.get("elements", [])
+                       for element in elements:
+                           base64Data = element.get("base64Data", "")
+
+                           # If base64Data not found, try extracting from url data URI
+                           if not base64Data:
+                               url = element.get("url", "")
+                               if url.startswith("data:image/"):
+                                   # Extract base64 from data URI: data:image/png;base64,<base64>
+                                   import re
+                                   match = re.match(r'data:image/[^;]+;base64,(.+)', url)
+                                   if match:
+                                       base64Data = match.group(1)
+
+                           if base64Data:
+                               sectionId = section.get("id", "unknown")
+                               # Generate filename from section ID
+                               filename = f"{sectionId}.png"
+                               # Clean filename (remove invalid characters)
+                               filename = "".join(c if c.isalnum() or c in "._-" else "_" for c in filename)
+
+                               images.append({
+                                   "base64Data": base64Data,
+                                   "altText": element.get("altText", "Image"),
+                                   "caption": element.get("caption"),
+                                   "sectionId": sectionId,
+                                   "filename": filename
+                               })
+                               self.logger.debug(f"Extracted image from section {sectionId}: {filename}")
+
+           self.logger.info(f"Extracted {len(images)} image(s) from JSON structure")
+           return images
+
+       except Exception as e:
+           self.logger.warning(f"Error extracting images: {str(e)}")
+           return []
+
+   def _replaceImageDataUris(self, htmlContent: str, images: List[Dict[str, Any]]) -> str:
+       """
+       Replace base64 data URIs in HTML with relative file paths.
+
+       Args:
+           htmlContent: HTML content with data URIs
+           images: List of image data dictionaries
+
+       Returns:
+           HTML content with relative file paths
+       """
+       try:
+           import base64
+           import re
+
+           # Find all image data URIs in HTML
+           dataUriPattern = r'data:image/png;base64,([A-Za-z0-9+/=]+)'
+
+           def replaceDataUri(match):
+               base64Data = match.group(1)
+
+               # Find matching image in images list
+               matchingImage = None
+               for img in images:
+                   if img["base64Data"] == base64Data or img["base64Data"].startswith(base64Data[:100]):
+                       matchingImage = img
+                       break
+
+               if matchingImage:
+                   # Use filename from image data (generated from section ID)
+                   filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png")
+
+                   # Replace with relative path
+                   altText = matchingImage.get("altText", "Image")
+                   caption = matchingImage.get("caption", "")
+
+                   if caption:
+                       return f'<figure><img src="{filename}" alt="{altText}"><figcaption>{caption}</figcaption></figure>'
+                   else:
+                       return f'<img src="{filename}" alt="{altText}">'
+               else:
+                   # Keep original if no match found
+                   return match.group(0)
+
+           # Replace all data URIs
+           updatedHtml = re.sub(dataUriPattern, replaceDataUri, htmlContent)
+
+           return updatedHtml
+
+       except Exception as e:
+           self.logger.warning(f"Error replacing image data URIs: {str(e)}")
+           return htmlContent  # Return original if replacement fails
+
+   def getRenderedImages(self) -> List[Dict[str, Any]]:
+       """
+       Get images that were extracted during rendering.
+       Returns list of image dicts with base64Data, altText, caption, and filename.
+       """
+       if not hasattr(self, '_renderedImages'):
+           return []
+       return self._renderedImages
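Both the HTML and PDF renderers fall back to mining the element's url field when base64Data is absent. The regex round trip, isolated as a runnable sketch:

import re

def base64FromDataUri(url: str):
    """Sketch of the fallback used above: pull the base64 payload out of a
    data URI like data:image/png;base64,<payload>."""
    match = re.match(r'data:image/[^;]+;base64,(.+)', url)
    return match.group(1) if match else None

assert base64FromDataUri("data:image/png;base64,iVBORw0KGgo=") == "iVBORw0KGgo="
assert base64FromDataUri("https://example.com/a.png") is None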
@@ -123,7 +123,7 @@ class RendererImage(BaseRenderer):
            promptParts.append(f"Document Title: {title}")

            # Analyze content and create visual description
-           sections = extractedContent.get("sections", [])
+           sections = self._extractSections(extractedContent)
            contentDescription = self._analyzeContentForVisualDescription(sections)

            if contentDescription:
@@ -286,7 +286,7 @@ Return only the compressed prompt, no explanations.
            styleElements.append("corporate, professional design")

        # Analyze content type for additional style hints
-       sections = extractedContent.get("sections", [])
+       sections = self._extractSections(extractedContent)
        hasTables = any(self._getSectionType(s) == "table" for s in sections)
        hasLists = any(self._getSectionType(s) == "bullet_list" for s in sections)
        hasCode = any(self._getSectionType(s) == "code_block" for s in sections)
@@ -41,15 +41,16 @@ class RendererMarkdown(BaseRenderer):
    def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
        """Generate markdown content from structured JSON document."""
        try:
-           # Validate JSON structure
-           if not isinstance(jsonContent, dict):
-               raise ValueError("JSON content must be a dictionary")
+           # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
+           if not self._validateJsonStructure(jsonContent):
+               raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

-           if "sections" not in jsonContent:
-               raise ValueError("JSON content must contain 'sections' field")
+           # Extract sections and metadata from standardized schema
+           sections = self._extractSections(jsonContent)
+           metadata = self._extractMetadata(jsonContent)

            # Use title from JSON metadata if available, otherwise use provided title
-           documentTitle = jsonContent.get("metadata", {}).get("title", title)
+           documentTitle = metadata.get("title", title)

            # Build markdown content
            markdownParts = []
@@ -59,7 +60,6 @@ class RendererMarkdown(BaseRenderer):
            markdownParts.append("")

            # Process each section
-           sections = jsonContent.get("sections", [])
            for section in sections:
                sectionMarkdown = self._renderJsonSection(section)
                if sectionMarkdown:
@@ -65,14 +65,15 @@ class RendererPdf(BaseRenderer):
            styles = await self._getStyleSet(userPrompt, aiService)

            # Validate JSON structure
-           if not isinstance(json_content, dict):
-               raise ValueError("JSON content must be a dictionary")
+           if not self._validateJsonStructure(json_content):
+               raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

-           if "sections" not in json_content:
-               raise ValueError("JSON content must contain 'sections' field")
+           # Extract sections and metadata from standardized schema
+           sections = self._extractSections(json_content)
+           metadata = self._extractMetadata(json_content)

            # Use title from JSON metadata if available, otherwise use provided title
-           document_title = json_content.get("metadata", {}).get("title", title)
+           document_title = metadata.get("title", title)

            # Make title shorter to prevent wrapping/overlapping
            if len(document_title) > 40:
@@ -102,8 +103,7 @@ class RendererPdf(BaseRenderer):
            story.append(Spacer(1, 30))  # Add spacing before page break
            story.append(PageBreak())

-           # Process each section
-           sections = json_content.get("sections", [])
+           # Process each section (sections already extracted above)
            self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
            for i, section in enumerate(sections):
                self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
@@ -505,7 +505,7 @@ class RendererPdf(BaseRenderer):

        except Exception as e:
            self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
-           return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))]
+           return [Paragraph(f"[Error rendering section: {str(e)}]", self._createNormalStyle(styles))]

    def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON table to PDF elements using AI-generated styles."""
@@ -555,9 +555,9 @@ class RendererPdf(BaseRenderer):
        elements = []
        for item in items:
            if isinstance(item, str):
-               elements.append(Paragraph(f"• {item}", self._create_normal_style(styles)))
+               elements.append(Paragraph(f"• {item}", self._createNormalStyle(styles)))
            elif isinstance(item, dict) and "text" in item:
-               elements.append(Paragraph(f"• {item['text']}", self._create_normal_style(styles)))
+               elements.append(Paragraph(f"• {item['text']}", self._createNormalStyle(styles)))

        if elements:
            elements.append(Spacer(1, bullet_style_def.get("space_after", 3)))
@@ -637,16 +637,84 @@ class RendererPdf(BaseRenderer):
            return []

    def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
-       """Render a JSON image to PDF elements."""
+       """Render a JSON image to PDF elements using reportlab."""
        try:
            base64_data = image_data.get("base64Data", "")
            alt_text = image_data.get("altText", "Image")
+           caption = image_data.get("caption", "")

-           if base64_data:
-               # For now, just add a placeholder since reportlab image handling is complex
+           # If base64Data not found, try extracting from url data URI
+           if not base64_data:
+               url = image_data.get("url", "")
+               if url.startswith("data:image/"):
+                   # Extract base64 from data URI: data:image/png;base64,<base64>
+                   import re
+                   match = re.match(r'data:image/[^;]+;base64,(.+)', url)
+                   if match:
+                       base64_data = match.group(1)
+
+           if not base64_data:
                return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]

-           return []
+           try:
+               from reportlab.platypus import Image as ReportLabImage
+               from reportlab.lib.units import inch
+               import base64
+               import io
+
+               # Decode base64 image data
+               imageBytes = base64.b64decode(base64_data)
+               imageStream = io.BytesIO(imageBytes)
+
+               # Create reportlab Image element
+               # Try to get image dimensions from PIL
+               try:
+                   from PIL import Image as PILImage
+                   pilImage = PILImage.open(imageStream)
+                   imgWidth, imgHeight = pilImage.size
+
+                   # Scale to fit page (max width 6 inches, maintain aspect ratio)
+                   maxWidth = 6 * inch
+                   if imgWidth > maxWidth:
+                       scale = maxWidth / imgWidth
+                       imgWidth = maxWidth
+                       imgHeight = imgHeight * scale
+                   else:
+                       imgWidth = imgWidth * (inch / 72)  # Convert pixels to inches (assuming 72 DPI)
+                       imgHeight = imgHeight * (inch / 72)
+
+                   # Reset stream for reportlab
+                   imageStream.seek(0)
+               except Exception:
+                   # Fallback: use default size
+                   imgWidth = 4 * inch
+                   imgHeight = 3 * inch
+                   imageStream.seek(0)
+
+               # Create reportlab Image
+               reportlabImage = ReportLabImage(imageStream, width=imgWidth, height=imgHeight)
+
+               elements = [reportlabImage]
+
+               # Add caption if available
+               if caption:
+                   captionStyle = self._createNormalStyle(styles)
+                   captionStyle.fontSize = 10
+                   captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666"))
+                   elements.append(Paragraph(f"<i>{caption}</i>", captionStyle))
+               elif alt_text and alt_text != "Image":
+                   # Use alt text as caption if no caption provided
+                   captionStyle = self._createNormalStyle(styles)
+                   captionStyle.fontSize = 10
+                   captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666"))
+                   elements.append(Paragraph(f"<i>Figure: {alt_text}</i>", captionStyle))
+
+               return elements
+
+           except Exception as imgError:
+               self.logger.warning(f"Error embedding image in PDF: {str(imgError)}")
+               # Fallback to placeholder
+               return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]

        except Exception as e:
            self.logger.warning(f"Error rendering image: {str(e)}")
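The PDF sizing rule caps images at six inches of width and otherwise maps pixels to points one-to-one (reportlab's inch is 72 points, and the code assumes 72 DPI, so inch / 72 is a factor of 1.0). As a standalone sketch:

def scaleToPageWidth(imgWidthPx: int, imgHeightPx: int, maxWidthPt: float = 6 * 72) -> tuple:
    """Sketch of the sizing rule above: cap width at 6 inches (432 points)
    and preserve the aspect ratio; otherwise treat pixels as points."""
    if imgWidthPx > maxWidthPt:
        scale = maxWidthPt / imgWidthPx
        return maxWidthPt, imgHeightPx * scale
    return imgWidthPx, imgHeightPx

assert scaleToPageWidth(864, 432) == (432.0, 216.0)  # halved to fit 6in
assert scaleToPageWidth(300, 200) == (300, 200)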
@@ -66,6 +66,9 @@ class RendererPptx(BaseRenderer):
            # Debug: Show first 200 chars of content
            logger.info(f"JSON content preview: {str(extractedContent)[:200]}...")

+           # Store prs reference for image methods
+           self._currentPresentation = prs
+
            for i, slide_data in enumerate(slidesData):
                logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars")
                # Debug: Show slide content preview
@ -75,6 +78,9 @@ class RendererPptx(BaseRenderer):
|
||||||
else:
|
else:
|
||||||
logger.warning(f" ⚠️ Slide {i+1} has NO content!")
|
logger.warning(f" ⚠️ Slide {i+1} has NO content!")
|
||||||
|
|
||||||
|
# Check if slide has images
|
||||||
|
hasImages = slide_data.get("images") and len(slide_data.get("images", [])) > 0
|
||||||
|
|
||||||
# Create slide with appropriate layout based on content
|
# Create slide with appropriate layout based on content
|
||||||
slideLayoutIndex = self._getSlideLayoutIndex(slide_data, styles)
|
slideLayoutIndex = self._getSlideLayoutIndex(slide_data, styles)
|
||||||
slide_layout = prs.slide_layouts[slideLayoutIndex]
|
slide_layout = prs.slide_layouts[slideLayoutIndex]
|
||||||
|
|
@ -92,67 +98,71 @@ class RendererPptx(BaseRenderer):
|
||||||
title_color = self._get_safe_color(title_style.get("color", (31, 78, 121)))
|
title_color = self._get_safe_color(title_style.get("color", (31, 78, 121)))
|
||||||
title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
|
title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
|
||||||
|
|
||||||
# Set content with AI-generated styling
|
# Handle images first (if present)
|
||||||
content_shape = slide.placeholders[1]
|
if hasImages:
|
||||||
content_text = slide_data.get("content", "")
|
self._addImagesToSlide(slide, slide_data.get("images", []), styles)
|
||||||
|
|
||||||
# Format content text with AI styles
|
# Set content with AI-generated styling (if not image-only slide)
|
||||||
text_frame = content_shape.text_frame
|
if slide_content or not hasImages:
|
||||||
text_frame.clear()
|
content_shape = slide.placeholders[1]
|
||||||
|
|
||||||
# Split content into paragraphs
|
# Format content text with AI styles
|
||||||
paragraphs = content_text.split('\n\n')
|
text_frame = content_shape.text_frame
|
||||||
|
text_frame.clear()
|
||||||
for i, paragraph in enumerate(paragraphs):
|
|
||||||
if paragraph.strip():
|
# Split content into paragraphs
|
||||||
if i == 0:
|
paragraphs = slide_content.split('\n\n')
|
||||||
p = text_frame.paragraphs[0]
|
|
||||||
else:
|
for paraIdx, paragraph in enumerate(paragraphs):
|
||||||
p = text_frame.add_paragraph()
|
if paragraph.strip():
|
||||||
|
if paraIdx == 0:
|
||||||
p.text = paragraph.strip()
|
p = text_frame.paragraphs[0]
|
||||||
|
else:
|
||||||
# Apply AI-generated styling based on content type
|
p = text_frame.add_paragraph()
|
||||||
if paragraph.startswith('#'):
|
|
||||||
# Header
|
p.text = paragraph.strip()
|
||||||
p.text = paragraph.lstrip('#').strip()
|
|
||||||
heading_style = styles.get("heading", {})
|
# Apply AI-generated styling based on content type
|
||||||
p.font.size = Pt(heading_style.get("font_size", 32))
|
if paragraph.startswith('#'):
|
||||||
p.font.bold = heading_style.get("bold", True)
|
# Header
|
||||||
heading_color = self._get_safe_color(heading_style.get("color", (47, 47, 47)))
|
p.text = paragraph.lstrip('#').strip()
|
||||||
p.font.color.rgb = RGBColor(*heading_color)
|
heading_style = styles.get("heading", {})
|
||||||
elif paragraph.startswith('##'):
|
p.font.size = Pt(heading_style.get("font_size", 32))
|
||||||
# Subheader
|
p.font.bold = heading_style.get("bold", True)
|
||||||
p.text = paragraph.lstrip('#').strip()
|
heading_color = self._get_safe_color(heading_style.get("color", (47, 47, 47)))
|
||||||
subheading_style = styles.get("subheading", {})
|
p.font.color.rgb = RGBColor(*heading_color)
|
||||||
p.font.size = Pt(subheading_style.get("font_size", 24))
|
elif paragraph.startswith('##'):
|
||||||
p.font.bold = subheading_style.get("bold", True)
|
# Subheader
|
||||||
subheading_color = self._get_safe_color(subheading_style.get("color", (79, 79, 79)))
|
p.text = paragraph.lstrip('#').strip()
|
||||||
p.font.color.rgb = RGBColor(*subheading_color)
|
subheading_style = styles.get("subheading", {})
|
||||||
elif paragraph.startswith('*') and paragraph.endswith('*'):
|
p.font.size = Pt(subheading_style.get("font_size", 24))
|
||||||
# Bold text
|
p.font.bold = subheading_style.get("bold", True)
|
||||||
p.text = paragraph.strip('*')
|
subheading_color = self._get_safe_color(subheading_style.get("color", (79, 79, 79)))
|
||||||
paragraph_style = styles.get("paragraph", {})
|
p.font.color.rgb = RGBColor(*subheading_color)
|
||||||
p.font.size = Pt(paragraph_style.get("font_size", 18))
|
elif paragraph.startswith('*') and paragraph.endswith('*'):
|
||||||
p.font.bold = True
|
# Bold text
|
||||||
paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
|
p.text = paragraph.strip('*')
|
||||||
p.font.color.rgb = RGBColor(*paragraph_color)
|
paragraph_style = styles.get("paragraph", {})
|
||||||
else:
|
p.font.size = Pt(paragraph_style.get("font_size", 18))
|
||||||
# Regular text
|
p.font.bold = True
|
||||||
paragraph_style = styles.get("paragraph", {})
|
paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
|
||||||
p.font.size = Pt(paragraph_style.get("font_size", 18))
|
p.font.color.rgb = RGBColor(*paragraph_color)
|
||||||
p.font.bold = paragraph_style.get("bold", False)
|
else:
|
||||||
paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
|
# Regular text
|
||||||
p.font.color.rgb = RGBColor(*paragraph_color)
|
paragraph_style = styles.get("paragraph", {})
|
||||||
|
p.font.size = Pt(paragraph_style.get("font_size", 18))
|
||||||
# Apply alignment
|
p.font.bold = paragraph_style.get("bold", False)
|
||||||
align = paragraph_style.get("align", "left")
|
paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
|
||||||
if align == "center":
|
p.font.color.rgb = RGBColor(*paragraph_color)
|
||||||
p.alignment = PP_ALIGN.CENTER
|
|
||||||
elif align == "right":
|
# Apply alignment
|
||||||
p.alignment = PP_ALIGN.RIGHT
|
align = paragraph_style.get("align", "left")
|
||||||
else:
|
if align == "center":
|
||||||
p.alignment = PP_ALIGN.LEFT
|
p.alignment = PP_ALIGN.CENTER
|
||||||
|
elif align == "right":
|
||||||
|
p.alignment = PP_ALIGN.RIGHT
|
||||||
|
else:
|
||||||
|
p.alignment = PP_ALIGN.LEFT
|
||||||
|
|
||||||
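The styling branch above keys off a lightweight markdown-style prefix convention in the AI-generated slide text. A minimal sketch of that dispatch on its own, with '##' tested before '#' so subheadings are not swallowed by the heading case (the label names mirror the styles dict keys used above):

def classify_paragraph(text: str) -> str:
    """Map a slide paragraph to the style bucket the renderer applies."""
    if text.startswith('##'):
        return "subheading"
    if text.startswith('#'):
        return "heading"
    if text.startswith('*') and text.endswith('*'):
        return "bold"
    return "paragraph"

assert classify_paragraph("## Details") == "subheading"
assert classify_paragraph("# Title") == "heading"
assert classify_paragraph("*key point*") == "bold"
assert classify_paragraph("Plain text") == "paragraph"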
        # If no slides were created, create a default slide
        if not slidesData:
@ -568,15 +578,16 @@ JSON ONLY. NO OTHER TEXT."""
        slides = []

        try:
            # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
            if not self._validateJsonStructure(json_content):
                raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

            # Extract sections and metadata from standardized schema
            sections = self._extractSections(json_content)
            metadata = self._extractMetadata(json_content)

            # Use title from JSON metadata if available, otherwise use provided title
            document_title = metadata.get("title", title)

            # Create title slide
            slides.append({

@ -585,7 +596,6 @@ JSON ONLY. NO OTHER TEXT."""
            })

            # Process sections into slides based on content and user intent
            slides.extend(self._createSlidesFromSections(sections, styles))

            # If no content slides were created, create a default content slide
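_validateJsonStructure, _extractSections, and _extractMetadata are referenced above but defined elsewhere in the commit. A plausible minimal shape for the validation helper, assuming it only checks the top-level keys named in the error message (this is a sketch of the contract, not the actual implementation):

def _validateJsonStructure(self, json_content) -> bool:
    """True if content matches {metadata: {...}, documents: [{sections: [...]}]}."""
    if not isinstance(json_content, dict):
        return False
    if not isinstance(json_content.get("metadata"), dict):
        return False
    documents = json_content.get("documents")
    if not isinstance(documents, list) or not documents:
        return False
    return all(isinstance(d, dict) and isinstance(d.get("sections"), list) for d in documents)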
@ -624,6 +634,24 @@ JSON ONLY. NO OTHER TEXT."""
            content_type = section.get("content_type", "paragraph")
            elements = section.get("elements", [])

            # Handle image sections specially
            if content_type == "image":
                # Extract image data
                images = []
                for element in elements:
                    if element.get("base64Data"):
                        images.append({
                            "base64Data": element.get("base64Data"),
                            "altText": element.get("altText", "Image"),
                            "caption": element.get("caption")
                        })

                # Fall back to the first extracted image's alt text for the slide title
                # (the bare loop variable would be unbound here if elements were empty)
                return {
                    "title": section_title or (images[0]["altText"] if images else "Image"),
                    "content": "",  # No text content for image slides
                    "images": images
                }
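An example of the mapping above: an image section with one base64 element becomes an image-only slide dict with an empty "content" field and a populated "images" list (all values illustrative):

section = {
    "id": "section_image_example",
    "content_type": "image",
    "elements": [{"base64Data": "iVBORw0KGgo...", "altText": "Architecture diagram", "caption": "Figure 1"}],
}
# -> {"title": "Architecture diagram",
#     "content": "",
#     "images": [{"base64Data": "iVBORw0KGgo...", "altText": "Architecture diagram", "caption": "Figure 1"}]}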
            # Build slide content based on section type
            content_parts = []

@ -645,7 +673,8 @@ JSON ONLY. NO OTHER TEXT."""
            return {
                "title": section_title,
                "content": slide_content,
                "images": []  # No images for non-image sections
            }

        except Exception as e:
@ -835,7 +864,8 @@ JSON ONLY. NO OTHER TEXT."""
                if current_slide_content:
                    slides.append({
                        "title": current_slide_title,
                        "content": "\n\n".join(current_slide_content),
                        "images": []
                    })
                    current_slide_content = []

@ -844,6 +874,31 @@ JSON ONLY. NO OTHER TEXT."""
                    if isinstance(element, dict) and "text" in element:
                        current_slide_title = element.get("text", "Untitled Section")
                        break
            elif section_type == "image":
                # Create separate slide for image
                if current_slide_content:
                    slides.append({
                        "title": current_slide_title,
                        "content": "\n\n".join(current_slide_content),
                        "images": []
                    })
                    current_slide_content = []

                # Extract image data
                imageData = []
                for element in elements:
                    if element.get("base64Data"):
                        imageData.append({
                            "base64Data": element.get("base64Data"),
                            "altText": element.get("altText", "Image"),
                            "caption": element.get("caption")
                        })

                slides.append({
                    "title": section.get("title") or (imageData[0].get("altText", "Image") if imageData else "Image"),
                    "content": "",
                    "images": imageData
                })
            else:
                # Add content to current slide
                formatted_content = self._formatSectionContent(section)

@ -854,7 +909,8 @@ JSON ONLY. NO OTHER TEXT."""
        if current_slide_content:
            slides.append({
                "title": current_slide_title,
                "content": "\n\n".join(current_slide_content),
                "images": []
            })

        return slides
@ -869,6 +925,10 @@ JSON ONLY. NO OTHER TEXT."""
            content_type = section.get("content_type", "paragraph")
            elements = section.get("elements", [])

            # Image sections return empty content (handled separately)
            if content_type == "image":
                return ""

            # Process each element in the section
            content_parts = []
            for element in elements:

@ -891,6 +951,110 @@ JSON ONLY. NO OTHER TEXT."""
            logger.warning(f"Error formatting section content: {str(e)}")
            return ""

    def _addImagesToSlide(self, slide, images: List[Dict[str, Any]], styles: Dict[str, Any]) -> None:
        """Add images to a PowerPoint slide."""
        try:
            from pptx.util import Inches, Pt
            from pptx.enum.text import PP_ALIGN
            import base64
            import io

            if not images:
                return

            # Get slide dimensions from presentation
            if hasattr(self, '_currentPresentation'):
                prs = self._currentPresentation
            else:
                prs = slide.presentation
            slideWidth = prs.slide_width
            slideHeight = prs.slide_height
            titleHeight = Inches(1.5)  # Approximate title height

            # Available area for images
            availableWidth = slideWidth - Inches(1)  # Margins
            availableHeight = slideHeight - titleHeight - Inches(1)  # Title + margins

            # Position images
            if len(images) == 1:
                # Single image: center it
                img = images[0]
                base64Data = img.get("base64Data")
                if base64Data:
                    imageBytes = base64.b64decode(base64Data)
                    imageStream = io.BytesIO(imageBytes)

                    # Get image dimensions
                    try:
                        from PIL import Image as PILImage
                        pilImage = PILImage.open(imageStream)
                        imgWidth, imgHeight = pilImage.size

                        # Scale to fit available space (max 80% of slide)
                        maxWidth = availableWidth * 0.8
                        maxHeight = availableHeight * 0.8

                        scale = min(maxWidth / imgWidth, maxHeight / imgHeight, 1.0)
                        finalWidth = imgWidth * scale
                        finalHeight = imgHeight * scale

                        # Center image
                        left = (slideWidth - finalWidth) / 2
                        top = titleHeight + (availableHeight - finalHeight) / 2

                        imageStream.seek(0)
                    except Exception:
                        # Fallback: use default size
                        finalWidth = Inches(6)
                        finalHeight = Inches(4.5)
                        left = (slideWidth - finalWidth) / 2
                        top = titleHeight + Inches(1)
                        imageStream.seek(0)

                    # Add image to slide
                    slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight)

                    # Add caption if available
                    caption = img.get("caption") or img.get("altText")
                    if caption and caption != "Image":
                        # Add text box below image
                        captionTop = top + finalHeight + Inches(0.2)
                        captionBox = slide.shapes.add_textbox(
                            Inches(1),
                            captionTop,
                            slideWidth - Inches(2),
                            Inches(0.5)
                        )
                        captionFrame = captionBox.text_frame
                        captionFrame.text = caption
                        captionFrame.paragraphs[0].font.size = Pt(12)
                        captionFrame.paragraphs[0].font.italic = True
                        captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER
            else:
                # Multiple images: arrange in grid
                cols = 2 if len(images) <= 4 else 3
                rows = (len(images) + cols - 1) // cols

                imgWidth = (availableWidth - Inches(0.5) * (cols - 1)) / cols
                imgHeight = (availableHeight - Inches(0.5) * (rows - 1)) / rows

                for idx, img in enumerate(images):
                    base64Data = img.get("base64Data")
                    if base64Data:
                        row = idx // cols
                        col = idx % cols

                        imageBytes = base64.b64decode(base64Data)
                        imageStream = io.BytesIO(imageBytes)

                        left = Inches(0.5) + col * (imgWidth + Inches(0.5))
                        top = titleHeight + Inches(0.5) + row * (imgHeight + Inches(0.5))

                        slide.shapes.add_picture(imageStream, left, top, width=imgWidth, height=imgHeight)

        except Exception as e:
            logger.warning(f"Error adding images to slide: {str(e)}")
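The multi-image branch above lays images out on a simple grid: two columns up to four images, three beyond, with half-inch gutters. A standalone sketch of the same arithmetic with the pptx.util.Inches EMU wrappers dropped so positions come back as plain floats in inches (function name and the 9x5.5 example area are illustrative):

def grid_positions(n_images, avail_w_in, avail_h_in, title_h_in=1.5, gutter_in=0.5):
    """Return (left, top, width, height) in inches for each image cell."""
    cols = 2 if n_images <= 4 else 3
    rows = (n_images + cols - 1) // cols  # ceiling division
    img_w = (avail_w_in - gutter_in * (cols - 1)) / cols
    img_h = (avail_h_in - gutter_in * (rows - 1)) / rows
    positions = []
    for idx in range(n_images):
        row, col = divmod(idx, cols)
        left = 0.5 + col * (img_w + gutter_in)
        top = title_h_in + 0.5 + row * (img_h + gutter_in)
        positions.append((left, top, img_w, img_h))
    return positions

# Example: 3 images on a 9.0 x 5.5 inch content area -> 2 columns, 2 rows.
print(grid_positions(3, 9.0, 5.5))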
    def _formatTimestamp(self) -> str:
        """Format current timestamp for presentation generation."""
        from datetime import datetime, UTC
@ -64,14 +64,15 @@ class RendererText(BaseRenderer):
        """Generate text content from structured JSON document."""
        try:
            # Validate JSON structure
            if not self._validateJsonStructure(jsonContent):
                raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")

            # Extract sections and metadata from standardized schema
            sections = self._extractSections(jsonContent)
            metadata = self._extractMetadata(jsonContent)

            # Use title from JSON metadata if available, otherwise use provided title
            documentTitle = metadata.get("title", title)

            # Build text content
            textParts = []

@ -82,7 +83,6 @@ class RendererText(BaseRenderer):
            textParts.append("")

            # Process each section
            for section in sections:
                sectionText = self._renderJsonSection(section)
                if sectionText:
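The PPTX and text renderers above now validate against the same standardized schema before extracting sections. For reference, a minimal instance of that schema with the elements arrays already populated (field values illustrative):

minimal_document = {
    "metadata": {"title": "Example Report", "extraction_method": "ai_generation"},
    "documents": [
        {
            "sections": [
                {"id": "s1", "content_type": "heading", "order": 1,
                 "elements": [{"level": 1, "text": "Example Report"}]},
                {"id": "s2", "content_type": "paragraph", "order": 2,
                 "elements": [{"text": "Introduction paragraph."}]},
            ]
        }
    ],
}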
@ -451,7 +451,7 @@ class RendererXlsx(BaseRenderer):
    def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]:
        """Generate sheet names based on actual content structure."""
        sections = self._extractSections(jsonContent)

        # If no sections, create a single sheet
        if not sections:

@ -496,7 +496,7 @@ class RendererXlsx(BaseRenderer):
        if not sheetNames:
            return

        sections = self._extractSections(jsonContent)
        tableSections = [s for s in sections if s.get("content_type") == "table"]

        if len(tableSections) > 1:

@ -607,7 +607,7 @@ class RendererXlsx(BaseRenderer):
            row += 1

            # Content overview
            sections = self._extractSections(jsonContent)
            sheet[f'A{row + 1}'] = "Content Overview:"
            sheet[f'A{row + 1}'].font = Font(bold=True)

@ -640,7 +640,7 @@ class RendererXlsx(BaseRenderer):
    def _populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]):
        """Populate additional sheets based on content types."""
        try:
            sections = self._extractSections(jsonContent)

            for sheetName in sheetNames:
                if sheetName not in sheets:

@ -692,12 +692,14 @@ class RendererXlsx(BaseRenderer):
            for element in elements:
                if section_type == "table":
                    startRow = self._addTableToExcel(sheet, element, styles, startRow)
                elif section_type == "bullet_list" or section_type == "list":
                    startRow = self._addListToExcel(sheet, element, styles, startRow)
                elif section_type == "paragraph":
                    startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
                elif section_type == "heading":
                    startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
                elif section_type == "image":
                    startRow = self._addImageToExcel(sheet, element, styles, startRow)
                else:
                    startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
@ -807,6 +809,75 @@ JSON ONLY. NO OTHER TEXT."""
        except Exception as e:
            self.logger.warning(f"Could not add heading to Excel: {str(e)}")
            return startRow + 1

    def _addImageToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
        """Add an image element to Excel sheet using openpyxl."""
        try:
            base64Data = element.get("base64Data", "")
            altText = element.get("altText", "Image")
            caption = element.get("caption", "")

            if not base64Data:
                # No image data - add placeholder text
                sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]")
                return startRow + 1

            try:
                from openpyxl.drawing.image import Image as OpenpyxlImage
                import base64
                import io

                # Decode base64 image data
                imageBytes = base64.b64decode(base64Data)
                imageStream = io.BytesIO(imageBytes)

                # Create openpyxl Image
                img = OpenpyxlImage(imageStream)

                # Set image size (max width ~6 inches, maintain aspect ratio)
                maxWidth = 400  # pixels (approximately 6 inches at 72 DPI)
                if img.width > maxWidth:
                    scale = maxWidth / img.width
                    img.width = maxWidth
                    img.height = int(img.height * scale)

                # Anchor image to cell (A column, current row)
                img.anchor = f'A{startRow}'

                # Add image to sheet
                sheet.add_image(img)

                # Calculate height needed for image (approximate)
                # Excel row height is in points (72 per inch), image height is in screen pixels
                # Assuming 96 DPI: points = pixels * 72 / 96, i.e. pixels / 1.33
                imageHeightPoints = img.height / 1.33  # Approximate conversion
                sheet.row_dimensions[startRow].height = max(15, imageHeightPoints)  # Min 15 points

                # Add caption below image if available
                if caption:
                    startRow += 1
                    sheet.cell(row=startRow, column=1, value=caption)
                    sheet.cell(row=startRow, column=1).font = Font(italic=True, size=10)
                    sheet.cell(row=startRow, column=1).alignment = Alignment(horizontal="left")
                elif altText and altText != "Image":
                    startRow += 1
                    sheet.cell(row=startRow, column=1, value=f"Figure: {altText}")
                    sheet.cell(row=startRow, column=1).font = Font(italic=True, size=10)

                return startRow + 1

            except ImportError:
                self.logger.warning("openpyxl.drawing.image not available, using placeholder")
                sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]")
                return startRow + 1
            except Exception as imgError:
                self.logger.warning(f"Error embedding image in Excel: {str(imgError)}")
                sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]")
                return startRow + 1

        except Exception as e:
            self.logger.warning(f"Could not add image to Excel: {str(e)}")
            return startRow + 1
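The row-height estimate above converts screen pixels (96 DPI) to Excel's point-based row heights (72 points per inch), which is where the 1.33 divisor comes from. The same conversion as a tiny standalone helper (name illustrative):

def pixels_to_row_height_points(height_px: int, min_points: float = 15.0) -> float:
    """Approximate Excel row height in points for an image of height_px screen pixels."""
    return max(min_points, height_px * 72.0 / 96.0)

assert pixels_to_row_height_points(96) == 72.0   # a 96 px tall image spans one inch -> 72 pt
assert pixels_to_row_height_points(10) == 15.0   # small images are clamped to the 15 pt minimum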
    def _formatTimestamp(self) -> str:
        """Format current timestamp for document generation."""
840  modules/services/serviceGeneration/subContentGenerator.py  Normal file

@ -0,0 +1,840 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Content Generator for hierarchical document generation.
Generates content for each section in the document structure.
"""

import logging
import asyncio
from typing import Dict, Any, Optional, List, Callable
from modules.services.serviceGeneration.subContentIntegrator import ContentIntegrator

logger = logging.getLogger(__name__)
class ContentGenerator:
    """Generates content for document sections"""

    def __init__(self, services: Any):
        self.services = services
        self.integrator = ContentIntegrator(services)

    async def generateContent(
        self,
        structure: Dict[str, Any],
        cachedContent: Optional[Dict[str, Any]] = None,
        userPrompt: str = "",
        progressCallback: Optional[Callable] = None,
        parallelGeneration: bool = True,
        batchSize: int = 10
    ) -> Dict[str, Any]:
        """
        Generate content for all sections in structure.

        Args:
            structure: Document structure from Phase 1
            cachedContent: Extracted content cache
            userPrompt: Original user prompt
            progressCallback: Function to call for progress updates
            parallelGeneration: Enable parallel section generation
            batchSize: Number of sections to process in parallel

        Returns:
            Complete document structure with populated elements
        """
        try:
            documents = structure.get("documents", [])

            if not documents:
                logger.warning("No documents found in structure")
                return structure

            allGeneratedSections = []
            totalSectionsAcrossDocs = 0

            # Count total sections for progress tracking
            for doc in documents:
                totalSectionsAcrossDocs += len(doc.get("sections", []))

            if progressCallback:
                progressCallback(0, totalSectionsAcrossDocs, "Starting content generation...")

            currentSectionIndex = 0

            for docIdx, doc in enumerate(documents):
                sections = doc.get("sections", [])
                totalSections = len(sections)

                if totalSections == 0:
                    continue

                # Determine if parallel generation is beneficial
                # Use sequential if only 1 section or if sections depend on each other
                useParallel = parallelGeneration and totalSections > 1

                # Count images - if many images, parallel is still beneficial but slower
                imageCount = sum(1 for s in sections if s.get("content_type") == "image")

                if progressCallback and docIdx > 0:
                    progressCallback(
                        currentSectionIndex,
                        totalSectionsAcrossDocs,
                        f"Processing document {docIdx + 1}/{len(documents)}..."
                    )

                if useParallel:
                    # Generate in batches for parallel processing
                    generatedSections = await self._generateSectionsParallel(
                        sections=sections,
                        cachedContent=cachedContent,
                        userPrompt=userPrompt,
                        documentMetadata=structure.get("metadata", {}),
                        progressCallback=lambda idx, total, msg: progressCallback(
                            currentSectionIndex + idx,
                            totalSectionsAcrossDocs,
                            msg
                        ) if progressCallback else None,
                        batchSize=batchSize
                    )
                else:
                    # Generate sequentially (better for context-dependent sections)
                    generatedSections = await self._generateSectionsSequential(
                        sections=sections,
                        cachedContent=cachedContent,
                        userPrompt=userPrompt,
                        documentMetadata=structure.get("metadata", {}),
                        progressCallback=lambda idx, total, msg: progressCallback(
                            currentSectionIndex + idx,
                            totalSectionsAcrossDocs,
                            msg
                        ) if progressCallback else None
                    )

                allGeneratedSections.extend(generatedSections)
                currentSectionIndex += totalSections

            if progressCallback:
                progressCallback(
                    totalSectionsAcrossDocs,
                    totalSectionsAcrossDocs,
                    "Content generation complete"
                )

            # Integrate generated content into structure
            completeStructure = self.integrator.integrateContent(
                structure=structure,
                generatedSections=allGeneratedSections
            )

            return completeStructure

        except Exception as e:
            logger.error(f"Error generating content: {str(e)}")
            raise
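A usage sketch for generateContent, assuming a services container with the AI client already wired up (run_generation, on_progress, and the prompt text are illustrative names, not part of this module):

import asyncio

async def run_generation(services, structure):
    generator = ContentGenerator(services)

    def on_progress(done, total, message):
        print(f"[{done}/{total}] {message}")

    return await generator.generateContent(
        structure=structure,
        cachedContent=None,
        userPrompt="Create a short report about solar energy",
        progressCallback=on_progress,
        parallelGeneration=True,
        batchSize=10,
    )

# complete_structure = asyncio.run(run_generation(services, structure))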
    async def _generateSectionsSequential(
        self,
        sections: List[Dict[str, Any]],
        cachedContent: Optional[Dict[str, Any]],
        userPrompt: str,
        documentMetadata: Dict[str, Any],
        progressCallback: Optional[Callable] = None
    ) -> List[Dict[str, Any]]:
        """
        Generate sections sequentially with enhanced progress tracking.
        Uses previous sections for context continuity.
        """
        generatedSections = []
        previousSections = []
        totalSections = len(sections)

        for idx, section in enumerate(sections):
            try:
                contentType = section.get("content_type", "content")
                sectionId = section.get("id", f"section_{idx}")

                # Enhanced progress message
                if contentType == "image":
                    message = f"Generating image: {section.get('generation_hint', 'Image')[:50]}..."
                elif contentType == "heading":
                    message = "Generating heading..."
                elif contentType == "paragraph":
                    message = "Generating paragraph..."
                else:
                    message = f"Generating {contentType}..."

                if progressCallback:
                    progressCallback(
                        idx + 1,
                        totalSections,
                        message
                    )

                context = {
                    "userPrompt": userPrompt,
                    "cachedContent": cachedContent,
                    "previousSections": previousSections.copy(),
                    "targetSection": section,
                    "documentMetadata": documentMetadata,
                    "operationId": None
                }

                generated = await self._generateSectionContent(section, context)
                generatedSections.append(generated)
                previousSections.append(generated)

                # Log success
                if contentType == "image":
                    logger.info(f"Successfully generated image for section {sectionId}")
                elif not generated.get("error"):
                    logger.debug(f"Successfully generated {contentType} for section {sectionId}")

            except Exception as e:
                logger.error(f"Error generating section {section.get('id')}: {str(e)}")
                errorSection = self.integrator.createErrorSection(section, str(e))
                generatedSections.append(errorSection)
                previousSections.append(errorSection)

        return generatedSections
    async def _generateSectionsParallel(
        self,
        sections: List[Dict[str, Any]],
        cachedContent: Optional[Dict[str, Any]],
        userPrompt: str,
        documentMetadata: Dict[str, Any],
        progressCallback: Optional[Callable] = None,
        batchSize: int = 10
    ) -> List[Dict[str, Any]]:
        """
        Generate sections in parallel batches with enhanced progress tracking.

        Args:
            sections: List of sections to generate
            cachedContent: Extracted content cache
            userPrompt: Original user prompt
            documentMetadata: Document metadata
            progressCallback: Progress callback function
            batchSize: Number of sections to process in parallel per batch

        Returns:
            List of generated sections
        """
        generatedSections = []
        totalSections = len(sections)

        if totalSections == 0:
            return []

        # Adjust batch size based on section types (images take longer)
        imageCount = sum(1 for s in sections if s.get("content_type") == "image")
        if imageCount > 0:
            # Reduce batch size if many images (images are slower)
            adjustedBatchSize = min(batchSize, max(3, batchSize - imageCount // 2))
        else:
            adjustedBatchSize = batchSize

        # Process in batches
        totalBatches = (totalSections + adjustedBatchSize - 1) // adjustedBatchSize
        accumulatedPreviousSections = []  # Track sections from previous batches

        for batchNum, batchStart in enumerate(range(0, totalSections, adjustedBatchSize)):
            batch = sections[batchStart:batchStart + adjustedBatchSize]
            batchEnd = min(batchStart + adjustedBatchSize, totalSections)

            if progressCallback:
                progressCallback(
                    batchStart,
                    totalSections,
                    f"Processing batch {batchNum + 1}/{totalBatches} ({len(batch)} sections)..."
                )

            async def generateWithProgress(section: Dict[str, Any], globalIndex: int, localIndex: int, batchPreviousSections: List[Dict[str, Any]]):
                try:
                    contentType = section.get("content_type", "content")
                    sectionId = section.get("id", f"section_{globalIndex}")

                    # Enhanced progress message based on content type
                    if contentType == "image":
                        message = f"Generating image: {section.get('generation_hint', 'Image')[:50]}..."
                    elif contentType == "heading":
                        message = "Generating heading..."
                    elif contentType == "paragraph":
                        message = "Generating paragraph..."
                    else:
                        message = f"Generating {contentType}..."

                    if progressCallback:
                        progressCallback(
                            globalIndex + 1,
                            totalSections,
                            message
                        )

                    context = {
                        "userPrompt": userPrompt,
                        "cachedContent": cachedContent,
                        "previousSections": batchPreviousSections.copy(),  # Include sections from previous batches
                        "targetSection": section,
                        "documentMetadata": documentMetadata,
                        "operationId": None  # Can be set if needed for nested progress
                    }

                    result = await self._generateSectionContent(section, context)

                    # Log success
                    if contentType == "image":
                        logger.info(f"Successfully generated image for section {sectionId}")
                    elif not result.get("error"):
                        logger.debug(f"Successfully generated {contentType} for section {sectionId}")

                    return result

                except Exception as e:
                    logger.error(f"Error generating section {section.get('id')}: {str(e)}")
                    return self.integrator.createErrorSection(section, str(e))

            # Generate batch in parallel
            # Pass accumulated previous sections to each task in this batch
            batchTasks = [
                generateWithProgress(section, batchStart + idx, idx, accumulatedPreviousSections)
                for idx, section in enumerate(batch)
            ]

            batchResults = await asyncio.gather(
                *batchTasks,
                return_exceptions=True
            )

            # Handle exceptions and collect results
            for idx, result in enumerate(batchResults):
                if isinstance(result, Exception):
                    logger.error(f"Error in parallel generation batch {batchNum + 1}: {str(result)}")
                    errorSection = self.integrator.createErrorSection(batch[idx], str(result))
                    generatedSections.append(errorSection)
                    accumulatedPreviousSections.append(errorSection)  # Add to accumulated for next batch
                else:
                    generatedSections.append(result)
                    accumulatedPreviousSections.append(result)  # Add to accumulated for next batch

            # Update progress after batch completion
            if progressCallback:
                progressCallback(
                    batchEnd,
                    totalSections,
                    f"Completed batch {batchNum + 1}/{totalBatches}"
                )

        return generatedSections
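The batching above is the standard "gather in slices" pattern: slice the work list, await asyncio.gather on each slice with return_exceptions=True so one failure does not sink the batch, and carry results forward. A stripped-down sketch of just that pattern with a dummy worker (worker and sizes are illustrative):

import asyncio

async def process_in_batches(items, worker, batch_size=10):
    """Run worker over items, at most batch_size concurrently per batch."""
    results = []
    for start in range(0, len(items), batch_size):
        batch = items[start:start + batch_size]
        batch_results = await asyncio.gather(
            *(worker(item) for item in batch),
            return_exceptions=True,  # exceptions come back as values, not raised
        )
        results.extend(batch_results)
    return results

async def _demo():
    async def double(x):
        await asyncio.sleep(0)
        return 2 * x
    print(await process_in_batches(list(range(25)), double, batch_size=10))

# asyncio.run(_demo())  # 25 items -> 3 batches, matching (n + size - 1) // size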
    async def _generateSectionContent(
        self,
        section: Dict[str, Any],
        context: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Generate content for a single section.

        Args:
            section: Section to generate content for
            context: Generation context

        Returns:
            Section with populated elements array
        """
        try:
            contentType = section.get("content_type", "")
            complexity = section.get("complexity", "simple")

            if contentType == "image":
                return await self._generateImageSection(section, context)
            elif complexity == "complex":
                return await self._generateComplexTextSection(section, context)
            else:
                return await self._generateSimpleSection(section, context)

        except Exception as e:
            logger.error(f"Error generating section {section.get('id')}: {str(e)}")
            return self.integrator.createErrorSection(section, str(e))
    async def _generateSimpleSection(
        self,
        section: Dict[str, Any],
        context: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Generate content for simple section (heading, paragraph)"""
        try:
            contentType = section.get("content_type", "")
            generationHint = section.get("generation_hint", "")

            # Create section-specific prompt
            sectionPrompt = self._createSectionPrompt(section, context)

            # Debug: Log section generation prompt
            if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
                sectionId = section.get('id', 'unknown')
                contentType = section.get('content_type', 'unknown')
                try:
                    self.services.utils.writeDebugFile(
                        sectionPrompt,
                        f"document_generation_section_{sectionId}_{contentType}_prompt"
                    )
                except Exception as e:
                    logger.debug(f"Could not write debug file for section prompt: {e}")

            # Call AI to generate content
            from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum

            options = AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                resultFormat="json"
            )

            aiResponse = await self.services.ai.callAiContent(
                prompt=sectionPrompt,
                options=options,
                outputFormat="json"
            )

            # Debug: Log section generation response (always log, even if empty)
            sectionId = section.get('id', 'unknown')
            contentType = section.get('content_type', 'unknown')

            if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
                try:
                    responseContent = ''
                    if aiResponse:
                        if hasattr(aiResponse, 'content') and aiResponse.content:
                            responseContent = aiResponse.content
                        elif hasattr(aiResponse, 'documents') and aiResponse.documents:
                            responseContent = f"[Response has {len(aiResponse.documents)} documents]"
                        else:
                            responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]"
                    else:
                        responseContent = '[No response object]'

                    self.services.utils.writeDebugFile(
                        responseContent,
                        f"document_generation_section_{sectionId}_{contentType}_response"
                    )
                    logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)")
                except Exception as e:
                    logger.warning(f"Could not write debug file for section response: {e}")
                    import traceback
                    logger.debug(traceback.format_exc())

            if not aiResponse or not aiResponse.content:
                logger.error(f"AI section generation returned empty response for section {sectionId}")
                logger.error(f"Response object: {aiResponse}, has content: {hasattr(aiResponse, 'content') if aiResponse else False}")
                raise ValueError("AI section generation returned empty response")

            # Extract JSON elements
            rawContent = aiResponse.content if aiResponse and aiResponse.content else ""
            if not rawContent or not rawContent.strip():
                logger.error(f"AI section generation returned empty response for section {sectionId}")
                logger.error(f"Response object: {aiResponse}, content length: {len(rawContent) if rawContent else 0}")
                raise ValueError("AI section generation returned empty response")

            extractedJson = self.services.utils.jsonExtractString(rawContent)
            if not extractedJson or not extractedJson.strip():
                logger.error(f"No JSON found in AI response for section {sectionId}")
                logger.error(f"Raw response (first 1000 chars): {rawContent[:1000]}")
                logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500] if extractedJson else 'None'}")
                raise ValueError("No JSON found in AI section response")

            import json
            try:
                elementsData = json.loads(extractedJson)
                logger.debug(f"Parsed JSON for section {section.get('id')}: type={type(elementsData)}, keys={list(elementsData.keys()) if isinstance(elementsData, dict) else 'N/A'}")
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse JSON from AI response for section {section.get('id')}")
                logger.error(f"JSON decode error: {str(e)}")
                logger.error(f"Extracted JSON length: {len(extractedJson)} chars")
                logger.error(f"Extracted JSON (first 1000 chars): {extractedJson[:1000]}")
                if len(extractedJson) > 1000:
                    logger.error(f"Extracted JSON (last 500 chars): {extractedJson[-500:]}")
                logger.error(f"Raw AI response length: {len(rawContent)} chars")
                logger.error(f"Raw AI response (first 1000 chars): {rawContent[:1000] if rawContent else 'None'}")

                # Try to recover from truncated JSON if it looks like it was cut off
                if "Expecting" in str(e) and ("delimiter" in str(e) or "value" in str(e)):
                    # Check if JSON starts correctly but is truncated
                    if extractedJson.strip().startswith('{"elements"'):
                        logger.warning("JSON appears truncated, attempting recovery...")
                        # Use closeJsonStructures which handles unterminated strings properly
                        try:
                            from modules.shared.jsonUtils import closeJsonStructures
                            recoveredJson = closeJsonStructures(extractedJson)

                            logger.info("Attempting to parse recovered JSON (closed structures)")
                            logger.debug(f"Recovered JSON length: {len(recoveredJson)} chars (original: {len(extractedJson)} chars)")

                            elementsData = json.loads(recoveredJson)
                            logger.info(f"Successfully recovered JSON for section {section.get('id')}")
                        except (json.JSONDecodeError, ValueError) as recoveryError:
                            logger.error(f"JSON recovery failed: {str(recoveryError)}")
                            logger.error(f"Recovered JSON (first 500 chars): {recoveredJson[:500] if 'recoveredJson' in locals() else 'N/A'}")
                            # Check if raw response might be truncated
                            if len(rawContent) <= len(extractedJson) + 100:  # Raw content is similar length to extracted
                                logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)")
                                logger.warning("Consider increasing max_tokens for AI calls or checking token limits")
                            raise ValueError(f"Invalid JSON in AI response (truncated?): {str(e)}")
                    else:
                        raise ValueError(f"Invalid JSON in AI response: {str(e)}")
                else:
                    raise ValueError(f"Invalid JSON in AI response: {str(e)}")

            # Extract elements array - handle various response formats
            elements = None

            if isinstance(elementsData, dict):
                # Try to find elements in various possible locations
                if "elements" in elementsData:
                    elements = elementsData["elements"]
                elif "content" in elementsData and isinstance(elementsData["content"], list):
                    # Some models return {"content": [...]}
                    elements = elementsData["content"]
                elif "data" in elementsData and isinstance(elementsData["data"], list):
                    # Some models return {"data": [...]}
                    elements = elementsData["data"]
                elif len(elementsData) == 1:
                    # Single key dict - might be the elements directly
                    firstValue = list(elementsData.values())[0]
                    if isinstance(firstValue, list):
                        elements = firstValue
                else:
                    # Try to convert entire dict to a single element
                    logger.warning(f"AI returned dict without 'elements' key, attempting to convert: {list(elementsData.keys())}")
                    # For heading/paragraph, create element from dict
                    if contentType == "heading":
                        text = elementsData.get("text") or elementsData.get("heading") or str(elementsData)
                        level = elementsData.get("level", 1)
                        elements = [{"level": level, "text": text}]
                    elif contentType == "paragraph":
                        text = elementsData.get("text") or elementsData.get("content") or str(elementsData)
                        elements = [{"text": text}]
                    else:
                        # Try to create element from dict structure
                        elements = [elementsData]
            elif isinstance(elementsData, list):
                elements = elementsData
            else:
                # Primitive value - wrap it
                logger.warning(f"AI returned primitive value, wrapping: {type(elementsData)}")
                if contentType == "heading":
                    elements = [{"level": 1, "text": str(elementsData)}]
                elif contentType == "paragraph":
                    elements = [{"text": str(elementsData)}]
                else:
                    elements = [{"text": str(elementsData)}]

            if elements is None:
                logger.error(f"Could not extract elements from AI response. Response structure: {type(elementsData)}, keys: {list(elementsData.keys()) if isinstance(elementsData, dict) else 'N/A'}")
                logger.error(f"Full response (first 500 chars): {str(extractedJson)[:500]}")
                raise ValueError(f"Invalid elements format in AI response. Expected dict with 'elements' key or list, got: {type(elementsData)}")

            # Validate elements is a list
            if not isinstance(elements, list):
                logger.warning(f"Elements is not a list, converting: {type(elements)}")
                elements = [elements]

            # Update section with elements
            section["elements"] = elements
            return section

        except Exception as e:
            logger.error(f"Error generating simple section: {str(e)}")
            raise
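closeJsonStructures lives in modules.shared.jsonUtils and is not shown in this commit. A simplified illustration of the idea it relies on (close an unterminated string, then append whatever closing brackets are still open), written here as a hypothetical sketch rather than the actual implementation:

def close_json_structures_sketch(fragment: str) -> str:
    """Best-effort repair of a truncated JSON fragment (illustrative, not the real util)."""
    stack = []
    in_string = False
    escaped = False
    for ch in fragment:
        if in_string:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch in '{[':
            stack.append('}' if ch == '{' else ']')
        elif ch in '}]' and stack:
            stack.pop()
    repaired = fragment + ('"' if in_string else '')
    return repaired + ''.join(reversed(stack))

import json
assert json.loads(close_json_structures_sketch('{"elements": [{"text": "cut off')) == \
    {"elements": [{"text": "cut off"}]}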
    async def _generateImageSection(
        self,
        section: Dict[str, Any],
        context: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Generate image for image section or include existing image"""
        try:
            # Check if this is an existing image to include
            imageSource = section.get("image_source", "generate")

            if imageSource == "existing":
                # Include existing image from cachedContent
                imageRefId = section.get("image_reference_id")
                if not imageRefId:
                    raise ValueError(f"Image section {section.get('id')} has image_source='existing' but no image_reference_id")

                cachedContent = context.get("cachedContent", {})
                imageDocuments = cachedContent.get("imageDocuments", [])

                # Find the image document
                imageDoc = next((img for img in imageDocuments if img.get("id") == imageRefId), None)
                if not imageDoc:
                    raise ValueError(f"Image document {imageRefId} not found in cachedContent.imageDocuments")

                # Create image element from existing image
                altText = imageDoc.get("altText", section.get("generation_hint", "Image"))
                mimeType = imageDoc.get("mimeType", "image/png")

                section["elements"] = [{
                    "base64Data": imageDoc.get("base64Data"),
                    "altText": altText,
                    "mimeType": mimeType,
                    "caption": section.get("metadata", {}).get("caption")
                }]

                logger.info(f"Successfully included existing image {imageRefId} for section {section.get('id')}")
                return section

            # Generate new image (existing logic)
            imagePrompt = section.get("image_prompt")
            if not imagePrompt:
                # Try to create from generation_hint
                generationHint = section.get("generation_hint", "")
                if generationHint:
                    imagePrompt = f"Create a professional illustration: {generationHint}"
                else:
                    raise ValueError(f"Image section {section.get('id')} missing image_prompt and generation_hint")

            # Call AI service for image generation
            from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptImage
            import json

            # Create image generation prompt
            promptModel = AiCallPromptImage(
                prompt=imagePrompt,
                size="1024x1024",
                quality="standard",
                style="vivid"
            )
            promptJson = promptModel.model_dump_json(exclude_none=True, indent=2)

            options = AiCallOptions(
                operationType=OperationTypeEnum.IMAGE_GENERATE,
                resultFormat="base64"
            )

            # Log image generation start
            logger.info(f"Starting image generation for section {section.get('id')}: {imagePrompt[:100]}...")

            # Call AI for image generation
            aiResponse = await self.services.ai.callAiContent(
                prompt=promptJson,
                options=options,
                outputFormat="base64"
            )

            # Extract base64 image data
            base64Data = None

            if aiResponse and aiResponse.documents and len(aiResponse.documents) > 0:
                imageDoc = aiResponse.documents[0]
                base64Data = imageDoc.documentData
                logger.debug(f"Image data extracted from documents: {len(base64Data) if base64Data else 0} chars")

            # Fallback: check content field (might be base64 string)
            if not base64Data and aiResponse and aiResponse.content:
                base64Data = aiResponse.content
                logger.debug(f"Image data extracted from content: {len(base64Data) if base64Data else 0} chars")

            if not base64Data:
                raise ValueError("Image generation returned no data")

            # Validate base64 data
            try:
                import base64
                base64.b64decode(base64Data[:100], validate=True)  # Validate first 100 chars
            except Exception as e:
                logger.warning(f"Image data may not be valid base64: {str(e)}")
                # Continue anyway - renderer will handle it

            # Create image element
            altText = section.get("generation_hint", "Image")
            if not altText or altText == "Image":
                # Use image_prompt as alt text if generation_hint is generic
                altText = section.get("image_prompt", "Image")[:100]  # Limit length

            caption = section.get("metadata", {}).get("caption")

            section["elements"] = [{
                "url": f"data:image/png;base64,{base64Data}",
                "base64Data": base64Data,
                "altText": altText,
                "caption": caption
            }]

            logger.info(f"Successfully generated image for section {section.get('id')}")
            return section

        except Exception as e:
            logger.error(f"Error generating image section: {str(e)}")
            raise

    async def _generateComplexTextSection(
        self,
        section: Dict[str, Any],
        context: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Generate content for complex text section (long chapter)"""
        # For now, use same approach as simple section
        # Can be enhanced later with chunking for very long content
        return await self._generateSimpleSection(section, context)
    def _createSectionPrompt(
        self,
        section: Dict[str, Any],
        context: Dict[str, Any]
    ) -> str:
        """Create sub-prompt for section content generation"""
        contentType = section.get("content_type", "")
        generationHint = section.get("generation_hint", "")
        userPrompt = context.get("userPrompt", "")
        cachedContent = context.get("cachedContent")
        previousSections = context.get("previousSections", [])
        documentMetadata = context.get("documentMetadata", {})

        # Get user language
        userLanguage = self._getUserLanguage()

        # Format cached content
        cachedContentText = ""
        if cachedContent and cachedContent.get("extractedContent"):
            cachedContentText = self._formatCachedContent(cachedContent)

        # Format previous sections for context
        previousSectionsText = ""
        if previousSections:
            formattedSections = []
            for s in previousSections[-10:]:  # Last 10 sections for context (increased from 5)
                prevContentType = s.get('content_type', 'unknown')  # Distinct name to avoid shadowing contentType
                order = s.get('order', 0)
                hint = s.get('generation_hint', '')
                elements = s.get('elements', [])

                # Extract actual content from elements
                contentPreview = ""
                if elements:
                    if prevContentType == "heading":
                        # Extract heading text
                        for elem in elements:
                            if isinstance(elem, dict) and "text" in elem:
                                contentPreview = f": \"{elem['text']}\""
                                break
                    elif prevContentType == "paragraph":
                        # Extract paragraph text (first 100 chars)
                        for elem in elements:
                            if isinstance(elem, dict) and "text" in elem:
                                text = elem['text']
                                contentPreview = f": \"{text[:100]}{'...' if len(text) > 100 else ''}\""
                                break
                    elif prevContentType == "bullet_list":
                        # Extract bullet items
                        for elem in elements:
                            if isinstance(elem, dict) and "items" in elem:
                                items = elem['items']
                                if items:
                                    contentPreview = f": {items[:3]}{'...' if len(items) > 3 else ''}"
                                break

                formattedSections.append(
                    f"- Section {order} ({prevContentType}){contentPreview}"
                )
            previousSectionsText = "\n".join(formattedSections)

        prompt = f"""{'='*80}
SECTION TO GENERATE:
{'='*80}
Type: {contentType}
Hint: {generationHint}
{'='*80}

CONTEXT:
- User Request: {userPrompt}
- Previous Sections: {len(previousSections)} sections already generated
- Document Title: {documentMetadata.get('title', 'Unknown')}

{'='*80}
PREVIOUS SECTIONS (for continuity):
{'='*80}
{previousSectionsText if previousSectionsText else "This is the first section."}
{'='*80}

{'='*80}
EXTRACTED CONTENT (if available):
{'='*80}
{cachedContentText if cachedContentText else "None"}
{'='*80}

TASK: Generate content for this section ONLY.

INSTRUCTIONS:
1. Generate content appropriate for section type: {contentType}
2. Use the generation hint: {generationHint}
3. Consider previous sections for continuity
4. Use extracted content if relevant
5. All content must be in the language '{userLanguage}'

6. CRITICAL: Return ONLY a JSON object with an "elements" array. DO NOT return a full document structure.

REQUIRED FORMAT - Return ONLY this structure:

For heading:
{{"elements": [{{"level": 1, "text": "Heading Text"}}]}}

For paragraph:
{{"elements": [{{"text": "Paragraph text content"}}]}}

For table:
|
{{"elements": [{{"headers": ["Col1", "Col2"], "rows": [["Row1", "Row2"]]}}]}}
|
||||||
|
|
||||||
|
For bullet_list:
|
||||||
|
{{"elements": [{{"items": ["Item 1", "Item 2"]}}]}}
|
||||||
|
|
||||||
|
For code_block:
|
||||||
|
{{"elements": [{{"code": "code content here", "language": "python"}}]}}
|
||||||
|
|
||||||
|
CRITICAL RULES:
|
||||||
|
- Return ONLY {{"elements": [...]}} - nothing else
|
||||||
|
- DO NOT include "metadata", "documents", "sections", or any other fields
|
||||||
|
- DO NOT return a full document structure
|
||||||
|
- DO NOT add explanatory text before or after the JSON
|
||||||
|
- The response must start with {{"elements": and end with }}
|
||||||
|
- This is a SINGLE SECTION, not a full document
|
||||||
|
"""
|
||||||
|
return prompt
|
||||||
|
|
||||||
|
def _formatCachedContent(self, cachedContent: Dict[str, Any]) -> str:
|
||||||
|
"""Format cached content for prompt inclusion"""
|
||||||
|
try:
|
||||||
|
extractedContent = cachedContent.get("extractedContent", [])
|
||||||
|
if not extractedContent:
|
||||||
|
return "No content extracted."
|
||||||
|
|
||||||
|
formattedParts = []
|
||||||
|
for extracted in extractedContent:
|
||||||
|
if hasattr(extracted, 'parts'):
|
||||||
|
for part in extracted.parts:
|
||||||
|
if hasattr(part, 'content'):
|
||||||
|
formattedParts.append(part.content)
|
||||||
|
elif isinstance(extracted, dict):
|
||||||
|
formattedParts.append(str(extracted))
|
||||||
|
else:
|
||||||
|
formattedParts.append(str(extracted))
|
||||||
|
|
||||||
|
return "\n\n".join(formattedParts) if formattedParts else "No content extracted."
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error formatting cached content: {str(e)}")
|
||||||
|
return "Error formatting cached content."
|
||||||
|
|
||||||
|
def _getUserLanguage(self) -> str:
|
||||||
|
"""Get user language for document generation"""
|
||||||
|
try:
|
||||||
|
if self.services:
|
||||||
|
if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage:
|
||||||
|
return self.services.currentUserLanguage
|
||||||
|
elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'):
|
||||||
|
return self.services.user.language
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return 'en' # Default fallback
|
||||||
|
|
||||||
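A quick illustration of the contract the sub-prompt above enforces: the model must reply with an elements-only JSON object. This is a minimal sketch for reference; parseSectionResponse is a hypothetical helper, not part of this commit.

import json

def parseSectionResponse(rawResponse: str) -> list:
    """Accept only {"elements": [...]} replies, as the sub-prompt demands."""
    parsed = json.loads(rawResponse)
    if not isinstance(parsed, dict) or "elements" not in parsed:
        raise ValueError("Response must be a JSON object with an 'elements' array")
    if any(key in parsed for key in ("metadata", "documents", "sections")):
        raise ValueError("Response must not contain a full document structure")
    return parsed["elements"]

# A valid paragraph reply parses to its elements list
elements = parseSectionResponse('{"elements": [{"text": "Paragraph text content"}]}')
assert elements == [{"text": "Paragraph text content"}]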
modules/services/serviceGeneration/subContentIntegrator.py (new file, +167)
@@ -0,0 +1,167 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Content Integrator for hierarchical document generation.
Merges generated content into document structure and validates completeness.
"""

import logging
from typing import Dict, Any, List, Tuple

logger = logging.getLogger(__name__)


class ContentIntegrator:
    """Integrates generated content into document structure"""

    def __init__(self, services: Any = None):
        self.services = services

    def integrateContent(
        self,
        structure: Dict[str, Any],
        generatedSections: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Merge generated sections into document structure.

        Args:
            structure: Original document structure
            generatedSections: List of sections with populated elements

        Returns:
            Complete document structure ready for rendering
        """
        try:
            # Create mapping of section IDs to generated sections
            sectionMap = {section.get("id"): section for section in generatedSections}

            # Process each document
            for doc in structure.get("documents", []):
                sections = doc.get("sections", [])

                for idx, section in enumerate(sections):
                    sectionId = section.get("id")

                    # Find corresponding generated section
                    if sectionId in sectionMap:
                        generatedSection = sectionMap[sectionId]

                        # Merge elements into structure section
                        if "elements" in generatedSection:
                            section["elements"] = generatedSection["elements"]

                        # Preserve error information if present
                        if generatedSection.get("error"):
                            section["error"] = True
                            section["errorMessage"] = generatedSection.get("errorMessage")
                            section["originalContentType"] = generatedSection.get("originalContentType")
                    else:
                        # Section not generated - create error section
                        logger.warning(f"Section {sectionId} not found in generated sections")
                        section = self.createErrorSection(
                            section,
                            f"Section {sectionId} was not generated"
                        )
                        sections[idx] = section

            # Debug: Write final merged structure to debug file
            if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
                try:
                    import json
                    structureJson = json.dumps(structure, indent=2, ensure_ascii=False)
                    self.services.utils.writeDebugFile(
                        structureJson,
                        "document_generation_final_merged_json"
                    )
                    logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)")
                except Exception as e:
                    logger.debug(f"Could not write debug file for final merged JSON: {e}")

            return structure

        except Exception as e:
            logger.error(f"Error integrating content: {str(e)}")
            raise

    def validateCompleteness(
        self,
        document: Dict[str, Any]
    ) -> Tuple[bool, List[str]]:
        """
        Validate that all sections have content.

        Args:
            document: Document structure to validate

        Returns:
            (is_complete, list_of_missing_sections)
        """
        missingSections = []

        try:
            for doc in document.get("documents", []):
                sections = doc.get("sections", [])

                for section in sections:
                    sectionId = section.get("id", "unknown")
                    elements = section.get("elements", [])

                    # Check if section has content
                    if not elements:
                        # Skip error sections (they carry their own error text)
                        if not section.get("error"):
                            missingSections.append(sectionId)
                    else:
                        # Validate elements have actual content
                        hasContent = False
                        for element in elements:
                            # Check different content types
                            if element.get("text") or element.get("base64Data") or \
                                    element.get("headers") or element.get("items") or \
                                    element.get("code"):
                                hasContent = True
                                break

                        if not hasContent and not section.get("error"):
                            missingSections.append(sectionId)

            return len(missingSections) == 0, missingSections

        except Exception as e:
            logger.error(f"Error validating completeness: {str(e)}")
            return False, [f"Validation error: {str(e)}"]

    def createErrorSection(
        self,
        originalSection: Dict[str, Any],
        errorMessage: str
    ) -> Dict[str, Any]:
        """
        Create error placeholder section.

        Args:
            originalSection: Original section that failed
            errorMessage: Error message to display

        Returns:
            Error section with placeholder content
        """
        contentType = originalSection.get("content_type", "content")
        sectionId = originalSection.get("id", "unknown")

        return {
            "id": sectionId,
            "content_type": "paragraph",  # Change to paragraph for error display
            "elements": [{
                "text": f"[ERROR: Failed to generate {contentType} for section '{sectionId}'. Error: {errorMessage}]"
            }],
            "order": originalSection.get("order", 0),
            "error": True,
            "errorMessage": errorMessage,
            "originalContentType": contentType,
            "title": originalSection.get("title"),
            "generation_hint": originalSection.get("generation_hint"),
            "complexity": originalSection.get("complexity")
        }
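A minimal sketch of driving the integrator above; the structure and generated sections are illustrative, not taken from this commit.

structure = {
    "documents": [{
        "id": "doc_1",
        "sections": [
            {"id": "s1", "content_type": "heading", "order": 1, "elements": []},
            {"id": "s2", "content_type": "paragraph", "order": 2, "elements": []}
        ]
    }]
}
generatedSections = [
    {"id": "s1", "elements": [{"level": 1, "text": "Title"}]},
    {"id": "s2", "elements": [{"text": "Body text"}]}
]

integrator = ContentIntegrator()  # no services: the debug-file write is skipped
merged = integrator.integrateContent(structure, generatedSections)
isComplete, missing = integrator.validateCompleteness(merged)
assert isComplete and missing == []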
modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py (new file, +316)
@@ -0,0 +1,316 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document Purpose Analyzer for hierarchical document generation.
Uses AI to analyze user prompt and determine purpose for each document.
"""

import logging
import json
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum

logger = logging.getLogger(__name__)


class DocumentPurposeAnalyzer:
    """Analyzes user prompt and documents to determine document purposes"""

    def __init__(self, services: Any):
        self.services = services

    async def analyzeDocumentPurposes(
        self,
        userPrompt: str,
        chatDocuments: List[ChatDocument],
        actionContext: str = "generateDocument"
    ) -> Dict[str, Any]:
        """
        Use AI to analyze the user prompt and determine a purpose for each document.

        Args:
            userPrompt: User's original prompt
            chatDocuments: List of ChatDocument objects to analyze
            actionContext: Action name (e.g., "generateDocument", "extractData")

        Returns:
            {
                "document_purposes": [
                    {
                        "document_id": "...",
                        "purpose": "extract_text_content" | "include_image" | ...,
                        "reasoning": "...",
                        "extractionPrompt": "..." (if purpose requires extraction),
                        "processingNotes": "..."
                    }
                ],
                "overall_intent": "..."
            }
        """
        try:
            if not chatDocuments:
                return {
                    "document_purposes": [],
                    "overall_intent": "No documents provided"
                }

            # Create document metadata list for AI analysis
            documentMetadata = []
            for doc in chatDocuments:
                docInfo = {
                    "document_id": doc.id,
                    "fileName": doc.fileName,
                    "mimeType": doc.mimeType,
                    "fileSize": doc.fileSize
                }
                documentMetadata.append(docInfo)

            # Create analysis prompt
            analysisPrompt = self._createAnalysisPrompt(
                userPrompt=userPrompt,
                actionContext=actionContext,
                documentMetadata=documentMetadata
            )

            # Debug: Log purpose analysis prompt
            if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
                try:
                    self.services.utils.writeDebugFile(
                        analysisPrompt,
                        "document_purpose_analysis_prompt"
                    )
                except Exception as e:
                    logger.debug(f"Could not write debug file for purpose analysis prompt: {e}")

            # Call AI for analysis
            options = AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                resultFormat="json"
            )

            aiResponse = await self.services.ai.callAiContent(
                prompt=analysisPrompt,
                options=options,
                outputFormat="json"
            )

            # Debug: Log purpose analysis response
            if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
                try:
                    responseContent = aiResponse.content if aiResponse and aiResponse.content else ''
                    responseMetadata = {
                        "status": aiResponse.status if aiResponse else "N/A",
                        "error": aiResponse.error if aiResponse else "N/A",
                        "documents_count": len(aiResponse.documents) if aiResponse and aiResponse.documents else 0
                    }
                    self.services.utils.writeDebugFile(
                        f"Response Content:\n{responseContent}\n\nResponse Metadata:\n{json.dumps(responseMetadata, indent=2)}",
                        "document_purpose_analysis_response"
                    )
                except Exception as e:
                    logger.debug(f"Could not write debug file for purpose analysis response: {e}")

            if not aiResponse or not aiResponse.content:
                logger.warning("AI purpose analysis returned empty response, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            # Extract and parse JSON
            extractedJson = self.services.utils.jsonExtractString(aiResponse.content)
            if not extractedJson:
                logger.warning("No JSON found in purpose analysis response, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            try:
                analysisResult = json.loads(extractedJson)

                # Validate structure
                if "document_purposes" not in analysisResult:
                    logger.warning("Invalid analysis result structure, using defaults")
                    return self._createDefaultPurposes(chatDocuments, actionContext)

                # Ensure all documents have purposes
                analyzedIds = {dp.get("document_id") for dp in analysisResult.get("document_purposes", [])}
                for doc in chatDocuments:
                    if doc.id not in analyzedIds:
                        logger.warning(f"Document {doc.id} not in analysis result, adding default purpose")
                        defaultPurpose = self._determineDefaultPurpose(doc, actionContext)
                        analysisResult["document_purposes"].append({
                            "document_id": doc.id,
                            "purpose": defaultPurpose,
                            "reasoning": "Default purpose based on document type and action context",
                            "extractionPrompt": None,
                            "processingNotes": None
                        })

                return analysisResult

            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse purpose analysis JSON: {str(e)}")
                logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500]}")
                return self._createDefaultPurposes(chatDocuments, actionContext)

        except Exception as e:
            logger.error(f"Error analyzing document purposes: {str(e)}")
            return self._createDefaultPurposes(chatDocuments, actionContext)

    def _createAnalysisPrompt(
        self,
        userPrompt: str,
        actionContext: str,
        documentMetadata: List[Dict[str, Any]]
    ) -> str:
        """Create AI prompt for document purpose analysis"""

        # Format document list
        docListText = ""
        for i, docInfo in enumerate(documentMetadata, 1):
            docListText += f"\n{i}. Document ID: {docInfo['document_id']}\n"
            docListText += f"   File Name: {docInfo['fileName']}\n"
            docListText += f"   MIME Type: {docInfo['mimeType']}\n"
            docListText += f"   File Size: {docInfo['fileSize']} bytes\n"

        # Get user language
        userLanguage = self._getUserLanguage()

        prompt = f"""{'='*80}
DOCUMENT PURPOSE ANALYSIS
{'='*80}

USER PROMPT:
{userPrompt}

ACTION CONTEXT: {actionContext}

DOCUMENTS PROVIDED:
{docListText}
{'='*80}

TASK: For each document, determine its purpose based on:
1. User prompt intent (what the user wants to do)
2. Action context (what action is being performed)
3. Document type (mimeType - is it text, image, etc.)
4. Document metadata (fileName, size)

AVAILABLE PURPOSES:
- "extract_text_content": Extract text content for use in document generation
- "include_image": Include the image directly in the generated document (for images)
- "analyze_image_vision": Analyze image with vision AI to extract text/information (for images with text/charts)
- "use_as_template": Use document structure/layout as template for generation
- "use_as_reference": Use as background context/reference without detailed extraction
- "extract_data": Extract structured data (key-value pairs, entities, fields)
- "attach": Document is an attachment - don't process, just attach to output
- "convert_format": Convert document format (for convert actions)
- "translate": Translate document content (for translate actions)
- "summarize": Create summary of document (for summarize actions)
- "compare": Compare documents (for comparison actions)
- "merge": Merge documents (for merge actions)
- "extract_tables_charts": Extract tables and charts specifically
- "use_for_styling": Use document for styling/formatting reference only
- "extract_metadata": Extract only document metadata

CRITICAL RULES:
1. For images (mimeType starts with "image/"):
   - If user wants to "include" or "show" images → "include_image"
   - If user wants to "analyze", "read text", or "extract text" from images → "analyze_image_vision"
   - Default for images in generateDocument → "include_image"

2. For text documents in generateDocument:
   - If user mentions "template" or "structure" → "use_as_template"
   - If user mentions "reference" or "context" → "use_as_reference"
   - Default → "extract_text_content"

3. Consider action context:
   - generateDocument: Usually "extract_text_content" or "include_image"
   - extractData: Usually "extract_data"
   - translateDocument: Usually "translate"
   - summarizeDocument: Usually "summarize"

4. Return ONLY valid JSON following this structure:
{{
    "document_purposes": [
        {{
            "document_id": "document_id_here",
            "purpose": "extract_text_content",
            "reasoning": "Brief explanation in language '{userLanguage}'",
            "extractionPrompt": "Specific extraction prompt if purpose requires extraction, otherwise null",
            "processingNotes": "Any special processing requirements or null"
        }}
    ],
    "overall_intent": "Summary of how documents should be used together in language '{userLanguage}'"
}}

5. All content must be in the language '{userLanguage}'
6. Return ONLY the JSON structure. No explanations before or after.
"""
        return prompt

    def _createDefaultPurposes(
        self,
        chatDocuments: List[ChatDocument],
        actionContext: str
    ) -> Dict[str, Any]:
        """Create default purposes when AI analysis fails"""
        purposes = []

        for doc in chatDocuments:
            purpose = self._determineDefaultPurpose(doc, actionContext)
            purposes.append({
                "document_id": doc.id,
                "purpose": purpose,
                "reasoning": f"Default purpose based on document type ({doc.mimeType}) and action context ({actionContext})",
                "extractionPrompt": None,
                "processingNotes": None
            })

        return {
            "document_purposes": purposes,
            "overall_intent": f"Default processing for {len(chatDocuments)} document(s) in {actionContext} action"
        }

    def _determineDefaultPurpose(
        self,
        doc: ChatDocument,
        actionContext: str
    ) -> str:
        """Determine default purpose based on document type and action context"""
        mimeType = doc.mimeType or ""

        # Image documents
        if mimeType.startswith("image/"):
            if actionContext == "generateDocument":
                return "include_image"
            elif actionContext in ["extractData", "process"]:
                return "analyze_image_vision"
            else:
                return "include_image"  # Default for images

        # Action-specific defaults
        if actionContext == "extractData":
            return "extract_data"
        elif actionContext == "translateDocument":
            return "translate"
        elif actionContext == "summarizeDocument":
            return "summarize"
        elif actionContext in ("convertDocument", "convert"):
            return "convert_format"
        elif actionContext == "generateDocument":
            return "extract_text_content"
        else:
            # Default for other actions
            return "extract_text_content"

    def _getUserLanguage(self) -> str:
        """Get user language for document generation"""
        try:
            if self.services:
                if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage:
                    return self.services.currentUserLanguage
                elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'):
                    return self.services.user.language
        except Exception:
            pass
        return 'en'  # Default fallback
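A sketch of the default-purpose fallback above; SimpleNamespace stands in for ChatDocument (only id and mimeType are read here), so this is illustrative rather than a real call path.

from types import SimpleNamespace

analyzer = DocumentPurposeAnalyzer(services=None)
imageDoc = SimpleNamespace(id="doc_1", mimeType="image/png")
textDoc = SimpleNamespace(id="doc_2", mimeType="application/pdf")

# Images are included directly when generating a document...
assert analyzer._determineDefaultPurpose(imageDoc, "generateDocument") == "include_image"
# ...but analyzed with vision AI in an extraction action
assert analyzer._determineDefaultPurpose(imageDoc, "extractData") == "analyze_image_vision"
# Text documents default to content extraction
assert analyzer._determineDefaultPurpose(textDoc, "generateDocument") == "extract_text_content"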
modules/services/serviceGeneration/subStructureGenerator.py (new file, +488)
@@ -0,0 +1,488 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Structure Generator for hierarchical document generation.
Generates document skeleton with section placeholders.
"""

import logging
import json
from typing import Dict, Any, Optional, List
from modules.datamodels.datamodelJson import jsonTemplateDocument

logger = logging.getLogger(__name__)


class StructureGenerator:
    """Generates document structure with section placeholders"""

    def __init__(self, services: Any):
        self.services = services

    async def generateStructure(
        self,
        userPrompt: str,
        documentList: Optional[Any] = None,
        cachedContent: Optional[Dict[str, Any]] = None,
        maxSectionLength: int = 500,
        existingImages: Optional[List[Dict[str, Any]]] = None
    ) -> Dict[str, Any]:
        """
        Generate document structure with sections.

        Args:
            userPrompt: User's original prompt
            documentList: Optional document references
            cachedContent: Optional extracted content cache
            maxSectionLength: Maximum words for simple sections
            existingImages: Optional list of existing images to include

        Returns:
            Document structure with empty elements arrays
        """
        try:
            # Create structure generation prompt
            structurePrompt = self._createStructurePrompt(
                userPrompt=userPrompt,
                cachedContent=cachedContent,
                maxSectionLength=maxSectionLength,
                existingImages=existingImages or []
            )

            # Debug: Log structure generation prompt
            if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
                try:
                    self.services.utils.writeDebugFile(
                        structurePrompt,
                        "document_generation_structure_prompt"
                    )
                except Exception as e:
                    logger.debug(f"Could not write debug file for structure prompt: {e}")

            # Call AI to generate structure
            from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum

            options = AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                resultFormat="json"
            )

            aiResponse = await self.services.ai.callAiContent(
                prompt=structurePrompt,
                options=options,
                outputFormat="json"
            )

            # Debug: Log structure generation response
            if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
                try:
                    self.services.utils.writeDebugFile(
                        aiResponse.content if aiResponse and aiResponse.content else '',
                        "document_generation_structure_response"
                    )
                except Exception as e:
                    logger.debug(f"Could not write debug file for structure response: {e}")

            if not aiResponse or not aiResponse.content:
                raise ValueError("AI structure generation returned empty response")

            # Extract and parse JSON
            extractedJson = self.services.utils.jsonExtractString(aiResponse.content)
            if not extractedJson:
                raise ValueError("No JSON found in AI structure response")

            structure = json.loads(extractedJson)

            # Validate and enhance structure
            structure = self._validateAndEnhanceStructure(structure, maxSectionLength)

            return structure

        except Exception as e:
            logger.error(f"Error generating structure: {str(e)}")
            raise

    def _createStructurePrompt(
        self,
        userPrompt: str,
        cachedContent: Optional[Dict[str, Any]] = None,
        maxSectionLength: int = 500,
        existingImages: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """Create prompt for structure generation."""
        # Get user language
        userLanguage = self._getUserLanguage()

        # Format cached content if available
        cachedContentText = ""
        if cachedContent and cachedContent.get("extractedContent"):
            cachedContentText = self._formatCachedContent(cachedContent)

        # Use provided existingImages or extract from cachedContent
        if existingImages is None:
            existingImages = []
            if cachedContent and cachedContent.get("imageDocuments"):
                existingImages = cachedContent.get("imageDocuments", [])

        # Create structure template
        structureTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", "Document Title")

        prompt = f"""{'='*80}
USER REQUEST:
{'='*80}
{userPrompt}
{'='*80}

TASK: Generate a document STRUCTURE (skeleton) with sections.
Do NOT generate actual content yet - only the structure.

{'='*80}
EXTRACTED CONTENT (if available):
{'='*80}
{cachedContentText if cachedContentText else "No source documents provided."}
{'='*80}

INSTRUCTIONS:
1. Analyze the user request and extracted content
2. Create a document structure with CONTENT sections only
3. For each section, specify:
   - id: Unique identifier (e.g., "section_title_1", "section_image_1")
   - content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block"
   - complexity: "simple" (can generate directly) or "complex" (needs sub-prompt)
   - generation_hint: Brief description of what content should be generated
   - image_prompt: (only for image sections) Detailed prompt for image generation
   - order: Section order number (starting from 1)
   - elements: [] (empty array - will be populated later)

4. Identify image sections:
   - If user requests illustrations/images, create image sections
   - If existing images are provided in documentList (check EXISTING IMAGES section below), create image sections that reference them
   - Add image_prompt field with detailed description for image generation (only for new images)
   - Set complexity to "complex"
   - For existing images: Set image_source to "existing" and image_reference_id to the image document ID
   - Example for new image: {{"id": "section_image_1", "content_type": "image", "complexity": "complex", "generation_hint": "Illustration for chapter 1", "image_prompt": "A detailed description for image generation", "order": 2, "elements": []}}
   - Example for existing image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Include provided image", "image_source": "existing", "image_reference_id": "doc_id_here", "order": 2, "elements": []}}

{'='*80}
EXISTING IMAGES (to include in document):
{'='*80}
{self._formatExistingImages(existingImages) if existingImages else "No existing images provided."}
{'='*80}

5. Identify complex text sections:
   - Long chapters (>{maxSectionLength} words expected) should be marked as "complex"
   - Short paragraphs/headings should be "simple"

6. Return ONLY valid JSON following this structure:
{structureTemplate}

7. CRITICAL RULES:
   - Return ONLY valid JSON (no comments, no trailing commas, double quotes only)
   - Follow the exact JSON schema structure provided
   - IMPORTANT: All sections MUST have empty elements arrays: "elements": [] (the template shows examples with content, but you must use empty arrays)
   - ALL sections MUST include "generation_hint" field with a brief description of what content should be generated
   - ALL sections MUST include "complexity" field: "simple" for short content, "complex" for long chapters/images
   - Image sections MUST include "image_prompt" field with detailed description for image generation
   - Order numbers MUST start from 1 (not 0)
   - All content must be in the language '{userLanguage}'
   - Do NOT generate actual content - only structure (skeleton)
   - Use only supported content_type values: "heading", "paragraph", "image", "table", "bullet_list", "code_block"

Return ONLY the JSON structure. No explanations.
"""
        return prompt

    def _validateAndEnhanceStructure(
        self,
        structure: Dict[str, Any],
        maxSectionLength: int
    ) -> Dict[str, Any]:
        """Validate structure and enhance with complexity identification."""
        try:
            # Ensure structure has required fields
            if "documents" not in structure:
                if "sections" in structure:
                    # Convert single-document format to multi-document format
                    structure = {
                        "metadata": structure.get("metadata", {}),
                        "documents": [{
                            "id": "doc_1",
                            "title": structure.get("metadata", {}).get("title", "Document"),
                            "filename": "document.json",
                            "sections": structure.get("sections", [])
                        }]
                    }
                else:
                    raise ValueError("Structure missing 'documents' or 'sections' field")

            # Process each document
            for doc in structure.get("documents", []):
                sections = doc.get("sections", [])

                # Process and validate sections according to standardized schema
                for idx, section in enumerate(sections):
                    # Ensure required fields
                    if "id" not in section:
                        section["id"] = f"section_{idx + 1}"

                    sectionId = section.get("id", "")
                    section["order"] = idx + 1

                    if "elements" not in section:
                        section["elements"] = []

                    # Identify complexity if not set
                    if "complexity" not in section:
                        section["complexity"] = self._identifySectionComplexity(
                            section,
                            maxSectionLength
                        )

                    # Ensure generation_hint exists (required for content generation)
                    if "generation_hint" not in section or not section.get("generation_hint"):
                        # Create a meaningful generation hint from the section id or content type
                        contentType = section.get("content_type", "")

                        # Extract meaningful hint from section ID
                        meaningfulHint = self._extractMeaningfulHint(sectionId, contentType, section.get("elements", []))
                        section["generation_hint"] = meaningfulHint

                    # Ensure image sections have proper configuration
                    if section.get("content_type") == "image":
                        imageSource = section.get("image_source", "generate")

                        if imageSource == "existing":
                            # Existing image - ensure image_reference_id is set
                            if "image_reference_id" not in section:
                                logger.warning(f"Image section {sectionId} has image_source='existing' but no image_reference_id")
                            # Existing images are simple (no generation needed)
                            section["complexity"] = "simple"
                        else:
                            # New image generation - ensure image_prompt
                            if "image_prompt" not in section or not section.get("image_prompt"):
                                # Try to derive it from generation_hint
                                generationHint = section.get("generation_hint", "")
                                if generationHint:
                                    # Enhance generation_hint to be a proper image prompt
                                    section["image_prompt"] = self._enhanceImagePrompt(generationHint)
                                else:
                                    # Create a default based on document context
                                    docTitle = doc.get("title", "Document")
                                    section["image_prompt"] = f"Generate an illustration for: {docTitle}"

                            # Ensure complexity is set to complex for new image generation
                            section["complexity"] = "complex"

            return structure

        except Exception as e:
            logger.error(f"Error validating structure: {str(e)}")
            raise

    def _identifySectionComplexity(
        self,
        section: Dict[str, Any],
        maxSectionLength: int
    ) -> str:
        """
        Identify whether a section is simple or complex.

        Rules:
        - Images: always complex
        - Long chapters (>maxSectionLength words): complex
        - Others: simple
        """
        contentType = section.get("content_type", "")

        # Images are always complex
        if contentType == "image":
            return "complex"

        # Check generation_hint for length indicators
        generationHint = section.get("generation_hint", "").lower()

        # Keywords indicating long content
        longContentKeywords = [
            "chapter", "long", "detailed", "comprehensive",
            "extensive", "full", "complete story"
        ]

        if any(keyword in generationHint for keyword in longContentKeywords):
            return "complex"

        # Default to simple
        return "simple"

    def _extractMeaningfulHint(
        self,
        sectionId: str,
        contentType: str,
        elements: List[Any]
    ) -> str:
        """
        Extract a meaningful generation hint from the section ID, content type, or elements.

        Args:
            sectionId: Section identifier (e.g., "section_heading_current_state")
            contentType: Content type (e.g., "heading", "paragraph")
            elements: Existing elements if any

        Returns:
            Meaningful generation hint string
        """
        sectionIdLower = sectionId.lower()

        # Try to extract text from existing elements first (most accurate)
        if elements and isinstance(elements, list) and len(elements) > 0:
            firstElement = elements[0]
            if isinstance(firstElement, dict):
                if "text" in firstElement and firstElement["text"]:
                    if contentType == "heading":
                        return firstElement["text"]
                    elif contentType == "paragraph":
                        return f"Content paragraph: {firstElement['text'][:50]}..."

        # Extract meaningful text from the section ID
        # Remove common prefixes: "section_", "section_heading_", "section_paragraph_", etc.
        meaningfulPart = sectionId
        for prefix in ["section_heading_", "section_paragraph_", "section_bullet_list_",
                       "section_code_block_", "section_image_", "section_"]:
            if meaningfulPart.lower().startswith(prefix):
                meaningfulPart = meaningfulPart[len(prefix):]
                break

        # Convert snake_case to Title Case
        # e.g., "current_state" -> "Current State"
        words = meaningfulPart.replace("_", " ").split()
        titleCase = " ".join(word.capitalize() for word in words if word)

        # Handle special cases
        if "introduction" in sectionIdLower or "intro" in sectionIdLower:
            return "Introduction paragraph"
        elif "conclusion" in sectionIdLower:
            return "Conclusion paragraph"
        elif "footer" in sectionIdLower or "copyright" in sectionIdLower:
            return "Footer content"
        elif "title" in sectionIdLower and "main" in sectionIdLower:
            # Main title - try to get from document title or use generic
            return "Main document title"

        # Create hint based on content type and extracted text
        if contentType == "heading":
            if titleCase:
                return titleCase
            else:
                return "Section heading"
        elif contentType == "paragraph":
            if titleCase:
                return f"Content paragraph about {titleCase.lower()}"
            else:
                return "Content paragraph"
        elif contentType == "bullet_list":
            if titleCase:
                return f"Bullet list: {titleCase.lower()}"
            else:
                return "Bullet list items"
        elif contentType == "code_block":
            return "Code content"
        else:
            if titleCase:
                return f"Content for {titleCase.lower()}"
            else:
                return f"Content for {contentType} section"

    def _extractImagePrompts(
        self,
        structure: Dict[str, Any]
    ) -> Dict[str, str]:
        """
        Extract image generation prompts from structure.
        Maps section_id -> image_prompt
        """
        imagePrompts = {}

        for doc in structure.get("documents", []):
            for section in doc.get("sections", []):
                if section.get("content_type") == "image":
                    sectionId = section.get("id")
                    imagePrompt = section.get("image_prompt")
                    if sectionId and imagePrompt:
                        imagePrompts[sectionId] = imagePrompt

        return imagePrompts

    def _formatCachedContent(
        self,
        cachedContent: Dict[str, Any]
    ) -> str:
        """Format cached content for prompt inclusion."""
        try:
            extractedContent = cachedContent.get("extractedContent", [])
            if not extractedContent:
                return "No content extracted."

            # Format ContentPart objects
            formattedParts = []
            for extracted in extractedContent:
                if hasattr(extracted, 'parts'):
                    for part in extracted.parts:
                        if hasattr(part, 'content'):
                            formattedParts.append(part.content)
                elif isinstance(extracted, dict):
                    formattedParts.append(str(extracted))
                else:
                    formattedParts.append(str(extracted))

            return "\n\n".join(formattedParts) if formattedParts else "No content extracted."

        except Exception as e:
            logger.warning(f"Error formatting cached content: {str(e)}")
            return "Error formatting cached content."

    def _enhanceImagePrompt(self, generationHint: str) -> str:
        """
        Enhance a generation hint into a proper image generation prompt.
        Adds visual details and style guidance if missing.
        """
        # If the hint already contains visual details, use it as-is
        visualKeywords = ["illustration", "image", "picture", "visual", "depict", "show", "drawing"]
        if any(keyword.lower() in generationHint.lower() for keyword in visualKeywords):
            return generationHint

        # Enhance with a visual description
        enhanced = f"Create a professional illustration: {generationHint}"
        return enhanced

    def _formatExistingImages(self, imageDocuments: List[Dict[str, Any]]) -> str:
        """Format existing images list for prompt inclusion"""
        if not imageDocuments:
            return "No existing images provided."

        formatted = []
        for i, imgDoc in enumerate(imageDocuments, 1):
            formatted.append(f"{i}. Image ID: {imgDoc.get('id')}")
            formatted.append(f"   File Name: {imgDoc.get('fileName', 'Unknown')}")
            formatted.append(f"   MIME Type: {imgDoc.get('mimeType', 'Unknown')}")
            formatted.append(f"   Alt Text: {imgDoc.get('altText', 'Image')}")
            formatted.append("")

        return "\n".join(formatted)

    def _getUserLanguage(self) -> str:
        """Get user language for document generation"""
        try:
            if self.services:
                if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage:
                    return self.services.currentUserLanguage
                elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'):
                    return self.services.user.language
        except Exception:
            pass
        return 'en'  # Default fallback
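A sketch of what the validation pass above does to a minimal AI reply in single-document format; the input is illustrative and assumes only the StructureGenerator class defined above.

generator = StructureGenerator(services=None)
rawStructure = {
    "metadata": {"title": "Demo"},
    "sections": [
        {"id": "section_heading_main_title", "content_type": "heading"},
        {"content_type": "image", "generation_hint": "A mountain landscape at dawn"}
    ]
}
enhanced = generator._validateAndEnhanceStructure(rawStructure, maxSectionLength=500)

doc = enhanced["documents"][0]  # single-document input is wrapped into the documents array
assert doc["sections"][0]["complexity"] == "simple"
assert doc["sections"][0]["generation_hint"] == "Main document title"
assert doc["sections"][1]["complexity"] == "complex"  # images are always complex
assert doc["sections"][1]["image_prompt"] == "Create a professional illustration: A mountain landscape at dawn"
assert all(s["elements"] == [] for s in doc["sections"])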
@@ -199,8 +199,7 @@ def closeJsonStructures(text: str) -> str:
     # Handle unterminated strings: find the last unclosed string
     # Look for patterns like: "value" or "value\n (unterminated)
-    # Simple heuristic: if we end with an unterminated string (odd number of quotes at end)
-    # Try to close it by finding the last opening quote and closing it
+    # Check if we're in the middle of a string value when text ends
     if result.strip():
         # Count quotes - if odd number, we have an unterminated string
         quoteCount = result.count('"')

@@ -219,6 +218,32 @@ def closeJsonStructures(text: str) -> str:
             # Find where the string should end (before next comma, bracket, or brace)
             # For now, just close it at the end
             result += '"'
+        else:
+            # Even number of quotes, but might still be in the middle of a string if cut off
+            # Check if text ends with a colon followed by a quote (start of string value)
+            # or ends with text that looks like it's inside a string (no closing quote after last quote)
+            import re
+            # Pattern: ends with "text" where text doesn't end with a quote
+            # Look for a pattern like: "text": "incomplete
+            if re.search(r':\s*"[^"]*$', result):
+                # We're in the middle of a string value, close it
+                result += '"'
+            # Also check if we end with text after a quote (like "key": "value but cut off)
+            elif re.search(r'"\s*:\s*"[^"]*[^",}\]]$', result):
+                # Check if the last quote is followed by non-quote, non-structural chars
+                lastQuotePos = result.rfind('"')
+                if lastQuotePos >= 0:
+                    afterQuote = result[lastQuotePos + 1:]
+                    # If after the quote we have text but no closing quote, comma, or brace, we're in a string
+                    if afterQuote and not re.match(r'^\s*[,}\]]', afterQuote):
+                        # Check if it's escaped
+                        escapeCount = 0
+                        i = lastQuotePos - 1
+                        while i >= 0 and result[i] == '\\':
+                            escapeCount += 1
+                            i -= 1
+                        if escapeCount % 2 == 0:
+                            result += '"'

     # Count open/close brackets and braces
     openBraces = result.count('{')
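The intended effect, sketched (this assumes the brace balancing at the end of closeJsonStructures, which these hunks only show the start of):

truncated = '{"metadata": {"title": "Demo'
repaired = closeJsonStructures(truncated)
# Five quotes (odd count), so the unterminated string is closed first,
# then the two open braces are balanced:
assert repaired == '{"metadata": {"title": "Demo"}}'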
@@ -98,7 +98,7 @@ async def convert(self, parameters: Dict[str, Any]) -> ActionResult:
     renderOptions["columnsPerRow"] = parameters.get("columnsPerRow")
     renderOptions["includeHeader"] = parameters.get("includeHeader", True)

-    rendered_content, mime_type = await generationService.renderReport(
+    rendered_content, mime_type, _images = await generationService.renderReport(
         jsonData, normalizedOutputFormat, title, None, None
     )
@@ -3,13 +3,18 @@
 """
 Generate Document action for AI operations.
-Generates documents from scratch or based on templates/inputs.
+Generates documents from scratch or based on templates/inputs using hierarchical approach.
 """

 import logging
-from typing import Dict, Any
+import time
+from typing import Dict, Any, Optional
 from modules.workflows.methods.methodBase import action
-from modules.datamodels.datamodelChat import ActionResult
+from modules.datamodels.datamodelChat import ActionResult, ActionDocument
+from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
+from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator
+from modules.services.serviceGeneration.subContentGenerator import ContentGenerator
+from modules.services.serviceGeneration.subDocumentPurposeAnalyzer import DocumentPurposeAnalyzer

 logger = logging.getLogger(__name__)
@@ -17,15 +22,18 @@ logger = logging.getLogger(__name__)
 async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
     """
     GENERAL:
-    - Purpose: Generate documents from scratch or based on templates/inputs.
+    - Purpose: Generate documents from scratch or based on templates/inputs using hierarchical approach.
     - Input requirements: prompt or description (required); optional documentList (for templates/references).
-    - Output format: Document in specified format (default: docx).
+    - Output format: Document in specified format. Any format supported by dynamically registered renderers is acceptable (default: txt).

     Parameters:
     - prompt (str, required): Description of the document to generate.
     - documentList (list, optional): Template documents or reference documents to use as a guide.
     - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
-    - resultType (str, optional): Output format (docx, pdf, txt, md, etc.). Default: docx.
+    - resultType (str, optional): Output format. Any format supported by dynamically registered renderers is acceptable (formats are discovered automatically from renderer registry). Common formats: txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg. Default: txt.
+    - maxSectionLength (int, optional): Maximum words for simple sections. Default: 500.
+    - parallelGeneration (bool, optional): Enable parallel section generation. Default: True.
+    - progressLogging (bool, optional): Send ChatLog progress updates. Default: True.
     """
     prompt = parameters.get("prompt")
     if not prompt:
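A sketch of a call with the new parameters; the surrounding workflow wiring is assumed and not shown in this diff.

result = await self.generateDocument({
    "prompt": "Create a short HTML status report with one illustration",
    "resultType": "html",            # explicit format skips the auto-detection below
    "maxSectionLength": 300,         # words allowed per simple section
    "parallelGeneration": True,
    "progressLogging": True
})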
@@ -33,21 +41,361 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
         documentList = parameters.get("documentList", [])
         documentType = parameters.get("documentType")
-        resultType = parameters.get("resultType", "docx")
-
-        aiPrompt = f"Generate a document based on the following requirements: {prompt}"
-        if documentType:
-            aiPrompt += f" Document type: {documentType}."
-        if documentList:
-            aiPrompt += " Use the provided template/reference documents as a guide for structure, format, and style."
-        aiPrompt += " Create a professional, well-structured document with appropriate formatting and organization."
-
-        processParams = {
-            "aiPrompt": aiPrompt,
-            "resultType": resultType
-        }
-        if documentList:
-            processParams["documentList"] = documentList
-
-        return await self.process(processParams)
+        resultType = parameters.get("resultType", "txt")
+
+        # Auto-detect format from prompt if not explicitly provided
+        if resultType == "txt" and prompt:
+            promptLower = prompt.lower()
+            if "html" in promptLower or "html5" in promptLower:
+                resultType = "html"
+                logger.info(f"Auto-detected HTML format from prompt")
+            elif "pdf" in promptLower:
+                resultType = "pdf"
+                logger.info(f"Auto-detected PDF format from prompt")
+            elif "markdown" in promptLower or " md " in promptLower or promptLower.endswith(" md"):
+                resultType = "md"
+                logger.info(f"Auto-detected Markdown format from prompt")
+            elif ("text" in promptLower or "txt" in promptLower) and "html" not in promptLower:
+                resultType = "txt"
+                logger.info(f"Auto-detected Text format from prompt")
+
+        maxSectionLength = parameters.get("maxSectionLength", 500)
+        parallelGeneration = parameters.get("parallelGeneration", True)
+        progressLogging = parameters.get("progressLogging", True)
+
+        # Create operation ID for progress tracking
+        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
+        operationId = f"doc_gen_{workflowId}_{int(time.time())}"
+        parentOperationId = parameters.get('parentOperationId')
+
+        try:
+            # Phase 1: Structure Generation
+            if progressLogging:
+                self.services.chat.progressLogStart(
+                    operationId,
+                    "Document",
+                    "Structure Generation",
+                    "Generating document structure...",
+                    parentOperationId=parentOperationId
+                )
+
+            structureGenerator = StructureGenerator(self.services)
+
+            # Analyze document purposes and process documents accordingly
+            cachedContent = None
+            imageDocuments = []
+            documentPurposes = {}
+
+            if documentList:
+                if progressLogging:
+                    self.services.chat.progressLogUpdate(operationId, 0.1, "Analyzing document purposes...")
+
+                # Convert documentList to DocumentReferenceList
+                from modules.datamodels.datamodelDocref import DocumentReferenceList
+
+                if isinstance(documentList, DocumentReferenceList):
+                    docRefList = documentList
+                elif isinstance(documentList, str):
+                    docRefList = DocumentReferenceList.from_string_list([documentList])
+                elif isinstance(documentList, list):
+                    docRefList = DocumentReferenceList.from_string_list(documentList)
+                else:
+                    docRefList = DocumentReferenceList(references=[])
+
+                # Get ChatDocuments
+                chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
+                if chatDocuments:
+                    logger.info(f"Analyzing purposes for {len(chatDocuments)} documents")
+
+                    # Analyze document purposes using AI
+                    purposeAnalyzer = DocumentPurposeAnalyzer(self.services)
+                    purposeAnalysis = await purposeAnalyzer.analyzeDocumentPurposes(
+                        userPrompt=prompt,
+                        chatDocuments=chatDocuments,
+                        actionContext="generateDocument"
+                    )
+
+                    documentPurposes = {dp["document_id"]: dp for dp in purposeAnalysis.get("document_purposes", [])}
+                    logger.info(f"Purpose analysis complete: {purposeAnalysis.get('overall_intent', 'N/A')}")
+
+                    # Separate documents by purpose
+                    textDocs = []
+                    imageDocsToInclude = []
+                    imageDocsToAnalyze = []
+
+                    for doc in chatDocuments:
+                        docPurpose = documentPurposes.get(doc.id, {})
+                        purpose = docPurpose.get("purpose", "extract_text_content")
+
+                        if purpose == "include_image":
+                            imageDocsToInclude.append(doc)
+                        elif purpose == "analyze_image_vision":
+                            imageDocsToAnalyze.append(doc)
+                        elif purpose in ["extract_text_content", "use_as_template", "use_as_reference", "extract_data"]:
+                            textDocs.append(doc)
+                        # Skip "attach" purpose - don't process
+
+                    # Process text documents (extract content)
+                    extractedResults = []
+                    if textDocs:
+                        if progressLogging:
+                            self.services.chat.progressLogUpdate(operationId, 0.15, f"Extracting content from {len(textDocs)} text document(s)...")
+
+                        # Prepare extraction options with purpose-specific prompts
+                        extractionOptionsList = []
+                        for doc in textDocs:
+                            docPurpose = documentPurposes.get(doc.id, {})
+                            extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all content from the document"
+
+                            extractionOptions = ExtractionOptions(
+                                prompt=extractionPrompt,
+                                mergeStrategy=MergeStrategy(
+                                    mergeType="concatenate",
+                                    groupBy="typeGroup",
+                                    orderBy="id"
+                                ),
+                                processDocumentsIndividually=True
+                            )
+                            extractionOptionsList.append((doc, extractionOptions))
+
+                        # Extract content from text documents
+                        for doc, extractionOptions in extractionOptionsList:
+                            try:
+                                docResults = self.services.extraction.extractContent(
+                                    [doc],
+                                    extractionOptions,
+                                    parentOperationId=operationId
+                                )
+                                extractedResults.extend(docResults)
+                            except Exception as e:
+                                logger.error(f"Error extracting content from {doc.fileName}: {str(e)}")
+
+                        logger.info(f"Extracted content from {len(extractedResults)} text document(s)")
+
+                    # Process images to analyze (vision call)
+                    if imageDocsToAnalyze:
+                        if progressLogging:
+                            self.services.chat.progressLogUpdate(operationId, 0.2, f"Analyzing {len(imageDocsToAnalyze)} image(s) with vision AI...")
+
+                        # Extract content from images using vision analysis
+                        for doc in imageDocsToAnalyze:
+                            try:
+                                docPurpose = documentPurposes.get(doc.id, {})
+                                extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all text and information from this image"
+
+                                extractionOptions = ExtractionOptions(
+                                    prompt=extractionPrompt,
+                                    mergeStrategy=MergeStrategy(
+                                        mergeType="concatenate",
+                                        groupBy="typeGroup",
+                                        orderBy="id"
+                                    ),
+                                    processDocumentsIndividually=True
+                                )
+
+                                docResults = self.services.extraction.extractContent(
+                                    [doc],
+                                    extractionOptions,
+                                    parentOperationId=operationId
+                                )
+                                extractedResults.extend(docResults)
+                            except Exception as e:
+                                logger.error(f"Error analyzing image {doc.fileName}: {str(e)}")
+
+                        logger.info(f"Analyzed {len(imageDocsToAnalyze)} image(s) with vision AI")
+
+                    # Process images to include (store image data)
+                    if imageDocsToInclude:
+                        if progressLogging:
+                            self.services.chat.progressLogUpdate(operationId, 0.25, f"Preparing {len(imageDocsToInclude)} image(s) for inclusion...")
+
+                        # Get image data for inclusion
+                        from modules.interfaces.interfaceDbComponentObjects import getInterface
+                        dbInterface = getInterface()
+
+                        for doc in imageDocsToInclude:
+                            try:
+                                # Get image bytes
+                                imageBytes = dbInterface.getFileData(doc.fileId)
+                                if imageBytes:
+                                    # Encode to base64
+                                    import base64
+                                    base64Data = base64.b64encode(imageBytes).decode('utf-8')
+
+                                    # Create image document entry
+                                    imageDoc = {
+                                        "id": doc.id,
+                                        "fileName": doc.fileName,
+                                        "mimeType": doc.mimeType,
+                                        "base64Data": base64Data,
+                                        "altText": doc.fileName or "Image",
+                                        "fileSize": doc.fileSize
+                                    }
+                                    imageDocuments.append(imageDoc)
+                                    logger.debug(f"Prepared image {doc.fileName} for inclusion ({len(base64Data)} chars base64)")
+                                else:
+                                    logger.warning(f"Could not retrieve image data for {doc.fileName}")
+                            except Exception as e:
+                                logger.error(f"Error preparing image {doc.fileName} for inclusion: {str(e)}")
+
+                        logger.info(f"Prepared {len(imageDocuments)} image(s) for inclusion")
+
+                    # Build cachedContent with all information
+                    cachedContent = {
+                        "extractedContent": extractedResults,
+                        "imageDocuments": imageDocuments,
+                        "documentPurposes": documentPurposes,
+                        "extractionTimestamp": time.time(),
+                        "sourceDocuments": [doc.id for doc in chatDocuments]
+                    }
+
+                    logger.info(f"Document processing complete: {len(extractedResults)} extracted, {len(imageDocuments)} images to include")
+
+            # Generate structure
+            if progressLogging:
+                self.services.chat.progressLogUpdate(operationId, 0.2, "Generating document structure...")
+
+            structure = await structureGenerator.generateStructure(
+                userPrompt=prompt,
+                documentList=documentList if documentList else None,
+                cachedContent=cachedContent,
+                maxSectionLength=maxSectionLength,
+                existingImages=imageDocuments  # Pass existing images for structure generation
+            )
+
+            if progressLogging:
+                self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated")
+
+            # Phase 2: Content Generation
+            if progressLogging:
+                self.services.chat.progressLogUpdate(
+                    operationId,
+                    0.34,
+                    "Starting content generation..."
+                )
+
+            contentGenerator = ContentGenerator(self.services)
+
+            # Create enhanced progress callback
+            def progressCallback(sectionIndex: int, totalSections: int, message: str):
+                if progressLogging:
+                    # Calculate progress: 34% to 90% for content generation phase
+                    if totalSections > 0:
+                        progress = 0.34 + (0.56 * (sectionIndex / totalSections))
+                    else:
+                        progress = 0.34
+
+                    # Format message
+                    if sectionIndex > 0 and totalSections > 0:
+                        progressMessage = f"Section {sectionIndex}/{totalSections}: {message}"
+                    else:
+                        progressMessage = message
+
+                    self.services.chat.progressLogUpdate(
+                        operationId,
+                        progress,
+                        progressMessage
+                    )
+
+            completeStructure = await contentGenerator.generateContent(
+                structure=structure,
+                cachedContent=cachedContent,
+                userPrompt=prompt,
+                progressCallback=progressCallback,
+                parallelGeneration=parallelGeneration
+            )
+
+            if progressLogging:
+                self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated")
+
+            # Phase 3: Integration & Rendering
+            if progressLogging:
+                self.services.chat.progressLogUpdate(
+                    operationId,
+                    0.91,
+                    "Rendering final document..."
+                )
+
+            # Use existing renderReport method
+            title = structure.get("metadata", {}).get("title", "Generated Document")
+            if documentType:
+                title = f"{title} ({documentType})"
+
+            renderedContent, mimeType, images = await self.services.generation.renderReport(
+                extractedContent=completeStructure,
+                outputFormat=resultType,
+                title=title,
+                userPrompt=prompt,
+                aiService=self.services.ai
+            )
+
+            # Build list of documents to return
+            documents = [
+                ActionDocument(
+                    documentName=f"document.{resultType}",
+                    documentData=renderedContent,
+                    mimeType=mimeType
+                )
+            ]
+
+            # Add images as separate documents
+            if images:
+                logger.info(f"Processing {len(images)} image(s) from renderer")
+                import base64
+                for idx, imageData in enumerate(images):
+                    try:
+                        base64Data = imageData.get("base64Data", "")
+                        altText = imageData.get("altText", f"image_{idx + 1}")
+                        caption = imageData.get("caption", "")
+                        sectionId = imageData.get("sectionId", f"section_{idx + 1}")
+
+                        if base64Data:
+                            # Decode base64 to bytes
+                            imageBytes = base64.b64decode(base64Data)
+
+                            # Determine filename and mime type
+                            filename = imageData.get("filename", f"image_{idx + 1}.png")
+                            if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')):
+                                filename = f"image_{idx + 1}.png"
+
+                            # Determine mime type from filename
+                            if filename.lower().endswith('.png'):
+                                imageMimeType = "image/png"
+                            elif filename.lower().endswith(('.jpg', '.jpeg')):
+                                imageMimeType = "image/jpeg"
+                            elif filename.lower().endswith('.gif'):
+                                imageMimeType = "image/gif"
+                            elif filename.lower().endswith('.webp'):
+                                imageMimeType = "image/webp"
+                            else:
+                                imageMimeType = "image/png"  # Default
+
+                            # Add image document
+                            documents.append(ActionDocument(
+                                documentName=filename,
+                                documentData=imageBytes,
+                                mimeType=imageMimeType
+                            ))
+                            logger.info(f"Added image document: {filename} (section: {sectionId}, {len(imageBytes)} bytes, alt: {altText})")
+                        else:
+                            logger.warning(f"Image {idx + 1} (section: {sectionId}) has no base64Data, skipping")
+                    except Exception as e:
+                        logger.error(f"Error adding image document {idx + 1}: {str(e)}", exc_info=True)
+                        continue
+            else:
+                logger.debug("No images returned from renderer")
+
+            # Note: Document creation is handled by the workflow system
+            # We just return the rendered content and images in ActionResult
+
+            if progressLogging:
+                self.services.chat.progressLogFinish(operationId, True)
+
+            return ActionResult.isSuccess(documents=documents)
+
+        except Exception as e:
+            logger.error(f"Error in hierarchical document generation: {str(e)}")
+            if progressLogging:
+                self.services.chat.progressLogFinish(operationId, False)
+            return ActionResult.isFailure(error=str(e))
@@ -353,11 +353,10 @@ class MethodAi(MethodBase):
             "resultType": WorkflowActionParameter(
                 name="resultType",
                 type="str",
-                frontendType=FrontendType.SELECT,
-                frontendOptions=["docx", "pdf", "txt", "md"],
+                frontendType=FrontendType.TEXT,
                 required=False,
-                default="docx",
-                description="Output format"
+                default="txt",
+                description="Output format (e.g., txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg). Any format supported by renderers is acceptable. Default: txt"
             )
         },
         execute=generateDocument.__get__(self, self.__class__)
@@ -98,9 +98,13 @@ class MethodBase:
             self.logger.error(f"Method {self.name} has no _actions dictionary defined. Actions will not be available.")
             return result

+        totalActions = len(self._actions)
+        deniedActions = []
+
         for actionName, actionDef in self._actions.items():
             # RBAC check: verify that the action is available for the current user
             if not self._checkActionPermission(actionDef.actionId):
+                deniedActions.append(f"{actionName} ({actionDef.actionId})")
                 continue  # Skip if user doesn't have permission

             # Convert WorkflowActionDefinition to system format
@@ -110,6 +114,11 @@ class MethodBase:
                 'method': self._createActionWrapper(actionDef)
             }

+        if deniedActions:
+            self.logger.warning(f"Method {self.name}: {len(deniedActions)}/{totalActions} actions denied by RBAC: {deniedActions[:5]}{'...' if len(deniedActions) > 5 else ''}")
+        if not result and totalActions > 0:
+            self.logger.error(f"Method {self.name}: ALL {totalActions} actions denied by RBAC! This will result in an empty action list.")
+
         return result

     def _checkActionPermission(self, actionId: str) -> bool:
@@ -120,22 +129,36 @@ class MethodBase:
         REQUIREMENT: The RBAC service must be available.
         """
         if not hasattr(self.services, 'rbac') or not self.services.rbac:
-            self.logger.error(f"RBAC service not available. Action {actionId} will be denied.")
+            self.logger.error(f"RBAC service not available (services.rbac is None). Action {actionId} will be denied.")
             return False

-        currentUser = self.services.chat.getCurrentUser()
+        # Get current user from services.user (not from the chat service)
+        currentUser = getattr(self.services, 'user', None)
         if not currentUser:
-            self.logger.warning(f"No current user found. Action {actionId} will be denied.")
+            self.logger.warning(f"No current user found (services.user is None). Action {actionId} will be denied.")
             return False

         # RBAC check: RESOURCE context, item = actionId
-        permissions = self.services.rbac.getUserPermissions(
-            user=currentUser,
-            context=AccessRuleContext.RESOURCE,
-            item=actionId
-        )
-        return permissions.view
+        try:
+            permissions = self.services.rbac.getUserPermissions(
+                user=currentUser,
+                context=AccessRuleContext.RESOURCE,
+                item=actionId
+            )
+            hasPermission = permissions.view
+            if not hasPermission:
+                # Log detailed RBAC denial info
+                userRoles = getattr(currentUser, 'roleLabels', []) or []
+                self.logger.warning(
+                    f"RBAC denied action {actionId} for user {currentUser.id}. "
+                    f"User roles: {userRoles}, "
+                    f"Permissions: view={permissions.view}, edit={permissions.edit}, delete={permissions.delete}. "
+                    f"No matching RBAC rule found for context=RESOURCE, item={actionId}"
+                )
+            return hasPermission
+        except Exception as e:
+            self.logger.error(f"RBAC check failed for action {actionId}: {str(e)}. Action will be denied.")
+            return False

     def _convertParametersToSystemFormat(self, parameters: Dict[str, WorkflowActionParameter]) -> Dict[str, Dict[str, Any]]:
         """Convert WorkflowActionParameter dict to system format for API/UI consumption"""
@@ -37,52 +37,6 @@ class ContentValidator:
         """
         return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory)

-    def _analyzeDocuments(self, documents: List[Any]) -> List[Dict[str, Any]]:
-        """Generic document analysis - create simple summaries with metadata."""
-        summaries = []
-        for doc in documents:
-            try:
-                data = getattr(doc, 'documentData', None)
-                name = getattr(doc, 'documentName', 'Unknown')
-                mimeType = getattr(doc, 'mimeType', 'unknown')
-                formatExt = self._detectFormat(doc)
-                sizeInfo = self._calculateSize(doc)
-
-                # Simple preview: if it's dict/list, dump JSON; otherwise use string
-                preview = None
-                if data is not None:
-                    if isinstance(data, (dict, list)):
-                        preview = json.dumps(data, indent=2, ensure_ascii=False)
-                        # Truncate if too large
-                        if len(preview) > MAX_CONTENT_SIZE_FOR_FULL_PREVIEW:
-                            preview = preview[:PREVIEW_SAMPLE_SIZE] + f"\n\n[Truncated - {self._formatBytes(sizeInfo['bytes'])} total]"
-                    else:
-                        text = str(data)
-                        if len(text) > MAX_CONTENT_SIZE_FOR_FULL_PREVIEW:
-                            preview = text[:PREVIEW_SAMPLE_SIZE] + f"\n\n[Truncated - {self._formatBytes(sizeInfo['bytes'])} total]"
-                        else:
-                            preview = text
-
-                summary = {
-                    "name": name,
-                    "mimeType": mimeType,
-                    "format": formatExt,
-                    "size": sizeInfo["readable"],
-                    "preview": preview
-                }
-                summaries.append(summary)
-            except Exception as e:
-                logger.warning(f"Error analyzing document {getattr(doc, 'documentName', 'Unknown')}: {str(e)}")
-                summaries.append({
-                    "name": getattr(doc, 'documentName', 'Unknown'),
-                    "mimeType": getattr(doc, 'mimeType', 'unknown'),
-                    "format": "unknown",
-                    "size": "0 B",
-                    "preview": None,
-                    "error": str(e)
-                })
-        return summaries
-
     def _summarizeJsonStructure(self, jsonData: Any) -> Dict[str, Any]:
         """Summarize JSON document structure for validation - extracts main objects, statistics, captions, and IDs."""
         try:
@@ -120,9 +74,11 @@ class ContentValidator:
                 "order": section.get("order")
             }

+            # Get elements for processing
+            elements = section.get("elements", [])
+
             # For tables: extract caption and statistics
             if section.get("content_type") == "table":
-                elements = section.get("elements", [])
                 if elements and isinstance(elements, list) and len(elements) > 0:
                     tableElement = elements[0]
                     sectionSummary["caption"] = tableElement.get("caption")
@@ -134,7 +90,6 @@ class ContentValidator:
             # For lists: extract item count
             elif section.get("content_type") == "list":
-                elements = section.get("elements", [])
                 if elements and isinstance(elements, list) and len(elements) > 0:
                     listElement = elements[0]
                     items = listElement.get("items", [])
@@ -142,7 +97,6 @@ class ContentValidator:
             # For paragraphs/headings: extract text preview
             elif section.get("content_type") in ["paragraph", "heading"]:
-                elements = section.get("elements", [])
                 if elements and isinstance(elements, list) and len(elements) > 0:
                     textElement = elements[0]
                     text = textElement.get("text", "")
@@ -174,8 +128,10 @@ class ContentValidator:
                 "order": section.get("order")
             }

+            # Get elements for processing
+            elements = section.get("elements", [])
+
             if section.get("content_type") == "table":
-                elements = section.get("elements", [])
                 if elements and isinstance(elements, list) and len(elements) > 0:
                     tableElement = elements[0]
                     sectionSummary["caption"] = tableElement.get("caption")
@@ -475,6 +431,12 @@ VALIDATION RULES:
 5. Format understanding: Different formats can represent equivalent data structures. Focus on content, not format name.
 6. Multi-step workflow awareness: If ACTION HISTORY is present, consider the workflow as a whole. Document metadata (e.g., extraction_method) describes how data was EXTRACTED in the last step, not necessarily how it was OBTAINED in the workflow.
 7. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help.
+8. CRITICAL - Data vs Data Description: When criteria require specific data types (e.g., images, tables, charts, files), distinguish between:
+   - ACTUAL DATA: The actual data itself (binary data, structured data, embedded content)
+   - DATA DESCRIPTIONS: Text fields that describe or specify what data should be created (e.g., "image_description", "table_description", "chart_specification") - these are TEXT METADATA, NOT the actual data
+   - If only descriptions/specifications exist but no actual data, the criterion is NOT met. Descriptions are instructions for creating data, not the data itself.
+   - Check content types in sections/elements: if content_type matches the required data type (e.g., "image" for images, "table" for tables), actual data exists. If only text fields describing the data exist, the data is missing.
+   - Check document statistics: if counts for the required data type are 0, the data is missing even if descriptions exist.

 VALIDATION STEPS:
 - Check ACTION HISTORY first (if present) for PROCESS-ORIENTED criteria (e.g., "search performed", "sources used", "verification done")
@@ -84,43 +84,85 @@ class ActionExecutor:
             enhancedParameters['expectedDocumentFormats'] = action.expectedDocumentFormats
             logger.info(f"Expected formats: {action.expectedDocumentFormats}")

-        # Get current task execution operationId to pass as parent to action methods
-        # This MUST be the "Service Workflow Execution" operation ID (taskExec_*)
-        parentOperationId = None
+        # Get current task execution operationId (taskExec_*) - this is the parent of the action
+        taskOperationId = None
         try:
             progressLogger = self.services.chat.createProgressLogger()
             activeOperations = progressLogger.getActiveOperations()
-            logger.debug(f"Looking for parent operation ID. Active operations: {list(activeOperations.keys())}")
+            logger.debug(f"Looking for task operation ID. Active operations: {list(activeOperations.keys())}")

             # Look for task execution operation (starts with "taskExec_")
-            # This is the "Service Workflow Execution" level that should be parent of ALL actions
+            # This is the Task level that should be parent of this action
             for opId in activeOperations.keys():
                 if opId.startswith("taskExec_"):
-                    parentOperationId = opId
-                    logger.info(f"Found parent operation ID: {parentOperationId} for action {action.execMethod}.{action.execAction}")
+                    taskOperationId = opId
+                    logger.info(f"Found task operation ID: {taskOperationId} for action {action.execMethod}.{action.execAction}")
                     break

-            if not parentOperationId:
-                logger.warning(f"No taskExec_ operation found in active operations. Active operations: {list(activeOperations.keys())}")
+            if not taskOperationId:
+                logger.error(f"CRITICAL: No taskExec_ operation found in active operations. Active operations: {list(activeOperations.keys())}. Action logs will appear at root level!")
         except Exception as e:
-            logger.error(f"Error getting parent operation ID: {str(e)}")
+            logger.error(f"Error getting task operation ID: {str(e)}")

-        # Add parentOperationId to parameters so action methods can use it
-        # This is critical for UI dashboard hierarchical display
-        if parentOperationId:
-            enhancedParameters['parentOperationId'] = parentOperationId
-            logger.info(f"Passing parentOperationId '{parentOperationId}' to action {action.execMethod}.{action.execAction}")
+        # Create action operationId entry - Action is child of Task
+        import time
+        actionOperationId = f"action_{action.execMethod}_{action.execAction}_{workflow.id}_{taskNum}_{actionNum}_{int(time.time())}"
+
+        try:
+            # Start action progress tracking - Action is child of Task
+            # CRITICAL: If taskOperationId is None, the action will appear at root level
+            self.services.chat.progressLogStart(
+                actionOperationId,
+                action.execMethod.capitalize(),
+                action.execAction,
+                f"Task {taskNum} Action {actionNum}",
+                parentOperationId=taskOperationId  # Will be None if taskExec_ not found
+            )
+        except Exception as e:
+            logger.error(f"Error starting action progress log: {str(e)}")
+
+        # Add action operationId to parameters so action methods can use it for their steps
+        # Action steps should be children of the action, not the task
+        # CRITICAL: This must always be set, even if taskOperationId is None
+        enhancedParameters['parentOperationId'] = actionOperationId
+        if taskOperationId:
+            logger.info(f"Created action operationId '{actionOperationId}' (parent: {taskOperationId}) for action {action.execMethod}.{action.execAction}")
         else:
-            logger.warning(f"WARNING: No parentOperationId found for action {action.execMethod}.{action.execAction}. Action logs will appear at root level!")
+            logger.warning(f"Created action operationId '{actionOperationId}' WITHOUT parent (taskExec_ not found) for action {action.execMethod}.{action.execAction}. Action will appear at root level!")

         # Check workflow status before executing the action
         checkWorkflowStopped(self.services)

-        result = await self.executeAction(
-            methodName=action.execMethod,
-            actionName=action.execAction,
-            parameters=enhancedParameters
-        )
+        # Execute action and track success for progress log
+        result = None
+        actionSuccess = False
+        try:
+            result = await self.executeAction(
+                methodName=action.execMethod,
+                actionName=action.execAction,
+                parameters=enhancedParameters
+            )
+            actionSuccess = result.success if result else False
+        except Exception as e:
+            logger.error(f"Error executing action: {str(e)}")
+            actionSuccess = False
+        finally:
+            # Finish action progress tracking
+            try:
+                self.services.chat.progressLogFinish(actionOperationId, actionSuccess)
+            except Exception as e:
+                logger.error(f"Error finishing action progress log: {str(e)}")
+
+        # If action execution failed, return error result
+        if result is None:
+            action.setError("Action execution failed")
+            return ActionResult(
+                success=False,
+                documents=[],
+                resultLabel=action.execResultLabel,
+                error="Action execution failed"
+            )

         resultLabel = action.execResultLabel

         # Trace action result with full document metadata
@@ -565,10 +565,9 @@ class DynamicMode(BaseMode):
                 methodInstance = _methods[methodName]['instance']
                 if actionName in methodInstance.actions:
                     action_info = methodInstance.actions[actionName]
-                    docstring = action_info.get('description', '')
-                    # Extract parameter names from docstring to check if documentList exists
-                    paramDescriptions, _ = methodInstance._extractParameterDetails(docstring)
-                    if 'documentList' in paramDescriptions:
+                    # Use structured WorkflowActionParameter objects from new system
+                    parameters_def = action_info.get('parameters', {})
+                    if 'documentList' in parameters_def:
                         # Convert DocumentReferenceList to string list for database serialization
                         # Action methods will convert it back to DocumentReferenceList when needed
                         parameters['documentList'] = docList.to_string_list()
@@ -596,10 +595,9 @@ class DynamicMode(BaseMode):
                 methodInstance = _methods[methodName]['instance']
                 if actionName in methodInstance.actions:
                     action_info = methodInstance.actions[actionName]
-                    docstring = action_info.get('description', '')
-                    # Extract parameter names from docstring to check if connectionReference exists
-                    paramDescriptions, _ = methodInstance._extractParameterDetails(docstring)
-                    if 'connectionReference' in paramDescriptions:
+                    # Use structured WorkflowActionParameter objects from new system
+                    parameters_def = action_info.get('parameters', {})
+                    if 'connectionReference' in parameters_def:
                         parameters['connectionReference'] = connectionRef
                         logger.info(f"Added connectionReference to parameters: {connectionRef}")
             except Exception as e:
@@ -0,0 +1,354 @@
# Architecture & Implementation Analysis
## Deep Review of Hierarchical Document Generation

**Date**: 2025-12-22
**Status**: Critical Issues Found

---

## Executive Summary

The hierarchical document generation system is **partially implemented** but has **critical architectural mismatches** and **implementation gaps** that prevent it from working correctly. While the core components exist, several fundamental issues need to be addressed.

---

## ✅ What's Correctly Implemented

### Phase 1: Core Infrastructure ✅
- ✅ `StructureGenerator` class exists with `generateStructure()` method
- ✅ `ContentGenerator` class exists with `generateContent()` method
- ✅ `ContentIntegrator` class exists with `integrateContent()` method
- ✅ `generateDocument` action uses the hierarchical approach
- ✅ Basic progress logging implemented
- ✅ Error handling with `createErrorSection()` implemented

### Phase 2: Image Generation ✅
- ✅ `_generateImageSection()` method implemented
- ✅ Image prompt extraction from structure
- ✅ Base64 image data storage
- ✅ Error handling for image failures

### Phase 3: Parallel Processing ✅
- ✅ `_generateSectionsParallel()` method implemented
- ✅ `_generateSectionsSequential()` method implemented
- ✅ Batch processing for large documents
- ✅ Progress callback system
- ✅ Exception handling in parallel execution

---

## ❌ Critical Issues Found

### Issue 1: Previous Sections Context Not Working in Parallel Mode ⚠️ **PARTIALLY FIXED**

**Problem**:
- In parallel mode, sections within the same batch cannot see each other (correct)
- BUT: sections in later batches should see sections from earlier batches
- **Current Status**: Code was fixed to accumulate previous sections, but needs verification

**Location**: `subContentGenerator.py` lines 240-319

**Fix Applied**:
- Added `accumulatedPreviousSections` to track sections across batches
- Pass accumulated sections to each batch
- **VERIFICATION NEEDED**: Test that prompts actually show previous sections

**Risk**: Medium - May cause continuity issues in generated content

---

### Issue 2: Variable Shadowing Bug ✅ **FIXED**

**Problem**:
- The `contentType` variable was shadowed in a loop, causing the wrong section type to appear in prompts

**Location**: `subContentGenerator.py` line 676

**Fix Applied**:
- Renamed the loop variable to `prevContentType`

**Status**: ✅ Fixed

---

### Issue 3: Missing `generation_hint` in Structure Response ✅ **FIXED**

**Problem**:
- Structure generator creates generic hints like "Section heading" instead of meaningful hints
- The AI generates the same content for all headings because the hints are identical

**Location**: `subStructureGenerator.py` lines 242-269

**Fix Applied**:
- Added `_extractMeaningfulHint()` method to extract meaningful hints from section IDs
- Example: `section_heading_current_state` → "Current State"

**Status**: ✅ Fixed

---

### Issue 4: JSON Template Architecture Mismatch ✅ **FIXED**

**Problem**:
- `jsonTemplateDocument` showed filled `elements` arrays, but structure generation requires empty arrays
- Template was missing the `generation_hint` and `complexity` fields
- Template showed `order: 0` but should start from 1

**Location**: `datamodelJson.py`

**Fix Applied**:
- Updated template to show empty `elements: []`
- Added `generation_hint` to all sections
- Added `complexity` to all sections
- Changed `order` to start from 1
- Added `title` to metadata

**Status**: ✅ Fixed

---

### Issue 5: Structure Prompt Instructions Mismatch ✅ **FIXED**

**Problem**:
- The prompt said "All sections must have empty elements arrays" but the template showed filled arrays
- The prompt didn't explicitly require the `generation_hint` and `complexity` fields

**Location**: `subStructureGenerator.py` lines 181-190

**Fix Applied**:
- Enhanced prompt to explicitly require `generation_hint` and `complexity`
- Clarified that template examples show structure, but elements must be empty

**Status**: ✅ Fixed

---

## ⚠️ Remaining Issues & Gaps

### Issue 6: Missing Validation Before Content Generation ⚠️ **NOT IMPLEMENTED**

**Problem**:
- No validation that the structure has required fields before content generation
- No check that all sections have `generation_hint` before generating content

**Expected** (from Phase 6):
```python
# Validate structure before content generation
if not validateStructure(structure):
    raise ValueError("Invalid structure")
```

**Current**: Validation happens in `_validateAndEnhanceStructure()`, but it only adds missing fields; it doesn't validate

**Impact**: Low - Enhancement adds missing fields, but explicit validation would be better

**Recommendation**: Add an explicit validation method
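
A minimal sketch of such a validation step, assuming the section shape used throughout this document (`validateStructure` is the hypothetical helper named in the expected snippet above):

```python
from typing import Any, Dict, List

REQUIRED_SECTION_FIELDS = ["id", "content_type", "complexity", "generation_hint", "order", "elements"]

def validateStructure(structure: Dict[str, Any]) -> List[str]:
    """Return a list of validation errors; an empty list means the structure is valid."""
    errors: List[str] = []
    documents = structure.get("documents", [])
    if not documents:
        errors.append("structure has no documents")
    for document in documents:
        for section in document.get("sections", []):
            # Every section must carry the full set of structure-phase fields
            for field in REQUIRED_SECTION_FIELDS:
                if field not in section:
                    errors.append(f"section {section.get('id', '?')} is missing '{field}'")
            # Image sections additionally need an image_prompt
            if section.get("content_type") == "image" and not section.get("image_prompt"):
                errors.append(f"image section {section.get('id', '?')} has no image_prompt")
    return errors
```

A caller could then raise `ValueError("\n".join(errors))` when the list is non-empty, matching the expected behavior shown above.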

---

### Issue 7: Previous Sections Formatting Missing Content ⚠️ **PARTIALLY IMPLEMENTED**

**Problem**:
- Previous-sections formatting extracts content from `elements`, but if sections don't have elements yet (in parallel mode), it shows nothing
- It should show `generation_hint` as a fallback when elements are not available

**Location**: `subContentGenerator.py` lines 671-709

**Current Behavior**:
- Shows a content preview if elements exist
- Shows nothing if elements don't exist

**Expected Behavior**:
- Show a content preview if elements exist
- Show `generation_hint` as a fallback if elements don't exist

**Impact**: Medium - Reduces context quality in parallel generation

**Recommendation**: Add a fallback to show `generation_hint` when elements are not available
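
A sketch of the recommended fallback, assuming the section dict shape used throughout this document (the formatting helper name is illustrative):

```python
from typing import Any, Dict

def formatPreviousSection(section: Dict[str, Any]) -> str:
    """Format a previous section for prompt context, falling back to the
    generation hint when the section has not been populated yet."""
    elements = section.get("elements") or []
    if elements:
        first = elements[0]
        text = first.get("text") or first.get("caption") or ""
        if text:
            return f"[{section.get('content_type')}] {text[:200]}"
    # Parallel mode: elements may still be empty, so use the hint as context
    hint = section.get("generation_hint", "")
    return f"[{section.get('content_type')} - planned] {hint}"
```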

---

### Issue 8: Debug File Shows Raw Response, Not Validated Structure ⚠️ **NOT FIXED**

**Problem**:
- The debug file writes `aiResponse.content` (the raw AI response) before validation
- Can't verify whether `generation_hint` was added by validation

**Location**: `subStructureGenerator.py` lines 77-84

**Impact**: Low - Makes debugging harder but doesn't affect functionality

**Recommendation**: Write the validated structure to a separate debug file
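
One way to implement this recommendation; the file paths and helper name are illustrative assumptions:

```python
import json
from typing import Any, Dict

def writeStructureDebugFiles(rawResponse: str, validatedStructure: Dict[str, Any], debugDir: str = "/tmp") -> None:
    """Write both the raw AI response and the validated structure, so the
    effect of _validateAndEnhanceStructure() can be diffed after the fact."""
    with open(f"{debugDir}/structure_raw.json", "w", encoding="utf-8") as f:
        f.write(rawResponse)
    with open(f"{debugDir}/structure_validated.json", "w", encoding="utf-8") as f:
        json.dump(validatedStructure, f, indent=2, ensure_ascii=False)
```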

---

### Issue 9: Missing Unit Tests ⚠️ **NOT IMPLEMENTED**

**Problem**:
- No unit tests for any components (Phase 7 requirement)
- No tests for structure generation
- No tests for content generation
- No tests for integration

**Impact**: High - No way to verify correctness or catch regressions

**Recommendation**: Add comprehensive unit tests

---

### Issue 10: Missing Integration Tests ⚠️ **NOT IMPLEMENTED**

**Problem**:
- No end-to-end tests
- No tests with images
- No tests with long documents
- No error-scenario tests

**Impact**: High - No verification of the complete flow

**Recommendation**: Add integration tests

---

### Issue 11: Content Caching Not Optimized ⚠️ **PARTIALLY IMPLEMENTED**

**Problem**:
- Content is extracted and cached, but:
  - No cache validation (check whether documents changed)
  - No cache reuse verification
  - Content is passed to prompts but may not be formatted efficiently

**Expected** (from Phase 5):
- Cache validation
- Efficient formatting
- Performance testing

**Current**: Basic caching exists but is not optimized

**Impact**: Medium - Works but could be more efficient

**Recommendation**: Add cache validation and optimization
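
A sketch of the missing cache validation, keyed on the `sourceDocuments` and `extractionTimestamp` fields that `generateDocument` already stores in `cachedContent` (the staleness threshold is an assumption):

```python
import time
from typing import Any, Dict, List, Optional

MAX_CACHE_AGE_SECONDS = 15 * 60  # assumption: refresh extractions after 15 minutes

def isCacheValid(cachedContent: Optional[Dict[str, Any]], currentDocumentIds: List[str]) -> bool:
    """Reuse cachedContent only if it covers the same documents and is still fresh."""
    if not cachedContent:
        return False
    if set(cachedContent.get("sourceDocuments", [])) != set(currentDocumentIds):
        return False  # document list changed - re-extract
    age = time.time() - cachedContent.get("extractionTimestamp", 0)
    return age <= MAX_CACHE_AGE_SECONDS
```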

---

### Issue 12: Renderer Updates Not Verified ⚠️ **UNKNOWN**

**Problem**:
- Implementation plan requires renderer updates for images
- HTML renderer should create separate image files
- PDF/XLSX/PPTX renderers should embed images
- **Status unknown** - need to verify renderers handle images correctly

**Impact**: High - Images may not render correctly

**Recommendation**: Verify all renderers handle images correctly

---

## 📋 Architecture Compliance Check

### Data Structure Compliance ✅

| Field | Required | Implemented | Status |
|-------|----------|-------------|--------|
| `metadata.title` | Yes | ✅ | ✅ |
| `metadata.split_strategy` | Yes | ✅ | ✅ |
| `sections[].id` | Yes | ✅ | ✅ |
| `sections[].content_type` | Yes | ✅ | ✅ |
| `sections[].complexity` | Yes | ✅ | ✅ |
| `sections[].generation_hint` | Yes | ✅ | ✅ |
| `sections[].order` | Yes | ✅ | ✅ |
| `sections[].elements` | Yes | ✅ | ✅ |
| `sections[].image_prompt` | Image only | ✅ | ✅ |

### Component Method Compliance ✅

| Component | Method | Required | Implemented | Status |
|-----------|--------|----------|-------------|--------|
| StructureGenerator | `generateStructure()` | Yes | ✅ | ✅ |
| StructureGenerator | `_createStructurePrompt()` | Yes | ✅ | ✅ |
| StructureGenerator | `_identifySectionComplexity()` | Yes | ✅ | ✅ |
| StructureGenerator | `_extractImagePrompts()` | Yes | ✅ | ✅ |
| StructureGenerator | `_validateAndEnhanceStructure()` | Yes | ✅ | ✅ |
| StructureGenerator | `_extractMeaningfulHint()` | Yes | ✅ | ✅ |
| ContentGenerator | `generateContent()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateSectionContent()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateSimpleSection()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateComplexTextSection()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateImageSection()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateSectionsParallel()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateSectionsSequential()` | Yes | ✅ | ✅ |
| ContentGenerator | `_createSectionPrompt()` | Yes | ✅ | ✅ |
| ContentIntegrator | `integrateContent()` | Yes | ✅ | ✅ |
| ContentIntegrator | `validateCompleteness()` | Yes | ✅ | ✅ |
| ContentIntegrator | `createErrorSection()` | Yes | ✅ | ✅ |

---

## 🎯 Priority Fixes Needed

### Critical (Must Fix)
1. ✅ **Issue 2**: Variable shadowing bug - **FIXED**
2. ✅ **Issue 3**: Missing generation_hint - **FIXED**
3. ✅ **Issue 4**: JSON template mismatch - **FIXED**
4. ✅ **Issue 5**: Prompt instructions mismatch - **FIXED**
5. ⚠️ **Issue 1**: Previous sections context - **NEEDS VERIFICATION**

### High Priority (Should Fix)
6. ⚠️ **Issue 12**: Renderer image handling - **NEEDS VERIFICATION**
7. ⚠️ **Issue 9**: Missing unit tests - **NOT IMPLEMENTED**
8. ⚠️ **Issue 10**: Missing integration tests - **NOT IMPLEMENTED**

### Medium Priority (Nice to Have)
9. ⚠️ **Issue 7**: Previous sections formatting fallback - **PARTIALLY IMPLEMENTED**
10. ⚠️ **Issue 11**: Content caching optimization - **PARTIALLY IMPLEMENTED**
11. ⚠️ **Issue 6**: Structure validation - **NOT IMPLEMENTED**
12. ⚠️ **Issue 8**: Debug file improvements - **NOT IMPLEMENTED**

---

## ✅ Summary

### What Works
- Core infrastructure is implemented
- Image generation is integrated
- Parallel processing is implemented
- Error handling is in place
- Progress logging works

### What's Fixed (This Session)
- Variable shadowing bug
- Missing generation_hint extraction
- JSON template architecture mismatch
- Prompt instructions clarity
- Previous sections tracking (needs verification)

### What Needs Work
- Unit and integration tests
- Renderer verification
- Previous sections formatting fallback
- Cache optimization
- Structure validation

### Overall Status
**Architecture**: ✅ **85% Compliant**
**Implementation**: ✅ **80% Complete**
**Testing**: ❌ **0% Complete**
**Production Ready**: ⚠️ **Not Yet** (needs testing and verification)

---

## Next Steps

1. **Verify Issue 1 Fix**: Test that previous sections are correctly tracked in parallel mode
2. **Verify Issue 12**: Test that all renderers handle images correctly
3. **Add Unit Tests**: Start with critical components (StructureGenerator, ContentGenerator)
4. **Add Integration Tests**: Test end-to-end flow with various scenarios
5. **Improve Previous Sections Formatting**: Add fallback to show generation_hint when elements not available
6. **Add Structure Validation**: Explicit validation before content generation
7. **Optimize Content Caching**: Add cache validation and efficient formatting

---

**Analysis Complete**: 2025-12-22

@ -0,0 +1,459 @@
|
||||||
|
# Concept: Hierarchical Document Generation with Image Integration
|
||||||
|
|
||||||
|
## Executive Summary
|
||||||
|
|
||||||
|
This concept proposes a **three-phase hierarchical approach** to document generation that enables proper image integration and handles complex documents efficiently.
|
||||||
|
|
||||||
|
**Key Decisions**:
|
||||||
|
- ✅ **Performance**: Parallel processing with ChatLog progress messages
|
||||||
|
- ✅ **Error Handling**: Skip failed sections, show error messages
|
||||||
|
- ✅ **Image Storage**: Store as base64 in JSON (renderers need direct access)
|
||||||
|
- ✅ **Backward Compatibility**: Not needed - implement as new default
|
||||||
|
|
||||||
|
**Renderer Status**:
|
||||||
|
- ✅ **Ready**: Text, Markdown, DOCX renderers
|
||||||
|
- ⚠️ **Needs Update**: HTML (create separate image files), PDF (embed images)
|
||||||
|
- ⚠️ **Needs Implementation**: XLSX, PPTX (add image support)
|
||||||
|
|
||||||
|
## Problem Statement
|
||||||
|
|
||||||
|
Currently, the document generation system has the following limitations:
|
||||||
|
|
||||||
|
1. **No Image Integration**: Images are generated separately but cannot be embedded into document structures
|
||||||
|
2. **Single-Pass Generation**: Documents are generated in one AI call, making it difficult to handle complex sections (long text, images, chapters)
|
||||||
|
3. **Repeated Extraction**: Content extraction may happen multiple times unnecessarily
|
||||||
|
4. **No Structured Approach**: No mechanism to first define document structure, then populate sections
|
||||||
|
|
||||||
|
## Current Architecture Analysis
|
||||||
|
|
||||||
|
### Current Flow:
|
||||||
|
```
|
||||||
|
User Request → ai.generateDocument → ai.process → AI JSON Generation → Renderer → Final Document
|
||||||
|
```
|
||||||
|
|
||||||
|
### Issues:
|
||||||
|
- AI generates complete JSON structure in one pass
|
||||||
|
- Images are generated separately via `ai.generate` action
|
||||||
|
- No mechanism to integrate generated images into document structure
|
||||||
|
- JSON schema supports `image` content_type, but AI rarely generates it
|
||||||
|
- Content extraction happens per action, not cached/reused
|
||||||
|
|
||||||
|
### Current Image Handling:
|
||||||
|
- Images can be rendered IF they exist in JSON structure (`content_type: "image"`)
|
||||||
|
- Image data expected as `base64Data` in elements
|
||||||
|
- Renderers support image rendering (Docx, PDF, HTML, etc.)
|
||||||
|
- But images are never generated WITHIN document generation
|
||||||
|
|
||||||
|
## Proposed Solution: Hierarchical Document Generation
|
||||||
|
|
||||||
|
### Core Concept
|
||||||
|
|
||||||
|
**Three-Phase Approach:**
|
||||||
|
1. **Structure Generation Phase**: Generate document skeleton with section placeholders
|
||||||
|
2. **Content Generation Phase**: Generate content for each section (text or image) via sub-prompts
|
||||||
|
3. **Integration Phase**: Merge all generated content into final document structure
|
||||||
|
|
||||||
|
### Architecture Overview
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ Phase 1: Structure Generation │
|
||||||
|
│ - Generate document skeleton │
|
||||||
|
│ - Identify sections (text, image, complex) │
|
||||||
|
│ - Create section placeholders with metadata │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
↓
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ Phase 2: Content Generation (Tree-like) │
|
||||||
|
│ │
|
||||||
|
│ ┌──────────────────────────────────────────────┐ │
|
||||||
|
│ │ Section 1: Heading (simple) │ │
|
||||||
|
│ │ → Generate directly │ │
|
||||||
|
│ └──────────────────────────────────────────────┘ │
|
||||||
|
│ │
|
||||||
|
│ ┌──────────────────────────────────────────────┐ │
|
||||||
|
│ │ Section 2: Paragraph (simple) │ │
|
||||||
|
│ │ → Generate directly │ │
|
||||||
|
│ └──────────────────────────────────────────────┘ │
|
||||||
|
│ │
|
||||||
|
│ ┌──────────────────────────────────────────────┐ │
|
||||||
|
│ │ Section 3: Image (complex) │ │
|
||||||
|
│ │ → Sub-prompt: Generate image │ │
|
||||||
|
│ │ → Store image data │ │
|
||||||
|
│ │ → Create image section with base64Data │ │
|
||||||
|
│ └──────────────────────────────────────────────┘ │
|
||||||
|
│ │
|
||||||
|
│ ┌──────────────────────────────────────────────┐ │
|
||||||
|
│ │ Section 4: Long Chapter (complex) │ │
|
||||||
|
│ │ → Sub-prompt: Generate chapter content │ │
|
||||||
|
│ │ → Split into subsections if needed │ │
|
||||||
|
│ └──────────────────────────────────────────────┘ │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
↓
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ Phase 3: Integration │
|
||||||
|
│ - Merge all generated content │
|
||||||
|
│ - Replace placeholders with actual data │
|
||||||
|
│ - Validate structure completeness │
|
||||||
|
│ - Render to final format │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## Detailed Design
|
||||||
|
|
||||||
|
### Phase 1: Structure Generation
|
||||||
|
|
||||||
|
**Purpose**: Create document skeleton with section metadata
|
||||||
|
|
||||||
|
**Process**:
|
||||||
|
1. AI generates document structure with sections
|
||||||
|
2. Each section includes:
|
||||||
|
- `id`: Unique identifier
|
||||||
|
- `content_type`: Type (heading, paragraph, image, table, etc.)
|
||||||
|
- `complexity`: "simple" or "complex"
|
||||||
|
- `generation_hint`: Instructions for content generation
|
||||||
|
- `order`: Section order
|
||||||
|
- `elements`: Empty or placeholder
|
||||||
|
|
||||||
|
**Example Structure**:
|
||||||
|
```json
{
  "metadata": {
    "title": "Children's Bedtime Story",
    "split_strategy": "single_document"
  },
  "documents": [{
    "id": "doc_1",
    "sections": [
      {
        "id": "section_title",
        "content_type": "heading",
        "complexity": "simple",
        "generation_hint": "Story title",
        "order": 1,
        "elements": []
      },
      {
        "id": "section_intro",
        "content_type": "paragraph",
        "complexity": "simple",
        "generation_hint": "Introduction paragraph",
        "order": 2,
        "elements": []
      },
      {
        "id": "section_image_1",
        "content_type": "image",
        "complexity": "complex",
        "generation_hint": "Illustration: Rabbit meeting owl in moonlit forest",
        "image_prompt": "A small brown rabbit sitting in a peaceful forest clearing under moonlight with stars, meeting a wise owl perched on a branch",
        "order": 3,
        "elements": []
      },
      {
        "id": "section_chapter_1",
        "content_type": "paragraph",
        "complexity": "complex",
        "generation_hint": "First chapter: Rabbit's adventure begins",
        "order": 4,
        "elements": []
      }
    ]
  }]
}
```

### Phase 2: Content Generation

**Purpose**: Generate actual content for each section

**Process**:

1. Iterate through sections in order
2. For each section:
   - **Simple sections** (heading, short paragraph):
     - Generate content directly via AI
     - Populate `elements` array
   - **Complex sections** (image, long chapter):
     - Create sub-prompt based on `generation_hint` and `image_prompt`
     - Generate content via specialized action:
       - Images: `ai.generate` with image generation
       - Long text: `ai.process` with focused prompt
     - Store generated content
     - Populate `elements` array
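A minimal sketch of this dispatch loop, assuming `ai.generate` and `ai.process` are awaitable and return the image base64 string and the section text respectively (the Python-level signatures here are illustrative, not the actual action API):

```python
async def generateContentForSections(sections: list, ai, cachedContent: str = "") -> list:
    """Populate each section's elements in declared order (sketch)."""
    for section in sorted(sections, key=lambda s: s["order"]):
        if section["content_type"] == "image":
            # Complex: delegate to image generation (ai.generate)
            base64Data = await ai.generate(imagePrompt=section["image_prompt"])
            section["elements"] = [{
                "url": f"data:image/png;base64,{base64Data}",
                "base64Data": base64Data,
                "altText": section.get("generation_hint", ""),
            }]
        elif section.get("complexity") == "complex":
            # Complex text: focused sub-prompt that reuses cached source content
            subPrompt = f"{section['generation_hint']}\n\nRelevant source content:\n{cachedContent}"
            section["elements"] = [{"text": await ai.process(prompt=subPrompt)}]
        else:
            # Simple: generate directly from the hint
            section["elements"] = [{"text": await ai.process(prompt=section["generation_hint"])}]
    return sections
```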
**Content Caching**:
- Extract content from source documents ONCE at the start
- Cache extracted content for reuse across all sections
- Pass cached content to sub-prompts to avoid re-extraction
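One possible shape for this cache, sketched here (the extraction service call is an assumption):

```python
from dataclasses import dataclass, field
from typing import Dict

@dataclass
class ContentCache:
    """Holds source-document text extracted once per generation run."""
    extractedText: Dict[str, str] = field(default_factory=dict)  # documentId -> text

    def getCombinedContent(self) -> str:
        # Concatenated extractions, ready to paste into sub-prompts
        return "\n\n".join(self.extractedText.values())

# Usage sketch: extract once, reuse for every section sub-prompt
# cache = ContentCache()
# for doc in sourceDocuments:                                       # hypothetical loop
#     cache.extractedText[doc.id] = extractionService.extract(doc)  # hypothetical call
```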
**Image Generation**:
- For `content_type: "image"` sections:
  - Use `image_prompt` from structure
  - Call `ai.generate` action with image generation
  - Receive base64 image data
  - Create image element:

```json
{
  "url": "data:image/png;base64,<base64_data>",
  "base64Data": "<base64_data>",
  "altText": "<alt_text>",
  "caption": "<caption>"
}
```
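A sketch of what this could look like as a ContentGenerator method (`_generateImageSection` matches the name used in the implementation plan; the `ai.generate` call signature and return shape are assumptions):

```python
async def _generateImageSection(self, section: dict) -> dict:
    """Generate an image for an 'image' section and populate its elements (sketch)."""
    try:
        base64Data = await self.ai.generate(imagePrompt=section["image_prompt"])
        section["elements"] = [{
            "url": f"data:image/png;base64,{base64Data}",
            "base64Data": base64Data,
            "altText": section.get("generation_hint", "Generated illustration"),
            "caption": section.get("generation_hint", ""),
        }]
    except Exception as exc:
        # Failures become error placeholders; see "Error Handling" below
        section["elements"] = [{"text": f"[ERROR: image generation failed: {exc}]"}]
        section["error"] = True
    return section
```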
### Phase 3: Integration

**Purpose**: Merge all content into final document structure

**Process**:

1. Validate all sections have content
2. Merge generated content into structure
3. Replace placeholders with actual data
4. Finalize JSON structure
5. Render to target format (docx, pdf, html, etc.)
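A minimal sketch of the integration step, assuming the canonical documents/sections JSON shape shown above:

```python
def integrateContent(structure: dict) -> dict:
    """Validate and finalize the generated structure before rendering (sketch)."""
    for document in structure.get("documents", []):
        for section in document.get("sections", []):
            # Every section must have populated elements by now
            if not section.get("elements"):
                raise ValueError(f"Section '{section['id']}' has no content")
        # Keep sections in their declared order for rendering
        document["sections"].sort(key=lambda s: s["order"])
    return structure
```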
## Implementation Strategy

### New Components Needed

1. **Structure Generator** (`structureGenerator.py`)
   - Generates document skeleton
   - Identifies section complexity
   - Creates generation hints

2. **Content Generator** (`contentGenerator.py`)
   - Generates content for each section
   - Handles simple vs. complex sections
   - Manages sub-prompts and image generation
   - Caches extracted content

3. **Content Integrator** (`contentIntegrator.py`)
   - Merges generated content
   - Validates completeness
   - Finalizes document structure

### Modified Components

1. **`generateDocument` action**
   - Implement hierarchical generation as default
   - Orchestrate three phases
   - Add progress logging for each phase

2. **`process` action**
   - Support content caching (extract once, reuse)
   - Support sub-prompt generation for sections

3. **Prompt Builder** (`subPromptBuilderGeneration.py`)
   - Add structure generation prompt
   - Add section-specific content prompts
   - Add image generation prompt templates

4. **Renderers** (update required):
   - **HTML Renderer**: Create separate image files and link them
   - **PDF Renderer**: Embed images using reportlab
   - **XLSX Renderer**: Add image embedding support
   - **PPTX Renderer**: Add image embedding support
### New Action Parameters

**For `generateDocument`**:
- `enableImageIntegration`: boolean (default: true)
- `maxSectionLength`: int (threshold for "complex" sections, default: 500 words)
- `parallelGeneration`: boolean (default: true) - enable parallel section generation
- `progressLogging`: boolean (default: true) - send ChatLog progress updates

**For sub-prompts**:
- `sectionContext`: Previous sections for context
- `cachedContent`: Extracted content cache (to avoid re-extraction)
- `targetSection`: Section metadata
- `previousSections`: Array of already-generated sections for continuity
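For illustration, a hypothetical parameter set using these names (the values are examples, not defaults enforced anywhere):

```python
# Hypothetical parameters for a generateDocument call
parameters = {
    "prompt": "Create a children's bedtime story with 5 illustrations",
    "resultType": "docx",
    "enableImageIntegration": True,   # generate and embed images
    "maxSectionLength": 500,          # words before a section counts as "complex"
    "parallelGeneration": True,       # generate independent sections concurrently
    "progressLogging": True,          # emit ChatLog progress updates
}
```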
## Benefits

1. **Image Integration**: Images can be generated and embedded into documents
2. **Structured Approach**: Clear separation of structure and content
3. **Efficiency**: Content extracted once, reused across sections
4. **Scalability**: Can handle very long documents by splitting into sections
5. **Quality**: Better control over complex sections (images, long chapters)
6. **Flexibility**: Can generate different content types per section

## Migration Strategy

**Note**: No backwards compatibility needed - can implement directly as new default.

1. **Phase 1**: Implement hierarchical generation as new default
2. **Phase 2**: Update renderers (HTML, PDF, XLSX, PPTX) for image support
3. **Phase 3**: Testing and refinement
4. **Phase 4**: Remove old single-pass mode (or keep as internal fallback only)
## Example Workflow

**User Request**: "Create a children's bedtime story with 5 illustrations"

**Phase 1 Output**:
```json
{
  "metadata": {"title": "Flöckchen's Adventure"},
  "documents": [{
    "sections": [
      {"id": "title", "content_type": "heading", "complexity": "simple", ...},
      {"id": "intro", "content_type": "paragraph", "complexity": "simple", ...},
      {"id": "img1", "content_type": "image", "complexity": "complex",
       "image_prompt": "Rabbit meeting owl", ...},
      {"id": "chapter1", "content_type": "paragraph", "complexity": "complex", ...},
      {"id": "img2", "content_type": "image", "complexity": "complex", ...},
      ...
    ]
  }]
}
```

**Phase 2 Process**:
- Generate title → populate elements
- Generate intro → populate elements
- Generate image 1 → call `ai.generate`, store base64 → populate elements
- Generate chapter 1 → sub-prompt → populate elements
- Generate image 2 → call `ai.generate`, store base64 → populate elements
- ...

**Phase 3 Output**: Complete document with all sections populated, ready for rendering
## Renderer Readiness Assessment

### Current Renderer Status for Image Handling:

1. **Text Renderer** (`rendererText.py`): ✅ **READY**
   - Skips images, shows placeholder: `[Image: altText]`
   - No changes needed

2. **Markdown Renderer** (`rendererMarkdown.py`): ✅ **READY**
   - Shows an image placeholder with truncated base64 data
   - No changes needed (markdown limitation)

3. **HTML Renderer** (`rendererHtml.py`): ⚠️ **NEEDS UPDATE**
   - Currently: Embeds base64 directly in `<img>` tag as data URI
   - **Required Change**: Create separate image files and link to them
   - Implementation: Generate image files (e.g., `image_1.png`, `image_2.png`) alongside HTML
   - Update `<img>` tags to use relative paths: `<img src="image_1.png" alt="...">`
   - Return multiple files: HTML file + image files (see the renderer sketches after this list)
4. **PDF Renderer** (`rendererPdf.py`): ⚠️ **NEEDS UPDATE**
   - Currently: Shows placeholder `[Image: altText]`
   - **Required Change**: Embed images directly in PDF using reportlab
   - Implementation: Use `reportlab.platypus.Image()` with base64-decoded bytes (sketch after this list)
5. **DOCX Renderer** (`rendererDocx.py`): ✅ **READY**
   - Embeds images directly using `doc.add_picture()`
   - Adds captions below images
   - No changes needed
6. **XLSX Renderer** (`rendererXlsx.py`): ⚠️ **NEEDS IMPLEMENTATION**
   - Currently: No image handling found
   - **Required Change**: Add image support using openpyxl
   - Implementation: Use `openpyxl.drawing.image.Image()` to embed images in cells (sketch after this list)
   - Store images in worksheet cells or as floating images
7. **PPTX Renderer** (`rendererPptx.py`): ⚠️ **NEEDS IMPLEMENTATION**
   - Currently: No image handling found
   - **Required Change**: Add image support using python-pptx
   - Implementation: Use `slide.shapes.add_picture()` to add images to slides (sketch after this list)
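The sketches below illustrate one possible implementation per renderer that needs work. They are hedged examples: element dicts follow the base64 JSON structure described earlier, method names mirror the implementation plan, and sizes, positions, and file names are placeholders.

For the HTML renderer, decoded images are written next to the HTML file and referenced by relative path:

```python
import base64
from pathlib import Path

def _extractImages(sections: list, outputDir: Path) -> dict:
    """Write base64 images to files; return section id -> relative file name (sketch)."""
    imagePaths = {}
    counter = 1
    for section in sections:
        if section.get("content_type") != "image":
            continue
        for element in section.get("elements", []):
            data = element.get("base64Data")
            if not data:
                continue
            fileName = f"image_{counter}.png"
            (outputDir / fileName).write_bytes(base64.b64decode(data))
            imagePaths[section["id"]] = fileName  # used for <img src="..."> later
            counter += 1
    return imagePaths
```

For the PDF renderer, `reportlab.platypus.Image` accepts a file-like object, so the decoded bytes can be wrapped in a `BytesIO`:

```python
import base64
from io import BytesIO
from reportlab.lib.units import cm
from reportlab.platypus import Image

def _renderJsonImage(element: dict) -> Image:
    """Build a reportlab flowable from a base64 image element (sketch)."""
    imageBytes = base64.b64decode(element["base64Data"])
    return Image(BytesIO(imageBytes), width=12 * cm, height=8 * cm)  # size is a placeholder
```

For the XLSX renderer (note that `openpyxl.drawing.image.Image` requires Pillow):

```python
import base64
from io import BytesIO
from openpyxl.drawing.image import Image as XlsxImage  # requires Pillow

def addImageToSheet(ws, element: dict, anchorCell: str = "B2") -> None:
    """Anchor a base64 image element at a worksheet cell (sketch)."""
    imageStream = BytesIO(base64.b64decode(element["base64Data"]))
    ws.add_image(XlsxImage(imageStream), anchorCell)
```

For the PPTX renderer, `add_picture()` also accepts a file-like object, so no temporary file is needed:

```python
import base64
from io import BytesIO
from pptx.util import Inches

def addImageToSlide(slide, element: dict) -> None:
    """Place a base64 image element on a slide (sketch)."""
    imageStream = BytesIO(base64.b64decode(element["base64Data"]))
    slide.shapes.add_picture(imageStream, Inches(1), Inches(1.5), width=Inches(8))
```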
### Renderer Update Requirements:

**Priority 1 (Critical for HTML output)**:
- HTML Renderer: Create separate image files and link them

**Priority 2 (Important for document formats)**:
- PDF Renderer: Embed images using reportlab
- XLSX Renderer: Add image embedding support
- PPTX Renderer: Add image embedding support
## Answers to Open Questions

### 1. Performance: How to handle very large documents (100+ sections)?

**Answer**: Use parallel processing where possible, with progress ChatLog messages.

**Implementation Strategy**:
- **Parallel Section Generation**: Generate independent sections in parallel using asyncio (see the sketch after the progress example below)
- **Batch Processing**: Process sections in batches (e.g., 10 sections at a time)
- **Progress Tracking**: Send ChatLog progress updates:
  - "Generating structure..." (Phase 1)
  - "Generating content for section X/Y..." (Phase 2)
  - "Generating image for section X..." (Phase 2 - images)
  - "Merging content..." (Phase 3)
  - "Rendering final document..." (Phase 3)
- **Streaming**: For very large documents, consider streaming partial results

**Example Progress Messages**:
```
Phase 1: Structure Generation (0% → 33%)
Phase 2: Content Generation (33% → 90%)
  - Section 1/10: Heading (34%)
  - Section 2/10: Paragraph (40%)
  - Section 3/10: Image generation (50%)
  - Section 4/10: Chapter (60%)
  ...
Phase 3: Integration & Rendering (90% → 100%)
```
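A minimal sketch of batched parallel generation with `asyncio.gather` (`generateOne` stands in for whatever coroutine fills a single section):

```python
import asyncio

async def generateSectionsParallel(sections: list, generateOne, batchSize: int = 10) -> list:
    """Generate sections in batches of concurrent tasks (sketch)."""
    for start in range(0, len(sections), batchSize):
        batch = sections[start:start + batchSize]
        # return_exceptions=True keeps one failure from cancelling the whole batch
        results = await asyncio.gather(
            *(generateOne(section) for section in batch),
            return_exceptions=True,
        )
        for section, result in zip(batch, results):
            if isinstance(result, Exception):
                section["error"] = True
                section["errorMessage"] = str(result)
    return sections
```

Batching both caps memory usage and keeps the number of concurrent AI calls bounded.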
### 2. Error Handling: What if one section fails?

**Answer**: Skip failed sections, keep the section title and type, and show an error message in the section.

**Implementation Strategy**:
- **Graceful Degradation**: Continue processing remaining sections
- **Error Section**: Create an error placeholder section:

```json
{
  "id": "section_failed_3",
  "content_type": "paragraph",
  "elements": [{
    "text": "[ERROR: Failed to generate content for this section. Error: <error_message>]"
  }],
  "order": 3,
  "error": true,
  "errorMessage": "<detailed_error>"
}
```

- **Logging**: Log errors for debugging but don't fail the entire document
- **User Notification**: Include error count in the final progress message
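A sketch of the corresponding helper (`createErrorSection` matches the method name in the implementation plan; the fields mirror the JSON above):

```python
def createErrorSection(section: dict, error: Exception) -> dict:
    """Turn a failed section into an error placeholder, preserving its type and order (sketch)."""
    return {
        "id": f"section_failed_{section['order']}",
        "content_type": section.get("content_type", "paragraph"),
        "elements": [{
            "text": f"[ERROR: Failed to generate content for this section. Error: {error}]"
        }],
        "order": section["order"],
        "error": True,
        "errorMessage": str(error),
    }
```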
### 3. Image Storage: Where to store generated images?

**Answer**: Store images in the JSON as base64, as renderers need them afterwards.

**Implementation Strategy**:
- **In-Memory Storage**: Keep base64 strings in the JSON structure during generation
- **JSON Structure**: Store in section elements:

```json
{
  "url": "data:image/png;base64,<base64_data>",
  "base64Data": "<full_base64_string>",
  "altText": "Image description",
  "caption": "Optional caption"
}
```

- **Memory Management**: For very large images, consider compression or chunking
- **Renderer Access**: All renderers can access `base64Data` directly from the JSON
- **HTML Special Case**: The HTML renderer will extract the base64 data, decode it, and save it as separate files during rendering
### 4. Backward Compatibility: How to ensure existing workflows still work?

**Answer**: No backwards compatibility needed.

**Implementation Strategy**:
- **New Default**: Hierarchical generation becomes the default mode
- **Clean Migration**: All document generation uses the hierarchical approach
- **No Fallback**: Remove single-pass mode (or keep as internal fallback only)
- **Breaking Change**: Acceptable since this is a new feature/enhancement

## Next Steps

1. **Review and Approval**: Get feedback on concept
2. **Detailed Design**: Design API and data structures
3. **Prototype**: Implement Phase 1 (structure generation)
4. **Testing**: Test with real use cases
5. **Full Implementation**: Implement all phases
6. **Migration**: Migrate existing workflows

@@ -0,0 +1,398 @@

# Implementation Plan: Hierarchical Document Generation

## Overview

This document outlines the step-by-step implementation plan for the hierarchical document generation system with image integration.

## Implementation Phases

### Phase 1: Core Infrastructure (Week 1)

**Goal**: Set up core components and data structures

#### Tasks:

1. **Create StructureGenerator Component**
   - [ ] Create `subStructureGenerator.py`
   - [ ] Implement `generateStructure()` method
   - [ ] Implement `_createStructurePrompt()` method
   - [ ] Implement `_identifySectionComplexity()` method
   - [ ] Implement `_extractImagePrompts()` method
   - [ ] Add unit tests

2. **Create ContentGenerator Component**
   - [ ] Create `subContentGenerator.py`
   - [ ] Implement `generateContent()` method
   - [ ] Implement `_generateSectionContent()` method
   - [ ] Implement `_generateSimpleSection()` method
   - [ ] Implement `_generateComplexTextSection()` method
   - [ ] Implement `_createSectionPrompt()` method
   - [ ] Add unit tests

3. **Create ContentIntegrator Component**
   - [ ] Create `subContentIntegrator.py`
   - [ ] Implement `integrateContent()` method
   - [ ] Implement `validateCompleteness()` method
   - [ ] Implement `createErrorSection()` method
   - [ ] Add unit tests

4. **Update generateDocument Action**
   - [ ] Modify `generateDocument.py` to use hierarchical approach
   - [ ] Add Phase 1: Structure generation
   - [ ] Add Phase 2: Content generation (sequential first)
   - [ ] Add Phase 3: Integration & rendering
   - [ ] Add basic progress logging
   - [ ] Add error handling

**Deliverables**:
- Core components created
- Basic hierarchical generation working (sequential)
- Unit tests passing

**Estimated Time**: 3-4 days

---

### Phase 2: Image Generation Integration (Week 1-2)

**Goal**: Integrate image generation into content generation

#### Tasks:

1. **Implement Image Section Generation**
   - [ ] Add `_generateImageSection()` method to ContentGenerator
   - [ ] Integrate with `ai.generate` action
   - [ ] Handle base64 image data storage
   - [ ] Add image prompt extraction from structure
   - [ ] Add error handling for image generation failures

2. **Update Structure Generation Prompt**
   - [ ] Add image section detection in structure prompt
   - [ ] Add `image_prompt` field extraction
   - [ ] Test with user prompts requesting images

3. **Test Image Integration**
   - [ ] Test image generation in document structure
   - [ ] Test multiple images in one document
   - [ ] Test image generation failures

**Deliverables**:
- Image generation integrated
- Images stored as base64 in JSON
- Error handling for image failures

**Estimated Time**: 2-3 days

---

### Phase 3: Parallel Processing & Progress Logging (Week 2)

**Goal**: Implement parallel section generation and detailed progress logging

#### Tasks:

1. **Implement Parallel Generation**
   - [ ] Add `_generateSectionsParallel()` method
   - [ ] Use `asyncio.gather()` for parallel execution
   - [ ] Add batch processing for large documents
   - [ ] Handle exceptions in parallel execution
   - [ ] Test parallel vs. sequential performance

2. **Enhance Progress Logging**
   - [ ] Create progress callback system
   - [ ] Add detailed progress messages:
     - Structure generation progress
     - Section-by-section progress
     - Image generation progress
     - Rendering progress
   - [ ] Calculate accurate progress percentages
   - [ ] Test progress updates

3. **Update generateDocument Action**
   - [ ] Integrate parallel generation
   - [ ] Add progress callback to content generation
   - [ ] Update progress logging throughout phases

**Deliverables**:
- Parallel section generation working
- Detailed progress logging
- Performance improvements

**Estimated Time**: 2-3 days

---

### Phase 4: Renderer Updates (Week 2-3)

**Goal**: Update renderers to properly handle images

#### Tasks:

1. **Update HTML Renderer**
   - [ ] Modify `rendererHtml.py`
   - [ ] Add `_extractImages()` method
   - [ ] Implement separate image file creation
   - [ ] Update HTML to use relative image paths
   - [ ] Handle multiple image files
   - [ ] Test HTML + image files output

2. **Update PDF Renderer**
   - [ ] Modify `rendererPdf.py`
   - [ ] Update `_renderJsonImage()` to embed images
   - [ ] Use `reportlab.platypus.Image()` with base64
   - [ ] Handle image sizing and positioning
   - [ ] Test PDF with embedded images

3. **Update XLSX Renderer**
   - [ ] Modify `rendererXlsx.py`
   - [ ] Add `_renderJsonImage()` method
   - [ ] Use `openpyxl.drawing.image.Image()` to embed images
   - [ ] Handle image placement in cells
   - [ ] Test XLSX with images

4. **Update PPTX Renderer**
   - [ ] Modify `rendererPptx.py`
   - [ ] Add `_renderJsonImage()` method
   - [ ] Use `slide.shapes.add_picture()` to add images
   - [ ] Handle image sizing on slides
   - [ ] Test PPTX with images

**Deliverables**:
- All renderers support images
- HTML creates separate image files
- PDF/XLSX/PPTX embed images directly

**Estimated Time**: 4-5 days

---

### Phase 5: Content Caching & Optimization (Week 3)

**Goal**: Implement content caching to avoid re-extraction

#### Tasks:

1. **Implement Content Cache**
   - [ ] Create ContentCache data structure
   - [ ] Extract content once at start of generation
   - [ ] Pass cached content to all sub-prompts
   - [ ] Add cache validation (check if documents changed)
   - [ ] Test cache reuse

2. **Optimize Prompt Building**
   - [ ] Update structure prompt to use cached content
   - [ ] Update section prompts to use cached content
   - [ ] Format cached content efficiently
   - [ ] Test prompt sizes

3. **Performance Testing**
   - [ ] Test with large documents
   - [ ] Test with multiple source documents
   - [ ] Measure performance improvements
   - [ ] Optimize bottlenecks

**Deliverables**:
- Content caching implemented
- No redundant content extraction
- Performance optimized

**Estimated Time**: 2-3 days

---

### Phase 6: Error Handling & Edge Cases (Week 3-4)

**Goal**: Robust error handling and edge case coverage

#### Tasks:

1. **Enhance Error Handling**
   - [ ] Improve error section creation
   - [ ] Add error recovery strategies
   - [ ] Handle partial failures gracefully
   - [ ] Add error logging and reporting

2. **Handle Edge Cases**
   - [ ] Empty document list
   - [ ] No sections generated
   - [ ] All sections fail
   - [ ] Very large images
   - [ ] Very long documents (100+ sections)
   - [ ] Missing image prompts
   - [ ] Invalid section types

3. **Add Validation**
   - [ ] Validate structure before content generation
   - [ ] Validate content before integration
   - [ ] Validate final document before rendering
   - [ ] Add comprehensive error messages

**Deliverables**:
- Robust error handling
- Edge cases covered
- Clear error messages

**Estimated Time**: 2-3 days

---

### Phase 7: Testing & Refinement (Week 4)

**Goal**: Comprehensive testing and refinement

#### Tasks:

1. **Unit Testing**
   - [ ] Complete unit tests for all components
   - [ ] Test all methods
   - [ ] Test error scenarios
   - [ ] Achieve >80% code coverage

2. **Integration Testing**
   - [ ] Test end-to-end document generation
   - [ ] Test with various document types
   - [ ] Test with images
   - [ ] Test with long documents
   - [ ] Test error scenarios

3. **Performance Testing**
   - [ ] Test with 10, 50, 100+ sections
   - [ ] Measure generation time
   - [ ] Measure memory usage
   - [ ] Compare parallel vs. sequential
   - [ ] Optimize if needed

4. **User Acceptance Testing**
   - [ ] Test with real user scenarios
   - [ ] Test bedtime story with images (original use case)
   - [ ] Test business documents
   - [ ] Test technical documents
   - [ ] Gather feedback

5. **Documentation**
   - [ ] Update API documentation
   - [ ] Add code comments
   - [ ] Update user guides
   - [ ] Create examples

**Deliverables**:
- Comprehensive test suite
- Performance benchmarks
- Documentation complete
- Ready for production

**Estimated Time**: 3-4 days

---

## Dependencies

### External Dependencies
- `asyncio` - For parallel processing
- `base64` - For image encoding/decoding
- `reportlab` - For PDF image embedding
- `openpyxl` - For XLSX image embedding
- `python-pptx` - For PPTX image embedding

### Internal Dependencies
- `serviceGeneration` - Main generation service
- `serviceAi` - AI service for generation
- `serviceExtraction` - Content extraction service
- `methodAi.actions.generate` - Image generation action
- `methodAi.actions.process` - Text generation action

## Risk Mitigation

### Risks and Mitigation Strategies

1. **Risk**: Image generation failures break entire document
   - **Mitigation**: Error handling creates error sections, continues processing

2. **Risk**: Parallel generation causes memory issues
   - **Mitigation**: Batch processing, limit concurrent operations

3. **Risk**: Large base64 images cause JSON size issues
   - **Mitigation**: Consider compression or chunking for very large images

4. **Risk**: HTML renderer needs to return multiple files
   - **Mitigation**: Modify return type or create file bundle system

5. **Risk**: Performance not meeting expectations
   - **Mitigation**: Profile and optimize bottlenecks, consider caching

## Success Criteria

### Functional Requirements
- ✅ Documents can be generated with embedded images
- ✅ HTML renderer creates separate image files
- ✅ PDF/XLSX/PPTX renderers embed images
- ✅ Progress logging shows detailed progress
- ✅ Error handling prevents complete failures
- ✅ Content extraction happens only once

### Performance Requirements
- ✅ Parallel generation improves performance by 2x+ for multi-section documents
- ✅ Progress updates appear within 1 second of action
- ✅ Documents with 50+ sections complete in <5 minutes

### Quality Requirements
- ✅ >80% code coverage
- ✅ All edge cases handled
- ✅ Clear error messages
- ✅ Comprehensive documentation

## Rollout Plan

### Step 1: Internal Testing (Week 4)
- Deploy to development environment
- Internal team testing
- Fix critical issues

### Step 2: Beta Testing (Week 5)
- Deploy to staging environment
- Select beta users
- Gather feedback
- Fix issues

### Step 3: Production Deployment (Week 6)
- Deploy to production
- Monitor performance
- Monitor errors
- Gather user feedback

### Step 4: Optimization (Ongoing)
- Monitor usage patterns
- Optimize based on real-world usage
- Add enhancements based on feedback

## Timeline Summary

| Phase | Duration | Start | End |
|-------|----------|-------|-----|
| Phase 1: Core Infrastructure | 3-4 days | Day 1 | Day 4 |
| Phase 2: Image Integration | 2-3 days | Day 4 | Day 7 |
| Phase 3: Parallel Processing | 2-3 days | Day 7 | Day 10 |
| Phase 4: Renderer Updates | 4-5 days | Day 10 | Day 15 |
| Phase 5: Content Caching | 2-3 days | Day 15 | Day 18 |
| Phase 6: Error Handling | 2-3 days | Day 18 | Day 21 |
| Phase 7: Testing & Refinement | 3-4 days | Day 21 | Day 25 |

**Total Estimated Time**: 4-5 weeks

## Next Steps

1. **Review and Approve Plan**
   - Review implementation plan
   - Approve timeline
   - Assign resources

2. **Set Up Development Environment**
   - Create feature branch
   - Set up test infrastructure
   - Prepare development tools

3. **Begin Phase 1**
   - Start with StructureGenerator
   - Set up project structure
   - Begin implementation

modules/workflows/processing/shared/RENDERING_ISSUE_ANALYSIS.md (238 lines, new file)

@@ -0,0 +1,238 @@

# Rendering Issue Analysis
## Why HTML Documents Are Being Rendered as Text

**Date**: 2025-12-22
**Issue**: Documents requested as HTML are being output as text/plain

---

## Root Cause Analysis

### Issue 1: `resultType` Not Extracted from Task Objective ❌ **CRITICAL**

**Problem**:
- Task objective clearly states: "Generate a complete, well-structured **HTML document**"
- Validation shows: `EXPECTED FORMATS: ['html']`
- But the action was called with: `ai.generateDocument {}` (empty parameters)
- So `resultType` defaults to `"docx"` instead of `"html"`

**Location**:
- `generateDocument.py` line 44: `resultType = parameters.get("resultType", "docx")`
- No parameter extraction from task objective/prompt

**Impact**: **CRITICAL** - Wrong format is used even though the task clearly requests HTML

**Fix Needed**:
- Extract `resultType` from task objective/prompt before calling the action
- Or enhance `generateDocument` to detect the format from the prompt if not provided

---

### Issue 2: HTML Not in Action Definition Options ❌ **CRITICAL**

**Problem**:
- Action definition in `methodAi.py` line 357 only lists: `["docx", "pdf", "txt", "md"]`
- `"html"` is **NOT** in the allowed options
- But the docstring says HTML is supported: `"resultType (str, optional): Output format (docx, pdf, txt, md, html, etc.)"`

**Location**:
- `methodAi.py` line 357: `frontendOptions=["docx", "pdf", "txt", "md"]`

**Impact**: **CRITICAL** - Even if HTML is requested, it might be rejected or not recognized

**Fix Needed**:
- Add `"html"` to the `frontendOptions` list

---

### Issue 3: Renderer Fallback to Text ❌ **CRITICAL**

**Problem**:
- When `resultType="docx"` is used (the default)
- If the docx renderer fails or is not found
- The system falls back to the text renderer (lines 403-404 of `mainServiceGeneration.py`)
- This explains why the output is `text/plain` instead of HTML

**Location**:
- `mainServiceGeneration.py` lines 393-409: `_getFormatRenderer()` method
- Line 403: `logger.warning(f"No renderer found for format {output_format}, falling back to text")`

**Impact**: **CRITICAL** - Wrong format is rendered

**Fix Needed**:
- Fix the docx renderer if it's failing
- Or better: Extract the correct format from the prompt

---

### Issue 4: Missing Parameter Extraction ❌ **HIGH PRIORITY**

**Problem**:
- Task objective contains format information ("HTML document")
- But no parameter extraction step extracts `resultType` from the prompt
- The action is called with empty parameters `{}`

**Location**:
- Workflow execution - parameter extraction phase
- Should extract `resultType: "html"` from the task objective

**Impact**: **HIGH** - System can't infer format from user intent

**Fix Needed**:
- Add parameter extraction that detects the format from the prompt
- Or enhance `generateDocument` to auto-detect the format from the prompt

---

## Flow Analysis

### Expected Flow:
```
1. Task Objective: "Generate HTML document..."
2. Parameter Extraction: Extract resultType="html" from objective
3. Action Call: ai.generateDocument({resultType: "html", prompt: "..."})
4. Content Generation: Generate sections with content
5. Integration: Merge sections into complete structure
6. Rendering: Call renderReport(outputFormat="html")
7. HTML Renderer: Render to HTML
8. Output: document.html (text/html)
```

### Actual Flow (Broken):
```
1. Task Objective: "Generate HTML document..."
2. Parameter Extraction: ❌ MISSING - no extraction
3. Action Call: ai.generateDocument({}) ❌ Empty parameters
4. Content Generation: ✅ Generate sections with content
5. Integration: ✅ Merge sections into complete structure
6. Rendering: Call renderReport(outputFormat="docx") ❌ Wrong format
7. Docx Renderer: ❌ Fails or not found
8. Fallback: Text renderer ❌ Wrong renderer
9. Output: document.text (text/plain) ❌ Wrong format
```

---

## Fixes Required

### Fix 1: Add HTML to Action Definition Options ✅ **EASY**

**File**: `gateway/modules/workflows/methods/methodAi/methodAi.py`
**Line**: 357

**Change**:
```python
frontendOptions=["docx", "pdf", "txt", "md", "html"],  # Added "html"
```

---

### Fix 2: Extract resultType from Prompt ✅ **MEDIUM**

**Option A**: Enhance `generateDocument` to detect the format from the prompt

**File**: `gateway/modules/workflows/methods/methodAi/actions/generateDocument.py`
**After line 44**:

```python
resultType = parameters.get("resultType", "docx")

# Auto-detect format from prompt if not provided
if resultType == "docx" and prompt:
    promptLower = prompt.lower()
    if "html" in promptLower or "html5" in promptLower:
        resultType = "html"
    elif "pdf" in promptLower:
        resultType = "pdf"
    elif "markdown" in promptLower or "md" in promptLower:
        resultType = "md"
    elif "text" in promptLower or "txt" in promptLower:
        resultType = "txt"
```

**Option B**: Extract in parameter planning phase (better, but requires workflow changes)

---

### Fix 3: Improve Renderer Error Handling ✅ **MEDIUM**

**File**: `gateway/modules/services/serviceGeneration/mainServiceGeneration.py`
**Lines**: 393-409

**Enhance**: Better error messages and logging when a renderer is not found

```python
def _getFormatRenderer(self, output_format: str):
    """Get the appropriate renderer for the specified format using auto-discovery."""
    try:
        from .renderers.registry import getRenderer
        renderer = getRenderer(output_format, services=self.services)

        if renderer:
            return renderer

        # Log available formats for debugging
        from .renderers.registry import getSupportedFormats
        availableFormats = getSupportedFormats()
        logger.error(
            f"No renderer found for format '{output_format}'. "
            f"Available formats: {availableFormats}"
        )

        # Fallback to text renderer if no specific renderer found
        logger.warning(f"Falling back to text renderer for format {output_format}")
        fallbackRenderer = getRenderer('text', services=self.services)
        if fallbackRenderer:
            return fallbackRenderer

        logger.error("Even text renderer fallback failed")
        return None

    except Exception as e:
        logger.error(f"Error getting renderer for {output_format}: {str(e)}")
        return None
```

---

## Verification Steps

After fixes:

1. **Test HTML Generation**:
   - Task: "Generate HTML document about AI"
   - Expected: `resultType="html"` extracted or detected
   - Expected: HTML renderer used
   - Expected: Output is `document.html` with `text/html` MIME type

2. **Test Format Detection**:
   - Task: "Generate PDF report"
   - Expected: `resultType="pdf"` detected
   - Expected: PDF renderer used

3. **Test Explicit Parameter**:
   - Action: `ai.generateDocument({resultType: "html", prompt: "..."})`
   - Expected: HTML renderer used (no fallback)

---

## Summary

**Root Causes**:
1. ❌ `resultType` not extracted from task objective
2. ❌ HTML not in action definition options
3. ❌ Renderer fallback to text when docx fails
4. ❌ No format auto-detection from prompt

**Priority**: **CRITICAL** - System cannot produce HTML documents as requested

**Estimated Fix Time**:
- Fix 1: 5 minutes
- Fix 2: 30 minutes
- Fix 3: 15 minutes
- **Total**: ~1 hour

---

**Analysis Complete**: 2025-12-22

@@ -68,7 +68,7 @@ def discoverMethods(serviceCenter):
             # Method not discovered yet - create new instance
             methodInstance = item(serviceCenter)

-            # Use the actions property from MethodBase which handles @action decorator
+            # Use the actions property from MethodBase which handles WorkflowActionDefinition
             actions = methodInstance.actions

             # Create method info

@@ -131,7 +131,7 @@ def getMethodsList(serviceCenter):
     return "\n\n".join(methodsList)


 def getActionParameterList(methodName: str, actionName: str, methods: Dict[str, Any]) -> str:
-    """Get action parameter list from method docstring for AI parameter generation (list only)."""
+    """Get action parameter list from WorkflowActionParameter structure for AI parameter generation (list only)."""
     try:
         if not methods or methodName not in methods:
             return ""

@@ -141,17 +141,21 @@ def getActionParameterList(methodName: str, actionName: str, methods: Dict[str,
             return ""

         action_info = methodInstance.actions[actionName]
-        # Extract parameter descriptions from docstring
-        docstring = action_info.get('description', '')
-        paramDescriptions, paramTypes = methodInstance._extractParameterDetails(docstring)
+        # Use structured WorkflowActionParameter objects from new system
+        parameters = action_info.get('parameters', {})

         param_list = []
-        for paramName, paramDesc in paramDescriptions.items():
-            paramType = paramTypes.get(paramName, 'Any')
+        for paramName, paramInfo in parameters.items():
+            paramType = paramInfo.get('type', 'Any')
+            paramDesc = paramInfo.get('description', '')
+            paramRequired = paramInfo.get('required', False)
+
+            # Format: paramName (type, required/optional): description
+            reqText = "required" if paramRequired else "optional"
             if paramDesc:
-                param_list.append(f"- {paramName} ({paramType}): {paramDesc}")
+                param_list.append(f"- {paramName} ({paramType}, {reqText}): {paramDesc}")
             else:
-                param_list.append(f"- {paramName} ({paramType})")
+                param_list.append(f"- {paramName} ({paramType}, {reqText})")

         # Return list only, without leading headings or trailing text
         return "\n".join(param_list)

@@ -88,10 +88,23 @@ def extractAvailableMethods(service: Any) -> str:

     # Create a flat JSON format with compound action names for better AI parsing
     available_actions_json = {}
+    processed_methods = set()  # Track processed methods to avoid duplicates

     for methodName, methodInfo in methods.items():
+        # Skip short name aliases - only process full class names (MethodXxx)
+        # Short names are stored as aliases but we want to avoid processing them twice
+        if not methodName.startswith('Method'):
+            continue
+
         # Convert MethodAi -> ai, MethodDocument -> document, etc.
         shortName = methodName.replace('Method', '').lower()

+        # Skip if we've already processed this method (via its short name alias)
+        if shortName in processed_methods:
+            continue
+
+        processed_methods.add(shortName)
+
         for actionName, actionInfo in methodInfo['actions'].items():
             # Create compound action name: method.action
             compoundActionName = f"{shortName}.{actionName}"

@@ -343,6 +343,12 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to
 - Check "structureComparison.gap" to see what's missing. If quantitative gaps are available, use them.
 - Next action should ONLY generate the MISSING part, NOT repeat what's already delivered

+CRITICAL - Missing Data Generation Strategy:
+- When gap analysis shows missing data (found count = 0 but required count > 0):
+  * Generate the missing data FIRST as separate outputs before attempting integration
+  * Do NOT try to generate AND integrate missing data in one step - data must exist before integration
+  * Only AFTER missing data exists can you integrate it with existing data in a subsequent action
+
 === OUTPUT FORMAT ===
 Return ONLY JSON (no markdown, no explanations). The decision MUST:
 - Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)

@@ -28,6 +28,7 @@ class WorkflowProcessor:
         self.services = services
         self.mode = self._createMode(services.workflow.workflowMode)
         self.workflow = services.workflow
+        self.workflowExecOperationId = None  # Will be set by workflowManager for task hierarchy

     def _createMode(self, workflowMode: WorkflowModeEnum) -> BaseMode:
         """Create the appropriate mode implementation based on workflow mode"""

@@ -111,16 +112,20 @@ class WorkflowProcessor:
         # Init progress logger
         operationId = f"taskExec_{workflow.id}_{taskIndex}_{int(time.time())}"

+        # Get parent operationId (Service Workflow Execution) if available
+        parentOperationId = getattr(self, 'workflowExecOperationId', None)
+
         try:
             # Check workflow status before executing task
             checkWorkflowStopped(self.services)

-            # Start progress tracking
+            # Start progress tracking - Task is child of Service Workflow Execution
             self.services.chat.progressLogStart(
                 operationId,
                 "Workflow Execution",
                 "Task Execution",
-                f"Task {taskIndex}"
+                f"Task {taskIndex}",
+                parentOperationId=parentOperationId
             )

             logger.info(f"=== STARTING TASK EXECUTION ===")

@@ -566,72 +566,89 @@ class WorkflowManager:
         allTaskResults: List = []
         previousResults: List[str] = []

+        # Create "Service Workflow Execution" root entry - parent of all tasks
+        workflowExecOperationId = f"workflowExec_{workflow.id}"
+        self.services.chat.progressLogStart(
+            workflowExecOperationId,
+            "Service",
+            "Workflow Execution",
+            f"Executing {totalTasks} task(s)"
+        )
+
+        # Store workflow execution operationId in workflowProcessor for task hierarchy
+        handling.workflowExecOperationId = workflowExecOperationId
+
+        try:
             for idx, taskStep in enumerate(taskPlan.tasks):
                 currentTaskIndex = idx + 1
                 logger.info(f"Task {currentTaskIndex}/{totalTasks}: {taskStep.objective}")

                 # Update workflow state before executing task (fixes "Task 0" issue)
                 handling.updateWorkflowBeforeExecutingTask(currentTaskIndex)

                 # Build TaskContext (mode-specific behavior is inside WorkflowProcessor)
                 taskContext = TaskContext(
                     taskStep=taskStep,
                     workflow=workflow,
                     workflowId=workflow.id,
                     availableDocuments=None,
                     availableConnections=None,
                     previousResults=previousResults,
                     previousHandover=None,
                     improvements=[],
                     retryCount=0,
                     previousActionResults=[],
                     previousReviewResult=None,
                     isRegeneration=False,
                     failurePatterns=[],
                     failedActions=[],
                     successfulActions=[],
                     criteriaProgress={
                         'met_criteria': set(),
                         'unmet_criteria': set(),
                         'attempt_history': []
                     }
                 )

                 taskResult = await handling.executeTask(taskStep, workflow, taskContext)

                 # Persist task result for cross-task/round document references
                 # Convert ChatTaskResult to WorkflowTaskResult for persistence
                 from modules.datamodels.datamodelWorkflow import TaskResult as WorkflowTaskResult
                 from modules.datamodels.datamodelChat import ActionResult

                 # Get final ActionResult from task execution (last action result)
                 finalActionResult = None
                 if hasattr(taskResult, 'actionResult'):
                     finalActionResult = taskResult.actionResult
                 elif taskContext.previousActionResults and len(taskContext.previousActionResults) > 0:
                     # Use last action result from context
                     finalActionResult = taskContext.previousActionResults[-1]

                 # Create WorkflowTaskResult for persistence
                 if finalActionResult:
                     workflowTaskResult = WorkflowTaskResult(
                         taskId=taskStep.id,
                         actionResult=finalActionResult
                     )
                     # Persist task result (creates ChatMessage + ChatDocuments)
                     await handling.persistTaskResult(workflowTaskResult, workflow, taskContext)

                 handoverData = await handling.prepareTaskHandover(taskStep, [], taskResult, workflow)
                 allTaskResults.append({
                     'taskStep': taskStep,
                     'taskResult': taskResult,
                     'handoverData': handoverData
                 })
                 if taskResult.success and taskResult.feedback:
                     previousResults.append(taskResult.feedback)

             # Mark workflow as completed; error/stop cases update status elsewhere
             workflow.status = "completed"
+        finally:
+            # Finish "Service Workflow Execution" entry
+            self.services.chat.progressLogFinish(workflowExecOperationId, True)

         return None

     async def _processWorkflowResults(self) -> None:

@@ -71,6 +71,9 @@ google-cloud-texttospeech==2.16.3
 ## MSFT Integration
 msal==1.24.1

+## Azure Integration
+azure-communication-email>=1.0.0  # Azure Communication Services Email
+
 ## Testing Dependencies
 pytest>=8.0.0
 pytest-asyncio>=0.21.0

tests/functional/test09_document_generation_formats.py (410 lines, new file)

@@ -0,0 +1,410 @@

#!/usr/bin/env python3
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""
|
||||||
|
Document Generation Formats Test - Tests document generation in all supported formats
|
||||||
|
Tests HTML, PDF, DOCX, XLSX, and PPTX generation with images and various content types.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import base64
|
||||||
|
from typing import Dict, Any, List, Optional
|
||||||
|
|
||||||
|
# Add the gateway to path (go up 2 levels from tests/functional/)
|
||||||
|
_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||||
|
if _gateway_path not in sys.path:
|
||||||
|
sys.path.insert(0, _gateway_path)
|
||||||
|
|
||||||
|
# Import the service initialization
|
||||||
|
from modules.services import getInterface as getServices
|
||||||
|
from modules.datamodels.datamodelChat import UserInputRequest, WorkflowModeEnum
|
||||||
|
from modules.datamodels.datamodelUam import User
|
||||||
|
from modules.features.workflow import chatStart
|
||||||
|
import modules.interfaces.interfaceDbChatObjects as interfaceDbChatObjects
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentGenerationFormatsTester:
    def __init__(self):
        # Use root user for testing (has full access to everything)
        from modules.interfaces.interfaceDbAppObjects import getRootInterface
        rootInterface = getRootInterface()
        self.testUser = rootInterface.currentUser

        # Initialize services using the existing system
        self.services = getServices(self.testUser, None)  # Test user, no workflow
        self.workflow = None
        self.testResults = {}
        self.generatedDocuments = {}

    async def initialize(self):
        """Initialize the test environment."""
        # Enable debug file logging for tests
        from modules.shared.configuration import APP_CONFIG
        APP_CONFIG.set("APP_DEBUG_CHAT_WORKFLOW_ENABLED", True)

        # Set logging level to INFO to see workflow progress
        import logging
        logging.getLogger().setLevel(logging.INFO)

        print(f"Initialized test with user: {self.testUser.id}")
        print(f"Mandate ID: {self.testUser.mandateId}")
        print(f"Debug logging enabled: {APP_CONFIG.get('APP_DEBUG_CHAT_WORKFLOW_ENABLED', False)}")

    def createTestPrompt(self, format: str) -> str:
        """Create a test prompt for document generation in the specified format."""
        prompts = {
            "html": "Create a professional HTML document about 'The Future of Artificial Intelligence' with: 1) A main title, 2) An introduction paragraph, 3) Three key sections with headings, 4) A bullet list of benefits, 5) An image showing AI technology (generate it), 6) A conclusion paragraph. Format as HTML.",
            "pdf": "Create a professional PDF report about 'Climate Change Impact Analysis' with: 1) A title page, 2) An executive summary, 3) Three main sections with data tables, 4) Charts/graphs described, 5) An image showing environmental impact (generate it), 6) Conclusions and recommendations. Format as PDF.",
            "docx": "Create a comprehensive Word document about 'Project Management Best Practices' with: 1) A cover page with title, 2) Table of contents, 3) Five chapters with headings and paragraphs, 4) A table comparing methodologies, 5) An image illustrating project workflow (generate it), 6) Appendices. Format as DOCX.",
            "xlsx": "Create an Excel workbook about 'Sales Performance Analysis' with: 1) A summary sheet with key metrics, 2) A detailed data sheet with sales data in a table format (columns: Month, Product, Sales, Units, Revenue), 3) A chart sheet with visualizations described, 4) An analysis sheet with calculations. Format as XLSX.",
            "pptx": "Create a PowerPoint presentation about 'Digital Transformation Strategy' with: 1) A title slide, 2) An agenda slide, 3) Five content slides with bullet points, 4) A slide with an image showing transformation roadmap (generate it), 5) A conclusion slide. Format as PPTX."
        }
        return prompts.get(format.lower(), prompts["docx"])

    async def generateDocumentInFormat(self, format: str) -> Dict[str, Any]:
        """Generate a document in the specified format using workflow."""
        print("\n" + "="*80)
        print(f"GENERATING DOCUMENT IN {format.upper()} FORMAT")
        print("="*80)

        prompt = self.createTestPrompt(format)
        print(f"Prompt: {prompt[:200]}...")

        # Create user input request
        userInput = UserInputRequest(
            prompt=prompt,
            userLanguage="en"
        )

        # Start workflow
        print(f"\nStarting workflow for {format.upper()} generation...")
        workflow = await chatStart(
            currentUser=self.testUser,
            userInput=userInput,
            workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC,
            workflowId=None
        )

        if not workflow:
            return {
                "success": False,
                "error": "Failed to start workflow"
            }

        self.workflow = workflow
        print(f"Workflow started: {workflow.id}")

        # Wait for workflow completion
        print("Waiting for workflow completion...")
        completed = await self.waitForWorkflowCompletion(timeout=300)  # 5 minute timeout

        if not completed:
            return {
                "success": False,
                "error": "Workflow did not complete within timeout",
                "workflowId": workflow.id,
                "status": workflow.status if workflow else "unknown"
            }

        # Analyze results
        results = self.analyzeWorkflowResults()

        # Extract documents for this format
        documents = results.get("documents", [])
        formatDocuments = [d for d in documents if d.get("fileName", "").endswith(f".{format.lower()}")]

        return {
            "success": True,
            "format": format,
            "workflowId": workflow.id,
            "status": results.get("status"),
            "documentCount": len(formatDocuments),
            "documents": formatDocuments,
            "results": results
        }

    async def waitForWorkflowCompletion(self, timeout: int = 300, checkInterval: int = 2) -> bool:
        """Wait for workflow to complete."""
        if not self.workflow:
            return False

        startTime = time.time()
        lastStatus = None

        interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser)

        while True:
            # Check timeout
            if time.time() - startTime > timeout:
                print(f"\n⏱️ Timeout after {timeout} seconds")
                return False

            # Get current workflow status
            try:
                currentWorkflow = interfaceDbChat.getWorkflow(self.workflow.id)
                if not currentWorkflow:
                    print("\n❌ Workflow not found")
                    return False

                currentStatus = currentWorkflow.status
                elapsed = int(time.time() - startTime)

                # Print status if it changed
                if currentStatus != lastStatus:
                    print(f"Workflow status: {currentStatus} (elapsed: {elapsed}s)")
                    lastStatus = currentStatus

                # Check if workflow is complete
                if currentStatus in ["completed", "stopped", "failed"]:
                    self.workflow = currentWorkflow
                    statusIcon = "✅" if currentStatus == "completed" else "❌"
                    print(f"\n{statusIcon} Workflow finished with status: {currentStatus} (elapsed: {elapsed}s)")
                    return currentStatus == "completed"

                # Wait before next check
                await asyncio.sleep(checkInterval)

            except Exception as e:
                print(f"\n⚠️ Error checking workflow status: {str(e)}")
                await asyncio.sleep(checkInterval)

    def analyzeWorkflowResults(self) -> Dict[str, Any]:
        """Analyze workflow results and extract information."""
        if not self.workflow:
            return {"error": "No workflow to analyze"}

        interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser)
        workflow = interfaceDbChat.getWorkflow(self.workflow.id)

        if not workflow:
            return {"error": "Workflow not found"}

        # Get unified chat data
        chatData = interfaceDbChat.getUnifiedChatData(workflow.id, None)

        # Count messages
        messages = chatData.get("messages", [])
        userMessages = [m for m in messages if m.get("role") == "user"]
        assistantMessages = [m for m in messages if m.get("role") == "assistant"]

        # Count documents
        documents = chatData.get("documents", [])

        # Get logs
        logs = chatData.get("logs", [])

        results = {
            "workflowId": workflow.id,
            "status": workflow.status,
            "workflowMode": str(workflow.workflowMode) if hasattr(workflow, 'workflowMode') else None,
            "currentRound": workflow.currentRound,
            "totalTasks": workflow.totalTasks,
            "totalActions": workflow.totalActions,
            "messageCount": len(messages),
            "userMessageCount": len(userMessages),
            "assistantMessageCount": len(assistantMessages),
            "documentCount": len(documents),
            "logCount": len(logs),
            "documents": documents,
            "logs": logs
        }

        print("\nWorkflow Results:")
        print(f"  Status: {results['status']}")
        print(f"  Tasks: {results['totalTasks']}")
        print(f"  Actions: {results['totalActions']}")
        print(f"  Messages: {results['messageCount']}")
        print(f"  Documents: {results['documentCount']}")

        # Print document details
        if documents:
            print("\nGenerated Documents:")
            for doc in documents:
                fileName = doc.get("fileName", "unknown")
                fileSize = doc.get("fileSize", 0)
                mimeType = doc.get("mimeType", "unknown")
                print(f"  - {fileName} ({fileSize} bytes, {mimeType})")

        return results

    def verifyDocumentFormat(self, document: Dict[str, Any], expectedFormat: str) -> Dict[str, Any]:
        """Verify that a document matches the expected format."""
        fileName = document.get("fileName", "")
        mimeType = document.get("mimeType", "")
        fileSize = document.get("fileSize", 0)

        # Expected MIME types
        expectedMimeTypes = {
            "html": ["text/html", "application/xhtml+xml"],
            "pdf": ["application/pdf"],
            "docx": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
            "xlsx": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
            "pptx": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"]
        }

        # Expected file extensions
        expectedExtensions = {
            "html": [".html", ".htm"],
            "pdf": [".pdf"],
            "docx": [".docx"],
            "xlsx": [".xlsx"],
            "pptx": [".pptx"]
        }

        formatLower = expectedFormat.lower()
        expectedMimes = expectedMimeTypes.get(formatLower, [])
        expectedExts = expectedExtensions.get(formatLower, [])

        # Check file extension
        hasCorrectExtension = any(fileName.lower().endswith(ext) for ext in expectedExts)

        # Check MIME type
        hasCorrectMimeType = any(mimeType.lower() == mime.lower() for mime in expectedMimes)

        # Check file size (should be > 0)
        hasValidSize = fileSize > 0

        verification = {
            "format": expectedFormat,
            "fileName": fileName,
            "mimeType": mimeType,
            "fileSize": fileSize,
            "hasCorrectExtension": hasCorrectExtension,
            "hasCorrectMimeType": hasCorrectMimeType,
            "hasValidSize": hasValidSize,
            "isValid": hasCorrectExtension and hasValidSize
        }

        return verification
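    # Hedged sketch, not part of the committed test: extension and MIME type can
    # both be mislabeled by a generator, so a complementary check is to inspect
    # the file's leading "magic bytes". This assumes the document dict exposes
    # base64-encoded content under a hypothetical "fileData" key, which this
    # test does not confirm.
    def verifyMagicBytes(self, document: Dict[str, Any], expectedFormat: str) -> bool:
        """Best-effort signature check on the decoded file content."""
        raw = base64.b64decode(document.get("fileData", "") or "")
        if not raw:
            return False
        signatures = {
            "pdf": [b"%PDF"],
            "docx": [b"PK\x03\x04"],  # OOXML formats are ZIP containers
            "xlsx": [b"PK\x03\x04"],
            "pptx": [b"PK\x03\x04"],
            "html": [b"<!DOCTYPE", b"<!doctype", b"<html"],
        }
        head = raw.lstrip()[:16]
        return any(head.startswith(sig) for sig in signatures.get(expectedFormat.lower(), []))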

    async def testAllFormats(self) -> Dict[str, Any]:
        """Test document generation in all formats."""
        print("\n" + "="*80)
        print("TESTING DOCUMENT GENERATION IN ALL FORMATS")
        print("="*80)

        formats = ["html", "pdf", "docx", "xlsx", "pptx"]
        results = {}

        for format in formats:
            try:
                print(f"\n{'='*80}")
                print(f"Testing {format.upper()} format...")
                print(f"{'='*80}")

                result = await self.generateDocumentInFormat(format)
                results[format] = result

                if result.get("success"):
                    documents = result.get("documents", [])
                    if documents:
                        # Verify first document
                        verification = self.verifyDocumentFormat(documents[0], format)
                        result["verification"] = verification

                        print(f"\n✅ {format.upper()} generation successful!")
                        print(f"  Documents: {len(documents)}")
                        print(f"  Verification: {'✅ PASS' if verification['isValid'] else '❌ FAIL'}")
                        if verification.get("fileName"):
                            print(f"  File: {verification['fileName']}")
                            print(f"  Size: {verification['fileSize']} bytes")
                            print(f"  MIME: {verification['mimeType']}")
                    else:
                        print(f"\n⚠️ {format.upper()} generation completed but no documents found")
                else:
                    error = result.get("error", "Unknown error")
                    print(f"\n❌ {format.upper()} generation failed: {error}")

                # Small delay between tests
                await asyncio.sleep(2)

            except Exception as e:
                import traceback
                print(f"\n❌ Error testing {format.upper()}: {str(e)}")
                print(traceback.format_exc())
                results[format] = {
                    "success": False,
                    "error": str(e),
                    "traceback": traceback.format_exc()
                }

        return results

    async def runTest(self):
        """Run the complete test."""
        print("\n" + "="*80)
        print("DOCUMENT GENERATION FORMATS TEST")
        print("="*80)

        try:
            # Initialize
            await self.initialize()

            # Test all formats
            results = await self.testAllFormats()

            # Summary
            print("\n" + "="*80)
            print("TEST SUMMARY")
            print("="*80)

            successCount = 0
            failCount = 0

            for format, result in results.items():
                if result.get("success"):
                    successCount += 1
                    status = "✅ PASS"
                    docCount = result.get("documentCount", 0)
                    verification = result.get("verification", {})
                    isValid = verification.get("isValid", False)
                    statusIcon = "✅" if isValid else "⚠️"
                    print(f"{statusIcon} {format.upper():6s}: {status} - {docCount} document(s)")
                else:
                    failCount += 1
                    error = result.get("error", "Unknown error")
                    print(f"❌ {format.upper():6s}: FAIL - {error}")

            print(f"\nTotal: {successCount} passed, {failCount} failed out of {len(results)} formats")

            self.testResults = {
                "success": failCount == 0,
                "successCount": successCount,
                "failCount": failCount,
                "totalFormats": len(results),
                "results": results
            }

            return self.testResults

        except Exception as e:
            import traceback
            print(f"\n❌ Test failed with error: {type(e).__name__}: {str(e)}")
            print(f"Traceback:\n{traceback.format_exc()}")
            self.testResults = {
                "success": False,
                "error": str(e),
                "traceback": traceback.format_exc()
            }
            return self.testResults

async def main():
    """Run document generation formats test."""
    tester = DocumentGenerationFormatsTester()
    results = await tester.runTest()

    # Print final results as JSON for easy parsing
    print("\n" + "="*80)
    print("FINAL RESULTS (JSON)")
    print("="*80)
    print(json.dumps(results, indent=2, default=str))


if __name__ == "__main__":
    asyncio.run(main())
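For manual inspection of the generated files, a small standalone helper along these lines could decode and save each document returned by runTest(). This is a sketch under the same assumption as above, namely that each document dict carries base64-encoded content under a hypothetical "fileData" key:

import base64
import os

def saveDocuments(testResults: dict, outDir: str = "generated_docs") -> None:
    """Write each generated document to disk for manual inspection."""
    os.makedirs(outDir, exist_ok=True)
    for fmt, result in testResults.get("results", {}).items():
        for doc in result.get("documents", []):
            data = base64.b64decode(doc.get("fileData", "") or "")
            if data:
                path = os.path.join(outDir, doc.get("fileName", f"output.{fmt}"))
                with open(path, "wb") as fh:
                    fh.write(data)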