diff --git a/modules/datamodels/datamodelJson.py b/modules/datamodels/datamodelJson.py index 8ead97e1..784cc042 100644 --- a/modules/datamodels/datamodelJson.py +++ b/modules/datamodels/datamodelJson.py @@ -19,12 +19,14 @@ supportedSectionTypes: List[str] = [ ] # Canonical JSON template used for AI generation (documents array + sections) -# Rendering pipelines can select the first document and read its sections. +# This template is used for STRUCTURE generation - sections have empty elements arrays. +# For content generation, elements arrays will be populated later. jsonTemplateDocument: str = """{ "metadata": { "split_strategy": "single_document", "source_documents": [], - "extraction_method": "ai_generation" + "extraction_method": "ai_generation", + "title": "{{DOCUMENT_TITLE}}" }, "documents": [ { @@ -33,56 +35,77 @@ jsonTemplateDocument: str = """{ "filename": "document.json", "sections": [ { - "id": "section_heading_example", + "id": "section_heading_main_title", "content_type": "heading", - "elements": [ - {"level": 1, "text": "Heading Text"} - ], - "order": 0 + "complexity": "simple", + "generation_hint": "Main document title heading", + "order": 1, + "elements": [] }, { - "id": "section_paragraph_example", + "id": "section_paragraph_introduction", "content_type": "paragraph", - "elements": [ - {"text": "Paragraph text content"} - ], - "order": 0 + "complexity": "simple", + "generation_hint": "Introduction paragraph", + "order": 2, + "elements": [] + }, + { + "id": "section_heading_section_1", + "content_type": "heading", + "complexity": "simple", + "generation_hint": "Section heading for topic 1", + "order": 3, + "elements": [] + }, + { + "id": "section_paragraph_section_1", + "content_type": "paragraph", + "complexity": "simple", + "generation_hint": "Content paragraph for section 1", + "order": 4, + "elements": [] }, { "id": "section_bullet_list_example", "content_type": "bullet_list", - "elements": [ - { - "items": ["Item 1", "Item 2"] - } - ], - 
"order": 0 + "complexity": "simple", + "generation_hint": "Bullet list items", + "order": 5, + "elements": [] + }, + { + "id": "section_image_example", + "content_type": "image", + "complexity": "complex", + "generation_hint": "Illustration for document", + "image_prompt": "A detailed description for image generation", + "order": 6, + "elements": [] }, { "id": "section_table_example", "content_type": "table", - "elements": [ - { - "headers": ["Column 1", "Column 2"], - "rows": [ - ["Row 1 Col 1", "Row 1 Col 2"], - ["Row 2 Col 1", "Row 2 Col 2"] - ], - "caption": "Table caption" - } - ], - "order": 0 + "complexity": "simple", + "generation_hint": "Data table with relevant information", + "order": 7, + "elements": [] }, { "id": "section_code_example", "content_type": "code_block", - "elements": [ - { - "code": "function example() { return true; }", - "language": "javascript" - } - ], - "order": 0 + "complexity": "simple", + "generation_hint": "Code example or snippet", + "order": 8, + "elements": [] + }, + { + "id": "section_paragraph_conclusion", + "content_type": "paragraph", + "complexity": "simple", + "generation_hint": "Conclusion paragraph", + "order": 9, + "elements": [] } ] } diff --git a/modules/datamodels/datamodelWorkflowActions.py b/modules/datamodels/datamodelWorkflowActions.py index 1857883b..a3812955 100644 --- a/modules/datamodels/datamodelWorkflowActions.py +++ b/modules/datamodels/datamodelWorkflowActions.py @@ -19,9 +19,9 @@ class WorkflowActionParameter(BaseModel): name: str = Field(description="Parameter name") type: str = Field(description="Python type as string: 'str', 'int', 'bool', 'List[str]', etc.") frontendType: FrontendType = Field(description="UI rendering type (from global FrontendType enum)") - frontendOptions: Optional[Union[str, List[Dict[str, Any]]]] = Field( + frontendOptions: Optional[Union[str, List[str]]] = Field( None, - description="Options for select/multiselect/custom types. 
String reference (e.g., 'user.connection') or static list. For custom types, this is automatically set to the API endpoint." + description="Options for select/multiselect/custom types. String reference (e.g., 'user.connection') or list of strings (e.g., ['txt', 'json']). For custom types, this is automatically set to the API endpoint." ) required: bool = Field(False, description="Whether parameter is required") default: Optional[Any] = Field(None, description="Default value") diff --git a/modules/services/__init__.py b/modules/services/__init__.py index 6edfe13b..32e0cb3f 100644 --- a/modules/services/__init__.py +++ b/modules/services/__init__.py @@ -57,6 +57,9 @@ class Services: from modules.interfaces.interfaceDbComponentObjects import getInterface as getComponentInterface self.interfaceDbComponent = getComponentInterface(user) + # Expose RBAC directly on services for convenience + self.rbac = self.interfaceDbApp.rbac if self.interfaceDbApp else None + # Initialize service packages from .serviceExtraction.mainServiceExtraction import ExtractionService diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 67a47163..648e922c 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -1206,37 +1206,74 @@ If no trackable items can be identified, return: {{"kpis": []}} else: content_for_generation = None - self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") - from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt + # Detect if this is a section generation prompt (not full document generation) + # Section prompts contain "SECTION TO GENERATE" marker + isSectionGeneration = "SECTION TO GENERATE" in prompt or "CRITICAL: Return ONLY a JSON object with an \"elements\" array" in prompt - generation_prompt = await buildGenerationPrompt( - outputFormat, prompt, title, content_for_generation, 
None, self.services - ) - - promptArgs = { - "outputFormat": outputFormat, - "userPrompt": prompt, - "title": title, - "extracted_content": content_for_generation, - "services": self.services - } - - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") - # Extract user prompt from promptArgs for task completion analysis - userPrompt = None - if promptArgs: - userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt") - - # Track generation progress - the looping function will update with byte progress - generated_json = await self._callAiWithLooping( - generation_prompt, - options, - "document_generation", - buildGenerationPrompt, - promptArgs, - aiOperationId, - userPrompt=userPrompt - ) + if isSectionGeneration: + # For section generation, use the prompt directly without wrapping + # Section prompts are already complete and should not be wrapped in document generation template + logger.debug("Detected section generation prompt - skipping document generation wrapper") + generation_prompt = prompt + + # Call AI directly without looping (sections are simple, single-call) + self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for section generation") + request = AiCallRequest( + prompt=generation_prompt, + context="", + options=options + ) + response = await self.callAi(request) + generated_json = response.content if response and response.content else "" + + # For section generation, return the raw JSON content directly + # No rendering needed - sections are just JSON elements + self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Section content generated") + self.services.chat.progressLogFinish(aiOperationId, True) + + metadata = AiResponseMetadata( + title=title or "Section Content", + operationType=opType.value if opType else None + ) + + return AiResponse( + content=generated_json, + metadata=metadata, + documents=[] + ) + else: + # Full document generation - use the wrapper + 
self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") + from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt + + generation_prompt = await buildGenerationPrompt( + outputFormat, prompt, title, content_for_generation, None, self.services + ) + + promptArgs = { + "outputFormat": outputFormat, + "userPrompt": prompt, + "title": title, + "extracted_content": content_for_generation, + "services": self.services + } + + self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") + # Extract user prompt from promptArgs for task completion analysis + userPrompt = None + if promptArgs: + userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt") + + # Track generation progress - the looping function will update with byte progress + generated_json = await self._callAiWithLooping( + generation_prompt, + options, + "document_generation", + buildGenerationPrompt, + promptArgs, + aiOperationId, + userPrompt=userPrompt + ) # Calculate final size for completion message finalSize = len(generated_json.encode('utf-8')) if generated_json else 0 @@ -1291,7 +1328,7 @@ If no trackable items can be identified, return: {{"kpis": []}} from modules.services.serviceGeneration.mainServiceGeneration import GenerationService generationService = GenerationService(self.services) self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format") - rendered_content, mime_type = await generationService.renderReport( + rendered_content, mime_type, _images = await generationService.renderReport( generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self ) self.services.chat.progressLogFinish(renderOperationId, True) diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py index cb1d6f9f..5b518afa 100644 --- 
a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -82,14 +82,62 @@ class GenerationService: documentData = doc_data['content'] mimeType = doc_data['mimeType'] - # Convert document data to string content - content = convertDocumentDataToString(documentData, getFileExtension(documentName)) + # Handle binary data (images, PDFs, Office docs) differently from text + # Check if this is a binary MIME type + binaryMimeTypes = { + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "application/pdf", + "image/png", "image/jpeg", "image/jpg", "image/gif", "image/webp", "image/bmp", "image/svg+xml", + } - # Skip empty or minimal content - minimalContentPatterns = ['{}', '[]', 'null', '""', "''"] - if not content or content.strip() == "" or content.strip() in minimalContentPatterns: - logger.warning(f"Empty or minimal content for document {documentName}, skipping") - continue + isBinaryMimeType = mimeType in binaryMimeTypes + base64encoded = False + content = None + + if isBinaryMimeType: + # For binary data, handle bytes vs base64 string vs regular string + if isinstance(documentData, bytes): + # Already bytes - encode to base64 string for storage + import base64 + content = base64.b64encode(documentData).decode('utf-8') + base64encoded = True + elif isinstance(documentData, str): + # Check if it's already valid base64 + import base64 + try: + # Try to decode to verify it's base64 + base64.b64decode(documentData, validate=True) + # Valid base64 - use as is + content = documentData + base64encoded = True + except Exception: + # Not valid base64 - might be raw string, try encoding + try: + content = base64.b64encode(documentData.encode('utf-8')).decode('utf-8') + base64encoded = True + except Exception: + 
logger.warning(f"Could not process binary data for {documentName}, skipping") + continue + else: + # Other types - convert to string then base64 + import base64 + try: + content = base64.b64encode(str(documentData).encode('utf-8')).decode('utf-8') + base64encoded = True + except Exception: + logger.warning(f"Could not encode binary data for {documentName}, skipping") + continue + else: + # Text data - convert to string + content = convertDocumentDataToString(documentData, getFileExtension(documentName)) + + # Skip empty or minimal content + minimalContentPatterns = ['{}', '[]', 'null', '""', "''"] + if not content or content.strip() == "" or content.strip() in minimalContentPatterns: + logger.warning(f"Empty or minimal content for document {documentName}, skipping") + continue # Normalize file extension based on mime type if missing or incorrect try: @@ -102,6 +150,13 @@ class GenerationService: "text/markdown": ".md", "text/plain": ".txt", "application/json": ".json", + "image/png": ".png", + "image/jpeg": ".jpg", + "image/jpg": ".jpg", + "image/gif": ".gif", + "image/webp": ".webp", + "image/bmp": ".bmp", + "image/svg+xml": ".svg", } expectedExt = mime_to_ext.get(mimeType) if expectedExt: @@ -114,20 +169,6 @@ class GenerationService: except Exception: pass - # Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text - base64encoded = False - try: - binaryMimeTypes = { - "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "application/vnd.openxmlformats-officedocument.presentationml.presentation", - "application/pdf", - } - if isinstance(documentData, str) and mimeType in binaryMimeTypes: - base64encoded = True - except Exception: - base64encoded = False - # Create document with file in one step using interfaces directly document = self._createDocument( fileName=documentName, @@ -278,7 +319,7 @@ class GenerationService: 'workflowId': 'unknown' } - async 
def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str]: + async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str, List[Dict[str, Any]]]: """ Render extracted JSON content to the specified output format. Always uses unified "documents" array format. @@ -291,7 +332,8 @@ class GenerationService: aiService: AI service instance for generation prompt creation Returns: - tuple: (rendered_content, mime_type) + tuple: (rendered_content, mime_type, images_list) + images_list: List of image dicts with base64Data, altText, caption, etc. """ try: # Validate JSON input @@ -311,12 +353,10 @@ class GenerationService: if "sections" not in single_doc: raise ValueError("Document must contain 'sections' field") - # Create content for single document renderer - contentToRender = { - "sections": single_doc["sections"], - "metadata": extractedContent.get("metadata", {}), - "continuation": extractedContent.get("continuation", None) - } + # Pass standardized schema to renderer (maintains architecture) + # Renderer should extract sections from documents array according to standardized schema + # Standardized schema: {metadata: {...}, documents: [{sections: [...]}]} + contentToRender = extractedContent # Pass full standardized schema # Get the appropriate renderer for the format renderer = self._getFormatRenderer(outputFormat) @@ -324,9 +364,15 @@ class GenerationService: raise ValueError(f"Unsupported output format: {outputFormat}") # Render the JSON content directly (AI generation handled by main service) + # Renderer receives standardized schema and extracts what it needs renderedContent, mimeType = await renderer.render(contentToRender, title, userPrompt, aiService) - return renderedContent, mimeType + # Get images from renderer if available + images = [] + if hasattr(renderer, 'getRenderedImages'): + 
images = renderer.getRenderedImages() + + return renderedContent, mimeType, images except Exception as e: logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}") @@ -353,14 +399,21 @@ class GenerationService: def _getFormatRenderer(self, output_format: str): """Get the appropriate renderer for the specified format using auto-discovery.""" try: - from .renderers.registry import getRenderer + from .renderers.registry import getRenderer, getSupportedFormats renderer = getRenderer(output_format, services=self.services) if renderer: return renderer + # Log available formats for debugging + availableFormats = getSupportedFormats() + logger.error( + f"No renderer found for format '{output_format}'. " + f"Available formats: {availableFormats}" + ) + # Fallback to text renderer if no specific renderer found - logger.warning(f"No renderer found for format {output_format}, falling back to text") + logger.warning(f"Falling back to text renderer for format {output_format}") fallbackRenderer = getRenderer('text', services=self.services) if fallbackRenderer: return fallbackRenderer @@ -370,4 +423,6 @@ class GenerationService: except Exception as e: logger.error(f"Error getting renderer for {output_format}: {str(e)}") + import traceback + logger.debug(traceback.format_exc()) return None \ No newline at end of file diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index 1f013457..491c1d06 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -66,12 +66,34 @@ class BaseRenderer(ABC): pass def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]: - """Extract sections from report data.""" - return reportData.get('sections', []) + """ + Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]} + """ + if 
"documents" not in reportData: + raise ValueError("Report data must follow standardized schema with 'documents' array") + + documents = reportData.get("documents", []) + if not isinstance(documents, list) or len(documents) == 0: + raise ValueError("Standardized schema must contain at least one document in 'documents' array") + + firstDoc = documents[0] + if not isinstance(firstDoc, dict) or "sections" not in firstDoc: + raise ValueError("Document in standardized schema must contain 'sections' field") + + return firstDoc.get("sections", []) def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]: - """Extract metadata from report data.""" - return reportData.get('metadata', {}) + """ + Extract metadata from standardized schema: {metadata: {...}, documents: [{sections: [...]}]} + """ + if "metadata" not in reportData: + raise ValueError("Report data must follow standardized schema with 'metadata' field") + + metadata = reportData.get("metadata", {}) + if not isinstance(metadata, dict): + raise ValueError("Metadata in standardized schema must be a dictionary") + + return metadata def _getTitle(self, reportData: Dict[str, Any], fallbackTitle: str) -> str: """Get title from report data or use fallback.""" @@ -79,14 +101,33 @@ class BaseRenderer(ABC): return metadata.get('title', fallbackTitle) def _validateJsonStructure(self, jsonContent: Dict[str, Any]) -> bool: - """Validate that JSON content has the expected structure.""" + """ + Validate that JSON content follows standardized schema: {metadata: {...}, documents: [{sections: [...]}]} + """ if not isinstance(jsonContent, dict): return False - if "sections" not in jsonContent: + # Validate metadata field exists + if "metadata" not in jsonContent: return False - sections = jsonContent.get("sections", []) + if not isinstance(jsonContent.get("metadata"), dict): + return False + + # Validate documents array exists and is not empty + if "documents" not in jsonContent: + return False + + documents = 
jsonContent.get("documents", []) + if not isinstance(documents, list) or len(documents) == 0: + return False + + # Validate first document has sections + firstDoc = documents[0] + if not isinstance(firstDoc, dict) or "sections" not in firstDoc: + return False + + sections = firstDoc.get("sections", []) if not isinstance(sections, list): return False diff --git a/modules/services/serviceGeneration/renderers/rendererCsv.py b/modules/services/serviceGeneration/renderers/rendererCsv.py index d0a3ec04..c18d7481 100644 --- a/modules/services/serviceGeneration/renderers/rendererCsv.py +++ b/modules/services/serviceGeneration/renderers/rendererCsv.py @@ -41,15 +41,16 @@ class RendererCsv(BaseRenderer): async def _generateCsvFromJson(self, jsonContent: Dict[str, Any], title: str) -> str: """Generate CSV content from structured JSON document.""" try: - # Validate JSON structure - if not isinstance(jsonContent, dict): - raise ValueError("JSON content must be a dictionary") + # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) + if not self._validateJsonStructure(jsonContent): + raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in jsonContent: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(jsonContent) + metadata = self._extractMetadata(jsonContent) # Use title from JSON metadata if available, otherwise use provided title - documentTitle = jsonContent.get("metadata", {}).get("title", title) + documentTitle = metadata.get("title", title) # Generate CSV content csvRows = [] @@ -60,7 +61,6 @@ class RendererCsv(BaseRenderer): csvRows.append([]) # Empty row # Process each section in order - sections = jsonContent.get("sections", []) for section in sections: sectionCsv = self._renderJsonSectionToCsv(section) if sectionCsv: diff --git 
a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py index f33b898d..48fb94f1 100644 --- a/modules/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -71,22 +71,22 @@ class RendererDocx(BaseRenderer): self._setupBasicDocumentStyles(doc) self._setupDocumentStyles(doc, styleSet) - # Validate JSON structure - if not isinstance(json_content, dict): - raise ValueError("JSON content must be a dictionary") + # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) + if not self._validateJsonStructure(json_content): + raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in json_content: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(json_content) + metadata = self._extractMetadata(json_content) # Use title from JSON metadata if available, otherwise use provided title - document_title = json_content.get("metadata", {}).get("title", title) + document_title = metadata.get("title", title) # Add document title using Title style if document_title: doc.add_paragraph(document_title, style='Title') # Process each section in order - sections = json_content.get("sections", []) for section in sections: self._renderJsonSection(doc, section, styleSet) diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index bc15917d..163690d3 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -28,14 +28,25 @@ class RendererHtml(BaseRenderer): async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> 
Tuple[str, str]: """Render extracted JSON content to HTML format using AI-analyzed styling.""" try: + # Extract images first + images = self._extractImages(extractedContent) + + # Store images in instance for later retrieval + self._renderedImages = images + # Generate HTML using AI-analyzed styling htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService) + # Replace base64 data URIs with relative file paths if images exist + if images: + htmlContent = self._replaceImageDataUris(htmlContent, images) + return htmlContent, "text/html" except Exception as e: self.logger.error(f"Error rendering HTML: {str(e)}") # Return minimal HTML fallback + self._renderedImages = [] # Initialize empty list on error return f"{title}

{title}

Error rendering report: {str(e)}

", "text/html" async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: @@ -45,14 +56,15 @@ class RendererHtml(BaseRenderer): styles = await self._getStyleSet(userPrompt, aiService) # Validate JSON structure - if not isinstance(jsonContent, dict): - raise ValueError("JSON content must be a dictionary") + if not self._validateJsonStructure(jsonContent): + raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in jsonContent: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(jsonContent) + metadata = self._extractMetadata(jsonContent) # Use title from JSON metadata if available, otherwise use provided title - documentTitle = jsonContent.get("metadata", {}).get("title", title) + documentTitle = metadata.get("title", title) # Build HTML document htmlParts = [] @@ -77,7 +89,6 @@ class RendererHtml(BaseRenderer): htmlParts.append('
') # Process each section - sections = jsonContent.get("sections", []) for section in sections: sectionHtml = self._renderJsonSection(section, styles) if sectionHtml: @@ -377,12 +388,15 @@ class RendererHtml(BaseRenderer): def _renderJsonHeading(self, headingData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON heading to HTML using AI-generated styles.""" try: - # Normalize non-dict inputs - if isinstance(headingData, str): + # Normalize inputs - headingData is typically a list of elements from _getSectionData + if isinstance(headingData, list): + # Extract first element from elements array + if headingData and len(headingData) > 0: + headingData = headingData[0] if isinstance(headingData[0], dict) else {} + else: + return "" + elif isinstance(headingData, str): headingData = {"text": headingData, "level": 2} - elif isinstance(headingData, list): - # Render a list as bullet list under a default heading label - return self._renderJsonBulletList({"items": headingData}, styles) elif not isinstance(headingData, dict): return "" @@ -402,21 +416,28 @@ class RendererHtml(BaseRenderer): def _renderJsonParagraph(self, paragraphData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON paragraph to HTML using AI-generated styles.""" try: - # Normalize non-dict inputs - if isinstance(paragraphData, str): - paragraphData = {"text": paragraphData} - elif isinstance(paragraphData, list): - # Treat list as bullet list paragraph - return self._renderJsonBulletList({"items": paragraphData}, styles) - elif not isinstance(paragraphData, dict): + # Normalize inputs - paragraphData is typically a list of elements from _getSectionData + if isinstance(paragraphData, list): + # Extract text from all paragraph elements + texts = [] + for el in paragraphData: + if isinstance(el, dict) and "text" in el: + texts.append(el["text"]) + elif isinstance(el, str): + texts.append(el) + if texts: + # Join multiple paragraphs with

tags + return '\n'.join(f'

{text}

' for text in texts) + return "" + elif isinstance(paragraphData, str): + return f'

{paragraphData}

' + elif isinstance(paragraphData, dict): + text = paragraphData.get("text", "") + if text: + return f'

{text}

' + return "" + else: return "" - - text = paragraphData.get("text", "") - - if text: - return f'

{text}

' - - return "" except Exception as e: self.logger.warning(f"Error rendering paragraph: {str(e)}") @@ -441,16 +462,145 @@ class RendererHtml(BaseRenderer): return "" def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str: - """Render a JSON image to HTML.""" + """Render a JSON image to HTML with placeholder for later replacement.""" try: base64Data = imageData.get("base64Data", "") altText = imageData.get("altText", "Image") + caption = imageData.get("caption", "") if base64Data: - return f'{altText}' + # Use data URI as placeholder - will be replaced with file path in _replaceImageDataUris + # Include a marker so we can find and replace it + imageMarker = f"" + imgTag = f'{altText}' + + if caption: + return f'{imageMarker}
{imgTag}
{caption}
' + else: + return f'{imageMarker}{imgTag}' return "" except Exception as e: self.logger.warning(f"Error rendering image: {str(e)}") return f'
[Image: {imageData.get("altText", "Image")}]
' + + def _extractImages(self, jsonContent: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Extract all images from JSON structure. + + Returns: + List of image data dictionaries with base64Data, altText, caption, sectionId + """ + images = [] + + try: + # Extract from standardized schema: {metadata: {...}, documents: [{sections: [...]}]} + documents = jsonContent.get("documents", []) + if not documents or not isinstance(documents, list): + return images + + for doc in documents: + if not isinstance(doc, dict): + continue + sections = doc.get("sections", []) + for section in sections: + if section.get("content_type") == "image": + elements = section.get("elements", []) + for element in elements: + base64Data = element.get("base64Data", "") + + # If base64Data not found, try extracting from url data URI + if not base64Data: + url = element.get("url", "") + if url.startswith("data:image/"): + # Extract base64 from data URI: data:image/png;base64, + import re + match = re.match(r'data:image/[^;]+;base64,(.+)', url) + if match: + base64Data = match.group(1) + + if base64Data: + sectionId = section.get("id", "unknown") + # Generate filename from section ID + filename = f"{sectionId}.png" + # Clean filename (remove invalid characters) + filename = "".join(c if c.isalnum() or c in "._-" else "_" for c in filename) + + images.append({ + "base64Data": base64Data, + "altText": element.get("altText", "Image"), + "caption": element.get("caption"), + "sectionId": sectionId, + "filename": filename + }) + self.logger.debug(f"Extracted image from section {sectionId}: {filename}") + + self.logger.info(f"Extracted {len(images)} image(s) from JSON structure") + return images + + except Exception as e: + self.logger.warning(f"Error extracting images: {str(e)}") + return [] + + def _replaceImageDataUris(self, htmlContent: str, images: List[Dict[str, Any]]) -> str: + """ + Replace base64 data URIs in HTML with relative file paths. 
+ + Args: + htmlContent: HTML content with data URIs + images: List of image data dictionaries + + Returns: + HTML content with relative file paths + """ + try: + import base64 + import re + + # Find all image data URIs in HTML + dataUriPattern = r'data:image/png;base64,([A-Za-z0-9+/=]+)' + + def replaceDataUri(match): + base64Data = match.group(1) + + # Find matching image in images list + matchingImage = None + for img in images: + if img["base64Data"] == base64Data or img["base64Data"].startswith(base64Data[:100]): + matchingImage = img + break + + if matchingImage: + # Use filename from image data (generated from section ID) + filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png") + + # Replace with relative path + altText = matchingImage.get("altText", "Image") + caption = matchingImage.get("caption", "") + + if caption: + return f'
{altText}
{caption}
' + else: + return f'{altText}' + else: + # Keep original if no match found + return match.group(0) + + # Replace all data URIs + updatedHtml = re.sub(dataUriPattern, replaceDataUri, htmlContent) + + return updatedHtml + + except Exception as e: + self.logger.warning(f"Error replacing image data URIs: {str(e)}") + return htmlContent # Return original if replacement fails + + def getRenderedImages(self) -> List[Dict[str, Any]]: + """ + Get images that were extracted during rendering. + Returns list of image dicts with base64Data, altText, caption, and filename. + """ + if not hasattr(self, '_renderedImages'): + return [] + return self._renderedImages diff --git a/modules/services/serviceGeneration/renderers/rendererImage.py b/modules/services/serviceGeneration/renderers/rendererImage.py index 53392d07..7ea450b2 100644 --- a/modules/services/serviceGeneration/renderers/rendererImage.py +++ b/modules/services/serviceGeneration/renderers/rendererImage.py @@ -123,7 +123,7 @@ class RendererImage(BaseRenderer): promptParts.append(f"Document Title: {title}") # Analyze content and create visual description - sections = extractedContent.get("sections", []) + sections = self._extractSections(extractedContent) contentDescription = self._analyzeContentForVisualDescription(sections) if contentDescription: @@ -286,7 +286,7 @@ Return only the compressed prompt, no explanations. 
styleElements.append("corporate, professional design") # Analyze content type for additional style hints - sections = extractedContent.get("sections", []) + sections = self._extractSections(extractedContent) hasTables = any(self._getSectionType(s) == "table" for s in sections) hasLists = any(self._getSectionType(s) == "bullet_list" for s in sections) hasCode = any(self._getSectionType(s) == "code_block" for s in sections) diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py index b07c8d51..3c9569e9 100644 --- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py +++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py @@ -41,15 +41,16 @@ class RendererMarkdown(BaseRenderer): def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str: """Generate markdown content from structured JSON document.""" try: - # Validate JSON structure - if not isinstance(jsonContent, dict): - raise ValueError("JSON content must be a dictionary") + # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) + if not self._validateJsonStructure(jsonContent): + raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in jsonContent: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(jsonContent) + metadata = self._extractMetadata(jsonContent) # Use title from JSON metadata if available, otherwise use provided title - documentTitle = jsonContent.get("metadata", {}).get("title", title) + documentTitle = metadata.get("title", title) # Build markdown content markdownParts = [] @@ -59,7 +60,6 @@ class RendererMarkdown(BaseRenderer): markdownParts.append("") # Process each section - sections = jsonContent.get("sections", []) for 
section in sections: sectionMarkdown = self._renderJsonSection(section) if sectionMarkdown: diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py index 9d0e483d..1cfcfad7 100644 --- a/modules/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/services/serviceGeneration/renderers/rendererPdf.py @@ -65,14 +65,15 @@ class RendererPdf(BaseRenderer): styles = await self._getStyleSet(userPrompt, aiService) # Validate JSON structure - if not isinstance(json_content, dict): - raise ValueError("JSON content must be a dictionary") + if not self._validateJsonStructure(json_content): + raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in json_content: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(json_content) + metadata = self._extractMetadata(json_content) # Use title from JSON metadata if available, otherwise use provided title - document_title = json_content.get("metadata", {}).get("title", title) + document_title = metadata.get("title", title) # Make title shorter to prevent wrapping/overlapping if len(document_title) > 40: @@ -102,8 +103,7 @@ class RendererPdf(BaseRenderer): story.append(Spacer(1, 30)) # Add spacing before page break story.append(PageBreak()) - # Process each section - sections = json_content.get("sections", []) + # Process each section (sections already extracted above) self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER") for i, section in enumerate(sections): self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER") @@ -505,7 +505,7 @@ class RendererPdf(BaseRenderer): except Exception as e: 
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}") - return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))] + return [Paragraph(f"[Error rendering section: {str(e)}]", self._createNormalStyle(styles))] def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON table to PDF elements using AI-generated styles.""" @@ -555,9 +555,9 @@ class RendererPdf(BaseRenderer): elements = [] for item in items: if isinstance(item, str): - elements.append(Paragraph(f"• {item}", self._create_normal_style(styles))) + elements.append(Paragraph(f"• {item}", self._createNormalStyle(styles))) elif isinstance(item, dict) and "text" in item: - elements.append(Paragraph(f"• {item['text']}", self._create_normal_style(styles))) + elements.append(Paragraph(f"• {item['text']}", self._createNormalStyle(styles))) if elements: elements.append(Spacer(1, bullet_style_def.get("space_after", 3))) @@ -637,16 +637,84 @@ class RendererPdf(BaseRenderer): return [] def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: - """Render a JSON image to PDF elements.""" + """Render a JSON image to PDF elements using reportlab.""" try: base64_data = image_data.get("base64Data", "") alt_text = image_data.get("altText", "Image") + caption = image_data.get("caption", "") - if base64_data: - # For now, just add a placeholder since reportlab image handling is complex + # If base64Data not found, try extracting from url data URI + if not base64_data: + url = image_data.get("url", "") + if url.startswith("data:image/"): + # Extract base64 from data URI: data:image/png;base64, + import re + match = re.match(r'data:image/[^;]+;base64,(.+)', url) + if match: + base64_data = match.group(1) + + if not base64_data: return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))] - return [] + try: + from reportlab.platypus import Image as ReportLabImage 
+ from reportlab.lib.units import inch + import base64 + import io + + # Decode base64 image data + imageBytes = base64.b64decode(base64_data) + imageStream = io.BytesIO(imageBytes) + + # Create reportlab Image element + # Try to get image dimensions from PIL + try: + from PIL import Image as PILImage + pilImage = PILImage.open(imageStream) + imgWidth, imgHeight = pilImage.size + + # Scale to fit page (max width 6 inches, maintain aspect ratio) + maxWidth = 6 * inch + if imgWidth > maxWidth: + scale = maxWidth / imgWidth + imgWidth = maxWidth + imgHeight = imgHeight * scale + else: + imgWidth = imgWidth * (inch / 72) # Convert pixels to inches (assuming 72 DPI) + imgHeight = imgHeight * (inch / 72) + + # Reset stream for reportlab + imageStream.seek(0) + except Exception: + # Fallback: use default size + imgWidth = 4 * inch + imgHeight = 3 * inch + imageStream.seek(0) + + # Create reportlab Image + reportlabImage = ReportLabImage(imageStream, width=imgWidth, height=imgHeight) + + elements = [reportlabImage] + + # Add caption if available + if caption: + captionStyle = self._createNormalStyle(styles) + captionStyle.fontSize = 10 + captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666")) + elements.append(Paragraph(f"{caption}", captionStyle)) + elif alt_text and alt_text != "Image": + # Use alt text as caption if no caption provided + captionStyle = self._createNormalStyle(styles) + captionStyle.fontSize = 10 + captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666")) + elements.append(Paragraph(f"Figure: {alt_text}", captionStyle)) + + return elements + + except Exception as imgError: + self.logger.warning(f"Error embedding image in PDF: {str(imgError)}") + # Fallback to placeholder + return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))] except Exception as e: self.logger.warning(f"Error rendering image: {str(e)}") diff --git 
a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index ac04ea90..f7b65eb1 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -66,6 +66,9 @@ class RendererPptx(BaseRenderer): # Debug: Show first 200 chars of content logger.info(f"JSON content preview: {str(extractedContent)[:200]}...") + # Store prs reference for image methods + self._currentPresentation = prs + for i, slide_data in enumerate(slidesData): logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars") # Debug: Show slide content preview @@ -75,6 +78,9 @@ class RendererPptx(BaseRenderer): else: logger.warning(f" ⚠️ Slide {i+1} has NO content!") + # Check if slide has images + hasImages = slide_data.get("images") and len(slide_data.get("images", [])) > 0 + # Create slide with appropriate layout based on content slideLayoutIndex = self._getSlideLayoutIndex(slide_data, styles) slide_layout = prs.slide_layouts[slideLayoutIndex] @@ -92,67 +98,71 @@ class RendererPptx(BaseRenderer): title_color = self._get_safe_color(title_style.get("color", (31, 78, 121))) title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color) - # Set content with AI-generated styling - content_shape = slide.placeholders[1] - content_text = slide_data.get("content", "") + # Handle images first (if present) + if hasImages: + self._addImagesToSlide(slide, slide_data.get("images", []), styles) - # Format content text with AI styles - text_frame = content_shape.text_frame - text_frame.clear() - - # Split content into paragraphs - paragraphs = content_text.split('\n\n') - - for i, paragraph in enumerate(paragraphs): - if paragraph.strip(): - if i == 0: - p = text_frame.paragraphs[0] - else: - p = text_frame.add_paragraph() - - p.text = paragraph.strip() - - # Apply AI-generated styling based on content 
type - if paragraph.startswith('#'): - # Header - p.text = paragraph.lstrip('#').strip() - heading_style = styles.get("heading", {}) - p.font.size = Pt(heading_style.get("font_size", 32)) - p.font.bold = heading_style.get("bold", True) - heading_color = self._get_safe_color(heading_style.get("color", (47, 47, 47))) - p.font.color.rgb = RGBColor(*heading_color) - elif paragraph.startswith('##'): - # Subheader - p.text = paragraph.lstrip('#').strip() - subheading_style = styles.get("subheading", {}) - p.font.size = Pt(subheading_style.get("font_size", 24)) - p.font.bold = subheading_style.get("bold", True) - subheading_color = self._get_safe_color(subheading_style.get("color", (79, 79, 79))) - p.font.color.rgb = RGBColor(*subheading_color) - elif paragraph.startswith('*') and paragraph.endswith('*'): - # Bold text - p.text = paragraph.strip('*') - paragraph_style = styles.get("paragraph", {}) - p.font.size = Pt(paragraph_style.get("font_size", 18)) - p.font.bold = True - paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47))) - p.font.color.rgb = RGBColor(*paragraph_color) - else: - # Regular text - paragraph_style = styles.get("paragraph", {}) - p.font.size = Pt(paragraph_style.get("font_size", 18)) - p.font.bold = paragraph_style.get("bold", False) - paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47))) - p.font.color.rgb = RGBColor(*paragraph_color) - - # Apply alignment - align = paragraph_style.get("align", "left") - if align == "center": - p.alignment = PP_ALIGN.CENTER - elif align == "right": - p.alignment = PP_ALIGN.RIGHT - else: - p.alignment = PP_ALIGN.LEFT + # Set content with AI-generated styling (if not image-only slide) + if slide_content or not hasImages: + content_shape = slide.placeholders[1] + + # Format content text with AI styles + text_frame = content_shape.text_frame + text_frame.clear() + + # Split content into paragraphs + paragraphs = slide_content.split('\n\n') + + for paraIdx, 
paragraph in enumerate(paragraphs): + if paragraph.strip(): + if paraIdx == 0: + p = text_frame.paragraphs[0] + else: + p = text_frame.add_paragraph() + + p.text = paragraph.strip() + + # Apply AI-generated styling based on content type + if paragraph.startswith('#'): + # Header + p.text = paragraph.lstrip('#').strip() + heading_style = styles.get("heading", {}) + p.font.size = Pt(heading_style.get("font_size", 32)) + p.font.bold = heading_style.get("bold", True) + heading_color = self._get_safe_color(heading_style.get("color", (47, 47, 47))) + p.font.color.rgb = RGBColor(*heading_color) + elif paragraph.startswith('##'): + # Subheader + p.text = paragraph.lstrip('#').strip() + subheading_style = styles.get("subheading", {}) + p.font.size = Pt(subheading_style.get("font_size", 24)) + p.font.bold = subheading_style.get("bold", True) + subheading_color = self._get_safe_color(subheading_style.get("color", (79, 79, 79))) + p.font.color.rgb = RGBColor(*subheading_color) + elif paragraph.startswith('*') and paragraph.endswith('*'): + # Bold text + p.text = paragraph.strip('*') + paragraph_style = styles.get("paragraph", {}) + p.font.size = Pt(paragraph_style.get("font_size", 18)) + p.font.bold = True + paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47))) + p.font.color.rgb = RGBColor(*paragraph_color) + else: + # Regular text + paragraph_style = styles.get("paragraph", {}) + p.font.size = Pt(paragraph_style.get("font_size", 18)) + p.font.bold = paragraph_style.get("bold", False) + paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47))) + p.font.color.rgb = RGBColor(*paragraph_color) + + # Apply alignment + align = paragraph_style.get("align", "left") + if align == "center": + p.alignment = PP_ALIGN.CENTER + elif align == "right": + p.alignment = PP_ALIGN.RIGHT + else: + p.alignment = PP_ALIGN.LEFT # If no slides were created, create a default slide if not slidesData: @@ -568,15 +578,16 @@ JSON ONLY. 
NO OTHER TEXT.""" slides = [] try: - # Validate JSON structure - if not isinstance(json_content, dict): - raise ValueError("JSON content must be a dictionary") + # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) + if not self._validateJsonStructure(json_content): + raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}") - if "sections" not in json_content: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(json_content) + metadata = self._extractMetadata(json_content) # Use title from JSON metadata if available, otherwise use provided title - document_title = json_content.get("metadata", {}).get("title", title) + document_title = metadata.get("title", title) # Create title slide slides.append({ @@ -585,7 +596,6 @@ JSON ONLY. NO OTHER TEXT.""" }) # Process sections into slides based on content and user intent - sections = json_content.get("sections", []) slides.extend(self._createSlidesFromSections(sections, styles)) # If no content slides were created, create a default content slide @@ -624,6 +634,24 @@ JSON ONLY. NO OTHER TEXT.""" content_type = section.get("content_type", "paragraph") elements = section.get("elements", []) + # Handle image sections specially + if content_type == "image": + # Extract image data + images = [] + for element in elements: + if element.get("base64Data"): + images.append({ + "base64Data": element.get("base64Data"), + "altText": element.get("altText", "Image"), + "caption": element.get("caption") + }) + + return { + "title": section_title or element.get("altText", "Image"), + "content": "", # No text content for image slides + "images": images + } + # Build slide content based on section type content_parts = [] @@ -645,7 +673,8 @@ JSON ONLY. 
NO OTHER TEXT.""" return { "title": section_title, - "content": slide_content + "content": slide_content, + "images": [] # No images for non-image sections } except Exception as e: @@ -835,7 +864,8 @@ JSON ONLY. NO OTHER TEXT.""" if current_slide_content: slides.append({ "title": current_slide_title, - "content": "\n\n".join(current_slide_content) + "content": "\n\n".join(current_slide_content), + "images": [] }) current_slide_content = [] @@ -844,6 +874,31 @@ JSON ONLY. NO OTHER TEXT.""" if isinstance(element, dict) and "text" in element: current_slide_title = element.get("text", "Untitled Section") break + elif section_type == "image": + # Create separate slide for image + if current_slide_content: + slides.append({ + "title": current_slide_title, + "content": "\n\n".join(current_slide_content), + "images": [] + }) + current_slide_content = [] + + # Extract image data + imageData = [] + for element in elements: + if element.get("base64Data"): + imageData.append({ + "base64Data": element.get("base64Data"), + "altText": element.get("altText", "Image"), + "caption": element.get("caption") + }) + + slides.append({ + "title": section.get("title") or (imageData[0].get("altText", "Image") if imageData else "Image"), + "content": "", + "images": imageData + }) else: # Add content to current slide formatted_content = self._formatSectionContent(section) @@ -854,7 +909,8 @@ JSON ONLY. NO OTHER TEXT.""" if current_slide_content: slides.append({ "title": current_slide_title, - "content": "\n\n".join(current_slide_content) + "content": "\n\n".join(current_slide_content), + "images": [] }) return slides @@ -869,6 +925,10 @@ JSON ONLY. NO OTHER TEXT.""" content_type = section.get("content_type", "paragraph") elements = section.get("elements", []) + # Image sections return empty content (handled separately) + if content_type == "image": + return "" + # Process each element in the section content_parts = [] for element in elements: @@ -891,6 +951,110 @@ JSON ONLY. 
NO OTHER TEXT.""" logger.warning(f"Error formatting section content: {str(e)}") return "" + def _addImagesToSlide(self, slide, images: List[Dict[str, Any]], styles: Dict[str, Any]) -> None: + """Add images to a PowerPoint slide.""" + try: + from pptx.util import Inches, Pt + from pptx.enum.text import PP_ALIGN + import base64 + import io + + if not images: + return + + # Get slide dimensions from presentation + if hasattr(self, '_currentPresentation'): + prs = self._currentPresentation + else: + prs = slide.presentation + slideWidth = prs.slide_width + slideHeight = prs.slide_height + titleHeight = Inches(1.5) # Approximate title height + + # Available area for images + availableWidth = slideWidth - Inches(1) # Margins + availableHeight = slideHeight - titleHeight - Inches(1) # Title + margins + + # Position images + if len(images) == 1: + # Single image: center it + img = images[0] + base64Data = img.get("base64Data") + if base64Data: + imageBytes = base64.b64decode(base64Data) + imageStream = io.BytesIO(imageBytes) + + # Get image dimensions + try: + from PIL import Image as PILImage + pilImage = PILImage.open(imageStream) + imgWidth, imgHeight = pilImage.size + + # Scale to fit available space (max 80% of slide) + maxWidth = availableWidth * 0.8 + maxHeight = availableHeight * 0.8 + + scale = min(maxWidth / imgWidth, maxHeight / imgHeight, 1.0) + finalWidth = imgWidth * scale + finalHeight = imgHeight * scale + + # Center image + left = (slideWidth - finalWidth) / 2 + top = titleHeight + (availableHeight - finalHeight) / 2 + + imageStream.seek(0) + except Exception: + # Fallback: use default size + finalWidth = Inches(6) + finalHeight = Inches(4.5) + left = (slideWidth - finalWidth) / 2 + top = titleHeight + Inches(1) + imageStream.seek(0) + + # Add image to slide + slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) + + # Add caption if available + caption = img.get("caption") or img.get("altText") + if caption and caption != 
"Image": + # Add text box below image + captionTop = top + finalHeight + Inches(0.2) + captionBox = slide.shapes.add_textbox( + Inches(1), + captionTop, + slideWidth - Inches(2), + Inches(0.5) + ) + captionFrame = captionBox.text_frame + captionFrame.text = caption + captionFrame.paragraphs[0].font.size = Pt(12) + captionFrame.paragraphs[0].font.italic = True + captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER + else: + # Multiple images: arrange in grid + cols = 2 if len(images) <= 4 else 3 + rows = (len(images) + cols - 1) // cols + + imgWidth = (availableWidth - Inches(0.5) * (cols - 1)) / cols + imgHeight = (availableHeight - Inches(0.5) * (rows - 1)) / rows + + for idx, img in enumerate(images): + base64Data = img.get("base64Data") + if base64Data: + row = idx // cols + col = idx % cols + + imageBytes = base64.b64decode(base64Data) + imageStream = io.BytesIO(imageBytes) + + left = Inches(0.5) + col * (imgWidth + Inches(0.5)) + top = titleHeight + Inches(0.5) + row * (imgHeight + Inches(0.5)) + + slide.shapes.add_picture(imageStream, left, top, width=imgWidth, height=imgHeight) + + except Exception as e: + logger.warning(f"Error adding images to slide: {str(e)}") + def _formatTimestamp(self) -> str: """Format current timestamp for presentation generation.""" from datetime import datetime, UTC diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py index ceb1c638..56d4af61 100644 --- a/modules/services/serviceGeneration/renderers/rendererText.py +++ b/modules/services/serviceGeneration/renderers/rendererText.py @@ -64,14 +64,15 @@ class RendererText(BaseRenderer): """Generate text content from structured JSON document.""" try: # Validate JSON structure - if not isinstance(jsonContent, dict): - raise ValueError("JSON content must be a dictionary") + if not self._validateJsonStructure(jsonContent): + raise ValueError("JSON content must follow standardized schema: {metadata: 
{...}, documents: [{sections: [...]}]}") - if "sections" not in jsonContent: - raise ValueError("JSON content must contain 'sections' field") + # Extract sections and metadata from standardized schema + sections = self._extractSections(jsonContent) + metadata = self._extractMetadata(jsonContent) # Use title from JSON metadata if available, otherwise use provided title - documentTitle = jsonContent.get("metadata", {}).get("title", title) + documentTitle = metadata.get("title", title) # Build text content textParts = [] @@ -82,7 +83,6 @@ class RendererText(BaseRenderer): textParts.append("") # Process each section - sections = jsonContent.get("sections", []) for section in sections: sectionText = self._renderJsonSection(section) if sectionText: diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py index fadecd88..2ebe11c2 100644 --- a/modules/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py @@ -451,7 +451,7 @@ class RendererXlsx(BaseRenderer): def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]: """Generate sheet names based on actual content structure.""" - sections = jsonContent.get("sections", []) + sections = self._extractSections(jsonContent) # If no sections, create a single sheet if not sections: @@ -496,7 +496,7 @@ class RendererXlsx(BaseRenderer): if not sheetNames: return - sections = jsonContent.get("sections", []) + sections = self._extractSections(jsonContent) tableSections = [s for s in sections if s.get("content_type") == "table"] if len(tableSections) > 1: @@ -607,7 +607,7 @@ class RendererXlsx(BaseRenderer): row += 1 # Content overview - sections = jsonContent.get("sections", []) + sections = self._extractSections(jsonContent) sheet[f'A{row + 1}'] = "Content Overview:" sheet[f'A{row + 1}'].font = Font(bold=True) @@ -640,7 +640,7 @@ class RendererXlsx(BaseRenderer): def 
_populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]): """Populate additional sheets based on content types.""" try: - sections = jsonContent.get("sections", []) + sections = self._extractSections(jsonContent) for sheetName in sheetNames: if sheetName not in sheets: @@ -692,12 +692,14 @@ class RendererXlsx(BaseRenderer): for element in elements: if section_type == "table": startRow = self._addTableToExcel(sheet, element, styles, startRow) - elif section_type == "list": + elif section_type == "bullet_list" or section_type == "list": startRow = self._addListToExcel(sheet, element, styles, startRow) elif section_type == "paragraph": startRow = self._addParagraphToExcel(sheet, element, styles, startRow) elif section_type == "heading": startRow = self._addHeadingToExcel(sheet, element, styles, startRow) + elif section_type == "image": + startRow = self._addImageToExcel(sheet, element, styles, startRow) else: startRow = self._addParagraphToExcel(sheet, element, styles, startRow) @@ -807,6 +809,75 @@ class RendererXlsx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not add heading to Excel: {str(e)}") return startRow + 1 + + def _addImageToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: + """Add an image element to Excel sheet using openpyxl.""" + try: + base64Data = element.get("base64Data", "") + altText = element.get("altText", "Image") + caption = element.get("caption", "") + + if not base64Data: + # No image data - add placeholder text + sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]") + return startRow + 1 + + try: + from openpyxl.drawing.image import Image as OpenpyxlImage + import base64 + import io + + # Decode base64 image data + imageBytes = base64.b64decode(base64Data) + imageStream = io.BytesIO(imageBytes) + + # Create openpyxl Image + img = OpenpyxlImage(imageStream) + + # Set image size (max width 6 
inches, maintain aspect ratio) + maxWidth = 400 # pixels (approximately 6 inches at 72 DPI) + if img.width > maxWidth: + scale = maxWidth / img.width + img.width = maxWidth + img.height = int(img.height * scale) + + # Anchor image to cell (A column, current row) + img.anchor = f'A{startRow}' + + # Add image to sheet + sheet.add_image(img) + + # Calculate height needed for image (approximate) + # Excel row height is in points (1/72 inch), image height is in pixels + # Assuming 72 DPI: pixels = points + imageHeightPoints = img.height / 1.33 # Approximate conversion + sheet.row_dimensions[startRow].height = max(15, imageHeightPoints) # Min 15 points + + # Add caption below image if available + if caption: + startRow += 1 + sheet.cell(row=startRow, column=1, value=caption) + sheet.cell(row=startRow, column=1).font = Font(italic=True, size=10) + sheet.cell(row=startRow, column=1).alignment = Alignment(horizontal="left") + elif altText and altText != "Image": + startRow += 1 + sheet.cell(row=startRow, column=1, value=f"Figure: {altText}") + sheet.cell(row=startRow, column=1).font = Font(italic=True, size=10) + + return startRow + 1 + + except ImportError: + self.logger.warning("openpyxl.drawing.image not available, using placeholder") + sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]") + return startRow + 1 + except Exception as imgError: + self.logger.warning(f"Error embedding image in Excel: {str(imgError)}") + sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]") + return startRow + 1 + + except Exception as e: + self.logger.warning(f"Could not add image to Excel: {str(e)}") + return startRow + 1 def _formatTimestamp(self) -> str: """Format current timestamp for document generation.""" diff --git a/modules/services/serviceGeneration/subContentGenerator.py b/modules/services/serviceGeneration/subContentGenerator.py new file mode 100644 index 00000000..1b1f64a9 --- /dev/null +++ b/modules/services/serviceGeneration/subContentGenerator.py @@ -0,0 
+1,840 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Content Generator for hierarchical document generation. +Generates content for each section in the document structure. +""" + +import logging +import asyncio +from typing import Dict, Any, Optional, List, Callable +from modules.services.serviceGeneration.subContentIntegrator import ContentIntegrator + +logger = logging.getLogger(__name__) + + +class ContentGenerator: + """Generates content for document sections""" + + def __init__(self, services: Any): + self.services = services + self.integrator = ContentIntegrator(services) + + async def generateContent( + self, + structure: Dict[str, Any], + cachedContent: Optional[Dict[str, Any]] = None, + userPrompt: str = "", + progressCallback: Optional[Callable] = None, + parallelGeneration: bool = True, + batchSize: int = 10 + ) -> Dict[str, Any]: + """ + Generate content for all sections in structure. + + Args: + structure: Document structure from Phase 1 + cachedContent: Extracted content cache + userPrompt: Original user prompt + progressCallback: Function to call for progress updates + parallelGeneration: Enable parallel section generation + batchSize: Number of sections to process in parallel + + Returns: + Complete document structure with populated elements + """ + try: + documents = structure.get("documents", []) + + if not documents: + logger.warning("No documents found in structure") + return structure + + allGeneratedSections = [] + totalSectionsAcrossDocs = 0 + + # Count total sections for progress tracking + for doc in documents: + totalSectionsAcrossDocs += len(doc.get("sections", [])) + + if progressCallback: + progressCallback(0, totalSectionsAcrossDocs, "Starting content generation...") + + currentSectionIndex = 0 + + for docIdx, doc in enumerate(documents): + sections = doc.get("sections", []) + totalSections = len(sections) + + if totalSections == 0: + continue + + # Determine if parallel generation is beneficial + # Use 
sequential if only 1 section or if sections depend on each other + useParallel = parallelGeneration and totalSections > 1 + + # Count images - if many images, parallel is still beneficial but slower + imageCount = sum(1 for s in sections if s.get("content_type") == "image") + + if progressCallback and docIdx > 0: + progressCallback( + currentSectionIndex, + totalSectionsAcrossDocs, + f"Processing document {docIdx + 1}/{len(documents)}..." + ) + + if useParallel: + # Generate in batches for parallel processing + generatedSections = await self._generateSectionsParallel( + sections=sections, + cachedContent=cachedContent, + userPrompt=userPrompt, + documentMetadata=structure.get("metadata", {}), + progressCallback=lambda idx, total, msg: progressCallback( + currentSectionIndex + idx, + totalSectionsAcrossDocs, + msg + ) if progressCallback else None, + batchSize=batchSize + ) + else: + # Generate sequentially (better for context-dependent sections) + generatedSections = await self._generateSectionsSequential( + sections=sections, + cachedContent=cachedContent, + userPrompt=userPrompt, + documentMetadata=structure.get("metadata", {}), + progressCallback=lambda idx, total, msg: progressCallback( + currentSectionIndex + idx, + totalSectionsAcrossDocs, + msg + ) if progressCallback else None + ) + + allGeneratedSections.extend(generatedSections) + currentSectionIndex += totalSections + + if progressCallback: + progressCallback( + totalSectionsAcrossDocs, + totalSectionsAcrossDocs, + "Content generation complete" + ) + + # Integrate generated content into structure + completeStructure = self.integrator.integrateContent( + structure=structure, + generatedSections=allGeneratedSections + ) + + return completeStructure + + except Exception as e: + logger.error(f"Error generating content: {str(e)}") + raise + + async def _generateSectionsSequential( + self, + sections: List[Dict[str, Any]], + cachedContent: Optional[Dict[str, Any]], + userPrompt: str, + documentMetadata: 
Dict[str, Any], + progressCallback: Optional[Callable] = None + ) -> List[Dict[str, Any]]: + """ + Generate sections sequentially with enhanced progress tracking. + Uses previous sections for context continuity. + """ + generatedSections = [] + previousSections = [] + totalSections = len(sections) + + for idx, section in enumerate(sections): + try: + contentType = section.get("content_type", "content") + sectionId = section.get("id", f"section_{idx}") + + # Enhanced progress message + if contentType == "image": + message = f"Generating image: {section.get('generation_hint', 'Image')[:50]}..." + elif contentType == "heading": + message = f"Generating heading..." + elif contentType == "paragraph": + message = f"Generating paragraph..." + else: + message = f"Generating {contentType}..." + + if progressCallback: + progressCallback( + idx + 1, + totalSections, + message + ) + + context = { + "userPrompt": userPrompt, + "cachedContent": cachedContent, + "previousSections": previousSections.copy(), + "targetSection": section, + "documentMetadata": documentMetadata, + "operationId": None + } + + generated = await self._generateSectionContent(section, context) + generatedSections.append(generated) + previousSections.append(generated) + + # Log success + if contentType == "image": + logger.info(f"Successfully generated image for section {sectionId}") + elif not generated.get("error"): + logger.debug(f"Successfully generated {contentType} for section {sectionId}") + + except Exception as e: + logger.error(f"Error generating section {section.get('id')}: {str(e)}") + errorSection = self.integrator.createErrorSection(section, str(e)) + generatedSections.append(errorSection) + previousSections.append(errorSection) + + return generatedSections + + async def _generateSectionsParallel( + self, + sections: List[Dict[str, Any]], + cachedContent: Optional[Dict[str, Any]], + userPrompt: str, + documentMetadata: Dict[str, Any], + progressCallback: Optional[Callable] = None, + batchSize: 
async def _generateSectionsParallel(
    self,
    sections: List[Dict[str, Any]],
    cachedContent: Optional[Dict[str, Any]],
    userPrompt: str,
    documentMetadata: Dict[str, Any],
    progressCallback: Optional[Callable] = None,
    batchSize: int = 10
) -> List[Dict[str, Any]]:
    """
    Generate sections in parallel batches with enhanced progress tracking.

    Sections of one batch run concurrently, so each of them only receives
    the results of *earlier* batches as ``previousSections`` context.

    Args:
        sections: List of sections to generate
        cachedContent: Extracted content cache
        userPrompt: Original user prompt
        documentMetadata: Document metadata
        progressCallback: Optional callable invoked as
            ``progressCallback(current, total, message)``
        batchSize: Number of sections to process in parallel per batch.
            Values below 1 are treated as 1.

    Returns:
        List of generated sections, in the same order as ``sections``.
        A section whose generation failed is replaced by the result of
        ``self.integrator.createErrorSection``.
    """
    totalSections = len(sections)
    if totalSections == 0:
        return []

    # A zero batch size would make range() raise (step 0) and a negative
    # one would silently yield no batches at all, so clamp to at least 1.
    adjustedBatchSize = max(1, batchSize)

    # Images are slower to generate: shrink the batch when there are many,
    # but never below 3 unless the caller asked for less.
    imageCount = sum(1 for s in sections if s.get("content_type") == "image")
    if imageCount > 0:
        adjustedBatchSize = min(
            adjustedBatchSize,
            max(3, adjustedBatchSize - imageCount // 2)
        )

    totalBatches = (totalSections + adjustedBatchSize - 1) // adjustedBatchSize

    async def generateWithProgress(
        section: Dict[str, Any],
        globalIndex: int,
        batchPreviousSections: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Generate one section, reporting progress and mapping failures to an error section."""
        try:
            contentType = section.get("content_type", "content")
            sectionId = section.get("id", f"section_{globalIndex}")

            if contentType == "image":
                # The hint may be present but None; fall back instead of
                # failing the whole section on a progress message.
                hint = str(section.get("generation_hint") or "Image")
                message = f"Generating image: {hint[:50]}..."
            else:
                message = f"Generating {contentType}..."

            if progressCallback:
                progressCallback(globalIndex + 1, totalSections, message)

            context = {
                "userPrompt": userPrompt,
                "cachedContent": cachedContent,
                # Private copy so one task cannot affect its siblings' context
                "previousSections": batchPreviousSections.copy(),
                "targetSection": section,
                "documentMetadata": documentMetadata,
                "operationId": None  # Can be set if needed for nested progress
            }

            result = await self._generateSectionContent(section, context)

            if contentType == "image":
                logger.info(f"Successfully generated image for section {sectionId}")
            elif not result.get("error"):
                logger.debug(f"Successfully generated {contentType} for section {sectionId}")

            return result

        except Exception as e:
            logger.error(f"Error generating section {section.get('id')}: {str(e)}")
            return self.integrator.createErrorSection(section, str(e))

    generatedSections: List[Dict[str, Any]] = []
    accumulatedPreviousSections: List[Dict[str, Any]] = []  # Results of finished batches

    for batchNum, batchStart in enumerate(range(0, totalSections, adjustedBatchSize)):
        batch = sections[batchStart:batchStart + adjustedBatchSize]
        batchEnd = min(batchStart + adjustedBatchSize, totalSections)

        if progressCallback:
            progressCallback(
                batchStart,
                totalSections,
                f"Processing batch {batchNum + 1}/{totalBatches} ({len(batch)} sections)..."
            )

        batchResults = await asyncio.gather(
            *(
                generateWithProgress(section, batchStart + idx, accumulatedPreviousSections)
                for idx, section in enumerate(batch)
            ),
            return_exceptions=True
        )

        for section, result in zip(batch, batchResults):
            # BaseException also covers a cancelled child task, which
            # gather() hands back as a result when return_exceptions=True.
            if isinstance(result, BaseException):
                logger.error(f"Error in parallel generation batch {batchNum + 1}: {str(result)}")
                result = self.integrator.createErrorSection(section, str(result))
            generatedSections.append(result)
            accumulatedPreviousSections.append(result)  # Context for the next batch

        if progressCallback:
            progressCallback(
                batchEnd,
                totalSections,
                f"Completed batch {batchNum + 1}/{totalBatches}"
            )

    return generatedSections
async def _generateSectionContent(
    self,
    section: Dict[str, Any],
    context: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Generate content for a single section.

    Dispatches on the section: ``content_type == "image"`` goes to the
    image generator, ``complexity == "complex"`` to the complex text
    generator, everything else to the simple generator.

    Args:
        section: Section to generate content for
        context: Generation context

    Returns:
        Section with populated elements array, or the error section built
        by ``self.integrator.createErrorSection`` when generation raised.
    """
    try:
        contentType = section.get("content_type", "")
        complexity = section.get("complexity", "simple")

        if contentType == "image":
            return await self._generateImageSection(section, context)
        if complexity == "complex":
            return await self._generateComplexTextSection(section, context)
        return await self._generateSimpleSection(section, context)

    except Exception as e:
        logger.error(f"Error generating section {section.get('id')}: {str(e)}")
        return self.integrator.createErrorSection(section, str(e))

def _extractSectionElements(
    self,
    elementsData: Any,
    contentType: str
) -> Optional[List[Any]]:
    """
    Normalise a parsed AI response into the section's ``elements`` value.

    Accepted shapes: a bare list, ``{"elements": ...}``, ``{"content": [...]}``,
    ``{"data": [...]}``, a single-key wrapper around a list of dict elements,
    a dict that is itself one element, or a primitive value.

    Returns:
        The elements value, or None when the response carried an explicit
        ``"elements": null``.
    """
    if isinstance(elementsData, list):
        return elementsData

    if not isinstance(elementsData, dict):
        logger.warning(f"AI returned primitive value, wrapping: {type(elementsData)}")
        if contentType == "heading":
            return [{"level": 1, "text": str(elementsData)}]
        return [{"text": str(elementsData)}]

    if "elements" in elementsData:
        return elementsData["elements"]

    # Some models return {"content": [...]} or {"data": [...]}
    for wrapperKey in ("content", "data"):
        if isinstance(elementsData.get(wrapperKey), list):
            return elementsData[wrapperKey]

    if len(elementsData) == 1:
        firstValue = next(iter(elementsData.values()))
        # Only unwrap when the list holds element dicts. {"items": ["a", "b"]}
        # is a bullet-list element itself, not a wrapper around elements.
        if isinstance(firstValue, list) and all(isinstance(item, dict) for item in firstValue):
            return firstValue
        # Otherwise fall through: a dict such as {"text": "..."} is converted
        # below instead of being rejected as an invalid response.

    logger.warning(f"AI returned dict without 'elements' key, attempting to convert: {list(elementsData.keys())}")
    if contentType == "heading":
        text = elementsData.get("text") or elementsData.get("heading") or str(elementsData)
        return [{"level": elementsData.get("level", 1), "text": text}]
    if contentType == "paragraph":
        text = elementsData.get("text") or elementsData.get("content") or str(elementsData)
        return [{"text": text}]
    return [elementsData]

async def _generateSimpleSection(
    self,
    section: Dict[str, Any],
    context: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Generate content for simple section (heading, paragraph).

    Builds the section prompt, calls the AI service, extracts and parses the
    JSON answer (attempting to close truncated JSON) and stores the resulting
    elements on ``section``.

    Args:
        section: Section to generate; mutated in place (``elements`` is set)
        context: Generation context passed on to ``_createSectionPrompt``

    Returns:
        The same ``section`` dict with ``elements`` populated.

    Raises:
        ValueError: when the AI response is empty, contains no JSON, cannot
            be parsed/recovered, or yields no elements.
    """
    try:
        import json
        import traceback
        from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum

        sectionId = section.get('id', 'unknown')
        contentType = section.get('content_type', 'unknown')

        # Debug files are optional: only written when the utils service offers them
        utils = getattr(self.services, 'utils', None) if self.services else None
        canWriteDebug = hasattr(utils, 'writeDebugFile')

        sectionPrompt = self._createSectionPrompt(section, context)

        if canWriteDebug:
            try:
                utils.writeDebugFile(
                    sectionPrompt,
                    f"document_generation_section_{sectionId}_{contentType}_prompt"
                )
            except Exception as e:
                logger.debug(f"Could not write debug file for section prompt: {e}")

        options = AiCallOptions(
            operationType=OperationTypeEnum.DATA_GENERATE,
            resultFormat="json"
        )

        aiResponse = await self.services.ai.callAiContent(
            prompt=sectionPrompt,
            options=options,
            outputFormat="json"
        )

        # Always log the response, even when it is empty
        if canWriteDebug:
            try:
                if not aiResponse:
                    responseContent = '[No response object]'
                elif hasattr(aiResponse, 'content') and aiResponse.content:
                    responseContent = aiResponse.content
                elif hasattr(aiResponse, 'documents') and aiResponse.documents:
                    responseContent = f"[Response has {len(aiResponse.documents)} documents]"
                else:
                    responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]"

                utils.writeDebugFile(
                    responseContent,
                    f"document_generation_section_{sectionId}_{contentType}_response"
                )
                logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)")
            except Exception as e:
                logger.warning(f"Could not write debug file for section response: {e}")
                logger.debug(traceback.format_exc())

        rawContent = aiResponse.content if aiResponse else None
        if not rawContent or not rawContent.strip():
            logger.error(f"AI section generation returned empty response for section {sectionId}")
            logger.error(f"Response object: {aiResponse}, has content: {hasattr(aiResponse, 'content') if aiResponse else False}")
            raise ValueError("AI section generation returned empty response")

        extractedJson = self.services.utils.jsonExtractString(rawContent)
        if not extractedJson or not extractedJson.strip():
            logger.error(f"No JSON found in AI response for section {sectionId}")
            logger.error(f"Raw response (first 1000 chars): {rawContent[:1000]}")
            logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500] if extractedJson else 'None'}")
            raise ValueError("No JSON found in AI section response")

        try:
            elementsData = json.loads(extractedJson)
            logger.debug(f"Parsed JSON for section {section.get('id')}: type={type(elementsData)}, keys={list(elementsData.keys()) if isinstance(elementsData, dict) else 'N/A'}")
        except json.JSONDecodeError as e:
            decodeMessage = str(e)
            logger.error(f"Failed to parse JSON from AI response for section {section.get('id')}")
            logger.error(f"JSON decode error: {decodeMessage}")
            logger.error(f"Extracted JSON length: {len(extractedJson)} chars")
            logger.error(f"Extracted JSON (first 1000 chars): {extractedJson[:1000]}")
            if len(extractedJson) > 1000:
                logger.error(f"Extracted JSON (last 500 chars): {extractedJson[-500:]}")
            logger.error(f"Raw AI response length: {len(rawContent)} chars")
            logger.error(f"Raw AI response (first 1000 chars): {rawContent[:1000] if rawContent else 'None'}")

            # Recovery is only attempted when the text starts like the expected
            # payload and the decoder complained about a missing token.
            looksTruncated = (
                "Expecting" in decodeMessage
                and ("delimiter" in decodeMessage or "value" in decodeMessage)
                and extractedJson.strip().startswith('{"elements"')
            )
            if not looksTruncated:
                raise ValueError(f"Invalid JSON in AI response: {decodeMessage}") from e

            logger.warning("JSON appears truncated, attempting recovery...")
            recoveredJson = None
            try:
                # closeJsonStructures handles unterminated strings properly
                from modules.shared.jsonUtils import closeJsonStructures
                recoveredJson = closeJsonStructures(extractedJson)

                logger.info("Attempting to parse recovered JSON (closed structures)")
                logger.debug(f"Recovered JSON length: {len(recoveredJson)} chars (original: {len(extractedJson)} chars)")

                elementsData = json.loads(recoveredJson)
                logger.info(f"Successfully recovered JSON for section {section.get('id')}")
            except ValueError as recoveryError:  # JSONDecodeError is a ValueError
                logger.error(f"JSON recovery failed: {str(recoveryError)}")
                logger.error(f"Recovered JSON (first 500 chars): {recoveredJson[:500] if recoveredJson is not None else 'N/A'}")
                # Raw content about as long as the extracted JSON hints at a cut-off response
                if len(rawContent) <= len(extractedJson) + 100:
                    logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)")
                    logger.warning("Consider increasing max_tokens for AI calls or checking token limits")
                raise ValueError(f"Invalid JSON in AI response (truncated?): {decodeMessage}") from e

        elements = self._extractSectionElements(elementsData, contentType)

        if elements is None:
            logger.error(f"Could not extract elements from AI response. Response structure: {type(elementsData)}, keys: {list(elementsData.keys()) if isinstance(elementsData, dict) else 'N/A'}")
            logger.error(f"Full response (first 500 chars): {str(extractedJson)[:500]}")
            raise ValueError(f"Invalid elements format in AI response. Expected dict with 'elements' key or list, got: {type(elementsData)}")

        if not isinstance(elements, list):
            logger.warning(f"Elements is not a list, converting: {type(elements)}")
            elements = [elements]

        section["elements"] = elements
        return section

    except Exception as e:
        logger.error(f"Error generating simple section: {str(e)}")
        raise
async def _generateImageSection(
    self,
    section: Dict[str, Any],
    context: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Generate image for image section or include existing image.

    With ``image_source == "existing"`` the image referenced by
    ``image_reference_id`` is looked up in
    ``context["cachedContent"]["imageDocuments"]``; otherwise a new image is
    requested from the AI service using ``image_prompt`` (or a prompt
    derived from ``generation_hint``).

    Args:
        section: Image section; mutated in place (``elements`` is set)
        context: Generation context

    Returns:
        The same ``section`` dict with a single image element.

    Raises:
        ValueError: when the reference id, the referenced image, the prompt
            or the generated image data is missing.
    """
    try:
        # "or {}" rather than a .get() default: the key may be present with
        # an explicit None value, which a default would not replace.
        caption = (section.get("metadata") or {}).get("caption")
        imageSource = section.get("image_source", "generate")

        if imageSource == "existing":
            imageRefId = section.get("image_reference_id")
            if not imageRefId:
                raise ValueError(f"Image section {section.get('id')} has image_source='existing' but no image_reference_id")

            cachedContent = context.get("cachedContent") or {}
            imageDocuments = cachedContent.get("imageDocuments") or []

            imageDoc = next((img for img in imageDocuments if img.get("id") == imageRefId), None)
            if not imageDoc:
                raise ValueError(f"Image document {imageRefId} not found in cachedContent.imageDocuments")

            section["elements"] = [{
                "base64Data": imageDoc.get("base64Data"),
                "altText": imageDoc.get("altText", section.get("generation_hint", "Image")),
                "mimeType": imageDoc.get("mimeType", "image/png"),
                "caption": caption
            }]

            logger.info(f"Successfully included existing image {imageRefId} for section {section.get('id')}")
            return section

        # Generate a new image
        imagePrompt = section.get("image_prompt")
        if not imagePrompt:
            generationHint = section.get("generation_hint", "")
            if not generationHint:
                raise ValueError(f"Image section {section.get('id')} missing image_prompt and generation_hint")
            imagePrompt = f"Create a professional illustration: {generationHint}"

        from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptImage

        promptModel = AiCallPromptImage(
            prompt=imagePrompt,
            size="1024x1024",
            quality="standard",
            style="vivid"
        )
        promptJson = promptModel.model_dump_json(exclude_none=True, indent=2)

        options = AiCallOptions(
            operationType=OperationTypeEnum.IMAGE_GENERATE,
            resultFormat="base64"
        )

        logger.info(f"Starting image generation for section {section.get('id')}: {imagePrompt[:100]}...")

        aiResponse = await self.services.ai.callAiContent(
            prompt=promptJson,
            options=options,
            outputFormat="base64"
        )

        # Preferred location of the image data: first returned document
        base64Data = None
        if aiResponse and aiResponse.documents and len(aiResponse.documents) > 0:
            base64Data = aiResponse.documents[0].documentData
            logger.debug(f"Image data extracted from documents: {len(base64Data) if base64Data else 0} chars")

        # Fallback: the content field might carry the base64 string
        if not base64Data and aiResponse and aiResponse.content:
            base64Data = aiResponse.content
            logger.debug(f"Image data extracted from content: {len(base64Data) if base64Data else 0} chars")

        if not base64Data:
            raise ValueError("Image generation returned no data")

        # Cheap sanity check on the first 100 characters only; a failure is
        # logged but not fatal - the renderer will handle it.
        try:
            import base64
            base64.b64decode(base64Data[:100], validate=True)
        except Exception as e:
            logger.warning(f"Image data may not be valid base64: {str(e)}")

        altText = section.get("generation_hint", "Image")
        if not altText or altText == "Image":
            # Generic hint: use the (length-limited) image prompt as alt text
            altText = (section.get("image_prompt") or "Image")[:100]

        section["elements"] = [{
            "url": f"data:image/png;base64,{base64Data}",
            "base64Data": base64Data,
            "altText": altText,
            "caption": caption
        }]

        logger.info(f"Successfully generated image for section {section.get('id')}")
        return section

    except Exception as e:
        logger.error(f"Error generating image section: {str(e)}")
        raise

async def _generateComplexTextSection(
    self,
    section: Dict[str, Any],
    context: Dict[str, Any]
) -> Dict[str, Any]:
    """Generate content for complex text section (long chapter)"""
    # For now, use same approach as simple section
    # Can be enhanced later with chunking for very long content
    return await self._generateSimpleSection(section, context)
def _createSectionPrompt(
    self,
    section: Dict[str, Any],
    context: Dict[str, Any]
) -> str:
    """
    Create sub-prompt for section content generation.

    The prompt contains the section type and hint, the user request, a
    short preview of up to the last 10 previously generated sections, the
    formatted cached content and the required JSON answer format.

    Args:
        section: Section to generate (reads ``content_type``, ``generation_hint``)
        context: Generation context (reads ``userPrompt``, ``cachedContent``,
            ``previousSections``, ``documentMetadata``)

    Returns:
        The complete prompt text.
    """
    contentType = section.get("content_type", "")
    generationHint = section.get("generation_hint", "")
    userPrompt = context.get("userPrompt", "")
    cachedContent = context.get("cachedContent")
    # Keys may be present with an explicit None value
    previousSections = context.get("previousSections") or []
    documentMetadata = context.get("documentMetadata") or {}

    userLanguage = self._getUserLanguage()

    cachedContentText = ""
    if cachedContent and cachedContent.get("extractedContent"):
        cachedContentText = self._formatCachedContent(cachedContent)

    formattedSections = []
    for s in previousSections[-10:]:  # Last 10 sections for context
        prevContentType = s.get('content_type', 'unknown')
        order = s.get('order', 0)
        dictElements = [elem for elem in (s.get('elements') or []) if isinstance(elem, dict)]

        contentPreview = ""
        if prevContentType in ("heading", "paragraph"):
            withText = next((elem for elem in dictElements if "text" in elem), None)
            if withText is not None:
                # str() so a None/number text from an earlier section cannot
                # break prompt creation for all following sections.
                text = str(withText['text'])
                if prevContentType == "paragraph" and len(text) > 100:
                    text = f"{text[:100]}..."
                contentPreview = f': "{text}"'
        elif prevContentType == "bullet_list":
            withItems = next((elem for elem in dictElements if "items" in elem), None)
            items = withItems['items'] if withItems is not None else None
            if isinstance(items, (list, tuple)) and items:
                contentPreview = f": {items[:3]}{'...' if len(items) > 3 else ''}"

        formattedSections.append(
            f"- Section {order} ({prevContentType}){contentPreview}"
        )
    previousSectionsText = "\n".join(formattedSections)

    prompt = f"""{'='*80}
SECTION TO GENERATE:
{'='*80}
Type: {contentType}
Hint: {generationHint}
{'='*80}

CONTEXT:
- User Request: {userPrompt}
- Previous Sections: {len(previousSections)} sections already generated
- Document Title: {documentMetadata.get('title', 'Unknown')}

{'='*80}
PREVIOUS SECTIONS (for continuity):
{'='*80}
{previousSectionsText if previousSectionsText else "This is the first section."}
{'='*80}

{'='*80}
EXTRACTED CONTENT (if available):
{'='*80}
{cachedContentText if cachedContentText else "None"}
{'='*80}

TASK: Generate content for this section ONLY.

INSTRUCTIONS:
1. Generate content appropriate for section type: {contentType}
2. Use the generation hint: {generationHint}
3. Consider previous sections for continuity
4. Use extracted content if relevant
5. All content must be in the language '{userLanguage}'

6. CRITICAL: Return ONLY a JSON object with an "elements" array. DO NOT return a full document structure.

REQUIRED FORMAT - Return ONLY this structure:

For heading:
{{"elements": [{{"level": 1, "text": "Heading Text"}}]}}

For paragraph:
{{"elements": [{{"text": "Paragraph text content"}}]}}

For table:
{{"elements": [{{"headers": ["Col1", "Col2"], "rows": [["Row1", "Row2"]]}}]}}

For bullet_list:
{{"elements": [{{"items": ["Item 1", "Item 2"]}}]}}

For code_block:
{{"elements": [{{"code": "code content here", "language": "python"}}]}}

CRITICAL RULES:
- Return ONLY {{"elements": [...]}} - nothing else
- DO NOT include "metadata", "documents", "sections", or any other fields
- DO NOT return a full document structure
- DO NOT add explanatory text before or after the JSON
- The response must start with {{"elements": and end with }}
- This is a SINGLE SECTION, not a full document
"""
    return prompt

def _formatCachedContent(self, cachedContent: Dict[str, Any]) -> str:
    """
    Format cached content for prompt inclusion.

    Entries of ``cachedContent["extractedContent"]`` that expose ``parts``
    contribute the ``content`` of each part; any other entry contributes
    its ``str()`` form. Pieces are joined with blank lines.

    Returns:
        The joined text, "No content extracted." when there is nothing to
        show, or "Error formatting cached content." on unexpected errors.
    """
    try:
        extractedContent = cachedContent.get("extractedContent", [])
        if not extractedContent:
            return "No content extracted."

        formattedParts = []
        for extracted in extractedContent:
            if not hasattr(extracted, 'parts'):
                formattedParts.append(str(extracted))
                continue
            for part in extracted.parts or []:
                content = getattr(part, 'content', None)
                # Skip empty parts and coerce the rest, so a single
                # non-string part cannot make join() fail for everything.
                if content is not None:
                    formattedParts.append(str(content))

        return "\n\n".join(formattedParts) if formattedParts else "No content extracted."

    except Exception as e:
        logger.warning(f"Error formatting cached content: {str(e)}")
        return "Error formatting cached content."
def _getUserLanguage(self) -> str:
    """
    Get user language for document generation.

    Looks at ``self.services.currentUserLanguage`` first, then at
    ``self.services.user.language``.

    Returns:
        The first non-empty language found, otherwise 'en'.
    """
    try:
        services = getattr(self, 'services', None)
        if services:
            currentLanguage = getattr(services, 'currentUserLanguage', None)
            if currentLanguage:
                return currentLanguage
            # An unset (None/empty) user language must not leak into the
            # prompts; fall through to the default instead.
            userLanguage = getattr(getattr(services, 'user', None), 'language', None)
            if userLanguage:
                return userLanguage
    except Exception as e:
        # Best effort: language detection must never break generation
        logger.debug(f"Could not determine user language, using default: {e}")
    return 'en'  # Default fallback
class ContentIntegrator:
    """Integrates generated content into document structure"""

    # Element keys whose non-empty value counts as actual content
    _contentKeys = ("text", "base64Data", "url", "headers", "rows", "items", "code")

    def __init__(self, services: Any = None):
        self.services = services

    def integrateContent(
        self,
        structure: Dict[str, Any],
        generatedSections: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Merge generated sections into document structure.

        Sections are matched by ``id``. ``structure`` is modified in place.
        A structure section without a generated counterpart is replaced by
        an error section.

        Args:
            structure: Original document structure
            generatedSections: List of sections with populated elements

        Returns:
            Complete document structure ready for rendering
        """
        try:
            sectionMap = {section.get("id"): section for section in generatedSections}

            for doc in structure.get("documents", []):
                sections = doc.get("sections", [])

                for idx, section in enumerate(sections):
                    sectionId = section.get("id")
                    generatedSection = sectionMap.get(sectionId) if sectionId in sectionMap else None

                    if generatedSection is None:
                        # Section not generated - create error section
                        logger.warning(f"Section {sectionId} not found in generated sections")
                        sections[idx] = self.createErrorSection(
                            section,
                            f"Section {sectionId} was not generated"
                        )
                        continue

                    if "elements" in generatedSection:
                        section["elements"] = generatedSection["elements"]

                    if generatedSection.get("error"):
                        section["error"] = True
                        section["errorMessage"] = generatedSection.get("errorMessage")
                        section["originalContentType"] = generatedSection.get("originalContentType")
                        # The error elements are plain text, so the section
                        # has to take over the error section's content type;
                        # otherwise e.g. an "image" section would carry a
                        # text element.
                        section["content_type"] = generatedSection.get("content_type", "paragraph")

            # Debug: Write final merged structure to debug file
            if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
                try:
                    import json
                    structureJson = json.dumps(structure, indent=2, ensure_ascii=False)
                    self.services.utils.writeDebugFile(
                        structureJson,
                        "document_generation_final_merged_json"
                    )
                    logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)")
                except Exception as e:
                    logger.debug(f"Could not write debug file for final merged JSON: {e}")

            return structure

        except Exception as e:
            logger.error(f"Error integrating content: {str(e)}")
            raise

    @classmethod
    def _elementHasContent(cls, element: Any) -> bool:
        """Return True when an element carries any renderable content."""
        if isinstance(element, dict):
            return any(element.get(key) for key in cls._contentKeys)
        # Non-dict elements (e.g. a wrapped primitive) count when truthy
        return bool(element)

    def validateCompleteness(
        self,
        document: Dict[str, Any]
    ) -> Tuple[bool, List[str]]:
        """
        Validate that all sections have content.

        Error sections are never reported as missing.

        Args:
            document: Document structure to validate

        Returns:
            (is_complete, list_of_missing_sections); on an unexpected error
            ``(False, ["Validation error: ..."])``.
        """
        missingSections = []

        try:
            for doc in document.get("documents", []):
                for section in doc.get("sections", []):
                    if section.get("error"):
                        continue
                    elements = section.get("elements", [])
                    if not elements or not any(self._elementHasContent(e) for e in elements):
                        missingSections.append(section.get("id", "unknown"))

            return len(missingSections) == 0, missingSections

        except Exception as e:
            logger.error(f"Error validating completeness: {str(e)}")
            return False, [f"Validation error: {str(e)}"]

    def createErrorSection(
        self,
        originalSection: Dict[str, Any],
        errorMessage: str
    ) -> Dict[str, Any]:
        """
        Create error placeholder section.

        Args:
            originalSection: Original section that failed
            errorMessage: Error message to display

        Returns:
            Error section with placeholder content
        """
        contentType = originalSection.get("content_type", "content")
        sectionId = originalSection.get("id", "unknown")

        return {
            "id": sectionId,
            "content_type": "paragraph",  # Change to paragraph for error display
            "elements": [{
                "text": f"[ERROR: Failed to generate {contentType} for section '{sectionId}'. Error: {errorMessage}]"
            }],
            "order": originalSection.get("order", 0),
            "error": True,
            "errorMessage": errorMessage,
            "originalContentType": contentType,
            "title": originalSection.get("title"),
            "generation_hint": originalSection.get("generation_hint"),
            "complexity": originalSection.get("complexity")
        }
+ } + """ + try: + if not chatDocuments: + return { + "document_purposes": [], + "overall_intent": "No documents provided" + } + + # Create document metadata list for AI analysis + documentMetadata = [] + for doc in chatDocuments: + docInfo = { + "document_id": doc.id, + "fileName": doc.fileName, + "mimeType": doc.mimeType, + "fileSize": doc.fileSize + } + documentMetadata.append(docInfo) + + # Create analysis prompt + analysisPrompt = self._createAnalysisPrompt( + userPrompt=userPrompt, + actionContext=actionContext, + documentMetadata=documentMetadata + ) + + # Debug: Log purpose analysis prompt + if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): + try: + self.services.utils.writeDebugFile( + analysisPrompt, + "document_purpose_analysis_prompt" + ) + except Exception as e: + logger.debug(f"Could not write debug file for purpose analysis prompt: {e}") + + # Call AI for analysis + options = AiCallOptions( + operationType=OperationTypeEnum.DATA_GENERATE, + resultFormat="json" + ) + + aiResponse = await self.services.ai.callAiContent( + prompt=analysisPrompt, + options=options, + outputFormat="json" + ) + + # Debug: Log purpose analysis response + if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): + try: + responseContent = aiResponse.content if aiResponse and aiResponse.content else '' + responseMetadata = { + "status": aiResponse.status if aiResponse else "N/A", + "error": aiResponse.error if aiResponse else "N/A", + "documents_count": len(aiResponse.documents) if aiResponse and aiResponse.documents else 0 + } + self.services.utils.writeDebugFile( + f"Response Content:\n{responseContent}\n\nResponse Metadata:\n{json.dumps(responseMetadata, indent=2)}", + "document_purpose_analysis_response" + ) + except Exception as e: + logger.debug(f"Could not write debug file for purpose analysis response: {e}") + + if not aiResponse or not aiResponse.content: + 
logger.warning("AI purpose analysis returned empty response, using defaults") + return self._createDefaultPurposes(chatDocuments, actionContext) + + # Extract and parse JSON + extractedJson = self.services.utils.jsonExtractString(aiResponse.content) + if not extractedJson: + logger.warning("No JSON found in purpose analysis response, using defaults") + return self._createDefaultPurposes(chatDocuments, actionContext) + + try: + analysisResult = json.loads(extractedJson) + + # Validate structure + if "document_purposes" not in analysisResult: + logger.warning("Invalid analysis result structure, using defaults") + return self._createDefaultPurposes(chatDocuments, actionContext) + + # Ensure all documents have purposes + analyzedIds = {dp.get("document_id") for dp in analysisResult.get("document_purposes", [])} + for doc in chatDocuments: + if doc.id not in analyzedIds: + logger.warning(f"Document {doc.id} not in analysis result, adding default purpose") + defaultPurpose = self._determineDefaultPurpose(doc, actionContext) + analysisResult["document_purposes"].append({ + "document_id": doc.id, + "purpose": defaultPurpose, + "reasoning": f"Default purpose based on document type and action context", + "extractionPrompt": None, + "processingNotes": None + }) + + return analysisResult + + except json.JSONDecodeError as e: + logger.error(f"Failed to parse purpose analysis JSON: {str(e)}") + logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500]}") + return self._createDefaultPurposes(chatDocuments, actionContext) + + except Exception as e: + logger.error(f"Error analyzing document purposes: {str(e)}") + return self._createDefaultPurposes(chatDocuments, actionContext) + + def _createAnalysisPrompt( + self, + userPrompt: str, + actionContext: str, + documentMetadata: List[Dict[str, Any]] + ) -> str: + """Create AI prompt for document purpose analysis""" + + # Format document list + docListText = "" + for i, docInfo in enumerate(documentMetadata, 1): + 
docListText += f"\n{i}. Document ID: {docInfo['document_id']}\n" + docListText += f" File Name: {docInfo['fileName']}\n" + docListText += f" MIME Type: {docInfo['mimeType']}\n" + docListText += f" File Size: {docInfo['fileSize']} bytes\n" + + # Get user language + userLanguage = self._getUserLanguage() + + prompt = f"""{'='*80} +DOCUMENT PURPOSE ANALYSIS +{'='*80} + +USER PROMPT: +{userPrompt} + +ACTION CONTEXT: {actionContext} + +DOCUMENTS PROVIDED: +{docListText} +{'='*80} + +TASK: For each document, determine its purpose based on: +1. User prompt intent (what the user wants to do) +2. Action context (what action is being performed) +3. Document type (mimeType - is it text, image, etc.) +4. Document metadata (fileName, size) + +AVAILABLE PURPOSES: +- "extract_text_content": Extract text content for use in document generation +- "include_image": Include the image directly in the generated document (for images) +- "analyze_image_vision": Analyze image with vision AI to extract text/information (for images with text/charts) +- "use_as_template": Use document structure/layout as template for generation +- "use_as_reference": Use as background context/reference without detailed extraction +- "extract_data": Extract structured data (key-value pairs, entities, fields) +- "attach": Document is an attachment - don't process, just attach to output +- "convert_format": Convert document format (for convert actions) +- "translate": Translate document content (for translate actions) +- "summarize": Create summary of document (for summarize actions) +- "compare": Compare documents (for comparison actions) +- "merge": Merge documents (for merge actions) +- "extract_tables_charts": Extract tables and charts specifically +- "use_for_styling": Use document for styling/formatting reference only +- "extract_metadata": Extract only document metadata + +CRITICAL RULES: +1. 
For images (mimeType starts with "image/"): + - If user wants to "include" or "show" images → "include_image" + - If user wants to "analyze", "read text", or "extract text" from images → "analyze_image_vision" + - Default for images in generateDocument → "include_image" + +2. For text documents in generateDocument: + - If user mentions "template" or "structure" → "use_as_template" + - If user mentions "reference" or "context" → "use_as_reference" + - Default → "extract_text_content" + +3. Consider action context: + - generateDocument: Usually "extract_text_content" or "include_image" + - extractData: Usually "extract_data" + - translateDocument: Usually "translate" + - summarizeDocument: Usually "summarize" + +4. Return ONLY valid JSON following this structure: +{{ + "document_purposes": [ + {{ + "document_id": "document_id_here", + "purpose": "extract_text_content", + "reasoning": "Brief explanation in language '{userLanguage}'", + "extractionPrompt": "Specific extraction prompt if purpose requires extraction, otherwise null", + "processingNotes": "Any special processing requirements or null" + }} + ], + "overall_intent": "Summary of how documents should be used together in language '{userLanguage}'" +}} + +5. All content must be in the language '{userLanguage}' +6. Return ONLY the JSON structure. No explanations before or after. + +Return ONLY the JSON structure. 
+""" + return prompt + + def _createDefaultPurposes( + self, + chatDocuments: List[ChatDocument], + actionContext: str + ) -> Dict[str, Any]: + """Create default purposes when AI analysis fails""" + purposes = [] + + for doc in chatDocuments: + purpose = self._determineDefaultPurpose(doc, actionContext) + purposes.append({ + "document_id": doc.id, + "purpose": purpose, + "reasoning": f"Default purpose based on document type ({doc.mimeType}) and action context ({actionContext})", + "extractionPrompt": None, + "processingNotes": None + }) + + return { + "document_purposes": purposes, + "overall_intent": f"Default processing for {len(chatDocuments)} document(s) in {actionContext} action" + } + + def _determineDefaultPurpose( + self, + doc: ChatDocument, + actionContext: str + ) -> str: + """Determine default purpose based on document type and action context""" + mimeType = doc.mimeType or "" + + # Image documents + if mimeType.startswith("image/"): + if actionContext == "generateDocument": + return "include_image" + elif actionContext in ["extractData", "process"]: + return "analyze_image_vision" + else: + return "include_image" # Default for images + + # Action-specific defaults + if actionContext == "extractData": + return "extract_data" + elif actionContext == "translateDocument": + return "translate" + elif actionContext == "summarizeDocument": + return "summarize" + elif actionContext == "convertDocument" or actionContext == "convert": + return "convert_format" + elif actionContext == "generateDocument": + return "extract_text_content" + else: + # Default for other actions + return "extract_text_content" + + def _getUserLanguage(self) -> str: + """Get user language for document generation""" + try: + if self.services: + if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage: + return self.services.currentUserLanguage + elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'): + return 
self.services.user.language + except Exception: + pass + return 'en' # Default fallback + diff --git a/modules/services/serviceGeneration/subStructureGenerator.py b/modules/services/serviceGeneration/subStructureGenerator.py new file mode 100644 index 00000000..d2ef1aeb --- /dev/null +++ b/modules/services/serviceGeneration/subStructureGenerator.py @@ -0,0 +1,488 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Structure Generator for hierarchical document generation. +Generates document skeleton with section placeholders. +""" + +import logging +import json +from typing import Dict, Any, Optional, List +from modules.datamodels.datamodelJson import jsonTemplateDocument + +logger = logging.getLogger(__name__) + + +class StructureGenerator: + """Generates document structure with section placeholders""" + + def __init__(self, services: Any): + self.services = services + + async def generateStructure( + self, + userPrompt: str, + documentList: Optional[Any] = None, + cachedContent: Optional[Dict[str, Any]] = None, + maxSectionLength: int = 500, + existingImages: Optional[List[Dict[str, Any]]] = None + ) -> Dict[str, Any]: + """ + Generate document structure with sections. 
+ + Args: + userPrompt: User's original prompt + documentList: Optional document references + cachedContent: Optional extracted content cache + maxSectionLength: Maximum words for simple sections + existingImages: Optional list of existing images to include + + Returns: + Document structure with empty elements arrays + """ + try: + # Create structure generation prompt + structurePrompt = self._createStructurePrompt( + userPrompt=userPrompt, + cachedContent=cachedContent, + maxSectionLength=maxSectionLength, + existingImages=existingImages or [] + ) + + # Debug: Log structure generation prompt + if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): + try: + self.services.utils.writeDebugFile( + structurePrompt, + "document_generation_structure_prompt" + ) + except Exception as e: + logger.debug(f"Could not write debug file for structure prompt: {e}") + + # Call AI to generate structure + from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum + + options = AiCallOptions( + operationType=OperationTypeEnum.DATA_GENERATE, + resultFormat="json" + ) + + aiResponse = await self.services.ai.callAiContent( + prompt=structurePrompt, + options=options, + outputFormat="json" + ) + + # Debug: Log structure generation response + if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): + try: + self.services.utils.writeDebugFile( + aiResponse.content if aiResponse and aiResponse.content else '', + "document_generation_structure_response" + ) + except Exception as e: + logger.debug(f"Could not write debug file for structure response: {e}") + + if not aiResponse or not aiResponse.content: + raise ValueError("AI structure generation returned empty response") + + # Extract and parse JSON + extractedJson = self.services.utils.jsonExtractString(aiResponse.content) + if not extractedJson: + raise ValueError("No JSON found in AI structure response") + + structure = 
json.loads(extractedJson) + + # Validate and enhance structure + structure = self._validateAndEnhanceStructure(structure, maxSectionLength) + + return structure + + except Exception as e: + logger.error(f"Error generating structure: {str(e)}") + raise + + def _createStructurePrompt( + self, + userPrompt: str, + cachedContent: Optional[Dict[str, Any]] = None, + maxSectionLength: int = 500, + existingImages: Optional[List[Dict[str, Any]]] = None + ) -> str: + """ + Create prompt for structure generation. + """ + # Get user language + userLanguage = self._getUserLanguage() + + # Format cached content if available + cachedContentText = "" + if cachedContent and cachedContent.get("extractedContent"): + cachedContentText = self._formatCachedContent(cachedContent) + + # Use provided existingImages or extract from cachedContent + if existingImages is None: + existingImages = [] + if cachedContent and cachedContent.get("imageDocuments"): + existingImages = cachedContent.get("imageDocuments", []) + + # Create structure template + structureTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", "Document Title") + + prompt = f"""{'='*80} +USER REQUEST: +{'='*80} +{userPrompt} +{'='*80} + +TASK: Generate a document STRUCTURE (skeleton) with sections. +Do NOT generate actual content yet - only the structure. + +{'='*80} +EXTRACTED CONTENT (if available): +{'='*80} +{cachedContentText if cachedContentText else "No source documents provided."} +{'='*80} + +INSTRUCTIONS: +1. Analyze the user request and extracted content +2. Create a document structure with CONTENT sections only +3. 
For each section, specify: + - id: Unique identifier (e.g., "section_title_1", "section_image_1") + - content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block" + - complexity: "simple" (can generate directly) or "complex" (needs sub-prompt) + - generation_hint: Brief description of what content should be generated + - image_prompt: (only for image sections) Detailed prompt for image generation + - order: Section order number (starting from 1) + - elements: [] (empty array - will be populated later) + +4. Identify image sections: + - If user requests illustrations/images, create image sections + - If existing images are provided in documentList (check EXISTING IMAGES section below), create image sections that reference them + - Add image_prompt field with detailed description for image generation (only for new images) + - Set complexity to "complex" + - For existing images: Set image_source to "existing" and image_reference_id to the image document ID + - Example for new image: {{"id": "section_image_1", "content_type": "image", "complexity": "complex", "generation_hint": "Illustration for chapter 1", "image_prompt": "A detailed description for image generation", "order": 2, "elements": []}} + - Example for existing image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Include provided image", "image_source": "existing", "image_reference_id": "doc_id_here", "order": 2, "elements": []}} + +{'='*80} +EXISTING IMAGES (to include in document): +{'='*80} +{self._formatExistingImages(existingImages) if existingImages else "No existing images provided."} +{'='*80} + +6. Identify complex text sections: + - Long chapters (>{maxSectionLength} words expected) should be marked as "complex" + - Short paragraphs/headings should be "simple" + +7. Return ONLY valid JSON following this structure: +{structureTemplate} + +5. 
CRITICAL RULES: + - Return ONLY valid JSON (no comments, no trailing commas, double quotes only) + - Follow the exact JSON schema structure provided + - IMPORTANT: All sections MUST have empty elements arrays: "elements": [] (the template shows examples with content, but you must use empty arrays) + - ALL sections MUST include "generation_hint" field with a brief description of what content should be generated + - ALL sections MUST include "complexity" field: "simple" for short content, "complex" for long chapters/images + - Image sections MUST include "image_prompt" field with detailed description for image generation + - Order numbers MUST start from 1 (not 0) + - All content must be in the language '{userLanguage}' + - Do NOT generate actual content - only structure (skeleton) + - Use only supported content_type values: "heading", "paragraph", "image", "table", "bullet_list", "code_block" + +Return ONLY the JSON structure. No explanations. +""" + return prompt + + def _validateAndEnhanceStructure( + self, + structure: Dict[str, Any], + maxSectionLength: int + ) -> Dict[str, Any]: + """ + Validate structure and enhance with complexity identification. 
+ """ + try: + # Ensure structure has required fields + if "documents" not in structure: + if "sections" in structure: + # Convert single-document format to multi-document format + structure = { + "metadata": structure.get("metadata", {}), + "documents": [{ + "id": "doc_1", + "title": structure.get("metadata", {}).get("title", "Document"), + "filename": "document.json", + "sections": structure.get("sections", []) + }] + } + else: + raise ValueError("Structure missing 'documents' or 'sections' field") + + # Process each document + for doc in structure.get("documents", []): + sections = doc.get("sections", []) + + # Process and validate sections according to standardized schema + for idx, section in enumerate(sections): + # Ensure required fields + if "id" not in section: + section["id"] = f"section_{idx + 1}" + + sectionId = section.get("id", "") + section["order"] = idx + 1 + + if "elements" not in section: + section["elements"] = [] + + # Identify complexity if not set + if "complexity" not in section: + section["complexity"] = self._identifySectionComplexity( + section, + maxSectionLength + ) + + # Ensure generation_hint exists (required for content generation) + if "generation_hint" not in section or not section.get("generation_hint"): + # Create meaningful generation hint from section id or content type + contentType = section.get("content_type", "") + + # Extract meaningful hint from section ID + meaningfulHint = self._extractMeaningfulHint(sectionId, contentType, section.get("elements", [])) + section["generation_hint"] = meaningfulHint + + # Ensure image sections have proper configuration + if section.get("content_type") == "image": + imageSource = section.get("image_source", "generate") + + if imageSource == "existing": + # Existing image - ensure image_reference_id is set + if "image_reference_id" not in section: + logger.warning(f"Image section {sectionId} has image_source='existing' but no image_reference_id") + # Existing images are simple (no 
generation needed) + section["complexity"] = "simple" + else: + # New image generation - ensure image_prompt + if "image_prompt" not in section or not section.get("image_prompt"): + # Try to extract from generation_hint + generationHint = section.get("generation_hint", "") + if generationHint: + # Enhance generation_hint to be a proper image prompt + section["image_prompt"] = self._enhanceImagePrompt(generationHint) + else: + # Create default based on document context + docTitle = doc.get("title", "Document") + section["image_prompt"] = f"Generate an illustration for: {docTitle}" + + # Ensure complexity is set to complex for new image generation + section["complexity"] = "complex" + + return structure + + except Exception as e: + logger.error(f"Error validating structure: {str(e)}") + raise + + def _identifySectionComplexity( + self, + section: Dict[str, Any], + maxSectionLength: int + ) -> str: + """ + Identify if section is simple or complex. + + Rules: + - Images: always complex + - Long chapters (>maxSectionLength words): complex + - Others: simple + """ + contentType = section.get("content_type", "") + + # Images are always complex + if contentType == "image": + return "complex" + + # Check generation_hint for length indicators + generationHint = section.get("generation_hint", "").lower() + + # Keywords indicating long content + longContentKeywords = [ + "chapter", "long", "detailed", "comprehensive", + "extensive", "full", "complete story" + ] + + if any(keyword in generationHint for keyword in longContentKeywords): + return "complex" + + # Default to simple + return "simple" + + def _extractMeaningfulHint( + self, + sectionId: str, + contentType: str, + elements: List[Any] + ) -> str: + """ + Extract meaningful generation hint from section ID, content type, or elements. 
+ + Args: + sectionId: Section identifier (e.g., "section_heading_current_state") + contentType: Content type (e.g., "heading", "paragraph") + elements: Existing elements if any + + Returns: + Meaningful generation hint string + """ + sectionIdLower = sectionId.lower() + + # Try to extract text from existing elements first (most accurate) + if elements and isinstance(elements, list) and len(elements) > 0: + firstElement = elements[0] + if isinstance(firstElement, dict): + if "text" in firstElement and firstElement["text"]: + if contentType == "heading": + return firstElement["text"] + elif contentType == "paragraph": + return f"Content paragraph: {firstElement['text'][:50]}..." + + # Extract meaningful text from section ID + # Remove common prefixes: "section_", "section_heading_", "section_paragraph_", etc. + meaningfulPart = sectionId + for prefix in ["section_heading_", "section_paragraph_", "section_bullet_list_", + "section_code_block_", "section_image_", "section_"]: + if meaningfulPart.lower().startswith(prefix): + meaningfulPart = meaningfulPart[len(prefix):] + break + + # Convert snake_case to Title Case + # e.g., "current_state" -> "Current State" + words = meaningfulPart.replace("_", " ").split() + titleCase = " ".join(word.capitalize() for word in words if word) + + # Handle special cases + if "introduction" in sectionIdLower or "intro" in sectionIdLower: + return "Introduction paragraph" + elif "conclusion" in sectionIdLower: + return "Conclusion paragraph" + elif "footer" in sectionIdLower or "copyright" in sectionIdLower: + return "Footer content" + elif "title" in sectionIdLower and "main" in sectionIdLower: + # Main title - try to get from document title or use generic + return "Main document title" + + # Create hint based on content type and extracted text + if contentType == "heading": + if titleCase: + return titleCase + else: + return "Section heading" + elif contentType == "paragraph": + if titleCase: + return f"Content paragraph about 
{titleCase.lower()}" + else: + return f"Content paragraph" + elif contentType == "bullet_list": + if titleCase: + return f"Bullet list: {titleCase.lower()}" + else: + return "Bullet list items" + elif contentType == "code_block": + return "Code content" + else: + if titleCase: + return f"Content for {titleCase.lower()}" + else: + return f"Content for {contentType} section" + + def _extractImagePrompts( + self, + structure: Dict[str, Any] + ) -> Dict[str, str]: + """ + Extract image generation prompts from structure. + Maps section_id -> image_prompt + """ + imagePrompts = {} + + for doc in structure.get("documents", []): + for section in doc.get("sections", []): + if section.get("content_type") == "image": + sectionId = section.get("id") + imagePrompt = section.get("image_prompt") + if sectionId and imagePrompt: + imagePrompts[sectionId] = imagePrompt + + return imagePrompts + + def _formatCachedContent( + self, + cachedContent: Dict[str, Any] + ) -> str: + """ + Format cached content for prompt inclusion. + """ + try: + extractedContent = cachedContent.get("extractedContent", []) + if not extractedContent: + return "No content extracted." + + # Format ContentPart objects + formattedParts = [] + for extracted in extractedContent: + if hasattr(extracted, 'parts'): + for part in extracted.parts: + if hasattr(part, 'content'): + formattedParts.append(part.content) + elif isinstance(extracted, dict): + formattedParts.append(str(extracted)) + else: + formattedParts.append(str(extracted)) + + return "\n\n".join(formattedParts) if formattedParts else "No content extracted." + + except Exception as e: + logger.warning(f"Error formatting cached content: {str(e)}") + return "Error formatting cached content." + + def _enhanceImagePrompt(self, generationHint: str) -> str: + """ + Enhance generation hint to be a proper image generation prompt. + Adds visual details and style guidance if missing. 
+ """ + # If hint already contains visual details, use as-is + visualKeywords = ["illustration", "image", "picture", "visual", "depict", "show", "drawing"] + if any(keyword.lower() in generationHint.lower() for keyword in visualKeywords): + return generationHint + + # Enhance with visual description + enhanced = f"Create a professional illustration: {generationHint}" + return enhanced + + def _formatExistingImages(self, imageDocuments: List[Dict[str, Any]]) -> str: + """Format existing images list for prompt inclusion""" + if not imageDocuments: + return "No existing images provided." + + formatted = [] + for i, imgDoc in enumerate(imageDocuments, 1): + formatted.append(f"{i}. Image ID: {imgDoc.get('id')}") + formatted.append(f" File Name: {imgDoc.get('fileName', 'Unknown')}") + formatted.append(f" MIME Type: {imgDoc.get('mimeType', 'Unknown')}") + formatted.append(f" Alt Text: {imgDoc.get('altText', 'Image')}") + formatted.append("") + + return "\n".join(formatted) + + def _getUserLanguage(self) -> str: + """Get user language for document generation""" + try: + if self.services: + if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage: + return self.services.currentUserLanguage + elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'): + return self.services.user.language + except Exception: + pass + return 'en' # Default fallback + diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py index df48b141..907e84a6 100644 --- a/modules/shared/jsonUtils.py +++ b/modules/shared/jsonUtils.py @@ -199,8 +199,7 @@ def closeJsonStructures(text: str) -> str: # Handle unterminated strings: find the last unclosed string # Look for patterns like: "value" or "value\n (unterminated) - # Simple heuristic: if we end with an unterminated string (odd number of quotes at end) - # Try to close it by finding the last opening quote and closing it + # Check if we're in the middle of a string 
value when text ends if result.strip(): # Count quotes - if odd number, we have an unterminated string quoteCount = result.count('"') @@ -219,6 +218,32 @@ def closeJsonStructures(text: str) -> str: # Find where the string should end (before next comma, bracket, or brace) # For now, just close it at the end result += '"' + else: + # Even number of quotes, but might still be in middle of string if cut off + # Check if text ends with a colon followed by a quote (start of string value) + # or ends with text that looks like it's inside a string (no closing quote after last quote) + import re + # Pattern: ends with "text" where text doesn't end with quote + # Look for pattern like: "text": "incomplete + if re.search(r':\s*"[^"]*$', result): + # We're in the middle of a string value, close it + result += '"' + # Also check if we end with text after a quote (like "key": "value but cut off) + elif re.search(r'"\s*:\s*"[^"]*[^",}\]]$', result): + # Check if last quote is followed by non-quote, non-structural chars + lastQuotePos = result.rfind('"') + if lastQuotePos >= 0: + afterQuote = result[lastQuotePos + 1:] + # If after quote we have text but no closing quote, comma, or brace, we're in a string + if afterQuote and not re.match(r'^\s*[,}\]\]]', afterQuote): + # Check if it's escaped + escapeCount = 0 + i = lastQuotePos - 1 + while i >= 0 and result[i] == '\\': + escapeCount += 1 + i -= 1 + if escapeCount % 2 == 0: + result += '"' # Count open/close brackets and braces openBraces = result.count('{') diff --git a/modules/workflows/methods/methodAi/actions/convert.py b/modules/workflows/methods/methodAi/actions/convert.py index 1c34fa9b..788fadea 100644 --- a/modules/workflows/methods/methodAi/actions/convert.py +++ b/modules/workflows/methods/methodAi/actions/convert.py @@ -98,7 +98,7 @@ async def convert(self, parameters: Dict[str, Any]) -> ActionResult: renderOptions["columnsPerRow"] = parameters.get("columnsPerRow") renderOptions["includeHeader"] = 
parameters.get("includeHeader", True) - rendered_content, mime_type = await generationService.renderReport( + rendered_content, mime_type, _images = await generationService.renderReport( jsonData, normalizedOutputFormat, title, None, None ) diff --git a/modules/workflows/methods/methodAi/actions/generateDocument.py b/modules/workflows/methods/methodAi/actions/generateDocument.py index 5badc321..5b5db12f 100644 --- a/modules/workflows/methods/methodAi/actions/generateDocument.py +++ b/modules/workflows/methods/methodAi/actions/generateDocument.py @@ -3,13 +3,18 @@ """ Generate Document action for AI operations. -Generates documents from scratch or based on templates/inputs. +Generates documents from scratch or based on templates/inputs using hierarchical approach. """ import logging -from typing import Dict, Any +import time +from typing import Dict, Any, Optional from modules.workflows.methods.methodBase import action -from modules.datamodels.datamodelChat import ActionResult +from modules.datamodels.datamodelChat import ActionResult, ActionDocument +from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy +from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator +from modules.services.serviceGeneration.subContentGenerator import ContentGenerator +from modules.services.serviceGeneration.subDocumentPurposeAnalyzer import DocumentPurposeAnalyzer logger = logging.getLogger(__name__) @@ -17,15 +22,18 @@ logger = logging.getLogger(__name__) async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: """ GENERAL: - - Purpose: Generate documents from scratch or based on templates/inputs. + - Purpose: Generate documents from scratch or based on templates/inputs using hierarchical approach. - Input requirements: prompt or description (required); optional documentList (for templates/references). - - Output format: Document in specified format (default: docx). 
+ - Output format: Document in specified format. Any format supported by dynamically registered renderers is acceptable (default: txt). Parameters: - prompt (str, required): Description of the document to generate. - documentList (list, optional): Template documents or reference documents to use as a guide. - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc. - - resultType (str, optional): Output format (docx, pdf, txt, md, etc.). Default: docx. + - resultType (str, optional): Output format. Any format supported by dynamically registered renderers is acceptable (formats are discovered automatically from renderer registry). Common formats: txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg. Default: txt. + - maxSectionLength (int, optional): Maximum words for simple sections. Default: 500. + - parallelGeneration (bool, optional): Enable parallel section generation. Default: True. + - progressLogging (bool, optional): Send ChatLog progress updates. Default: True. """ prompt = parameters.get("prompt") if not prompt: @@ -33,21 +41,361 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: documentList = parameters.get("documentList", []) documentType = parameters.get("documentType") - resultType = parameters.get("resultType", "docx") + resultType = parameters.get("resultType", "txt") - aiPrompt = f"Generate a document based on the following requirements: {prompt}" - if documentType: - aiPrompt += f" Document type: {documentType}." - if documentList: - aiPrompt += " Use the provided template/reference documents as a guide for structure, format, and style." - aiPrompt += " Create a professional, well-structured document with appropriate formatting and organization." 
+ # Auto-detect format from prompt if not explicitly provided + if resultType == "txt" and prompt: + promptLower = prompt.lower() + if "html" in promptLower or "html5" in promptLower: + resultType = "html" + logger.info(f"Auto-detected HTML format from prompt") + elif "pdf" in promptLower: + resultType = "pdf" + logger.info(f"Auto-detected PDF format from prompt") + elif "markdown" in promptLower or " md " in promptLower or promptLower.endswith(" md"): + resultType = "md" + logger.info(f"Auto-detected Markdown format from prompt") + elif ("text" in promptLower or "txt" in promptLower) and "html" not in promptLower: + resultType = "txt" + logger.info(f"Auto-detected Text format from prompt") - processParams = { - "aiPrompt": aiPrompt, - "resultType": resultType - } - if documentList: - processParams["documentList"] = documentList + maxSectionLength = parameters.get("maxSectionLength", 500) + parallelGeneration = parameters.get("parallelGeneration", True) + progressLogging = parameters.get("progressLogging", True) - return await self.process(processParams) + # Create operation ID for progress tracking + workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" + operationId = f"doc_gen_{workflowId}_{int(time.time())}" + parentOperationId = parameters.get('parentOperationId') + + try: + # Phase 1: Structure Generation + if progressLogging: + self.services.chat.progressLogStart( + operationId, + "Document", + "Structure Generation", + "Generating document structure...", + parentOperationId=parentOperationId + ) + + structureGenerator = StructureGenerator(self.services) + + # Analyze document purposes and process documents accordingly + cachedContent = None + imageDocuments = [] + documentPurposes = {} + + if documentList: + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.1, "Analyzing document purposes...") + + # Convert documentList to DocumentReferenceList + from 
modules.datamodels.datamodelDocref import DocumentReferenceList + + if isinstance(documentList, DocumentReferenceList): + docRefList = documentList + elif isinstance(documentList, str): + docRefList = DocumentReferenceList.from_string_list([documentList]) + elif isinstance(documentList, list): + docRefList = DocumentReferenceList.from_string_list(documentList) + else: + docRefList = DocumentReferenceList(references=[]) + + # Get ChatDocuments + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) + if chatDocuments: + logger.info(f"Analyzing purposes for {len(chatDocuments)} documents") + + # Analyze document purposes using AI + purposeAnalyzer = DocumentPurposeAnalyzer(self.services) + purposeAnalysis = await purposeAnalyzer.analyzeDocumentPurposes( + userPrompt=prompt, + chatDocuments=chatDocuments, + actionContext="generateDocument" + ) + + documentPurposes = {dp["document_id"]: dp for dp in purposeAnalysis.get("document_purposes", [])} + logger.info(f"Purpose analysis complete: {purposeAnalysis.get('overall_intent', 'N/A')}") + + # Separate documents by purpose + textDocs = [] + imageDocsToInclude = [] + imageDocsToAnalyze = [] + + for doc in chatDocuments: + docPurpose = documentPurposes.get(doc.id, {}) + purpose = docPurpose.get("purpose", "extract_text_content") + + if purpose == "include_image": + imageDocsToInclude.append(doc) + elif purpose == "analyze_image_vision": + imageDocsToAnalyze.append(doc) + elif purpose in ["extract_text_content", "use_as_template", "use_as_reference", "extract_data"]: + textDocs.append(doc) + # Skip "attach" purpose - don't process + + # Process text documents (extract content) + extractedResults = [] + if textDocs: + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.15, f"Extracting content from {len(textDocs)} text document(s)...") + + # Prepare extraction options with purpose-specific prompts + extractionOptionsList = [] + for doc in textDocs: + docPurpose = 
documentPurposes.get(doc.id, {}) + extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all content from the document" + + extractionOptions = ExtractionOptions( + prompt=extractionPrompt, + mergeStrategy=MergeStrategy( + mergeType="concatenate", + groupBy="typeGroup", + orderBy="id" + ), + processDocumentsIndividually=True + ) + extractionOptionsList.append((doc, extractionOptions)) + + # Extract content from text documents + for doc, extractionOptions in extractionOptionsList: + try: + docResults = self.services.extraction.extractContent( + [doc], + extractionOptions, + parentOperationId=operationId + ) + extractedResults.extend(docResults) + except Exception as e: + logger.error(f"Error extracting content from {doc.fileName}: {str(e)}") + + logger.info(f"Extracted content from {len(extractedResults)} text document(s)") + + # Process images to analyze (vision call) + if imageDocsToAnalyze: + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.2, f"Analyzing {len(imageDocsToAnalyze)} image(s) with vision AI...") + + # Extract content from images using vision analysis + for doc in imageDocsToAnalyze: + try: + docPurpose = documentPurposes.get(doc.id, {}) + extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all text and information from this image" + + extractionOptions = ExtractionOptions( + prompt=extractionPrompt, + mergeStrategy=MergeStrategy( + mergeType="concatenate", + groupBy="typeGroup", + orderBy="id" + ), + processDocumentsIndividually=True + ) + + docResults = self.services.extraction.extractContent( + [doc], + extractionOptions, + parentOperationId=operationId + ) + extractedResults.extend(docResults) + except Exception as e: + logger.error(f"Error analyzing image {doc.fileName}: {str(e)}") + + logger.info(f"Analyzed {len(imageDocsToAnalyze)} image(s) with vision AI") + + # Process images to include (store image data) + if imageDocsToInclude: + if progressLogging: + 
self.services.chat.progressLogUpdate(operationId, 0.25, f"Preparing {len(imageDocsToInclude)} image(s) for inclusion...") + + # Get image data for inclusion + from modules.interfaces.interfaceDbComponentObjects import getInterface + dbInterface = getInterface() + + for doc in imageDocsToInclude: + try: + # Get image bytes + imageBytes = dbInterface.getFileData(doc.fileId) + if imageBytes: + # Encode to base64 + import base64 + base64Data = base64.b64encode(imageBytes).decode('utf-8') + + # Create image document entry + imageDoc = { + "id": doc.id, + "fileName": doc.fileName, + "mimeType": doc.mimeType, + "base64Data": base64Data, + "altText": doc.fileName or "Image", + "fileSize": doc.fileSize + } + imageDocuments.append(imageDoc) + logger.debug(f"Prepared image {doc.fileName} for inclusion ({len(base64Data)} chars base64)") + else: + logger.warning(f"Could not retrieve image data for {doc.fileName}") + except Exception as e: + logger.error(f"Error preparing image {doc.fileName} for inclusion: {str(e)}") + + logger.info(f"Prepared {len(imageDocuments)} image(s) for inclusion") + + # Build cachedContent with all information + cachedContent = { + "extractedContent": extractedResults, + "imageDocuments": imageDocuments, + "documentPurposes": documentPurposes, + "extractionTimestamp": time.time(), + "sourceDocuments": [doc.id for doc in chatDocuments] + } + + logger.info(f"Document processing complete: {len(extractedResults)} extracted, {len(imageDocuments)} images to include") + + # Generate structure + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.2, "Generating document structure...") + + structure = await structureGenerator.generateStructure( + userPrompt=prompt, + documentList=documentList if documentList else None, + cachedContent=cachedContent, + maxSectionLength=maxSectionLength, + existingImages=imageDocuments # Pass existing images for structure generation + ) + + if progressLogging: + 
self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated") + + # Phase 2: Content Generation + if progressLogging: + self.services.chat.progressLogUpdate( + operationId, + 0.34, + "Starting content generation..." + ) + + contentGenerator = ContentGenerator(self.services) + + # Create enhanced progress callback + def progressCallback(sectionIndex: int, totalSections: int, message: str): + if progressLogging: + # Calculate progress: 34% to 90% for content generation phase + if totalSections > 0: + progress = 0.34 + (0.56 * (sectionIndex / totalSections)) + else: + progress = 0.34 + + # Format message + if sectionIndex > 0 and totalSections > 0: + progressMessage = f"Section {sectionIndex}/{totalSections}: {message}" + else: + progressMessage = message + + self.services.chat.progressLogUpdate( + operationId, + progress, + progressMessage + ) + + completeStructure = await contentGenerator.generateContent( + structure=structure, + cachedContent=cachedContent, + userPrompt=prompt, + progressCallback=progressCallback, + parallelGeneration=parallelGeneration + ) + + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated") + + # Phase 3: Integration & Rendering + if progressLogging: + self.services.chat.progressLogUpdate( + operationId, + 0.91, + "Rendering final document..." 
+ ) + + # Use existing renderReport method + title = structure.get("metadata", {}).get("title", "Generated Document") + if documentType: + title = f"{title} ({documentType})" + + renderedContent, mimeType, images = await self.services.generation.renderReport( + extractedContent=completeStructure, + outputFormat=resultType, + title=title, + userPrompt=prompt, + aiService=self.services.ai + ) + + # Build list of documents to return + documents = [ + ActionDocument( + documentName=f"document.{resultType}", + documentData=renderedContent, + mimeType=mimeType + ) + ] + + # Add images as separate documents + if images: + logger.info(f"Processing {len(images)} image(s) from renderer") + import base64 + for idx, imageData in enumerate(images): + try: + base64Data = imageData.get("base64Data", "") + altText = imageData.get("altText", f"image_{idx + 1}") + caption = imageData.get("caption", "") + sectionId = imageData.get("sectionId", f"section_{idx + 1}") + + if base64Data: + # Decode base64 to bytes + imageBytes = base64.b64decode(base64Data) + + # Determine filename and mime type + filename = imageData.get("filename", f"image_{idx + 1}.png") + if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')): + filename = f"image_{idx + 1}.png" + + # Determine mime type from filename + if filename.lower().endswith('.png'): + imageMimeType = "image/png" + elif filename.lower().endswith(('.jpg', '.jpeg')): + imageMimeType = "image/jpeg" + elif filename.lower().endswith('.gif'): + imageMimeType = "image/gif" + elif filename.lower().endswith('.webp'): + imageMimeType = "image/webp" + else: + imageMimeType = "image/png" # Default + + # Add image document + documents.append(ActionDocument( + documentName=filename, + documentData=imageBytes, + mimeType=imageMimeType + )) + logger.info(f"Added image document: {filename} (section: {sectionId}, {len(imageBytes)} bytes, alt: {altText})") + else: + logger.warning(f"Image {idx + 1} (section: {sectionId}) has no base64Data, 
skipping") + except Exception as e: + logger.error(f"Error adding image document {idx + 1}: {str(e)}", exc_info=True) + continue + else: + logger.debug("No images returned from renderer") + + # Note: Document creation is handled by the workflow system + # We just return the rendered content and images in ActionResult + + if progressLogging: + self.services.chat.progressLogFinish(operationId, True) + + return ActionResult.isSuccess(documents=documents) + + except Exception as e: + logger.error(f"Error in hierarchical document generation: {str(e)}") + if progressLogging: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=str(e)) diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py index 101c8586..7595c2eb 100644 --- a/modules/workflows/methods/methodAi/methodAi.py +++ b/modules/workflows/methods/methodAi/methodAi.py @@ -353,11 +353,10 @@ class MethodAi(MethodBase): "resultType": WorkflowActionParameter( name="resultType", type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["docx", "pdf", "txt", "md"], + frontendType=FrontendType.TEXT, required=False, - default="docx", - description="Output format" + default="txt", + description="Output format (e.g., txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg). Any format supported by renderers is acceptable. Default: txt" ) }, execute=generateDocument.__get__(self, self.__class__) diff --git a/modules/workflows/methods/methodBase.py b/modules/workflows/methods/methodBase.py index 72f35c19..a20f5ec1 100644 --- a/modules/workflows/methods/methodBase.py +++ b/modules/workflows/methods/methodBase.py @@ -98,9 +98,13 @@ class MethodBase: self.logger.error(f"Method {self.name} has no _actions dictionary defined. 
Actions will not be available.") return result + totalActions = len(self._actions) + deniedActions = [] + for actionName, actionDef in self._actions.items(): # RBAC-Check: Prüfe ob Action für aktuellen User verfügbar ist if not self._checkActionPermission(actionDef.actionId): + deniedActions.append(f"{actionName} ({actionDef.actionId})") continue # Skip if user doesn't have permission # Konvertiere WorkflowActionDefinition zu System-Format @@ -110,6 +114,11 @@ class MethodBase: 'method': self._createActionWrapper(actionDef) } + if deniedActions: + self.logger.warning(f"Method {self.name}: {len(deniedActions)}/{totalActions} actions denied by RBAC: {deniedActions[:5]}{'...' if len(deniedActions) > 5 else ''}") + if not result and totalActions > 0: + self.logger.error(f"Method {self.name}: ALL {totalActions} actions denied by RBAC! This will result in empty action list.") + return result def _checkActionPermission(self, actionId: str) -> bool: @@ -120,22 +129,36 @@ class MethodBase: REQUIREMENT: RBAC-Service muss verfügbar sein. """ if not hasattr(self.services, 'rbac') or not self.services.rbac: - self.logger.error(f"RBAC service not available. Action {actionId} will be denied.") + self.logger.error(f"RBAC service not available (services.rbac is None). Action {actionId} will be denied.") return False - currentUser = self.services.chat.getCurrentUser() + # Get current user from services.user (not from chat service) + currentUser = getattr(self.services, 'user', None) if not currentUser: - self.logger.warning(f"No current user found. Action {actionId} will be denied.") + self.logger.warning(f"No current user found (services.user is None). 
Action {actionId} will be denied.") return False # RBAC-Check: RESOURCE context, item = actionId - permissions = self.services.rbac.getUserPermissions( - user=currentUser, - context=AccessRuleContext.RESOURCE, - item=actionId - ) - - return permissions.view + try: + permissions = self.services.rbac.getUserPermissions( + user=currentUser, + context=AccessRuleContext.RESOURCE, + item=actionId + ) + hasPermission = permissions.view + if not hasPermission: + # Log detailed RBAC denial info + userRoles = getattr(currentUser, 'roleLabels', []) or [] + self.logger.warning( + f"RBAC denied action {actionId} for user {currentUser.id}. " + f"User roles: {userRoles}, " + f"Permissions: view={permissions.view}, edit={permissions.edit}, delete={permissions.delete}. " + f"No matching RBAC rule found for context=RESOURCE, item={actionId}" + ) + return hasPermission + except Exception as e: + self.logger.error(f"RBAC check failed for action {actionId}: {str(e)}. Action will be denied.") + return False def _convertParametersToSystemFormat(self, parameters: Dict[str, WorkflowActionParameter]) -> Dict[str, Dict[str, Any]]: """Convert WorkflowActionParameter dict to system format for API/UI consumption""" diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py index 119a4692..b1de9f98 100644 --- a/modules/workflows/processing/adaptive/contentValidator.py +++ b/modules/workflows/processing/adaptive/contentValidator.py @@ -37,52 +37,6 @@ class ContentValidator: """ return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory) - def _analyzeDocuments(self, documents: List[Any]) -> List[Dict[str, Any]]: - """Generic document analysis - create simple summaries with metadata.""" - summaries = [] - for doc in documents: - try: - data = getattr(doc, 'documentData', None) - name = getattr(doc, 'documentName', 'Unknown') - mimeType = getattr(doc, 'mimeType', 'unknown') - 
formatExt = self._detectFormat(doc) - sizeInfo = self._calculateSize(doc) - - # Simple preview: if it's dict/list, dump JSON; otherwise use string - preview = None - if data is not None: - if isinstance(data, (dict, list)): - preview = json.dumps(data, indent=2, ensure_ascii=False) - # Truncate if too large - if len(preview) > MAX_CONTENT_SIZE_FOR_FULL_PREVIEW: - preview = preview[:PREVIEW_SAMPLE_SIZE] + f"\n\n[Truncated - {self._formatBytes(sizeInfo['bytes'])} total]" - else: - text = str(data) - if len(text) > MAX_CONTENT_SIZE_FOR_FULL_PREVIEW: - preview = text[:PREVIEW_SAMPLE_SIZE] + f"\n\n[Truncated - {self._formatBytes(sizeInfo['bytes'])} total]" - else: - preview = text - - summary = { - "name": name, - "mimeType": mimeType, - "format": formatExt, - "size": sizeInfo["readable"], - "preview": preview - } - summaries.append(summary) - except Exception as e: - logger.warning(f"Error analyzing document {getattr(doc, 'documentName', 'Unknown')}: {str(e)}") - summaries.append({ - "name": getattr(doc, 'documentName', 'Unknown'), - "mimeType": getattr(doc, 'mimeType', 'unknown'), - "format": "unknown", - "size": "0 B", - "preview": None, - "error": str(e) - }) - return summaries - def _summarizeJsonStructure(self, jsonData: Any) -> Dict[str, Any]: """Summarize JSON document structure for validation - extracts main objects, statistics, captions, and IDs.""" try: @@ -120,9 +74,11 @@ class ContentValidator: "order": section.get("order") } + # Get elements for processing + elements = section.get("elements", []) + # For tables: extract caption and statistics if section.get("content_type") == "table": - elements = section.get("elements", []) if elements and isinstance(elements, list) and len(elements) > 0: tableElement = elements[0] sectionSummary["caption"] = tableElement.get("caption") @@ -134,7 +90,6 @@ class ContentValidator: # For lists: extract item count elif section.get("content_type") == "list": - elements = section.get("elements", []) if elements and 
isinstance(elements, list) and len(elements) > 0: listElement = elements[0] items = listElement.get("items", []) @@ -142,7 +97,6 @@ class ContentValidator: # For paragraphs/headings: extract text preview elif section.get("content_type") in ["paragraph", "heading"]: - elements = section.get("elements", []) if elements and isinstance(elements, list) and len(elements) > 0: textElement = elements[0] text = textElement.get("text", "") @@ -174,8 +128,10 @@ class ContentValidator: "order": section.get("order") } + # Get elements for processing + elements = section.get("elements", []) + if section.get("content_type") == "table": - elements = section.get("elements", []) if elements and isinstance(elements, list) and len(elements) > 0: tableElement = elements[0] sectionSummary["caption"] = tableElement.get("caption") @@ -475,6 +431,12 @@ VALIDATION RULES: 5. Format understanding: Different formats can represent equivalent data structures. Focus on content, not format name. 6. Multi-step workflow awareness: If ACTION HISTORY is present, consider the workflow as a whole. Document metadata (e.g., extraction_method) describes how data was EXTRACTED in the last step, not necessarily how it was OBTAINED in the workflow. 7. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help. +8. CRITICAL - Data vs Data Description: When criteria require specific data types (e.g., images, tables, charts, files), distinguish between: + - ACTUAL DATA: The actual data itself (binary data, structured data, embedded content) + - DATA DESCRIPTIONS: Text fields that describe or specify what data should be created (e.g., "image_description", "table_description", "chart_specification") - these are TEXT METADATA, NOT the actual data + - If only descriptions/specifications exist but no actual data, the criterion is NOT met. Descriptions are instructions for creating data, not the data itself. 
+ - Check content types in sections/elements: if content_type matches the required data type (e.g., "image" for images, "table" for tables), actual data exists. If only text fields describing the data exist, the data is missing. + - Check document statistics: if counts for the required data type are 0, the data is missing even if descriptions exist. VALIDATION STEPS: - Check ACTION HISTORY first (if present) for PROCESS-ORIENTED criteria (e.g., "search performed", "sources used", "verification done") diff --git a/modules/workflows/processing/core/actionExecutor.py b/modules/workflows/processing/core/actionExecutor.py index 71b5572f..0e4d6ee4 100644 --- a/modules/workflows/processing/core/actionExecutor.py +++ b/modules/workflows/processing/core/actionExecutor.py @@ -84,43 +84,85 @@ class ActionExecutor: enhancedParameters['expectedDocumentFormats'] = action.expectedDocumentFormats logger.info(f"Expected formats: {action.expectedDocumentFormats}") - # Get current task execution operationId to pass as parent to action methods - # This MUST be the "Service Workflow Execution" operation ID (taskExec_*) - parentOperationId = None + # Get current task execution operationId (taskExec_*) - this is the parent of the action + taskOperationId = None try: progressLogger = self.services.chat.createProgressLogger() activeOperations = progressLogger.getActiveOperations() - logger.debug(f"Looking for parent operation ID. Active operations: {list(activeOperations.keys())}") + logger.debug(f"Looking for task operation ID. 
Active operations: {list(activeOperations.keys())}") # Look for task execution operation (starts with "taskExec_") - # This is the "Service Workflow Execution" level that should be parent of ALL actions + # This is the Task level that should be parent of this action for opId in activeOperations.keys(): if opId.startswith("taskExec_"): - parentOperationId = opId - logger.info(f"Found parent operation ID: {parentOperationId} for action {action.execMethod}.{action.execAction}") + taskOperationId = opId + logger.info(f"Found task operation ID: {taskOperationId} for action {action.execMethod}.{action.execAction}") break - if not parentOperationId: - logger.warning(f"No taskExec_ operation found in active operations. Active operations: {list(activeOperations.keys())}") + if not taskOperationId: + logger.error(f"CRITICAL: No taskExec_ operation found in active operations. Active operations: {list(activeOperations.keys())}. Action logs will appear at root level!") except Exception as e: - logger.error(f"Error getting parent operation ID: {str(e)}") + logger.error(f"Error getting task operation ID: {str(e)}") - # Add parentOperationId to parameters so action methods can use it - # This is critical for UI dashboard hierarchical display - if parentOperationId: - enhancedParameters['parentOperationId'] = parentOperationId - logger.info(f"Passing parentOperationId '{parentOperationId}' to action {action.execMethod}.{action.execAction}") + # Create action operationId entry - Action is child of Task + import time + actionOperationId = f"action_{action.execMethod}_{action.execAction}_{workflow.id}_{taskNum}_{actionNum}_{int(time.time())}" + + try: + # Start action progress tracking - Action is child of Task + # CRITICAL: If taskOperationId is None, the action will appear at root level + self.services.chat.progressLogStart( + actionOperationId, + action.execMethod.capitalize(), + action.execAction, + f"Task {taskNum} Action {actionNum}", + parentOperationId=taskOperationId # Will 
be None if taskExec_ not found + ) + except Exception as e: + logger.error(f"Error starting action progress log: {str(e)}") + + # Add action operationId to parameters so action methods can use it for their steps + # Action steps should be children of the action, not the task + # CRITICAL: This must always be set, even if taskOperationId is None + enhancedParameters['parentOperationId'] = actionOperationId + if taskOperationId: + logger.info(f"Created action operationId '{actionOperationId}' (parent: {taskOperationId}) for action {action.execMethod}.{action.execAction}") else: - logger.warning(f"WARNING: No parentOperationId found for action {action.execMethod}.{action.execAction}. Action logs will appear at root level!") + logger.warning(f"Created action operationId '{actionOperationId}' WITHOUT parent (taskExec_ not found) for action {action.execMethod}.{action.execAction}. Action will appear at root level!") # Check workflow status before executing the action checkWorkflowStopped(self.services) - result = await self.executeAction( - methodName=action.execMethod, - actionName=action.execAction, - parameters=enhancedParameters - ) + # Execute action and track success for progress log + result = None + actionSuccess = False + try: + result = await self.executeAction( + methodName=action.execMethod, + actionName=action.execAction, + parameters=enhancedParameters + ) + actionSuccess = result.success if result else False + except Exception as e: + logger.error(f"Error executing action: {str(e)}") + actionSuccess = False + finally: + # Finish action progress tracking + try: + self.services.chat.progressLogFinish(actionOperationId, actionSuccess) + except Exception as e: + logger.error(f"Error finishing action progress log: {str(e)}") + + # If action execution failed, return error result + if result is None: + action.setError("Action execution failed") + return ActionResult( + success=False, + documents=[], + resultLabel=action.execResultLabel, + error="Action execution 
failed" + ) + resultLabel = action.execResultLabel # Trace action result with full document metadata diff --git a/modules/workflows/processing/modes/modeDynamic.py b/modules/workflows/processing/modes/modeDynamic.py index c4654460..50889b22 100644 --- a/modules/workflows/processing/modes/modeDynamic.py +++ b/modules/workflows/processing/modes/modeDynamic.py @@ -565,10 +565,9 @@ class DynamicMode(BaseMode): methodInstance = _methods[methodName]['instance'] if actionName in methodInstance.actions: action_info = methodInstance.actions[actionName] - docstring = action_info.get('description', '') - # Extract parameter names from docstring to check if documentList exists - paramDescriptions, _ = methodInstance._extractParameterDetails(docstring) - if 'documentList' in paramDescriptions: + # Use structured WorkflowActionParameter objects from new system + parameters_def = action_info.get('parameters', {}) + if 'documentList' in parameters_def: # Convert DocumentReferenceList to string list for database serialization # Action methods will convert it back to DocumentReferenceList when needed parameters['documentList'] = docList.to_string_list() @@ -596,10 +595,9 @@ class DynamicMode(BaseMode): methodInstance = _methods[methodName]['instance'] if actionName in methodInstance.actions: action_info = methodInstance.actions[actionName] - docstring = action_info.get('description', '') - # Extract parameter names from docstring to check if connectionReference exists - paramDescriptions, _ = methodInstance._extractParameterDetails(docstring) - if 'connectionReference' in paramDescriptions: + # Use structured WorkflowActionParameter objects from new system + parameters_def = action_info.get('parameters', {}) + if 'connectionReference' in parameters_def: parameters['connectionReference'] = connectionRef logger.info(f"Added connectionReference to parameters: {connectionRef}") except Exception as e: diff --git 
a/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md b/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md new file mode 100644 index 00000000..39c649ce --- /dev/null +++ b/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md @@ -0,0 +1,354 @@ +# Architecture & Implementation Analysis +## Deep Review of Hierarchical Document Generation + +**Date**: 2025-12-22 +**Status**: Critical Issues Found + +--- + +## Executive Summary + +The hierarchical document generation system is **partially implemented** but has **critical architectural mismatches** and **implementation gaps** that prevent it from working correctly. While core components exist, several fundamental issues need to be addressed. + +--- + +## ✅ What's Correctly Implemented + +### Phase 1: Core Infrastructure ✅ +- ✅ `StructureGenerator` class exists with `generateStructure()` method +- ✅ `ContentGenerator` class exists with `generateContent()` method +- ✅ `ContentIntegrator` class exists with `integrateContent()` method +- ✅ `generateDocument` action uses hierarchical approach +- ✅ Basic progress logging implemented +- ✅ Error handling with `createErrorSection()` implemented + +### Phase 2: Image Generation ✅ +- ✅ `_generateImageSection()` method implemented +- ✅ Image prompt extraction from structure +- ✅ Base64 image data storage +- ✅ Error handling for image failures + +### Phase 3: Parallel Processing ✅ +- ✅ `_generateSectionsParallel()` method implemented +- ✅ `_generateSectionsSequential()` method implemented +- ✅ Batch processing for large documents +- ✅ Progress callback system +- ✅ Exception handling in parallel execution + +--- + +## ❌ Critical Issues Found + +### Issue 1: Previous Sections Context Not Working in Parallel Mode ⚠️ **PARTIALLY FIXED** + +**Problem**: +- In parallel mode, sections within the same batch cannot see each other (correct) +- BUT: Sections in later batches should see sections from earlier batches +- 
**Current Status**: Code was fixed to accumulate previous sections, but needs verification + +**Location**: `subContentGenerator.py` lines 240-319 + +**Fix Applied**: +- Added `accumulatedPreviousSections` to track sections across batches +- Pass accumulated sections to each batch +- **VERIFICATION NEEDED**: Test that prompts actually show previous sections + +**Risk**: Medium - May cause continuity issues in generated content + +--- + +### Issue 2: Variable Shadowing Bug ✅ **FIXED** + +**Problem**: +- `contentType` variable was shadowed in loop, causing wrong section type in prompts + +**Location**: `subContentGenerator.py` line 676 + +**Fix Applied**: +- Renamed loop variable to `prevContentType` + +**Status**: ✅ Fixed + +--- + +### Issue 3: Missing `generation_hint` in Structure Response ✅ **FIXED** + +**Problem**: +- Structure generator creates generic hints like "Section heading" instead of meaningful hints +- AI generates same content for all headings because hints are identical + +**Location**: `subStructureGenerator.py` lines 242-269 + +**Fix Applied**: +- Added `_extractMeaningfulHint()` method to extract meaningful hints from section IDs +- Example: `section_heading_current_state` → "Current State" + +**Status**: ✅ Fixed + +--- + +### Issue 4: JSON Template Architecture Mismatch ✅ **FIXED** + +**Problem**: +- `jsonTemplateDocument` showed filled `elements` arrays, but structure generation requires empty arrays +- Template missing `generation_hint` and `complexity` fields +- Template showed `order: 0` but should start from 1 + +**Location**: `datamodelJson.py` + +**Fix Applied**: +- Updated template to show empty `elements: []` +- Added `generation_hint` to all sections +- Added `complexity` to all sections +- Changed `order` to start from 1 +- Added `title` to metadata + +**Status**: ✅ Fixed + +--- + +### Issue 5: Structure Prompt Instructions Mismatch ✅ **FIXED** + +**Problem**: +- Prompt said "All sections must have empty elements arrays" but template 
showed filled arrays +- Prompt didn't explicitly require `generation_hint` and `complexity` fields + +**Location**: `subStructureGenerator.py` lines 181-190 + +**Fix Applied**: +- Enhanced prompt to explicitly require `generation_hint` and `complexity` +- Clarified that template examples show structure, but elements must be empty + +**Status**: ✅ Fixed + +--- + +## ⚠️ Remaining Issues & Gaps + +### Issue 6: Missing Validation Before Content Generation ⚠️ **NOT IMPLEMENTED** + +**Problem**: +- No validation that structure has required fields before content generation +- No check that all sections have `generation_hint` before generating content + +**Expected** (from Phase 6): +```python +# Validate structure before content generation +if not validateStructure(structure): + raise ValueError("Invalid structure") +``` + +**Current**: Validation is delegated to `_validateAndEnhanceStructure()`, which only adds missing fields and does not actually validate the structure + +**Impact**: Low - Enhancement adds missing fields, but explicit validation would be better + +**Recommendation**: Add explicit validation method + +--- + +### Issue 7: Previous Sections Formatting Missing Content ⚠️ **PARTIALLY IMPLEMENTED** + +**Problem**: +- Previous sections formatting extracts content from `elements`, but if sections don't have elements yet (in parallel mode), it shows nothing +- Should show `generation_hint` as fallback when elements not available + +**Location**: `subContentGenerator.py` lines 671-709 + +**Current Behavior**: +- Shows content preview if elements exist +- Shows nothing if elements don't exist + +**Expected Behavior**: +- Show content preview if elements exist +- Show `generation_hint` as fallback if elements don't exist + +**Impact**: Medium - Reduces context quality in parallel generation + +**Recommendation**: Add fallback to show `generation_hint` when elements not available + +--- + +### Issue 8: Debug File Shows Raw Response, Not Validated Structure ⚠️ **NOT FIXED** + +**Problem**: +- Debug 
file writes `aiResponse.content` (raw AI response) before validation +- Can't verify if `generation_hint` was added by validation + +**Location**: `subStructureGenerator.py` lines 77-84 + +**Impact**: Low - Makes debugging harder but doesn't affect functionality + +**Recommendation**: Write validated structure to separate debug file + +--- + +### Issue 9: Missing Unit Tests ⚠️ **NOT IMPLEMENTED** + +**Problem**: +- No unit tests for any components (Phase 7 requirement) +- No tests for structure generation +- No tests for content generation +- No tests for integration + +**Impact**: High - No way to verify correctness or catch regressions + +**Recommendation**: Add comprehensive unit tests + +--- + +### Issue 10: Missing Integration Tests ⚠️ **NOT IMPLEMENTED** + +**Problem**: +- No end-to-end tests +- No tests with images +- No tests with long documents +- No error scenario tests + +**Impact**: High - No verification of complete flow + +**Recommendation**: Add integration tests + +--- + +### Issue 11: Content Caching Not Optimized ⚠️ **PARTIALLY IMPLEMENTED** + +**Problem**: +- Content is extracted and cached, but: + - No cache validation (check if documents changed) + - No cache reuse verification + - Content is passed to prompts but may not be formatted efficiently + +**Expected** (from Phase 5): +- Cache validation +- Efficient formatting +- Performance testing + +**Current**: Basic caching exists but not optimized + +**Impact**: Medium - Works but could be more efficient + +**Recommendation**: Add cache validation and optimization + +--- + +### Issue 12: Renderer Updates Not Verified ⚠️ **UNKNOWN** + +**Problem**: +- Implementation plan requires renderer updates for images +- HTML renderer should create separate image files +- PDF/XLSX/PPTX renderers should embed images +- **Status unknown** - need to verify renderers handle images correctly + +**Impact**: High - Images may not render correctly + +**Recommendation**: Verify all renderers handle images correctly 
+ +--- + +## 📋 Architecture Compliance Check + +### Data Structure Compliance ✅ + +| Field | Required | Implemented | Status | +|-------|----------|-------------|--------| +| `metadata.title` | Yes | ✅ | ✅ | +| `metadata.split_strategy` | Yes | ✅ | ✅ | +| `sections[].id` | Yes | ✅ | ✅ | +| `sections[].content_type` | Yes | ✅ | ✅ | +| `sections[].complexity` | Yes | ✅ | ✅ | +| `sections[].generation_hint` | Yes | ✅ | ✅ | +| `sections[].order` | Yes | ✅ | ✅ | +| `sections[].elements` | Yes | ✅ | ✅ | +| `sections[].image_prompt` | Image only | ✅ | ✅ | + +### Component Method Compliance ✅ + +| Component | Method | Required | Implemented | Status | +|-----------|--------|----------|-------------|--------| +| StructureGenerator | `generateStructure()` | Yes | ✅ | ✅ | +| StructureGenerator | `_createStructurePrompt()` | Yes | ✅ | ✅ | +| StructureGenerator | `_identifySectionComplexity()` | Yes | ✅ | ✅ | +| StructureGenerator | `_extractImagePrompts()` | Yes | ✅ | ✅ | +| StructureGenerator | `_validateAndEnhanceStructure()` | Yes | ✅ | ✅ | +| StructureGenerator | `_extractMeaningfulHint()` | Yes | ✅ | ✅ | +| ContentGenerator | `generateContent()` | Yes | ✅ | ✅ | +| ContentGenerator | `_generateSectionContent()` | Yes | ✅ | ✅ | +| ContentGenerator | `_generateSimpleSection()` | Yes | ✅ | ✅ | +| ContentGenerator | `_generateComplexTextSection()` | Yes | ✅ | ✅ | +| ContentGenerator | `_generateImageSection()` | Yes | ✅ | ✅ | +| ContentGenerator | `_generateSectionsParallel()` | Yes | ✅ | ✅ | +| ContentGenerator | `_generateSectionsSequential()` | Yes | ✅ | ✅ | +| ContentGenerator | `_createSectionPrompt()` | Yes | ✅ | ✅ | +| ContentIntegrator | `integrateContent()` | Yes | ✅ | ✅ | +| ContentIntegrator | `validateCompleteness()` | Yes | ✅ | ✅ | +| ContentIntegrator | `createErrorSection()` | Yes | ✅ | ✅ | + +--- + +## 🎯 Priority Fixes Needed + +### Critical (Must Fix) +1. ✅ **Issue 2**: Variable shadowing bug - **FIXED** +2. 
✅ **Issue 3**: Missing generation_hint - **FIXED** +3. ✅ **Issue 4**: JSON template mismatch - **FIXED** +4. ✅ **Issue 5**: Prompt instructions mismatch - **FIXED** +5. ⚠️ **Issue 1**: Previous sections context - **NEEDS VERIFICATION** + +### High Priority (Should Fix) +6. ⚠️ **Issue 12**: Renderer image handling - **NEEDS VERIFICATION** +7. ⚠️ **Issue 9**: Missing unit tests - **NOT IMPLEMENTED** +8. ⚠️ **Issue 10**: Missing integration tests - **NOT IMPLEMENTED** + +### Medium Priority (Nice to Have) +9. ⚠️ **Issue 7**: Previous sections formatting fallback - **PARTIALLY IMPLEMENTED** +10. ⚠️ **Issue 11**: Content caching optimization - **PARTIALLY IMPLEMENTED** +11. ⚠️ **Issue 6**: Structure validation - **NOT IMPLEMENTED** +12. ⚠️ **Issue 8**: Debug file improvements - **NOT IMPLEMENTED** + +--- + +## ✅ Summary + +### What Works +- Core infrastructure is implemented +- Image generation is integrated +- Parallel processing is implemented +- Error handling is in place +- Progress logging works + +### What's Fixed (This Session) +- Variable shadowing bug +- Missing generation_hint extraction +- JSON template architecture mismatch +- Prompt instructions clarity +- Previous sections tracking (needs verification) + +### What Needs Work +- Unit and integration tests +- Renderer verification +- Previous sections formatting fallback +- Cache optimization +- Structure validation + +### Overall Status +**Architecture**: ✅ **85% Compliant** +**Implementation**: ✅ **80% Complete** +**Testing**: ❌ **0% Complete** +**Production Ready**: ⚠️ **Not Yet** (needs testing and verification) + +--- + +## Next Steps + +1. **Verify Issue 1 Fix**: Test that previous sections are correctly tracked in parallel mode +2. **Verify Issue 12**: Test that all renderers handle images correctly +3. **Add Unit Tests**: Start with critical components (StructureGenerator, ContentGenerator) +4. **Add Integration Tests**: Test end-to-end flow with various scenarios +5. 
**Improve Previous Sections Formatting**: Add fallback to show generation_hint when elements not available +6. **Add Structure Validation**: Explicit validation before content generation +7. **Optimize Content Caching**: Add cache validation and efficient formatting + +--- + +**Analysis Complete**: 2025-12-22 + diff --git a/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md b/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md new file mode 100644 index 00000000..d0a59e80 --- /dev/null +++ b/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md @@ -0,0 +1,459 @@ +# Concept: Hierarchical Document Generation with Image Integration + +## Executive Summary + +This concept proposes a **three-phase hierarchical approach** to document generation that enables proper image integration and handles complex documents efficiently. + +**Key Decisions**: +- ✅ **Performance**: Parallel processing with ChatLog progress messages +- ✅ **Error Handling**: Skip failed sections, show error messages +- ✅ **Image Storage**: Store as base64 in JSON (renderers need direct access) +- ✅ **Backward Compatibility**: Not needed - implement as new default + +**Renderer Status**: +- ✅ **Ready**: Text, Markdown, DOCX renderers +- ⚠️ **Needs Update**: HTML (create separate image files), PDF (embed images) +- ⚠️ **Needs Implementation**: XLSX, PPTX (add image support) + +## Problem Statement + +Currently, the document generation system has the following limitations: + +1. **No Image Integration**: Images are generated separately but cannot be embedded into document structures +2. **Single-Pass Generation**: Documents are generated in one AI call, making it difficult to handle complex sections (long text, images, chapters) +3. **Repeated Extraction**: Content extraction may happen multiple times unnecessarily +4. 
**No Structured Approach**: No mechanism to first define document structure, then populate sections + +## Current Architecture Analysis + +### Current Flow: +``` +User Request → ai.generateDocument → ai.process → AI JSON Generation → Renderer → Final Document +``` + +### Issues: +- AI generates complete JSON structure in one pass +- Images are generated separately via `ai.generate` action +- No mechanism to integrate generated images into document structure +- JSON schema supports `image` content_type, but AI rarely generates it +- Content extraction happens per action, not cached/reused + +### Current Image Handling: +- Images can be rendered IF they exist in JSON structure (`content_type: "image"`) +- Image data expected as `base64Data` in elements +- Renderers support image rendering (Docx, PDF, HTML, etc.) +- But images are never generated WITHIN document generation + +## Proposed Solution: Hierarchical Document Generation + +### Core Concept + +**Three-Phase Approach:** +1. **Structure Generation Phase**: Generate document skeleton with section placeholders +2. **Content Generation Phase**: Generate content for each section (text or image) via sub-prompts +3. 
**Integration Phase**: Merge all generated content into final document structure + +### Architecture Overview + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Phase 1: Structure Generation │ +│ - Generate document skeleton │ +│ - Identify sections (text, image, complex) │ +│ - Create section placeholders with metadata │ +└─────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Phase 2: Content Generation (Tree-like) │ +│ │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ Section 1: Heading (simple) │ │ +│ │ → Generate directly │ │ +│ └──────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ Section 2: Paragraph (simple) │ │ +│ │ → Generate directly │ │ +│ └──────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ Section 3: Image (complex) │ │ +│ │ → Sub-prompt: Generate image │ │ +│ │ → Store image data │ │ +│ │ → Create image section with base64Data │ │ +│ └──────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ Section 4: Long Chapter (complex) │ │ +│ │ → Sub-prompt: Generate chapter content │ │ +│ │ → Split into subsections if needed │ │ +│ └──────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Phase 3: Integration │ +│ - Merge all generated content │ +│ - Replace placeholders with actual data │ +│ - Validate structure completeness │ +│ - Render to final format │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Detailed Design + +### Phase 1: Structure Generation + +**Purpose**: Create document skeleton with section metadata + +**Process**: +1. AI generates document structure with sections +2. 
Each section includes: + - `id`: Unique identifier + - `content_type`: Type (heading, paragraph, image, table, etc.) + - `complexity`: "simple" or "complex" + - `generation_hint`: Instructions for content generation + - `order`: Section order + - `elements`: Empty or placeholder + +**Example Structure**: +```json +{ + "metadata": { + "title": "Children's Bedtime Story", + "split_strategy": "single_document" + }, + "documents": [{ + "id": "doc_1", + "sections": [ + { + "id": "section_title", + "content_type": "heading", + "complexity": "simple", + "generation_hint": "Story title", + "order": 1, + "elements": [] + }, + { + "id": "section_intro", + "content_type": "paragraph", + "complexity": "simple", + "generation_hint": "Introduction paragraph", + "order": 2, + "elements": [] + }, + { + "id": "section_image_1", + "content_type": "image", + "complexity": "complex", + "generation_hint": "Illustration: Rabbit meeting owl in moonlit forest", + "image_prompt": "A small brown rabbit sitting in a peaceful forest clearing under moonlight with stars, meeting a wise owl perched on a branch", + "order": 3, + "elements": [] + }, + { + "id": "section_chapter_1", + "content_type": "paragraph", + "complexity": "complex", + "generation_hint": "First chapter: Rabbit's adventure begins", + "order": 4, + "elements": [] + } + ] + }] +} +``` + +### Phase 2: Content Generation + +**Purpose**: Generate actual content for each section + +**Process**: +1. Iterate through sections in order +2. 
For each section: + - **Simple sections** (heading, short paragraph): + - Generate content directly via AI + - Populate `elements` array + - **Complex sections** (image, long chapter): + - Create sub-prompt based on `generation_hint` and `image_prompt` + - Generate content via specialized action: + - Images: `ai.generate` with image generation + - Long text: `ai.process` with focused prompt + - Store generated content + - Populate `elements` array + +**Content Caching**: +- Extract content from source documents ONCE at the start +- Cache extracted content for reuse across all sections +- Pass cached content to sub-prompts to avoid re-extraction + +**Image Generation**: +- For `content_type: "image"` sections: + - Use `image_prompt` from structure + - Call `ai.generate` action with image generation + - Receive base64 image data + - Create image element: + ```json + { + "url": "data:image/png;base64,<base64_data>", + "base64Data": "<base64_data>", + "altText": "<alt_text>", + "caption": "<optional_caption>" + } + ``` + +### Phase 3: Integration + +**Purpose**: Merge all content into final document structure + +**Process**: +1. Validate all sections have content +2. Merge generated content into structure +3. Replace placeholders with actual data +4. Finalize JSON structure +5. Render to target format (docx, pdf, html, etc.) + +## Implementation Strategy + +### New Components Needed + +1. **Structure Generator** (`structureGenerator.py`) + - Generates document skeleton + - Identifies section complexity + - Creates generation hints + +2. **Content Generator** (`contentGenerator.py`) + - Generates content for each section + - Handles simple vs complex sections + - Manages sub-prompts and image generation + - Caches extracted content + +3. **Content Integrator** (`contentIntegrator.py`) + - Merges generated content + - Validates completeness + - Finalizes document structure + +### Modified Components + +1. 
**`generateDocument` action** + - Implement hierarchical generation as default + - Orchestrate three phases + - Add progress logging for each phase + +2. **`process` action** + - Support content caching (extract once, reuse) + - Support sub-prompt generation for sections + +3. **Prompt Builder** (`subPromptBuilderGeneration.py`) + - Add structure generation prompt + - Add section-specific content prompts + - Add image generation prompt templates + +4. **Renderers** (Update required): + - **HTML Renderer**: Create separate image files and link them + - **PDF Renderer**: Embed images using reportlab + - **XLSX Renderer**: Add image embedding support + - **PPTX Renderer**: Add image embedding support + +### New Action Parameters + +**For `generateDocument`**: +- `enableImageIntegration`: boolean (default: true) +- `maxSectionLength`: int (threshold for "complex" sections, default: 500 words) +- `parallelGeneration`: boolean (default: true) - enable parallel section generation +- `progressLogging`: boolean (default: true) - send ChatLog progress updates + +**For sub-prompts**: +- `sectionContext`: Previous sections for context +- `cachedContent`: Extracted content cache (to avoid re-extraction) +- `targetSection`: Section metadata +- `previousSections`: Array of already-generated sections for continuity + +## Benefits + +1. **Image Integration**: Images can be generated and embedded into documents +2. **Structured Approach**: Clear separation of structure and content +3. **Efficiency**: Content extracted once, reused across sections +4. **Scalability**: Can handle very long documents by splitting into sections +5. **Quality**: Better control over complex sections (images, long chapters) +6. **Flexibility**: Can generate different content types per section + +## Migration Strategy + +**Note**: No backwards compatibility needed - can implement directly as new default. + +1. **Phase 1**: Implement hierarchical generation as new default +2. 
**Phase 2**: Update renderers (HTML, PDF, XLSX, PPTX) for image support +3. **Phase 3**: Testing and refinement +4. **Phase 4**: Remove old single-pass mode (or keep as internal fallback only) + +## Example Workflow + +**User Request**: "Create a children's bedtime story with 5 illustrations" + +**Phase 1 Output**: +```json +{ + "metadata": {"title": "Flöckchen's Adventure"}, + "documents": [{ + "sections": [ + {"id": "title", "content_type": "heading", "complexity": "simple", ...}, + {"id": "intro", "content_type": "paragraph", "complexity": "simple", ...}, + {"id": "img1", "content_type": "image", "complexity": "complex", + "image_prompt": "Rabbit meeting owl", ...}, + {"id": "chapter1", "content_type": "paragraph", "complexity": "complex", ...}, + {"id": "img2", "content_type": "image", "complexity": "complex", ...}, + ... + ] + }] +} +``` + +**Phase 2 Process**: +- Generate title → populate elements +- Generate intro → populate elements +- Generate image 1 → call `ai.generate`, store base64 → populate elements +- Generate chapter 1 → sub-prompt → populate elements +- Generate image 2 → call `ai.generate`, store base64 → populate elements +- ... + +**Phase 3 Output**: Complete document with all sections populated, ready for rendering + +## Renderer Readiness Assessment + +### Current Renderer Status for Image Handling: + +1. **Text Renderer** (`rendererText.py`): ✅ **READY** + - Skips images, shows placeholder: `[Image: altText]` + - No changes needed + +2. **Markdown Renderer** (`rendererMarkdown.py`): ✅ **READY** + - Shows placeholder with truncated base64: `![altText](data:image/png;base64,...)` + - No changes needed (markdown limitation) + +3. 
**HTML Renderer** (`rendererHtml.py`): ⚠️ **NEEDS UPDATE** + - Currently: Embeds base64 directly in `<img>` tag as data URI + - **Required Change**: Create separate image files and link to them + - Implementation: Generate image files (e.g., `image_1.png`, `image_2.png`) alongside HTML + - Update `<img>` tags to use relative paths: `<img src="image_1.png" alt="...">` + - Return multiple files: HTML file + image files + +4. **PDF Renderer** (`rendererPdf.py`): ⚠️ **NEEDS UPDATE** + - Currently: Shows placeholder `[Image: altText]` + - **Required Change**: Embed images directly in PDF using reportlab + - Implementation: Use `reportlab.platypus.Image()` with base64 decoded bytes + +5. **DOCX Renderer** (`rendererDocx.py`): ✅ **READY** + - Embeds images directly using `doc.add_picture()` + - Adds captions below images + - No changes needed + +6. **XLSX Renderer** (`rendererXlsx.py`): ⚠️ **NEEDS IMPLEMENTATION** + - Currently: No image handling found + - **Required Change**: Add image support using openpyxl + - Implementation: Use `openpyxl.drawing.image.Image()` to embed images in cells + - Store images in worksheet cells or as floating images + +7. **PPTX Renderer** (`rendererPptx.py`): ⚠️ **NEEDS IMPLEMENTATION** + - Currently: No image handling found + - **Required Change**: Add image support using python-pptx + - Implementation: Use `slide.shapes.add_picture()` to add images to slides + +### Renderer Update Requirements: + +**Priority 1 (Critical for HTML output)**: +- HTML Renderer: Create separate image files and link them + +**Priority 2 (Important for document formats)**: +- PDF Renderer: Embed images using reportlab +- XLSX Renderer: Add image embedding support +- PPTX Renderer: Add image embedding support + +## Answers to Open Questions + +### 1. Performance: How to handle very large documents (100+ sections)? + +**Answer**: Use parallel processing where possible, with progress ChatLog messages. 
+ +**Implementation Strategy**: +- **Parallel Section Generation**: Generate independent sections in parallel using asyncio +- **Batch Processing**: Process sections in batches (e.g., 10 sections at a time) +- **Progress Tracking**: Send ChatLog progress updates: + - "Generating structure..." (Phase 1) + - "Generating content for section X/Y..." (Phase 2) + - "Generating image for section X..." (Phase 2 - images) + - "Merging content..." (Phase 3) + - "Rendering final document..." (Phase 3) +- **Streaming**: For very large documents, consider streaming partial results + +**Example Progress Messages**: +``` +Phase 1: Structure Generation (0% → 33%) +Phase 2: Content Generation (33% → 90%) + - Section 1/10: Heading (34%) + - Section 2/10: Paragraph (40%) + - Section 3/10: Image generation (50%) + - Section 4/10: Chapter (60%) + ... +Phase 3: Integration & Rendering (90% → 100%) +``` + +### 2. Error Handling: What if one section fails? + +**Answer**: Skip failed sections, keep section title and type, show error message in the section. + +**Implementation Strategy**: +- **Graceful Degradation**: Continue processing remaining sections +- **Error Section**: Create error placeholder section: + ```json + { + "id": "section_failed_3", + "content_type": "paragraph", + "elements": [{ + "text": "[ERROR: Failed to generate content for this section. Error: <error_message>]" + }], + "order": 3, + "error": true, + "errorMessage": "<error_message>" + } + ``` +- **Logging**: Log errors for debugging but don't fail entire document +- **User Notification**: Include error count in final progress message + +### 3. Image Storage: Where to store generated images? + +**Answer**: Store images in JSON as base64, as renderers need them afterwards. 
+ +**Implementation Strategy**: +- **In-Memory Storage**: Keep base64 strings in JSON structure during generation +- **JSON Structure**: Store in section elements: + ```json + { + "url": "data:image/png;base64,<base64_data>", + "base64Data": "<base64_data>", + "altText": "Image description", + "caption": "Optional caption" + } + ``` +- **Memory Management**: For very large images, consider compression or chunking +- **Renderer Access**: All renderers can access `base64Data` directly from JSON +- **HTML Special Case**: HTML renderer will extract base64, decode, and save as separate files during rendering + +### 4. Backward Compatibility: How to ensure existing workflows still work? + +**Answer**: No backwards compatibility needed. + +**Implementation Strategy**: +- **New Default**: Hierarchical generation becomes the default mode +- **Clean Migration**: All document generation uses hierarchical approach +- **No Fallback**: Remove single-pass mode (or keep as internal fallback only) +- **Breaking Change**: Acceptable since this is a new feature/enhancement + +## Next Steps + +1. **Review and Approval**: Get feedback on concept +2. **Detailed Design**: Design API and data structures +3. **Prototype**: Implement Phase 1 (structure generation) +4. **Testing**: Test with real use cases +5. **Full Implementation**: Implement all phases +6. **Migration**: Migrate existing workflows + diff --git a/modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md b/modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md new file mode 100644 index 00000000..55a0c35c --- /dev/null +++ b/modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md @@ -0,0 +1,1067 @@ +# Detailed Design: Hierarchical Document Generation with Image Integration + +## Table of Contents + +1. [Architecture Overview](#architecture-overview) +2. [Data Structures](#data-structures) +3. [Component Design](#component-design) +4. [API Design](#api-design) +5. 
[Image Handling](#image-handling) +6. [Progress Logging](#progress-logging) +7. [Error Handling](#error-handling) +8. [Performance Considerations](#performance-considerations) + +## Architecture Overview + +### System Flow + +``` +┌─────────────────────────────────────────────────────────────┐ +│ User Request: generateDocument │ +│ Parameters: prompt, documentList, resultType, etc. │ +└─────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Phase 1: Structure Generation │ +│ - Extract content from documentList (if provided) │ +│ - Cache extracted content │ +│ - Generate document skeleton with sections │ +│ - Identify section complexity │ +│ - Create generation hints │ +└─────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Phase 2: Content Generation (Parallel) │ +│ │ +│ Simple Sections (heading, short paragraph): │ +│ ┌────────────────────────────────────────┐ │ +│ │ Generate content directly via AI │ │ +│ │ Populate elements array │ │ +│ └────────────────────────────────────────┘ │ +│ │ +│ Complex Sections (image, long chapter): │ +│ ┌────────────────────────────────────────┐ │ +│ │ Create sub-prompt │ │ +│ │ Generate content (text or image) │ │ +│ │ Store in elements array │ │ +│ └────────────────────────────────────────┘ │ +│ │ +│ Progress Updates: │ +│ - "Generating section X/Y..." │ +│ - "Generating image for section X..." │ +└─────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Phase 3: Integration & Rendering │ +│ - Validate all sections have content │ +│ - Merge generated content into structure │ +│ - Replace placeholders with actual data │ +│ - Render to target format (docx, pdf, html, etc.) 
│ +└─────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Final Document(s) │ +│ - Single document (docx, pdf, html, etc.) │ +│ - Or multiple files (html + image files) │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Data Structures + +### Document Structure (Phase 1 Output) + +```python +{ + "metadata": { + "title": str, + "split_strategy": str, # "single_document" | "multi_document" + "source_documents": List[str], + "extraction_method": str + }, + "documents": [ + { + "id": str, + "title": str, + "filename": str, + "sections": [ + { + "id": str, + "content_type": str, # "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block" + "complexity": str, # "simple" | "complex" + "generation_hint": str, + "image_prompt": Optional[str], # Only for image sections + "order": int, + "elements": [], # Empty initially, populated in Phase 2 + "metadata": Optional[Dict[str, Any]] + } + ] + } + ] +} +``` + +### Section Content (Phase 2 Output) + +**Simple Section (heading)**: +```python +{ + "id": "section_title", + "content_type": "heading", + "elements": [ + { + "level": int, + "text": str + } + ], + "order": 1 +} +``` + +**Simple Section (paragraph)**: +```python +{ + "id": "section_intro", + "content_type": "paragraph", + "elements": [ + { + "text": str + } + ], + "order": 2 +} +``` + +**Complex Section (image)**: +```python +{ + "id": "section_image_1", + "content_type": "image", + "elements": [ + { + "url": "data:image/png;base64,", + "base64Data": str, # Full base64 encoded image + "altText": str, + "caption": Optional[str] + } + ], + "order": 3 +} +``` + +**Error Section**: +```python +{ + "id": "section_failed_4", + "content_type": "paragraph", + "elements": [ + { + "text": f"[ERROR: Failed to generate content for this section. 
Error: {error_message}]" + } + ], + "order": 4, + "error": True, + "errorMessage": str, + "originalContentType": str # Original content_type that failed +} +``` + +### Content Cache + +```python +{ + "extractedContent": List[ContentPart], # From extraction service + "extractionTimestamp": float, + "sourceDocuments": List[str] # Document IDs +} +``` + +### Generation Context + +```python +{ + "userPrompt": str, + "cachedContent": ContentCache, + "previousSections": List[Dict[str, Any]], # Already generated sections + "targetSection": Dict[str, Any], # Section to generate + "documentMetadata": Dict[str, Any] +} +``` + +## Component Design + +### 1. StructureGenerator + +**Purpose**: Generate document skeleton with section placeholders + +**Location**: `poweron/gateway/modules/services/serviceGeneration/subStructureGenerator.py` + +**Methods**: +```python +class StructureGenerator: + async def generateStructure( + self, + userPrompt: str, + documentList: Optional[DocumentReferenceList], + cachedContent: Optional[ContentCache], + services: Any + ) -> Dict[str, Any]: + """ + Generate document structure with sections. + + Returns: + Document structure with empty elements arrays + """ + + def _createStructurePrompt( + self, + userPrompt: str, + cachedContent: Optional[ContentCache], + services: Any + ) -> str: + """ + Create prompt for structure generation. + """ + + def _identifySectionComplexity( + self, + section: Dict[str, Any], + userPrompt: str + ) -> str: + """ + Identify if section is simple or complex. + + Rules: + - Images: always complex + - Long chapters (>maxSectionLength words): complex + - Others: simple + """ + + def _extractImagePrompts( + self, + structure: Dict[str, Any], + userPrompt: str + ) -> Dict[str, str]: + """ + Extract image generation prompts from structure and user prompt. + Maps section_id -> image_prompt + """ +``` + +### 2. 
ContentGenerator + +**Purpose**: Generate content for each section + +**Location**: `poweron/gateway/modules/services/serviceGeneration/subContentGenerator.py` + +**Methods**: +```python +class ContentGenerator: + async def generateContent( + self, + structure: Dict[str, Any], + cachedContent: Optional[ContentCache], + userPrompt: str, + services: Any, + progressCallback: Optional[Callable] = None + ) -> Dict[str, Any]: + """ + Generate content for all sections in structure. + + Args: + structure: Document structure from Phase 1 + cachedContent: Extracted content cache + userPrompt: Original user prompt + services: Services instance + progressCallback: Function to call for progress updates + + Returns: + Complete document structure with populated elements + """ + + async def _generateSectionContent( + self, + section: Dict[str, Any], + context: GenerationContext, + services: Any + ) -> Dict[str, Any]: + """ + Generate content for a single section. + + Returns: + Section with populated elements array + """ + + async def _generateSimpleSection( + self, + section: Dict[str, Any], + context: GenerationContext, + services: Any + ) -> Dict[str, Any]: + """ + Generate content for simple section (heading, paragraph). + """ + + async def _generateImageSection( + self, + section: Dict[str, Any], + context: GenerationContext, + services: Any + ) -> Dict[str, Any]: + """ + Generate image for image section. + Calls ai.generate action with image generation. + """ + + async def _generateComplexTextSection( + self, + section: Dict[str, Any], + context: GenerationContext, + services: Any + ) -> Dict[str, Any]: + """ + Generate content for complex text section (long chapter). + Uses focused sub-prompt. + """ + + async def _generateSectionsParallel( + self, + sections: List[Dict[str, Any]], + context: GenerationContext, + services: Any, + progressCallback: Optional[Callable] = None + ) -> List[Dict[str, Any]]: + """ + Generate content for multiple sections in parallel. 
+ Uses asyncio.gather for parallel execution. + """ + + def _createSectionPrompt( + self, + section: Dict[str, Any], + context: GenerationContext + ) -> str: + """ + Create sub-prompt for section content generation. + """ +``` + +### 3. ContentIntegrator + +**Purpose**: Merge generated content and render final document + +**Location**: `poweron/gateway/modules/services/serviceGeneration/subContentIntegrator.py` + +**Methods**: +```python +class ContentIntegrator: + def integrateContent( + self, + structure: Dict[str, Any], + generatedSections: List[Dict[str, Any]] + ) -> Dict[str, Any]: + """ + Merge generated sections into document structure. + + Returns: + Complete document structure ready for rendering + """ + + def validateCompleteness( + self, + document: Dict[str, Any] + ) -> Tuple[bool, List[str]]: + """ + Validate that all sections have content. + + Returns: + (is_complete, list_of_missing_sections) + """ + + def createErrorSection( + self, + originalSection: Dict[str, Any], + errorMessage: str + ) -> Dict[str, Any]: + """ + Create error placeholder section. + """ +``` + +### 4. Modified generateDocument Action + +**Location**: `poweron/gateway/modules/workflows/methods/methodAi/actions/generateDocument.py` + +**Changes**: +```python +@action +async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: + """ + Generate documents using hierarchical approach. 
+ """ + # Extract parameters + prompt = parameters.get("prompt") + documentList = parameters.get("documentList", []) + resultType = parameters.get("resultType", "docx") + maxSectionLength = parameters.get("maxSectionLength", 500) + parallelGeneration = parameters.get("parallelGeneration", True) + progressLogging = parameters.get("progressLogging", True) + + # Create operation ID for progress tracking + operationId = f"doc_gen_{self.services.workflow.id}_{int(time.time())}" + parentOperationId = parameters.get('parentOperationId') + + try: + # Phase 1: Structure Generation + if progressLogging: + self.services.chat.progressLogStart( + operationId, + "Document", + "Structure Generation", + "Generating document structure...", + parentOperationId=parentOperationId + ) + + structureGenerator = StructureGenerator(self.services) + + # Extract and cache content if documentList provided + cachedContent = None + if documentList: + # Extract content once + chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) + if chatDocuments: + extractionOptions = ExtractionOptions( + prompt="Extract all content from documents", + mergeStrategy=MergeStrategy(mergeType="concatenate") + ) + extractedResults = self.services.extraction.extractContent( + chatDocuments, + extractionOptions + ) + cachedContent = { + "extractedContent": extractedResults, + "extractionTimestamp": time.time(), + "sourceDocuments": [doc.id for doc in chatDocuments] + } + + # Generate structure + structure = await structureGenerator.generateStructure( + userPrompt=prompt, + documentList=documentList, + cachedContent=cachedContent, + services=self.services + ) + + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated") + + # Phase 2: Content Generation + if progressLogging: + self.services.chat.progressLogUpdate( + operationId, + 0.34, + "Starting content generation..." 
+ ) + + contentGenerator = ContentGenerator(self.services) + + def progressCallback(sectionIndex: int, totalSections: int, message: str): + if progressLogging: + progress = 0.34 + (0.56 * (sectionIndex / totalSections)) + self.services.chat.progressLogUpdate( + operationId, + progress, + f"Section {sectionIndex}/{totalSections}: {message}" + ) + + completeStructure = await contentGenerator.generateContent( + structure=structure, + cachedContent=cachedContent, + userPrompt=prompt, + services=self.services, + progressCallback=progressCallback + ) + + if progressLogging: + self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated") + + # Phase 3: Integration & Rendering + if progressLogging: + self.services.chat.progressLogUpdate( + operationId, + 0.91, + "Rendering final document..." + ) + + # Use existing renderReport method + title = structure.get("metadata", {}).get("title", "Generated Document") + renderedContent, mimeType = await self.services.generation.renderReport( + extractedContent=completeStructure, + outputFormat=resultType, + title=title, + userPrompt=prompt, + aiService=self.services.ai + ) + + # Create document + document = self.services.generation._createDocument( + fileName=f"document.{resultType}", + mimeType=mimeType, + content=renderedContent, + base64encoded=(mimeType not in ["text/plain", "text/html", "text/markdown"]), + messageId=None + ) + + if progressLogging: + self.services.chat.progressLogFinish(operationId, True) + + return ActionResult.isSuccess( + documents=[ActionDocument( + documentName=f"document.{resultType}", + documentData=renderedContent, + mimeType=mimeType + )] + ) + + except Exception as e: + logger.error(f"Error in hierarchical document generation: {str(e)}") + if progressLogging: + self.services.chat.progressLogFinish(operationId, False) + return ActionResult.isFailure(error=str(e)) +``` + +## API Design + +### Structure Generation Prompt + +```python +def _createStructurePrompt( + userPrompt: str, + 
cachedContent: Optional[ContentCache], + services: Any +) -> str: + """ + Create prompt for structure generation. + """ + prompt = f""" +{'='*80} +USER REQUEST: +{'='*80} +{userPrompt} +{'='*80} + +TASK: Generate a document STRUCTURE (skeleton) with sections. +Do NOT generate actual content yet - only the structure. + +{'='*80} +EXTRACTED CONTENT (if available): +{'='*80} +{_formatCachedContent(cachedContent) if cachedContent else "No source documents provided."} +{'='*80} + +INSTRUCTIONS: +1. Analyze the user request and extracted content +2. Create a document structure with sections +3. For each section, specify: + - id: Unique identifier + - content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block" + - complexity: "simple" (can generate directly) or "complex" (needs sub-prompt) + - generation_hint: Brief description of what content should be generated + - image_prompt: (only for image sections) Detailed prompt for image generation + - order: Section order number + - elements: [] (empty array - will be populated later) + +4. Identify image sections: + - If user requests illustrations/images, create image sections + - Add image_prompt field with detailed description + - Set complexity to "complex" + +5. Identify complex text sections: + - Long chapters (>500 words expected) should be marked as "complex" + - Short paragraphs/headings should be "simple" + +6. 
Return ONLY valid JSON following this structure: +{{ + "metadata": {{ + "title": "Document Title", + "split_strategy": "single_document", + "source_documents": [], + "extraction_method": "ai_generation" + }}, + "documents": [ + {{ + "id": "doc_1", + "title": "Document Title", + "filename": "document.json", + "sections": [ + {{ + "id": "section_1", + "content_type": "heading", + "complexity": "simple", + "generation_hint": "Main title", + "order": 1, + "elements": [] + }}, + {{ + "id": "section_2", + "content_type": "image", + "complexity": "complex", + "generation_hint": "Illustration for chapter 1", + "image_prompt": "Detailed description for image generation", + "order": 2, + "elements": [] + }} + ] + }} + ] +}} + +Return ONLY the JSON structure. No explanations. +""" + return prompt +``` + +### Section Content Generation Prompt + +```python +def _createSectionPrompt( + section: Dict[str, Any], + context: GenerationContext +) -> str: + """ + Create sub-prompt for section content generation. + """ + sectionType = section.get("content_type") + generationHint = section.get("generation_hint", "") + + prompt = f""" +{'='*80} +SECTION TO GENERATE: +{'='*80} +Type: {sectionType} +Hint: {generationHint} +{'='*80} + +CONTEXT: +- User Request: {context.userPrompt} +- Previous Sections: {len(context.previousSections)} sections already generated +- Document Title: {context.documentMetadata.get('title', 'Unknown')} + +{'='*80} +EXTRACTED CONTENT (if available): +{'='*80} +{_formatCachedContent(context.cachedContent) if context.cachedContent else "None"} +{'='*80} + +TASK: Generate content for this section ONLY. + +INSTRUCTIONS: +1. Generate content appropriate for section type: {sectionType} +2. Use the generation hint: {generationHint} +3. Consider previous sections for continuity +4. Use extracted content if relevant + +5. 
Return ONLY the elements array for this section: + +For heading: +{{ + "elements": [ + {{"level": 1, "text": "Heading Text"}} + ] +}} + +For paragraph: +{{ + "elements": [ + {{"text": "Paragraph text content"}} + ] +}} + +For image: +{{ + "elements": [ + {{ + "url": "data:image/png;base64,", + "base64Data": "", + "altText": "Image description", + "caption": "Optional caption" + }} + ] +}} + +Return ONLY the elements array as JSON. No other text. +""" + return prompt +``` + +## Image Handling + +### Image Generation Flow + +```python +async def _generateImageSection( + section: Dict[str, Any], + context: GenerationContext, + services: Any +) -> Dict[str, Any]: + """ + Generate image for image section. + """ + imagePrompt = section.get("image_prompt") + if not imagePrompt: + raise ValueError(f"Image section {section.get('id')} missing image_prompt") + + # Call ai.generate action with image generation + from modules.workflows.methods.methodAi.actions.generate import generate + + generateParams = { + "prompt": imagePrompt, + "resultType": "png", + "parentOperationId": context.operationId + } + + result = await generate(self=services.ai, parameters=generateParams) + + if not result.success or not result.documents: + raise ValueError(f"Image generation failed: {result.error}") + + # Extract base64 image data + imageDoc = result.documents[0] + base64Data = imageDoc.documentData + + # Create image element + section["elements"] = [{ + "url": f"data:image/png;base64,{base64Data}", + "base64Data": base64Data, + "altText": section.get("generation_hint", "Image"), + "caption": section.get("metadata", {}).get("caption") + }] + + return section +``` + +### HTML Renderer Image Handling + +**Location**: `poweron/gateway/modules/services/serviceGeneration/renderers/rendererHtml.py` + +**Changes**: +```python +async def render( + self, + extractedContent: Dict[str, Any], + title: str, + userPrompt: str = None, + aiService=None +) -> Tuple[str, str]: + """ + Render HTML with separate 
image files. + + Returns: + (html_content, mime_type) + """ + # Generate HTML + htmlContent = await self._generateHtmlFromJson(...) + + # Extract images and create separate files + images = self._extractImages(extractedContent) + + if images: + # Create image files + imageFiles = [] + for idx, imageData in enumerate(images): + base64Data = imageData.get("base64Data") + if base64Data: + # Decode base64 + imageBytes = base64.b64decode(base64Data) + + # Create filename + filename = f"image_{idx + 1}.png" + + # Update HTML to use relative path + htmlContent = htmlContent.replace( + f'data:image/png;base64,{base64Data}', + filename + ) + + imageFiles.append({ + "filename": filename, + "content": imageBytes, + "mimeType": "image/png" + }) + + # Return HTML + image files info + # Note: This requires modification to return multiple files + # For now, embed base64 (will be updated in implementation) + return htmlContent, "text/html" + + return htmlContent, "text/html" + +def _extractImages(self, jsonContent: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Extract all images from JSON structure. 
+ """ + images = [] + + documents = jsonContent.get("documents", []) + if not documents: + sections = jsonContent.get("sections", []) + documents = [{"sections": sections}] + + for doc in documents: + sections = doc.get("sections", []) + for section in sections: + if section.get("content_type") == "image": + elements = section.get("elements", []) + for element in elements: + if element.get("base64Data"): + images.append(element) + + return images +``` + +## Progress Logging + +### Progress Stages + +```python +PROGRESS_STAGES = { + "structure_generation": { + "start": 0.0, + "end": 0.33, + "messages": [ + "Extracting content from documents...", + "Generating document structure...", + "Structure generated" + ] + }, + "content_generation": { + "start": 0.34, + "end": 0.90, + "messages": [ + "Starting content generation...", + "Generating section {current}/{total}...", + "Generating image for section {section_id}...", + "Content generated" + ] + }, + "integration_rendering": { + "start": 0.91, + "end": 1.0, + "messages": [ + "Rendering final document...", + "Document complete" + ] + } +} +``` + +### Progress Callback Implementation + +```python +def createProgressCallback( + operationId: str, + totalSections: int, + services: Any +) -> Callable: + """ + Create progress callback function. 
+ """ + def progressCallback( + sectionIndex: int, + totalSections: int, + message: str + ): + # Calculate progress + baseProgress = 0.34 # Start of content generation phase + phaseProgress = 0.56 # Length of content generation phase + sectionProgress = (sectionIndex / totalSections) * phaseProgress + currentProgress = baseProgress + sectionProgress + + # Update progress log + services.chat.progressLogUpdate( + operationId, + currentProgress, + f"Section {sectionIndex}/{totalSections}: {message}" + ) + + return progressCallback +``` + +## Error Handling + +### Error Section Creation + +```python +def createErrorSection( + originalSection: Dict[str, Any], + errorMessage: str +) -> Dict[str, Any]: + """ + Create error placeholder section. + """ + return { + "id": originalSection.get("id", "unknown"), + "content_type": "paragraph", # Change to paragraph for error display + "elements": [{ + "text": f"[ERROR: Failed to generate {originalSection.get('content_type', 'content')} for section '{originalSection.get('id', 'unknown')}'. Error: {errorMessage}]" + }], + "order": originalSection.get("order", 0), + "error": True, + "errorMessage": errorMessage, + "originalContentType": originalSection.get("content_type") + } +``` + +### Error Handling in Content Generation + +```python +async def _generateSectionContent( + self, + section: Dict[str, Any], + context: GenerationContext, + services: Any +) -> Dict[str, Any]: + """ + Generate content for a single section with error handling. 
+ """ + try: + complexity = section.get("complexity", "simple") + contentType = section.get("content_type") + + if contentType == "image": + return await self._generateImageSection(section, context, services) + elif complexity == "complex": + return await self._generateComplexTextSection(section, context, services) + else: + return await self._generateSimpleSection(section, context, services) + + except Exception as e: + logger.error(f"Error generating section {section.get('id')}: {str(e)}") + return createErrorSection(section, str(e)) +``` + +## Performance Considerations + +### Parallel Generation + +```python +async def _generateSectionsParallel( + self, + sections: List[Dict[str, Any]], + context: GenerationContext, + services: Any, + progressCallback: Optional[Callable] = None +) -> List[Dict[str, Any]]: + """ + Generate content for multiple sections in parallel. + """ + async def generateWithProgress(section: Dict[str, Any], index: int): + if progressCallback: + progressCallback(index + 1, len(sections), f"Generating {section.get('content_type')}...") + + return await self._generateSectionContent(section, context, services) + + # Generate all sections in parallel + results = await asyncio.gather( + *[generateWithProgress(section, idx) for idx, section in enumerate(sections)], + return_exceptions=True + ) + + # Handle exceptions + generatedSections = [] + for idx, result in enumerate(results): + if isinstance(result, Exception): + logger.error(f"Error generating section {idx}: {str(result)}") + generatedSections.append( + createErrorSection(sections[idx], str(result)) + ) + else: + generatedSections.append(result) + + return generatedSections +``` + +### Batch Processing for Large Documents + +```python +async def generateContent( + self, + structure: Dict[str, Any], + cachedContent: Optional[ContentCache], + userPrompt: str, + services: Any, + progressCallback: Optional[Callable] = None, + batchSize: int = 10 +) -> Dict[str, Any]: + """ + Generate content 
with batching for large documents. + """ + documents = structure.get("documents", []) + + for doc in documents: + sections = doc.get("sections", []) + + # Process in batches + for batchStart in range(0, len(sections), batchSize): + batch = sections[batchStart:batchStart + batchSize] + + # Generate batch in parallel + generatedBatch = await self._generateSectionsParallel( + batch, + context, + services, + progressCallback + ) + + # Update sections + for idx, generated in enumerate(generatedBatch): + sections[batchStart + idx] = generated + + return structure +``` + +## Testing Strategy + +### Unit Tests + +1. **StructureGenerator Tests**: + - Test structure generation with/without source documents + - Test complexity identification + - Test image prompt extraction + +2. **ContentGenerator Tests**: + - Test simple section generation + - Test image section generation + - Test complex text section generation + - Test parallel generation + - Test error handling + +3. **ContentIntegrator Tests**: + - Test content merging + - Test validation + - Test error section creation + +### Integration Tests + +1. **End-to-End Tests**: + - Test complete document generation flow + - Test with images + - Test with long documents + - Test error scenarios + +2. **Renderer Tests**: + - Test HTML renderer with separate image files + - Test PDF renderer with embedded images + - Test XLSX/PPTX renderers with images + +### Performance Tests + +1. **Large Document Tests**: + - Test with 100+ sections + - Test parallel generation performance + - Test memory usage + +2. 
**Image Generation Tests**: + - Test multiple images + - Test large images + - Test image generation failures + diff --git a/modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md b/modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md new file mode 100644 index 00000000..4476c2b9 --- /dev/null +++ b/modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md @@ -0,0 +1,398 @@ +# Implementation Plan: Hierarchical Document Generation + +## Overview + +This document outlines the step-by-step implementation plan for the hierarchical document generation system with image integration. + +## Implementation Phases + +### Phase 1: Core Infrastructure (Week 1) + +**Goal**: Set up core components and data structures + +#### Tasks: + +1. **Create StructureGenerator Component** + - [ ] Create `subStructureGenerator.py` + - [ ] Implement `generateStructure()` method + - [ ] Implement `_createStructurePrompt()` method + - [ ] Implement `_identifySectionComplexity()` method + - [ ] Implement `_extractImagePrompts()` method + - [ ] Add unit tests + +2. **Create ContentGenerator Component** + - [ ] Create `subContentGenerator.py` + - [ ] Implement `generateContent()` method + - [ ] Implement `_generateSectionContent()` method + - [ ] Implement `_generateSimpleSection()` method + - [ ] Implement `_generateComplexTextSection()` method + - [ ] Implement `_createSectionPrompt()` method + - [ ] Add unit tests + +3. **Create ContentIntegrator Component** + - [ ] Create `subContentIntegrator.py` + - [ ] Implement `integrateContent()` method + - [ ] Implement `validateCompleteness()` method + - [ ] Implement `createErrorSection()` method + - [ ] Add unit tests + +4. 
**Update generateDocument Action** + - [ ] Modify `generateDocument.py` to use hierarchical approach + - [ ] Add Phase 1: Structure generation + - [ ] Add Phase 2: Content generation (sequential first) + - [ ] Add Phase 3: Integration & rendering + - [ ] Add basic progress logging + - [ ] Add error handling + +**Deliverables**: +- Core components created +- Basic hierarchical generation working (sequential) +- Unit tests passing + +**Estimated Time**: 3-4 days + +--- + +### Phase 2: Image Generation Integration (Week 1-2) + +**Goal**: Integrate image generation into content generation + +#### Tasks: + +1. **Implement Image Section Generation** + - [ ] Add `_generateImageSection()` method to ContentGenerator + - [ ] Integrate with `ai.generate` action + - [ ] Handle base64 image data storage + - [ ] Add image prompt extraction from structure + - [ ] Add error handling for image generation failures + +2. **Update Structure Generation Prompt** + - [ ] Add image section detection in structure prompt + - [ ] Add image_prompt field extraction + - [ ] Test with user prompts requesting images + +3. **Test Image Integration** + - [ ] Test image generation in document structure + - [ ] Test multiple images in one document + - [ ] Test image generation failures + +**Deliverables**: +- Image generation integrated +- Images stored as base64 in JSON +- Error handling for image failures + +**Estimated Time**: 2-3 days + +--- + +### Phase 3: Parallel Processing & Progress Logging (Week 2) + +**Goal**: Implement parallel section generation and detailed progress logging + +#### Tasks: + +1. **Implement Parallel Generation** + - [ ] Add `_generateSectionsParallel()` method + - [ ] Use `asyncio.gather()` for parallel execution + - [ ] Add batch processing for large documents + - [ ] Handle exceptions in parallel execution + - [ ] Test parallel vs sequential performance + +2. 
**Enhance Progress Logging** + - [ ] Create progress callback system + - [ ] Add detailed progress messages: + - Structure generation progress + - Section-by-section progress + - Image generation progress + - Rendering progress + - [ ] Calculate accurate progress percentages + - [ ] Test progress updates + +3. **Update generateDocument Action** + - [ ] Integrate parallel generation + - [ ] Add progress callback to content generation + - [ ] Update progress logging throughout phases + +**Deliverables**: +- Parallel section generation working +- Detailed progress logging +- Performance improvements + +**Estimated Time**: 2-3 days + +--- + +### Phase 4: Renderer Updates (Week 2-3) + +**Goal**: Update renderers to properly handle images + +#### Tasks: + +1. **Update HTML Renderer** + - [ ] Modify `rendererHtml.py` + - [ ] Add `_extractImages()` method + - [ ] Implement separate image file creation + - [ ] Update HTML to use relative image paths + - [ ] Handle multiple image files + - [ ] Test HTML + image files output + +2. **Update PDF Renderer** + - [ ] Modify `rendererPdf.py` + - [ ] Update `_renderJsonImage()` to embed images + - [ ] Use `reportlab.platypus.Image()` with base64 + - [ ] Handle image sizing and positioning + - [ ] Test PDF with embedded images + +3. **Update XLSX Renderer** + - [ ] Modify `rendererXlsx.py` + - [ ] Add `_renderJsonImage()` method + - [ ] Use `openpyxl.drawing.image.Image()` to embed images + - [ ] Handle image placement in cells + - [ ] Test XLSX with images + +4. 
**Update PPTX Renderer** + - [ ] Modify `rendererPptx.py` + - [ ] Add `_renderJsonImage()` method + - [ ] Use `slide.shapes.add_picture()` to add images + - [ ] Handle image sizing on slides + - [ ] Test PPTX with images + +**Deliverables**: +- All renderers support images +- HTML creates separate image files +- PDF/XLSX/PPTX embed images directly + +**Estimated Time**: 4-5 days + +--- + +### Phase 5: Content Caching & Optimization (Week 3) + +**Goal**: Implement content caching to avoid re-extraction + +#### Tasks: + +1. **Implement Content Cache** + - [ ] Create ContentCache data structure + - [ ] Extract content once at start of generation + - [ ] Pass cached content to all sub-prompts + - [ ] Add cache validation (check if documents changed) + - [ ] Test cache reuse + +2. **Optimize Prompt Building** + - [ ] Update structure prompt to use cached content + - [ ] Update section prompts to use cached content + - [ ] Format cached content efficiently + - [ ] Test prompt sizes + +3. **Performance Testing** + - [ ] Test with large documents + - [ ] Test with multiple source documents + - [ ] Measure performance improvements + - [ ] Optimize bottlenecks + +**Deliverables**: +- Content caching implemented +- No redundant content extraction +- Performance optimized + +**Estimated Time**: 2-3 days + +--- + +### Phase 6: Error Handling & Edge Cases (Week 3-4) + +**Goal**: Robust error handling and edge case coverage + +#### Tasks: + +1. **Enhance Error Handling** + - [ ] Improve error section creation + - [ ] Add error recovery strategies + - [ ] Handle partial failures gracefully + - [ ] Add error logging and reporting + +2. **Handle Edge Cases** + - [ ] Empty document list + - [ ] No sections generated + - [ ] All sections fail + - [ ] Very large images + - [ ] Very long documents (100+ sections) + - [ ] Missing image prompts + - [ ] Invalid section types + +3. 
**Add Validation** + - [ ] Validate structure before content generation + - [ ] Validate content before integration + - [ ] Validate final document before rendering + - [ ] Add comprehensive error messages + +**Deliverables**: +- Robust error handling +- Edge cases covered +- Clear error messages + +**Estimated Time**: 2-3 days + +--- + +### Phase 7: Testing & Refinement (Week 4) + +**Goal**: Comprehensive testing and refinement + +#### Tasks: + +1. **Unit Testing** + - [ ] Complete unit tests for all components + - [ ] Test all methods + - [ ] Test error scenarios + - [ ] Achieve >80% code coverage + +2. **Integration Testing** + - [ ] Test end-to-end document generation + - [ ] Test with various document types + - [ ] Test with images + - [ ] Test with long documents + - [ ] Test error scenarios + +3. **Performance Testing** + - [ ] Test with 10, 50, 100+ sections + - [ ] Measure generation time + - [ ] Measure memory usage + - [ ] Compare parallel vs sequential + - [ ] Optimize if needed + +4. **User Acceptance Testing** + - [ ] Test with real user scenarios + - [ ] Test bedtime story with images (original use case) + - [ ] Test business documents + - [ ] Test technical documents + - [ ] Gather feedback + +5. 
**Documentation** + - [ ] Update API documentation + - [ ] Add code comments + - [ ] Update user guides + - [ ] Create examples + +**Deliverables**: +- Comprehensive test suite +- Performance benchmarks +- Documentation complete +- Ready for production + +**Estimated Time**: 3-4 days + +--- + +## Dependencies + +### External Dependencies +- `asyncio` - For parallel processing +- `base64` - For image encoding/decoding +- `reportlab` - For PDF image embedding +- `openpyxl` - For XLSX image embedding +- `python-pptx` - For PPTX image embedding + +### Internal Dependencies +- `serviceGeneration` - Main generation service +- `serviceAi` - AI service for generation +- `serviceExtraction` - Content extraction service +- `methodAi.actions.generate` - Image generation action +- `methodAi.actions.process` - Text generation action + +## Risk Mitigation + +### Risks and Mitigation Strategies + +1. **Risk**: Image generation failures break entire document + - **Mitigation**: Error handling creates error sections, continues processing + +2. **Risk**: Parallel generation causes memory issues + - **Mitigation**: Batch processing, limit concurrent operations + +3. **Risk**: Large base64 images cause JSON size issues + - **Mitigation**: Consider compression or chunking for very large images + +4. **Risk**: HTML renderer needs to return multiple files + - **Mitigation**: Modify return type or create file bundle system + +5. 
**Risk**: Performance not meeting expectations + - **Mitigation**: Profile and optimize bottlenecks, consider caching + +## Success Criteria + +### Functional Requirements +- ✅ Documents can be generated with embedded images +- ✅ HTML renderer creates separate image files +- ✅ PDF/XLSX/PPTX renderers embed images +- ✅ Progress logging shows detailed progress +- ✅ Error handling prevents complete failures +- ✅ Content extraction happens only once + +### Performance Requirements +- ✅ Parallel generation improves performance by 2x+ for multi-section documents +- ✅ Progress updates appear within 1 second of action +- ✅ Documents with 50+ sections complete in <5 minutes + +### Quality Requirements +- ✅ >80% code coverage +- ✅ All edge cases handled +- ✅ Clear error messages +- ✅ Comprehensive documentation + +## Rollout Plan + +### Step 1: Internal Testing (Week 4) +- Deploy to development environment +- Internal team testing +- Fix critical issues + +### Step 2: Beta Testing (Week 5) +- Deploy to staging environment +- Select beta users +- Gather feedback +- Fix issues + +### Step 3: Production Deployment (Week 6) +- Deploy to production +- Monitor performance +- Monitor errors +- Gather user feedback + +### Step 4: Optimization (Ongoing) +- Monitor usage patterns +- Optimize based on real-world usage +- Add enhancements based on feedback + +## Timeline Summary + +| Phase | Duration | Start | End | +|-------|----------|-------|-----| +| Phase 1: Core Infrastructure | 3-4 days | Day 1 | Day 4 | +| Phase 2: Image Integration | 2-3 days | Day 4 | Day 7 | +| Phase 3: Parallel Processing | 2-3 days | Day 7 | Day 10 | +| Phase 4: Renderer Updates | 4-5 days | Day 10 | Day 15 | +| Phase 5: Content Caching | 2-3 days | Day 15 | Day 18 | +| Phase 6: Error Handling | 2-3 days | Day 18 | Day 21 | +| Phase 7: Testing & Refinement | 3-4 days | Day 21 | Day 25 | + +**Total Estimated Time**: 4-5 weeks + +## Next Steps + +1. 
**Review and Approve Plan** + - Review implementation plan + - Approve timeline + - Assign resources + +2. **Set Up Development Environment** + - Create feature branch + - Set up test infrastructure + - Prepare development tools + +3. **Begin Phase 1** + - Start with StructureGenerator + - Set up project structure + - Begin implementation + diff --git a/modules/workflows/processing/shared/RENDERING_ISSUE_ANALYSIS.md b/modules/workflows/processing/shared/RENDERING_ISSUE_ANALYSIS.md new file mode 100644 index 00000000..ee790a69 --- /dev/null +++ b/modules/workflows/processing/shared/RENDERING_ISSUE_ANALYSIS.md @@ -0,0 +1,238 @@ +# Rendering Issue Analysis +## Why HTML Documents Are Being Rendered as Text + +**Date**: 2025-12-22 +**Issue**: Documents requested as HTML are being output as text/plain + +--- + +## Root Cause Analysis + +### Issue 1: `resultType` Not Extracted from Task Objective ❌ **CRITICAL** + +**Problem**: +- Task objective clearly states: "Generate a complete, well-structured **HTML document**" +- Validation shows: `EXPECTED FORMATS: ['html']` +- But action was called with: `ai.generateDocument {}` (empty parameters) +- So `resultType` defaults to `"docx"` instead of `"html"` + +**Location**: +- `generateDocument.py` line 44: `resultType = parameters.get("resultType", "docx")` +- No parameter extraction from task objective/prompt + +**Impact**: **CRITICAL** - Wrong format is used even though task clearly requests HTML + +**Fix Needed**: +- Extract `resultType` from task objective/prompt before calling action +- Or enhance `generateDocument` to detect format from prompt if not provided + +--- + +### Issue 2: HTML Not in Action Definition Options ❌ **CRITICAL** + +**Problem**: +- Action definition in `methodAi.py` line 357 only lists: `["docx", "pdf", "txt", "md"]` +- `"html"` is **NOT** in the allowed options +- But docstring says HTML is supported: `"resultType (str, optional): Output format (docx, pdf, txt, md, html, etc.)"` + +**Location**: +- 
`methodAi.py` line 357: `frontendOptions=["docx", "pdf", "txt", "md"]` + +**Impact**: **CRITICAL** - Even if HTML is requested, it might be rejected or not recognized + +**Fix Needed**: +- Add `"html"` to `frontendOptions` list + +--- + +### Issue 3: Renderer Fallback to Text ❌ **CRITICAL** + +**Problem**: +- When `resultType="docx"` is used (default) +- If docx renderer fails or is not found +- System falls back to text renderer (lines 403-404 of `mainServiceGeneration.py`) +- This explains why output is `text/plain` instead of HTML + +**Location**: +- `mainServiceGeneration.py` lines 393-409: `_getFormatRenderer()` method +- Line 403: `logger.warning(f"No renderer found for format {output_format}, falling back to text")` + +**Impact**: **CRITICAL** - Wrong format is rendered + +**Fix Needed**: +- Fix docx renderer if it's failing +- Or better: Extract correct format from prompt + +--- + +### Issue 4: Missing Parameter Extraction ❌ **HIGH PRIORITY** + +**Problem**: +- Task objective contains format information ("HTML document") +- But no parameter extraction step extracts `resultType` from prompt +- Action is called with empty parameters `{}` + +**Location**: +- Workflow execution - parameter extraction phase +- Should extract `resultType: "html"` from task objective + +**Impact**: **HIGH** - System can't infer format from user intent + +**Fix Needed**: +- Add parameter extraction that detects format from prompt +- Or enhance `generateDocument` to auto-detect format from prompt + +--- + +## Flow Analysis + +### Expected Flow: +``` +1. Task Objective: "Generate HTML document..." +2. Parameter Extraction: Extract resultType="html" from objective +3. Action Call: ai.generateDocument({resultType: "html", prompt: "..."}) +4. Content Generation: Generate sections with content +5. Integration: Merge sections into complete structure +6. Rendering: Call renderReport(outputFormat="html") +7. HTML Renderer: Render to HTML +8. 
Output: document.html (text/html) +``` + +### Actual Flow (Broken): +``` +1. Task Objective: "Generate HTML document..." +2. Parameter Extraction: ❌ MISSING - no extraction +3. Action Call: ai.generateDocument({}) ❌ Empty parameters +4. Content Generation: ✅ Generate sections with content +5. Integration: ✅ Merge sections into complete structure +6. Rendering: Call renderReport(outputFormat="docx") ❌ Wrong format +7. Docx Renderer: ❌ Fails or not found +8. Fallback: Text renderer ❌ Wrong renderer +9. Output: document.text (text/plain) ❌ Wrong format +``` + +--- + +## Fixes Required + +### Fix 1: Add HTML to Action Definition Options ✅ **EASY** + +**File**: `gateway/modules/workflows/methods/methodAi/methodAi.py` +**Line**: 357 + +**Change**: +```python +frontendOptions=["docx", "pdf", "txt", "md", "html"], # Added "html" +``` + +--- + +### Fix 2: Extract resultType from Prompt ✅ **MEDIUM** + +**Option A**: Enhance `generateDocument` to detect format from prompt + +**File**: `gateway/modules/workflows/methods/methodAi/actions/generateDocument.py` +**After line 44**: + +```python +resultType = parameters.get("resultType", "docx") + +# Auto-detect format from prompt if not provided +if resultType == "docx" and prompt: + promptLower = prompt.lower() + if "html" in promptLower or "html5" in promptLower: + resultType = "html" + elif "pdf" in promptLower: + resultType = "pdf" + elif "markdown" in promptLower or "md" in promptLower: + resultType = "md" + elif "text" in promptLower or "txt" in promptLower: + resultType = "txt" +``` + +**Option B**: Extract in parameter planning phase (better, but requires workflow changes) + +--- + +### Fix 3: Improve Renderer Error Handling ✅ **MEDIUM** + +**File**: `gateway/modules/services/serviceGeneration/mainServiceGeneration.py` +**Lines**: 393-409 + +**Enhance**: Better error messages and logging when renderer not found + +```python +def _getFormatRenderer(self, output_format: str): + """Get the appropriate renderer for the 
specified format using auto-discovery.""" + try: + from .renderers.registry import getRenderer + renderer = getRenderer(output_format, services=self.services) + + if renderer: + return renderer + + # Log available formats for debugging + from .renderers.registry import getSupportedFormats + availableFormats = getSupportedFormats() + logger.error( + f"No renderer found for format '{output_format}'. " + f"Available formats: {availableFormats}" + ) + + # Fallback to text renderer if no specific renderer found + logger.warning(f"Falling back to text renderer for format {output_format}") + fallbackRenderer = getRenderer('text', services=self.services) + if fallbackRenderer: + return fallbackRenderer + + logger.error("Even text renderer fallback failed") + return None + + except Exception as e: + logger.error(f"Error getting renderer for {output_format}: {str(e)}") + return None +``` + +--- + +## Verification Steps + +After fixes: + +1. **Test HTML Generation**: + - Task: "Generate HTML document about AI" + - Expected: `resultType="html"` extracted or detected + - Expected: HTML renderer used + - Expected: Output is `document.html` with `text/html` MIME type + +2. **Test Format Detection**: + - Task: "Generate PDF report" + - Expected: `resultType="pdf"` detected + - Expected: PDF renderer used + +3. **Test Explicit Parameter**: + - Action: `ai.generateDocument({resultType: "html", prompt: "..."})` + - Expected: HTML renderer used (no fallback) + +--- + +## Summary + +**Root Causes**: +1. ❌ `resultType` not extracted from task objective +2. ❌ HTML not in action definition options +3. ❌ Renderer fallback to text when docx fails +4. 
❌ No format auto-detection from prompt + +**Priority**: **CRITICAL** - System cannot produce HTML documents as requested + +**Estimated Fix Time**: +- Fix 1: 5 minutes +- Fix 2: 30 minutes +- Fix 3: 15 minutes +- **Total**: ~1 hour + +--- + +**Analysis Complete**: 2025-12-22 + diff --git a/modules/workflows/processing/shared/methodDiscovery.py b/modules/workflows/processing/shared/methodDiscovery.py index 30708010..e3bfa769 100644 --- a/modules/workflows/processing/shared/methodDiscovery.py +++ b/modules/workflows/processing/shared/methodDiscovery.py @@ -68,7 +68,7 @@ def discoverMethods(serviceCenter): # Method not discovered yet - create new instance methodInstance = item(serviceCenter) - # Use the actions property from MethodBase which handles @action decorator + # Use the actions property from MethodBase which handles WorkflowActionDefinition actions = methodInstance.actions # Create method info @@ -131,7 +131,7 @@ def getMethodsList(serviceCenter): return "\n\n".join(methodsList) def getActionParameterList(methodName: str, actionName: str, methods: Dict[str, Any]) -> str: - """Get action parameter list from method docstring for AI parameter generation (list only).""" + """Get action parameter list from WorkflowActionParameter structure for AI parameter generation (list only).""" try: if not methods or methodName not in methods: return "" @@ -141,17 +141,21 @@ def getActionParameterList(methodName: str, actionName: str, methods: Dict[str, return "" action_info = methodInstance.actions[actionName] - # Extract parameter descriptions from docstring - docstring = action_info.get('description', '') - paramDescriptions, paramTypes = methodInstance._extractParameterDetails(docstring) + # Use structured WorkflowActionParameter objects from new system + parameters = action_info.get('parameters', {}) param_list = [] - for paramName, paramDesc in paramDescriptions.items(): - paramType = paramTypes.get(paramName, 'Any') + for paramName, paramInfo in parameters.items(): + 
paramType = paramInfo.get('type', 'Any') + paramDesc = paramInfo.get('description', '') + paramRequired = paramInfo.get('required', False) + + # Format: paramName (type, required/optional): description + reqText = "required" if paramRequired else "optional" if paramDesc: - param_list.append(f"- {paramName} ({paramType}): {paramDesc}") + param_list.append(f"- {paramName} ({paramType}, {reqText}): {paramDesc}") else: - param_list.append(f"- {paramName} ({paramType})") + param_list.append(f"- {paramName} ({paramType}, {reqText})") # Return list only, without leading headings or trailing text return "\n".join(param_list) diff --git a/modules/workflows/processing/shared/placeholderFactory.py b/modules/workflows/processing/shared/placeholderFactory.py index 99d1523f..797352ab 100644 --- a/modules/workflows/processing/shared/placeholderFactory.py +++ b/modules/workflows/processing/shared/placeholderFactory.py @@ -88,10 +88,23 @@ def extractAvailableMethods(service: Any) -> str: # Create a flat JSON format with compound action names for better AI parsing available_actions_json = {} + processed_methods = set() # Track processed methods to avoid duplicates + for methodName, methodInfo in methods.items(): + # Skip short name aliases - only process full class names (MethodXxx) + # Short names are stored as aliases but we want to avoid processing them twice + if not methodName.startswith('Method'): + continue + # Convert MethodAi -> ai, MethodDocument -> document, etc. 
shortName = methodName.replace('Method', '').lower() + # Skip if we've already processed this method (via its short name alias) + if shortName in processed_methods: + continue + + processed_methods.add(shortName) + for actionName, actionInfo in methodInfo['actions'].items(): # Create compound action name: method.action compoundActionName = f"{shortName}.{actionName}" diff --git a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py index 0ad5ecfe..31878033 100644 --- a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py +++ b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py @@ -343,6 +343,12 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to - Check "structureComparison.gap" to see what's missing. If quantitative gaps are available, use them. - Next action should ONLY generate the MISSING part, NOT repeat what's already delivered +CRITICAL - Missing Data Generation Strategy: +- When gap analysis shows missing data (found count = 0 but required count > 0): + * Generate the missing data FIRST as separate outputs before attempting integration + * Do NOT try to generate AND integrate missing data in one step - data must exist before integration + * Only AFTER missing data exists can you integrate it with existing data in a subsequent action + === OUTPUT FORMAT === Return ONLY JSON (no markdown, no explanations). The decision MUST: - Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...) 
diff --git a/modules/workflows/processing/workflowProcessor.py b/modules/workflows/processing/workflowProcessor.py index d97541e5..9c9d6c84 100644 --- a/modules/workflows/processing/workflowProcessor.py +++ b/modules/workflows/processing/workflowProcessor.py @@ -28,6 +28,7 @@ class WorkflowProcessor: self.services = services self.mode = self._createMode(services.workflow.workflowMode) self.workflow = services.workflow + self.workflowExecOperationId = None # Will be set by workflowManager for task hierarchy def _createMode(self, workflowMode: WorkflowModeEnum) -> BaseMode: """Create the appropriate mode implementation based on workflow mode""" @@ -111,16 +112,20 @@ class WorkflowProcessor: # Init progress logger operationId = f"taskExec_{workflow.id}_{taskIndex}_{int(time.time())}" + # Get parent operationId (Service Workflow Execution) if available + parentOperationId = getattr(self, 'workflowExecOperationId', None) + try: # Check workflow status before executing task checkWorkflowStopped(self.services) - # Start progress tracking + # Start progress tracking - Task is child of Service Workflow Execution self.services.chat.progressLogStart( operationId, "Workflow Execution", "Task Execution", - f"Task {taskIndex}" + f"Task {taskIndex}", + parentOperationId=parentOperationId ) logger.info(f"=== STARTING TASK EXECUTION ===") diff --git a/modules/workflows/workflowManager.py b/modules/workflows/workflowManager.py index 987f46bf..a5971904 100644 --- a/modules/workflows/workflowManager.py +++ b/modules/workflows/workflowManager.py @@ -566,72 +566,89 @@ class WorkflowManager: allTaskResults: List = [] previousResults: List[str] = [] - for idx, taskStep in enumerate(taskPlan.tasks): - currentTaskIndex = idx + 1 - logger.info(f"Task {currentTaskIndex}/{totalTasks}: {taskStep.objective}") + # Create "Service Workflow Execution" root entry - parent of all tasks + workflowExecOperationId = f"workflowExec_{workflow.id}" + self.services.chat.progressLogStart( + 
workflowExecOperationId, + "Service", + "Workflow Execution", + f"Executing {totalTasks} task(s)" + ) + + # Store workflow execution operationId in workflowProcessor for task hierarchy + handling.workflowExecOperationId = workflowExecOperationId + + try: + for idx, taskStep in enumerate(taskPlan.tasks): + currentTaskIndex = idx + 1 + logger.info(f"Task {currentTaskIndex}/{totalTasks}: {taskStep.objective}") - # Update workflow state before executing task (fixes "Task 0" issue) - handling.updateWorkflowBeforeExecutingTask(currentTaskIndex) + # Update workflow state before executing task (fixes "Task 0" issue) + handling.updateWorkflowBeforeExecutingTask(currentTaskIndex) - # Build TaskContext (mode-specific behavior is inside WorkflowProcessor) - taskContext = TaskContext( - taskStep=taskStep, - workflow=workflow, - workflowId=workflow.id, - availableDocuments=None, - availableConnections=None, - previousResults=previousResults, - previousHandover=None, - improvements=[], - retryCount=0, - previousActionResults=[], - previousReviewResult=None, - isRegeneration=False, - failurePatterns=[], - failedActions=[], - successfulActions=[], - criteriaProgress={ - 'met_criteria': set(), - 'unmet_criteria': set(), - 'attempt_history': [] - } - ) - - taskResult = await handling.executeTask(taskStep, workflow, taskContext) - - # Persist task result for cross-task/round document references - # Convert ChatTaskResult to WorkflowTaskResult for persistence - from modules.datamodels.datamodelWorkflow import TaskResult as WorkflowTaskResult - from modules.datamodels.datamodelChat import ActionResult - - # Get final ActionResult from task execution (last action result) - finalActionResult = None - if hasattr(taskResult, 'actionResult'): - finalActionResult = taskResult.actionResult - elif taskContext.previousActionResults and len(taskContext.previousActionResults) > 0: - # Use last action result from context - finalActionResult = taskContext.previousActionResults[-1] - - # Create 
WorkflowTaskResult for persistence - if finalActionResult: - workflowTaskResult = WorkflowTaskResult( - taskId=taskStep.id, - actionResult=finalActionResult + # Build TaskContext (mode-specific behavior is inside WorkflowProcessor) + taskContext = TaskContext( + taskStep=taskStep, + workflow=workflow, + workflowId=workflow.id, + availableDocuments=None, + availableConnections=None, + previousResults=previousResults, + previousHandover=None, + improvements=[], + retryCount=0, + previousActionResults=[], + previousReviewResult=None, + isRegeneration=False, + failurePatterns=[], + failedActions=[], + successfulActions=[], + criteriaProgress={ + 'met_criteria': set(), + 'unmet_criteria': set(), + 'attempt_history': [] + } ) - # Persist task result (creates ChatMessage + ChatDocuments) - await handling.persistTaskResult(workflowTaskResult, workflow, taskContext) - - handoverData = await handling.prepareTaskHandover(taskStep, [], taskResult, workflow) - allTaskResults.append({ - 'taskStep': taskStep, - 'taskResult': taskResult, - 'handoverData': handoverData - }) - if taskResult.success and taskResult.feedback: - previousResults.append(taskResult.feedback) + + taskResult = await handling.executeTask(taskStep, workflow, taskContext) - # Mark workflow as completed; error/stop cases update status elsewhere - workflow.status = "completed" + # Persist task result for cross-task/round document references + # Convert ChatTaskResult to WorkflowTaskResult for persistence + from modules.datamodels.datamodelWorkflow import TaskResult as WorkflowTaskResult + from modules.datamodels.datamodelChat import ActionResult + + # Get final ActionResult from task execution (last action result) + finalActionResult = None + if hasattr(taskResult, 'actionResult'): + finalActionResult = taskResult.actionResult + elif taskContext.previousActionResults and len(taskContext.previousActionResults) > 0: + # Use last action result from context + finalActionResult = taskContext.previousActionResults[-1] 
+ + # Create WorkflowTaskResult for persistence + if finalActionResult: + workflowTaskResult = WorkflowTaskResult( + taskId=taskStep.id, + actionResult=finalActionResult + ) + # Persist task result (creates ChatMessage + ChatDocuments) + await handling.persistTaskResult(workflowTaskResult, workflow, taskContext) + + handoverData = await handling.prepareTaskHandover(taskStep, [], taskResult, workflow) + allTaskResults.append({ + 'taskStep': taskStep, + 'taskResult': taskResult, + 'handoverData': handoverData + }) + if taskResult.success and taskResult.feedback: + previousResults.append(taskResult.feedback) + + # Mark workflow as completed; error/stop cases update status elsewhere + workflow.status = "completed" + finally: + # Finish "Service Workflow Execution" entry + self.services.chat.progressLogFinish(workflowExecOperationId, True) + return None async def _processWorkflowResults(self) -> None: diff --git a/requirements.txt b/requirements.txt index 5191019b..6377611d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -71,6 +71,9 @@ google-cloud-texttospeech==2.16.3 ## MSFT Integration msal==1.24.1 +## Azure Integration +azure-communication-email>=1.0.0 # Azure Communication Services Email + ## Testing Dependencies pytest>=8.0.0 pytest-asyncio>=0.21.0 diff --git a/tests/functional/test09_document_generation_formats.py b/tests/functional/test09_document_generation_formats.py new file mode 100644 index 00000000..0834f440 --- /dev/null +++ b/tests/functional/test09_document_generation_formats.py @@ -0,0 +1,410 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Document Generation Formats Test - Tests document generation in all supported formats +Tests HTML, PDF, DOCX, XLSX, and PPTX generation with images and various content types. 
+""" + +import asyncio +import json +import sys +import os +import time +import base64 +from typing import Dict, Any, List, Optional + +# Add the gateway to path (go up 2 levels from tests/functional/) +_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +if _gateway_path not in sys.path: + sys.path.insert(0, _gateway_path) + +# Import the service initialization +from modules.services import getInterface as getServices +from modules.datamodels.datamodelChat import UserInputRequest, WorkflowModeEnum +from modules.datamodels.datamodelUam import User +from modules.features.workflow import chatStart +import modules.interfaces.interfaceDbChatObjects as interfaceDbChatObjects + + +class DocumentGenerationFormatsTester: + def __init__(self): + # Use root user for testing (has full access to everything) + from modules.interfaces.interfaceDbAppObjects import getRootInterface + rootInterface = getRootInterface() + self.testUser = rootInterface.currentUser + + # Initialize services using the existing system + self.services = getServices(self.testUser, None) # Test user, no workflow + self.workflow = None + self.testResults = {} + self.generatedDocuments = {} + + async def initialize(self): + """Initialize the test environment.""" + # Enable debug file logging for tests + from modules.shared.configuration import APP_CONFIG + APP_CONFIG.set("APP_DEBUG_CHAT_WORKFLOW_ENABLED", True) + + # Set logging level to INFO to see workflow progress + import logging + logging.getLogger().setLevel(logging.INFO) + + print(f"Initialized test with user: {self.testUser.id}") + print(f"Mandate ID: {self.testUser.mandateId}") + print(f"Debug logging enabled: {APP_CONFIG.get('APP_DEBUG_CHAT_WORKFLOW_ENABLED', False)}") + + def createTestPrompt(self, format: str) -> str: + """Create a test prompt for document generation in the specified format.""" + prompts = { + "html": "Create a professional HTML document about 'The Future of Artificial Intelligence' with: 1) A 
main title, 2) An introduction paragraph, 3) Three key sections with headings, 4) A bullet list of benefits, 5) An image showing AI technology (generate it), 6) A conclusion paragraph. Format as HTML.", + "pdf": "Create a professional PDF report about 'Climate Change Impact Analysis' with: 1) A title page, 2) An executive summary, 3) Three main sections with data tables, 4) Charts/graphs described, 5) An image showing environmental impact (generate it), 6) Conclusions and recommendations. Format as PDF.", + "docx": "Create a comprehensive Word document about 'Project Management Best Practices' with: 1) A cover page with title, 2) Table of contents, 3) Five chapters with headings and paragraphs, 4) A table comparing methodologies, 5) An image illustrating project workflow (generate it), 6) Appendices. Format as DOCX.", + "xlsx": "Create an Excel workbook about 'Sales Performance Analysis' with: 1) A summary sheet with key metrics, 2) A detailed data sheet with sales data in a table format (columns: Month, Product, Sales, Units, Revenue), 3) A chart sheet with visualizations described, 4) An analysis sheet with calculations. Format as XLSX.", + "pptx": "Create a PowerPoint presentation about 'Digital Transformation Strategy' with: 1) A title slide, 2) An agenda slide, 3) Five content slides with bullet points, 4) A slide with an image showing transformation roadmap (generate it), 5) A conclusion slide. Format as PPTX." 
+ } + return prompts.get(format.lower(), prompts["docx"]) + + async def generateDocumentInFormat(self, format: str) -> Dict[str, Any]: + """Generate a document in the specified format using workflow.""" + print("\n" + "="*80) + print(f"GENERATING DOCUMENT IN {format.upper()} FORMAT") + print("="*80) + + prompt = self.createTestPrompt(format) + print(f"Prompt: {prompt[:200]}...") + + # Create user input request + userInput = UserInputRequest( + prompt=prompt, + userLanguage="en" + ) + + # Start workflow + print(f"\nStarting workflow for {format.upper()} generation...") + workflow = await chatStart( + currentUser=self.testUser, + userInput=userInput, + workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC, + workflowId=None + ) + + if not workflow: + return { + "success": False, + "error": "Failed to start workflow" + } + + self.workflow = workflow + print(f"Workflow started: {workflow.id}") + + # Wait for workflow completion + print(f"Waiting for workflow completion...") + completed = await self.waitForWorkflowCompletion(timeout=300) # 5 minute timeout + + if not completed: + return { + "success": False, + "error": "Workflow did not complete within timeout", + "workflowId": workflow.id, + "status": workflow.status if workflow else "unknown" + } + + # Analyze results + results = self.analyzeWorkflowResults() + + # Extract documents for this format + documents = results.get("documents", []) + formatDocuments = [d for d in documents if d.get("fileName", "").endswith(f".{format.lower()}")] + + return { + "success": True, + "format": format, + "workflowId": workflow.id, + "status": results.get("status"), + "documentCount": len(formatDocuments), + "documents": formatDocuments, + "results": results + } + + async def waitForWorkflowCompletion(self, timeout: int = 300, checkInterval: int = 2) -> bool: + """Wait for workflow to complete.""" + if not self.workflow: + return False + + startTime = time.time() + lastStatus = None + + interfaceDbChat = 
interfaceDbChatObjects.getInterface(self.testUser) + + while True: + # Check timeout + if time.time() - startTime > timeout: + print(f"\n⏱️ Timeout after {timeout} seconds") + return False + + # Get current workflow status + try: + currentWorkflow = interfaceDbChat.getWorkflow(self.workflow.id) + if not currentWorkflow: + print("\n❌ Workflow not found") + return False + + currentStatus = currentWorkflow.status + elapsed = int(time.time() - startTime) + + # Print status if it changed + if currentStatus != lastStatus: + print(f"Workflow status: {currentStatus} (elapsed: {elapsed}s)") + lastStatus = currentStatus + + # Check if workflow is complete + if currentStatus in ["completed", "stopped", "failed"]: + self.workflow = currentWorkflow + statusIcon = "✅" if currentStatus == "completed" else "❌" + print(f"\n{statusIcon} Workflow finished with status: {currentStatus} (elapsed: {elapsed}s)") + return currentStatus == "completed" + + # Wait before next check + await asyncio.sleep(checkInterval) + + except Exception as e: + print(f"\n⚠️ Error checking workflow status: {str(e)}") + await asyncio.sleep(checkInterval) + + def analyzeWorkflowResults(self) -> Dict[str, Any]: + """Analyze workflow results and extract information.""" + if not self.workflow: + return {"error": "No workflow to analyze"} + + interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser) + workflow = interfaceDbChat.getWorkflow(self.workflow.id) + + if not workflow: + return {"error": "Workflow not found"} + + # Get unified chat data + chatData = interfaceDbChat.getUnifiedChatData(workflow.id, None) + + # Count messages + messages = chatData.get("messages", []) + userMessages = [m for m in messages if m.get("role") == "user"] + assistantMessages = [m for m in messages if m.get("role") == "assistant"] + + # Count documents + documents = chatData.get("documents", []) + + # Get logs + logs = chatData.get("logs", []) + + results = { + "workflowId": workflow.id, + "status": workflow.status, + 
"workflowMode": str(workflow.workflowMode) if hasattr(workflow, 'workflowMode') else None, + "currentRound": workflow.currentRound, + "totalTasks": workflow.totalTasks, + "totalActions": workflow.totalActions, + "messageCount": len(messages), + "userMessageCount": len(userMessages), + "assistantMessageCount": len(assistantMessages), + "documentCount": len(documents), + "logCount": len(logs), + "documents": documents, + "logs": logs + } + + print(f"\nWorkflow Results:") + print(f" Status: {results['status']}") + print(f" Tasks: {results['totalTasks']}") + print(f" Actions: {results['totalActions']}") + print(f" Messages: {results['messageCount']}") + print(f" Documents: {results['documentCount']}") + + # Print document details + if documents: + print(f"\nGenerated Documents:") + for doc in documents: + fileName = doc.get("fileName", "unknown") + fileSize = doc.get("fileSize", 0) + mimeType = doc.get("mimeType", "unknown") + print(f" - {fileName} ({fileSize} bytes, {mimeType})") + + return results + + def verifyDocumentFormat(self, document: Dict[str, Any], expectedFormat: str) -> Dict[str, Any]: + """Verify that a document matches the expected format.""" + fileName = document.get("fileName", "") + mimeType = document.get("mimeType", "") + fileSize = document.get("fileSize", 0) + + # Expected MIME types + expectedMimeTypes = { + "html": ["text/html", "application/xhtml+xml"], + "pdf": ["application/pdf"], + "docx": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"], + "xlsx": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"], + "pptx": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] + } + + # Expected file extensions + expectedExtensions = { + "html": [".html", ".htm"], + "pdf": [".pdf"], + "docx": [".docx"], + "xlsx": [".xlsx"], + "pptx": [".pptx"] + } + + formatLower = expectedFormat.lower() + expectedMimes = expectedMimeTypes.get(formatLower, []) + expectedExts = 
expectedExtensions.get(formatLower, []) + + # Check file extension + hasCorrectExtension = any(fileName.lower().endswith(ext) for ext in expectedExts) + + # Check MIME type + hasCorrectMimeType = any(mimeType.lower() == mime.lower() for mime in expectedMimes) + + # Check file size (should be > 0) + hasValidSize = fileSize > 0 + + verification = { + "format": expectedFormat, + "fileName": fileName, + "mimeType": mimeType, + "fileSize": fileSize, + "hasCorrectExtension": hasCorrectExtension, + "hasCorrectMimeType": hasCorrectMimeType, + "hasValidSize": hasValidSize, + "isValid": hasCorrectExtension and hasValidSize + } + + return verification + + async def testAllFormats(self) -> Dict[str, Any]: + """Test document generation in all formats.""" + print("\n" + "="*80) + print("TESTING DOCUMENT GENERATION IN ALL FORMATS") + print("="*80) + + formats = ["html", "pdf", "docx", "xlsx", "pptx"] + results = {} + + for format in formats: + try: + print(f"\n{'='*80}") + print(f"Testing {format.upper()} format...") + print(f"{'='*80}") + + result = await self.generateDocumentInFormat(format) + results[format] = result + + if result.get("success"): + documents = result.get("documents", []) + if documents: + # Verify first document + verification = self.verifyDocumentFormat(documents[0], format) + result["verification"] = verification + + print(f"\n✅ {format.upper()} generation successful!") + print(f" Documents: {len(documents)}") + print(f" Verification: {'✅ PASS' if verification['isValid'] else '❌ FAIL'}") + if verification.get("fileName"): + print(f" File: {verification['fileName']}") + print(f" Size: {verification['fileSize']} bytes") + print(f" MIME: {verification['mimeType']}") + else: + print(f"\n⚠️ {format.upper()} generation completed but no documents found") + else: + error = result.get("error", "Unknown error") + print(f"\n❌ {format.upper()} generation failed: {error}") + + # Small delay between tests + await asyncio.sleep(2) + + except Exception as e: + import 
traceback + print(f"\n❌ Error testing {format.upper()}: {str(e)}") + print(traceback.format_exc()) + results[format] = { + "success": False, + "error": str(e), + "traceback": traceback.format_exc() + } + + return results + + async def runTest(self): + """Run the complete test.""" + print("\n" + "="*80) + print("DOCUMENT GENERATION FORMATS TEST") + print("="*80) + + try: + # Initialize + await self.initialize() + + # Test all formats + results = await self.testAllFormats() + + # Summary + print("\n" + "="*80) + print("TEST SUMMARY") + print("="*80) + + successCount = 0 + failCount = 0 + + for format, result in results.items(): + if result.get("success"): + successCount += 1 + status = "✅ PASS" + docCount = result.get("documentCount", 0) + verification = result.get("verification", {}) + isValid = verification.get("isValid", False) + statusIcon = "✅" if isValid else "⚠️" + print(f"{statusIcon} {format.upper():6s}: {status} - {docCount} document(s)") + else: + failCount += 1 + error = result.get("error", "Unknown error") + print(f"❌ {format.upper():6s}: FAIL - {error}") + + print(f"\nTotal: {successCount} passed, {failCount} failed out of {len(results)} formats") + + self.testResults = { + "success": failCount == 0, + "successCount": successCount, + "failCount": failCount, + "totalFormats": len(results), + "results": results + } + + return self.testResults + + except Exception as e: + import traceback + print(f"\n❌ Test failed with error: {type(e).__name__}: {str(e)}") + print(f"Traceback:\n{traceback.format_exc()}") + self.testResults = { + "success": False, + "error": str(e), + "traceback": traceback.format_exc() + } + return self.testResults + + +async def main(): + """Run document generation formats test.""" + tester = DocumentGenerationFormatsTester() + results = await tester.runTest() + + # Print final results as JSON for easy parsing + print("\n" + "="*80) + print("FINAL RESULTS (JSON)") + print("="*80) + print(json.dumps(results, indent=2, default=str)) + + +if 
__name__ == "__main__": + asyncio.run(main()) +