# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
Generate Document action for AI operations.

Generates documents from scratch or based on templates/inputs using a
hierarchical approach: structure generation, section content generation,
then integration & rendering.
"""

import base64
import logging
import time
from typing import Dict, Any, Optional

from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator
from modules.services.serviceGeneration.subContentGenerator import ContentGenerator
from modules.services.serviceGeneration.subDocumentPurposeAnalyzer import DocumentPurposeAnalyzer

logger = logging.getLogger(__name__)

# Extension -> MIME type map for images returned by the renderer.
_IMAGE_MIME_TYPES = {
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "gif": "image/gif",
    "webp": "image/webp",
}


@action
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Generate documents from scratch or based on templates/inputs using hierarchical approach.
    - Input requirements: prompt or description (required); optional documentList (for templates/references).
    - Output format: Document in specified format. Any format supported by dynamically registered
      renderers is acceptable (default: txt).

    Parameters:
    - prompt (str, required): Description of the document to generate.
    - documentList (list, optional): Template documents or reference documents to use as a guide.
    - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
    - resultType (str, optional): Output format. Any format supported by dynamically registered
      renderers is acceptable (formats are discovered automatically from renderer registry).
      Common formats: txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg. Default: txt.
    - maxSectionLength (int, optional): Maximum words for simple sections. Default: 500.
    - parallelGeneration (bool, optional): Enable parallel section generation. Default: True.
    - progressLogging (bool, optional): Send ChatLog progress updates. Default: True.
    """
    prompt = parameters.get("prompt")
    if not prompt:
        return ActionResult.isFailure(error="prompt is required")

    documentList = parameters.get("documentList", [])
    documentType = parameters.get("documentType")
    resultType = parameters.get("resultType", "txt")

    # Auto-detect format from prompt if not explicitly provided (resultType
    # still at its "txt" default). First matching keyword wins.
    if resultType == "txt" and prompt:
        promptLower = prompt.lower()
        if "html" in promptLower or "html5" in promptLower:
            resultType = "html"
            logger.info("Auto-detected HTML format from prompt")
        elif "pdf" in promptLower:
            resultType = "pdf"
            logger.info("Auto-detected PDF format from prompt")
        elif "markdown" in promptLower or " md " in promptLower or promptLower.endswith(" md"):
            resultType = "md"
            logger.info("Auto-detected Markdown format from prompt")
        elif ("text" in promptLower or "txt" in promptLower) and "html" not in promptLower:
            resultType = "txt"
            logger.info("Auto-detected Text format from prompt")

    maxSectionLength = parameters.get("maxSectionLength", 500)
    parallelGeneration = parameters.get("parallelGeneration", True)
    progressLogging = parameters.get("progressLogging", True)

    # Create operation ID for progress tracking. Falls back to a timestamp-based
    # id when no workflow is active.
    workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
    operationId = f"doc_gen_{workflowId}_{int(time.time())}"
    parentOperationId = parameters.get('parentOperationId')

    try:
        # --- Phase 1: Structure Generation -------------------------------
        if progressLogging:
            self.services.chat.progressLogStart(
                operationId, "Document", "Structure Generation",
                "Generating document structure...",
                parentOperationId=parentOperationId
            )

        structureGenerator = StructureGenerator(self.services)

        # Analyze document purposes and process documents accordingly.
        cachedContent = None
        imageDocuments = []
        documentPurposes = {}

        if documentList:
            if progressLogging:
                self.services.chat.progressLogUpdate(operationId, 0.1, "Analyzing document purposes...")

            # Convert documentList (str | list | DocumentReferenceList) to a
            # DocumentReferenceList; unknown types yield an empty reference list.
            from modules.datamodels.datamodelDocref import DocumentReferenceList
            if isinstance(documentList, DocumentReferenceList):
                docRefList = documentList
            elif isinstance(documentList, str):
                docRefList = DocumentReferenceList.from_string_list([documentList])
            elif isinstance(documentList, list):
                docRefList = DocumentReferenceList.from_string_list(documentList)
            else:
                docRefList = DocumentReferenceList(references=[])

            # Resolve references into ChatDocuments.
            chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)

            if chatDocuments:
                logger.info(f"Analyzing purposes for {len(chatDocuments)} documents")

                # Ask the AI what role each attached document plays for this prompt.
                purposeAnalyzer = DocumentPurposeAnalyzer(self.services)
                purposeAnalysis = await purposeAnalyzer.analyzeDocumentPurposes(
                    userPrompt=prompt,
                    chatDocuments=chatDocuments,
                    actionContext="generateDocument"
                )

                documentPurposes = {dp["document_id"]: dp for dp in purposeAnalysis.get("document_purposes", [])}
                logger.info(f"Purpose analysis complete: {purposeAnalysis.get('overall_intent', 'N/A')}")

                # Bucket documents by purpose. Documents with purpose "attach"
                # are intentionally skipped (not processed at all).
                textDocs = []
                imageDocsToInclude = []
                imageDocsToAnalyze = []

                for doc in chatDocuments:
                    docPurpose = documentPurposes.get(doc.id, {})
                    purpose = docPurpose.get("purpose", "extract_text_content")
                    if purpose == "include_image":
                        imageDocsToInclude.append(doc)
                    elif purpose == "analyze_image_vision":
                        imageDocsToAnalyze.append(doc)
                    elif purpose in ["extract_text_content", "use_as_template", "use_as_reference", "extract_data"]:
                        textDocs.append(doc)

                # Process text documents (extract content). Per-document failures
                # are logged and skipped so one bad document doesn't abort the run.
                extractedResults = []
                if textDocs:
                    if progressLogging:
                        self.services.chat.progressLogUpdate(operationId, 0.15, f"Extracting content from {len(textDocs)} text document(s)...")

                    # Prepare extraction options with purpose-specific prompts.
                    extractionOptionsList = []
                    for doc in textDocs:
                        docPurpose = documentPurposes.get(doc.id, {})
                        extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all content from the document"
                        extractionOptions = ExtractionOptions(
                            prompt=extractionPrompt,
                            mergeStrategy=MergeStrategy(
                                mergeType="concatenate",
                                groupBy="typeGroup",
                                orderBy="id"
                            ),
                            processDocumentsIndividually=True
                        )
                        extractionOptionsList.append((doc, extractionOptions))

                    for doc, extractionOptions in extractionOptionsList:
                        try:
                            docResults = self.services.extraction.extractContent(
                                [doc],
                                extractionOptions,
                                parentOperationId=operationId
                            )
                            extractedResults.extend(docResults)
                        except Exception as e:
                            logger.error(f"Error extracting content from {doc.fileName}: {str(e)}")

                    logger.info(f"Extracted content from {len(extractedResults)} text document(s)")

                # Process images whose content should be analyzed (vision call).
                if imageDocsToAnalyze:
                    if progressLogging:
                        self.services.chat.progressLogUpdate(operationId, 0.2, f"Analyzing {len(imageDocsToAnalyze)} image(s) with vision AI...")

                    for doc in imageDocsToAnalyze:
                        try:
                            docPurpose = documentPurposes.get(doc.id, {})
                            extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all text and information from this image"
                            extractionOptions = ExtractionOptions(
                                prompt=extractionPrompt,
                                mergeStrategy=MergeStrategy(
                                    mergeType="concatenate",
                                    groupBy="typeGroup",
                                    orderBy="id"
                                ),
                                processDocumentsIndividually=True
                            )
                            docResults = self.services.extraction.extractContent(
                                [doc],
                                extractionOptions,
                                parentOperationId=operationId
                            )
                            extractedResults.extend(docResults)
                        except Exception as e:
                            logger.error(f"Error analyzing image {doc.fileName}: {str(e)}")

                    logger.info(f"Analyzed {len(imageDocsToAnalyze)} image(s) with vision AI")

                # Process images to include verbatim (store base64 image data).
                if imageDocsToInclude:
                    if progressLogging:
                        self.services.chat.progressLogUpdate(operationId, 0.25, f"Preparing {len(imageDocsToInclude)} image(s) for inclusion...")

                    from modules.interfaces.interfaceDbComponentObjects import getInterface
                    dbInterface = getInterface()

                    for doc in imageDocsToInclude:
                        try:
                            imageBytes = dbInterface.getFileData(doc.fileId)
                            if imageBytes:
                                base64Data = base64.b64encode(imageBytes).decode('utf-8')
                                imageDoc = {
                                    "id": doc.id,
                                    "fileName": doc.fileName,
                                    "mimeType": doc.mimeType,
                                    "base64Data": base64Data,
                                    "altText": doc.fileName or "Image",
                                    "fileSize": doc.fileSize
                                }
                                imageDocuments.append(imageDoc)
                                logger.debug(f"Prepared image {doc.fileName} for inclusion ({len(base64Data)} chars base64)")
                            else:
                                logger.warning(f"Could not retrieve image data for {doc.fileName}")
                        except Exception as e:
                            logger.error(f"Error preparing image {doc.fileName} for inclusion: {str(e)}")

                    logger.info(f"Prepared {len(imageDocuments)} image(s) for inclusion")

                # Aggregate everything into cachedContent for the generators.
                cachedContent = {
                    "extractedContent": extractedResults,
                    "imageDocuments": imageDocuments,
                    "documentPurposes": documentPurposes,
                    "extractionTimestamp": time.time(),
                    "sourceDocuments": [doc.id for doc in chatDocuments]
                }
                logger.info(f"Document processing complete: {len(extractedResults)} extracted, {len(imageDocuments)} images to include")

        # Generate structure.
        if progressLogging:
            # 0.3 keeps progress monotonic: document processing above reaches
            # 0.25 (previously this regressed to 0.2).
            self.services.chat.progressLogUpdate(operationId, 0.3, "Generating document structure...")

        structure = await structureGenerator.generateStructure(
            userPrompt=prompt,
            documentList=documentList if documentList else None,
            cachedContent=cachedContent,
            maxSectionLength=maxSectionLength,
            existingImages=imageDocuments  # Pass existing images for structure generation
        )

        if progressLogging:
            self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated")

        # --- Phase 2: Content Generation ---------------------------------
        if progressLogging:
            self.services.chat.progressLogUpdate(
                operationId, 0.34, "Starting content generation..."
            )

        contentGenerator = ContentGenerator(self.services)

        def progressCallback(sectionIndex: int, totalSections: int, message: str):
            """Map per-section progress into the 34%-90% window of the overall bar."""
            if progressLogging:
                if totalSections > 0:
                    progress = 0.34 + (0.56 * (sectionIndex / totalSections))
                else:
                    progress = 0.34
                if sectionIndex > 0 and totalSections > 0:
                    progressMessage = f"Section {sectionIndex}/{totalSections}: {message}"
                else:
                    progressMessage = message
                self.services.chat.progressLogUpdate(
                    operationId, progress, progressMessage
                )

        completeStructure = await contentGenerator.generateContent(
            structure=structure,
            cachedContent=cachedContent,
            userPrompt=prompt,
            progressCallback=progressCallback,
            parallelGeneration=parallelGeneration
        )

        if progressLogging:
            self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated")

        # --- Phase 3: Integration & Rendering ----------------------------
        if progressLogging:
            self.services.chat.progressLogUpdate(
                operationId, 0.91, "Rendering final document..."
            )

        # Use the existing renderReport method.
        title = structure.get("metadata", {}).get("title", "Generated Document")
        if documentType:
            title = f"{title} ({documentType})"

        renderedContent, mimeType, images = await self.services.generation.renderReport(
            extractedContent=completeStructure,
            outputFormat=resultType,
            title=title,
            userPrompt=prompt,
            aiService=self.services.ai
        )

        # The rendered document always comes first in the returned list.
        documents = [
            ActionDocument(
                documentName=f"document.{resultType}",
                documentData=renderedContent,
                mimeType=mimeType
            )
        ]

        # Append renderer-produced images as separate documents. Per-image
        # failures are logged and skipped.
        if images:
            logger.info(f"Processing {len(images)} image(s) from renderer")
            for idx, imageData in enumerate(images):
                try:
                    base64Data = imageData.get("base64Data", "")
                    altText = imageData.get("altText", f"image_{idx + 1}")
                    sectionId = imageData.get("sectionId", f"section_{idx + 1}")

                    if base64Data:
                        imageBytes = base64.b64decode(base64Data)

                        # Normalize filename to a known image extension.
                        filename = imageData.get("filename", f"image_{idx + 1}.png")
                        if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')):
                            filename = f"image_{idx + 1}.png"

                        # MIME type from the (now guaranteed known) extension.
                        extension = filename.lower().rsplit('.', 1)[-1]
                        imageMimeType = _IMAGE_MIME_TYPES.get(extension, "image/png")

                        documents.append(ActionDocument(
                            documentName=filename,
                            documentData=imageBytes,
                            mimeType=imageMimeType
                        ))
                        # Was "(unknown)" — log the actual filename.
                        logger.info(f"Added image document: {filename} (section: {sectionId}, {len(imageBytes)} bytes, alt: {altText})")
                    else:
                        logger.warning(f"Image {idx + 1} (section: {sectionId}) has no base64Data, skipping")
                except Exception as e:
                    logger.error(f"Error adding image document {idx + 1}: {str(e)}", exc_info=True)
                    continue
        else:
            logger.debug("No images returned from renderer")

        # Note: Document creation is handled by the workflow system.
        # We just return the rendered content and images in ActionResult.
        if progressLogging:
            self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=documents)

    except Exception as e:
        # Top-level boundary: log with traceback, finish the progress log as
        # failed, and surface the error to the caller.
        logger.error(f"Error in hierarchical document generation: {str(e)}", exc_info=True)
        if progressLogging:
            self.services.chat.progressLogFinish(operationId, False)
        return ActionResult.isFailure(error=str(e))