401 lines
19 KiB
Python
401 lines
19 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
|
|
"""
Generate Document action for AI operations.

Generates documents from scratch or based on templates/inputs using a hierarchical approach.
"""
|
|
|
|
import logging
|
|
import time
|
|
from typing import Dict, Any, Optional
|
|
from modules.workflows.methods.methodBase import action
|
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
|
|
from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator
|
|
from modules.services.serviceGeneration.subContentGenerator import ContentGenerator
|
|
from modules.services.serviceGeneration.subDocumentPurposeAnalyzer import DocumentPurposeAnalyzer
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Purposes that route a source document to plain content extraction.
_TEXT_EXTRACTION_PURPOSES = (
    "extract_text_content",
    "use_as_template",
    "use_as_reference",
    "extract_data",
)

# Image file extensions accepted for renderer output and the MIME type used for each.
_IMAGE_MIME_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
}


def _autoDetectResultType(prompt: str) -> Optional[str]:
    """Return an output format hinted at by keywords in the prompt, or None if there is no hint.

    Checks are plain substring matches evaluated in priority order
    (html, pdf, markdown, text); the first match wins and is logged.
    """
    promptLower = prompt.lower()
    if "html" in promptLower:  # also covers "html5"
        detected, label = "html", "HTML"
    elif "pdf" in promptLower:
        detected, label = "pdf", "PDF"
    elif "markdown" in promptLower or " md " in promptLower or promptLower.endswith(" md"):
        detected, label = "md", "Markdown"
    elif "text" in promptLower or "txt" in promptLower:
        detected, label = "txt", "Text"
    else:
        return None
    logger.info(f"Auto-detected {label} format from prompt")
    return detected


def _toDocumentReferenceList(documentList):
    """Normalise the documentList parameter (reference list, single string or list) to a DocumentReferenceList."""
    # Imported locally, as in the original code path, so the module import stays lazy.
    from modules.datamodels.datamodelDocref import DocumentReferenceList

    if isinstance(documentList, DocumentReferenceList):
        return documentList
    if isinstance(documentList, str):
        return DocumentReferenceList.from_string_list([documentList])
    if isinstance(documentList, list):
        return DocumentReferenceList.from_string_list(documentList)
    # Unsupported input type: fall back to an empty reference list.
    return DocumentReferenceList(references=[])


def _buildExtractionOptions(extractionPrompt: str) -> ExtractionOptions:
    """Build the per-document extraction options shared by text extraction and vision analysis."""
    return ExtractionOptions(
        prompt=extractionPrompt,
        mergeStrategy=MergeStrategy(
            mergeType="concatenate",
            groupBy="typeGroup",
            orderBy="id"
        ),
        processDocumentsIndividually=True
    )


def _resolveImageFile(filename: Optional[str], index: int) -> tuple:
    """Return (filename, mimeType) for a renderer image.

    A missing filename, or one without a recognised image extension, is replaced
    by ``image_<index>.png`` with MIME type ``image/png``.
    """
    fallbackName = f"image_{index}.png"
    candidate = filename or fallbackName
    lowered = candidate.lower()
    for extension, mimeType in _IMAGE_MIME_TYPES.items():
        if lowered.endswith(extension):
            return candidate, mimeType
    return fallbackName, "image/png"


def _imageActionDocuments(images) -> list:
    """Convert image dicts returned by the renderer into ActionDocument objects.

    Each entry is expected to carry ``base64Data`` and optionally ``filename``,
    ``altText`` and ``sectionId``. Entries without data, or that fail to decode,
    are logged and skipped so that one bad image does not fail the whole action.
    """
    import base64

    imageDocs = []
    for idx, imageData in enumerate(images):
        position = idx + 1
        try:
            base64Data = imageData.get("base64Data", "")
            altText = imageData.get("altText", f"image_{position}")
            sectionId = imageData.get("sectionId", f"section_{position}")

            if not base64Data:
                logger.warning(f"Image {position} (section: {sectionId}) has no base64Data, skipping")
                continue

            imageBytes = base64.b64decode(base64Data)
            filename, imageMimeType = _resolveImageFile(imageData.get("filename"), position)

            imageDocs.append(ActionDocument(
                documentName=filename,
                documentData=imageBytes,
                mimeType=imageMimeType
            ))
            logger.info(f"Added image document: {filename} (section: {sectionId}, {len(imageBytes)} bytes, alt: {altText})")
        except Exception as e:
            logger.error(f"Error adding image document {position}: {str(e)}", exc_info=True)
    return imageDocs


# Flow of generateDocument:
#   1. Validate the prompt and read parameters (output format may be auto-detected from the prompt).
#   2. If source documents are supplied: analyse their purposes, extract text, run vision
#      extraction on images to analyse, and base64-encode images to include.
#   3. Generate the document structure, then the section content, then render the result.
#   4. Return the rendered document plus any renderer images as ActionDocuments.
# Returns ActionResult.isSuccess(documents=[...]) or ActionResult.isFailure(error=...);
# exceptions raised inside the pipeline are logged and converted into a failure result.
# NOTE(review): the docstring below is kept verbatim because its "GENERAL:/Parameters:" layout
# looks like it may be consumed by the action registry - confirm before reformatting it.
@action
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Generate documents from scratch or based on templates/inputs using hierarchical approach.
    - Input requirements: prompt or description (required); optional documentList (for templates/references).
    - Output format: Document in specified format. Any format supported by dynamically registered renderers is acceptable (default: txt).

    Parameters:
    - prompt (str, required): Description of the document to generate.
    - documentList (list, optional): Template documents or reference documents to use as a guide.
    - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
    - resultType (str, optional): Output format. Any format supported by dynamically registered renderers is acceptable (formats are discovered automatically from renderer registry). Common formats: txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg. Default: txt.
    - maxSectionLength (int, optional): Maximum words for simple sections. Default: 500.
    - parallelGeneration (bool, optional): Enable parallel section generation. Default: True.
    - progressLogging (bool, optional): Send ChatLog progress updates. Default: True.
    """
    prompt = parameters.get("prompt")
    if not prompt:
        return ActionResult.isFailure(error="prompt is required")

    documentList = parameters.get("documentList", [])
    documentType = parameters.get("documentType")
    # "or" (not a .get default) so an explicit None/empty value also falls back to txt
    # instead of producing "document.None".
    resultType = parameters.get("resultType") or "txt"

    # Auto-detect format from prompt when the format is still the default.
    # NOTE(review): an explicitly requested "txt" is indistinguishable from the default here,
    # so prompt keywords can still override it - confirm whether that is intended.
    if resultType == "txt":
        resultType = _autoDetectResultType(prompt) or resultType

    maxSectionLength = parameters.get("maxSectionLength", 500)
    parallelGeneration = parameters.get("parallelGeneration", True)
    progressLogging = parameters.get("progressLogging", True)

    # Create operation ID for progress tracking
    workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
    operationId = f"doc_gen_{workflowId}_{int(time.time())}"
    parentOperationId = parameters.get('parentOperationId')

    def reportProgress(fraction: float, message: str) -> None:
        """Send a progress update for this operation when progress logging is enabled."""
        if progressLogging:
            self.services.chat.progressLogUpdate(operationId, fraction, message)

    try:
        # Phase 1: Structure Generation
        if progressLogging:
            self.services.chat.progressLogStart(
                operationId,
                "Document",
                "Structure Generation",
                "Generating document structure...",
                parentOperationId=parentOperationId
            )

        structureGenerator = StructureGenerator(self.services)

        # Analyze document purposes and process documents accordingly
        cachedContent = None
        imageDocuments = []

        if documentList:
            reportProgress(0.1, "Analyzing document purposes...")

            docRefList = _toDocumentReferenceList(documentList)
            chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)

            if chatDocuments:
                logger.info(f"Analyzing purposes for {len(chatDocuments)} documents")

                # Analyze document purposes using AI
                purposeAnalyzer = DocumentPurposeAnalyzer(self.services)
                purposeAnalysis = await purposeAnalyzer.analyzeDocumentPurposes(
                    userPrompt=prompt,
                    chatDocuments=chatDocuments,
                    actionContext="generateDocument"
                )
                purposeAnalysis = purposeAnalysis or {}

                # Tolerate malformed analysis entries instead of failing the whole action;
                # documents without an entry fall back to plain text extraction below.
                documentPurposes = {
                    entry["document_id"]: entry
                    for entry in (purposeAnalysis.get("document_purposes") or [])
                    if isinstance(entry, dict) and "document_id" in entry
                }
                logger.info(f"Purpose analysis complete: {purposeAnalysis.get('overall_intent', 'N/A')}")

                # Separate documents by purpose
                textDocs = []
                imageDocsToInclude = []
                imageDocsToAnalyze = []

                for doc in chatDocuments:
                    purpose = documentPurposes.get(doc.id, {}).get("purpose", "extract_text_content")
                    if purpose == "include_image":
                        imageDocsToInclude.append(doc)
                    elif purpose == "analyze_image_vision":
                        imageDocsToAnalyze.append(doc)
                    elif purpose in _TEXT_EXTRACTION_PURPOSES:
                        textDocs.append(doc)
                    else:
                        # "attach" and any unrecognised purpose are not processed.
                        logger.debug(f"Skipping document {doc.fileName} with purpose '{purpose}'")

                # Process text documents (extract content)
                extractedResults = []
                if textDocs:
                    reportProgress(0.15, f"Extracting content from {len(textDocs)} text document(s)...")

                    for doc in textDocs:
                        try:
                            # Use the purpose-specific prompt when the analysis supplied one.
                            extractionPrompt = (
                                documentPurposes.get(doc.id, {}).get("extractionPrompt")
                                or "Extract all content from the document"
                            )
                            docResults = self.services.extraction.extractContent(
                                [doc],
                                _buildExtractionOptions(extractionPrompt),
                                parentOperationId=operationId
                            )
                            extractedResults.extend(docResults)
                        except Exception as e:
                            # Best effort: one failing document must not abort the others.
                            logger.error(f"Error extracting content from {doc.fileName}: {str(e)}")

                    logger.info(f"Extracted content from {len(extractedResults)} text document(s)")

                # Process images to analyze (vision call)
                if imageDocsToAnalyze:
                    reportProgress(0.2, f"Analyzing {len(imageDocsToAnalyze)} image(s) with vision AI...")

                    for doc in imageDocsToAnalyze:
                        try:
                            extractionPrompt = (
                                documentPurposes.get(doc.id, {}).get("extractionPrompt")
                                or "Extract all text and information from this image"
                            )
                            docResults = self.services.extraction.extractContent(
                                [doc],
                                _buildExtractionOptions(extractionPrompt),
                                parentOperationId=operationId
                            )
                            extractedResults.extend(docResults)
                        except Exception as e:
                            logger.error(f"Error analyzing image {doc.fileName}: {str(e)}")

                    logger.info(f"Analyzed {len(imageDocsToAnalyze)} image(s) with vision AI")

                # Process images to include (store image data)
                if imageDocsToInclude:
                    reportProgress(0.25, f"Preparing {len(imageDocsToInclude)} image(s) for inclusion...")

                    import base64
                    from modules.interfaces.interfaceDbComponentObjects import getInterface
                    dbInterface = getInterface()

                    for doc in imageDocsToInclude:
                        try:
                            imageBytes = dbInterface.getFileData(doc.fileId)
                            if not imageBytes:
                                logger.warning(f"Could not retrieve image data for {doc.fileName}")
                                continue

                            base64Data = base64.b64encode(imageBytes).decode('utf-8')
                            imageDocuments.append({
                                "id": doc.id,
                                "fileName": doc.fileName,
                                "mimeType": doc.mimeType,
                                "base64Data": base64Data,
                                "altText": doc.fileName or "Image",
                                "fileSize": doc.fileSize
                            })
                            logger.debug(f"Prepared image {doc.fileName} for inclusion ({len(base64Data)} chars base64)")
                        except Exception as e:
                            logger.error(f"Error preparing image {doc.fileName} for inclusion: {str(e)}")

                    logger.info(f"Prepared {len(imageDocuments)} image(s) for inclusion")

                # Build cachedContent with all information
                cachedContent = {
                    "extractedContent": extractedResults,
                    "imageDocuments": imageDocuments,
                    "documentPurposes": documentPurposes,
                    "extractionTimestamp": time.time(),
                    "sourceDocuments": [doc.id for doc in chatDocuments]
                }

                logger.info(f"Document processing complete: {len(extractedResults)} extracted, {len(imageDocuments)} images to include")

        # Generate structure (0.3 keeps progress monotonic after the 0.25 image step)
        reportProgress(0.3, "Generating document structure...")

        structure = await structureGenerator.generateStructure(
            userPrompt=prompt,
            documentList=documentList if documentList else None,
            cachedContent=cachedContent,
            maxSectionLength=maxSectionLength,
            existingImages=imageDocuments  # Pass existing images for structure generation
        )

        reportProgress(0.33, "Structure generated")

        # Phase 2: Content Generation
        reportProgress(0.34, "Starting content generation...")

        contentGenerator = ContentGenerator(self.services)

        def progressCallback(sectionIndex: int, totalSections: int, message: str):
            """Map per-section progress onto the 34%-90% band of the overall operation."""
            if not progressLogging:
                return

            if totalSections > 0:
                progress = 0.34 + (0.56 * (sectionIndex / totalSections))
            else:
                progress = 0.34

            if sectionIndex > 0 and totalSections > 0:
                progressMessage = f"Section {sectionIndex}/{totalSections}: {message}"
            else:
                progressMessage = message

            self.services.chat.progressLogUpdate(operationId, progress, progressMessage)

        completeStructure = await contentGenerator.generateContent(
            structure=structure,
            cachedContent=cachedContent,
            userPrompt=prompt,
            progressCallback=progressCallback,
            parallelGeneration=parallelGeneration
        )

        reportProgress(0.90, "Content generated")

        # Phase 3: Integration & Rendering
        reportProgress(0.91, "Rendering final document...")

        # "or" fallbacks cover both a missing key and an explicit None value.
        title = (structure.get("metadata") or {}).get("title") or "Generated Document"
        if documentType:
            title = f"{title} ({documentType})"

        # Use existing renderReport method
        renderedContent, mimeType, images = await self.services.generation.renderReport(
            extractedContent=completeStructure,
            outputFormat=resultType,
            title=title,
            userPrompt=prompt,
            aiService=self.services.ai
        )

        # Build list of documents to return: the rendered document first, then images
        documents = [
            ActionDocument(
                documentName=f"document.{resultType}",
                documentData=renderedContent,
                mimeType=mimeType
            )
        ]

        if images:
            logger.info(f"Processing {len(images)} image(s) from renderer")
            documents.extend(_imageActionDocuments(images))
        else:
            logger.debug("No images returned from renderer")

        # Note: Document creation is handled by the workflow system
        # We just return the rendered content and images in ActionResult

        if progressLogging:
            self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=documents)

    except Exception as e:
        # Top-level boundary: keep the traceback in the log, report failure to the caller.
        logger.error(f"Error in hierarchical document generation: {str(e)}", exc_info=True)
        if progressLogging:
            self.services.chat.progressLogFinish(operationId, False)
        return ActionResult.isFailure(error=str(e))
|
|
|