enhanced document generation with images

This commit is contained in:
ValueOn AG 2025-12-23 00:34:15 +01:00
parent b2196bc6a3
commit 982932d2f5
39 changed files with 6236 additions and 442 deletions

View file

@ -19,12 +19,14 @@ supportedSectionTypes: List[str] = [
]
# Canonical JSON template used for AI generation (documents array + sections)
# Rendering pipelines can select the first document and read its sections.
# This template is used for STRUCTURE generation - sections have empty elements arrays.
# For content generation, elements arrays will be populated later.
jsonTemplateDocument: str = """{
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
"extraction_method": "ai_generation",
"title": "{{DOCUMENT_TITLE}}"
},
"documents": [
{
@ -33,56 +35,77 @@ jsonTemplateDocument: str = """{
"filename": "document.json",
"sections": [
{
"id": "section_heading_example",
"id": "section_heading_main_title",
"content_type": "heading",
"elements": [
{"level": 1, "text": "Heading Text"}
],
"order": 0
"complexity": "simple",
"generation_hint": "Main document title heading",
"order": 1,
"elements": []
},
{
"id": "section_paragraph_example",
"id": "section_paragraph_introduction",
"content_type": "paragraph",
"elements": [
{"text": "Paragraph text content"}
],
"order": 0
"complexity": "simple",
"generation_hint": "Introduction paragraph",
"order": 2,
"elements": []
},
{
"id": "section_heading_section_1",
"content_type": "heading",
"complexity": "simple",
"generation_hint": "Section heading for topic 1",
"order": 3,
"elements": []
},
{
"id": "section_paragraph_section_1",
"content_type": "paragraph",
"complexity": "simple",
"generation_hint": "Content paragraph for section 1",
"order": 4,
"elements": []
},
{
"id": "section_bullet_list_example",
"content_type": "bullet_list",
"elements": [
{
"items": ["Item 1", "Item 2"]
}
],
"order": 0
"complexity": "simple",
"generation_hint": "Bullet list items",
"order": 5,
"elements": []
},
{
"id": "section_image_example",
"content_type": "image",
"complexity": "complex",
"generation_hint": "Illustration for document",
"image_prompt": "A detailed description for image generation",
"order": 6,
"elements": []
},
{
"id": "section_table_example",
"content_type": "table",
"elements": [
{
"headers": ["Column 1", "Column 2"],
"rows": [
["Row 1 Col 1", "Row 1 Col 2"],
["Row 2 Col 1", "Row 2 Col 2"]
],
"caption": "Table caption"
}
],
"order": 0
"complexity": "simple",
"generation_hint": "Data table with relevant information",
"order": 7,
"elements": []
},
{
"id": "section_code_example",
"content_type": "code_block",
"elements": [
{
"code": "function example() { return true; }",
"language": "javascript"
}
],
"order": 0
"complexity": "simple",
"generation_hint": "Code example or snippet",
"order": 8,
"elements": []
},
{
"id": "section_paragraph_conclusion",
"content_type": "paragraph",
"complexity": "simple",
"generation_hint": "Conclusion paragraph",
"order": 9,
"elements": []
}
]
}

View file

@ -19,9 +19,9 @@ class WorkflowActionParameter(BaseModel):
name: str = Field(description="Parameter name")
type: str = Field(description="Python type as string: 'str', 'int', 'bool', 'List[str]', etc.")
frontendType: FrontendType = Field(description="UI rendering type (from global FrontendType enum)")
frontendOptions: Optional[Union[str, List[Dict[str, Any]]]] = Field(
frontendOptions: Optional[Union[str, List[str]]] = Field(
None,
description="Options for select/multiselect/custom types. String reference (e.g., 'user.connection') or static list. For custom types, this is automatically set to the API endpoint."
description="Options for select/multiselect/custom types. String reference (e.g., 'user.connection') or list of strings (e.g., ['txt', 'json']). For custom types, this is automatically set to the API endpoint."
)
required: bool = Field(False, description="Whether parameter is required")
default: Optional[Any] = Field(None, description="Default value")

View file

@ -57,6 +57,9 @@ class Services:
from modules.interfaces.interfaceDbComponentObjects import getInterface as getComponentInterface
self.interfaceDbComponent = getComponentInterface(user)
# Expose RBAC directly on services for convenience
self.rbac = self.interfaceDbApp.rbac if self.interfaceDbApp else None
# Initialize service packages
from .serviceExtraction.mainServiceExtraction import ExtractionService

View file

@ -1206,37 +1206,74 @@ If no trackable items can be identified, return: {{"kpis": []}}
else:
content_for_generation = None
self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
# Detect if this is a section generation prompt (not full document generation)
# Section prompts contain "SECTION TO GENERATE" marker
isSectionGeneration = "SECTION TO GENERATE" in prompt or "CRITICAL: Return ONLY a JSON object with an \"elements\" array" in prompt
generation_prompt = await buildGenerationPrompt(
outputFormat, prompt, title, content_for_generation, None, self.services
)
if isSectionGeneration:
# For section generation, use the prompt directly without wrapping
# Section prompts are already complete and should not be wrapped in document generation template
logger.debug("Detected section generation prompt - skipping document generation wrapper")
generation_prompt = prompt
promptArgs = {
"outputFormat": outputFormat,
"userPrompt": prompt,
"title": title,
"extracted_content": content_for_generation,
"services": self.services
}
# Call AI directly without looping (sections are simple, single-call)
self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for section generation")
request = AiCallRequest(
prompt=generation_prompt,
context="",
options=options
)
response = await self.callAi(request)
generated_json = response.content if response and response.content else ""
self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
# Extract user prompt from promptArgs for task completion analysis
userPrompt = None
if promptArgs:
userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
# For section generation, return the raw JSON content directly
# No rendering needed - sections are just JSON elements
self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Section content generated")
self.services.chat.progressLogFinish(aiOperationId, True)
# Track generation progress - the looping function will update with byte progress
generated_json = await self._callAiWithLooping(
generation_prompt,
options,
"document_generation",
buildGenerationPrompt,
promptArgs,
aiOperationId,
userPrompt=userPrompt
)
metadata = AiResponseMetadata(
title=title or "Section Content",
operationType=opType.value if opType else None
)
return AiResponse(
content=generated_json,
metadata=metadata,
documents=[]
)
else:
# Full document generation - use the wrapper
self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
generation_prompt = await buildGenerationPrompt(
outputFormat, prompt, title, content_for_generation, None, self.services
)
promptArgs = {
"outputFormat": outputFormat,
"userPrompt": prompt,
"title": title,
"extracted_content": content_for_generation,
"services": self.services
}
self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
# Extract user prompt from promptArgs for task completion analysis
userPrompt = None
if promptArgs:
userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
# Track generation progress - the looping function will update with byte progress
generated_json = await self._callAiWithLooping(
generation_prompt,
options,
"document_generation",
buildGenerationPrompt,
promptArgs,
aiOperationId,
userPrompt=userPrompt
)
# Calculate final size for completion message
finalSize = len(generated_json.encode('utf-8')) if generated_json else 0
@ -1291,7 +1328,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
rendered_content, mime_type = await generationService.renderReport(
rendered_content, mime_type, _images = await generationService.renderReport(
generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
)
self.services.chat.progressLogFinish(renderOperationId, True)

View file

@ -82,14 +82,62 @@ class GenerationService:
documentData = doc_data['content']
mimeType = doc_data['mimeType']
# Convert document data to string content
content = convertDocumentDataToString(documentData, getFileExtension(documentName))
# Handle binary data (images, PDFs, Office docs) differently from text
# Check if this is a binary MIME type
binaryMimeTypes = {
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/pdf",
"image/png", "image/jpeg", "image/jpg", "image/gif", "image/webp", "image/bmp", "image/svg+xml",
}
# Skip empty or minimal content
minimalContentPatterns = ['{}', '[]', 'null', '""', "''"]
if not content or content.strip() == "" or content.strip() in minimalContentPatterns:
logger.warning(f"Empty or minimal content for document {documentName}, skipping")
continue
isBinaryMimeType = mimeType in binaryMimeTypes
base64encoded = False
content = None
if isBinaryMimeType:
# For binary data, handle bytes vs base64 string vs regular string
if isinstance(documentData, bytes):
# Already bytes - encode to base64 string for storage
import base64
content = base64.b64encode(documentData).decode('utf-8')
base64encoded = True
elif isinstance(documentData, str):
# Check if it's already valid base64
import base64
try:
# Try to decode to verify it's base64
base64.b64decode(documentData, validate=True)
# Valid base64 - use as is
content = documentData
base64encoded = True
except Exception:
# Not valid base64 - might be raw string, try encoding
try:
content = base64.b64encode(documentData.encode('utf-8')).decode('utf-8')
base64encoded = True
except Exception:
logger.warning(f"Could not process binary data for {documentName}, skipping")
continue
else:
# Other types - convert to string then base64
import base64
try:
content = base64.b64encode(str(documentData).encode('utf-8')).decode('utf-8')
base64encoded = True
except Exception:
logger.warning(f"Could not encode binary data for {documentName}, skipping")
continue
else:
# Text data - convert to string
content = convertDocumentDataToString(documentData, getFileExtension(documentName))
# Skip empty or minimal content
minimalContentPatterns = ['{}', '[]', 'null', '""', "''"]
if not content or content.strip() == "" or content.strip() in minimalContentPatterns:
logger.warning(f"Empty or minimal content for document {documentName}, skipping")
continue
# Normalize file extension based on mime type if missing or incorrect
try:
@ -102,6 +150,13 @@ class GenerationService:
"text/markdown": ".md",
"text/plain": ".txt",
"application/json": ".json",
"image/png": ".png",
"image/jpeg": ".jpg",
"image/jpg": ".jpg",
"image/gif": ".gif",
"image/webp": ".webp",
"image/bmp": ".bmp",
"image/svg+xml": ".svg",
}
expectedExt = mime_to_ext.get(mimeType)
if expectedExt:
@ -114,20 +169,6 @@ class GenerationService:
except Exception:
pass
# Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
base64encoded = False
try:
binaryMimeTypes = {
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/pdf",
}
if isinstance(documentData, str) and mimeType in binaryMimeTypes:
base64encoded = True
except Exception:
base64encoded = False
# Create document with file in one step using interfaces directly
document = self._createDocument(
fileName=documentName,
@ -278,7 +319,7 @@ class GenerationService:
'workflowId': 'unknown'
}
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str]:
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str, List[Dict[str, Any]]]:
"""
Render extracted JSON content to the specified output format.
Always uses unified "documents" array format.
@ -291,7 +332,8 @@ class GenerationService:
aiService: AI service instance for generation prompt creation
Returns:
tuple: (rendered_content, mime_type)
tuple: (rendered_content, mime_type, images_list)
images_list: List of image dicts with base64Data, altText, caption, etc.
"""
try:
# Validate JSON input
@ -311,12 +353,10 @@ class GenerationService:
if "sections" not in single_doc:
raise ValueError("Document must contain 'sections' field")
# Create content for single document renderer
contentToRender = {
"sections": single_doc["sections"],
"metadata": extractedContent.get("metadata", {}),
"continuation": extractedContent.get("continuation", None)
}
# Pass standardized schema to renderer (maintains architecture)
# Renderer should extract sections from documents array according to standardized schema
# Standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
contentToRender = extractedContent # Pass full standardized schema
# Get the appropriate renderer for the format
renderer = self._getFormatRenderer(outputFormat)
@ -324,9 +364,15 @@ class GenerationService:
raise ValueError(f"Unsupported output format: {outputFormat}")
# Render the JSON content directly (AI generation handled by main service)
# Renderer receives standardized schema and extracts what it needs
renderedContent, mimeType = await renderer.render(contentToRender, title, userPrompt, aiService)
return renderedContent, mimeType
# Get images from renderer if available
images = []
if hasattr(renderer, 'getRenderedImages'):
images = renderer.getRenderedImages()
return renderedContent, mimeType, images
except Exception as e:
logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
@ -353,14 +399,21 @@ class GenerationService:
def _getFormatRenderer(self, output_format: str):
"""Get the appropriate renderer for the specified format using auto-discovery."""
try:
from .renderers.registry import getRenderer
from .renderers.registry import getRenderer, getSupportedFormats
renderer = getRenderer(output_format, services=self.services)
if renderer:
return renderer
# Log available formats for debugging
availableFormats = getSupportedFormats()
logger.error(
f"No renderer found for format '{output_format}'. "
f"Available formats: {availableFormats}"
)
# Fallback to text renderer if no specific renderer found
logger.warning(f"No renderer found for format {output_format}, falling back to text")
logger.warning(f"Falling back to text renderer for format {output_format}")
fallbackRenderer = getRenderer('text', services=self.services)
if fallbackRenderer:
return fallbackRenderer
@ -370,4 +423,6 @@ class GenerationService:
except Exception as e:
logger.error(f"Error getting renderer for {output_format}: {str(e)}")
import traceback
logger.debug(traceback.format_exc())
return None

View file

@ -66,12 +66,34 @@ class BaseRenderer(ABC):
pass
def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Extract sections from report data."""
return reportData.get('sections', [])
"""
Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
"""
if "documents" not in reportData:
raise ValueError("Report data must follow standardized schema with 'documents' array")
documents = reportData.get("documents", [])
if not isinstance(documents, list) or len(documents) == 0:
raise ValueError("Standardized schema must contain at least one document in 'documents' array")
firstDoc = documents[0]
if not isinstance(firstDoc, dict) or "sections" not in firstDoc:
raise ValueError("Document in standardized schema must contain 'sections' field")
return firstDoc.get("sections", [])
def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]:
"""Extract metadata from report data."""
return reportData.get('metadata', {})
"""
Extract metadata from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
"""
if "metadata" not in reportData:
raise ValueError("Report data must follow standardized schema with 'metadata' field")
metadata = reportData.get("metadata", {})
if not isinstance(metadata, dict):
raise ValueError("Metadata in standardized schema must be a dictionary")
return metadata
def _getTitle(self, reportData: Dict[str, Any], fallbackTitle: str) -> str:
"""Get title from report data or use fallback."""
@ -79,14 +101,33 @@ class BaseRenderer(ABC):
return metadata.get('title', fallbackTitle)
def _validateJsonStructure(self, jsonContent: Dict[str, Any]) -> bool:
"""Validate that JSON content has the expected structure."""
"""
Validate that JSON content follows standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
"""
if not isinstance(jsonContent, dict):
return False
if "sections" not in jsonContent:
# Validate metadata field exists
if "metadata" not in jsonContent:
return False
sections = jsonContent.get("sections", [])
if not isinstance(jsonContent.get("metadata"), dict):
return False
# Validate documents array exists and is not empty
if "documents" not in jsonContent:
return False
documents = jsonContent.get("documents", [])
if not isinstance(documents, list) or len(documents) == 0:
return False
# Validate first document has sections
firstDoc = documents[0]
if not isinstance(firstDoc, dict) or "sections" not in firstDoc:
return False
sections = firstDoc.get("sections", [])
if not isinstance(sections, list):
return False

View file

@ -41,15 +41,16 @@ class RendererCsv(BaseRenderer):
async def _generateCsvFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate CSV content from structured JSON document."""
try:
# Validate JSON structure
if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
# Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
if not self._validateJsonStructure(jsonContent):
raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")
if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Extract sections and metadata from standardized schema
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = jsonContent.get("metadata", {}).get("title", title)
documentTitle = metadata.get("title", title)
# Generate CSV content
csvRows = []
@ -60,7 +61,6 @@ class RendererCsv(BaseRenderer):
csvRows.append([]) # Empty row
# Process each section in order
sections = jsonContent.get("sections", [])
for section in sections:
sectionCsv = self._renderJsonSectionToCsv(section)
if sectionCsv:

View file

@ -71,22 +71,22 @@ class RendererDocx(BaseRenderer):
self._setupBasicDocumentStyles(doc)
self._setupDocumentStyles(doc, styleSet)
# Validate JSON structure
if not isinstance(json_content, dict):
raise ValueError("JSON content must be a dictionary")
# Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
if not self._validateJsonStructure(json_content):
raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")
if "sections" not in json_content:
raise ValueError("JSON content must contain 'sections' field")
# Extract sections and metadata from standardized schema
sections = self._extractSections(json_content)
metadata = self._extractMetadata(json_content)
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
document_title = metadata.get("title", title)
# Add document title using Title style
if document_title:
doc.add_paragraph(document_title, style='Title')
# Process each section in order
sections = json_content.get("sections", [])
for section in sections:
self._renderJsonSection(doc, section, styleSet)

View file

@ -28,14 +28,25 @@ class RendererHtml(BaseRenderer):
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
    """
    Render extracted JSON content to HTML format using AI-analyzed styling.

    Images found by ``_extractImages`` are stored on the instance as
    ``_renderedImages`` before the HTML is generated; when any exist, the
    generated HTML is passed through ``_replaceImageDataUris``.

    Args:
        extractedContent: JSON content handed to ``_extractImages`` and
            ``_generateHtmlFromJson``.
        title: Title forwarded to ``_generateHtmlFromJson`` and used in the
            error fallback page.
        userPrompt: Forwarded to ``_generateHtmlFromJson``.
        aiService: Forwarded to ``_generateHtmlFromJson``.

    Returns:
        Tuple of (html_content, "text/html"). On any exception a minimal
        fallback page describing the error is returned instead of raising.
    """
    import html

    try:
        # Extract images first and keep them on the instance for later retrieval
        images = self._extractImages(extractedContent)
        self._renderedImages = images

        # Generate HTML using AI-analyzed styling
        htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)

        # Replace base64 data URIs with relative file paths if images exist
        if images:
            htmlContent = self._replaceImageDataUris(htmlContent, images)

        return htmlContent, "text/html"
    except Exception as e:
        self.logger.error(f"Error rendering HTML: {str(e)}")
        # No usable images when rendering failed
        self._renderedImages = []
        # Title and exception text may contain markup characters; escape them
        # so the fallback page cannot be broken or injected into.
        safeTitle = html.escape(str(title))
        safeError = html.escape(str(e))
        return f"<html><head><title>{safeTitle}</title></head><body><h1>{safeTitle}</h1><p>Error rendering report: {safeError}</p></body></html>", "text/html"
async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
@ -45,14 +56,15 @@ class RendererHtml(BaseRenderer):
styles = await self._getStyleSet(userPrompt, aiService)
# Validate JSON structure
if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
if not self._validateJsonStructure(jsonContent):
raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")
if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Extract sections and metadata from standardized schema
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = jsonContent.get("metadata", {}).get("title", title)
documentTitle = metadata.get("title", title)
# Build HTML document
htmlParts = []
@ -77,7 +89,6 @@ class RendererHtml(BaseRenderer):
htmlParts.append('<main>')
# Process each section
sections = jsonContent.get("sections", [])
for section in sections:
sectionHtml = self._renderJsonSection(section, styles)
if sectionHtml:
@ -377,12 +388,15 @@ class RendererHtml(BaseRenderer):
def _renderJsonHeading(self, headingData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON heading to HTML using AI-generated styles."""
try:
# Normalize non-dict inputs
if isinstance(headingData, str):
# Normalize inputs - headingData is typically a list of elements from _getSectionData
if isinstance(headingData, list):
# Extract first element from elements array
if headingData and len(headingData) > 0:
headingData = headingData[0] if isinstance(headingData[0], dict) else {}
else:
return ""
elif isinstance(headingData, str):
headingData = {"text": headingData, "level": 2}
elif isinstance(headingData, list):
# Render a list as bullet list under a default heading label
return self._renderJsonBulletList({"items": headingData}, styles)
elif not isinstance(headingData, dict):
return ""
@ -402,21 +416,28 @@ class RendererHtml(BaseRenderer):
def _renderJsonParagraph(self, paragraphData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON paragraph to HTML using AI-generated styles."""
try:
# Normalize non-dict inputs
if isinstance(paragraphData, str):
paragraphData = {"text": paragraphData}
elif isinstance(paragraphData, list):
# Treat list as bullet list paragraph
return self._renderJsonBulletList({"items": paragraphData}, styles)
elif not isinstance(paragraphData, dict):
# Normalize inputs - paragraphData is typically a list of elements from _getSectionData
if isinstance(paragraphData, list):
# Extract text from all paragraph elements
texts = []
for el in paragraphData:
if isinstance(el, dict) and "text" in el:
texts.append(el["text"])
elif isinstance(el, str):
texts.append(el)
if texts:
# Join multiple paragraphs with <p> tags
return '\n'.join(f'<p>{text}</p>' for text in texts)
return ""
elif isinstance(paragraphData, str):
return f'<p>{paragraphData}</p>'
elif isinstance(paragraphData, dict):
text = paragraphData.get("text", "")
if text:
return f'<p>{text}</p>'
return ""
else:
return ""
text = paragraphData.get("text", "")
if text:
return f'<p>{text}</p>'
return ""
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
@ -441,16 +462,145 @@ class RendererHtml(BaseRenderer):
return ""
def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON image to HTML."""
"""Render a JSON image to HTML with placeholder for later replacement."""
try:
base64Data = imageData.get("base64Data", "")
altText = imageData.get("altText", "Image")
caption = imageData.get("caption", "")
if base64Data:
return f'<img src="data:image/png;base64,{base64Data}" alt="{altText}">'
# Use data URI as placeholder - will be replaced with file path in _replaceImageDataUris
# Include a marker so we can find and replace it
imageMarker = f"<!--IMAGE_MARKER:{len(base64Data)}:{altText[:50]}-->"
imgTag = f'<img src="data:image/png;base64,{base64Data}" alt="{altText}">'
if caption:
return f'{imageMarker}<figure>{imgTag}<figcaption>{caption}</figcaption></figure>'
else:
return f'{imageMarker}{imgTag}'
return ""
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
return f'<div class="error">[Image: {imageData.get("altText", "Image")}]</div>'
def _extractImages(self, jsonContent: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Extract all images from JSON structure.
Returns:
List of image data dictionaries with base64Data, altText, caption, sectionId
"""
images = []
try:
# Extract from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
documents = jsonContent.get("documents", [])
if not documents or not isinstance(documents, list):
return images
for doc in documents:
if not isinstance(doc, dict):
continue
sections = doc.get("sections", [])
for section in sections:
if section.get("content_type") == "image":
elements = section.get("elements", [])
for element in elements:
base64Data = element.get("base64Data", "")
# If base64Data not found, try extracting from url data URI
if not base64Data:
url = element.get("url", "")
if url.startswith("data:image/"):
# Extract base64 from data URI: data:image/png;base64,<base64>
import re
match = re.match(r'data:image/[^;]+;base64,(.+)', url)
if match:
base64Data = match.group(1)
if base64Data:
sectionId = section.get("id", "unknown")
# Generate filename from section ID
filename = f"{sectionId}.png"
# Clean filename (remove invalid characters)
filename = "".join(c if c.isalnum() or c in "._-" else "_" for c in filename)
images.append({
"base64Data": base64Data,
"altText": element.get("altText", "Image"),
"caption": element.get("caption"),
"sectionId": sectionId,
"filename": filename
})
self.logger.debug(f"Extracted image from section {sectionId}: {filename}")
self.logger.info(f"Extracted {len(images)} image(s) from JSON structure")
return images
except Exception as e:
self.logger.warning(f"Error extracting images: {str(e)}")
return []
def _replaceImageDataUris(self, htmlContent: str, images: List[Dict[str, Any]]) -> str:
"""
Replace base64 data URIs in HTML with relative file paths.
Args:
htmlContent: HTML content with data URIs
images: List of image data dictionaries
Returns:
HTML content with relative file paths
"""
try:
import base64
import re
# Find all image data URIs in HTML
dataUriPattern = r'data:image/png;base64,([A-Za-z0-9+/=]+)'
def replaceDataUri(match):
base64Data = match.group(1)
# Find matching image in images list
matchingImage = None
for img in images:
if img["base64Data"] == base64Data or img["base64Data"].startswith(base64Data[:100]):
matchingImage = img
break
if matchingImage:
# Use filename from image data (generated from section ID)
filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png")
# Replace with relative path
altText = matchingImage.get("altText", "Image")
caption = matchingImage.get("caption", "")
if caption:
return f'<figure><img src="{filename}" alt="{altText}"><figcaption>{caption}</figcaption></figure>'
else:
return f'<img src="{filename}" alt="{altText}">'
else:
# Keep original if no match found
return match.group(0)
# Replace all data URIs
updatedHtml = re.sub(dataUriPattern, replaceDataUri, htmlContent)
return updatedHtml
except Exception as e:
self.logger.warning(f"Error replacing image data URIs: {str(e)}")
return htmlContent # Return original if replacement fails
def getRenderedImages(self) -> List[Dict[str, Any]]:
    """
    Return the images stored on this renderer as ``_renderedImages``.

    Returns:
        The stored list of image dicts, or an empty list when the
        ``_renderedImages`` attribute has not been set on the instance.
    """
    # A missing attribute yields an empty list
    return getattr(self, '_renderedImages', [])

View file

@ -123,7 +123,7 @@ class RendererImage(BaseRenderer):
promptParts.append(f"Document Title: {title}")
# Analyze content and create visual description
sections = extractedContent.get("sections", [])
sections = self._extractSections(extractedContent)
contentDescription = self._analyzeContentForVisualDescription(sections)
if contentDescription:
@ -286,7 +286,7 @@ Return only the compressed prompt, no explanations.
styleElements.append("corporate, professional design")
# Analyze content type for additional style hints
sections = extractedContent.get("sections", [])
sections = self._extractSections(extractedContent)
hasTables = any(self._getSectionType(s) == "table" for s in sections)
hasLists = any(self._getSectionType(s) == "bullet_list" for s in sections)
hasCode = any(self._getSectionType(s) == "code_block" for s in sections)

View file

@ -41,15 +41,16 @@ class RendererMarkdown(BaseRenderer):
def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate markdown content from structured JSON document."""
try:
# Validate JSON structure
if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
# Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
if not self._validateJsonStructure(jsonContent):
raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")
if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Extract sections and metadata from standardized schema
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = jsonContent.get("metadata", {}).get("title", title)
documentTitle = metadata.get("title", title)
# Build markdown content
markdownParts = []
@ -59,7 +60,6 @@ class RendererMarkdown(BaseRenderer):
markdownParts.append("")
# Process each section
sections = jsonContent.get("sections", [])
for section in sections:
sectionMarkdown = self._renderJsonSection(section)
if sectionMarkdown:

View file

@ -65,14 +65,15 @@ class RendererPdf(BaseRenderer):
styles = await self._getStyleSet(userPrompt, aiService)
# Validate JSON structure
if not isinstance(json_content, dict):
raise ValueError("JSON content must be a dictionary")
if not self._validateJsonStructure(json_content):
raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")
if "sections" not in json_content:
raise ValueError("JSON content must contain 'sections' field")
# Extract sections and metadata from standardized schema
sections = self._extractSections(json_content)
metadata = self._extractMetadata(json_content)
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
document_title = metadata.get("title", title)
# Make title shorter to prevent wrapping/overlapping
if len(document_title) > 40:
@ -102,8 +103,7 @@ class RendererPdf(BaseRenderer):
story.append(Spacer(1, 30)) # Add spacing before page break
story.append(PageBreak())
# Process each section
sections = json_content.get("sections", [])
# Process each section (sections already extracted above)
self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
for i, section in enumerate(sections):
self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
@ -505,7 +505,7 @@ class RendererPdf(BaseRenderer):
except Exception as e:
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))]
return [Paragraph(f"[Error rendering section: {str(e)}]", self._createNormalStyle(styles))]
def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON table to PDF elements using AI-generated styles."""
@ -555,9 +555,9 @@ class RendererPdf(BaseRenderer):
elements = []
for item in items:
if isinstance(item, str):
elements.append(Paragraph(f"{item}", self._create_normal_style(styles)))
elements.append(Paragraph(f"{item}", self._createNormalStyle(styles)))
elif isinstance(item, dict) and "text" in item:
elements.append(Paragraph(f"{item['text']}", self._create_normal_style(styles)))
elements.append(Paragraph(f"{item['text']}", self._createNormalStyle(styles)))
if elements:
elements.append(Spacer(1, bullet_style_def.get("space_after", 3)))
@ -637,16 +637,84 @@ class RendererPdf(BaseRenderer):
return []
def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON image to PDF elements."""
"""Render a JSON image to PDF elements using reportlab."""
try:
base64_data = image_data.get("base64Data", "")
alt_text = image_data.get("altText", "Image")
caption = image_data.get("caption", "")
if base64_data:
# For now, just add a placeholder since reportlab image handling is complex
# If base64Data not found, try extracting from url data URI
if not base64_data:
url = image_data.get("url", "")
if url.startswith("data:image/"):
# Extract base64 from data URI: data:image/png;base64,<base64>
import re
match = re.match(r'data:image/[^;]+;base64,(.+)', url)
if match:
base64_data = match.group(1)
if not base64_data:
return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]
return []
try:
from reportlab.platypus import Image as ReportLabImage
from reportlab.lib.units import inch
import base64
import io
# Decode base64 image data
imageBytes = base64.b64decode(base64_data)
imageStream = io.BytesIO(imageBytes)
# Create reportlab Image element
# Try to get image dimensions from PIL
try:
from PIL import Image as PILImage
pilImage = PILImage.open(imageStream)
imgWidth, imgHeight = pilImage.size
# Scale to fit page (max width 6 inches, maintain aspect ratio)
maxWidth = 6 * inch
if imgWidth > maxWidth:
scale = maxWidth / imgWidth
imgWidth = maxWidth
imgHeight = imgHeight * scale
else:
imgWidth = imgWidth * (inch / 72) # Convert pixels to inches (assuming 72 DPI)
imgHeight = imgHeight * (inch / 72)
# Reset stream for reportlab
imageStream.seek(0)
except Exception:
# Fallback: use default size
imgWidth = 4 * inch
imgHeight = 3 * inch
imageStream.seek(0)
# Create reportlab Image
reportlabImage = ReportLabImage(imageStream, width=imgWidth, height=imgHeight)
elements = [reportlabImage]
# Add caption if available
if caption:
captionStyle = self._createNormalStyle(styles)
captionStyle.fontSize = 10
captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666"))
elements.append(Paragraph(f"<i>{caption}</i>", captionStyle))
elif alt_text and alt_text != "Image":
# Use alt text as caption if no caption provided
captionStyle = self._createNormalStyle(styles)
captionStyle.fontSize = 10
captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666"))
elements.append(Paragraph(f"<i>Figure: {alt_text}</i>", captionStyle))
return elements
except Exception as imgError:
self.logger.warning(f"Error embedding image in PDF: {str(imgError)}")
# Fallback to placeholder
return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")

View file

@ -66,6 +66,9 @@ class RendererPptx(BaseRenderer):
# Debug: Show first 200 chars of content
logger.info(f"JSON content preview: {str(extractedContent)[:200]}...")
# Store prs reference for image methods
self._currentPresentation = prs
for i, slide_data in enumerate(slidesData):
logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars")
# Debug: Show slide content preview
@ -75,6 +78,9 @@ class RendererPptx(BaseRenderer):
else:
logger.warning(f" ⚠️ Slide {i+1} has NO content!")
# Check if slide has images
hasImages = slide_data.get("images") and len(slide_data.get("images", [])) > 0
# Create slide with appropriate layout based on content
slideLayoutIndex = self._getSlideLayoutIndex(slide_data, styles)
slide_layout = prs.slide_layouts[slideLayoutIndex]
@ -92,67 +98,71 @@ class RendererPptx(BaseRenderer):
title_color = self._get_safe_color(title_style.get("color", (31, 78, 121)))
title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
# Set content with AI-generated styling
content_shape = slide.placeholders[1]
content_text = slide_data.get("content", "")
# Handle images first (if present)
if hasImages:
self._addImagesToSlide(slide, slide_data.get("images", []), styles)
# Format content text with AI styles
text_frame = content_shape.text_frame
text_frame.clear()
# Set content with AI-generated styling (if not image-only slide)
if slide_content or not hasImages:
content_shape = slide.placeholders[1]
# Split content into paragraphs
paragraphs = content_text.split('\n\n')
# Format content text with AI styles
text_frame = content_shape.text_frame
text_frame.clear()
for i, paragraph in enumerate(paragraphs):
if paragraph.strip():
if i == 0:
p = text_frame.paragraphs[0]
else:
p = text_frame.add_paragraph()
# Split content into paragraphs
paragraphs = slide_content.split('\n\n')
p.text = paragraph.strip()
for paraIdx, paragraph in enumerate(paragraphs):
if paragraph.strip():
if paraIdx == 0:
p = text_frame.paragraphs[0]
else:
p = text_frame.add_paragraph()
# Apply AI-generated styling based on content type
if paragraph.startswith('#'):
# Header
p.text = paragraph.lstrip('#').strip()
heading_style = styles.get("heading", {})
p.font.size = Pt(heading_style.get("font_size", 32))
p.font.bold = heading_style.get("bold", True)
heading_color = self._get_safe_color(heading_style.get("color", (47, 47, 47)))
p.font.color.rgb = RGBColor(*heading_color)
elif paragraph.startswith('##'):
# Subheader
p.text = paragraph.lstrip('#').strip()
subheading_style = styles.get("subheading", {})
p.font.size = Pt(subheading_style.get("font_size", 24))
p.font.bold = subheading_style.get("bold", True)
subheading_color = self._get_safe_color(subheading_style.get("color", (79, 79, 79)))
p.font.color.rgb = RGBColor(*subheading_color)
elif paragraph.startswith('*') and paragraph.endswith('*'):
# Bold text
p.text = paragraph.strip('*')
paragraph_style = styles.get("paragraph", {})
p.font.size = Pt(paragraph_style.get("font_size", 18))
p.font.bold = True
paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
p.font.color.rgb = RGBColor(*paragraph_color)
else:
# Regular text
paragraph_style = styles.get("paragraph", {})
p.font.size = Pt(paragraph_style.get("font_size", 18))
p.font.bold = paragraph_style.get("bold", False)
paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
p.font.color.rgb = RGBColor(*paragraph_color)
p.text = paragraph.strip()
# Apply alignment
align = paragraph_style.get("align", "left")
if align == "center":
p.alignment = PP_ALIGN.CENTER
elif align == "right":
p.alignment = PP_ALIGN.RIGHT
else:
p.alignment = PP_ALIGN.LEFT
# Apply AI-generated styling based on content type
if paragraph.startswith('#'):
# Header
p.text = paragraph.lstrip('#').strip()
heading_style = styles.get("heading", {})
p.font.size = Pt(heading_style.get("font_size", 32))
p.font.bold = heading_style.get("bold", True)
heading_color = self._get_safe_color(heading_style.get("color", (47, 47, 47)))
p.font.color.rgb = RGBColor(*heading_color)
elif paragraph.startswith('##'):
# Subheader
p.text = paragraph.lstrip('#').strip()
subheading_style = styles.get("subheading", {})
p.font.size = Pt(subheading_style.get("font_size", 24))
p.font.bold = subheading_style.get("bold", True)
subheading_color = self._get_safe_color(subheading_style.get("color", (79, 79, 79)))
p.font.color.rgb = RGBColor(*subheading_color)
elif paragraph.startswith('*') and paragraph.endswith('*'):
# Bold text
p.text = paragraph.strip('*')
paragraph_style = styles.get("paragraph", {})
p.font.size = Pt(paragraph_style.get("font_size", 18))
p.font.bold = True
paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
p.font.color.rgb = RGBColor(*paragraph_color)
else:
# Regular text
paragraph_style = styles.get("paragraph", {})
p.font.size = Pt(paragraph_style.get("font_size", 18))
p.font.bold = paragraph_style.get("bold", False)
paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
p.font.color.rgb = RGBColor(*paragraph_color)
# Apply alignment
align = paragraph_style.get("align", "left")
if align == "center":
p.alignment = PP_ALIGN.CENTER
elif align == "right":
p.alignment = PP_ALIGN.RIGHT
else:
p.alignment = PP_ALIGN.LEFT
# If no slides were created, create a default slide
if not slidesData:
@ -568,15 +578,16 @@ JSON ONLY. NO OTHER TEXT."""
slides = []
try:
# Validate JSON structure
if not isinstance(json_content, dict):
raise ValueError("JSON content must be a dictionary")
# Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
if not self._validateJsonStructure(json_content):
raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")
if "sections" not in json_content:
raise ValueError("JSON content must contain 'sections' field")
# Extract sections and metadata from standardized schema
sections = self._extractSections(json_content)
metadata = self._extractMetadata(json_content)
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
document_title = metadata.get("title", title)
# Create title slide
slides.append({
@ -585,7 +596,6 @@ JSON ONLY. NO OTHER TEXT."""
})
# Process sections into slides based on content and user intent
sections = json_content.get("sections", [])
slides.extend(self._createSlidesFromSections(sections, styles))
# If no content slides were created, create a default content slide
@ -624,6 +634,24 @@ JSON ONLY. NO OTHER TEXT."""
content_type = section.get("content_type", "paragraph")
elements = section.get("elements", [])
# Handle image sections specially
if content_type == "image":
# Extract image data
images = []
for element in elements:
if element.get("base64Data"):
images.append({
"base64Data": element.get("base64Data"),
"altText": element.get("altText", "Image"),
"caption": element.get("caption")
})
return {
"title": section_title or element.get("altText", "Image"),
"content": "", # No text content for image slides
"images": images
}
# Build slide content based on section type
content_parts = []
@ -645,7 +673,8 @@ JSON ONLY. NO OTHER TEXT."""
return {
"title": section_title,
"content": slide_content
"content": slide_content,
"images": [] # No images for non-image sections
}
except Exception as e:
@ -835,7 +864,8 @@ JSON ONLY. NO OTHER TEXT."""
if current_slide_content:
slides.append({
"title": current_slide_title,
"content": "\n\n".join(current_slide_content)
"content": "\n\n".join(current_slide_content),
"images": []
})
current_slide_content = []
@ -844,6 +874,31 @@ JSON ONLY. NO OTHER TEXT."""
if isinstance(element, dict) and "text" in element:
current_slide_title = element.get("text", "Untitled Section")
break
elif section_type == "image":
# Create separate slide for image
if current_slide_content:
slides.append({
"title": current_slide_title,
"content": "\n\n".join(current_slide_content),
"images": []
})
current_slide_content = []
# Extract image data
imageData = []
for element in elements:
if element.get("base64Data"):
imageData.append({
"base64Data": element.get("base64Data"),
"altText": element.get("altText", "Image"),
"caption": element.get("caption")
})
slides.append({
"title": section.get("title") or (imageData[0].get("altText", "Image") if imageData else "Image"),
"content": "",
"images": imageData
})
else:
# Add content to current slide
formatted_content = self._formatSectionContent(section)
@ -854,7 +909,8 @@ JSON ONLY. NO OTHER TEXT."""
if current_slide_content:
slides.append({
"title": current_slide_title,
"content": "\n\n".join(current_slide_content)
"content": "\n\n".join(current_slide_content),
"images": []
})
return slides
@ -869,6 +925,10 @@ JSON ONLY. NO OTHER TEXT."""
content_type = section.get("content_type", "paragraph")
elements = section.get("elements", [])
# Image sections return empty content (handled separately)
if content_type == "image":
return ""
# Process each element in the section
content_parts = []
for element in elements:
@ -891,6 +951,110 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting section content: {str(e)}")
return ""
def _addImagesToSlide(self, slide, images: List[Dict[str, Any]], styles: Dict[str, Any]) -> None:
    """Add base64-encoded images to a PowerPoint slide below the title area.

    A single image is scaled down (never up) to fit 80% of the available
    area, centred, and followed by a caption taken from "caption" or
    "altText". Several images are laid out in a grid (2 columns for up to
    four images, otherwise 3); each image is fitted into its cell with its
    aspect ratio preserved.

    Args:
        slide: Slide that receives the pictures and the caption text box.
        images: Dicts with "base64Data" and optional "altText"/"caption".
            Entries without "base64Data" are skipped.
        styles: Style set; not read by this method.

    Returns:
        None. Any exception is logged as a warning and not re-raised.
    """
    try:
        from pptx.util import Inches, Pt
        from pptx.enum.text import PP_ALIGN
        import base64
        import io

        if not images:
            return

        # Slide dimensions live on the presentation, not on the slide
        if hasattr(self, '_currentPresentation'):
            prs = self._currentPresentation
        else:
            # python-pptx slides have no `.presentation` attribute; reach the
            # presentation through the slide's package instead
            prs = slide.part.package.presentation_part.presentation

        slideWidth = prs.slide_width
        slideHeight = prs.slide_height
        titleHeight = Inches(1.5)  # Approximate title height
        margin = Inches(0.5)

        # Area left for images once title and margins are subtracted
        availableWidth = slideWidth - Inches(1)
        availableHeight = slideHeight - titleHeight - Inches(1)

        if len(images) == 1:
            img = images[0]
            base64Data = img.get("base64Data")
            if base64Data:
                imageStream = io.BytesIO(base64.b64decode(base64Data))

                # Limit the picture to 80% of the available area
                fitted = self._fitImageToBoxEmu(
                    imageStream, availableWidth * 0.8, availableHeight * 0.8
                )
                if fitted:
                    finalWidth, finalHeight = fitted
                    top = int(titleHeight + (availableHeight - finalHeight) / 2)
                else:
                    # Dimensions unknown: fall back to a fixed default size
                    finalWidth = int(Inches(6))
                    finalHeight = int(Inches(4.5))
                    top = int(titleHeight + Inches(1))
                left = int((slideWidth - finalWidth) / 2)

                slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight)

                caption = img.get("caption") or img.get("altText")
                if caption and caption != "Image":
                    # Text box directly below the picture
                    captionTop = int(top + finalHeight + Inches(0.2))
                    captionBox = slide.shapes.add_textbox(
                        Inches(1),
                        captionTop,
                        slideWidth - Inches(2),
                        Inches(0.5)
                    )
                    captionFrame = captionBox.text_frame
                    captionFrame.text = caption
                    captionParagraph = captionFrame.paragraphs[0]
                    captionParagraph.font.size = Pt(12)
                    captionParagraph.font.italic = True
                    captionParagraph.alignment = PP_ALIGN.CENTER
        else:
            # Multiple images: arrange in a grid
            cols = 2 if len(images) <= 4 else 3
            rows = (len(images) + cols - 1) // cols

            # Integer EMU cell size; the gap between cells equals the margin
            cellWidth = int((availableWidth - margin * (cols - 1)) / cols)
            cellHeight = int((availableHeight - margin * (rows - 1)) / rows)

            for idx, img in enumerate(images):
                base64Data = img.get("base64Data")
                if not base64Data:
                    continue

                row, col = divmod(idx, cols)
                imageStream = io.BytesIO(base64.b64decode(base64Data))

                cellLeft = int(margin + col * (cellWidth + margin))
                cellTop = int(titleHeight + margin + row * (cellHeight + margin))

                fitted = self._fitImageToBoxEmu(imageStream, cellWidth, cellHeight, allowUpscale=True)
                if fitted:
                    # Centre the aspect-correct picture inside its cell
                    picWidth, picHeight = fitted
                    left = cellLeft + (cellWidth - picWidth) // 2
                    top = cellTop + (cellHeight - picHeight) // 2
                else:
                    # Dimensions unknown: fill the cell as before
                    picWidth, picHeight = cellWidth, cellHeight
                    left, top = cellLeft, cellTop

                slide.shapes.add_picture(imageStream, left, top, width=picWidth, height=picHeight)

    except Exception as e:
        logger.warning(f"Error adding images to slide: {str(e)}")

def _fitImageToBoxEmu(self, imageStream, maxWidth, maxHeight, allowUpscale: bool = False):
    """Return (width, height) in integer EMU fitting the image into a box, or None.

    The pixel size is read with PIL and converted to EMU before scaling, so
    that it is comparable with slide dimensions. The stream is rewound to its
    start in every case so it can be handed to add_picture afterwards.

    Args:
        imageStream: Seekable binary stream holding the image file.
        maxWidth: Box width in EMU.
        maxHeight: Box height in EMU.
        allowUpscale: When False the image is never enlarged beyond its
            native size.

    Returns:
        Tuple of ints, or None when the image size cannot be determined.
    """
    # 914400 EMU per inch / 72 px per inch; assumes 72 DPI, which is also what
    # python-pptx falls back to when an image carries no DPI information
    emuPerPixel = 12700
    try:
        from PIL import Image as PILImage
        pixelWidth, pixelHeight = PILImage.open(imageStream).size
    except Exception:
        return None
    finally:
        imageStream.seek(0)

    if pixelWidth <= 0 or pixelHeight <= 0:
        return None

    nativeWidth = pixelWidth * emuPerPixel
    nativeHeight = pixelHeight * emuPerPixel
    scale = min(maxWidth / nativeWidth, maxHeight / nativeHeight)
    if not allowUpscale:
        scale = min(scale, 1.0)
    return int(nativeWidth * scale), int(nativeHeight * scale)
def _formatTimestamp(self) -> str:
"""Format current timestamp for presentation generation."""
from datetime import datetime, UTC

View file

@ -64,14 +64,15 @@ class RendererText(BaseRenderer):
"""Generate text content from structured JSON document."""
try:
# Validate JSON structure
if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
if not self._validateJsonStructure(jsonContent):
raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")
if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Extract sections and metadata from standardized schema
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = jsonContent.get("metadata", {}).get("title", title)
documentTitle = metadata.get("title", title)
# Build text content
textParts = []
@ -82,7 +83,6 @@ class RendererText(BaseRenderer):
textParts.append("")
# Process each section
sections = jsonContent.get("sections", [])
for section in sections:
sectionText = self._renderJsonSection(section)
if sectionText:

View file

@ -451,7 +451,7 @@ class RendererXlsx(BaseRenderer):
def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]:
"""Generate sheet names based on actual content structure."""
sections = jsonContent.get("sections", [])
sections = self._extractSections(jsonContent)
# If no sections, create a single sheet
if not sections:
@ -496,7 +496,7 @@ class RendererXlsx(BaseRenderer):
if not sheetNames:
return
sections = jsonContent.get("sections", [])
sections = self._extractSections(jsonContent)
tableSections = [s for s in sections if s.get("content_type") == "table"]
if len(tableSections) > 1:
@ -607,7 +607,7 @@ class RendererXlsx(BaseRenderer):
row += 1
# Content overview
sections = jsonContent.get("sections", [])
sections = self._extractSections(jsonContent)
sheet[f'A{row + 1}'] = "Content Overview:"
sheet[f'A{row + 1}'].font = Font(bold=True)
@ -640,7 +640,7 @@ class RendererXlsx(BaseRenderer):
def _populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]):
"""Populate additional sheets based on content types."""
try:
sections = jsonContent.get("sections", [])
sections = self._extractSections(jsonContent)
for sheetName in sheetNames:
if sheetName not in sheets:
@ -692,12 +692,14 @@ class RendererXlsx(BaseRenderer):
for element in elements:
if section_type == "table":
startRow = self._addTableToExcel(sheet, element, styles, startRow)
elif section_type == "list":
elif section_type == "bullet_list" or section_type == "list":
startRow = self._addListToExcel(sheet, element, styles, startRow)
elif section_type == "paragraph":
startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
elif section_type == "heading":
startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
elif section_type == "image":
startRow = self._addImageToExcel(sheet, element, styles, startRow)
else:
startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
@ -808,6 +810,75 @@ class RendererXlsx(BaseRenderer):
self.logger.warning(f"Could not add heading to Excel: {str(e)}")
return startRow + 1
def _addImageToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add an image element to Excel sheet using openpyxl."""
try:
base64Data = element.get("base64Data", "")
altText = element.get("altText", "Image")
caption = element.get("caption", "")
if not base64Data:
# No image data - add placeholder text
sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]")
return startRow + 1
try:
from openpyxl.drawing.image import Image as OpenpyxlImage
import base64
import io
# Decode base64 image data
imageBytes = base64.b64decode(base64Data)
imageStream = io.BytesIO(imageBytes)
# Create openpyxl Image
img = OpenpyxlImage(imageStream)
# Set image size (max width 6 inches, maintain aspect ratio)
maxWidth = 400 # pixels (approximately 6 inches at 72 DPI)
if img.width > maxWidth:
scale = maxWidth / img.width
img.width = maxWidth
img.height = int(img.height * scale)
# Anchor image to cell (A column, current row)
img.anchor = f'A{startRow}'
# Add image to sheet
sheet.add_image(img)
# Calculate height needed for image (approximate)
# Excel row height is in points (1/72 inch), image height is in pixels
# Assuming 72 DPI: pixels = points
imageHeightPoints = img.height / 1.33 # Approximate conversion
sheet.row_dimensions[startRow].height = max(15, imageHeightPoints) # Min 15 points
# Add caption below image if available
if caption:
startRow += 1
sheet.cell(row=startRow, column=1, value=caption)
sheet.cell(row=startRow, column=1).font = Font(italic=True, size=10)
sheet.cell(row=startRow, column=1).alignment = Alignment(horizontal="left")
elif altText and altText != "Image":
startRow += 1
sheet.cell(row=startRow, column=1, value=f"Figure: {altText}")
sheet.cell(row=startRow, column=1).font = Font(italic=True, size=10)
return startRow + 1
except ImportError:
self.logger.warning("openpyxl.drawing.image not available, using placeholder")
sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]")
return startRow + 1
except Exception as imgError:
self.logger.warning(f"Error embedding image in Excel: {str(imgError)}")
sheet.cell(row=startRow, column=1, value=f"[Image: {altText}]")
return startRow + 1
except Exception as e:
self.logger.warning(f"Could not add image to Excel: {str(e)}")
return startRow + 1
def _formatTimestamp(self) -> str:
"""Format current timestamp for document generation."""
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

View file

@ -0,0 +1,840 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Content Generator for hierarchical document generation.
Generates content for each section in the document structure.
"""
import logging
import asyncio
from typing import Dict, Any, Optional, List, Callable
from modules.services.serviceGeneration.subContentIntegrator import ContentIntegrator
logger = logging.getLogger(__name__)
class ContentGenerator:
"""Generates content for document sections"""
def __init__(self, services: Any):
    """Keep the service container and create the content integrator.

    Args:
        services: Service container; stored as-is and also passed to
            ContentIntegrator.
    """
    self.services = services
    # Used to merge generated sections into the structure and to build error sections
    self.integrator = ContentIntegrator(services)
async def generateContent(
    self,
    structure: Dict[str, Any],
    cachedContent: Optional[Dict[str, Any]] = None,
    userPrompt: str = "",
    progressCallback: Optional[Callable] = None,
    parallelGeneration: bool = True,
    batchSize: int = 10
) -> Dict[str, Any]:
    """
    Generate content for all sections in structure.

    Documents are processed in order. A document with more than one section
    is generated in parallel batches when parallelGeneration is enabled,
    otherwise sequentially. Progress is reported against the total number of
    sections across all documents.

    Args:
        structure: Document structure from Phase 1
        cachedContent: Extracted content cache
        userPrompt: Original user prompt
        progressCallback: Called as (current, total, message) for progress updates
        parallelGeneration: Enable parallel section generation
        batchSize: Number of sections to process in parallel

    Returns:
        Result of integrating the generated sections into the structure; the
        structure itself, unchanged, when it contains no documents

    Raises:
        Exception: Any error is logged and re-raised
    """
    try:
        documents = structure.get("documents", [])
        if not documents:
            logger.warning("No documents found in structure")
            return structure

        documentMetadata = structure.get("metadata", {})
        totalSectionsAcrossDocs = sum(len(doc.get("sections", [])) for doc in documents)

        def reportProgress(current: int, message: str) -> None:
            # Single place that guards against a missing callback
            if progressCallback:
                progressCallback(current, totalSectionsAcrossDocs, message)

        def makeDocumentProgress(offset: int) -> Callable:
            # Bind the offset now so per-document indices map onto the
            # overall position regardless of later changes to the counter
            def forward(idx, total, msg):
                reportProgress(offset + idx, msg)
            return forward

        reportProgress(0, "Starting content generation...")

        allGeneratedSections = []
        currentSectionIndex = 0

        for docIdx, doc in enumerate(documents):
            sections = doc.get("sections", [])
            totalSections = len(sections)
            if totalSections == 0:
                continue

            if docIdx > 0:
                reportProgress(
                    currentSectionIndex,
                    f"Processing document {docIdx + 1}/{len(documents)}..."
                )

            documentProgress = makeDocumentProgress(currentSectionIndex)

            # A single section gains nothing from batching
            if parallelGeneration and totalSections > 1:
                generatedSections = await self._generateSectionsParallel(
                    sections=sections,
                    cachedContent=cachedContent,
                    userPrompt=userPrompt,
                    documentMetadata=documentMetadata,
                    progressCallback=documentProgress,
                    batchSize=batchSize
                )
            else:
                generatedSections = await self._generateSectionsSequential(
                    sections=sections,
                    cachedContent=cachedContent,
                    userPrompt=userPrompt,
                    documentMetadata=documentMetadata,
                    progressCallback=documentProgress
                )

            allGeneratedSections.extend(generatedSections)
            currentSectionIndex += totalSections

        reportProgress(totalSectionsAcrossDocs, "Content generation complete")

        # Integrate generated content into structure
        return self.integrator.integrateContent(
            structure=structure,
            generatedSections=allGeneratedSections
        )

    except Exception as e:
        logger.error(f"Error generating content: {str(e)}")
        raise
async def _generateSectionsSequential(
self,
sections: List[Dict[str, Any]],
cachedContent: Optional[Dict[str, Any]],
userPrompt: str,
documentMetadata: Dict[str, Any],
progressCallback: Optional[Callable] = None
) -> List[Dict[str, Any]]:
"""
Generate sections sequentially with enhanced progress tracking.
Uses previous sections for context continuity.
"""
generatedSections = []
previousSections = []
totalSections = len(sections)
for idx, section in enumerate(sections):
try:
contentType = section.get("content_type", "content")
sectionId = section.get("id", f"section_{idx}")
# Enhanced progress message
if contentType == "image":
message = f"Generating image: {section.get('generation_hint', 'Image')[:50]}..."
elif contentType == "heading":
message = f"Generating heading..."
elif contentType == "paragraph":
message = f"Generating paragraph..."
else:
message = f"Generating {contentType}..."
if progressCallback:
progressCallback(
idx + 1,
totalSections,
message
)
context = {
"userPrompt": userPrompt,
"cachedContent": cachedContent,
"previousSections": previousSections.copy(),
"targetSection": section,
"documentMetadata": documentMetadata,
"operationId": None
}
generated = await self._generateSectionContent(section, context)
generatedSections.append(generated)
previousSections.append(generated)
# Log success
if contentType == "image":
logger.info(f"Successfully generated image for section {sectionId}")
elif not generated.get("error"):
logger.debug(f"Successfully generated {contentType} for section {sectionId}")
except Exception as e:
logger.error(f"Error generating section {section.get('id')}: {str(e)}")
errorSection = self.integrator.createErrorSection(section, str(e))
generatedSections.append(errorSection)
previousSections.append(errorSection)
return generatedSections
async def _generateSectionsParallel(
self,
sections: List[Dict[str, Any]],
cachedContent: Optional[Dict[str, Any]],
userPrompt: str,
documentMetadata: Dict[str, Any],
progressCallback: Optional[Callable] = None,
batchSize: int = 10
) -> List[Dict[str, Any]]:
"""
Generate sections in parallel batches with enhanced progress tracking.
Args:
sections: List of sections to generate
cachedContent: Extracted content cache
userPrompt: Original user prompt
documentMetadata: Document metadata
progressCallback: Progress callback function
batchSize: Number of sections to process in parallel per batch
Returns:
List of generated sections
"""
generatedSections = []
totalSections = len(sections)
if totalSections == 0:
return []
# Adjust batch size based on section types (images take longer)
imageCount = sum(1 for s in sections if s.get("content_type") == "image")
if imageCount > 0:
# Reduce batch size if many images (images are slower)
adjustedBatchSize = min(batchSize, max(3, batchSize - imageCount // 2))
else:
adjustedBatchSize = batchSize
# Process in batches
totalBatches = (totalSections + adjustedBatchSize - 1) // adjustedBatchSize
accumulatedPreviousSections = [] # Track sections from previous batches
for batchNum, batchStart in enumerate(range(0, totalSections, adjustedBatchSize)):
batch = sections[batchStart:batchStart + adjustedBatchSize]
batchEnd = min(batchStart + adjustedBatchSize, totalSections)
if progressCallback:
progressCallback(
batchStart,
totalSections,
f"Processing batch {batchNum + 1}/{totalBatches} ({len(batch)} sections)..."
)
async def generateWithProgress(section: Dict[str, Any], globalIndex: int, localIndex: int, batchPreviousSections: List[Dict[str, Any]]):
try:
contentType = section.get("content_type", "content")
sectionId = section.get("id", f"section_{globalIndex}")
# Enhanced progress message based on content type
if contentType == "image":
message = f"Generating image: {section.get('generation_hint', 'Image')[:50]}..."
elif contentType == "heading":
message = f"Generating heading..."
elif contentType == "paragraph":
message = f"Generating paragraph..."
else:
message = f"Generating {contentType}..."
if progressCallback:
progressCallback(
globalIndex + 1,
totalSections,
message
)
context = {
"userPrompt": userPrompt,
"cachedContent": cachedContent,
"previousSections": batchPreviousSections.copy(), # Include sections from previous batches
"targetSection": section,
"documentMetadata": documentMetadata,
"operationId": None # Can be set if needed for nested progress
}
result = await self._generateSectionContent(section, context)
# Log success
if contentType == "image":
logger.info(f"Successfully generated image for section {sectionId}")
elif not result.get("error"):
logger.debug(f"Successfully generated {contentType} for section {sectionId}")
return result
except Exception as e:
logger.error(f"Error generating section {section.get('id')}: {str(e)}")
return self.integrator.createErrorSection(section, str(e))
# Generate batch in parallel
# Pass accumulated previous sections to each task in this batch
batchTasks = [
generateWithProgress(section, batchStart + idx, idx, accumulatedPreviousSections)
for idx, section in enumerate(batch)
]
batchResults = await asyncio.gather(
*batchTasks,
return_exceptions=True
)
# Handle exceptions and collect results
for idx, result in enumerate(batchResults):
if isinstance(result, Exception):
logger.error(f"Error in parallel generation batch {batchNum + 1}: {str(result)}")
errorSection = self.integrator.createErrorSection(batch[idx], str(result))
generatedSections.append(errorSection)
accumulatedPreviousSections.append(errorSection) # Add to accumulated for next batch
else:
generatedSections.append(result)
accumulatedPreviousSections.append(result) # Add to accumulated for next batch
# Update progress after batch completion
if progressCallback:
progressCallback(
batchEnd,
totalSections,
f"Completed batch {batchNum + 1}/{totalBatches}"
)
return generatedSections
async def _generateSectionContent(
self,
section: Dict[str, Any],
context: Dict[str, Any]
) -> Dict[str, Any]:
"""
Generate content for a single section.
Args:
section: Section to generate content for
context: Generation context
Returns:
Section with populated elements array
"""
try:
contentType = section.get("content_type", "")
complexity = section.get("complexity", "simple")
if contentType == "image":
return await self._generateImageSection(section, context)
elif complexity == "complex":
return await self._generateComplexTextSection(section, context)
else:
return await self._generateSimpleSection(section, context)
except Exception as e:
logger.error(f"Error generating section {section.get('id')}: {str(e)}")
return self.integrator.createErrorSection(section, str(e))
async def _generateSimpleSection(
    self,
    section: Dict[str, Any],
    context: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Generate content for a simple section (heading, paragraph, ...).

    Builds the section prompt, asks the AI service for a JSON answer, parses
    it (trying to close truncated JSON before giving up) and stores the
    resulting elements on the section.

    Args:
        section: Section placeholder; its "elements" entry is overwritten in place
        context: Generation context forwarded to _createSectionPrompt

    Returns:
        The same section dict with "elements" populated

    Raises:
        ValueError: If the AI response is empty, contains no JSON, cannot be
            parsed, or yields no elements
    """
    try:
        sectionId = section.get("id", "unknown")
        contentType = section.get("content_type", "unknown")

        # Debug dumps are optional: only used when services.utils offers writeDebugFile
        utils = getattr(self.services, "utils", None) if self.services else None
        canWriteDebug = utils is not None and hasattr(utils, "writeDebugFile")

        # Create section-specific prompt
        sectionPrompt = self._createSectionPrompt(section, context)
        if canWriteDebug:
            try:
                utils.writeDebugFile(
                    sectionPrompt,
                    f"document_generation_section_{sectionId}_{contentType}_prompt"
                )
            except Exception as e:
                logger.debug(f"Could not write debug file for section prompt: {e}")

        # Call AI to generate content
        from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
        options = AiCallOptions(
            operationType=OperationTypeEnum.DATA_GENERATE,
            resultFormat="json"
        )
        aiResponse = await self.services.ai.callAiContent(
            prompt=sectionPrompt,
            options=options,
            outputFormat="json"
        )

        # Debug: log the response even when it is empty
        if canWriteDebug:
            try:
                if not aiResponse:
                    responseContent = '[No response object]'
                elif getattr(aiResponse, 'content', None):
                    responseContent = aiResponse.content
                elif getattr(aiResponse, 'documents', None):
                    responseContent = f"[Response has {len(aiResponse.documents)} documents]"
                else:
                    responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]"
                utils.writeDebugFile(
                    responseContent,
                    f"document_generation_section_{sectionId}_{contentType}_response"
                )
                logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)")
            except Exception as e:
                logger.warning(f"Could not write debug file for section response: {e}")
                import traceback
                logger.debug(traceback.format_exc())

        # A missing response, missing content and whitespace-only content are all "empty"
        rawContent = getattr(aiResponse, "content", None) if aiResponse else None
        if not rawContent or not rawContent.strip():
            logger.error(f"AI section generation returned empty response for section {sectionId}")
            logger.error(f"Response object: {aiResponse}, content length: {len(rawContent) if rawContent else 0}")
            raise ValueError("AI section generation returned empty response")

        # Extract JSON text from the raw answer
        extractedJson = self.services.utils.jsonExtractString(rawContent)
        if not extractedJson or not extractedJson.strip():
            logger.error(f"No JSON found in AI response for section {sectionId}")
            logger.error(f"Raw response (first 1000 chars): {rawContent[:1000]}")
            logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500] if extractedJson else 'None'}")
            raise ValueError("No JSON found in AI section response")

        elementsData = self._parseSectionJson(extractedJson, rawContent, sectionId)

        # Extract elements array - handle various response formats
        elements = self._extractSectionElements(elementsData, contentType)
        if elements is None:
            logger.error(f"Could not extract elements from AI response. Response structure: {type(elementsData)}, keys: {list(elementsData.keys()) if isinstance(elementsData, dict) else 'N/A'}")
            logger.error(f"Full response (first 500 chars): {str(extractedJson)[:500]}")
            raise ValueError(f"Invalid elements format in AI response. Expected dict with 'elements' key or list, got: {type(elementsData)}")

        # Validate elements is a list
        if not isinstance(elements, list):
            logger.warning(f"Elements is not a list, converting: {type(elements)}")
            elements = [elements]

        # Update section with elements
        section["elements"] = elements
        return section
    except Exception as e:
        logger.error(f"Error generating simple section: {str(e)}")
        raise


def _parseSectionJson(self, extractedJson: str, rawContent: str, sectionId: str) -> Any:
    """Parse the extracted JSON text, trying to close truncated structures before raising ValueError"""
    import json
    try:
        elementsData = json.loads(extractedJson)
    except json.JSONDecodeError as e:
        errorText = str(e)
        logger.error(f"Failed to parse JSON from AI response for section {sectionId}")
        logger.error(f"JSON decode error: {errorText}")
        logger.error(f"Extracted JSON length: {len(extractedJson)} chars")
        logger.error(f"Extracted JSON (first 1000 chars): {extractedJson[:1000]}")
        if len(extractedJson) > 1000:
            logger.error(f"Extracted JSON (last 500 chars): {extractedJson[-500:]}")
        logger.error(f"Raw AI response length: {len(rawContent)} chars")
        logger.error(f"Raw AI response (first 1000 chars): {rawContent[:1000] if rawContent else 'None'}")

        # Recovery is only attempted when the decoder complained about a missing
        # delimiter/value and the text starts like the expected {"elements": ...}
        looksTruncated = "Expecting" in errorText and ("delimiter" in errorText or "value" in errorText)
        if not (looksTruncated and extractedJson.strip().startswith('{"elements"')):
            raise ValueError(f"Invalid JSON in AI response: {errorText}") from e

        logger.warning("JSON appears truncated, attempting recovery...")
        recoveredJson = None
        try:
            # closeJsonStructures handles unterminated strings properly
            from modules.shared.jsonUtils import closeJsonStructures
            recoveredJson = closeJsonStructures(extractedJson)
            logger.info("Attempting to parse recovered JSON (closed structures)")
            logger.debug(f"Recovered JSON length: {len(recoveredJson)} chars (original: {len(extractedJson)} chars)")
            elementsData = json.loads(recoveredJson)
            logger.info(f"Successfully recovered JSON for section {sectionId}")
        except (json.JSONDecodeError, ValueError) as recoveryError:
            logger.error(f"JSON recovery failed: {str(recoveryError)}")
            logger.error(f"Recovered JSON (first 500 chars): {recoveredJson[:500] if recoveredJson is not None else 'N/A'}")
            # Raw content about as long as the extracted JSON hints at a cut-off answer
            if len(rawContent) <= len(extractedJson) + 100:
                logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)")
                logger.warning("Consider increasing max_tokens for AI calls or checking token limits")
            raise ValueError(f"Invalid JSON in AI response (truncated?): {errorText}") from e
    else:
        logger.debug(f"Parsed JSON for section {sectionId}: type={type(elementsData)}, keys={list(elementsData.keys()) if isinstance(elementsData, dict) else 'N/A'}")
    return elementsData


def _extractSectionElements(self, elementsData: Any, contentType: str) -> Any:
    """Locate the elements payload in a parsed AI response, or build a single element from it"""
    if isinstance(elementsData, list):
        return elementsData

    if not isinstance(elementsData, dict):
        # Primitive value - wrap it
        logger.warning(f"AI returned primitive value, wrapping: {type(elementsData)}")
        if contentType == "heading":
            return [{"level": 1, "text": str(elementsData)}]
        return [{"text": str(elementsData)}]

    # May be None for {"elements": null}; the caller treats that as an error
    if "elements" in elementsData:
        return elementsData["elements"]

    # Some models return {"content": [...]} or {"data": [...]}
    for key in ("content", "data"):
        if isinstance(elementsData.get(key), list):
            return elementsData[key]

    # Single key dict whose value is a list - treat the list as the elements
    if len(elementsData) == 1:
        onlyValue = next(iter(elementsData.values()))
        if isinstance(onlyValue, list):
            return onlyValue

    # Any other dict (one key or many) is converted into a single element, so
    # that e.g. {"text": "..."} is not rejected just because it has one key
    logger.warning(f"AI returned dict without 'elements' key, attempting to convert: {list(elementsData.keys())}")
    if contentType == "heading":
        text = elementsData.get("text") or elementsData.get("heading") or str(elementsData)
        return [{"level": elementsData.get("level", 1), "text": text}]
    if contentType == "paragraph":
        text = elementsData.get("text") or elementsData.get("content") or str(elementsData)
        return [{"text": text}]
    return [elementsData]
async def _generateImageSection(
    self,
    section: Dict[str, Any],
    context: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Populate an image section from an existing image or by generating a new one.

    When section["image_source"] is "existing", the image is looked up by
    section["image_reference_id"] in context["cachedContent"]["imageDocuments"].
    Otherwise an image is requested from the AI service using
    section["image_prompt"], or a prompt derived from section["generation_hint"].

    Args:
        section: Image section; its "elements" entry is overwritten in place
        context: Generation context (only "cachedContent" is read here)

    Returns:
        The same section dict with a single image element

    Raises:
        ValueError: If the reference id or referenced image is missing, if no
            prompt can be derived, or if the AI service returns no image data
    """
    try:
        sectionId = section.get("id")

        # Check if this is an existing image to include
        imageSource = section.get("image_source", "generate")
        if imageSource == "existing":
            imageRefId = section.get("image_reference_id")
            if not imageRefId:
                raise ValueError(f"Image section {sectionId} has image_source='existing' but no image_reference_id")

            # `or` instead of a .get default: the key may be present with a None value
            cachedContent = context.get("cachedContent") or {}
            imageDocuments = cachedContent.get("imageDocuments") or []

            # Find the image document
            imageDoc = next((img for img in imageDocuments if img.get("id") == imageRefId), None)
            if not imageDoc:
                raise ValueError(f"Image document {imageRefId} not found in cachedContent.imageDocuments")

            # Create image element from existing image
            section["elements"] = [{
                "base64Data": imageDoc.get("base64Data"),
                "altText": imageDoc.get("altText", section.get("generation_hint", "Image")),
                "mimeType": imageDoc.get("mimeType", "image/png"),
                "caption": (section.get("metadata") or {}).get("caption")
            }]
            logger.info(f"Successfully included existing image {imageRefId} for section {sectionId}")
            return section

        # Generate new image
        imagePrompt = section.get("image_prompt")
        if not imagePrompt:
            # Try to create from generation_hint
            generationHint = section.get("generation_hint", "")
            if not generationHint:
                raise ValueError(f"Image section {sectionId} missing image_prompt and generation_hint")
            imagePrompt = f"Create a professional illustration: {generationHint}"

        # Call AI service for image generation
        from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptImage
        promptModel = AiCallPromptImage(
            prompt=imagePrompt,
            size="1024x1024",
            quality="standard",
            style="vivid"
        )
        promptJson = promptModel.model_dump_json(exclude_none=True, indent=2)
        options = AiCallOptions(
            operationType=OperationTypeEnum.IMAGE_GENERATE,
            resultFormat="base64"
        )

        logger.info(f"Starting image generation for section {sectionId}: {imagePrompt[:100]}...")
        aiResponse = await self.services.ai.callAiContent(
            prompt=promptJson,
            options=options,
            outputFormat="base64"
        )

        # Extract base64 image data: first returned document, then the content field
        base64Data = None
        if aiResponse and aiResponse.documents and len(aiResponse.documents) > 0:
            base64Data = aiResponse.documents[0].documentData
            logger.debug(f"Image data extracted from documents: {len(base64Data) if base64Data else 0} chars")
        if not base64Data and aiResponse and aiResponse.content:
            base64Data = aiResponse.content
            logger.debug(f"Image data extracted from content: {len(base64Data) if base64Data else 0} chars")
        if not base64Data:
            raise ValueError("Image generation returned no data")

        # Sanity-check only the first 100 chars; a failure is logged, not fatal
        try:
            import base64
            base64.b64decode(base64Data[:100], validate=True)
        except (ValueError, TypeError) as e:
            logger.warning(f"Image data may not be valid base64: {str(e)}")

        # Create image element
        altText = section.get("generation_hint", "Image")
        if not altText or altText == "Image":
            # Use image_prompt as alt text if generation_hint is generic;
            # `or "Image"` also covers an image_prompt stored as None
            altText = (section.get("image_prompt") or "Image")[:100]
        section["elements"] = [{
            "url": f"data:image/png;base64,{base64Data}",
            "base64Data": base64Data,
            "altText": altText,
            "caption": (section.get("metadata") or {}).get("caption")
        }]
        logger.info(f"Successfully generated image for section {sectionId}")
        return section
    except Exception as e:
        logger.error(f"Error generating image section: {str(e)}")
        raise
async def _generateComplexTextSection(
self,
section: Dict[str, Any],
context: Dict[str, Any]
) -> Dict[str, Any]:
"""Generate content for complex text section (long chapter)"""
# For now, use same approach as simple section
# Can be enhanced later with chunking for very long content
return await self._generateSimpleSection(section, context)
def _createSectionPrompt(
self,
section: Dict[str, Any],
context: Dict[str, Any]
) -> str:
"""Create sub-prompt for section content generation"""
contentType = section.get("content_type", "")
generationHint = section.get("generation_hint", "")
userPrompt = context.get("userPrompt", "")
cachedContent = context.get("cachedContent")
previousSections = context.get("previousSections", [])
documentMetadata = context.get("documentMetadata", {})
# Get user language
userLanguage = self._getUserLanguage()
# Format cached content
cachedContentText = ""
if cachedContent and cachedContent.get("extractedContent"):
cachedContentText = self._formatCachedContent(cachedContent)
# Format previous sections for context
previousSectionsText = ""
if previousSections:
formattedSections = []
for s in previousSections[-10:]: # Last 10 sections for context (increased from 5)
prevContentType = s.get('content_type', 'unknown') # Use different variable name to avoid shadowing
order = s.get('order', 0)
hint = s.get('generation_hint', '')
elements = s.get('elements', [])
# Extract actual content from elements
contentPreview = ""
if elements:
if prevContentType == "heading":
# Extract heading text
for elem in elements:
if isinstance(elem, dict) and "text" in elem:
contentPreview = f": \"{elem['text']}\""
break
elif prevContentType == "paragraph":
# Extract paragraph text (first 100 chars)
for elem in elements:
if isinstance(elem, dict) and "text" in elem:
text = elem['text']
contentPreview = f": \"{text[:100]}{'...' if len(text) > 100 else ''}\""
break
elif prevContentType == "bullet_list":
# Extract bullet items
for elem in elements:
if isinstance(elem, dict) and "items" in elem:
items = elem['items']
if items:
contentPreview = f": {items[:3]}{'...' if len(items) > 3 else ''}"
break
formattedSections.append(
f"- Section {order} ({prevContentType}){contentPreview}"
)
previousSectionsText = "\n".join(formattedSections)
prompt = f"""{'='*80}
SECTION TO GENERATE:
{'='*80}
Type: {contentType}
Hint: {generationHint}
{'='*80}
CONTEXT:
- User Request: {userPrompt}
- Previous Sections: {len(previousSections)} sections already generated
- Document Title: {documentMetadata.get('title', 'Unknown')}
{'='*80}
PREVIOUS SECTIONS (for continuity):
{'='*80}
{previousSectionsText if previousSectionsText else "This is the first section."}
{'='*80}
{'='*80}
EXTRACTED CONTENT (if available):
{'='*80}
{cachedContentText if cachedContentText else "None"}
{'='*80}
TASK: Generate content for this section ONLY.
INSTRUCTIONS:
1. Generate content appropriate for section type: {contentType}
2. Use the generation hint: {generationHint}
3. Consider previous sections for continuity
4. Use extracted content if relevant
5. All content must be in the language '{userLanguage}'
6. CRITICAL: Return ONLY a JSON object with an "elements" array. DO NOT return a full document structure.
REQUIRED FORMAT - Return ONLY this structure:
For heading:
{{"elements": [{{"level": 1, "text": "Heading Text"}}]}}
For paragraph:
{{"elements": [{{"text": "Paragraph text content"}}]}}
For table:
{{"elements": [{{"headers": ["Col1", "Col2"], "rows": [["Row1", "Row2"]]}}]}}
For bullet_list:
{{"elements": [{{"items": ["Item 1", "Item 2"]}}]}}
For code_block:
{{"elements": [{{"code": "code content here", "language": "python"}}]}}
CRITICAL RULES:
- Return ONLY {{"elements": [...]}} - nothing else
- DO NOT include "metadata", "documents", "sections", or any other fields
- DO NOT return a full document structure
- DO NOT add explanatory text before or after the JSON
- The response must start with {{"elements": and end with }}
- This is a SINGLE SECTION, not a full document
"""
return prompt
def _formatCachedContent(self, cachedContent: Dict[str, Any]) -> str:
"""Format cached content for prompt inclusion"""
try:
extractedContent = cachedContent.get("extractedContent", [])
if not extractedContent:
return "No content extracted."
formattedParts = []
for extracted in extractedContent:
if hasattr(extracted, 'parts'):
for part in extracted.parts:
if hasattr(part, 'content'):
formattedParts.append(part.content)
elif isinstance(extracted, dict):
formattedParts.append(str(extracted))
else:
formattedParts.append(str(extracted))
return "\n\n".join(formattedParts) if formattedParts else "No content extracted."
except Exception as e:
logger.warning(f"Error formatting cached content: {str(e)}")
return "Error formatting cached content."
def _getUserLanguage(self) -> str:
"""Get user language for document generation"""
try:
if self.services:
if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage:
return self.services.currentUserLanguage
elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'):
return self.services.user.language
except Exception:
pass
return 'en' # Default fallback

View file

@ -0,0 +1,167 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Content Integrator for hierarchical document generation.
Merges generated content into document structure and validates completeness.
"""
import logging
from typing import Dict, Any, List, Tuple
logger = logging.getLogger(__name__)
class ContentIntegrator:
    """Integrates generated content into document structure"""

    # Element keys whose truthy value counts as actual content during validation
    _CONTENT_KEYS = ("text", "base64Data", "headers", "items", "code")

    def __init__(self, services: Any = None):
        self.services = services

    def integrateContent(
        self,
        structure: Dict[str, Any],
        generatedSections: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Merge generated sections into document structure.

        Sections are matched by "id". A structure section without a generated
        counterpart is replaced by an error placeholder. The structure is
        modified in place and also returned.

        Args:
            structure: Original document structure
            generatedSections: List of sections with populated elements

        Returns:
            Complete document structure ready for rendering
        """
        try:
            # Create mapping of section IDs to generated sections
            sectionMap = {section.get("id"): section for section in generatedSections}

            for doc in structure.get("documents", []):
                sections = doc.get("sections", [])
                for idx, section in enumerate(sections):
                    sectionId = section.get("id")

                    if sectionId not in sectionMap:
                        # Section not generated - create error section
                        logger.warning(f"Section {sectionId} not found in generated sections")
                        sections[idx] = self.createErrorSection(
                            section,
                            f"Section {sectionId} was not generated"
                        )
                        continue

                    generatedSection = sectionMap[sectionId]

                    # Merge elements into structure section
                    if "elements" in generatedSection:
                        section["elements"] = generatedSection["elements"]

                    # Preserve error information if present
                    if generatedSection.get("error"):
                        section["error"] = True
                        section["errorMessage"] = generatedSection.get("errorMessage")
                        section["originalContentType"] = generatedSection.get("originalContentType")
                        # Error placeholders carry a text element, so the section
                        # must take over the placeholder's content type as well
                        if generatedSection.get("content_type"):
                            section["content_type"] = generatedSection["content_type"]

            self._writeDebugStructure(structure)
            return structure
        except Exception as e:
            logger.error(f"Error integrating content: {str(e)}")
            raise

    def _writeDebugStructure(self, structure: Dict[str, Any]) -> None:
        """Dump the merged structure via services.utils.writeDebugFile when available (best effort)"""
        utils = getattr(self.services, 'utils', None) if self.services else None
        if utils is None or not hasattr(utils, 'writeDebugFile'):
            return
        try:
            import json
            structureJson = json.dumps(structure, indent=2, ensure_ascii=False)
            utils.writeDebugFile(
                structureJson,
                "document_generation_final_merged_json"
            )
            logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)")
        except Exception as e:
            logger.debug(f"Could not write debug file for final merged JSON: {e}")

    def validateCompleteness(
        self,
        document: Dict[str, Any]
    ) -> Tuple[bool, List[str]]:
        """
        Validate that all sections have content.

        A section counts as complete when at least one of its dict elements
        has a truthy "text", "base64Data", "headers", "items" or "code" value.
        Non-dict elements are ignored. Error sections are never reported.

        Args:
            document: Document structure to validate

        Returns:
            (is_complete, list_of_missing_sections)
        """
        missingSections = []
        try:
            for doc in document.get("documents", []):
                for section in doc.get("sections", []):
                    # Skip error sections (they have error text)
                    if section.get("error"):
                        continue
                    elements = section.get("elements", [])
                    hasContent = any(
                        isinstance(element, dict)
                        and any(element.get(key) for key in self._CONTENT_KEYS)
                        for element in (elements or [])
                    )
                    if not hasContent:
                        missingSections.append(section.get("id", "unknown"))
            return len(missingSections) == 0, missingSections
        except Exception as e:
            logger.error(f"Error validating completeness: {str(e)}")
            return False, [f"Validation error: {str(e)}"]

    def createErrorSection(
        self,
        originalSection: Dict[str, Any],
        errorMessage: str
    ) -> Dict[str, Any]:
        """
        Create error placeholder section.

        Args:
            originalSection: Original section that failed
            errorMessage: Error message to display

        Returns:
            New paragraph section with the error text as its only element;
            the original content type is kept under "originalContentType"
        """
        contentType = originalSection.get("content_type", "content")
        sectionId = originalSection.get("id", "unknown")
        return {
            "id": sectionId,
            "content_type": "paragraph",  # Change to paragraph for error display
            "elements": [{
                "text": f"[ERROR: Failed to generate {contentType} for section '{sectionId}'. Error: {errorMessage}]"
            }],
            "order": originalSection.get("order", 0),
            "error": True,
            "errorMessage": errorMessage,
            "originalContentType": contentType,
            "title": originalSection.get("title"),
            "generation_hint": originalSection.get("generation_hint"),
            "complexity": originalSection.get("complexity")
        }

View file

@ -0,0 +1,316 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document Purpose Analyzer for hierarchical document generation.
Uses AI to analyze user prompt and determine purpose for each document.
"""
import logging
import json
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
logger = logging.getLogger(__name__)
class DocumentPurposeAnalyzer:
    """Analyzes user prompt and documents to determine document purposes"""

    # Default purpose of non-image documents per action context
    _ACTION_DEFAULT_PURPOSES = {
        "extractData": "extract_data",
        "translateDocument": "translate",
        "summarizeDocument": "summarize",
        "convertDocument": "convert_format",
        "convert": "convert_format",
        "generateDocument": "extract_text_content",
    }

    def __init__(self, services: Any):
        self.services = services

    async def analyzeDocumentPurposes(
        self,
        userPrompt: str,
        chatDocuments: List[ChatDocument],
        actionContext: str = "generateDocument"
    ) -> Dict[str, Any]:
        """
        Use AI to analyze user prompt and determine purpose for each document.

        Falls back to rule-based default purposes whenever the AI answer is
        empty, contains no JSON, has an unexpected structure, or any error
        occurs; this method does not raise.

        Args:
            userPrompt: User's original prompt
            chatDocuments: List of ChatDocument objects to analyze
            actionContext: Action name (e.g., "generateDocument", "extractData")

        Returns:
            {
                "document_purposes": [
                    {
                        "document_id": "...",
                        "purpose": "extract_text_content" | "include_image" | ...,
                        "reasoning": "...",
                        "extractionPrompt": "..." (if purpose requires extraction),
                        "processingNotes": "..."
                    }
                ],
                "overall_intent": "..."
            }
        """
        try:
            if not chatDocuments:
                return {
                    "document_purposes": [],
                    "overall_intent": "No documents provided"
                }

            # Create document metadata list for AI analysis
            documentMetadata = [
                {
                    "document_id": doc.id,
                    "fileName": doc.fileName,
                    "mimeType": doc.mimeType,
                    "fileSize": doc.fileSize
                }
                for doc in chatDocuments
            ]

            # Create analysis prompt
            analysisPrompt = self._createAnalysisPrompt(
                userPrompt=userPrompt,
                actionContext=actionContext,
                documentMetadata=documentMetadata
            )

            # Debug: Log purpose analysis prompt
            writeDebug = self._getDebugWriter()
            if writeDebug:
                try:
                    writeDebug(analysisPrompt, "document_purpose_analysis_prompt")
                except Exception as e:
                    logger.debug(f"Could not write debug file for purpose analysis prompt: {e}")

            # Call AI for analysis
            options = AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                resultFormat="json"
            )
            aiResponse = await self.services.ai.callAiContent(
                prompt=analysisPrompt,
                options=options,
                outputFormat="json"
            )

            # Debug: Log purpose analysis response
            if writeDebug:
                try:
                    responseContent = aiResponse.content if aiResponse and aiResponse.content else ''
                    responseMetadata = {
                        "status": aiResponse.status if aiResponse else "N/A",
                        "error": aiResponse.error if aiResponse else "N/A",
                        "documents_count": len(aiResponse.documents) if aiResponse and aiResponse.documents else 0
                    }
                    writeDebug(
                        f"Response Content:\n{responseContent}\n\nResponse Metadata:\n{json.dumps(responseMetadata, indent=2)}",
                        "document_purpose_analysis_response"
                    )
                except Exception as e:
                    logger.debug(f"Could not write debug file for purpose analysis response: {e}")

            if not aiResponse or not aiResponse.content:
                logger.warning("AI purpose analysis returned empty response, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            # Extract and parse JSON
            extractedJson = self.services.utils.jsonExtractString(aiResponse.content)
            if not extractedJson:
                logger.warning("No JSON found in purpose analysis response, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            try:
                analysisResult = json.loads(extractedJson)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse purpose analysis JSON: {str(e)}")
                logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500]}")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            # Validate structure: a dict holding a list of purposes
            if not isinstance(analysisResult, dict) or not isinstance(analysisResult.get("document_purposes"), list):
                logger.warning("Invalid analysis result structure, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            # Ensure all documents have purposes
            purposes = analysisResult["document_purposes"]
            analyzedIds = {dp.get("document_id") for dp in purposes}
            for doc in chatDocuments:
                if doc.id in analyzedIds:
                    continue
                logger.warning(f"Document {doc.id} not in analysis result, adding default purpose")
                purposes.append({
                    "document_id": doc.id,
                    "purpose": self._determineDefaultPurpose(doc, actionContext),
                    "reasoning": "Default purpose based on document type and action context",
                    "extractionPrompt": None,
                    "processingNotes": None
                })
            return analysisResult
        except Exception as e:
            logger.error(f"Error analyzing document purposes: {str(e)}")
            return self._createDefaultPurposes(chatDocuments, actionContext)

    def _getDebugWriter(self) -> Any:
        """Return services.utils.writeDebugFile when it is available, otherwise None"""
        utils = getattr(self.services, 'utils', None) if self.services else None
        return getattr(utils, 'writeDebugFile', None) if utils is not None else None

    def _createAnalysisPrompt(
        self,
        userPrompt: str,
        actionContext: str,
        documentMetadata: List[Dict[str, Any]]
    ) -> str:
        """Create AI prompt for document purpose analysis"""
        # Format document list (numbered from 1)
        docListText = "".join(
            f"\n{i}. Document ID: {docInfo['document_id']}\n"
            f" File Name: {docInfo['fileName']}\n"
            f" MIME Type: {docInfo['mimeType']}\n"
            f" File Size: {docInfo['fileSize']} bytes\n"
            for i, docInfo in enumerate(documentMetadata, 1)
        )

        # Get user language
        userLanguage = self._getUserLanguage()

        prompt = f"""{'='*80}
DOCUMENT PURPOSE ANALYSIS
{'='*80}
USER PROMPT:
{userPrompt}
ACTION CONTEXT: {actionContext}
DOCUMENTS PROVIDED:
{docListText}
{'='*80}
TASK: For each document, determine its purpose based on:
1. User prompt intent (what the user wants to do)
2. Action context (what action is being performed)
3. Document type (mimeType - is it text, image, etc.)
4. Document metadata (fileName, size)
AVAILABLE PURPOSES:
- "extract_text_content": Extract text content for use in document generation
- "include_image": Include the image directly in the generated document (for images)
- "analyze_image_vision": Analyze image with vision AI to extract text/information (for images with text/charts)
- "use_as_template": Use document structure/layout as template for generation
- "use_as_reference": Use as background context/reference without detailed extraction
- "extract_data": Extract structured data (key-value pairs, entities, fields)
- "attach": Document is an attachment - don't process, just attach to output
- "convert_format": Convert document format (for convert actions)
- "translate": Translate document content (for translate actions)
- "summarize": Create summary of document (for summarize actions)
- "compare": Compare documents (for comparison actions)
- "merge": Merge documents (for merge actions)
- "extract_tables_charts": Extract tables and charts specifically
- "use_for_styling": Use document for styling/formatting reference only
- "extract_metadata": Extract only document metadata
CRITICAL RULES:
1. For images (mimeType starts with "image/"):
- If user wants to "include" or "show" images -> "include_image"
- If user wants to "analyze", "read text", or "extract text" from images -> "analyze_image_vision"
- Default for images in generateDocument -> "include_image"
2. For text documents in generateDocument:
- If user mentions "template" or "structure" -> "use_as_template"
- If user mentions "reference" or "context" -> "use_as_reference"
- Default -> "extract_text_content"
3. Consider action context:
- generateDocument: Usually "extract_text_content" or "include_image"
- extractData: Usually "extract_data"
- translateDocument: Usually "translate"
- summarizeDocument: Usually "summarize"
4. Return ONLY valid JSON following this structure:
{{
"document_purposes": [
{{
"document_id": "document_id_here",
"purpose": "extract_text_content",
"reasoning": "Brief explanation in language '{userLanguage}'",
"extractionPrompt": "Specific extraction prompt if purpose requires extraction, otherwise null",
"processingNotes": "Any special processing requirements or null"
}}
],
"overall_intent": "Summary of how documents should be used together in language '{userLanguage}'"
}}
5. All content must be in the language '{userLanguage}'
6. Return ONLY the JSON structure. No explanations before or after.
Return ONLY the JSON structure.
"""
        return prompt

    def _createDefaultPurposes(
        self,
        chatDocuments: List[ChatDocument],
        actionContext: str
    ) -> Dict[str, Any]:
        """Create default purposes when AI analysis fails"""
        purposes = [
            {
                "document_id": doc.id,
                "purpose": self._determineDefaultPurpose(doc, actionContext),
                "reasoning": f"Default purpose based on document type ({doc.mimeType}) and action context ({actionContext})",
                "extractionPrompt": None,
                "processingNotes": None
            }
            for doc in chatDocuments
        ]
        return {
            "document_purposes": purposes,
            "overall_intent": f"Default processing for {len(chatDocuments)} document(s) in {actionContext} action"
        }

    def _determineDefaultPurpose(
        self,
        doc: ChatDocument,
        actionContext: str
    ) -> str:
        """Determine default purpose based on document type and action context"""
        mimeType = doc.mimeType or ""

        # Image documents: vision analysis for extraction-style actions, inclusion otherwise
        if mimeType.startswith("image/"):
            if actionContext in ("extractData", "process"):
                return "analyze_image_vision"
            return "include_image"

        # Action-specific defaults; unknown actions extract text content
        return self._ACTION_DEFAULT_PURPOSES.get(actionContext, "extract_text_content")

    def _getUserLanguage(self) -> str:
        """
        Get user language for document generation.

        Checks services.currentUserLanguage first, then services.user.language,
        and returns 'en' when neither holds a non-empty value.
        """
        try:
            services = self.services
            if services:
                language = getattr(services, 'currentUserLanguage', None)
                if language:
                    return language
                user = getattr(services, 'user', None)
                # An unset (None/empty) user language must fall through to the default
                language = getattr(user, 'language', None) if user else None
                if language:
                    return language
        except Exception as e:
            # Best effort: language lookup must never break prompt creation
            logger.debug(f"Could not determine user language: {e}")
        return 'en'  # Default fallback

View file

@ -0,0 +1,488 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Structure Generator for hierarchical document generation.
Generates document skeleton with section placeholders.
"""
import logging
import json
from typing import Dict, Any, Optional, List
from modules.datamodels.datamodelJson import jsonTemplateDocument
logger = logging.getLogger(__name__)
class StructureGenerator:
"""Generates document structure with section placeholders"""
def __init__(self, services: Any) -> None:
    """Keep a reference to the services container used by the other methods."""
    # The methods of this class reach the AI client, utilities and user
    # language through this object.
    self.services = services
async def generateStructure(
    self,
    userPrompt: str,
    documentList: Optional[Any] = None,
    cachedContent: Optional[Dict[str, Any]] = None,
    maxSectionLength: int = 500,
    existingImages: Optional[List[Dict[str, Any]]] = None
) -> Dict[str, Any]:
    """
    Ask the AI for a document skeleton and normalise the reply.

    Builds the structure prompt, sends it to the AI service, extracts the
    JSON from the reply and passes it through _validateAndEnhanceStructure.
    The prompt and the raw reply are dumped through
    services.utils.writeDebugFile when that helper is available.

    Args:
        userPrompt: User's original prompt
        documentList: Optional document references (not read by this method)
        cachedContent: Optional extracted content cache
        maxSectionLength: Maximum words for simple sections
        existingImages: Optional list of existing images to include

    Returns:
        Document structure after validation

    Raises:
        ValueError: If the AI reply is empty or contains no JSON.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    def canWriteDebugFile() -> bool:
        # Debug dumps are optional; only attempt them when the helper exists.
        svc = self.services
        return bool(svc and hasattr(svc, 'utils') and hasattr(svc.utils, 'writeDebugFile'))

    try:
        promptText = self._createStructurePrompt(
            userPrompt=userPrompt,
            cachedContent=cachedContent,
            maxSectionLength=maxSectionLength,
            existingImages=existingImages or []
        )

        if canWriteDebugFile():
            try:
                self.services.utils.writeDebugFile(promptText, "document_generation_structure_prompt")
            except Exception as e:
                logger.debug(f"Could not write debug file for structure prompt: {e}")

        # Imported here, as in the rest of this method's history, to keep the
        # module import light.
        from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
        callOptions = AiCallOptions(
            operationType=OperationTypeEnum.DATA_GENERATE,
            resultFormat="json"
        )
        aiResponse = await self.services.ai.callAiContent(
            prompt=promptText,
            options=callOptions,
            outputFormat="json"
        )

        if canWriteDebugFile():
            try:
                rawReply = aiResponse.content if aiResponse and aiResponse.content else ''
                self.services.utils.writeDebugFile(rawReply, "document_generation_structure_response")
            except Exception as e:
                logger.debug(f"Could not write debug file for structure response: {e}")

        if not aiResponse or not aiResponse.content:
            raise ValueError("AI structure generation returned empty response")

        jsonText = self.services.utils.jsonExtractString(aiResponse.content)
        if not jsonText:
            raise ValueError("No JSON found in AI structure response")

        parsedStructure = json.loads(jsonText)
        return self._validateAndEnhanceStructure(parsedStructure, maxSectionLength)
    except Exception as e:
        logger.error(f"Error generating structure: {str(e)}")
        raise
def _createStructurePrompt(
    self,
    userPrompt: str,
    cachedContent: Optional[Dict[str, Any]] = None,
    maxSectionLength: int = 500,
    existingImages: Optional[List[Dict[str, Any]]] = None
) -> str:
    """
    Create prompt for structure generation.

    Args:
        userPrompt: User's original request, embedded verbatim.
        cachedContent: Optional cache; its "extractedContent" is rendered into
            the prompt and its "imageDocuments" serve as fallback image list.
        maxSectionLength: Word threshold quoted in the "complex" rule.
        existingImages: Images to reference; when empty or None the images
            stored in cachedContent are used instead.

    Returns:
        The complete prompt text.
    """
    # Get user language
    userLanguage = self._getUserLanguage()

    # Format cached content if available
    cachedContentText = ""
    if cachedContent and cachedContent.get("extractedContent"):
        cachedContentText = self._formatCachedContent(cachedContent)

    # Use provided existingImages or extract from cachedContent.
    # The check is on emptiness, not on None: generateStructure always passes
    # a list, so a None-only check would never reach the cached images.
    if not existingImages:
        existingImages = (cachedContent or {}).get("imageDocuments") or []

    # Create structure template
    structureTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", "Document Title")

    separator = '='*80
    contentBlock = cachedContentText if cachedContentText else "No source documents provided."
    imagesBlock = self._formatExistingImages(existingImages) if existingImages else "No existing images provided."

    # Instruction steps are numbered 1-7 in reading order.
    prompt = f"""{separator}
USER REQUEST:
{separator}
{userPrompt}
{separator}
TASK: Generate a document STRUCTURE (skeleton) with sections.
Do NOT generate actual content yet - only the structure.
{separator}
EXTRACTED CONTENT (if available):
{separator}
{contentBlock}
{separator}
INSTRUCTIONS:
1. Analyze the user request and extracted content
2. Create a document structure with CONTENT sections only
3. For each section, specify:
- id: Unique identifier (e.g., "section_title_1", "section_image_1")
- content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block"
- complexity: "simple" (can generate directly) or "complex" (needs sub-prompt)
- generation_hint: Brief description of what content should be generated
- image_prompt: (only for image sections) Detailed prompt for image generation
- order: Section order number (starting from 1)
- elements: [] (empty array - will be populated later)
4. Identify image sections:
- If user requests illustrations/images, create image sections
- If existing images are provided in documentList (check EXISTING IMAGES section below), create image sections that reference them
- Add image_prompt field with detailed description for image generation (only for new images)
- Set complexity to "complex"
- For existing images: Set image_source to "existing" and image_reference_id to the image document ID
- Example for new image: {{"id": "section_image_1", "content_type": "image", "complexity": "complex", "generation_hint": "Illustration for chapter 1", "image_prompt": "A detailed description for image generation", "order": 2, "elements": []}}
- Example for existing image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Include provided image", "image_source": "existing", "image_reference_id": "doc_id_here", "order": 2, "elements": []}}
{separator}
EXISTING IMAGES (to include in document):
{separator}
{imagesBlock}
{separator}
5. Identify complex text sections:
- Long chapters (>{maxSectionLength} words expected) should be marked as "complex"
- Short paragraphs/headings should be "simple"
6. Return ONLY valid JSON following this structure:
{structureTemplate}
7. CRITICAL RULES:
- Return ONLY valid JSON (no comments, no trailing commas, double quotes only)
- Follow the exact JSON schema structure provided
- IMPORTANT: All sections MUST have empty elements arrays: "elements": [] (the template shows examples with content, but you must use empty arrays)
- ALL sections MUST include "generation_hint" field with a brief description of what content should be generated
- ALL sections MUST include "complexity" field: "simple" for short content, "complex" for long chapters/images
- Image sections MUST include "image_prompt" field with detailed description for image generation
- Order numbers MUST start from 1 (not 0)
- All content must be in the language '{userLanguage}'
- Do NOT generate actual content - only structure (skeleton)
- Use only supported content_type values: "heading", "paragraph", "image", "table", "bullet_list", "code_block"
Return ONLY the JSON structure. No explanations.
"""
    return prompt
def _validateAndEnhanceStructure(
self,
structure: Dict[str, Any],
maxSectionLength: int
) -> Dict[str, Any]:
"""
Validate structure and enhance with complexity identification.
"""
try:
# Ensure structure has required fields
if "documents" not in structure:
if "sections" in structure:
# Convert single-document format to multi-document format
structure = {
"metadata": structure.get("metadata", {}),
"documents": [{
"id": "doc_1",
"title": structure.get("metadata", {}).get("title", "Document"),
"filename": "document.json",
"sections": structure.get("sections", [])
}]
}
else:
raise ValueError("Structure missing 'documents' or 'sections' field")
# Process each document
for doc in structure.get("documents", []):
sections = doc.get("sections", [])
# Process and validate sections according to standardized schema
for idx, section in enumerate(sections):
# Ensure required fields
if "id" not in section:
section["id"] = f"section_{idx + 1}"
sectionId = section.get("id", "")
section["order"] = idx + 1
if "elements" not in section:
section["elements"] = []
# Identify complexity if not set
if "complexity" not in section:
section["complexity"] = self._identifySectionComplexity(
section,
maxSectionLength
)
# Ensure generation_hint exists (required for content generation)
if "generation_hint" not in section or not section.get("generation_hint"):
# Create meaningful generation hint from section id or content type
contentType = section.get("content_type", "")
# Extract meaningful hint from section ID
meaningfulHint = self._extractMeaningfulHint(sectionId, contentType, section.get("elements", []))
section["generation_hint"] = meaningfulHint
# Ensure image sections have proper configuration
if section.get("content_type") == "image":
imageSource = section.get("image_source", "generate")
if imageSource == "existing":
# Existing image - ensure image_reference_id is set
if "image_reference_id" not in section:
logger.warning(f"Image section {sectionId} has image_source='existing' but no image_reference_id")
# Existing images are simple (no generation needed)
section["complexity"] = "simple"
else:
# New image generation - ensure image_prompt
if "image_prompt" not in section or not section.get("image_prompt"):
# Try to extract from generation_hint
generationHint = section.get("generation_hint", "")
if generationHint:
# Enhance generation_hint to be a proper image prompt
section["image_prompt"] = self._enhanceImagePrompt(generationHint)
else:
# Create default based on document context
docTitle = doc.get("title", "Document")
section["image_prompt"] = f"Generate an illustration for: {docTitle}"
# Ensure complexity is set to complex for new image generation
section["complexity"] = "complex"
return structure
except Exception as e:
logger.error(f"Error validating structure: {str(e)}")
raise
def _identifySectionComplexity(
self,
section: Dict[str, Any],
maxSectionLength: int
) -> str:
"""
Identify if section is simple or complex.
Rules:
- Images: always complex
- Long chapters (>maxSectionLength words): complex
- Others: simple
"""
contentType = section.get("content_type", "")
# Images are always complex
if contentType == "image":
return "complex"
# Check generation_hint for length indicators
generationHint = section.get("generation_hint", "").lower()
# Keywords indicating long content
longContentKeywords = [
"chapter", "long", "detailed", "comprehensive",
"extensive", "full", "complete story"
]
if any(keyword in generationHint for keyword in longContentKeywords):
return "complex"
# Default to simple
return "simple"
def _extractMeaningfulHint(
self,
sectionId: str,
contentType: str,
elements: List[Any]
) -> str:
"""
Extract meaningful generation hint from section ID, content type, or elements.
Args:
sectionId: Section identifier (e.g., "section_heading_current_state")
contentType: Content type (e.g., "heading", "paragraph")
elements: Existing elements if any
Returns:
Meaningful generation hint string
"""
sectionIdLower = sectionId.lower()
# Try to extract text from existing elements first (most accurate)
if elements and isinstance(elements, list) and len(elements) > 0:
firstElement = elements[0]
if isinstance(firstElement, dict):
if "text" in firstElement and firstElement["text"]:
if contentType == "heading":
return firstElement["text"]
elif contentType == "paragraph":
return f"Content paragraph: {firstElement['text'][:50]}..."
# Extract meaningful text from section ID
# Remove common prefixes: "section_", "section_heading_", "section_paragraph_", etc.
meaningfulPart = sectionId
for prefix in ["section_heading_", "section_paragraph_", "section_bullet_list_",
"section_code_block_", "section_image_", "section_"]:
if meaningfulPart.lower().startswith(prefix):
meaningfulPart = meaningfulPart[len(prefix):]
break
# Convert snake_case to Title Case
# e.g., "current_state" -> "Current State"
words = meaningfulPart.replace("_", " ").split()
titleCase = " ".join(word.capitalize() for word in words if word)
# Handle special cases
if "introduction" in sectionIdLower or "intro" in sectionIdLower:
return "Introduction paragraph"
elif "conclusion" in sectionIdLower:
return "Conclusion paragraph"
elif "footer" in sectionIdLower or "copyright" in sectionIdLower:
return "Footer content"
elif "title" in sectionIdLower and "main" in sectionIdLower:
# Main title - try to get from document title or use generic
return "Main document title"
# Create hint based on content type and extracted text
if contentType == "heading":
if titleCase:
return titleCase
else:
return "Section heading"
elif contentType == "paragraph":
if titleCase:
return f"Content paragraph about {titleCase.lower()}"
else:
return f"Content paragraph"
elif contentType == "bullet_list":
if titleCase:
return f"Bullet list: {titleCase.lower()}"
else:
return "Bullet list items"
elif contentType == "code_block":
return "Code content"
else:
if titleCase:
return f"Content for {titleCase.lower()}"
else:
return f"Content for {contentType} section"
def _extractImagePrompts(
self,
structure: Dict[str, Any]
) -> Dict[str, str]:
"""
Extract image generation prompts from structure.
Maps section_id -> image_prompt
"""
imagePrompts = {}
for doc in structure.get("documents", []):
for section in doc.get("sections", []):
if section.get("content_type") == "image":
sectionId = section.get("id")
imagePrompt = section.get("image_prompt")
if sectionId and imagePrompt:
imagePrompts[sectionId] = imagePrompt
return imagePrompts
def _formatCachedContent(
self,
cachedContent: Dict[str, Any]
) -> str:
"""
Format cached content for prompt inclusion.
"""
try:
extractedContent = cachedContent.get("extractedContent", [])
if not extractedContent:
return "No content extracted."
# Format ContentPart objects
formattedParts = []
for extracted in extractedContent:
if hasattr(extracted, 'parts'):
for part in extracted.parts:
if hasattr(part, 'content'):
formattedParts.append(part.content)
elif isinstance(extracted, dict):
formattedParts.append(str(extracted))
else:
formattedParts.append(str(extracted))
return "\n\n".join(formattedParts) if formattedParts else "No content extracted."
except Exception as e:
logger.warning(f"Error formatting cached content: {str(e)}")
return "Error formatting cached content."
def _enhanceImagePrompt(self, generationHint: str) -> str:
"""
Enhance generation hint to be a proper image generation prompt.
Adds visual details and style guidance if missing.
"""
# If hint already contains visual details, use as-is
visualKeywords = ["illustration", "image", "picture", "visual", "depict", "show", "drawing"]
if any(keyword.lower() in generationHint.lower() for keyword in visualKeywords):
return generationHint
# Enhance with visual description
enhanced = f"Create a professional illustration: {generationHint}"
return enhanced
def _formatExistingImages(self, imageDocuments: List[Dict[str, Any]]) -> str:
"""Format existing images list for prompt inclusion"""
if not imageDocuments:
return "No existing images provided."
formatted = []
for i, imgDoc in enumerate(imageDocuments, 1):
formatted.append(f"{i}. Image ID: {imgDoc.get('id')}")
formatted.append(f" File Name: {imgDoc.get('fileName', 'Unknown')}")
formatted.append(f" MIME Type: {imgDoc.get('mimeType', 'Unknown')}")
formatted.append(f" Alt Text: {imgDoc.get('altText', 'Image')}")
formatted.append("")
return "\n".join(formatted)
def _getUserLanguage(self) -> str:
"""Get user language for document generation"""
try:
if self.services:
if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage:
return self.services.currentUserLanguage
elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'):
return self.services.user.language
except Exception:
pass
return 'en' # Default fallback

View file

@ -199,8 +199,7 @@ def closeJsonStructures(text: str) -> str:
# Handle unterminated strings: find the last unclosed string
# Look for patterns like: "value" or "value\n (unterminated)
# Simple heuristic: if we end with an unterminated string (odd number of quotes at end)
# Try to close it by finding the last opening quote and closing it
# Check if we're in the middle of a string value when text ends
if result.strip():
# Count quotes - if odd number, we have an unterminated string
quoteCount = result.count('"')
@ -219,6 +218,32 @@ def closeJsonStructures(text: str) -> str:
# Find where the string should end (before next comma, bracket, or brace)
# For now, just close it at the end
result += '"'
else:
# Even number of quotes, but might still be in middle of string if cut off
# Check if text ends with a colon followed by a quote (start of string value)
# or ends with text that looks like it's inside a string (no closing quote after last quote)
import re
# Pattern: ends with "text" where text doesn't end with quote
# Look for pattern like: "text": "incomplete
if re.search(r':\s*"[^"]*$', result):
# We're in the middle of a string value, close it
result += '"'
# Also check if we end with text after a quote (like "key": "value but cut off)
elif re.search(r'"\s*:\s*"[^"]*[^",}\]]$', result):
# Check if last quote is followed by non-quote, non-structural chars
lastQuotePos = result.rfind('"')
if lastQuotePos >= 0:
afterQuote = result[lastQuotePos + 1:]
# If after quote we have text but no closing quote, comma, or brace, we're in a string
if afterQuote and not re.match(r'^\s*[,}\]\]]', afterQuote):
# Check if it's escaped
escapeCount = 0
i = lastQuotePos - 1
while i >= 0 and result[i] == '\\':
escapeCount += 1
i -= 1
if escapeCount % 2 == 0:
result += '"'
# Count open/close brackets and braces
openBraces = result.count('{')

View file

@ -98,7 +98,7 @@ async def convert(self, parameters: Dict[str, Any]) -> ActionResult:
renderOptions["columnsPerRow"] = parameters.get("columnsPerRow")
renderOptions["includeHeader"] = parameters.get("includeHeader", True)
rendered_content, mime_type = await generationService.renderReport(
rendered_content, mime_type, _images = await generationService.renderReport(
jsonData, normalizedOutputFormat, title, None, None
)

View file

@ -3,13 +3,18 @@
"""
Generate Document action for AI operations.
Generates documents from scratch or based on templates/inputs.
Generates documents from scratch or based on templates/inputs using hierarchical approach.
"""
import logging
from typing import Dict, Any
import time
from typing import Dict, Any, Optional
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator
from modules.services.serviceGeneration.subContentGenerator import ContentGenerator
from modules.services.serviceGeneration.subDocumentPurposeAnalyzer import DocumentPurposeAnalyzer
logger = logging.getLogger(__name__)
@ -17,15 +22,18 @@ logger = logging.getLogger(__name__)
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Generate documents from scratch or based on templates/inputs.
- Purpose: Generate documents from scratch or based on templates/inputs using hierarchical approach.
- Input requirements: prompt or description (required); optional documentList (for templates/references).
- Output format: Document in specified format (default: docx).
- Output format: Document in specified format. Any format supported by dynamically registered renderers is acceptable (default: txt).
Parameters:
- prompt (str, required): Description of the document to generate.
- documentList (list, optional): Template documents or reference documents to use as a guide.
- documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
- resultType (str, optional): Output format (docx, pdf, txt, md, etc.). Default: docx.
- resultType (str, optional): Output format. Any format supported by dynamically registered renderers is acceptable (formats are discovered automatically from renderer registry). Common formats: txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg. Default: txt.
- maxSectionLength (int, optional): Maximum words for simple sections. Default: 500.
- parallelGeneration (bool, optional): Enable parallel section generation. Default: True.
- progressLogging (bool, optional): Send ChatLog progress updates. Default: True.
"""
prompt = parameters.get("prompt")
if not prompt:
@ -33,21 +41,361 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
documentList = parameters.get("documentList", [])
documentType = parameters.get("documentType")
resultType = parameters.get("resultType", "docx")
resultType = parameters.get("resultType", "txt")
aiPrompt = f"Generate a document based on the following requirements: {prompt}"
if documentType:
aiPrompt += f" Document type: {documentType}."
if documentList:
aiPrompt += " Use the provided template/reference documents as a guide for structure, format, and style."
aiPrompt += " Create a professional, well-structured document with appropriate formatting and organization."
# Auto-detect format from prompt if not explicitly provided
if resultType == "txt" and prompt:
promptLower = prompt.lower()
if "html" in promptLower or "html5" in promptLower:
resultType = "html"
logger.info(f"Auto-detected HTML format from prompt")
elif "pdf" in promptLower:
resultType = "pdf"
logger.info(f"Auto-detected PDF format from prompt")
elif "markdown" in promptLower or " md " in promptLower or promptLower.endswith(" md"):
resultType = "md"
logger.info(f"Auto-detected Markdown format from prompt")
elif ("text" in promptLower or "txt" in promptLower) and "html" not in promptLower:
resultType = "txt"
logger.info(f"Auto-detected Text format from prompt")
processParams = {
"aiPrompt": aiPrompt,
"resultType": resultType
}
if documentList:
processParams["documentList"] = documentList
maxSectionLength = parameters.get("maxSectionLength", 500)
parallelGeneration = parameters.get("parallelGeneration", True)
progressLogging = parameters.get("progressLogging", True)
return await self.process(processParams)
# Create operation ID for progress tracking
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
operationId = f"doc_gen_{workflowId}_{int(time.time())}"
parentOperationId = parameters.get('parentOperationId')
try:
# Phase 1: Structure Generation
if progressLogging:
self.services.chat.progressLogStart(
operationId,
"Document",
"Structure Generation",
"Generating document structure...",
parentOperationId=parentOperationId
)
structureGenerator = StructureGenerator(self.services)
# Analyze document purposes and process documents accordingly
cachedContent = None
imageDocuments = []
documentPurposes = {}
if documentList:
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.1, "Analyzing document purposes...")
# Convert documentList to DocumentReferenceList
from modules.datamodels.datamodelDocref import DocumentReferenceList
if isinstance(documentList, DocumentReferenceList):
docRefList = documentList
elif isinstance(documentList, str):
docRefList = DocumentReferenceList.from_string_list([documentList])
elif isinstance(documentList, list):
docRefList = DocumentReferenceList.from_string_list(documentList)
else:
docRefList = DocumentReferenceList(references=[])
# Get ChatDocuments
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
if chatDocuments:
logger.info(f"Analyzing purposes for {len(chatDocuments)} documents")
# Analyze document purposes using AI
purposeAnalyzer = DocumentPurposeAnalyzer(self.services)
purposeAnalysis = await purposeAnalyzer.analyzeDocumentPurposes(
userPrompt=prompt,
chatDocuments=chatDocuments,
actionContext="generateDocument"
)
documentPurposes = {dp["document_id"]: dp for dp in purposeAnalysis.get("document_purposes", [])}
logger.info(f"Purpose analysis complete: {purposeAnalysis.get('overall_intent', 'N/A')}")
# Separate documents by purpose
textDocs = []
imageDocsToInclude = []
imageDocsToAnalyze = []
for doc in chatDocuments:
docPurpose = documentPurposes.get(doc.id, {})
purpose = docPurpose.get("purpose", "extract_text_content")
if purpose == "include_image":
imageDocsToInclude.append(doc)
elif purpose == "analyze_image_vision":
imageDocsToAnalyze.append(doc)
elif purpose in ["extract_text_content", "use_as_template", "use_as_reference", "extract_data"]:
textDocs.append(doc)
# Skip "attach" purpose - don't process
# Process text documents (extract content)
extractedResults = []
if textDocs:
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.15, f"Extracting content from {len(textDocs)} text document(s)...")
# Prepare extraction options with purpose-specific prompts
extractionOptionsList = []
for doc in textDocs:
docPurpose = documentPurposes.get(doc.id, {})
extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all content from the document"
extractionOptions = ExtractionOptions(
prompt=extractionPrompt,
mergeStrategy=MergeStrategy(
mergeType="concatenate",
groupBy="typeGroup",
orderBy="id"
),
processDocumentsIndividually=True
)
extractionOptionsList.append((doc, extractionOptions))
# Extract content from text documents
for doc, extractionOptions in extractionOptionsList:
try:
docResults = self.services.extraction.extractContent(
[doc],
extractionOptions,
parentOperationId=operationId
)
extractedResults.extend(docResults)
except Exception as e:
logger.error(f"Error extracting content from {doc.fileName}: {str(e)}")
logger.info(f"Extracted content from {len(extractedResults)} text document(s)")
# Process images to analyze (vision call)
if imageDocsToAnalyze:
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.2, f"Analyzing {len(imageDocsToAnalyze)} image(s) with vision AI...")
# Extract content from images using vision analysis
for doc in imageDocsToAnalyze:
try:
docPurpose = documentPurposes.get(doc.id, {})
extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all text and information from this image"
extractionOptions = ExtractionOptions(
prompt=extractionPrompt,
mergeStrategy=MergeStrategy(
mergeType="concatenate",
groupBy="typeGroup",
orderBy="id"
),
processDocumentsIndividually=True
)
docResults = self.services.extraction.extractContent(
[doc],
extractionOptions,
parentOperationId=operationId
)
extractedResults.extend(docResults)
except Exception as e:
logger.error(f"Error analyzing image {doc.fileName}: {str(e)}")
logger.info(f"Analyzed {len(imageDocsToAnalyze)} image(s) with vision AI")
# Process images to include (store image data)
if imageDocsToInclude:
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.25, f"Preparing {len(imageDocsToInclude)} image(s) for inclusion...")
# Get image data for inclusion
from modules.interfaces.interfaceDbComponentObjects import getInterface
dbInterface = getInterface()
for doc in imageDocsToInclude:
try:
# Get image bytes
imageBytes = dbInterface.getFileData(doc.fileId)
if imageBytes:
# Encode to base64
import base64
base64Data = base64.b64encode(imageBytes).decode('utf-8')
# Create image document entry
imageDoc = {
"id": doc.id,
"fileName": doc.fileName,
"mimeType": doc.mimeType,
"base64Data": base64Data,
"altText": doc.fileName or "Image",
"fileSize": doc.fileSize
}
imageDocuments.append(imageDoc)
logger.debug(f"Prepared image {doc.fileName} for inclusion ({len(base64Data)} chars base64)")
else:
logger.warning(f"Could not retrieve image data for {doc.fileName}")
except Exception as e:
logger.error(f"Error preparing image {doc.fileName} for inclusion: {str(e)}")
logger.info(f"Prepared {len(imageDocuments)} image(s) for inclusion")
# Build cachedContent with all information
cachedContent = {
"extractedContent": extractedResults,
"imageDocuments": imageDocuments,
"documentPurposes": documentPurposes,
"extractionTimestamp": time.time(),
"sourceDocuments": [doc.id for doc in chatDocuments]
}
logger.info(f"Document processing complete: {len(extractedResults)} extracted, {len(imageDocuments)} images to include")
# Generate structure
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.2, "Generating document structure...")
structure = await structureGenerator.generateStructure(
userPrompt=prompt,
documentList=documentList if documentList else None,
cachedContent=cachedContent,
maxSectionLength=maxSectionLength,
existingImages=imageDocuments # Pass existing images for structure generation
)
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated")
# Phase 2: Content Generation
if progressLogging:
self.services.chat.progressLogUpdate(
operationId,
0.34,
"Starting content generation..."
)
contentGenerator = ContentGenerator(self.services)
# Create enhanced progress callback
def progressCallback(sectionIndex: int, totalSections: int, message: str):
if progressLogging:
# Calculate progress: 34% to 90% for content generation phase
if totalSections > 0:
progress = 0.34 + (0.56 * (sectionIndex / totalSections))
else:
progress = 0.34
# Format message
if sectionIndex > 0 and totalSections > 0:
progressMessage = f"Section {sectionIndex}/{totalSections}: {message}"
else:
progressMessage = message
self.services.chat.progressLogUpdate(
operationId,
progress,
progressMessage
)
completeStructure = await contentGenerator.generateContent(
structure=structure,
cachedContent=cachedContent,
userPrompt=prompt,
progressCallback=progressCallback,
parallelGeneration=parallelGeneration
)
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated")
# Phase 3: Integration & Rendering
if progressLogging:
self.services.chat.progressLogUpdate(
operationId,
0.91,
"Rendering final document..."
)
# Use existing renderReport method
title = structure.get("metadata", {}).get("title", "Generated Document")
if documentType:
title = f"{title} ({documentType})"
renderedContent, mimeType, images = await self.services.generation.renderReport(
extractedContent=completeStructure,
outputFormat=resultType,
title=title,
userPrompt=prompt,
aiService=self.services.ai
)
# Build list of documents to return
documents = [
ActionDocument(
documentName=f"document.{resultType}",
documentData=renderedContent,
mimeType=mimeType
)
]
# Add images as separate documents
if images:
logger.info(f"Processing {len(images)} image(s) from renderer")
import base64
for idx, imageData in enumerate(images):
try:
base64Data = imageData.get("base64Data", "")
altText = imageData.get("altText", f"image_{idx + 1}")
caption = imageData.get("caption", "")
sectionId = imageData.get("sectionId", f"section_{idx + 1}")
if base64Data:
# Decode base64 to bytes
imageBytes = base64.b64decode(base64Data)
# Determine filename and mime type
filename = imageData.get("filename", f"image_{idx + 1}.png")
if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')):
filename = f"image_{idx + 1}.png"
# Determine mime type from filename
if filename.lower().endswith('.png'):
imageMimeType = "image/png"
elif filename.lower().endswith(('.jpg', '.jpeg')):
imageMimeType = "image/jpeg"
elif filename.lower().endswith('.gif'):
imageMimeType = "image/gif"
elif filename.lower().endswith('.webp'):
imageMimeType = "image/webp"
else:
imageMimeType = "image/png" # Default
# Add image document
documents.append(ActionDocument(
documentName=filename,
documentData=imageBytes,
mimeType=imageMimeType
))
logger.info(f"Added image document: {filename} (section: {sectionId}, {len(imageBytes)} bytes, alt: {altText})")
else:
logger.warning(f"Image {idx + 1} (section: {sectionId}) has no base64Data, skipping")
except Exception as e:
logger.error(f"Error adding image document {idx + 1}: {str(e)}", exc_info=True)
continue
else:
logger.debug("No images returned from renderer")
# Note: Document creation is handled by the workflow system
# We just return the rendered content and images in ActionResult
if progressLogging:
self.services.chat.progressLogFinish(operationId, True)
return ActionResult.isSuccess(documents=documents)
except Exception as e:
logger.error(f"Error in hierarchical document generation: {str(e)}")
if progressLogging:
self.services.chat.progressLogFinish(operationId, False)
return ActionResult.isFailure(error=str(e))

View file

@ -353,11 +353,10 @@ class MethodAi(MethodBase):
"resultType": WorkflowActionParameter(
name="resultType",
type="str",
frontendType=FrontendType.SELECT,
frontendOptions=["docx", "pdf", "txt", "md"],
frontendType=FrontendType.TEXT,
required=False,
default="docx",
description="Output format"
default="txt",
description="Output format (e.g., txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg). Any format supported by renderers is acceptable. Default: txt"
)
},
execute=generateDocument.__get__(self, self.__class__)

View file

@ -98,9 +98,13 @@ class MethodBase:
self.logger.error(f"Method {self.name} has no _actions dictionary defined. Actions will not be available.")
return result
totalActions = len(self._actions)
deniedActions = []
for actionName, actionDef in self._actions.items():
# RBAC-Check: Prüfe ob Action für aktuellen User verfügbar ist
if not self._checkActionPermission(actionDef.actionId):
deniedActions.append(f"{actionName} ({actionDef.actionId})")
continue # Skip if user doesn't have permission
# Konvertiere WorkflowActionDefinition zu System-Format
@ -110,6 +114,11 @@ class MethodBase:
'method': self._createActionWrapper(actionDef)
}
if deniedActions:
self.logger.warning(f"Method {self.name}: {len(deniedActions)}/{totalActions} actions denied by RBAC: {deniedActions[:5]}{'...' if len(deniedActions) > 5 else ''}")
if not result and totalActions > 0:
self.logger.error(f"Method {self.name}: ALL {totalActions} actions denied by RBAC! This will result in empty action list.")
return result
def _checkActionPermission(self, actionId: str) -> bool:
@ -120,22 +129,36 @@ class MethodBase:
REQUIREMENT: RBAC-Service muss verfügbar sein.
"""
if not hasattr(self.services, 'rbac') or not self.services.rbac:
self.logger.error(f"RBAC service not available. Action {actionId} will be denied.")
self.logger.error(f"RBAC service not available (services.rbac is None). Action {actionId} will be denied.")
return False
currentUser = self.services.chat.getCurrentUser()
# Get current user from services.user (not from chat service)
currentUser = getattr(self.services, 'user', None)
if not currentUser:
self.logger.warning(f"No current user found. Action {actionId} will be denied.")
self.logger.warning(f"No current user found (services.user is None). Action {actionId} will be denied.")
return False
# RBAC-Check: RESOURCE context, item = actionId
permissions = self.services.rbac.getUserPermissions(
user=currentUser,
context=AccessRuleContext.RESOURCE,
item=actionId
)
return permissions.view
try:
permissions = self.services.rbac.getUserPermissions(
user=currentUser,
context=AccessRuleContext.RESOURCE,
item=actionId
)
hasPermission = permissions.view
if not hasPermission:
# Log detailed RBAC denial info
userRoles = getattr(currentUser, 'roleLabels', []) or []
self.logger.warning(
f"RBAC denied action {actionId} for user {currentUser.id}. "
f"User roles: {userRoles}, "
f"Permissions: view={permissions.view}, edit={permissions.edit}, delete={permissions.delete}. "
f"No matching RBAC rule found for context=RESOURCE, item={actionId}"
)
return hasPermission
except Exception as e:
self.logger.error(f"RBAC check failed for action {actionId}: {str(e)}. Action will be denied.")
return False
def _convertParametersToSystemFormat(self, parameters: Dict[str, WorkflowActionParameter]) -> Dict[str, Dict[str, Any]]:
"""Convert WorkflowActionParameter dict to system format for API/UI consumption"""

View file

@ -37,52 +37,6 @@ class ContentValidator:
"""
return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory)
def _analyzeDocuments(self, documents: List[Any]) -> List[Dict[str, Any]]:
    """Summarize each document as a small metadata dict with an optional preview.

    Every input document yields exactly one entry with the keys ``name``,
    ``mimeType``, ``format``, ``size`` and ``preview``. When analysing a
    document raises, a placeholder entry carrying an additional ``error`` key
    is appended instead, so a single bad document does not abort the batch.
    """
    results = []
    for document in documents:
        try:
            payload = getattr(document, 'documentData', None)
            docName = getattr(document, 'documentName', 'Unknown')
            docMime = getattr(document, 'mimeType', 'unknown')
            detectedFormat = self._detectFormat(document)
            sizeDetails = self._calculateSize(document)

            snippet = None
            if payload is not None:
                # Structured payloads are pretty-printed as JSON; anything else is stringified.
                if isinstance(payload, (dict, list)):
                    snippet = json.dumps(payload, indent=2, ensure_ascii=False)
                else:
                    snippet = str(payload)
                # Oversized previews are cut to a sample followed by a total-size marker.
                if len(snippet) > MAX_CONTENT_SIZE_FOR_FULL_PREVIEW:
                    snippet = snippet[:PREVIEW_SAMPLE_SIZE] + f"\n\n[Truncated - {self._formatBytes(sizeDetails['bytes'])} total]"

            results.append({
                "name": docName,
                "mimeType": docMime,
                "format": detectedFormat,
                "size": sizeDetails["readable"],
                "preview": snippet
            })
        except Exception as e:
            # Best effort: record the failure and continue with the remaining documents.
            logger.warning(f"Error analyzing document {getattr(document, 'documentName', 'Unknown')}: {str(e)}")
            results.append({
                "name": getattr(document, 'documentName', 'Unknown'),
                "mimeType": getattr(document, 'mimeType', 'unknown'),
                "format": "unknown",
                "size": "0 B",
                "preview": None,
                "error": str(e)
            })
    return results
def _summarizeJsonStructure(self, jsonData: Any) -> Dict[str, Any]:
"""Summarize JSON document structure for validation - extracts main objects, statistics, captions, and IDs."""
try:
@ -120,9 +74,11 @@ class ContentValidator:
"order": section.get("order")
}
# Get elements for processing
elements = section.get("elements", [])
# For tables: extract caption and statistics
if section.get("content_type") == "table":
elements = section.get("elements", [])
if elements and isinstance(elements, list) and len(elements) > 0:
tableElement = elements[0]
sectionSummary["caption"] = tableElement.get("caption")
@ -134,7 +90,6 @@ class ContentValidator:
# For lists: extract item count
elif section.get("content_type") == "list":
elements = section.get("elements", [])
if elements and isinstance(elements, list) and len(elements) > 0:
listElement = elements[0]
items = listElement.get("items", [])
@ -142,7 +97,6 @@ class ContentValidator:
# For paragraphs/headings: extract text preview
elif section.get("content_type") in ["paragraph", "heading"]:
elements = section.get("elements", [])
if elements and isinstance(elements, list) and len(elements) > 0:
textElement = elements[0]
text = textElement.get("text", "")
@ -174,8 +128,10 @@ class ContentValidator:
"order": section.get("order")
}
# Get elements for processing
elements = section.get("elements", [])
if section.get("content_type") == "table":
elements = section.get("elements", [])
if elements and isinstance(elements, list) and len(elements) > 0:
tableElement = elements[0]
sectionSummary["caption"] = tableElement.get("caption")
@ -475,6 +431,12 @@ VALIDATION RULES:
5. Format understanding: Different formats can represent equivalent data structures. Focus on content, not format name.
6. Multi-step workflow awareness: If ACTION HISTORY is present, consider the workflow as a whole. Document metadata (e.g., extraction_method) describes how data was EXTRACTED in the last step, not necessarily how it was OBTAINED in the workflow.
7. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help.
8. CRITICAL - Data vs Data Description: When criteria require specific data types (e.g., images, tables, charts, files), distinguish between:
- ACTUAL DATA: The actual data itself (binary data, structured data, embedded content)
- DATA DESCRIPTIONS: Text fields that describe or specify what data should be created (e.g., "image_description", "table_description", "chart_specification") - these are TEXT METADATA, NOT the actual data
- If only descriptions/specifications exist but no actual data, the criterion is NOT met. Descriptions are instructions for creating data, not the data itself.
- Check content types in sections/elements: if content_type matches the required data type (e.g., "image" for images, "table" for tables), actual data exists. If only text fields describing the data exist, the data is missing.
- Check document statistics: if counts for the required data type are 0, the data is missing even if descriptions exist.
VALIDATION STEPS:
- Check ACTION HISTORY first (if present) for PROCESS-ORIENTED criteria (e.g., "search performed", "sources used", "verification done")

View file

@ -84,43 +84,85 @@ class ActionExecutor:
enhancedParameters['expectedDocumentFormats'] = action.expectedDocumentFormats
logger.info(f"Expected formats: {action.expectedDocumentFormats}")
# Get current task execution operationId to pass as parent to action methods
# This MUST be the "Service Workflow Execution" operation ID (taskExec_*)
parentOperationId = None
# Get current task execution operationId (taskExec_*) - this is the parent of the action
taskOperationId = None
try:
progressLogger = self.services.chat.createProgressLogger()
activeOperations = progressLogger.getActiveOperations()
logger.debug(f"Looking for parent operation ID. Active operations: {list(activeOperations.keys())}")
logger.debug(f"Looking for task operation ID. Active operations: {list(activeOperations.keys())}")
# Look for task execution operation (starts with "taskExec_")
# This is the "Service Workflow Execution" level that should be parent of ALL actions
# This is the Task level that should be parent of this action
for opId in activeOperations.keys():
if opId.startswith("taskExec_"):
parentOperationId = opId
logger.info(f"Found parent operation ID: {parentOperationId} for action {action.execMethod}.{action.execAction}")
taskOperationId = opId
logger.info(f"Found task operation ID: {taskOperationId} for action {action.execMethod}.{action.execAction}")
break
if not parentOperationId:
logger.warning(f"No taskExec_ operation found in active operations. Active operations: {list(activeOperations.keys())}")
if not taskOperationId:
logger.error(f"CRITICAL: No taskExec_ operation found in active operations. Active operations: {list(activeOperations.keys())}. Action logs will appear at root level!")
except Exception as e:
logger.error(f"Error getting parent operation ID: {str(e)}")
logger.error(f"Error getting task operation ID: {str(e)}")
# Add parentOperationId to parameters so action methods can use it
# This is critical for UI dashboard hierarchical display
if parentOperationId:
enhancedParameters['parentOperationId'] = parentOperationId
logger.info(f"Passing parentOperationId '{parentOperationId}' to action {action.execMethod}.{action.execAction}")
# Create action operationId entry - Action is child of Task
import time
actionOperationId = f"action_{action.execMethod}_{action.execAction}_{workflow.id}_{taskNum}_{actionNum}_{int(time.time())}"
try:
# Start action progress tracking - Action is child of Task
# CRITICAL: If taskOperationId is None, the action will appear at root level
self.services.chat.progressLogStart(
actionOperationId,
action.execMethod.capitalize(),
action.execAction,
f"Task {taskNum} Action {actionNum}",
parentOperationId=taskOperationId # Will be None if taskExec_ not found
)
except Exception as e:
logger.error(f"Error starting action progress log: {str(e)}")
# Add action operationId to parameters so action methods can use it for their steps
# Action steps should be children of the action, not the task
# CRITICAL: This must always be set, even if taskOperationId is None
enhancedParameters['parentOperationId'] = actionOperationId
if taskOperationId:
logger.info(f"Created action operationId '{actionOperationId}' (parent: {taskOperationId}) for action {action.execMethod}.{action.execAction}")
else:
logger.warning(f"WARNING: No parentOperationId found for action {action.execMethod}.{action.execAction}. Action logs will appear at root level!")
logger.warning(f"Created action operationId '{actionOperationId}' WITHOUT parent (taskExec_ not found) for action {action.execMethod}.{action.execAction}. Action will appear at root level!")
# Check workflow status before executing the action
checkWorkflowStopped(self.services)
result = await self.executeAction(
methodName=action.execMethod,
actionName=action.execAction,
parameters=enhancedParameters
)
# Execute action and track success for progress log
result = None
actionSuccess = False
try:
result = await self.executeAction(
methodName=action.execMethod,
actionName=action.execAction,
parameters=enhancedParameters
)
actionSuccess = result.success if result else False
except Exception as e:
logger.error(f"Error executing action: {str(e)}")
actionSuccess = False
finally:
# Finish action progress tracking
try:
self.services.chat.progressLogFinish(actionOperationId, actionSuccess)
except Exception as e:
logger.error(f"Error finishing action progress log: {str(e)}")
# If action execution failed, return error result
if result is None:
action.setError("Action execution failed")
return ActionResult(
success=False,
documents=[],
resultLabel=action.execResultLabel,
error="Action execution failed"
)
resultLabel = action.execResultLabel
# Trace action result with full document metadata

View file

@ -565,10 +565,9 @@ class DynamicMode(BaseMode):
methodInstance = _methods[methodName]['instance']
if actionName in methodInstance.actions:
action_info = methodInstance.actions[actionName]
docstring = action_info.get('description', '')
# Extract parameter names from docstring to check if documentList exists
paramDescriptions, _ = methodInstance._extractParameterDetails(docstring)
if 'documentList' in paramDescriptions:
# Use structured WorkflowActionParameter objects from new system
parameters_def = action_info.get('parameters', {})
if 'documentList' in parameters_def:
# Convert DocumentReferenceList to string list for database serialization
# Action methods will convert it back to DocumentReferenceList when needed
parameters['documentList'] = docList.to_string_list()
@ -596,10 +595,9 @@ class DynamicMode(BaseMode):
methodInstance = _methods[methodName]['instance']
if actionName in methodInstance.actions:
action_info = methodInstance.actions[actionName]
docstring = action_info.get('description', '')
# Extract parameter names from docstring to check if connectionReference exists
paramDescriptions, _ = methodInstance._extractParameterDetails(docstring)
if 'connectionReference' in paramDescriptions:
# Use structured WorkflowActionParameter objects from new system
parameters_def = action_info.get('parameters', {})
if 'connectionReference' in parameters_def:
parameters['connectionReference'] = connectionRef
logger.info(f"Added connectionReference to parameters: {connectionRef}")
except Exception as e:

View file

@ -0,0 +1,354 @@
# Architecture & Implementation Analysis
## Deep Review of Hierarchical Document Generation
**Date**: 2025-12-22
**Status**: Critical Issues Found
---
## Executive Summary
The hierarchical document generation system is **partially implemented** but has **critical architectural mismatches** and **implementation gaps** that prevent it from working correctly. While core components exist, several fundamental issues need to be addressed.
---
## ✅ What's Correctly Implemented
### Phase 1: Core Infrastructure ✅
- ✅ `StructureGenerator` class exists with `generateStructure()` method
- ✅ `ContentGenerator` class exists with `generateContent()` method
- ✅ `ContentIntegrator` class exists with `integrateContent()` method
- ✅ `generateDocument` action uses hierarchical approach
- ✅ Basic progress logging implemented
- ✅ Error handling with `createErrorSection()` implemented
### Phase 2: Image Generation ✅
- ✅ `_generateImageSection()` method implemented
- ✅ Image prompt extraction from structure
- ✅ Base64 image data storage
- ✅ Error handling for image failures
### Phase 3: Parallel Processing ✅
- ✅ `_generateSectionsParallel()` method implemented
- ✅ `_generateSectionsSequential()` method implemented
- ✅ Batch processing for large documents
- ✅ Progress callback system
- ✅ Exception handling in parallel execution
---
## ❌ Critical Issues Found
### Issue 1: Previous Sections Context Not Working in Parallel Mode ⚠️ **PARTIALLY FIXED**
**Problem**:
- In parallel mode, sections within the same batch cannot see each other (correct)
- BUT: Sections in later batches should see sections from earlier batches
- **Current Status**: Code was fixed to accumulate previous sections, but needs verification
**Location**: `subContentGenerator.py` lines 240-319
**Fix Applied**:
- Added `accumulatedPreviousSections` to track sections across batches
- Pass accumulated sections to each batch
- **VERIFICATION NEEDED**: Test that prompts actually show previous sections
**Risk**: Medium - May cause continuity issues in generated content
---
### Issue 2: Variable Shadowing Bug ✅ **FIXED**
**Problem**:
- `contentType` variable was shadowed in loop, causing wrong section type in prompts
**Location**: `subContentGenerator.py` line 676
**Fix Applied**:
- Renamed loop variable to `prevContentType`
**Status**: ✅ Fixed
---
### Issue 3: Missing `generation_hint` in Structure Response ✅ **FIXED**
**Problem**:
- Structure generator creates generic hints like "Section heading" instead of meaningful hints
- AI generates same content for all headings because hints are identical
**Location**: `subStructureGenerator.py` lines 242-269
**Fix Applied**:
- Added `_extractMeaningfulHint()` method to extract meaningful hints from section IDs
- Example: `section_heading_current_state` → "Current State"
**Status**: ✅ Fixed
---
### Issue 4: JSON Template Architecture Mismatch ✅ **FIXED**
**Problem**:
- `jsonTemplateDocument` showed filled `elements` arrays, but structure generation requires empty arrays
- Template missing `generation_hint` and `complexity` fields
- Template showed `order: 0` but should start from 1
**Location**: `datamodelJson.py`
**Fix Applied**:
- Updated template to show empty `elements: []`
- Added `generation_hint` to all sections
- Added `complexity` to all sections
- Changed `order` to start from 1
- Added `title` to metadata
**Status**: ✅ Fixed
---
### Issue 5: Structure Prompt Instructions Mismatch ✅ **FIXED**
**Problem**:
- Prompt said "All sections must have empty elements arrays" but template showed filled arrays
- Prompt didn't explicitly require `generation_hint` and `complexity` fields
**Location**: `subStructureGenerator.py` lines 181-190
**Fix Applied**:
- Enhanced prompt to explicitly require `generation_hint` and `complexity`
- Clarified that template examples show structure, but elements must be empty
**Status**: ✅ Fixed
---
## ⚠️ Remaining Issues & Gaps
### Issue 6: Missing Validation Before Content Generation ⚠️ **NOT IMPLEMENTED**
**Problem**:
- No validation that structure has required fields before content generation
- No check that all sections have `generation_hint` before generating content
**Expected** (from Phase 6):
```python
# Validate structure before content generation
if not validateStructure(structure):
raise ValueError("Invalid structure")
```
**Current**: `_validateAndEnhanceStructure()` only fills in missing fields; it never rejects a structure as invalid
**Impact**: Low - Enhancement adds missing fields, but explicit validation would be better
**Recommendation**: Add explicit validation method
---
### Issue 7: Previous Sections Formatting Missing Content ⚠️ **PARTIALLY IMPLEMENTED**
**Problem**:
- Previous sections formatting extracts content from `elements`, but if sections don't have elements yet (in parallel mode), it shows nothing
- Should show `generation_hint` as a fallback when elements are not available
**Location**: `subContentGenerator.py` lines 671-709
**Current Behavior**:
- Shows content preview if elements exist
- Shows nothing if elements don't exist
**Expected Behavior**:
- Show content preview if elements exist
- Show `generation_hint` as fallback if elements don't exist
**Impact**: Medium - Reduces context quality in parallel generation
**Recommendation**: Add a fallback that shows `generation_hint` when elements are not available
---
### Issue 8: Debug File Shows Raw Response, Not Validated Structure ⚠️ **NOT FIXED**
**Problem**:
- Debug file writes `aiResponse.content` (raw AI response) before validation
- Can't verify if `generation_hint` was added by validation
**Location**: `subStructureGenerator.py` lines 77-84
**Impact**: Low - Makes debugging harder but doesn't affect functionality
**Recommendation**: Write validated structure to separate debug file
---
### Issue 9: Missing Unit Tests ⚠️ **NOT IMPLEMENTED**
**Problem**:
- No unit tests for any components (Phase 7 requirement)
- No tests for structure generation
- No tests for content generation
- No tests for integration
**Impact**: High - No way to verify correctness or catch regressions
**Recommendation**: Add comprehensive unit tests
---
### Issue 10: Missing Integration Tests ⚠️ **NOT IMPLEMENTED**
**Problem**:
- No end-to-end tests
- No tests with images
- No tests with long documents
- No error scenario tests
**Impact**: High - No verification of complete flow
**Recommendation**: Add integration tests
---
### Issue 11: Content Caching Not Optimized ⚠️ **PARTIALLY IMPLEMENTED**
**Problem**:
- Content is extracted and cached, but:
- No cache validation (check if documents changed)
- No cache reuse verification
- Content is passed to prompts but may not be formatted efficiently
**Expected** (from Phase 5):
- Cache validation
- Efficient formatting
- Performance testing
**Current**: Basic caching exists but not optimized
**Impact**: Medium - Works but could be more efficient
**Recommendation**: Add cache validation and optimization
---
### Issue 12: Renderer Updates Not Verified ⚠️ **UNKNOWN**
**Problem**:
- Implementation plan requires renderer updates for images
- HTML renderer should create separate image files
- PDF/XLSX/PPTX renderers should embed images
- **Status unknown** - need to verify renderers handle images correctly
**Impact**: High - Images may not render correctly
**Recommendation**: Verify all renderers handle images correctly
---
## 📋 Architecture Compliance Check
### Data Structure Compliance ✅
| Field | Required | Implemented | Status |
|-------|----------|-------------|--------|
| `metadata.title` | Yes | ✅ | ✅ |
| `metadata.split_strategy` | Yes | ✅ | ✅ |
| `sections[].id` | Yes | ✅ | ✅ |
| `sections[].content_type` | Yes | ✅ | ✅ |
| `sections[].complexity` | Yes | ✅ | ✅ |
| `sections[].generation_hint` | Yes | ✅ | ✅ |
| `sections[].order` | Yes | ✅ | ✅ |
| `sections[].elements` | Yes | ✅ | ✅ |
| `sections[].image_prompt` | Image only | ✅ | ✅ |
### Component Method Compliance ✅
| Component | Method | Required | Implemented | Status |
|-----------|--------|----------|-------------|--------|
| StructureGenerator | `generateStructure()` | Yes | ✅ | ✅ |
| StructureGenerator | `_createStructurePrompt()` | Yes | ✅ | ✅ |
| StructureGenerator | `_identifySectionComplexity()` | Yes | ✅ | ✅ |
| StructureGenerator | `_extractImagePrompts()` | Yes | ✅ | ✅ |
| StructureGenerator | `_validateAndEnhanceStructure()` | Yes | ✅ | ✅ |
| StructureGenerator | `_extractMeaningfulHint()` | Yes | ✅ | ✅ |
| ContentGenerator | `generateContent()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateSectionContent()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateSimpleSection()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateComplexTextSection()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateImageSection()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateSectionsParallel()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateSectionsSequential()` | Yes | ✅ | ✅ |
| ContentGenerator | `_createSectionPrompt()` | Yes | ✅ | ✅ |
| ContentIntegrator | `integrateContent()` | Yes | ✅ | ✅ |
| ContentIntegrator | `validateCompleteness()` | Yes | ✅ | ✅ |
| ContentIntegrator | `createErrorSection()` | Yes | ✅ | ✅ |
---
## 🎯 Priority Fixes Needed
### Critical (Must Fix)
1. ✅ **Issue 2**: Variable shadowing bug - **FIXED**
2. ✅ **Issue 3**: Missing generation_hint - **FIXED**
3. ✅ **Issue 4**: JSON template mismatch - **FIXED**
4. ✅ **Issue 5**: Prompt instructions mismatch - **FIXED**
5. ⚠️ **Issue 1**: Previous sections context - **NEEDS VERIFICATION**
### High Priority (Should Fix)
6. ⚠️ **Issue 12**: Renderer image handling - **NEEDS VERIFICATION**
7. ⚠️ **Issue 9**: Missing unit tests - **NOT IMPLEMENTED**
8. ⚠️ **Issue 10**: Missing integration tests - **NOT IMPLEMENTED**
### Medium Priority (Nice to Have)
9. ⚠️ **Issue 7**: Previous sections formatting fallback - **PARTIALLY IMPLEMENTED**
10. ⚠️ **Issue 11**: Content caching optimization - **PARTIALLY IMPLEMENTED**
11. ⚠️ **Issue 6**: Structure validation - **NOT IMPLEMENTED**
12. ⚠️ **Issue 8**: Debug file improvements - **NOT FIXED**
---
## ✅ Summary
### What Works
- Core infrastructure is implemented
- Image generation is integrated
- Parallel processing is implemented
- Error handling is in place
- Progress logging works
### What's Fixed (This Session)
- Variable shadowing bug
- Missing generation_hint extraction
- JSON template architecture mismatch
- Prompt instructions clarity
- Previous sections tracking (needs verification)
### What Needs Work
- Unit and integration tests
- Renderer verification
- Previous sections formatting fallback
- Cache optimization
- Structure validation
### Overall Status
**Architecture**: ✅ **85% Compliant**
**Implementation**: ✅ **80% Complete**
**Testing**: ❌ **0% Complete**
**Production Ready**: ⚠️ **Not Yet** (needs testing and verification)
---
## Next Steps
1. **Verify Issue 1 Fix**: Test that previous sections are correctly tracked in parallel mode
2. **Verify Issue 12**: Test that all renderers handle images correctly
3. **Add Unit Tests**: Start with critical components (StructureGenerator, ContentGenerator)
4. **Add Integration Tests**: Test end-to-end flow with various scenarios
5. **Improve Previous Sections Formatting**: Add a fallback that shows `generation_hint` when elements are not available
6. **Add Structure Validation**: Explicit validation before content generation
7. **Optimize Content Caching**: Add cache validation and efficient formatting
---
**Analysis Complete**: 2025-12-22

View file

@ -0,0 +1,459 @@
# Concept: Hierarchical Document Generation with Image Integration
## Executive Summary
This concept proposes a **three-phase hierarchical approach** to document generation that enables proper image integration and handles complex documents efficiently.
**Key Decisions**:
- ✅ **Performance**: Parallel processing with ChatLog progress messages
- ✅ **Error Handling**: Skip failed sections, show error messages
- ✅ **Image Storage**: Store as base64 in JSON (renderers need direct access)
- ✅ **Backward Compatibility**: Not needed - implement as new default
**Renderer Status**:
- ✅ **Ready**: Text, Markdown, DOCX renderers
- ⚠️ **Needs Update**: HTML (create separate image files), PDF (embed images)
- ⚠️ **Needs Implementation**: XLSX, PPTX (add image support)
## Problem Statement
Currently, the document generation system has the following limitations:
1. **No Image Integration**: Images are generated separately but cannot be embedded into document structures
2. **Single-Pass Generation**: Documents are generated in one AI call, making it difficult to handle complex sections (long text, images, chapters)
3. **Repeated Extraction**: Content extraction may happen multiple times unnecessarily
4. **No Structured Approach**: No mechanism to first define document structure, then populate sections
## Current Architecture Analysis
### Current Flow:
```
User Request → ai.generateDocument → ai.process → AI JSON Generation → Renderer → Final Document
```
### Issues:
- AI generates complete JSON structure in one pass
- Images are generated separately via `ai.generate` action
- No mechanism to integrate generated images into document structure
- JSON schema supports `image` content_type, but AI rarely generates it
- Content extraction happens per action, not cached/reused
### Current Image Handling:
- Images can be rendered IF they exist in JSON structure (`content_type: "image"`)
- Image data expected as `base64Data` in elements
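- Some renderers already support image rendering (DOCX embeds images, HTML uses data URIs); PDF currently shows only a placeholder, and XLSX/PPTX have no image handling yet (see "Renderer Readiness Assessment")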
- But images are never generated WITHIN document generation
## Proposed Solution: Hierarchical Document Generation
### Core Concept
**Three-Phase Approach:**
1. **Structure Generation Phase**: Generate document skeleton with section placeholders
2. **Content Generation Phase**: Generate content for each section (text or image) via sub-prompts
3. **Integration Phase**: Merge all generated content into final document structure
### Architecture Overview
```
┌─────────────────────────────────────────────────────────────┐
│ Phase 1: Structure Generation │
│ - Generate document skeleton │
│ - Identify sections (text, image, complex) │
│ - Create section placeholders with metadata │
└─────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────┐
│ Phase 2: Content Generation (Tree-like) │
│ │
│ ┌──────────────────────────────────────────────┐ │
│ │ Section 1: Heading (simple) │ │
│ │ → Generate directly │ │
│ └──────────────────────────────────────────────┘ │
│ │
│ ┌──────────────────────────────────────────────┐ │
│ │ Section 2: Paragraph (simple) │ │
│ │ → Generate directly │ │
│ └──────────────────────────────────────────────┘ │
│ │
│ ┌──────────────────────────────────────────────┐ │
│ │ Section 3: Image (complex) │ │
│ │ → Sub-prompt: Generate image │ │
│ │ → Store image data │ │
│ │ → Create image section with base64Data │ │
│ └──────────────────────────────────────────────┘ │
│ │
│ ┌──────────────────────────────────────────────┐ │
│ │ Section 4: Long Chapter (complex) │ │
│ │ → Sub-prompt: Generate chapter content │ │
│ │ → Split into subsections if needed │ │
│ └──────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────┐
│ Phase 3: Integration │
│ - Merge all generated content │
│ - Replace placeholders with actual data │
│ - Validate structure completeness │
│ - Render to final format │
└─────────────────────────────────────────────────────────────┘
```
## Detailed Design
### Phase 1: Structure Generation
**Purpose**: Create document skeleton with section metadata
**Process**:
1. AI generates document structure with sections
2. Each section includes:
- `id`: Unique identifier
- `content_type`: Type (heading, paragraph, image, table, etc.)
- `complexity`: "simple" or "complex"
- `generation_hint`: Instructions for content generation
- `order`: Section order
- `elements`: Empty or placeholder
**Example Structure**:
```json
{
"metadata": {
"title": "Children's Bedtime Story",
"split_strategy": "single_document"
},
"documents": [{
"id": "doc_1",
"sections": [
{
"id": "section_title",
"content_type": "heading",
"complexity": "simple",
"generation_hint": "Story title",
"order": 1,
"elements": []
},
{
"id": "section_intro",
"content_type": "paragraph",
"complexity": "simple",
"generation_hint": "Introduction paragraph",
"order": 2,
"elements": []
},
{
"id": "section_image_1",
"content_type": "image",
"complexity": "complex",
"generation_hint": "Illustration: Rabbit meeting owl in moonlit forest",
"image_prompt": "A small brown rabbit sitting in a peaceful forest clearing under moonlight with stars, meeting a wise owl perched on a branch",
"order": 3,
"elements": []
},
{
"id": "section_chapter_1",
"content_type": "paragraph",
"complexity": "complex",
"generation_hint": "First chapter: Rabbit's adventure begins",
"order": 4,
"elements": []
}
]
}]
}
```
### Phase 2: Content Generation
**Purpose**: Generate actual content for each section
**Process**:
1. Iterate through sections in order
2. For each section:
- **Simple sections** (heading, short paragraph):
- Generate content directly via AI
- Populate `elements` array
- **Complex sections** (image, long chapter):
- Create sub-prompt based on `generation_hint` and `image_prompt`
- Generate content via specialized action:
- Images: `ai.generate` with image generation
- Long text: `ai.process` with focused prompt
- Store generated content
- Populate `elements` array
**Content Caching**:
- Extract content from source documents ONCE at the start
- Cache extracted content for reuse across all sections
- Pass cached content to sub-prompts to avoid re-extraction
**Image Generation**:
- For `content_type: "image"` sections:
- Use `image_prompt` from structure
- Call `ai.generate` action with image generation
- Receive base64 image data
- Create image element:
```json
{
"url": "data:image/png;base64,<base64_data>",
"base64Data": "<base64_data>",
"altText": "<alt_text>",
"caption": "<caption>"
}
```
### Phase 3: Integration
**Purpose**: Merge all content into final document structure
**Process**:
1. Validate all sections have content
2. Merge generated content into structure
3. Replace placeholders with actual data
4. Finalize JSON structure
5. Render to target format (docx, pdf, html, etc.)
## Implementation Strategy
### New Components Needed
1. **Structure Generator** (`structureGenerator.py`)
- Generates document skeleton
- Identifies section complexity
- Creates generation hints
2. **Content Generator** (`contentGenerator.py`)
- Generates content for each section
- Handles simple vs complex sections
- Manages sub-prompts and image generation
- Caches extracted content
3. **Content Integrator** (`contentIntegrator.py`)
- Merges generated content
- Validates completeness
- Finalizes document structure
### Modified Components
1. **`generateDocument` action**
- Implement hierarchical generation as default
- Orchestrate three phases
- Add progress logging for each phase
2. **`process` action**
- Support content caching (extract once, reuse)
- Support sub-prompt generation for sections
3. **Prompt Builder** (`subPromptBuilderGeneration.py`)
- Add structure generation prompt
- Add section-specific content prompts
- Add image generation prompt templates
4. **Renderers** (Update required):
- **HTML Renderer**: Create separate image files and link them
- **PDF Renderer**: Embed images using reportlab
- **XLSX Renderer**: Add image embedding support
- **PPTX Renderer**: Add image embedding support
### New Action Parameters
**For `generateDocument`**:
- `enableImageIntegration`: boolean (default: true)
- `maxSectionLength`: int (threshold for "complex" sections, default: 500 words)
- `parallelGeneration`: boolean (default: true) - enable parallel section generation
- `progressLogging`: boolean (default: true) - send ChatLog progress updates
**For sub-prompts**:
- `sectionContext`: Previous sections for context
- `cachedContent`: Extracted content cache (to avoid re-extraction)
- `targetSection`: Section metadata
- `previousSections`: Array of already-generated sections for continuity
## Benefits
1. **Image Integration**: Images can be generated and embedded into documents
2. **Structured Approach**: Clear separation of structure and content
3. **Efficiency**: Content extracted once, reused across sections
4. **Scalability**: Can handle very long documents by splitting into sections
5. **Quality**: Better control over complex sections (images, long chapters)
6. **Flexibility**: Can generate different content types per section
## Migration Strategy
**Note**: No backwards compatibility needed - can implement directly as new default.
1. **Phase 1**: Implement hierarchical generation as new default
2. **Phase 2**: Update renderers (HTML, PDF, XLSX, PPTX) for image support
3. **Phase 3**: Testing and refinement
4. **Phase 4**: Remove old single-pass mode (or keep as internal fallback only)
## Example Workflow
**User Request**: "Create a children's bedtime story with 5 illustrations"
**Phase 1 Output**:
```json
{
"metadata": {"title": "Flöckchen's Adventure"},
"documents": [{
"sections": [
{"id": "title", "content_type": "heading", "complexity": "simple", ...},
{"id": "intro", "content_type": "paragraph", "complexity": "simple", ...},
{"id": "img1", "content_type": "image", "complexity": "complex",
"image_prompt": "Rabbit meeting owl", ...},
{"id": "chapter1", "content_type": "paragraph", "complexity": "complex", ...},
{"id": "img2", "content_type": "image", "complexity": "complex", ...},
...
]
}]
}
```
**Phase 2 Process**:
- Generate title → populate elements
- Generate intro → populate elements
- Generate image 1 → call `ai.generate`, store base64 → populate elements
- Generate chapter 1 → sub-prompt → populate elements
- Generate image 2 → call `ai.generate`, store base64 → populate elements
- ...
**Phase 3 Output**: Complete document with all sections populated, ready for rendering
## Renderer Readiness Assessment
### Current Renderer Status for Image Handling:
1. **Text Renderer** (`rendererText.py`): ✅ **READY**
- Skips images, shows placeholder: `[Image: altText]`
- No changes needed
2. **Markdown Renderer** (`rendererMarkdown.py`): ✅ **READY**
- Shows placeholder with truncated base64: `![altText](data:image/png;base64,...)`
- No changes needed (markdown limitation)
3. **HTML Renderer** (`rendererHtml.py`): ⚠️ **NEEDS UPDATE**
- Currently: Embeds base64 directly in `<img>` tag as data URI
- **Required Change**: Create separate image files and link to them
- Implementation: Generate image files (e.g., `image_1.png`, `image_2.png`) alongside HTML
- Update `<img>` tags to use relative paths: `<img src="image_1.png" alt="...">`
- Return multiple files: HTML file + image files
4. **PDF Renderer** (`rendererPdf.py`): ⚠️ **NEEDS UPDATE**
- Currently: Shows placeholder `[Image: altText]`
- **Required Change**: Embed images directly in PDF using reportlab
- Implementation: Use `reportlab.platypus.Image()` with base64 decoded bytes
5. **DOCX Renderer** (`rendererDocx.py`): ✅ **READY**
- Embeds images directly using `doc.add_picture()`
- Adds captions below images
- No changes needed
6. **XLSX Renderer** (`rendererXlsx.py`): ⚠️ **NEEDS IMPLEMENTATION**
- Currently: No image handling found
- **Required Change**: Add image support using openpyxl
- Implementation: Use `openpyxl.drawing.image.Image()` to embed images in cells
- Store images in worksheet cells or as floating images
7. **PPTX Renderer** (`rendererPptx.py`): ⚠️ **NEEDS IMPLEMENTATION**
- Currently: No image handling found
- **Required Change**: Add image support using python-pptx
- Implementation: Use `slide.shapes.add_picture()` to add images to slides
### Renderer Update Requirements:
**Priority 1 (Critical for HTML output)**:
- HTML Renderer: Create separate image files and link them
**Priority 2 (Important for document formats)**:
- PDF Renderer: Embed images using reportlab
- XLSX Renderer: Add image embedding support
- PPTX Renderer: Add image embedding support
## Answers to Open Questions
### 1. Performance: How to handle very large documents (100+ sections)?
**Answer**: Use parallel processing where possible, with progress ChatLog messages.
**Implementation Strategy**:
- **Parallel Section Generation**: Generate independent sections in parallel using asyncio
- **Batch Processing**: Process sections in batches (e.g., 10 sections at a time)
- **Progress Tracking**: Send ChatLog progress updates:
- "Generating structure..." (Phase 1)
- "Generating content for section X/Y..." (Phase 2)
- "Generating image for section X..." (Phase 2 - images)
- "Merging content..." (Phase 3)
- "Rendering final document..." (Phase 3)
- **Streaming**: For very large documents, consider streaming partial results
**Example Progress Messages**:
```
Phase 1: Structure Generation (0% → 33%)
Phase 2: Content Generation (33% → 90%)
- Section 1/10: Heading (34%)
- Section 2/10: Paragraph (40%)
- Section 3/10: Image generation (50%)
- Section 4/10: Chapter (60%)
...
Phase 3: Integration & Rendering (90% → 100%)
```
### 2. Error Handling: What if one section fails?
**Answer**: Skip failed sections, keep section title and type, show error message in the section.
**Implementation Strategy**:
- **Graceful Degradation**: Continue processing remaining sections
- **Error Section**: Create error placeholder section:
```json
{
"id": "section_failed_3",
"content_type": "paragraph",
"elements": [{
"text": "[ERROR: Failed to generate content for this section. Error: <error_message>]"
}],
"order": 3,
"error": true,
"errorMessage": "<detailed_error>"
}
```
- **Logging**: Log errors for debugging but don't fail entire document
- **User Notification**: Include error count in final progress message
### 3. Image Storage: Where to store generated images?
**Answer**: Store images in JSON as base64, as renderers need them afterwards.
**Implementation Strategy**:
- **In-Memory Storage**: Keep base64 strings in JSON structure during generation
- **JSON Structure**: Store in section elements:
```json
{
"url": "data:image/png;base64,<base64_data>",
"base64Data": "<full_base64_string>",
"altText": "Image description",
"caption": "Optional caption"
}
```
- **Memory Management**: For very large images, consider compression or chunking
- **Renderer Access**: All renderers can access `base64Data` directly from JSON
- **HTML Special Case**: HTML renderer will extract base64, decode, and save as separate files during rendering
### 4. Backward Compatibility: How to ensure existing workflows still work?
**Answer**: No backwards compatibility needed.
**Implementation Strategy**:
- **New Default**: Hierarchical generation becomes the default mode
- **Clean Migration**: All document generation uses hierarchical approach
- **No User-Facing Fallback**: Remove single-pass mode from the workflow; at most keep it as an internal fallback only
- **Breaking Change**: Acceptable since this is a new feature/enhancement
## Next Steps
1. **Review and Approval**: Get feedback on concept
2. **Detailed Design**: Design API and data structures
3. **Prototype**: Implement Phase 1 (structure generation)
4. **Testing**: Test with real use cases
5. **Full Implementation**: Implement all phases
6. **Migration**: Migrate existing workflows

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,398 @@
# Implementation Plan: Hierarchical Document Generation
## Overview
This document outlines the step-by-step implementation plan for the hierarchical document generation system with image integration.
## Implementation Phases
### Phase 1: Core Infrastructure (Week 1)
**Goal**: Set up core components and data structures
#### Tasks:
1. **Create StructureGenerator Component**
- [ ] Create `subStructureGenerator.py`
- [ ] Implement `generateStructure()` method
- [ ] Implement `_createStructurePrompt()` method
- [ ] Implement `_identifySectionComplexity()` method
- [ ] Implement `_extractImagePrompts()` method
- [ ] Add unit tests
2. **Create ContentGenerator Component**
- [ ] Create `subContentGenerator.py`
- [ ] Implement `generateContent()` method
- [ ] Implement `_generateSectionContent()` method
- [ ] Implement `_generateSimpleSection()` method
- [ ] Implement `_generateComplexTextSection()` method
- [ ] Implement `_createSectionPrompt()` method
- [ ] Add unit tests
3. **Create ContentIntegrator Component**
- [ ] Create `subContentIntegrator.py`
- [ ] Implement `integrateContent()` method
- [ ] Implement `validateCompleteness()` method
- [ ] Implement `createErrorSection()` method
- [ ] Add unit tests
4. **Update generateDocument Action**
- [ ] Modify `generateDocument.py` to use hierarchical approach
- [ ] Add Phase 1: Structure generation
- [ ] Add Phase 2: Content generation (sequential first)
- [ ] Add Phase 3: Integration & rendering
- [ ] Add basic progress logging
- [ ] Add error handling
**Deliverables**:
- Core components created
- Basic hierarchical generation working (sequential)
- Unit tests passing
**Estimated Time**: 3-4 days
---
### Phase 2: Image Generation Integration (Week 1-2)
**Goal**: Integrate image generation into content generation
#### Tasks:
1. **Implement Image Section Generation**
- [ ] Add `_generateImageSection()` method to ContentGenerator
- [ ] Integrate with `ai.generate` action
- [ ] Handle base64 image data storage
- [ ] Add image prompt extraction from structure
- [ ] Add error handling for image generation failures
2. **Update Structure Generation Prompt**
- [ ] Add image section detection in structure prompt
- [ ] Add image_prompt field extraction
- [ ] Test with user prompts requesting images
3. **Test Image Integration**
- [ ] Test image generation in document structure
- [ ] Test multiple images in one document
- [ ] Test image generation failures
**Deliverables**:
- Image generation integrated
- Images stored as base64 in JSON
- Error handling for image failures
**Estimated Time**: 2-3 days
---
### Phase 3: Parallel Processing & Progress Logging (Week 2)
**Goal**: Implement parallel section generation and detailed progress logging
#### Tasks:
1. **Implement Parallel Generation**
- [ ] Add `_generateSectionsParallel()` method
- [ ] Use `asyncio.gather()` for parallel execution
- [ ] Add batch processing for large documents
- [ ] Handle exceptions in parallel execution
- [ ] Test parallel vs sequential performance
2. **Enhance Progress Logging**
- [ ] Create progress callback system
- [ ] Add detailed progress messages:
- Structure generation progress
- Section-by-section progress
- Image generation progress
- Rendering progress
- [ ] Calculate accurate progress percentages
- [ ] Test progress updates
3. **Update generateDocument Action**
- [ ] Integrate parallel generation
- [ ] Add progress callback to content generation
- [ ] Update progress logging throughout phases
**Deliverables**:
- Parallel section generation working
- Detailed progress logging
- Performance improvements
**Estimated Time**: 2-3 days
---
### Phase 4: Renderer Updates (Week 2-3)
**Goal**: Update renderers to properly handle images
#### Tasks:
1. **Update HTML Renderer**
- [ ] Modify `rendererHtml.py`
- [ ] Add `_extractImages()` method
- [ ] Implement separate image file creation
- [ ] Update HTML to use relative image paths
- [ ] Handle multiple image files
- [ ] Test HTML + image files output
2. **Update PDF Renderer**
- [ ] Modify `rendererPdf.py`
- [ ] Update `_renderJsonImage()` to embed images
- [ ] Use `reportlab.platypus.Image()` with base64
- [ ] Handle image sizing and positioning
- [ ] Test PDF with embedded images
3. **Update XLSX Renderer**
- [ ] Modify `rendererXlsx.py`
- [ ] Add `_renderJsonImage()` method
- [ ] Use `openpyxl.drawing.image.Image()` to embed images
- [ ] Handle image placement in cells
- [ ] Test XLSX with images
4. **Update PPTX Renderer**
- [ ] Modify `rendererPptx.py`
- [ ] Add `_renderJsonImage()` method
- [ ] Use `slide.shapes.add_picture()` to add images
- [ ] Handle image sizing on slides
- [ ] Test PPTX with images
**Deliverables**:
- All renderers support images
- HTML creates separate image files
- PDF/XLSX/PPTX embed images directly
**Estimated Time**: 4-5 days
---
### Phase 5: Content Caching & Optimization (Week 3)
**Goal**: Implement content caching to avoid re-extraction
#### Tasks:
1. **Implement Content Cache**
- [ ] Create ContentCache data structure
- [ ] Extract content once at start of generation
- [ ] Pass cached content to all sub-prompts
- [ ] Add cache validation (check if documents changed)
- [ ] Test cache reuse
2. **Optimize Prompt Building**
- [ ] Update structure prompt to use cached content
- [ ] Update section prompts to use cached content
- [ ] Format cached content efficiently
- [ ] Test prompt sizes
3. **Performance Testing**
- [ ] Test with large documents
- [ ] Test with multiple source documents
- [ ] Measure performance improvements
- [ ] Optimize bottlenecks
**Deliverables**:
- Content caching implemented
- No redundant content extraction
- Performance optimized
**Estimated Time**: 2-3 days
---
### Phase 6: Error Handling & Edge Cases (Week 3-4)
**Goal**: Robust error handling and edge case coverage
#### Tasks:
1. **Enhance Error Handling**
- [ ] Improve error section creation
- [ ] Add error recovery strategies
- [ ] Handle partial failures gracefully
- [ ] Add error logging and reporting
2. **Handle Edge Cases**
- [ ] Empty document list
- [ ] No sections generated
- [ ] All sections fail
- [ ] Very large images
- [ ] Very long documents (100+ sections)
- [ ] Missing image prompts
- [ ] Invalid section types
3. **Add Validation**
- [ ] Validate structure before content generation
- [ ] Validate content before integration
- [ ] Validate final document before rendering
- [ ] Add comprehensive error messages
**Deliverables**:
- Robust error handling
- Edge cases covered
- Clear error messages
**Estimated Time**: 2-3 days
---
### Phase 7: Testing & Refinement (Week 4)
**Goal**: Comprehensive testing and refinement
#### Tasks:
1. **Unit Testing**
- [ ] Complete unit tests for all components
- [ ] Test all methods
- [ ] Test error scenarios
- [ ] Achieve >80% code coverage
2. **Integration Testing**
- [ ] Test end-to-end document generation
- [ ] Test with various document types
- [ ] Test with images
- [ ] Test with long documents
- [ ] Test error scenarios
3. **Performance Testing**
- [ ] Test with 10, 50, 100+ sections
- [ ] Measure generation time
- [ ] Measure memory usage
- [ ] Compare parallel vs sequential
- [ ] Optimize if needed
4. **User Acceptance Testing**
- [ ] Test with real user scenarios
- [ ] Test bedtime story with images (original use case)
- [ ] Test business documents
- [ ] Test technical documents
- [ ] Gather feedback
5. **Documentation**
- [ ] Update API documentation
- [ ] Add code comments
- [ ] Update user guides
- [ ] Create examples
**Deliverables**:
- Comprehensive test suite
- Performance benchmarks
- Documentation complete
- Ready for production
**Estimated Time**: 3-4 days
---
## Dependencies
### External Dependencies
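- `asyncio` - For parallel processing (Python standard library, no installation needed)
- `base64` - For image encoding/decoding (Python standard library, no installation needed)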
- `reportlab` - For PDF image embedding
- `openpyxl` - For XLSX image embedding
- `python-pptx` - For PPTX image embedding
### Internal Dependencies
- `serviceGeneration` - Main generation service
- `serviceAi` - AI service for generation
- `serviceExtraction` - Content extraction service
- `methodAi.actions.generate` - Image generation action
- `methodAi.actions.process` - Text generation action
## Risk Mitigation
### Risks and Mitigation Strategies
1. **Risk**: Image generation failures break entire document
- **Mitigation**: Error handling creates error sections, continues processing
2. **Risk**: Parallel generation causes memory issues
- **Mitigation**: Batch processing, limit concurrent operations
3. **Risk**: Large base64 images cause JSON size issues
- **Mitigation**: Consider compression or chunking for very large images
4. **Risk**: HTML renderer needs to return multiple files
- **Mitigation**: Modify return type or create file bundle system
5. **Risk**: Performance not meeting expectations
- **Mitigation**: Profile and optimize bottlenecks, consider caching
## Success Criteria
### Functional Requirements
- ✅ Documents can be generated with embedded images
- ✅ HTML renderer creates separate image files
- ✅ PDF/XLSX/PPTX renderers embed images
- ✅ Progress logging shows detailed progress
- ✅ Error handling prevents complete failures
- ✅ Content extraction happens only once
### Performance Requirements
- ✅ Parallel generation improves performance by 2x+ for multi-section documents
- ✅ Progress updates appear within 1 second of action
- ✅ Documents with 50+ sections complete in <5 minutes
### Quality Requirements
- ✅ >80% code coverage
- ✅ All edge cases handled
- ✅ Clear error messages
- ✅ Comprehensive documentation
## Rollout Plan
### Step 1: Internal Testing (Week 4)
- Deploy to development environment
- Internal team testing
- Fix critical issues
### Step 2: Beta Testing (Week 5)
- Deploy to staging environment
- Select beta users
- Gather feedback
- Fix issues
### Step 3: Production Deployment (Week 6)
- Deploy to production
- Monitor performance
- Monitor errors
- Gather user feedback
### Step 4: Optimization (Ongoing)
- Monitor usage patterns
- Optimize based on real-world usage
- Add enhancements based on feedback
## Timeline Summary
| Phase | Duration | Start | End |
|-------|----------|-------|-----|
| Phase 1: Core Infrastructure | 3-4 days | Day 1 | Day 4 |
| Phase 2: Image Integration | 2-3 days | Day 4 | Day 7 |
| Phase 3: Parallel Processing | 2-3 days | Day 7 | Day 10 |
| Phase 4: Renderer Updates | 4-5 days | Day 10 | Day 15 |
| Phase 5: Content Caching | 2-3 days | Day 15 | Day 18 |
| Phase 6: Error Handling | 2-3 days | Day 18 | Day 21 |
| Phase 7: Testing & Refinement | 3-4 days | Day 21 | Day 25 |
**Total Estimated Time**: 4-5 weeks
## Next Steps
1. **Review and Approve Plan**
- Review implementation plan
- Approve timeline
- Assign resources
2. **Set Up Development Environment**
- Create feature branch
- Set up test infrastructure
- Prepare development tools
3. **Begin Phase 1**
- Start with StructureGenerator
- Set up project structure
- Begin implementation

View file

@ -0,0 +1,238 @@
# Rendering Issue Analysis
## Why HTML Documents Are Being Rendered as Text
**Date**: 2025-12-22
**Issue**: Documents requested as HTML are being output as text/plain
---
## Root Cause Analysis
### Issue 1: `resultType` Not Extracted from Task Objective ❌ **CRITICAL**
**Problem**:
- Task objective clearly states: "Generate a complete, well-structured **HTML document**"
- Validation shows: `EXPECTED FORMATS: ['html']`
- But action was called with: `ai.generateDocument {}` (empty parameters)
- So `resultType` defaults to `"docx"` instead of `"html"`
**Location**:
- `generateDocument.py` line 44: `resultType = parameters.get("resultType", "docx")`
- No parameter extraction from task objective/prompt
**Impact**: **CRITICAL** - Wrong format is used even though task clearly requests HTML
**Fix Needed**:
- Extract `resultType` from task objective/prompt before calling action
- Or enhance `generateDocument` to detect format from prompt if not provided
---
### Issue 2: HTML Not in Action Definition Options ❌ **CRITICAL**
**Problem**:
- Action definition in `methodAi.py` line 357 only lists: `["docx", "pdf", "txt", "md"]`
- `"html"` is **NOT** in the allowed options
- But docstring says HTML is supported: `"resultType (str, optional): Output format (docx, pdf, txt, md, html, etc.)"`
**Location**:
- `methodAi.py` line 357: `frontendOptions=["docx", "pdf", "txt", "md"]`
**Impact**: **CRITICAL** - Even if HTML is requested, it might be rejected or not recognized
**Fix Needed**:
- Add `"html"` to `frontendOptions` list
---
### Issue 3: Renderer Fallback to Text ❌ **CRITICAL**
**Problem**:
- When `resultType="docx"` is used (default)
- If docx renderer fails or is not found
- System falls back to text renderer (lines 403-404 of `mainServiceGeneration.py`)
- This explains why output is `text/plain` instead of HTML
**Location**:
- `mainServiceGeneration.py` lines 393-409: `_getFormatRenderer()` method
- Line 403: `logger.warning(f"No renderer found for format {output_format}, falling back to text")`
**Impact**: **CRITICAL** - Wrong format is rendered
**Fix Needed**:
- Fix docx renderer if it's failing
- Or better: Extract correct format from prompt
---
### Issue 4: Missing Parameter Extraction ❌ **HIGH PRIORITY**
**Problem**:
- Task objective contains format information ("HTML document")
- But no parameter extraction step extracts `resultType` from prompt
- Action is called with empty parameters `{}`
**Location**:
- Workflow execution - parameter extraction phase
- Should extract `resultType: "html"` from task objective
**Impact**: **HIGH** - System can't infer format from user intent
**Fix Needed**:
- Add parameter extraction that detects format from prompt
- Or enhance `generateDocument` to auto-detect format from prompt
---
## Flow Analysis
### Expected Flow:
```
1. Task Objective: "Generate HTML document..."
2. Parameter Extraction: Extract resultType="html" from objective
3. Action Call: ai.generateDocument({resultType: "html", prompt: "..."})
4. Content Generation: Generate sections with content
5. Integration: Merge sections into complete structure
6. Rendering: Call renderReport(outputFormat="html")
7. HTML Renderer: Render to HTML
8. Output: document.html (text/html)
```
### Actual Flow (Broken):
```
1. Task Objective: "Generate HTML document..."
2. Parameter Extraction: ❌ MISSING - no extraction
3. Action Call: ai.generateDocument({}) ❌ Empty parameters
4. Content Generation: ✅ Generate sections with content
5. Integration: ✅ Merge sections into complete structure
6. Rendering: Call renderReport(outputFormat="docx") ❌ Wrong format
7. Docx Renderer: ❌ Fails or not found
8. Fallback: Text renderer ❌ Wrong renderer
9. Output: document.text (text/plain) ❌ Wrong format
```
---
## Fixes Required
### Fix 1: Add HTML to Action Definition Options ✅ **EASY**
**File**: `gateway/modules/workflows/methods/methodAi/methodAi.py`
**Line**: 357
**Change**:
```python
frontendOptions=["docx", "pdf", "txt", "md", "html"], # Added "html"
```
---
### Fix 2: Extract resultType from Prompt ✅ **MEDIUM**
**Option A**: Enhance `generateDocument` to detect format from prompt
**File**: `gateway/modules/workflows/methods/methodAi/actions/generateDocument.py`
**After line 44**:
```python
# Remember whether the caller chose a format, so the documented "docx"
# default can be told apart from an explicit "docx" request.
explicitResultType = parameters.get("resultType")
resultType = explicitResultType or "docx"
# Auto-detect format from prompt only when resultType was NOT provided;
# an explicit choice is never overridden by words in the prompt.
if not explicitResultType and prompt:
    import re  # local import: this snippet is pasted into an existing function

    # Whole-word patterns, checked in priority order. Plain substring checks
    # would misfire ("md" matches "cmd", "text" matches "context").
    formatPatterns = (
        ("html", r"\bhtml5?\b"),
        ("pdf", r"\bpdf\b"),
        ("md", r"\b(?:markdown|md)\b"),
        ("txt", r"\b(?:text|txt)\b"),
    )
    promptLower = prompt.lower()
    for detectedFormat, pattern in formatPatterns:
        if re.search(pattern, promptLower):
            resultType = detectedFormat
            break
```
**Option B**: Extract in parameter planning phase (better, but requires workflow changes)
---
### Fix 3: Improve Renderer Error Handling ✅ **MEDIUM**
**File**: `gateway/modules/services/serviceGeneration/mainServiceGeneration.py`
**Lines**: 393-409
**Enhance**: Better error messages and logging when renderer not found
```python
def _getFormatRenderer(self, output_format: str):
    """Get the appropriate renderer for the specified format using auto-discovery.

    Asks the renderer registry for a renderer matching ``output_format``.
    When none is returned, logs the formats the registry reports as
    supported and falls back to the text renderer.

    Args:
        output_format: Format key passed to the renderer registry.

    Returns:
        The renderer for ``output_format``, the text renderer as a fallback,
        or None when neither is available or the lookup raised an exception.
    """
    try:
        from .renderers.registry import getRenderer
        renderer = getRenderer(output_format, services=self.services)
        if renderer:
            return renderer
        # Log available formats for debugging
        # (imported lazily so the happy path does not depend on it)
        from .renderers.registry import getSupportedFormats
        availableFormats = getSupportedFormats()
        logger.error(
            f"No renderer found for format '{output_format}'. "
            f"Available formats: {availableFormats}"
        )
        # Fallback to text renderer if no specific renderer found
        logger.warning(f"Falling back to text renderer for format {output_format}")
        fallbackRenderer = getRenderer('text', services=self.services)
        if fallbackRenderer:
            return fallbackRenderer
        logger.error("Even text renderer fallback failed")
        return None
    except Exception as e:
        # Best-effort lookup: report the failure and let the caller handle None.
        logger.error(f"Error getting renderer for {output_format}: {str(e)}")
        return None
```
---
## Verification Steps
After fixes:
1. **Test HTML Generation**:
- Task: "Generate HTML document about AI"
- Expected: `resultType="html"` extracted or detected
- Expected: HTML renderer used
- Expected: Output is `document.html` with `text/html` MIME type
2. **Test Format Detection**:
- Task: "Generate PDF report"
- Expected: `resultType="pdf"` detected
- Expected: PDF renderer used
3. **Test Explicit Parameter**:
- Action: `ai.generateDocument({resultType: "html", prompt: "..."})`
- Expected: HTML renderer used (no fallback)
---
## Summary
**Root Causes**:
1. ❌ `resultType` not extracted from task objective
2. ❌ HTML not in action definition options
3. ❌ Renderer silently falls back to the text renderer when the docx renderer fails or is not found
4. ❌ No format auto-detection from prompt
**Priority**: **CRITICAL** - System cannot produce HTML documents as requested
**Estimated Fix Time**:
- Fix 1: 5 minutes
- Fix 2: 30 minutes
- Fix 3: 15 minutes
- **Total**: ~1 hour
---
**Analysis Complete**: 2025-12-22

View file

@ -68,7 +68,7 @@ def discoverMethods(serviceCenter):
# Method not discovered yet - create new instance
methodInstance = item(serviceCenter)
# Use the actions property from MethodBase which handles @action decorator
# Use the actions property from MethodBase which handles WorkflowActionDefinition
actions = methodInstance.actions
# Create method info
@ -131,7 +131,7 @@ def getMethodsList(serviceCenter):
return "\n\n".join(methodsList)
def getActionParameterList(methodName: str, actionName: str, methods: Dict[str, Any]) -> str:
"""Get action parameter list from method docstring for AI parameter generation (list only)."""
"""Get action parameter list from WorkflowActionParameter structure for AI parameter generation (list only)."""
try:
if not methods or methodName not in methods:
return ""
@ -141,17 +141,21 @@ def getActionParameterList(methodName: str, actionName: str, methods: Dict[str,
return ""
action_info = methodInstance.actions[actionName]
# Extract parameter descriptions from docstring
docstring = action_info.get('description', '')
paramDescriptions, paramTypes = methodInstance._extractParameterDetails(docstring)
# Use structured WorkflowActionParameter objects from new system
parameters = action_info.get('parameters', {})
param_list = []
for paramName, paramDesc in paramDescriptions.items():
paramType = paramTypes.get(paramName, 'Any')
for paramName, paramInfo in parameters.items():
paramType = paramInfo.get('type', 'Any')
paramDesc = paramInfo.get('description', '')
paramRequired = paramInfo.get('required', False)
# Format: paramName (type, required/optional): description
reqText = "required" if paramRequired else "optional"
if paramDesc:
param_list.append(f"- {paramName} ({paramType}): {paramDesc}")
param_list.append(f"- {paramName} ({paramType}, {reqText}): {paramDesc}")
else:
param_list.append(f"- {paramName} ({paramType})")
param_list.append(f"- {paramName} ({paramType}, {reqText})")
# Return list only, without leading headings or trailing text
return "\n".join(param_list)

View file

@ -88,10 +88,23 @@ def extractAvailableMethods(service: Any) -> str:
# Create a flat JSON format with compound action names for better AI parsing
available_actions_json = {}
processed_methods = set() # Track processed methods to avoid duplicates
for methodName, methodInfo in methods.items():
# Skip short name aliases - only process full class names (MethodXxx)
# Short names are stored as aliases but we want to avoid processing them twice
if not methodName.startswith('Method'):
continue
# Convert MethodAi -> ai, MethodDocument -> document, etc.
shortName = methodName.replace('Method', '').lower()
# Skip if we've already processed this method (via its short name alias)
if shortName in processed_methods:
continue
processed_methods.add(shortName)
for actionName, actionInfo in methodInfo['actions'].items():
# Create compound action name: method.action
compoundActionName = f"{shortName}.{actionName}"

View file

@ -343,6 +343,12 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to
- Check "structureComparison.gap" to see what's missing. If quantitative gaps are available, use them.
- Next action should ONLY generate the MISSING part, NOT repeat what's already delivered
CRITICAL - Missing Data Generation Strategy:
- When gap analysis shows missing data (found count = 0 but required count > 0):
* Generate the missing data FIRST as separate outputs before attempting integration
* Do NOT try to generate AND integrate missing data in one step - data must exist before integration
* Only AFTER missing data exists can you integrate it with existing data in a subsequent action
=== OUTPUT FORMAT ===
Return ONLY JSON (no markdown, no explanations). The decision MUST:
- Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)

View file

@ -28,6 +28,7 @@ class WorkflowProcessor:
self.services = services
self.mode = self._createMode(services.workflow.workflowMode)
self.workflow = services.workflow
self.workflowExecOperationId = None # Will be set by workflowManager for task hierarchy
def _createMode(self, workflowMode: WorkflowModeEnum) -> BaseMode:
"""Create the appropriate mode implementation based on workflow mode"""
@ -111,16 +112,20 @@ class WorkflowProcessor:
# Init progress logger
operationId = f"taskExec_{workflow.id}_{taskIndex}_{int(time.time())}"
# Get parent operationId (Service Workflow Execution) if available
parentOperationId = getattr(self, 'workflowExecOperationId', None)
try:
# Check workflow status before executing task
checkWorkflowStopped(self.services)
# Start progress tracking
# Start progress tracking - Task is child of Service Workflow Execution
self.services.chat.progressLogStart(
operationId,
"Workflow Execution",
"Task Execution",
f"Task {taskIndex}"
f"Task {taskIndex}",
parentOperationId=parentOperationId
)
logger.info(f"=== STARTING TASK EXECUTION ===")

View file

@ -566,72 +566,89 @@ class WorkflowManager:
allTaskResults: List = []
previousResults: List[str] = []
for idx, taskStep in enumerate(taskPlan.tasks):
currentTaskIndex = idx + 1
logger.info(f"Task {currentTaskIndex}/{totalTasks}: {taskStep.objective}")
# Create "Service Workflow Execution" root entry - parent of all tasks
workflowExecOperationId = f"workflowExec_{workflow.id}"
self.services.chat.progressLogStart(
workflowExecOperationId,
"Service",
"Workflow Execution",
f"Executing {totalTasks} task(s)"
)
# Update workflow state before executing task (fixes "Task 0" issue)
handling.updateWorkflowBeforeExecutingTask(currentTaskIndex)
# Store workflow execution operationId in workflowProcessor for task hierarchy
handling.workflowExecOperationId = workflowExecOperationId
# Build TaskContext (mode-specific behavior is inside WorkflowProcessor)
taskContext = TaskContext(
taskStep=taskStep,
workflow=workflow,
workflowId=workflow.id,
availableDocuments=None,
availableConnections=None,
previousResults=previousResults,
previousHandover=None,
improvements=[],
retryCount=0,
previousActionResults=[],
previousReviewResult=None,
isRegeneration=False,
failurePatterns=[],
failedActions=[],
successfulActions=[],
criteriaProgress={
'met_criteria': set(),
'unmet_criteria': set(),
'attempt_history': []
}
)
try:
for idx, taskStep in enumerate(taskPlan.tasks):
currentTaskIndex = idx + 1
logger.info(f"Task {currentTaskIndex}/{totalTasks}: {taskStep.objective}")
taskResult = await handling.executeTask(taskStep, workflow, taskContext)
# Update workflow state before executing task (fixes "Task 0" issue)
handling.updateWorkflowBeforeExecutingTask(currentTaskIndex)
# Persist task result for cross-task/round document references
# Convert ChatTaskResult to WorkflowTaskResult for persistence
from modules.datamodels.datamodelWorkflow import TaskResult as WorkflowTaskResult
from modules.datamodels.datamodelChat import ActionResult
# Get final ActionResult from task execution (last action result)
finalActionResult = None
if hasattr(taskResult, 'actionResult'):
finalActionResult = taskResult.actionResult
elif taskContext.previousActionResults and len(taskContext.previousActionResults) > 0:
# Use last action result from context
finalActionResult = taskContext.previousActionResults[-1]
# Create WorkflowTaskResult for persistence
if finalActionResult:
workflowTaskResult = WorkflowTaskResult(
taskId=taskStep.id,
actionResult=finalActionResult
# Build TaskContext (mode-specific behavior is inside WorkflowProcessor)
taskContext = TaskContext(
taskStep=taskStep,
workflow=workflow,
workflowId=workflow.id,
availableDocuments=None,
availableConnections=None,
previousResults=previousResults,
previousHandover=None,
improvements=[],
retryCount=0,
previousActionResults=[],
previousReviewResult=None,
isRegeneration=False,
failurePatterns=[],
failedActions=[],
successfulActions=[],
criteriaProgress={
'met_criteria': set(),
'unmet_criteria': set(),
'attempt_history': []
}
)
# Persist task result (creates ChatMessage + ChatDocuments)
await handling.persistTaskResult(workflowTaskResult, workflow, taskContext)
handoverData = await handling.prepareTaskHandover(taskStep, [], taskResult, workflow)
allTaskResults.append({
'taskStep': taskStep,
'taskResult': taskResult,
'handoverData': handoverData
})
if taskResult.success and taskResult.feedback:
previousResults.append(taskResult.feedback)
taskResult = await handling.executeTask(taskStep, workflow, taskContext)
# Persist task result for cross-task/round document references
# Convert ChatTaskResult to WorkflowTaskResult for persistence
from modules.datamodels.datamodelWorkflow import TaskResult as WorkflowTaskResult
from modules.datamodels.datamodelChat import ActionResult
# Get final ActionResult from task execution (last action result)
finalActionResult = None
if hasattr(taskResult, 'actionResult'):
finalActionResult = taskResult.actionResult
elif taskContext.previousActionResults and len(taskContext.previousActionResults) > 0:
# Use last action result from context
finalActionResult = taskContext.previousActionResults[-1]
# Create WorkflowTaskResult for persistence
if finalActionResult:
workflowTaskResult = WorkflowTaskResult(
taskId=taskStep.id,
actionResult=finalActionResult
)
# Persist task result (creates ChatMessage + ChatDocuments)
await handling.persistTaskResult(workflowTaskResult, workflow, taskContext)
handoverData = await handling.prepareTaskHandover(taskStep, [], taskResult, workflow)
allTaskResults.append({
'taskStep': taskStep,
'taskResult': taskResult,
'handoverData': handoverData
})
if taskResult.success and taskResult.feedback:
previousResults.append(taskResult.feedback)
# Mark workflow as completed; error/stop cases update status elsewhere
workflow.status = "completed"
finally:
# Finish "Service Workflow Execution" entry
self.services.chat.progressLogFinish(workflowExecOperationId, True)
# Mark workflow as completed; error/stop cases update status elsewhere
workflow.status = "completed"
return None
async def _processWorkflowResults(self) -> None:

View file

@ -71,6 +71,9 @@ google-cloud-texttospeech==2.16.3
## MSFT Integration
msal==1.24.1
## Azure Integration
azure-communication-email>=1.0.0 # Azure Communication Services Email
## Testing Dependencies
pytest>=8.0.0
pytest-asyncio>=0.21.0

View file

@ -0,0 +1,410 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document Generation Formats Test - Tests document generation in all supported formats
Tests HTML, PDF, DOCX, XLSX, and PPTX generation with images and various content types.
"""
import asyncio
import json
import sys
import os
import time
import base64
from typing import Dict, Any, List, Optional
# Add the gateway to path (go up 2 levels from tests/functional/)
_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if _gateway_path not in sys.path:
sys.path.insert(0, _gateway_path)
# Import the service initialization
from modules.services import getInterface as getServices
from modules.datamodels.datamodelChat import UserInputRequest, WorkflowModeEnum
from modules.datamodels.datamodelUam import User
from modules.features.workflow import chatStart
import modules.interfaces.interfaceDbChatObjects as interfaceDbChatObjects
class DocumentGenerationFormatsTester:
    """Functional test driver for document generation in several output formats.

    For each format a dynamic workflow is started with a format-specific
    prompt, polled until it finishes, and the resulting documents are checked
    for the expected file extension, MIME type and a non-zero size.
    """

    # Accepted file extensions per format (lower-case, including the dot).
    # Shared by document filtering and verification so both agree.
    _EXPECTED_EXTENSIONS = {
        "html": [".html", ".htm"],
        "pdf": [".pdf"],
        "docx": [".docx"],
        "xlsx": [".xlsx"],
        "pptx": [".pptx"]
    }

    # Accepted MIME types per format.
    _EXPECTED_MIME_TYPES = {
        "html": ["text/html", "application/xhtml+xml"],
        "pdf": ["application/pdf"],
        "docx": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
        "xlsx": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
        "pptx": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"]
    }

    def __init__(self):
        # Use root user for testing (has full access to everything)
        from modules.interfaces.interfaceDbAppObjects import getRootInterface
        rootInterface = getRootInterface()
        self.testUser = rootInterface.currentUser

        # Initialize services using the existing system
        self.services = getServices(self.testUser, None)  # Test user, no workflow
        self.workflow = None
        self.testResults = {}
        self.generatedDocuments = {}

    async def initialize(self):
        """Initialize the test environment."""
        # Enable debug file logging for tests
        from modules.shared.configuration import APP_CONFIG
        APP_CONFIG.set("APP_DEBUG_CHAT_WORKFLOW_ENABLED", True)

        # Set logging level to INFO to see workflow progress
        import logging
        logging.getLogger().setLevel(logging.INFO)

        print(f"Initialized test with user: {self.testUser.id}")
        print(f"Mandate ID: {self.testUser.mandateId}")
        print(f"Debug logging enabled: {APP_CONFIG.get('APP_DEBUG_CHAT_WORKFLOW_ENABLED', False)}")

    def createTestPrompt(self, format: str) -> str:
        """Create a test prompt for document generation in the specified format.

        The lookup is case-insensitive; unknown formats get the DOCX prompt.
        """
        prompts = {
            "html": "Create a professional HTML document about 'The Future of Artificial Intelligence' with: 1) A main title, 2) An introduction paragraph, 3) Three key sections with headings, 4) A bullet list of benefits, 5) An image showing AI technology (generate it), 6) A conclusion paragraph. Format as HTML.",
            "pdf": "Create a professional PDF report about 'Climate Change Impact Analysis' with: 1) A title page, 2) An executive summary, 3) Three main sections with data tables, 4) Charts/graphs described, 5) An image showing environmental impact (generate it), 6) Conclusions and recommendations. Format as PDF.",
            "docx": "Create a comprehensive Word document about 'Project Management Best Practices' with: 1) A cover page with title, 2) Table of contents, 3) Five chapters with headings and paragraphs, 4) A table comparing methodologies, 5) An image illustrating project workflow (generate it), 6) Appendices. Format as DOCX.",
            "xlsx": "Create an Excel workbook about 'Sales Performance Analysis' with: 1) A summary sheet with key metrics, 2) A detailed data sheet with sales data in a table format (columns: Month, Product, Sales, Units, Revenue), 3) A chart sheet with visualizations described, 4) An analysis sheet with calculations. Format as XLSX.",
            "pptx": "Create a PowerPoint presentation about 'Digital Transformation Strategy' with: 1) A title slide, 2) An agenda slide, 3) Five content slides with bullet points, 4) A slide with an image showing transformation roadmap (generate it), 5) A conclusion slide. Format as PPTX."
        }
        return prompts.get(format.lower(), prompts["docx"])

    def _filterDocumentsByFormat(self, documents: List[Dict[str, Any]], format: str) -> List[Dict[str, Any]]:
        """Return the documents whose file name carries an extension of ``format``.

        Matching is case-insensitive and uses the same extension table as
        verifyDocumentFormat (so ".htm" counts as HTML). Formats missing from
        the table fall back to ".<format>". Documents without a file name are
        skipped.
        """
        formatLower = format.lower()
        # str.endswith accepts a tuple of suffixes
        extensions = tuple(self._EXPECTED_EXTENSIONS.get(formatLower, [f".{formatLower}"]))
        return [
            doc for doc in documents
            if (doc.get("fileName") or "").lower().endswith(extensions)
        ]

    async def generateDocumentInFormat(self, format: str) -> Dict[str, Any]:
        """Generate a document in the specified format using workflow.

        Returns a dict with "success" False and an "error" message when the
        workflow cannot be started or does not complete; otherwise "success"
        True together with the documents matching the requested format.
        """
        print("\n" + "="*80)
        print(f"GENERATING DOCUMENT IN {format.upper()} FORMAT")
        print("="*80)

        prompt = self.createTestPrompt(format)
        print(f"Prompt: {prompt[:200]}...")

        # Create user input request
        userInput = UserInputRequest(
            prompt=prompt,
            userLanguage="en"
        )

        # Start workflow
        print(f"\nStarting workflow for {format.upper()} generation...")
        workflow = await chatStart(
            currentUser=self.testUser,
            userInput=userInput,
            workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC,
            workflowId=None
        )

        if not workflow:
            return {
                "success": False,
                "error": "Failed to start workflow"
            }

        self.workflow = workflow
        print(f"Workflow started: {workflow.id}")

        # Wait for workflow completion
        print(f"Waiting for workflow completion...")
        completed = await self.waitForWorkflowCompletion(timeout=300)  # 5 minute timeout

        if not completed:
            return {
                "success": False,
                "error": "Workflow did not complete within timeout",
                "workflowId": workflow.id,
                "status": workflow.status if workflow else "unknown"
            }

        # Analyze results
        results = self.analyzeWorkflowResults()

        # Extract documents for this format (case-insensitive, shared extension table)
        documents = results.get("documents", [])
        formatDocuments = self._filterDocumentsByFormat(documents, format)

        return {
            "success": True,
            "format": format,
            "workflowId": workflow.id,
            "status": results.get("status"),
            "documentCount": len(formatDocuments),
            "documents": formatDocuments,
            "results": results
        }

    async def waitForWorkflowCompletion(self, timeout: int = 300, checkInterval: int = 2) -> bool:
        """Poll the workflow status until it finishes or ``timeout`` seconds pass.

        Returns True only when the workflow reaches status "completed"; False
        on timeout, when the workflow cannot be found, or when it ends as
        "stopped" or "failed". Errors while polling are printed and retried
        until the timeout expires.
        """
        if not self.workflow:
            return False

        startTime = time.time()
        lastStatus = None
        interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser)

        while True:
            # Check timeout
            if time.time() - startTime > timeout:
                print(f"\n⏱️ Timeout after {timeout} seconds")
                return False

            # Get current workflow status
            try:
                currentWorkflow = interfaceDbChat.getWorkflow(self.workflow.id)
                if not currentWorkflow:
                    print("\n❌ Workflow not found")
                    return False

                currentStatus = currentWorkflow.status
                elapsed = int(time.time() - startTime)

                # Print status if it changed
                if currentStatus != lastStatus:
                    print(f"Workflow status: {currentStatus} (elapsed: {elapsed}s)")
                    lastStatus = currentStatus

                # Check if workflow is complete
                if currentStatus in ["completed", "stopped", "failed"]:
                    self.workflow = currentWorkflow
                    # NOTE(review): both branches yield an empty icon - confirm whether icons were intended here
                    statusIcon = "" if currentStatus == "completed" else ""
                    print(f"\n{statusIcon} Workflow finished with status: {currentStatus} (elapsed: {elapsed}s)")
                    return currentStatus == "completed"

                # Wait before next check
                await asyncio.sleep(checkInterval)
            except Exception as e:
                # Best effort: keep polling; the timeout check above bounds the loop
                print(f"\n⚠️ Error checking workflow status: {str(e)}")
                await asyncio.sleep(checkInterval)

    def analyzeWorkflowResults(self) -> Dict[str, Any]:
        """Analyze workflow results and extract information.

        Reloads the workflow and its unified chat data, prints a short
        summary and returns counts plus the raw documents and logs. Returns
        a dict with only an "error" key when no workflow is available.
        """
        if not self.workflow:
            return {"error": "No workflow to analyze"}

        interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser)
        workflow = interfaceDbChat.getWorkflow(self.workflow.id)
        if not workflow:
            return {"error": "Workflow not found"}

        # Get unified chat data
        chatData = interfaceDbChat.getUnifiedChatData(workflow.id, None)

        # Count messages
        messages = chatData.get("messages", [])
        userMessages = [m for m in messages if m.get("role") == "user"]
        assistantMessages = [m for m in messages if m.get("role") == "assistant"]

        # Count documents
        documents = chatData.get("documents", [])

        # Get logs
        logs = chatData.get("logs", [])

        results = {
            "workflowId": workflow.id,
            "status": workflow.status,
            "workflowMode": str(workflow.workflowMode) if hasattr(workflow, 'workflowMode') else None,
            "currentRound": workflow.currentRound,
            "totalTasks": workflow.totalTasks,
            "totalActions": workflow.totalActions,
            "messageCount": len(messages),
            "userMessageCount": len(userMessages),
            "assistantMessageCount": len(assistantMessages),
            "documentCount": len(documents),
            "logCount": len(logs),
            "documents": documents,
            "logs": logs
        }

        print(f"\nWorkflow Results:")
        print(f" Status: {results['status']}")
        print(f" Tasks: {results['totalTasks']}")
        print(f" Actions: {results['totalActions']}")
        print(f" Messages: {results['messageCount']}")
        print(f" Documents: {results['documentCount']}")

        # Print document details
        if documents:
            print(f"\nGenerated Documents:")
            for doc in documents:
                fileName = doc.get("fileName", "unknown")
                fileSize = doc.get("fileSize", 0)
                mimeType = doc.get("mimeType", "unknown")
                print(f" - {fileName} ({fileSize} bytes, {mimeType})")

        return results

    def verifyDocumentFormat(self, document: Dict[str, Any], expectedFormat: str) -> Dict[str, Any]:
        """Verify that a document matches the expected format.

        Checks the file extension, the MIME type and that the size is greater
        than zero. Missing or None values are treated as empty / zero rather
        than raising.

        Returns a dict with the individual check results and "isValid".
        """
        # "or" also covers keys that are present but None
        fileName = document.get("fileName") or ""
        mimeType = document.get("mimeType") or ""
        fileSize = document.get("fileSize") or 0

        formatLower = expectedFormat.lower()
        expectedMimes = self._EXPECTED_MIME_TYPES.get(formatLower, [])
        expectedExts = self._EXPECTED_EXTENSIONS.get(formatLower, [])

        # Check file extension
        hasCorrectExtension = any(fileName.lower().endswith(ext) for ext in expectedExts)

        # Check MIME type
        hasCorrectMimeType = any(mimeType.lower() == mime.lower() for mime in expectedMimes)

        # Check file size (should be > 0)
        hasValidSize = fileSize > 0

        verification = {
            "format": expectedFormat,
            "fileName": fileName,
            "mimeType": mimeType,
            "fileSize": fileSize,
            "hasCorrectExtension": hasCorrectExtension,
            "hasCorrectMimeType": hasCorrectMimeType,
            "hasValidSize": hasValidSize,
            # NOTE(review): the MIME check is reported but not part of isValid - confirm this is intended
            "isValid": hasCorrectExtension and hasValidSize
        }
        return verification

    async def testAllFormats(self) -> Dict[str, Any]:
        """Test document generation in all formats.

        Runs the formats one after another and returns a dict mapping each
        format to its result; an exception in one format is recorded as a
        failed result and does not stop the remaining formats.
        """
        print("\n" + "="*80)
        print("TESTING DOCUMENT GENERATION IN ALL FORMATS")
        print("="*80)

        formats = ["html", "pdf", "docx", "xlsx", "pptx"]
        results = {}

        for fmt in formats:
            try:
                print(f"\n{'='*80}")
                print(f"Testing {fmt.upper()} format...")
                print(f"{'='*80}")

                result = await self.generateDocumentInFormat(fmt)
                results[fmt] = result

                if result.get("success"):
                    documents = result.get("documents", [])
                    if documents:
                        # Verify first document
                        verification = self.verifyDocumentFormat(documents[0], fmt)
                        result["verification"] = verification

                        print(f"\n{fmt.upper()} generation successful!")
                        print(f" Documents: {len(documents)}")
                        print(f" Verification: {'✅ PASS' if verification['isValid'] else '❌ FAIL'}")
                        if verification.get("fileName"):
                            print(f" File: {verification['fileName']}")
                            print(f" Size: {verification['fileSize']} bytes")
                            print(f" MIME: {verification['mimeType']}")
                    else:
                        print(f"\n⚠️ {fmt.upper()} generation completed but no documents found")
                else:
                    error = result.get("error", "Unknown error")
                    print(f"\n{fmt.upper()} generation failed: {error}")

                # Small delay between tests
                await asyncio.sleep(2)
            except Exception as e:
                import traceback
                print(f"\n❌ Error testing {fmt.upper()}: {str(e)}")
                print(traceback.format_exc())
                results[fmt] = {
                    "success": False,
                    "error": str(e),
                    "traceback": traceback.format_exc()
                }

        return results

    async def runTest(self):
        """Run the complete test.

        Initializes the environment, tests all formats, prints a summary and
        stores and returns the aggregated results in self.testResults. Any
        unexpected exception is captured in the returned dict instead of
        being raised.
        """
        print("\n" + "="*80)
        print("DOCUMENT GENERATION FORMATS TEST")
        print("="*80)

        try:
            # Initialize
            await self.initialize()

            # Test all formats
            results = await self.testAllFormats()

            # Summary
            print("\n" + "="*80)
            print("TEST SUMMARY")
            print("="*80)

            successCount = 0
            failCount = 0

            for fmt, result in results.items():
                if result.get("success"):
                    successCount += 1
                    status = "✅ PASS"
                    docCount = result.get("documentCount", 0)
                    verification = result.get("verification", {})
                    isValid = verification.get("isValid", False)
                    # A run without a verified document still counts as passed, but gets a warning icon
                    statusIcon = "" if isValid else "⚠️"
                    print(f"{statusIcon} {fmt.upper():6s}: {status} - {docCount} document(s)")
                else:
                    failCount += 1
                    error = result.get("error", "Unknown error")
                    print(f"{fmt.upper():6s}: FAIL - {error}")

            print(f"\nTotal: {successCount} passed, {failCount} failed out of {len(results)} formats")

            self.testResults = {
                "success": failCount == 0,
                "successCount": successCount,
                "failCount": failCount,
                "totalFormats": len(results),
                "results": results
            }
            return self.testResults
        except Exception as e:
            import traceback
            print(f"\n❌ Test failed with error: {type(e).__name__}: {str(e)}")
            print(f"Traceback:\n{traceback.format_exc()}")
            self.testResults = {
                "success": False,
                "error": str(e),
                "traceback": traceback.format_exc()
            }
            return self.testResults
async def main():
    """Run the document generation formats test and print the results as JSON."""
    runner = DocumentGenerationFormatsTester()
    outcome = await runner.runTest()

    # Print final results as JSON for easy parsing
    divider = "=" * 80
    print("\n" + divider)
    print("FINAL RESULTS (JSON)")
    print(divider)
    # default=str stringifies values json cannot serialise natively
    print(json.dumps(outcome, indent=2, default=str))


if __name__ == "__main__":
    asyncio.run(main())