Enhanced core AI call document handling with document intent

This commit is contained in:
ValueOn AG 2025-12-25 00:09:27 +01:00
parent 262f3296bf
commit e1b3cd36f0
32 changed files with 2799 additions and 3871 deletions

View file

@ -61,6 +61,14 @@ class MergeStrategy(BaseModel):
capabilities: Optional[Dict[str, Any]] = Field(default=None, description="Model capabilities for intelligent merging")
class DocumentIntent(BaseModel):
    """Intent analysis for a single document.

    Captures how a document should be used downstream: extracted, rendered,
    and/or referenced. Produced per document so the caller can route each
    one individually.
    """
    # Identifier of the document this intent applies to.
    documentId: str = Field(description="ID des Dokuments")
    # One or more of 'extract', 'render', 'reference' — multiple intents may apply.
    intents: List[str] = Field(description="Liste von Intents: ['extract', 'render', 'reference'] - mehrere möglich")
    # Optional extraction-specific instruction (e.g. "Extract text from images for legends").
    extractionPrompt: Optional[str] = Field(default=None, description="Spezifischer Prompt für Extraktion (z.B. 'Extract text from images for legends')")
    # Human-readable justification, kept for debugging/transparency of the chosen intent.
    reasoning: str = Field(description="Erklärung für Debugging/Transparenz: Warum wurde dieser Intent gewählt?")
class ExtractionOptions(BaseModel):
"""Options for document extraction and processing with clear data structures."""

File diff suppressed because it is too large Load diff

View file

@ -34,12 +34,42 @@ class StructureChunker(Chunker):
if bucket:
emit(bucket)
else:
# JSON object (dict) - check if it fits
text = json.dumps(obj, ensure_ascii=False)
if len(text.encode('utf-8')) <= maxBytes:
textSize = len(text.encode('utf-8'))
if textSize <= maxBytes:
emit(obj)
else:
# fallback to line chunking
raise ValueError("too large")
# Object too large - try to split by keys if possible
# For large objects, we need to chunk by character boundaries
# since we can't split JSON objects arbitrarily
if isinstance(obj, dict) and len(obj) > 1:
# Try to split object into multiple chunks by keys
# This preserves JSON structure better than line-based chunking
currentChunk: Dict[str, Any] = {}
currentSize = 2 # Start with "{}" overhead
for key, value in obj.items():
itemText = json.dumps({key: value}, ensure_ascii=False)
itemSize = len(itemText.encode('utf-8'))
# Account for comma and spacing between items
if currentChunk:
itemSize += 2 # ", " separator
if currentSize + itemSize > maxBytes and currentChunk:
# Current chunk is full, emit it
emit(currentChunk)
currentChunk = {key: value}
currentSize = len(itemText.encode('utf-8'))
else:
currentChunk[key] = value
currentSize += itemSize
# Emit remaining chunk
if currentChunk:
emit(currentChunk)
else:
# Single large value or can't split - fallback to line chunking
raise ValueError("too large")
except Exception:
current: List[str] = []
size = 0

View file

@ -6,10 +6,11 @@ import logging
import time
import asyncio
import base64
import json
from .subRegistry import ExtractorRegistry, ChunkerRegistry
from .subPipeline import runExtraction
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy, ExtractionOptions, PartResult
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy, ExtractionOptions, PartResult, DocumentIntent
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallResponse, AiCallRequest, AiCallOptions, OperationTypeEnum, AiModelCall
from modules.aicore.aicoreModelRegistry import modelRegistry
@ -73,12 +74,14 @@ class ExtractionService:
if operationId:
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
docOperationId = f"{operationId}_doc_{i}"
# Use parentOperationId if provided, otherwise use operationId as parent
parentId = parentOperationId if parentOperationId else operationId
self.services.chat.progressLogStart(
docOperationId,
"Extracting Document",
f"Document {i + 1}/{totalDocs}",
doc.fileName[:50] + "..." if len(doc.fileName) > 50 else doc.fileName,
parentOperationId=operationId # Use operationId as parent (not parentOperationId)
parentOperationId=parentId # Correct parent reference for ChatLog hierarchy
)
# Start timing for this document
@ -125,12 +128,41 @@ class ExtractionService:
if part.metadata:
logger.debug(f" Metadata: {part.metadata}")
# Attach document id and MIME type to parts if missing
# Attach complete metadata to parts according to ContentPart Metadaten-Schema
for p in ec.parts:
# Ensure metadata dict exists
if not p.metadata:
p.metadata = {}
# Required metadata fields (from concept)
if "documentId" not in p.metadata:
p.metadata["documentId"] = documentData["id"] or str(uuid.uuid4())
if "documentMimeType" not in p.metadata:
p.metadata["documentMimeType"] = documentData["mimeType"]
if "originalFileName" not in p.metadata:
p.metadata["originalFileName"] = documentData["fileName"]
# ContentFormat: Set based on typeGroup and mimeType
# Default to "extracted" for text content, but can be overridden by caller
if "contentFormat" not in p.metadata:
# Default: extracted text content
p.metadata["contentFormat"] = "extracted"
# Intent: Default to "extract" for extracted content
if "intent" not in p.metadata:
p.metadata["intent"] = "extract"
# ExtractionPrompt: Use from options if available
if "extractionPrompt" not in p.metadata and options and options.prompt:
p.metadata["extractionPrompt"] = options.prompt
# UsageHint: Provide default hint
if "usageHint" not in p.metadata:
p.metadata["usageHint"] = f"Use extracted content from {documentData['fileName']}"
# SourceAction: Mark as from extraction service
if "sourceAction" not in p.metadata:
p.metadata["sourceAction"] = "extraction.extractContent"
# Log chunking information
chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)]
@ -185,7 +217,7 @@ class ExtractionService:
# Write extraction results to debug file
try:
from modules.shared.debugLogger import writeDebugFile
import json
# json is already imported at module level
# Create summary of extraction results for debug
extractionSummary = {
"documentName": doc.fileName,
@ -487,7 +519,8 @@ class ExtractionService:
prompt: str,
aiObjects: Any,
options: Optional[AiCallOptions] = None,
operationId: Optional[str] = None
operationId: Optional[str] = None,
parentOperationId: Optional[str] = None
) -> str:
"""
Process documents with model-aware chunking and merge results.
@ -499,6 +532,7 @@ class ExtractionService:
aiObjects: AiObjects instance for making AI calls
options: AI call options
operationId: Optional operation ID for progress tracking
parentOperationId: Optional parent operation ID for hierarchical logging
Returns:
Merged AI results as string with preserved document structure
@ -514,7 +548,8 @@ class ExtractionService:
operationId,
"AI Text Extract",
"Document Processing",
f"Processing {len(documents)} documents"
f"Processing {len(documents)} documents",
parentOperationId=parentOperationId # Use parentOperationId if provided
)
try:
@ -539,7 +574,8 @@ class ExtractionService:
if operationId:
self.services.chat.progressLogUpdate(operationId, 0.1, f"Extracting content from {len(documents)} documents")
# Pass operationId as parentOperationId for hierarchical logging
extractionResult = self.extractContent(documents, extractionOptions, operationId=operationId, parentOperationId=parentOperationId)
# Correct hierarchy: parentOperationId -> operationId -> docOperationId
extractionResult = self.extractContent(documents, extractionOptions, operationId=operationId, parentOperationId=operationId)
if not isinstance(extractionResult, list):
if operationId:
@ -549,9 +585,10 @@ class ExtractionService:
# Process parts (not chunks) with model-aware AI calls
if operationId:
self.services.chat.progressLogUpdate(operationId, 0.3, f"Processing {len(extractionResult)} extracted content parts")
# Use parent operation ID directly (parentId should be operationId, not log entry ID)
parentOperationId = operationId # Use the parent's operationId directly
partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, parentOperationId)
# Use operationId as parentOperationId for child operations
# Correct hierarchy: parentOperationId -> operationId -> partOperationId
processParentOperationId = operationId
partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, processParentOperationId)
# Merge results using existing merging system
if operationId:
@ -733,7 +770,8 @@ class ExtractionService:
# Detect input type and convert accordingly
if isinstance(partResults[0], PartResult):
# Existing logic for PartResult (from processDocumentsPerChunk)
for part_result in partResults:
# Phase 7: Add originalIndex for explicit ordering
for i, part_result in enumerate(partResults):
content_part = ContentPart(
id=part_result.originalPart.id,
parentId=part_result.originalPart.parentId,
@ -744,7 +782,9 @@ class ExtractionService:
metadata={
**part_result.originalPart.metadata,
"aiResult": True,
"originalIndex": i, # Phase 7: Explicit order index
"partIndex": part_result.partIndex,
"processingOrder": i, # Phase 7: Processing order
"documentId": part_result.documentId,
"processingTime": part_result.processingTime,
"success": part_result.metadata.get("success", False)
@ -753,6 +793,7 @@ class ExtractionService:
content_parts.append(content_part)
elif isinstance(partResults[0], AiCallResponse):
# Logic from interfaceAiObjects (from content parts processing)
# Phase 7: Add originalIndex for explicit ordering
for i, result in enumerate(partResults):
if result.content:
content_part = ContentPart(
@ -764,6 +805,8 @@ class ExtractionService:
data=result.content,
metadata={
"aiResult": True,
"originalIndex": i, # Phase 7: Explicit order index
"processingOrder": i, # Phase 7: Processing order
"modelName": result.modelName,
"priceUsd": result.priceUsd,
"processingTime": result.processingTime,
@ -792,11 +835,12 @@ class ExtractionService:
# Determine merge strategy based on input type
if isinstance(partResults[0], PartResult):
# Use strategy for extraction workflow (group by document, order by part index)
# Phase 7: Use originalIndex for explicit ordering
# Use strategy for extraction workflow (group by document, order by originalIndex)
merge_strategy = MergeStrategy(
useIntelligentMerging=True,
groupBy="documentId", # Group by document
orderBy="partIndex", # Order by part index
orderBy="originalIndex", # Phase 7: Order by originalIndex instead of partIndex
mergeType="concatenate"
)
else:
@ -811,10 +855,52 @@ class ExtractionService:
# Apply merging
merged_parts = applyMerging(content_parts, merge_strategy)
# Convert back to string
final_content = "\n\n".join([part.data for part in merged_parts])
# Phase 6: Enhanced format with metadata preservation
# CRITICAL: For generation responses (JSON), don't add SOURCE markers - they interfere with JSON parsing
# Check if this is a generation response by looking at operationType or content structure
isGenerationResponse = False
if options and hasattr(options, 'operationType'):
# Generation responses use DATA_GENERATE operation type
from modules.datamodels.datamodelAi import OperationTypeEnum
isGenerationResponse = options.operationType == OperationTypeEnum.DATA_GENERATE
logger.info(f"Merged {len(partResults)} parts using unified merging system")
# Also check if content looks like JSON (starts with { or [)
if not isGenerationResponse and merged_parts:
firstPartData = merged_parts[0].data if merged_parts[0].data else ""
if isinstance(firstPartData, str) and firstPartData.strip().startswith(('{', '[')):
# Check if it's a complete JSON structure (not extracted content)
# Generation responses are complete JSON, extraction responses are text content
try:
# json is already imported at module level
json.loads(firstPartData.strip())
# If it parses as JSON and has "documents" key, it's likely a generation response
parsed = json.loads(firstPartData.strip())
if isinstance(parsed, dict) and "documents" in parsed:
isGenerationResponse = True
except:
pass
content_sections = []
for part in merged_parts:
if isGenerationResponse:
# For generation responses, return JSON directly without SOURCE markers
content_sections.append(part.data)
else:
# For extraction responses, include metadata in section header for traceability
doc_id = part.metadata.get("documentId", "unknown")
doc_mime = part.metadata.get("documentMimeType", "unknown")
label = part.label or "content"
section = f"""
[SOURCE: documentId={doc_id}, mimeType={doc_mime}, label={label}]
{part.data}
[END SOURCE]
"""
content_sections.append(section)
final_content = "\n\n".join(content_sections)
logger.info(f"Merged {len(partResults)} parts using unified merging system with metadata preservation (generationResponse={isGenerationResponse})")
return final_content.strip()
async def chunkContentPartForAi(self, contentPart, model, options, prompt: str = "") -> List[Dict[str, Any]]:
@ -827,9 +913,14 @@ class ExtractionService:
modelContextTokens = model.contextLength # Total context in tokens
modelMaxOutputTokens = model.maxTokens # Maximum output tokens
# CRITICAL: Use same conservative token factor as in processContentPartWithFallback
# Real-world observation: Our calculation says 94k tokens, but API says 217k tokens (2.3x difference!)
TOKEN_SAFETY_FACTOR = 2.2 # Conservative: accounts for JSON tokenization and API overhead
# Reserve tokens for:
# 1. Prompt (user message)
promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0
# 1. Prompt (user message) - use conservative factor
promptSize = len(prompt.encode('utf-8')) if prompt else 0
promptTokens = promptSize / TOKEN_SAFETY_FACTOR
# 2. System message wrapper ("Context from documents:\n")
systemMessageTokens = 10 # ~40 bytes = 10 tokens
@ -844,31 +935,38 @@ class ExtractionService:
totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens
# Available tokens for content = context length - reserved tokens
# Use 80% of available for safety margin
availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8)
# Use 60% of available (same conservative margin as in processContentPartWithFallback)
availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.60)
# Ensure we have at least some space
if availableContentTokens < 100:
logger.warning(f"Very limited space for content: {availableContentTokens} tokens available. Model: {model.name}, contextLength: {modelContextTokens}, maxTokens: {modelMaxOutputTokens}, prompt: {promptTokens:.0f} tokens")
availableContentTokens = max(100, int(modelContextTokens * 0.1)) # Fallback to 10% of context
# Convert tokens to bytes (1 token ≈ 4 bytes)
availableContentBytes = availableContentTokens * 4
# Convert tokens to bytes using conservative factor (reverse: bytes = tokens * factor)
availableContentBytes = int(availableContentTokens * TOKEN_SAFETY_FACTOR)
logger.debug(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens, reserved={totalReservedTokens:.0f} tokens, available={availableContentTokens} tokens ({availableContentBytes} bytes)")
logger.info(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens est., reserved={totalReservedTokens:.0f} tokens est., available={availableContentTokens} tokens est. ({availableContentBytes} bytes), factor={TOKEN_SAFETY_FACTOR}")
# Use 70% of available content bytes for text chunks (conservative)
textChunkSize = int(availableContentBytes * 0.7)
imageChunkSize = int(availableContentBytes * 0.8) # 80% for image chunks
# Use 50% of available content bytes for text chunks (very conservative to ensure chunks fit)
# This ensures that even with token counting inaccuracies, chunks will fit
textChunkSize = int(availableContentBytes * 0.5)
structureChunkSize = int(availableContentBytes * 0.5) # CRITICAL: Also set for StructureChunker (JSON content)
tableChunkSize = int(availableContentBytes * 0.5) # Also set for TableChunker
imageChunkSize = int(availableContentBytes * 0.6) # 60% for image chunks
# Build chunking options
# Build chunking options - include ALL chunk size options for different chunkers
chunkingOptions = {
"textChunkSize": textChunkSize,
"structureChunkSize": structureChunkSize, # CRITICAL: Required for StructureChunker (JSON)
"tableChunkSize": tableChunkSize, # Required for TableChunker
"imageChunkSize": imageChunkSize,
"maxSize": availableContentBytes,
"chunkAllowed": True
}
logger.info(f"Chunking options: textChunkSize={textChunkSize} bytes, structureChunkSize={structureChunkSize} bytes, tableChunkSize={tableChunkSize} bytes, imageChunkSize={imageChunkSize} bytes, contentPartSize={len(contentPart.data.encode('utf-8')) if contentPart.data else 0} bytes")
# Get appropriate chunker (uses existing ChunkerRegistry ✅)
chunker = self._chunkerRegistry.resolve(contentPart.typeGroup)
@ -878,8 +976,14 @@ class ExtractionService:
# Chunk the content part
try:
contentSize = len(contentPart.data.encode('utf-8')) if contentPart.data else 0
logger.info(f"Chunking {contentPart.typeGroup} part: contentSize={contentSize} bytes, textChunkSize={textChunkSize} bytes, structureChunkSize={structureChunkSize} bytes")
chunks = chunker.chunk(contentPart, chunkingOptions)
logger.debug(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part")
logger.info(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part (contentSize={contentSize} bytes)")
if chunks:
for i, chunk in enumerate(chunks):
chunkSize = len(chunk.get('data', '').encode('utf-8')) if chunk.get('data') else 0
logger.info(f" Chunk {i+1}/{len(chunks)}: {chunkSize} bytes")
return chunks
except Exception as e:
logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}")
@ -999,15 +1103,86 @@ class ExtractionService:
availableContentBytes = availableContentTokens * 4
logger.debug(f"Size check for {model.name}: partSize={partSize} bytes, availableContentBytes={availableContentBytes} bytes")
# Also check prompt size - prompt + content together must fit
promptSize = len(prompt.encode('utf-8')) if prompt else 0
if partSize <= availableContentBytes:
# CRITICAL: Token counting approximation is VERY inaccurate for JSON/content
# Real-world observation: Our calculation says 94k tokens, but API says 217k tokens (2.3x difference!)
# This happens because:
# 1. JSON/structured content tokenizes differently (more tokens per byte)
# 2. API has message structure overhead (system prompts, message wrappers)
# 3. Tokenizer differences between our approximation and actual API tokenizer
# Use conservative factor: 1 token ≈ 2.2 bytes (instead of 4) to account for these differences
TOKEN_SAFETY_FACTOR = 2.2 # Conservative: accounts for JSON tokenization and API overhead
promptTokens = promptSize / TOKEN_SAFETY_FACTOR
contentTokens = partSize / TOKEN_SAFETY_FACTOR
totalTokens = promptTokens + contentTokens
# CRITICAL: Use very conservative margin (60%) because:
# 1. Token counting approximation is inaccurate - real tokens can be 2-3x more
# 2. API has additional overhead (message structure, system prompts, etc.)
# 3. Anthropic API is strict about the 200k limit
# 4. We've seen cases where our calculation says "fits" but API says "too long"
maxTotalTokens = int(modelContextTokens * 0.60)
logger.info(f"Size check for {model.name}: partSize={partSize} bytes ({contentTokens:.0f} tokens est.), promptSize={promptSize} bytes ({promptTokens:.0f} tokens est.), total={totalTokens:.0f} tokens est., modelContext={modelContextTokens} tokens, maxTotal={maxTotalTokens} tokens (60% margin, conservative factor={TOKEN_SAFETY_FACTOR})")
# CRITICAL: Always check totalTokens first - if prompt + content exceeds limit, MUST chunk
# Token counting approximation may differ significantly from API, so use very conservative margin
if totalTokens > maxTotalTokens:
logger.warning(f"⚠️ Total tokens ({totalTokens:.0f} est.) exceed model limit ({maxTotalTokens}), chunking required. Prompt: {promptTokens:.0f} tokens est., Content: {contentTokens:.0f} tokens est.")
elif partSize > availableContentBytes:
logger.warning(f"⚠️ Content part ({contentTokens:.0f} tokens est.) exceeds available space ({availableContentBytes/TOKEN_SAFETY_FACTOR:.0f} tokens est.), chunking required")
# If either condition fails, chunk the content
if totalTokens > maxTotalTokens or partSize > availableContentBytes:
# Part too large or total exceeds limit - chunk it
chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt)
if not chunks:
raise ValueError(f"Failed to chunk content part for model {model.name}")
logger.info(f"Starting to process {len(chunks)} chunks with model {model.name}")
if progressCallback:
progressCallback(0.0, f"Starting to process {len(chunks)} chunks")
chunkResults = []
for idx, chunk in enumerate(chunks):
chunkNum = idx + 1
chunkData = chunk.get('data', '')
logger.info(f"Processing chunk {chunkNum}/{len(chunks)} with model {model.name}")
if progressCallback:
progressCallback(chunkNum / len(chunks), f"Processing chunk {chunkNum}/{len(chunks)}")
try:
chunkResponse = await aiObjects._callWithModel(model, prompt, chunkData, options)
chunkResults.append(chunkResponse)
except Exception as chunkError:
logger.error(f"Error processing chunk {chunkNum}/{len(chunks)}: {str(chunkError)}")
# Continue with other chunks even if one fails
continue
# Merge chunk results
if not chunkResults:
raise ValueError(f"All chunks failed for content part")
mergedContent = self.mergePartResults(chunkResults, options)
return AiCallResponse(
content=mergedContent,
modelName=model.name,
priceUsd=sum(r.priceUsd for r in chunkResults),
processingTime=sum(r.processingTime for r in chunkResults),
bytesSent=sum(r.bytesSent for r in chunkResults),
bytesReceived=sum(r.bytesReceived for r in chunkResults),
errorCount=sum(r.errorCount for r in chunkResults)
)
else:
# Part fits - call AI directly via aiObjects interface
logger.info(f"✅ Content part fits within model limits, processing directly")
response = await aiObjects._callWithModel(model, prompt, contentPart.data, options)
logger.info(f"✅ Content part processed successfully with model: {model.name}")
return response
else:
# Part too large - chunk it
chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt)
if not chunks:
raise ValueError(f"Failed to chunk content part for model {model.name}")
@ -1037,8 +1212,8 @@ class ExtractionService:
logger.error(f"❌ Error processing chunk {chunkNum}/{len(chunks)}: {str(e)}")
raise
# Merge chunk results
mergedContent = self.mergeChunkResults(chunkResults)
# Merge chunk results using unified mergePartResults
mergedContent = self.mergePartResults(chunkResults, options)
logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)")
return AiCallResponse(

View file

@ -2,7 +2,9 @@
# All rights reserved.
import logging
import uuid
from typing import Any, Dict, List, Optional
import base64
import traceback
from typing import Any, Dict, List, Optional, Callable
from modules.datamodels.datamodelChat import ChatDocument
from modules.services.serviceGeneration.subDocumentUtility import (
getFileExtension,
@ -100,12 +102,12 @@ class GenerationService:
# For binary data, handle bytes vs base64 string vs regular string
if isinstance(documentData, bytes):
# Already bytes - encode to base64 string for storage
import base64
# base64 is already imported at module level
content = base64.b64encode(documentData).decode('utf-8')
base64encoded = True
elif isinstance(documentData, str):
# Check if it's already valid base64
import base64
# base64 is already imported at module level
try:
# Try to decode to verify it's base64
base64.b64decode(documentData, validate=True)
@ -122,7 +124,7 @@ class GenerationService:
continue
else:
# Other types - convert to string then base64
import base64
# base64 is already imported at module level
try:
content = base64.b64encode(str(documentData).encode('utf-8')).decode('utf-8')
base64encoded = True
@ -231,7 +233,7 @@ class GenerationService:
return None
# Convert content to bytes
if base64encoded:
import base64
# base64 is already imported at module level
content_bytes = base64.b64decode(content)
else:
content_bytes = content.encode('utf-8')
@ -319,10 +321,12 @@ class GenerationService:
'workflowId': 'unknown'
}
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str, List[Dict[str, Any]]]:
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> tuple[str, str, List[Dict[str, Any]]]:
"""
Render extracted JSON content to the specified output format.
Supports multiple documents in documents array (Phase 5: Multi-Dokument-Rendering).
Always uses unified "documents" array format.
Supports three content formats: reference, object (base64), extracted_text.
Args:
extractedContent: Structured JSON document from AI extraction
@ -330,6 +334,7 @@ class GenerationService:
title: Report title
userPrompt: User's original prompt for report generation
aiService: AI service instance for generation prompt creation
parentOperationId: Optional parent operation ID for hierarchical logging
Returns:
tuple: (rendered_content, mime_type, images_list)
@ -348,15 +353,40 @@ class GenerationService:
if len(documents) == 0:
raise ValueError("No documents found in 'documents' array")
# Use first document for rendering
single_doc = documents[0]
if "sections" not in single_doc:
raise ValueError("Document must contain 'sections' field")
# Pass standardized schema to renderer (maintains architecture)
# Renderer should extract sections from documents array according to standardized schema
# Standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
contentToRender = extractedContent # Pass full standardized schema
# Phase 5: Multi-Dokument-Rendering
if len(documents) == 1:
# Single document - use existing logic
single_doc = documents[0]
if "sections" not in single_doc:
raise ValueError("Document must contain 'sections' field")
# Pass standardized schema to renderer (maintains architecture)
contentToRender = extractedContent # Pass full standardized schema
else:
# Multiple documents - merge all sections into one document for rendering
# Option: Merge all sections from all documents into a single document
all_sections = []
for doc in documents:
if isinstance(doc, dict) and "sections" in doc:
sections = doc.get("sections", [])
if isinstance(sections, list):
all_sections.extend(sections)
if not all_sections:
raise ValueError("No sections found in any document")
# Create merged document with all sections
merged_document = {
"metadata": extractedContent.get("metadata", {}),
"documents": [{
"id": "merged",
"title": title,
"filename": f"{title}.{outputFormat}",
"sections": all_sections
}]
}
contentToRender = merged_document
logger.info(f"Rendering {len(documents)} documents with {len(all_sections)} total sections")
# Get the appropriate renderer for the format
renderer = self._getFormatRenderer(outputFormat)
@ -378,6 +408,92 @@ class GenerationService:
logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
raise
async def generateDocumentWithTwoPhases(
    self,
    userPrompt: str,
    cachedContent: Optional[Dict[str, Any]] = None,
    contentParts: Optional[List[Any]] = None,
    maxSectionLength: int = 500,
    parallelGeneration: bool = True,
    progressCallback: Optional[Callable] = None
) -> Dict[str, Any]:
    """Generate a document using the two-phase approach.

    Phase 1 builds a structure skeleton with empty sections; phase 2 fills
    each section with content iteratively. This is the core document
    generation logic for AI calls.

    Args:
        userPrompt: User's original prompt.
        cachedContent: Optional extracted content cache (from the extraction phase).
        contentParts: Optional list of ContentParts used for structure/content generation.
        maxSectionLength: Maximum words for simple sections.
        parallelGeneration: Enable parallel section generation.
        progressCallback: Optional callback(progress, total, message) for progress updates.

    Returns:
        Complete document structure with populated elements, ready for rendering.

    Raises:
        Exception: Re-raises any error from the underlying generators after logging.
    """
    try:
        # Local imports to avoid module-level import cycles with the generation services.
        from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator
        from modules.services.serviceGeneration.subContentGenerator import ContentGenerator

        # ---- Phase 1: structure skeleton (0% -> 30%) -------------------
        if progressCallback:
            progressCallback(0, 100, "Generating document structure...")
        skeletonBuilder = StructureGenerator(self.services)

        # Reuse any images already extracted during the extraction phase.
        priorImages = None
        if cachedContent and cachedContent.get("imageDocuments"):
            priorImages = cachedContent.get("imageDocuments")

        structure = await skeletonBuilder.generateStructure(
            userPrompt=userPrompt,
            documentList=None,  # not used in the current implementation
            cachedContent=cachedContent,
            contentParts=contentParts,  # ContentParts feed structure generation
            maxSectionLength=maxSectionLength,
            existingImages=priorImages
        )
        if progressCallback:
            progressCallback(30, 100, "Structure generated, starting content generation...")

        # ---- Phase 2: per-section content (30% -> 90%) -----------------
        sectionFiller = ContentGenerator(self.services)

        def sectionProgress(sectionIndex: int, totalSections: int, message: str):
            # Map section-level progress onto the overall 30..90% window.
            if not progressCallback:
                return
            fraction = (sectionIndex / totalSections) if totalSections > 0 else 0
            progressCallback(30 + int(60 * fraction), 100, f"Section {sectionIndex}/{totalSections}: {message}")

        completeStructure = await sectionFiller.generateContent(
            structure=structure,
            cachedContent=cachedContent,
            userPrompt=userPrompt,
            contentParts=contentParts,  # ContentParts feed content generation
            progressCallback=sectionProgress,
            parallelGeneration=parallelGeneration
        )
        if progressCallback:
            progressCallback(100, 100, "Document generation complete")
        return completeStructure
    except Exception as e:
        logger.error(f"Error in two-phase document generation: {str(e)}")
        logger.debug(traceback.format_exc())
        raise
async def getAdaptiveExtractionPrompt(
self,
outputFormat: str,
@ -423,6 +539,6 @@ class GenerationService:
except Exception as e:
logger.error(f"Error getting renderer for {output_format}: {str(e)}")
import traceback
# traceback is already imported at module level
logger.debug(traceback.format_exc())
return None

View file

@ -68,6 +68,7 @@ class BaseRenderer(ABC):
def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
Phase 5: Supports multiple documents - extracts all sections from all documents.
"""
if "documents" not in reportData:
raise ValueError("Report data must follow standardized schema with 'documents' array")
@ -76,11 +77,18 @@ class BaseRenderer(ABC):
if not isinstance(documents, list) or len(documents) == 0:
raise ValueError("Standardized schema must contain at least one document in 'documents' array")
firstDoc = documents[0]
if not isinstance(firstDoc, dict) or "sections" not in firstDoc:
raise ValueError("Document in standardized schema must contain 'sections' field")
# Phase 5: Extract sections from ALL documents
all_sections = []
for doc in documents:
if isinstance(doc, dict) and "sections" in doc:
sections = doc.get("sections", [])
if isinstance(sections, list):
all_sections.extend(sections)
return firstDoc.get("sections", [])
if not all_sections:
raise ValueError("No sections found in any document")
return all_sections
def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]:
"""

View file

@ -9,6 +9,7 @@ from typing import Dict, Any, Tuple, List
import io
import base64
import re
import csv
try:
from docx import Document
@ -225,13 +226,36 @@ class RendererDocx(BaseRenderer):
self.logger.warning(f"Could not clear template content: {str(e)}")
def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a single JSON section to DOCX using AI-generated styles."""
"""Render a single JSON section to DOCX using AI-generated styles.
Supports three content formats: reference, object (base64), extracted_text.
"""
try:
section_type = section.get("content_type", "paragraph")
elements = section.get("elements", [])
# Process each element in the section
for element in elements:
element_type = element.get("type", "")
# Support three content formats from Phase 5D
if element_type == "reference":
# Document reference format
doc_ref = element.get("documentReference", "")
label = element.get("label", "Reference")
para = doc.add_paragraph(f"[Reference: {label}]")
para.runs[0].italic = True
continue
elif element_type == "extracted_text":
# Extracted text format - render as paragraph
content = element.get("content", "")
source = element.get("source", "")
if content:
para = doc.add_paragraph(content)
if source:
para.add_run(f" (Source: {source})").italic = True
continue
# Standard section types
if section_type == "table":
self._renderJsonTable(doc, element, styles)
elif section_type == "bullet_list":
@ -848,7 +872,7 @@ class RendererDocx(BaseRenderer):
Process tables in the content (both CSV and pipe-separated) and convert them to Word tables.
Returns the content with tables replaced by placeholders.
"""
import csv
# csv is already imported at module level
lines = content.split('\n')
processed_lines = []

View file

@ -297,11 +297,39 @@ class RendererHtml(BaseRenderer):
return '\n'.join(css_parts)
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a single JSON section to HTML using AI-generated styles."""
"""Render a single JSON section to HTML using AI-generated styles.
Supports three content formats: reference, object (base64), extracted_text.
"""
try:
sectionType = self._getSectionType(section)
sectionData = self._getSectionData(section)
# Check for three content formats from Phase 5D in elements
if isinstance(sectionData, list):
htmlParts = []
for element in sectionData:
element_type = element.get("type", "") if isinstance(element, dict) else ""
# Support three content formats from Phase 5D
if element_type == "reference":
# Document reference format
doc_ref = element.get("documentReference", "")
label = element.get("label", "Reference")
htmlParts.append(f'<p class="reference"><em>[Reference: {label}]</em></p>')
continue
elif element_type == "extracted_text":
# Extracted text format
content = element.get("content", "")
source = element.get("source", "")
if content:
source_text = f' <small><em>(Source: {source})</em></small>' if source else ''
htmlParts.append(f'<p class="extracted-text">{content}{source_text}</p>')
continue
# If we processed reference/extracted_text elements, return them
if htmlParts:
return '\n'.join(htmlParts)
if sectionType == "table":
# Process the section data to extract table structure
processedData = self._processSectionByType(section)

View file

@ -77,11 +77,39 @@ class RendererMarkdown(BaseRenderer):
raise Exception(f"Markdown generation failed: {str(e)}")
def _renderJsonSection(self, section: Dict[str, Any]) -> str:
"""Render a single JSON section to markdown."""
"""Render a single JSON section to markdown.
Supports three content formats: reference, object (base64), extracted_text.
"""
try:
sectionType = self._getSectionType(section)
sectionData = self._getSectionData(section)
# Check for three content formats from Phase 5D in elements
if isinstance(sectionData, list):
markdownParts = []
for element in sectionData:
element_type = element.get("type", "") if isinstance(element, dict) else ""
# Support three content formats from Phase 5D
if element_type == "reference":
# Document reference format
doc_ref = element.get("documentReference", "")
label = element.get("label", "Reference")
markdownParts.append(f"*[Reference: {label}]*")
continue
elif element_type == "extracted_text":
# Extracted text format
content = element.get("content", "")
source = element.get("source", "")
if content:
source_text = f" *(Source: {source})*" if source else ""
markdownParts.append(f"{content}{source_text}")
continue
# If we processed reference/extracted_text elements, return them
if markdownParts:
return '\n\n'.join(markdownParts)
if sectionType == "table":
# Process the section data to extract table structure
processedData = self._processSectionByType(section)

View file

@ -477,7 +477,9 @@ class RendererPdf(BaseRenderer):
return colors.black
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a single JSON section to PDF elements using AI-generated styles."""
"""Render a single JSON section to PDF elements using AI-generated styles.
Supports three content formats: reference, object (base64), extracted_text.
"""
try:
section_type = self._getSectionType(section)
elements = self._getSectionData(section)
@ -485,6 +487,33 @@ class RendererPdf(BaseRenderer):
# Process each element in the section
all_elements = []
for element in elements:
element_type = element.get("type", "") if isinstance(element, dict) else ""
# Support three content formats from Phase 5D
if element_type == "reference":
# Document reference format
doc_ref = element.get("documentReference", "")
label = element.get("label", "Reference")
ref_style = ParagraphStyle(
'Reference',
parent=self._createNormalStyle(styles),
fontStyle='italic',
textColor=colors.grey
)
all_elements.append(Paragraph(f"[Reference: {label}]", ref_style))
all_elements.append(Spacer(1, 6))
continue
elif element_type == "extracted_text":
# Extracted text format
content = element.get("content", "")
source = element.get("source", "")
if content:
source_text = f" <i>(Source: {source})</i>" if source else ""
all_elements.append(Paragraph(f"{content}{source_text}", self._createNormalStyle(styles)))
all_elements.append(Spacer(1, 6))
continue
# Standard section types
if section_type == "table":
all_elements.extend(self._renderJsonTable(element, styles))
elif section_type == "bullet_list":

View file

@ -3,6 +3,9 @@
import logging
import base64
import io
import json
import re
from datetime import datetime, UTC
from typing import Dict, Any, Optional, Tuple, List
from .rendererBaseTemplate import BaseRenderer
@ -261,7 +264,7 @@ class RendererPptx(BaseRenderer):
Returns:
List of slide content strings
"""
import re
# re is already imported at module level
# First, try to split by major headers (# or ##)
# This is the most common case for AI-generated content
@ -399,7 +402,7 @@ class RendererPptx(BaseRenderer):
def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str:
"""Create a professional PowerPoint-specific AI style template for corporate-quality slides."""
import json
# json is already imported at module level
schema_json = json.dumps(style_schema, indent=4)
return f"""Customize the JSON below for professional PowerPoint slides.
@ -443,8 +446,7 @@ JSON ONLY. NO OTHER TEXT."""
self.logger.warning("AI service returned no response, using defaults")
return default_styles
import json
import re
# json and re are already imported at module level
# Clean and parse JSON
result = response.content.strip() if response and response.content else ""
@ -634,6 +636,27 @@ JSON ONLY. NO OTHER TEXT."""
content_type = section.get("content_type", "paragraph")
elements = section.get("elements", [])
# Check for three content formats from Phase 5D in elements
content_parts = []
for element in elements:
element_type = element.get("type", "") if isinstance(element, dict) else ""
# Support three content formats from Phase 5D
if element_type == "reference":
# Document reference format
doc_ref = element.get("documentReference", "")
label = element.get("label", "Reference")
content_parts.append(f"[Reference: {label}]")
continue
elif element_type == "extracted_text":
# Extracted text format
content = element.get("content", "")
source = element.get("source", "")
if content:
source_text = f" (Source: {source})" if source else ""
content_parts.append(f"{content}{source_text}")
continue
# Handle image sections specially
if content_type == "image":
# Extract image data
@ -647,26 +670,25 @@ JSON ONLY. NO OTHER TEXT."""
})
return {
"title": section_title or element.get("altText", "Image"),
"content": "", # No text content for image slides
"title": section_title or (elements[0].get("altText", "Image") if elements else "Image"),
"content": "\n\n".join(content_parts) if content_parts else "", # Include reference/extracted_text if present
"images": images
}
# Build slide content based on section type
content_parts = []
if content_type == "table":
content_parts.append(self._formatTableForSlide(elements))
elif content_type == "list":
content_parts.append(self._formatListForSlide(elements))
elif content_type == "heading":
content_parts.append(self._formatHeadingForSlide(elements))
elif content_type == "paragraph":
content_parts.append(self._formatParagraphForSlide(elements))
elif content_type == "code":
content_parts.append(self._formatCodeForSlide(elements))
else:
content_parts.append(self._format_paragraph_for_slide(elements))
if not content_parts: # Only if we didn't process reference/extracted_text above
if content_type == "table":
content_parts.append(self._formatTableForSlide(elements))
elif content_type == "list":
content_parts.append(self._formatListForSlide(elements))
elif content_type == "heading":
content_parts.append(self._formatHeadingForSlide(elements))
elif content_type == "paragraph":
content_parts.append(self._formatParagraphForSlide(elements))
elif content_type == "code":
content_parts.append(self._formatCodeForSlide(elements))
else:
content_parts.append(self._format_paragraph_for_slide(elements))
# Combine content parts
slide_content = "\n\n".join(filter(None, content_parts))
@ -1057,5 +1079,5 @@ JSON ONLY. NO OTHER TEXT."""
def _formatTimestamp(self) -> str:
"""Format current timestamp for presentation generation."""
from datetime import datetime, UTC
# datetime and UTC are already imported at module level
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

View file

@ -100,11 +100,39 @@ class RendererText(BaseRenderer):
raise Exception(f"Text generation failed: {str(e)}")
def _renderJsonSection(self, section: Dict[str, Any]) -> str:
"""Render a single JSON section to text."""
"""Render a single JSON section to text.
Supports three content formats: reference, object (base64), extracted_text.
"""
try:
sectionType = self._getSectionType(section)
sectionData = self._getSectionData(section)
# Check for three content formats from Phase 5D in elements
if isinstance(sectionData, list):
textParts = []
for element in sectionData:
element_type = element.get("type", "") if isinstance(element, dict) else ""
# Support three content formats from Phase 5D
if element_type == "reference":
# Document reference format
doc_ref = element.get("documentReference", "")
label = element.get("label", "Reference")
textParts.append(f"[Reference: {label}]")
continue
elif element_type == "extracted_text":
# Extracted text format
content = element.get("content", "")
source = element.get("source", "")
if content:
source_text = f" (Source: {source})" if source else ""
textParts.append(f"{content}{source_text}")
continue
# If we processed reference/extracted_text elements, return them
if textParts:
return '\n\n'.join(textParts)
if sectionType == "table":
# Process the section data to extract table structure
processedData = self._processSectionByType(section)

View file

@ -7,6 +7,10 @@ Generates content for each section in the document structure.
import logging
import asyncio
import json
import base64
import re
import traceback
from typing import Dict, Any, Optional, List, Callable
from modules.services.serviceGeneration.subContentIntegrator import ContentIntegrator
@ -25,6 +29,7 @@ class ContentGenerator:
structure: Dict[str, Any],
cachedContent: Optional[Dict[str, Any]] = None,
userPrompt: str = "",
contentParts: Optional[List[Any]] = None,
progressCallback: Optional[Callable] = None,
parallelGeneration: bool = True,
batchSize: int = 10
@ -33,9 +38,10 @@ class ContentGenerator:
Generate content for all sections in structure.
Args:
structure: Document structure from Phase 1
structure: Document structure from Phase 1 (with contentPartIds per section)
cachedContent: Extracted content cache
userPrompt: Original user prompt
contentParts: List of all available ContentParts (for mapping by contentPartIds)
progressCallback: Function to call for progress updates
parallelGeneration: Enable parallel section generation
batchSize: Number of sections to process in parallel
@ -89,6 +95,7 @@ class ContentGenerator:
sections=sections,
cachedContent=cachedContent,
userPrompt=userPrompt,
contentParts=contentParts, # Pass ContentParts for section generation
documentMetadata=structure.get("metadata", {}),
progressCallback=lambda idx, total, msg: progressCallback(
currentSectionIndex + idx,
@ -103,6 +110,7 @@ class ContentGenerator:
sections=sections,
cachedContent=cachedContent,
userPrompt=userPrompt,
contentParts=contentParts, # Pass ContentParts for section generation
documentMetadata=structure.get("metadata", {}),
progressCallback=lambda idx, total, msg: progressCallback(
currentSectionIndex + idx,
@ -138,7 +146,8 @@ class ContentGenerator:
sections: List[Dict[str, Any]],
cachedContent: Optional[Dict[str, Any]],
userPrompt: str,
documentMetadata: Dict[str, Any],
contentParts: Optional[List[Any]] = None,
documentMetadata: Dict[str, Any] = {},
progressCallback: Optional[Callable] = None
) -> List[Dict[str, Any]]:
"""
@ -149,6 +158,14 @@ class ContentGenerator:
previousSections = []
totalSections = len(sections)
# Create ContentParts lookup map by ID
contentPartsMap = {}
if contentParts:
for part in contentParts:
partId = part.id if hasattr(part, 'id') else part.get('id', '')
if partId:
contentPartsMap[partId] = part
for idx, section in enumerate(sections):
try:
contentType = section.get("content_type", "content")
@ -171,11 +188,20 @@ class ContentGenerator:
message
)
# Get ContentParts for this section
sectionContentPartIds = section.get("contentPartIds", [])
sectionContentParts = []
if sectionContentPartIds and contentPartsMap:
for partId in sectionContentPartIds:
if partId in contentPartsMap:
sectionContentParts.append(contentPartsMap[partId])
context = {
"userPrompt": userPrompt,
"cachedContent": cachedContent,
"previousSections": previousSections.copy(),
"targetSection": section,
"sectionContentParts": sectionContentParts, # ContentParts for this section
"documentMetadata": documentMetadata,
"operationId": None
}
@ -272,11 +298,20 @@ class ContentGenerator:
message
)
# Get ContentParts for this section
sectionContentPartIds = section.get("contentPartIds", [])
sectionContentParts = []
if sectionContentPartIds and contentPartsMap:
for partId in sectionContentPartIds:
if partId in contentPartsMap:
sectionContentParts.append(contentPartsMap[partId])
context = {
"userPrompt": userPrompt,
"cachedContent": cachedContent,
"previousSections": batchPreviousSections.copy(), # Include sections from previous batches
"targetSection": section,
"sectionContentParts": sectionContentParts, # ContentParts for this section
"documentMetadata": documentMetadata,
"operationId": None # Can be set if needed for nested progress
}
@ -371,17 +406,13 @@ class ContentGenerator:
# Create section-specific prompt
sectionPrompt = self._createSectionPrompt(section, context)
# Debug: Log section generation prompt
if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
sectionId = section.get('id', 'unknown')
contentType = section.get('content_type', 'unknown')
try:
self.services.utils.writeDebugFile(
sectionPrompt,
f"document_generation_section_{sectionId}_{contentType}_prompt"
)
except Exception as e:
logger.debug(f"Could not write debug file for section prompt: {e}")
# Debug: Log section generation prompt (harmonisiert - keine Checks nötig)
sectionId = section.get('id', 'unknown')
contentType = section.get('content_type', 'unknown')
self.services.utils.writeDebugFile(
sectionPrompt,
f"document_generation_section_{sectionId}_{contentType}_prompt"
)
# Call AI to generate content
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
@ -397,32 +428,27 @@ class ContentGenerator:
outputFormat="json"
)
# Debug: Log section generation response (always log, even if empty)
# Debug: Log section generation response (harmonisiert - keine Checks nötig)
sectionId = section.get('id', 'unknown')
contentType = section.get('content_type', 'unknown')
if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
try:
responseContent = ''
if aiResponse:
if hasattr(aiResponse, 'content') and aiResponse.content:
responseContent = aiResponse.content
elif hasattr(aiResponse, 'documents') and aiResponse.documents:
responseContent = f"[Response has {len(aiResponse.documents)} documents]"
else:
responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]"
else:
responseContent = '[No response object]'
self.services.utils.writeDebugFile(
responseContent,
f"document_generation_section_{sectionId}_{contentType}_response"
)
logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)")
except Exception as e:
logger.warning(f"Could not write debug file for section response: {e}")
import traceback
logger.debug(traceback.format_exc())
responseContent = ''
if aiResponse:
if hasattr(aiResponse, 'content') and aiResponse.content:
responseContent = aiResponse.content
elif hasattr(aiResponse, 'documents') and aiResponse.documents:
responseContent = f"[Response has {len(aiResponse.documents)} documents]"
else:
responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]"
else:
responseContent = '[No response object]'
# Debug: Log section generation response (harmonisiert - keine Checks nötig)
self.services.utils.writeDebugFile(
responseContent,
f"document_generation_section_{sectionId}_{contentType}_response"
)
logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)")
if not aiResponse or not aiResponse.content:
logger.error(f"AI section generation returned empty response for section {sectionId}")
@ -443,7 +469,7 @@ class ContentGenerator:
logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500] if extractedJson else 'None'}")
raise ValueError("No JSON found in AI section response")
import json
# json is already imported at module level
try:
elementsData = json.loads(extractedJson)
logger.debug(f"Parsed JSON for section {section.get('id')}: type={type(elementsData)}, keys={list(elementsData.keys()) if isinstance(elementsData, dict) else 'N/A'}")
@ -480,7 +506,7 @@ class ContentGenerator:
# Last resort: try to extract partial content and create minimal valid JSON
try:
# Try to extract text content before the truncation point
import re
# re is already imported at module level
# Look for text field that might be partially complete
textMatch = re.search(r'"text"\s*:\s*"([^"]*)', extractedJson)
if textMatch:
@ -577,14 +603,14 @@ class ContentGenerator:
) -> Dict[str, Any]:
"""Generate image for image section or include existing image"""
try:
# Check if this is an existing image to include
# Check if this is an existing image to include or render
imageSource = section.get("image_source", "generate")
if imageSource == "existing":
# Include existing image from cachedContent
if imageSource == "existing" or imageSource == "render":
# Phase 4: Include existing image or render image from cachedContent
imageRefId = section.get("image_reference_id")
if not imageRefId:
raise ValueError(f"Image section {section.get('id')} has image_source='existing' but no image_reference_id")
raise ValueError(f"Image section {section.get('id')} has image_source='{imageSource}' but no image_reference_id")
cachedContent = context.get("cachedContent", {})
imageDocuments = cachedContent.get("imageDocuments", [])
@ -594,7 +620,7 @@ class ContentGenerator:
if not imageDoc:
raise ValueError(f"Image document {imageRefId} not found in cachedContent.imageDocuments")
# Create image element from existing image
# Create image element from existing/render image
altText = imageDoc.get("altText", section.get("generation_hint", "Image"))
mimeType = imageDoc.get("mimeType", "image/png")
@ -605,7 +631,7 @@ class ContentGenerator:
"caption": section.get("metadata", {}).get("caption")
}]
logger.info(f"Successfully included existing image {imageRefId} for section {section.get('id')}")
logger.info(f"Successfully integrated image {imageRefId} for section {section.get('id')} (source={imageSource})")
return section
# Generate new image (existing logic)
@ -620,7 +646,7 @@ class ContentGenerator:
# Call AI service for image generation
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptImage
import json
# json is already imported at module level
# Create image generation prompt
promptModel = AiCallPromptImage(
@ -664,7 +690,7 @@ class ContentGenerator:
# Validate base64 data
try:
import base64
# base64 is already imported at module level
base64.b64decode(base64Data[:100], validate=True) # Validate first 100 chars
except Exception as e:
logger.warning(f"Image data may not be valid base64: {str(e)}")
@ -710,9 +736,11 @@ class ContentGenerator:
"""Create sub-prompt for section content generation"""
contentType = section.get("content_type", "")
generationHint = section.get("generation_hint", "")
extractionPrompt = section.get("extractionPrompt") # Optional extraction prompt for ContentParts
userPrompt = context.get("userPrompt", "")
cachedContent = context.get("cachedContent")
previousSections = context.get("previousSections", [])
sectionContentParts = context.get("sectionContentParts", []) # ContentParts for this section
documentMetadata = context.get("documentMetadata", {})
# Get user language
@ -723,6 +751,51 @@ class ContentGenerator:
if cachedContent and cachedContent.get("extractedContent"):
cachedContentText = self._formatCachedContent(cachedContent)
# Format ContentParts for this section
contentPartsText = ""
imagePartReferences = [] # Track image parts for text reference
if sectionContentParts:
try:
partsList = []
imageIndex = 1
for part in sectionContentParts:
partTypeGroup = part.typeGroup if hasattr(part, 'typeGroup') else part.get('typeGroup', '')
partMimeType = part.mimeType if hasattr(part, 'mimeType') else part.get('mimeType', '')
partId = part.id if hasattr(part, 'id') else part.get('id', '')
partData = part.data if hasattr(part, 'data') else part.get('data', '')
# Check if this is an image part
isImage = partTypeGroup == "image" or (partMimeType and partMimeType.startswith("image/"))
if contentType == "image" and isImage:
# For image sections: include image data for integration
partsList.append(f"- ContentPart {partId} (image): [Image data available for integration]")
elif isImage:
# For non-image sections: track for text reference
imagePartReferences.append({
"id": partId,
"index": imageIndex
})
imageIndex += 1
# Don't include image data in prompt for non-image sections
else:
# For text/table/etc parts: include data preview
dataPreview = str(partData)[:200] if partData else "[No data]"
partsList.append(f"- ContentPart {partId} ({partTypeGroup}): {dataPreview}{'...' if partData and len(str(partData)) > 200 else ''}")
if partsList:
contentPartsText = "\n".join(partsList)
# Add image reference instructions for non-image sections
if imagePartReferences and contentType != "image":
refText = ", ".join([f"Bild {ref['index']}" if userLanguage == "de" else f"Image {ref['index']}" for ref in imagePartReferences])
contentPartsText += f"\n\nNOTE: Reference images as text in the document language: {refText}"
except Exception as e:
logger.warning(f"Could not format ContentParts for section prompt: {str(e)}")
contentPartsText = ""
# Format previous sections for context
previousSectionsText = ""
if previousSections:
@ -787,14 +860,22 @@ EXTRACTED CONTENT (if available):
{cachedContentText if cachedContentText else "None"}
{'='*80}
{'='*80}
CONTENT PARTS FOR THIS SECTION:
{'='*80}
{contentPartsText if contentPartsText else "No ContentParts assigned to this section."}
{'='*80}
TASK: Generate content for this section ONLY.
INSTRUCTIONS:
1. Generate content appropriate for section type: {contentType}
2. Use the generation hint: {generationHint}
3. Consider previous sections for continuity
4. Use extracted content if relevant
5. All content must be in the language '{userLanguage}'
{f"3. Use extractionPrompt for ContentParts: {extractionPrompt}" if extractionPrompt else "3. Use ContentParts data if provided"}
4. Consider previous sections for continuity
5. Use extracted content if relevant
6. All content must be in the language '{userLanguage}'
7. {'For image sections: Integrate image ContentParts as visual elements' if contentType == "image" else 'For non-image sections: Reference image ContentParts as text (e.g., "siehe Bild 1" in German, "see Image 1" in English)'}
6. CRITICAL: Return ONLY a JSON object with an "elements" array. DO NOT return a full document structure.

View file

@ -65,18 +65,14 @@ class ContentIntegrator:
)
sections[idx] = section
# Debug: Write final merged structure to debug file
if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
try:
import json
structureJson = json.dumps(structure, indent=2, ensure_ascii=False)
self.services.utils.writeDebugFile(
structureJson,
"document_generation_final_merged_json"
)
logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)")
except Exception as e:
logger.debug(f"Could not write debug file for final merged JSON: {e}")
# Debug: Write final merged structure to debug file (harmonisiert - keine Checks nötig)
import json
structureJson = json.dumps(structure, indent=2, ensure_ascii=False)
self.services.utils.writeDebugFile(
structureJson,
"document_generation_final_merged_json"
)
logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)")
return structure

View file

@ -1,316 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document Purpose Analyzer for hierarchical document generation.
Uses AI to analyze user prompt and determine purpose for each document.
"""
import logging
import json
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
logger = logging.getLogger(__name__)
class DocumentPurposeAnalyzer:
"""Analyzes user prompt and documents to determine document purposes"""
def __init__(self, services: Any):
    """Store the application service container.

    Args:
        services: Service registry; this class uses ``services.ai``
            (for ``callAiContent``) and ``services.utils`` (for
            ``writeDebugFile`` / ``jsonExtractString``).
    """
    self.services = services
async def analyzeDocumentPurposes(
    self,
    userPrompt: str,
    chatDocuments: List[ChatDocument],
    actionContext: str = "generateDocument"
) -> Dict[str, Any]:
    """
    Use AI to analyze user prompt and determine purpose for each document.

    Args:
        userPrompt: User's original prompt
        chatDocuments: List of ChatDocument objects to analyze
        actionContext: Action name (e.g., "generateDocument", "extractData")

    Returns:
        Dict with "document_purposes" (one entry per document: document_id,
        purpose, reasoning, extractionPrompt, processingNotes) and
        "overall_intent". On any failure, falls back to
        ``_createDefaultPurposes`` so every document always gets a purpose.
    """
    try:
        # No documents: nothing to analyze, return an empty but valid result.
        if not chatDocuments:
            return {
                "document_purposes": [],
                "overall_intent": "No documents provided"
            }

        # Build a metadata-only view of the documents for the AI prompt
        # (no document content is sent, just id/name/type/size).
        documentMetadata = []
        for doc in chatDocuments:
            docInfo = {
                "document_id": doc.id,
                "fileName": doc.fileName,
                "mimeType": doc.mimeType,
                "fileSize": doc.fileSize
            }
            documentMetadata.append(docInfo)

        analysisPrompt = self._createAnalysisPrompt(
            userPrompt=userPrompt,
            actionContext=actionContext,
            documentMetadata=documentMetadata
        )

        # Debug: Log purpose analysis prompt (best-effort; never fails the call).
        if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
            try:
                self.services.utils.writeDebugFile(
                    analysisPrompt,
                    "document_purpose_analysis_prompt"
                )
            except Exception as e:
                logger.debug(f"Could not write debug file for purpose analysis prompt: {e}")

        # Call AI for analysis, requesting a JSON result.
        options = AiCallOptions(
            operationType=OperationTypeEnum.DATA_GENERATE,
            resultFormat="json"
        )
        aiResponse = await self.services.ai.callAiContent(
            prompt=analysisPrompt,
            options=options,
            outputFormat="json"
        )

        # Debug: Log purpose analysis response (best-effort; never fails the call).
        if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
            try:
                responseContent = aiResponse.content if aiResponse and aiResponse.content else ''
                responseMetadata = {
                    "status": aiResponse.status if aiResponse else "N/A",
                    "error": aiResponse.error if aiResponse else "N/A",
                    "documents_count": len(aiResponse.documents) if aiResponse and aiResponse.documents else 0
                }
                self.services.utils.writeDebugFile(
                    f"Response Content:\n{responseContent}\n\nResponse Metadata:\n{json.dumps(responseMetadata, indent=2)}",
                    "document_purpose_analysis_response"
                )
            except Exception as e:
                logger.debug(f"Could not write debug file for purpose analysis response: {e}")

        # Empty AI response: fall back to heuristic defaults.
        if not aiResponse or not aiResponse.content:
            logger.warning("AI purpose analysis returned empty response, using defaults")
            return self._createDefaultPurposes(chatDocuments, actionContext)

        # Extract the JSON payload from the (possibly wrapped) AI response text.
        extractedJson = self.services.utils.jsonExtractString(aiResponse.content)
        if not extractedJson:
            logger.warning("No JSON found in purpose analysis response, using defaults")
            return self._createDefaultPurposes(chatDocuments, actionContext)

        try:
            analysisResult = json.loads(extractedJson)

            # Validate structure: the result must carry "document_purposes".
            if "document_purposes" not in analysisResult:
                logger.warning("Invalid analysis result structure, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            # Ensure every input document has a purpose entry; the AI may
            # silently drop documents, so backfill with a default purpose.
            analyzedIds = {dp.get("document_id") for dp in analysisResult.get("document_purposes", [])}
            for doc in chatDocuments:
                if doc.id not in analyzedIds:
                    logger.warning(f"Document {doc.id} not in analysis result, adding default purpose")
                    defaultPurpose = self._determineDefaultPurpose(doc, actionContext)
                    analysisResult["document_purposes"].append({
                        "document_id": doc.id,
                        "purpose": defaultPurpose,
                        # Fix: plain string literal (was an f-string with no placeholders).
                        "reasoning": "Default purpose based on document type and action context",
                        "extractionPrompt": None,
                        "processingNotes": None
                    })

            return analysisResult

        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse purpose analysis JSON: {str(e)}")
            logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500]}")
            return self._createDefaultPurposes(chatDocuments, actionContext)

    except Exception as e:
        # Broad catch is deliberate: purpose analysis is advisory and must
        # never break the caller — degrade to defaults instead.
        logger.error(f"Error analyzing document purposes: {str(e)}")
        return self._createDefaultPurposes(chatDocuments, actionContext)
def _createAnalysisPrompt(
    self,
    userPrompt: str,
    actionContext: str,
    documentMetadata: List[Dict[str, Any]]
) -> str:
    """Create AI prompt for document purpose analysis.

    Args:
        userPrompt: The user's original request text.
        actionContext: Name of the action being executed
            (e.g. 'generateDocument').
        documentMetadata: One dict per document; the keys 'document_id',
            'fileName', 'mimeType' and 'fileSize' are read below.

    Returns:
        A single prompt string instructing the model to classify each
        document's purpose and to answer with JSON only.
    """
    # Format document list as a numbered plain-text listing that is
    # embedded verbatim into the prompt below.
    docListText = ""
    for i, docInfo in enumerate(documentMetadata, 1):
        docListText += f"\n{i}. Document ID: {docInfo['document_id']}\n"
        docListText += f" File Name: {docInfo['fileName']}\n"
        docListText += f" MIME Type: {docInfo['mimeType']}\n"
        docListText += f" File Size: {docInfo['fileSize']} bytes\n"
    # Get user language; the model is asked to write the 'reasoning' and
    # 'overall_intent' fields of its JSON answer in this language.
    userLanguage = self._getUserLanguage()
    prompt = f"""{'='*80}
DOCUMENT PURPOSE ANALYSIS
{'='*80}
USER PROMPT:
{userPrompt}
ACTION CONTEXT: {actionContext}
DOCUMENTS PROVIDED:
{docListText}
{'='*80}
TASK: For each document, determine its purpose based on:
1. User prompt intent (what the user wants to do)
2. Action context (what action is being performed)
3. Document type (mimeType - is it text, image, etc.)
4. Document metadata (fileName, size)
AVAILABLE PURPOSES:
- "extract_text_content": Extract text content for use in document generation
- "include_image": Include the image directly in the generated document (for images)
- "analyze_image_vision": Analyze image with vision AI to extract text/information (for images with text/charts)
- "use_as_template": Use document structure/layout as template for generation
- "use_as_reference": Use as background context/reference without detailed extraction
- "extract_data": Extract structured data (key-value pairs, entities, fields)
- "attach": Document is an attachment - don't process, just attach to output
- "convert_format": Convert document format (for convert actions)
- "translate": Translate document content (for translate actions)
- "summarize": Create summary of document (for summarize actions)
- "compare": Compare documents (for comparison actions)
- "merge": Merge documents (for merge actions)
- "extract_tables_charts": Extract tables and charts specifically
- "use_for_styling": Use document for styling/formatting reference only
- "extract_metadata": Extract only document metadata
CRITICAL RULES:
1. For images (mimeType starts with "image/"):
- If user wants to "include" or "show" images "include_image"
- If user wants to "analyze", "read text", or "extract text" from images "analyze_image_vision"
- Default for images in generateDocument "include_image"
2. For text documents in generateDocument:
- If user mentions "template" or "structure" "use_as_template"
- If user mentions "reference" or "context" "use_as_reference"
- Default "extract_text_content"
3. Consider action context:
- generateDocument: Usually "extract_text_content" or "include_image"
- extractData: Usually "extract_data"
- translateDocument: Usually "translate"
- summarizeDocument: Usually "summarize"
4. Return ONLY valid JSON following this structure:
{{
"document_purposes": [
{{
"document_id": "document_id_here",
"purpose": "extract_text_content",
"reasoning": "Brief explanation in language '{userLanguage}'",
"extractionPrompt": "Specific extraction prompt if purpose requires extraction, otherwise null",
"processingNotes": "Any special processing requirements or null"
}}
],
"overall_intent": "Summary of how documents should be used together in language '{userLanguage}'"
}}
5. All content must be in the language '{userLanguage}'
6. Return ONLY the JSON structure. No explanations before or after.
Return ONLY the JSON structure.
"""
    return prompt
def _createDefaultPurposes(
    self,
    chatDocuments: List[ChatDocument],
    actionContext: str
) -> Dict[str, Any]:
    """Build a fallback purpose mapping when the AI analysis is unavailable.

    Args:
        chatDocuments: Documents that still need a purpose assigned.
        actionContext: Name of the action being executed.

    Returns:
        Dict matching the AI analysis schema, with 'document_purposes'
        (one entry per document) and 'overall_intent' keys.
    """
    # One default entry per document; the purpose itself is derived from
    # the MIME type and action context. No extraction prompts or
    # processing notes are set in the fallback path.
    purposes = [
        {
            "document_id": doc.id,
            "purpose": self._determineDefaultPurpose(doc, actionContext),
            "reasoning": f"Default purpose based on document type ({doc.mimeType}) and action context ({actionContext})",
            "extractionPrompt": None,
            "processingNotes": None
        }
        for doc in chatDocuments
    ]
    return {
        "document_purposes": purposes,
        "overall_intent": f"Default processing for {len(chatDocuments)} document(s) in {actionContext} action"
    }
def _determineDefaultPurpose(
    self,
    doc: ChatDocument,
    actionContext: str
) -> str:
    """Pick a fallback purpose from the document's MIME type and the action.

    Used when the AI purpose analysis failed or omitted a document.

    Args:
        doc: Chat document whose mimeType drives the image/text decision.
        actionContext: Name of the action being executed.

    Returns:
        One of the purpose identifiers understood by the purpose analysis
        (e.g. 'include_image', 'extract_data', 'extract_text_content').
    """
    mimeType = doc.mimeType or ""
    # Images: vision analysis only for data-extraction style actions;
    # everything else (including generateDocument) embeds the image.
    if mimeType.startswith("image/"):
        return "analyze_image_vision" if actionContext in ("extractData", "process") else "include_image"
    # Non-image documents: map the action to its natural default purpose,
    # falling back to plain text extraction for unknown actions.
    actionDefaults = {
        "extractData": "extract_data",
        "translateDocument": "translate",
        "summarizeDocument": "summarize",
        "convertDocument": "convert_format",
        "convert": "convert_format",
        "generateDocument": "extract_text_content",
    }
    return actionDefaults.get(actionContext, "extract_text_content")
def _getUserLanguage(self) -> str:
"""Get user language for document generation"""
try:
if self.services:
if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage:
return self.services.currentUserLanguage
elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'):
return self.services.user.language
except Exception:
pass
return 'en' # Default fallback

View file

@ -19,7 +19,8 @@ async def buildGenerationPrompt(
title: str,
extracted_content: str = None,
continuationContext: Dict[str, Any] = None,
services: Any = None
services: Any = None,
useContentParts: bool = False # ARCHITECTURE: If True, don't include full content in prompt (ContentParts will be used directly)
) -> str:
"""
Build the unified generation prompt using a single JSON template.
@ -120,7 +121,9 @@ Continue generating the remaining content now.
# PROMPT FOR FIRST CALL
# Structure: User request + Extracted content FIRST (if available), then JSON template, then instructions
if extracted_content:
# ARCHITECTURE: If useContentParts=True, don't include full content in prompt
# ContentParts will be passed directly to callAi for model-aware chunking
if extracted_content and not useContentParts:
# If we have extracted content, put it FIRST and make it very clear it's the source data
generationPrompt = f"""{'='*80}
USER REQUEST / USER PROMPT:

View file

@ -24,6 +24,7 @@ class StructureGenerator:
userPrompt: str,
documentList: Optional[Any] = None,
cachedContent: Optional[Dict[str, Any]] = None,
contentParts: Optional[List[Any]] = None,
maxSectionLength: int = 500,
existingImages: Optional[List[Dict[str, Any]]] = None
) -> Dict[str, Any]:
@ -34,30 +35,28 @@ class StructureGenerator:
userPrompt: User's original prompt
documentList: Optional document references
cachedContent: Optional extracted content cache
contentParts: Optional list of ContentParts to analyze for structure generation
maxSectionLength: Maximum words for simple sections
existingImages: Optional list of existing images to include
Returns:
Document structure with empty elements arrays
Document structure with empty elements arrays and contentPartIds per section
"""
try:
# Create structure generation prompt
structurePrompt = self._createStructurePrompt(
userPrompt=userPrompt,
cachedContent=cachedContent,
contentParts=contentParts,
maxSectionLength=maxSectionLength,
existingImages=existingImages or []
)
# Debug: Log structure generation prompt
if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
try:
self.services.utils.writeDebugFile(
structurePrompt,
"document_generation_structure_prompt"
)
except Exception as e:
logger.debug(f"Could not write debug file for structure prompt: {e}")
# Debug: Log structure generation prompt (harmonisiert - keine Checks nötig)
self.services.utils.writeDebugFile(
structurePrompt,
"document_generation_structure_prompt"
)
# Call AI to generate structure
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
@ -73,15 +72,11 @@ class StructureGenerator:
outputFormat="json"
)
# Debug: Log structure generation response
if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
try:
self.services.utils.writeDebugFile(
aiResponse.content if aiResponse and aiResponse.content else '',
"document_generation_structure_response"
)
except Exception as e:
logger.debug(f"Could not write debug file for structure response: {e}")
# Debug: Log structure generation response (harmonisiert - keine Checks nötig)
self.services.utils.writeDebugFile(
aiResponse.content if aiResponse and aiResponse.content else '',
"document_generation_structure_response"
)
if not aiResponse or not aiResponse.content:
raise ValueError("AI structure generation returned empty response")
@ -106,6 +101,7 @@ class StructureGenerator:
self,
userPrompt: str,
cachedContent: Optional[Dict[str, Any]] = None,
contentParts: Optional[List[Any]] = None,
maxSectionLength: int = 500,
existingImages: Optional[List[Dict[str, Any]]] = None
) -> str:
@ -126,6 +122,41 @@ class StructureGenerator:
if cachedContent and cachedContent.get("imageDocuments"):
existingImages = cachedContent.get("imageDocuments", [])
# Format ContentParts as JSON for structure generation
contentPartsJson = ""
if contentParts:
try:
import json
# Convert ContentParts to dict format for JSON serialization
contentPartsList = []
for part in contentParts:
if hasattr(part, 'dict'):
partDict = part.dict()
elif isinstance(part, dict):
partDict = part
else:
# Try to convert to dict
partDict = {
"id": getattr(part, 'id', ''),
"typeGroup": getattr(part, 'typeGroup', ''),
"mimeType": getattr(part, 'mimeType', ''),
"label": getattr(part, 'label', ''),
"metadata": getattr(part, 'metadata', {})
}
# Only include essential fields for structure generation (not full data)
contentPartsList.append({
"id": partDict.get("id", ""),
"typeGroup": partDict.get("typeGroup", ""),
"mimeType": partDict.get("mimeType", ""),
"label": partDict.get("label", ""),
"metadata": partDict.get("metadata", {})
})
contentPartsJson = json.dumps(contentPartsList, indent=2, ensure_ascii=False)
except Exception as e:
logger.warning(f"Could not format ContentParts as JSON: {str(e)}")
contentPartsJson = ""
# Create structure template
structureTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", "Document Title")
@ -145,13 +176,15 @@ EXTRACTED CONTENT (if available):
{'='*80}
INSTRUCTIONS:
1. Analyze the user request and extracted content
1. Analyze the user request, extracted content, and available ContentParts
2. Create a document structure with CONTENT sections only
3. For each section, specify:
- id: Unique identifier (e.g., "section_title_1", "section_image_1")
- content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block"
- complexity: "simple" (can generate directly) or "complex" (needs sub-prompt)
- generation_hint: Brief description of what content should be generated
- contentPartIds: Array of ContentPart IDs that should be used for this section (e.g., ["part_1", "part_2"]) - can be empty []
- extractionPrompt: (optional) Specific prompt for extracting/processing ContentParts for this section
- image_prompt: (only for image sections) Detailed prompt for image generation
- order: Section order number (starting from 1)
- elements: [] (empty array - will be populated later)
@ -160,10 +193,12 @@ INSTRUCTIONS:
- If user requests illustrations/images, create image sections
- If existing images are provided in documentList (check EXISTING IMAGES section below), create image sections that reference them
- Add image_prompt field with detailed description for image generation (only for new images)
- Set complexity to "complex"
- Set complexity to "complex" for new images, "simple" for existing/render images
- For existing images: Set image_source to "existing" and image_reference_id to the image document ID
- For images to render (from input documents): Set image_source to "render" and image_reference_id to the image document ID
- Example for new image: {{"id": "section_image_1", "content_type": "image", "complexity": "complex", "generation_hint": "Illustration for chapter 1", "image_prompt": "A detailed description for image generation", "order": 2, "elements": []}}
- Example for existing image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Include provided image", "image_source": "existing", "image_reference_id": "doc_id_here", "order": 2, "elements": []}}
- Example for render image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Render input image", "image_source": "render", "image_reference_id": "doc_id_here", "order": 2, "elements": []}}
{'='*80}
EXISTING IMAGES (to include in document):
@ -178,12 +213,21 @@ EXISTING IMAGES (to include in document):
7. Return ONLY valid JSON following this structure:
{structureTemplate}
5. CRITICAL RULES:
5. CRITICAL RULES FOR CONTENT PARTS:
- Analyze available ContentParts and determine which ones are needed for each section
- For image sections (content_type == "image"): Include image ContentParts in contentPartIds - images will be integrated as visual elements
- For other sections (heading, paragraph, etc.): If image ContentParts are referenced, they will be referenced as text in the document language (not integrated as images)
- Each section can reference multiple ContentParts via contentPartIds array
- If specific extraction/processing is needed for ContentParts, provide extractionPrompt
- Image references in non-image sections should be automatically derived in the document language (e.g., "siehe Bild 1" in German, "see Image 1" in English)
6. CRITICAL RULES:
- Return ONLY valid JSON (no comments, no trailing commas, double quotes only)
- Follow the exact JSON schema structure provided
- IMPORTANT: All sections MUST have empty elements arrays: "elements": [] (the template shows examples with content, but you must use empty arrays)
- ALL sections MUST include "generation_hint" field with a brief description of what content should be generated
- ALL sections MUST include "complexity" field: "simple" for short content, "complex" for long chapters/images
- ALL sections MUST include "contentPartIds" field (can be empty array [] if no ContentParts needed)
- Image sections MUST include "image_prompt" field with detailed description for image generation
- Order numbers MUST start from 1 (not 0)
- All content must be in the language '{userLanguage}'
@ -235,6 +279,14 @@ Return ONLY the JSON structure. No explanations.
if "elements" not in section:
section["elements"] = []
# Ensure contentPartIds field exists (can be empty array)
if "contentPartIds" not in section:
section["contentPartIds"] = []
# Ensure extractionPrompt field exists (optional)
if "extractionPrompt" not in section:
section["extractionPrompt"] = None
# Identify complexity if not set
if "complexity" not in section:
section["complexity"] = self._identifySectionComplexity(
@ -255,11 +307,11 @@ Return ONLY the JSON structure. No explanations.
if section.get("content_type") == "image":
imageSource = section.get("image_source", "generate")
if imageSource == "existing":
# Existing image - ensure image_reference_id is set
if imageSource == "existing" or imageSource == "render":
# Existing or render image - ensure image_reference_id is set
if "image_reference_id" not in section:
logger.warning(f"Image section {sectionId} has image_source='existing' but no image_reference_id")
# Existing images are simple (no generation needed)
logger.warning(f"Image section {sectionId} has image_source='{imageSource}' but no image_reference_id")
# Existing/render images are simple (no generation needed, code integration)
section["complexity"] = "simple"
else:
# New image generation - ensure image_prompt

View file

@ -2,6 +2,7 @@
# All rights reserved.
import json
import logging
import re
from typing import Any, Dict, List, Optional, Tuple, Union, Type, TypeVar
from pydantic import BaseModel, ValidationError
@ -11,10 +12,32 @@ T = TypeVar('T', bound=BaseModel)
def stripCodeFences(text: str) -> str:
"""Remove ```json / ``` fences and surrounding whitespace if present."""
"""Remove ```json / ``` fences and surrounding whitespace if present.
Also removes [SOURCE: ...] and [END SOURCE] tags that may wrap the JSON."""
if not text:
return text
s = text.strip()
# Remove [SOURCE: ...] tags at the beginning
if s.startswith("[SOURCE:"):
# Find the end of the SOURCE tag (newline or end of string)
end_pos = s.find("\n")
if end_pos != -1:
s = s[end_pos+1:]
else:
# No newline, entire string is SOURCE tag
return ""
# Remove [END SOURCE] tags at the end
if s.endswith("[END SOURCE]"):
# Find the start of END SOURCE tag (newline before it)
start_pos = s.rfind("\n[END SOURCE]")
if start_pos != -1:
s = s[:start_pos]
else:
# No newline, entire string is END SOURCE tag
return ""
# Handle opening fence (may or may not have closing fence)
if s.startswith("```"):
# Remove first triple backticks
@ -201,7 +224,7 @@ def closeJsonStructures(text: str) -> str:
# Look for patterns like: "value" or "value\n (unterminated)
# Check if we're in the middle of a string value when text ends
if result.strip():
import re
# re is already imported at module level
# Count quotes - if odd number, we have an unterminated string
quoteCount = result.count('"')
if quoteCount % 2 == 1:
@ -367,7 +390,7 @@ def _removeLastIncompleteItem(items: List[str], original_text: str) -> List[str]
Remove the last item if it appears to be incomplete/corrupted.
This prevents corrupted data from being included in the final result.
"""
import re
# re is already imported at module level
if not items:
return items
@ -418,7 +441,7 @@ def _extractGenericContent(text: str) -> List[Dict[str, Any]]:
CRITICAL: Must preserve original content_type and id from the JSON structure!
"""
import re
# re is already imported at module level
sections = []
@ -1025,7 +1048,7 @@ def _extractCutOffElements(incomplete_section: Dict[str, Any], raw_json: str) ->
if not cut_off_element:
# Extract the last incomplete part from raw JSON
# Find the last incomplete string/number/array
import re
# re is already imported at module level
# Look for incomplete string at the end
incomplete_match = re.search(r'"([^"]*?)(?:"|$)', raw_json[-500:], re.DOTALL)
if incomplete_match:
@ -1045,7 +1068,7 @@ def _extractCutOffFromElement(element: Dict[str, Any], raw_json: str) -> Optiona
This helps identify where exactly to continue within nested structures.
"""
import re
# re is already imported at module level
# Check for code_block with nested JSON
if "code" in element:

View file

@ -8,9 +8,7 @@ from .process import process
from .webResearch import webResearch
from .summarizeDocument import summarizeDocument
from .translateDocument import translateDocument
from .convert import convert
from .convertDocument import convertDocument
from .extractData import extractData
from .generateDocument import generateDocument
__all__ = [
@ -18,9 +16,7 @@ __all__ = [
'webResearch',
'summarizeDocument',
'translateDocument',
'convert',
'convertDocument',
'extractData',
'generateDocument',
]

View file

@ -1,157 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Convert action for AI operations.
Converts documents/data between different formats with specific formatting options.
"""
import logging
import json
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList
logger = logging.getLogger(__name__)
@action
async def convert(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSONCSV with custom columns, delimiters).
- Input requirements: documentList (required); inputFormat and outputFormat (required).
- Output format: Document in target format with specified formatting options.
- CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed).
Parameters:
- documentList (list, required): Document reference(s) to convert.
- inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.).
- outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.).
- columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect.
- delimiter (str, optional): For CSV output, delimiter character. Default: comma (,).
- includeHeader (bool, optional): For CSV output, whether to include header row. Default: True.
- language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
inputFormat = parameters.get("inputFormat")
outputFormat = parameters.get("outputFormat")
if not inputFormat or not outputFormat:
return ActionResult.isFailure(error="inputFormat and outputFormat are required")
# Normalize formats (remove leading dot if present)
normalizedInputFormat = inputFormat.strip().lstrip('.').lower()
normalizedOutputFormat = outputFormat.strip().lstrip('.').lower()
# Get documents
if isinstance(documentList, DocumentReferenceList):
docRefList = documentList
elif isinstance(documentList, list):
docRefList = DocumentReferenceList.from_string_list(documentList)
else:
docRefList = DocumentReferenceList.from_string_list([documentList])
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
if not chatDocuments:
return ActionResult.isFailure(error="No documents found in documentList")
# Check if input is standardized JSON format - if so, use direct rendering
if normalizedInputFormat == "json" and len(chatDocuments) == 1:
try:
doc = chatDocuments[0]
# ChatDocument doesn't have documentData - need to load file content using fileId
docBytes = self.services.chat.getFileData(doc.fileId)
if not docBytes:
raise ValueError(f"No file data found for fileId={doc.fileId}")
# Decode bytes to string
docData = docBytes.decode('utf-8')
# Try to parse as JSON
if isinstance(docData, str):
jsonData = json.loads(docData)
elif isinstance(docData, dict):
jsonData = docData
else:
jsonData = None
# Check if it's standardized JSON format (has "documents" or "sections")
if jsonData and (isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData)):
# Use direct rendering - no AI call needed!
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
# Ensure format is "documents" array
if "documents" not in jsonData:
jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]}
# Get title
title = jsonData.get("metadata", {}).get("title", doc.documentName or "Converted Document")
# Render with options
renderOptions = {}
if normalizedOutputFormat == "csv":
renderOptions["delimiter"] = parameters.get("delimiter", ",")
renderOptions["columnsPerRow"] = parameters.get("columnsPerRow")
renderOptions["includeHeader"] = parameters.get("includeHeader", True)
rendered_content, mime_type, _images = await generationService.renderReport(
jsonData, normalizedOutputFormat, title, None, None
)
# Apply CSV options if needed (renderer will handle them)
if normalizedOutputFormat == "csv" and renderOptions:
rendered_content = self.csvProcessing.applyCsvOptions(rendered_content, renderOptions)
validationMetadata = {
"actionType": "ai.convert",
"inputFormat": normalizedInputFormat,
"outputFormat": normalizedOutputFormat,
"hasSourceJson": True,
"conversionType": "direct_rendering"
}
actionDoc = ActionDocument(
documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' in doc.documentName else doc.documentName}.{normalizedOutputFormat}",
documentData=rendered_content,
mimeType=mime_type,
sourceJson=jsonData, # Preserve source JSON for structure validation
validationMetadata=validationMetadata
)
return ActionResult.isSuccess(documents=[actionDoc])
except Exception as e:
logger.warning(f"Direct rendering failed, falling back to AI conversion: {str(e)}")
# Fall through to AI-based conversion
# Fallback: Use AI for conversion (for non-JSON inputs or complex conversions)
columnsPerRow = parameters.get("columnsPerRow")
delimiter = parameters.get("delimiter", ",")
includeHeader = parameters.get("includeHeader", True)
language = parameters.get("language", "en")
aiPrompt = f"Convert the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format."
if normalizedOutputFormat == "csv":
aiPrompt += f" Use '{delimiter}' as the delimiter character."
if columnsPerRow:
aiPrompt += f" Format the output with {columnsPerRow} columns per row."
if not includeHeader:
aiPrompt += " Do not include a header row."
else:
aiPrompt += " Include a header row with column names."
if language and language != "en":
aiPrompt += f" Use language: {language}."
aiPrompt += " Preserve all data and ensure accurate conversion. Maintain data integrity and structure."
return await self.process({
"aiPrompt": aiPrompt,
"documentList": documentList,
"resultType": normalizedOutputFormat
})

View file

@ -1,59 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Extract Data action for AI operations.
Extracts structured data from documents (key-value pairs, entities, facts, etc.).
"""
import logging
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult
logger = logging.getLogger(__name__)
@action
async def extractData(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.).
- Input requirements: documentList (required); optional dataStructure, fields.
- Output format: JSON by default, or specified resultType.
Parameters:
- documentList (list, required): Document reference(s) to extract data from.
- dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested.
- fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]).
- resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
dataStructure = parameters.get("dataStructure", "nested")
fields = parameters.get("fields", [])
resultType = parameters.get("resultType", "json")
aiPrompt = "Extract structured data from the provided document(s)."
if fields:
fieldsStr = ", ".join(fields)
aiPrompt += f" Extract the following specific fields: {fieldsStr}."
else:
aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information."
structureInstructions = {
"flat": "Use a flat key-value structure with simple properties.",
"nested": "Use a nested JSON structure with logical grouping of related data.",
"list": "Structure the data as a list/array of objects, one per entity or record."
}
aiPrompt += f" {structureInstructions.get(dataStructure.lower(), structureInstructions['nested'])}"
aiPrompt += " Ensure all extracted data is accurate and complete."
return await self.process({
"aiPrompt": aiPrompt,
"documentList": documentList,
"resultType": resultType
})

View file

@ -3,18 +3,17 @@
"""
Generate Document action for AI operations.
Generates documents from scratch or based on templates/inputs using hierarchical approach.
Wrapper around AI service callAiContent method.
"""
import logging
import time
from typing import Dict, Any, Optional
from typing import Dict, Any, Optional, List
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator
from modules.services.serviceGeneration.subContentGenerator import ContentGenerator
from modules.services.serviceGeneration.subDocumentPurposeAnalyzer import DocumentPurposeAnalyzer
from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData
logger = logging.getLogger(__name__)
@ -59,38 +58,15 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
resultType = "txt"
logger.info(f"Auto-detected Text format from prompt")
maxSectionLength = parameters.get("maxSectionLength", 500)
parallelGeneration = parameters.get("parallelGeneration", True)
progressLogging = parameters.get("progressLogging", True)
# Create operation ID for progress tracking
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
operationId = f"doc_gen_{workflowId}_{int(time.time())}"
parentOperationId = parameters.get('parentOperationId')
try:
# Phase 1: Structure Generation
if progressLogging:
self.services.chat.progressLogStart(
operationId,
"Document",
"Structure Generation",
"Generating document structure...",
parentOperationId=parentOperationId
)
structureGenerator = StructureGenerator(self.services)
# Analyze document purposes and process documents accordingly
cachedContent = None
imageDocuments = []
documentPurposes = {}
# Convert documentList to DocumentReferenceList if needed
docRefList = None
if documentList:
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.1, "Analyzing document purposes...")
# Convert documentList to DocumentReferenceList
from modules.datamodels.datamodelDocref import DocumentReferenceList
if isinstance(documentList, DocumentReferenceList):
@ -101,301 +77,78 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
docRefList = DocumentReferenceList.from_string_list(documentList)
else:
docRefList = DocumentReferenceList(references=[])
# Get ChatDocuments
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
if chatDocuments:
logger.info(f"Analyzing purposes for {len(chatDocuments)} documents")
# Analyze document purposes using AI
purposeAnalyzer = DocumentPurposeAnalyzer(self.services)
purposeAnalysis = await purposeAnalyzer.analyzeDocumentPurposes(
userPrompt=prompt,
chatDocuments=chatDocuments,
actionContext="generateDocument"
)
documentPurposes = {dp["document_id"]: dp for dp in purposeAnalysis.get("document_purposes", [])}
logger.info(f"Purpose analysis complete: {purposeAnalysis.get('overall_intent', 'N/A')}")
# Separate documents by purpose
textDocs = []
imageDocsToInclude = []
imageDocsToAnalyze = []
for doc in chatDocuments:
docPurpose = documentPurposes.get(doc.id, {})
purpose = docPurpose.get("purpose", "extract_text_content")
if purpose == "include_image":
imageDocsToInclude.append(doc)
elif purpose == "analyze_image_vision":
imageDocsToAnalyze.append(doc)
elif purpose in ["extract_text_content", "use_as_template", "use_as_reference", "extract_data"]:
textDocs.append(doc)
# Skip "attach" purpose - don't process
# Process text documents (extract content)
extractedResults = []
if textDocs:
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.15, f"Extracting content from {len(textDocs)} text document(s)...")
# Prepare extraction options with purpose-specific prompts
extractionOptionsList = []
for doc in textDocs:
docPurpose = documentPurposes.get(doc.id, {})
extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all content from the document"
extractionOptions = ExtractionOptions(
prompt=extractionPrompt,
mergeStrategy=MergeStrategy(
mergeType="concatenate",
groupBy="typeGroup",
orderBy="id"
),
processDocumentsIndividually=True
)
extractionOptionsList.append((doc, extractionOptions))
# Extract content from text documents
for doc, extractionOptions in extractionOptionsList:
try:
docResults = self.services.extraction.extractContent(
[doc],
extractionOptions,
parentOperationId=operationId
)
extractedResults.extend(docResults)
except Exception as e:
logger.error(f"Error extracting content from {doc.fileName}: {str(e)}")
logger.info(f"Extracted content from {len(extractedResults)} text document(s)")
# Process images to analyze (vision call)
if imageDocsToAnalyze:
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.2, f"Analyzing {len(imageDocsToAnalyze)} image(s) with vision AI...")
# Extract content from images using vision analysis
for doc in imageDocsToAnalyze:
try:
docPurpose = documentPurposes.get(doc.id, {})
extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all text and information from this image"
extractionOptions = ExtractionOptions(
prompt=extractionPrompt,
mergeStrategy=MergeStrategy(
mergeType="concatenate",
groupBy="typeGroup",
orderBy="id"
),
processDocumentsIndividually=True
)
docResults = self.services.extraction.extractContent(
[doc],
extractionOptions,
parentOperationId=operationId
)
extractedResults.extend(docResults)
except Exception as e:
logger.error(f"Error analyzing image {doc.fileName}: {str(e)}")
logger.info(f"Analyzed {len(imageDocsToAnalyze)} image(s) with vision AI")
# Process images to include (store image data)
if imageDocsToInclude:
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.25, f"Preparing {len(imageDocsToInclude)} image(s) for inclusion...")
# Get image data for inclusion
from modules.interfaces.interfaceDbComponentObjects import getInterface
dbInterface = getInterface()
for doc in imageDocsToInclude:
try:
# Get image bytes
imageBytes = dbInterface.getFileData(doc.fileId)
if imageBytes:
# Encode to base64
import base64
base64Data = base64.b64encode(imageBytes).decode('utf-8')
# Create image document entry
imageDoc = {
"id": doc.id,
"fileName": doc.fileName,
"mimeType": doc.mimeType,
"base64Data": base64Data,
"altText": doc.fileName or "Image",
"fileSize": doc.fileSize
}
imageDocuments.append(imageDoc)
logger.debug(f"Prepared image {doc.fileName} for inclusion ({len(base64Data)} chars base64)")
else:
logger.warning(f"Could not retrieve image data for {doc.fileName}")
except Exception as e:
logger.error(f"Error preparing image {doc.fileName} for inclusion: {str(e)}")
logger.info(f"Prepared {len(imageDocuments)} image(s) for inclusion")
# Build cachedContent with all information
cachedContent = {
"extractedContent": extractedResults,
"imageDocuments": imageDocuments,
"documentPurposes": documentPurposes,
"extractionTimestamp": time.time(),
"sourceDocuments": [doc.id for doc in chatDocuments]
}
logger.info(f"Document processing complete: {len(extractedResults)} extracted, {len(imageDocuments)} images to include")
# Generate structure
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.2, "Generating document structure...")
# Prepare title
title = parameters.get("documentType") or "Generated Document"
structure = await structureGenerator.generateStructure(
userPrompt=prompt,
documentList=documentList if documentList else None,
cachedContent=cachedContent,
maxSectionLength=maxSectionLength,
existingImages=imageDocuments # Pass existing images for structure generation
# Call AI service for document generation
# callAiContent handles documentList internally via Phases 5A-5E
options = AiCallOptions(
operationType=OperationTypeEnum.DATA_GENERATE,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.DETAILED,
compressPrompt=False,
compressContext=False
)
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated")
# Phase 2: Content Generation
if progressLogging:
self.services.chat.progressLogUpdate(
operationId,
0.34,
"Starting content generation..."
)
contentGenerator = ContentGenerator(self.services)
# Create enhanced progress callback
def progressCallback(sectionIndex: int, totalSections: int, message: str):
if progressLogging:
# Calculate progress: 34% to 90% for content generation phase
if totalSections > 0:
progress = 0.34 + (0.56 * (sectionIndex / totalSections))
else:
progress = 0.34
# Format message
if sectionIndex > 0 and totalSections > 0:
progressMessage = f"Section {sectionIndex}/{totalSections}: {message}"
else:
progressMessage = message
self.services.chat.progressLogUpdate(
operationId,
progress,
progressMessage
)
completeStructure = await contentGenerator.generateContent(
structure=structure,
cachedContent=cachedContent,
userPrompt=prompt,
progressCallback=progressCallback,
parallelGeneration=parallelGeneration
)
if progressLogging:
self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated")
# Phase 3: Integration & Rendering
if progressLogging:
self.services.chat.progressLogUpdate(
operationId,
0.91,
"Rendering final document..."
)
# Use existing renderReport method
title = structure.get("metadata", {}).get("title", "Generated Document")
if documentType:
title = f"{title} ({documentType})"
renderedContent, mimeType, images = await self.services.generation.renderReport(
extractedContent=completeStructure,
aiResponse: AiResponse = await self.services.ai.callAiContent(
prompt=prompt,
options=options,
documentList=docRefList,  # Pass documentList directly - callAiContent performs Phases 5A-5E
outputFormat=resultType,
title=title,
userPrompt=prompt,
aiService=self.services.ai
parentOperationId=parentOperationId
)
# Build list of documents to return
documents = [
ActionDocument(
documentName=f"document.{resultType}",
documentData=renderedContent,
mimeType=mimeType
)
]
# Convert AiResponse to ActionResult
documents = []
# Add images as separate documents
if images:
logger.info(f"Processing {len(images)} image(s) from renderer")
import base64
for idx, imageData in enumerate(images):
try:
base64Data = imageData.get("base64Data", "")
altText = imageData.get("altText", f"image_{idx + 1}")
caption = imageData.get("caption", "")
sectionId = imageData.get("sectionId", f"section_{idx + 1}")
if base64Data:
# Decode base64 to bytes
imageBytes = base64.b64decode(base64Data)
# Determine filename and mime type
filename = imageData.get("filename", f"image_{idx + 1}.png")
if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')):
filename = f"image_{idx + 1}.png"
# Determine mime type from filename
if filename.lower().endswith('.png'):
imageMimeType = "image/png"
elif filename.lower().endswith(('.jpg', '.jpeg')):
imageMimeType = "image/jpeg"
elif filename.lower().endswith('.gif'):
imageMimeType = "image/gif"
elif filename.lower().endswith('.webp'):
imageMimeType = "image/webp"
else:
imageMimeType = "image/png" # Default
# Add image document
documents.append(ActionDocument(
documentName=filename,
documentData=imageBytes,
mimeType=imageMimeType
))
logger.info(f"Added image document: {filename} (section: {sectionId}, {len(imageBytes)} bytes, alt: {altText})")
# Convert DocumentData to ActionDocument
if aiResponse.documents:
for docData in aiResponse.documents:
documents.append(ActionDocument(
documentName=docData.documentName,
documentData=docData.documentData,
mimeType=docData.mimeType,
sourceJson=docData.sourceJson if hasattr(docData, 'sourceJson') else None
))
# If no documents but content exists, create a document from content
if not documents and aiResponse.content:
# Determine document name from metadata
docName = f"document.{resultType}"
if aiResponse.metadata and aiResponse.metadata.filename:
docName = aiResponse.metadata.filename
elif aiResponse.metadata and aiResponse.metadata.title:
import re
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title)
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
if sanitized:
if not sanitized.lower().endswith(f".{resultType}"):
docName = f"{sanitized}.{resultType}"
else:
logger.warning(f"Image {idx + 1} (section: {sectionId}) has no base64Data, skipping")
except Exception as e:
logger.error(f"Error adding image document {idx + 1}: {str(e)}", exc_info=True)
continue
else:
logger.debug("No images returned from renderer")
# Note: Document creation is handled by the workflow system
# We just return the rendered content and images in ActionResult
if progressLogging:
self.services.chat.progressLogFinish(operationId, True)
docName = sanitized
# Determine mime type
mimeType = "text/plain"
if resultType == "html":
mimeType = "text/html"
elif resultType == "json":
mimeType = "application/json"
elif resultType == "pdf":
mimeType = "application/pdf"
elif resultType == "md":
mimeType = "text/markdown"
documents.append(ActionDocument(
documentName=docName,
documentData=aiResponse.content.encode('utf-8') if isinstance(aiResponse.content, str) else aiResponse.content,
mimeType=mimeType
))
return ActionResult.isSuccess(documents=documents)
except Exception as e:
logger.error(f"Error in hierarchical document generation: {str(e)}")
if progressLogging:
self.services.chat.progressLogFinish(operationId, False)
logger.error(f"Error in document generation: {str(e)}")
return ActionResult.isFailure(error=str(e))

View file

@ -8,11 +8,12 @@ Universal AI document processing action.
import logging
import time
import json
from typing import Dict, Any, List, Optional
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelAi import AiCallOptions
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart
from modules.datamodels.datamodelExtraction import ContentPart
logger = logging.getLogger(__name__)
@ -82,8 +83,7 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
logger.info(f"Using result type: {resultType} -> {output_extension}")
# Phase 7.3: Extract content first if documents provided, then use contentParts
# Check if contentParts are already provided (preferred path)
# Check if contentParts are already provided (from context.extractContent or other sources)
contentParts: Optional[List[ContentPart]] = None
if "contentParts" in parameters:
contentParts = parameters.get("contentParts")
@ -95,63 +95,42 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
contentParts = None
# If contentParts not provided but documentList is, extract content first
if not contentParts and documentList.references:
self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")
# Get ChatDocuments
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
if not chatDocuments:
logger.warning("No documents found in documentList")
else:
logger.info(f"Extracting content from {len(chatDocuments)} documents")
# Prepare extraction options (use defaults if not provided)
extractionOptions = parameters.get("extractionOptions")
if not extractionOptions:
extractionOptions = ExtractionOptions(
prompt="Extract all content from the document",
mergeStrategy=MergeStrategy(
mergeType="concatenate",
groupBy="typeGroup",
orderBy="id"
),
processDocumentsIndividually=True
)
# Extract content using extraction service with hierarchical progress logging
# Pass operationId for per-document progress tracking
extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
# Combine all ContentParts from all extracted results
contentParts = []
for extracted in extractedResults:
if extracted.parts:
contentParts.extend(extracted.parts)
logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")
# Update progress - preparing AI call
self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
# Build options with only resultFormat - let service layer handle all other parameters
# Build options
output_format = output_extension.replace('.', '') or 'txt'
options = AiCallOptions(
resultFormat=output_format
# Removed all model parameters - service layer will analyze prompt and determine optimal parameters
)
# Update progress - calling AI
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
# Use unified callAiContent method with contentParts (extraction is now separate)
aiResponse = await self.services.ai.callAiContent(
prompt=aiPrompt,
options=options,
contentParts=contentParts, # Already extracted (or None if no documents)
outputFormat=output_format,
parentOperationId=operationId
)
# Use unified callAiContent method
# If contentParts provided (pre-extracted), use them directly
# Otherwise, pass documentList and let callAiContent handle Phases 5A-5E internally
# Note: ContentExtracted documents (from context.extractContent) are now handled
# automatically in _extractAndPrepareContent() (Phase 5B)
if contentParts:
# Pre-extracted ContentParts - use them directly
aiResponse = await self.services.ai.callAiContent(
prompt=aiPrompt,
options=options,
contentParts=contentParts, # Pre-extracted ContentParts
outputFormat=output_format,
parentOperationId=operationId
)
else:
# Pass documentList - callAiContent handles Phases 5A-5E internally
# This includes automatic detection of ContentExtracted documents
aiResponse = await self.services.ai.callAiContent(
prompt=aiPrompt,
options=options,
documentList=documentList,  # callAiContent performs Phases 5A-5E internally
outputFormat=output_format,
parentOperationId=operationId
)
# Update progress - processing result
self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")

View file

@ -15,9 +15,7 @@ from .actions.process import process
from .actions.webResearch import webResearch
from .actions.summarizeDocument import summarizeDocument
from .actions.translateDocument import translateDocument
from .actions.convert import convert
from .actions.convertDocument import convertDocument
from .actions.extractData import extractData
from .actions.generateDocument import generateDocument
logger = logging.getLogger(__name__)
@ -192,69 +190,6 @@ class MethodAi(MethodBase):
},
execute=translateDocument.__get__(self, self.__class__)
),
"convert": WorkflowActionDefinition(
actionId="ai.convert",
description="Convert documents/data between different formats with specific formatting options",
parameters={
"documentList": WorkflowActionParameter(
name="documentList",
type="List[str]",
frontendType=FrontendType.DOCUMENT_REFERENCE,
required=True,
description="Document reference(s) to convert"
),
"inputFormat": WorkflowActionParameter(
name="inputFormat",
type="str",
frontendType=FrontendType.SELECT,
frontendOptions=["json", "csv", "xlsx", "txt"],
required=True,
description="Source format"
),
"outputFormat": WorkflowActionParameter(
name="outputFormat",
type="str",
frontendType=FrontendType.SELECT,
frontendOptions=["csv", "json", "xlsx", "txt"],
required=True,
description="Target format"
),
"columnsPerRow": WorkflowActionParameter(
name="columnsPerRow",
type="int",
frontendType=FrontendType.NUMBER,
required=False,
description="For CSV output, number of columns per row. Default: auto-detect",
validation={"min": 1, "max": 100}
),
"delimiter": WorkflowActionParameter(
name="delimiter",
type="str",
frontendType=FrontendType.TEXT,
required=False,
default=",",
description="For CSV output, delimiter character"
),
"includeHeader": WorkflowActionParameter(
name="includeHeader",
type="bool",
frontendType=FrontendType.CHECKBOX,
required=False,
default=True,
description="For CSV output, whether to include header row"
),
"language": WorkflowActionParameter(
name="language",
type="str",
frontendType=FrontendType.SELECT,
frontendOptions=["de", "en", "fr"],
required=False,
default="en",
description="Language for output"
)
},
execute=convert.__get__(self, self.__class__)
),
"convertDocument": WorkflowActionDefinition(
actionId="ai.convertDocument",
description="Convert documents between different formats (PDF→Word, Excel→CSV, etc.)",
@ -285,45 +220,6 @@ class MethodAi(MethodBase):
},
execute=convertDocument.__get__(self, self.__class__)
),
"extractData": WorkflowActionDefinition(
actionId="ai.extractData",
description="Extract structured data from documents (key-value pairs, entities, facts, etc.)",
parameters={
"documentList": WorkflowActionParameter(
name="documentList",
type="List[str]",
frontendType=FrontendType.DOCUMENT_REFERENCE,
required=True,
description="Document reference(s) to extract data from"
),
"dataStructure": WorkflowActionParameter(
name="dataStructure",
type="str",
frontendType=FrontendType.SELECT,
frontendOptions=["flat", "nested", "list"],
required=False,
default="nested",
description="Desired data structure"
),
"fields": WorkflowActionParameter(
name="fields",
type="List[str]",
frontendType=FrontendType.MULTISELECT,
required=False,
description="Specific fields/properties to extract (e.g., [name, date, amount])"
),
"resultType": WorkflowActionParameter(
name="resultType",
type="str",
frontendType=FrontendType.SELECT,
frontendOptions=["json", "csv", "xlsx"],
required=False,
default="json",
description="Output format"
)
},
execute=extractData.__get__(self, self.__class__)
),
"generateDocument": WorkflowActionDefinition(
actionId="ai.generateDocument",
description="Generate documents from scratch or based on templates/inputs",
@ -371,9 +267,7 @@ class MethodAi(MethodBase):
self.webResearch = webResearch.__get__(self, self.__class__)
self.summarizeDocument = summarizeDocument.__get__(self, self.__class__)
self.translateDocument = translateDocument.__get__(self, self.__class__)
self.convert = convert.__get__(self, self.__class__)
self.convertDocument = convertDocument.__get__(self, self.__class__)
self.extractData = extractData.__get__(self, self.__class__)
self.generateDocument = generateDocument.__get__(self, self.__class__)
def _format_timestamp_for_filename(self) -> str:

View file

@ -19,10 +19,21 @@ logger = logging.getLogger(__name__)
@action
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Extract content from documents (separate from AI calls).
Extract raw content parts from documents without AI processing.
This action performs pure content extraction without AI processing.
The extracted ContentParts can then be used by subsequent AI processing actions.
This action performs pure content extraction WITHOUT AI/OCR processing.
It returns ContentParts with different typeGroups:
- "text": Extracted text from text-based formats (PDF text layers, Word docs, etc.)
- "image": Images as base64-encoded data (NOT converted to text, no OCR)
- "table": Tables as structured data
- "structure": Structured content (JSON, etc.)
- "container": Container elements (PDF pages, etc.)
IMPORTANT:
- Images are returned as base64 data, NOT as extracted text
- No OCR is performed - images are preserved as visual elements
- Text extraction only works for text-based formats (not images)
- The extracted ContentParts can then be used by subsequent AI processing actions
Parameters:
- documentList (list, required): Document reference(s) to extract content from.
@ -30,7 +41,8 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
Returns:
- ActionResult with ActionDocument containing ContentExtracted objects
- ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
- ContentExtracted.parts contains List[ContentPart] with various typeGroups
- Each ContentPart has a typeGroup indicating its type (text, image, table, etc.)
"""
try:
# Init progress logger
@ -79,12 +91,26 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
# Convert dict to ExtractionOptions object if needed, or create defaults
if extractionOptionsParam:
if isinstance(extractionOptionsParam, dict):
# Ensure required fields are present
if "prompt" not in extractionOptionsParam:
extractionOptionsParam["prompt"] = "Extract all content from the document"
if "mergeStrategy" not in extractionOptionsParam:
extractionOptionsParam["mergeStrategy"] = MergeStrategy(
mergeType="concatenate",
groupBy="typeGroup",
orderBy="id"
)
# Convert dict to ExtractionOptions object
extractionOptions = ExtractionOptions(**extractionOptionsParam)
try:
extractionOptions = ExtractionOptions(**extractionOptionsParam)
except Exception as e:
logger.warning(f"Failed to create ExtractionOptions from dict: {str(e)}, using defaults")
extractionOptions = None
elif isinstance(extractionOptionsParam, ExtractionOptions):
extractionOptions = extractionOptionsParam
else:
# Invalid type, use defaults
logger.warning(f"Invalid extractionOptions type: {type(extractionOptionsParam)}, using defaults")
extractionOptions = None
else:
extractionOptions = None

View file

@ -50,7 +50,7 @@ class MethodContext(MethodBase):
),
"extractContent": WorkflowActionDefinition(
actionId="context.extractContent",
description="Extract content from documents (separate from AI calls)",
description="Extract raw content parts from documents without AI processing. Returns ContentParts with different typeGroups (text, image, table, structure, container). Images are returned as base64 data, not as extracted text. Text content is extracted from text-based formats (PDF text layers, Word docs, etc.) but NOT from images (no OCR). Use this action to prepare documents for subsequent AI processing actions.",
parameters={
"documentList": WorkflowActionParameter(
name="documentList",
@ -64,7 +64,7 @@ class MethodContext(MethodBase):
type="dict",
frontendType=FrontendType.JSON,
required=False,
description="Extraction options (if not provided, defaults are used)"
description="Extraction options (if not provided, defaults are used). Note: This action does NOT use AI - it performs pure content extraction. Images are preserved as base64 data, not converted to text."
)
},
execute=extractContent.__get__(self, self.__class__)

View file

@ -1,354 +0,0 @@
# Architecture & Implementation Analysis
## Deep Review of Hierarchical Document Generation
**Date**: 2025-12-22
**Status**: Critical Issues Found
---
## Executive Summary
The hierarchical document generation system is **partially implemented** but has **critical architectural mismatches** and **implementation gaps** that prevent it from working correctly. While core components exist, several fundamental issues need to be addressed.
---
## ✅ What's Correctly Implemented
### Phase 1: Core Infrastructure ✅
- ✅ `StructureGenerator` class exists with `generateStructure()` method
- ✅ `ContentGenerator` class exists with `generateContent()` method
- ✅ `ContentIntegrator` class exists with `integrateContent()` method
- ✅ `generateDocument` action uses hierarchical approach
- ✅ Basic progress logging implemented
- ✅ Error handling with `createErrorSection()` implemented
### Phase 2: Image Generation ✅
- ✅ `_generateImageSection()` method implemented
- ✅ Image prompt extraction from structure
- ✅ Base64 image data storage
- ✅ Error handling for image failures
### Phase 3: Parallel Processing ✅
- ✅ `_generateSectionsParallel()` method implemented
- ✅ `_generateSectionsSequential()` method implemented
- ✅ Batch processing for large documents
- ✅ Progress callback system
- ✅ Exception handling in parallel execution
---
## ❌ Critical Issues Found
### Issue 1: Previous Sections Context Not Working in Parallel Mode ⚠️ **PARTIALLY FIXED**
**Problem**:
- In parallel mode, sections within the same batch cannot see each other (correct)
- BUT: Sections in later batches should see sections from earlier batches
- **Current Status**: Code was fixed to accumulate previous sections, but needs verification
**Location**: `subContentGenerator.py` lines 240-319
**Fix Applied**:
- Added `accumulatedPreviousSections` to track sections across batches
- Pass accumulated sections to each batch
- **VERIFICATION NEEDED**: Test that prompts actually show previous sections
**Risk**: Medium - May cause continuity issues in generated content
---
### Issue 2: Variable Shadowing Bug ✅ **FIXED**
**Problem**:
- `contentType` variable was shadowed in loop, causing wrong section type in prompts
**Location**: `subContentGenerator.py` line 676
**Fix Applied**:
- Renamed loop variable to `prevContentType`
**Status**: ✅ Fixed
---
### Issue 3: Missing `generation_hint` in Structure Response ✅ **FIXED**
**Problem**:
- Structure generator creates generic hints like "Section heading" instead of meaningful hints
- AI generates same content for all headings because hints are identical
**Location**: `subStructureGenerator.py` lines 242-269
**Fix Applied**:
- Added `_extractMeaningfulHint()` method to extract meaningful hints from section IDs
- Example: `section_heading_current_state` → "Current State"
**Status**: ✅ Fixed
---
### Issue 4: JSON Template Architecture Mismatch ✅ **FIXED**
**Problem**:
- `jsonTemplateDocument` showed filled `elements` arrays, but structure generation requires empty arrays
- Template missing `generation_hint` and `complexity` fields
- Template showed `order: 0` but should start from 1
**Location**: `datamodelJson.py`
**Fix Applied**:
- Updated template to show empty `elements: []`
- Added `generation_hint` to all sections
- Added `complexity` to all sections
- Changed `order` to start from 1
- Added `title` to metadata
**Status**: ✅ Fixed
---
### Issue 5: Structure Prompt Instructions Mismatch ✅ **FIXED**
**Problem**:
- Prompt said "All sections must have empty elements arrays" but template showed filled arrays
- Prompt didn't explicitly require `generation_hint` and `complexity` fields
**Location**: `subStructureGenerator.py` lines 181-190
**Fix Applied**:
- Enhanced prompt to explicitly require `generation_hint` and `complexity`
- Clarified that template examples show structure, but elements must be empty
**Status**: ✅ Fixed
---
## ⚠️ Remaining Issues & Gaps
### Issue 6: Missing Validation Before Content Generation ⚠️ **NOT IMPLEMENTED**
**Problem**:
- No validation that structure has required fields before content generation
- No check that all sections have `generation_hint` before generating content
**Expected** (from Phase 6):
```python
# Validate structure before content generation
if not validateStructure(structure):
raise ValueError("Invalid structure")
```
**Current**: Validation happens in `_validateAndEnhanceStructure()` but only adds missing fields, doesn't validate
**Impact**: Low - Enhancement adds missing fields, but explicit validation would be better
**Recommendation**: Add explicit validation method
---
### Issue 7: Previous Sections Formatting Missing Content ⚠️ **PARTIALLY IMPLEMENTED**
**Problem**:
- Previous sections formatting extracts content from `elements`, but if sections don't have elements yet (in parallel mode), it shows nothing
- Should show `generation_hint` as fallback when elements not available
**Location**: `subContentGenerator.py` lines 671-709
**Current Behavior**:
- Shows content preview if elements exist
- Shows nothing if elements don't exist
**Expected Behavior**:
- Show content preview if elements exist
- Show `generation_hint` as fallback if elements don't exist
**Impact**: Medium - Reduces context quality in parallel generation
**Recommendation**: Add fallback to show `generation_hint` when elements not available
---
### Issue 8: Debug File Shows Raw Response, Not Validated Structure ⚠️ **NOT FIXED**
**Problem**:
- Debug file writes `aiResponse.content` (raw AI response) before validation
- Can't verify if `generation_hint` was added by validation
**Location**: `subStructureGenerator.py` lines 77-84
**Impact**: Low - Makes debugging harder but doesn't affect functionality
**Recommendation**: Write validated structure to separate debug file
---
### Issue 9: Missing Unit Tests ⚠️ **NOT IMPLEMENTED**
**Problem**:
- No unit tests for any components (Phase 7 requirement)
- No tests for structure generation
- No tests for content generation
- No tests for integration
**Impact**: High - No way to verify correctness or catch regressions
**Recommendation**: Add comprehensive unit tests
---
### Issue 10: Missing Integration Tests ⚠️ **NOT IMPLEMENTED**
**Problem**:
- No end-to-end tests
- No tests with images
- No tests with long documents
- No error scenario tests
**Impact**: High - No verification of complete flow
**Recommendation**: Add integration tests
---
### Issue 11: Content Caching Not Optimized ⚠️ **PARTIALLY IMPLEMENTED**
**Problem**:
- Content is extracted and cached, but:
- No cache validation (check if documents changed)
- No cache reuse verification
- Content is passed to prompts but may not be formatted efficiently
**Expected** (from Phase 5):
- Cache validation
- Efficient formatting
- Performance testing
**Current**: Basic caching exists but not optimized
**Impact**: Medium - Works but could be more efficient
**Recommendation**: Add cache validation and optimization
---
### Issue 12: Renderer Updates Not Verified ⚠️ **UNKNOWN**
**Problem**:
- Implementation plan requires renderer updates for images
- HTML renderer should create separate image files
- PDF/XLSX/PPTX renderers should embed images
- **Status unknown** - need to verify renderers handle images correctly
**Impact**: High - Images may not render correctly
**Recommendation**: Verify all renderers handle images correctly
---
## 📋 Architecture Compliance Check
### Data Structure Compliance ✅
| Field | Required | Implemented | Status |
|-------|----------|-------------|--------|
| `metadata.title` | Yes | ✅ | ✅ |
| `metadata.split_strategy` | Yes | ✅ | ✅ |
| `sections[].id` | Yes | ✅ | ✅ |
| `sections[].content_type` | Yes | ✅ | ✅ |
| `sections[].complexity` | Yes | ✅ | ✅ |
| `sections[].generation_hint` | Yes | ✅ | ✅ |
| `sections[].order` | Yes | ✅ | ✅ |
| `sections[].elements` | Yes | ✅ | ✅ |
| `sections[].image_prompt` | Image only | ✅ | ✅ |
### Component Method Compliance ✅
| Component | Method | Required | Implemented | Status |
|-----------|--------|----------|-------------|--------|
| StructureGenerator | `generateStructure()` | Yes | ✅ | ✅ |
| StructureGenerator | `_createStructurePrompt()` | Yes | ✅ | ✅ |
| StructureGenerator | `_identifySectionComplexity()` | Yes | ✅ | ✅ |
| StructureGenerator | `_extractImagePrompts()` | Yes | ✅ | ✅ |
| StructureGenerator | `_validateAndEnhanceStructure()` | Yes | ✅ | ✅ |
| StructureGenerator | `_extractMeaningfulHint()` | Yes | ✅ | ✅ |
| ContentGenerator | `generateContent()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateSectionContent()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateSimpleSection()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateComplexTextSection()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateImageSection()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateSectionsParallel()` | Yes | ✅ | ✅ |
| ContentGenerator | `_generateSectionsSequential()` | Yes | ✅ | ✅ |
| ContentGenerator | `_createSectionPrompt()` | Yes | ✅ | ✅ |
| ContentIntegrator | `integrateContent()` | Yes | ✅ | ✅ |
| ContentIntegrator | `validateCompleteness()` | Yes | ✅ | ✅ |
| ContentIntegrator | `createErrorSection()` | Yes | ✅ | ✅ |
---
## 🎯 Priority Fixes Needed
### Critical (Must Fix)
1. ✅ **Issue 2**: Variable shadowing bug - **FIXED**
2. ✅ **Issue 3**: Missing generation_hint - **FIXED**
3. ✅ **Issue 4**: JSON template mismatch - **FIXED**
4. ✅ **Issue 5**: Prompt instructions mismatch - **FIXED**
5. ⚠️ **Issue 1**: Previous sections context - **NEEDS VERIFICATION**
### High Priority (Should Fix)
6. ⚠️ **Issue 12**: Renderer image handling - **NEEDS VERIFICATION**
7. ⚠️ **Issue 9**: Missing unit tests - **NOT IMPLEMENTED**
8. ⚠️ **Issue 10**: Missing integration tests - **NOT IMPLEMENTED**
### Medium Priority (Nice to Have)
9. ⚠️ **Issue 7**: Previous sections formatting fallback - **PARTIALLY IMPLEMENTED**
10. ⚠️ **Issue 11**: Content caching optimization - **PARTIALLY IMPLEMENTED**
11. ⚠️ **Issue 6**: Structure validation - **NOT IMPLEMENTED**
12. ⚠️ **Issue 8**: Debug file improvements - **NOT IMPLEMENTED**
---
## ✅ Summary
### What Works
- Core infrastructure is implemented
- Image generation is integrated
- Parallel processing is implemented
- Error handling is in place
- Progress logging works
### What's Fixed (This Session)
- Variable shadowing bug
- Missing generation_hint extraction
- JSON template architecture mismatch
- Prompt instructions clarity
- Previous sections tracking (needs verification)
### What Needs Work
- Unit and integration tests
- Renderer verification
- Previous sections formatting fallback
- Cache optimization
- Structure validation
### Overall Status
**Architecture**: ✅ **85% Compliant**
**Implementation**: ✅ **80% Complete**
**Testing**: ❌ **0% Complete**
**Production Ready**: ⚠️ **Not Yet** (needs testing and verification)
---
## Next Steps
1. **Verify Issue 1 Fix**: Test that previous sections are correctly tracked in parallel mode
2. **Verify Issue 12**: Test that all renderers handle images correctly
3. **Add Unit Tests**: Start with critical components (StructureGenerator, ContentGenerator)
4. **Add Integration Tests**: Test end-to-end flow with various scenarios
5. **Improve Previous Sections Formatting**: Add fallback to show generation_hint when elements not available
6. **Add Structure Validation**: Explicit validation before content generation
7. **Optimize Content Caching**: Add cache validation and efficient formatting
---
**Analysis Complete**: 2025-12-22

View file

@ -1,459 +0,0 @@
# Concept: Hierarchical Document Generation with Image Integration
## Executive Summary
This concept proposes a **three-phase hierarchical approach** to document generation that enables proper image integration and handles complex documents efficiently.
**Key Decisions**:
- ✅ **Performance**: Parallel processing with ChatLog progress messages
- ✅ **Error Handling**: Skip failed sections, show error messages
- ✅ **Image Storage**: Store as base64 in JSON (renderers need direct access)
- ✅ **Backward Compatibility**: Not needed - implement as new default
**Renderer Status**:
- ✅ **Ready**: Text, Markdown, DOCX renderers
- ⚠️ **Needs Update**: HTML (create separate image files), PDF (embed images)
- ⚠️ **Needs Implementation**: XLSX, PPTX (add image support)
## Problem Statement
Currently, the document generation system has the following limitations:
1. **No Image Integration**: Images are generated separately but cannot be embedded into document structures
2. **Single-Pass Generation**: Documents are generated in one AI call, making it difficult to handle complex sections (long text, images, chapters)
3. **Repeated Extraction**: Content extraction may happen multiple times unnecessarily
4. **No Structured Approach**: No mechanism to first define document structure, then populate sections
## Current Architecture Analysis
### Current Flow:
```
User Request → ai.generateDocument → ai.process → AI JSON Generation → Renderer → Final Document
```
### Issues:
- AI generates complete JSON structure in one pass
- Images are generated separately via `ai.generate` action
- No mechanism to integrate generated images into document structure
- JSON schema supports `image` content_type, but AI rarely generates it
- Content extraction happens per action, not cached/reused
### Current Image Handling:
- Images can be rendered IF they exist in the JSON structure (`content_type: "image"`)
- Image data expected as `base64Data` in elements
- Renderers support image rendering (Docx, PDF, HTML, etc.)
- But images are never generated WITHIN document generation
## Proposed Solution: Hierarchical Document Generation
### Core Concept
**Three-Phase Approach:**
1. **Structure Generation Phase**: Generate document skeleton with section placeholders
2. **Content Generation Phase**: Generate content for each section (text or image) via sub-prompts
3. **Integration Phase**: Merge all generated content into final document structure
### Architecture Overview
```
┌─────────────────────────────────────────────────────────────┐
│ Phase 1: Structure Generation │
│ - Generate document skeleton │
│ - Identify sections (text, image, complex) │
│ - Create section placeholders with metadata │
└─────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────┐
│ Phase 2: Content Generation (Tree-like) │
│ │
│ ┌──────────────────────────────────────────────┐ │
│ │ Section 1: Heading (simple) │ │
│ │ → Generate directly │ │
│ └──────────────────────────────────────────────┘ │
│ │
│ ┌──────────────────────────────────────────────┐ │
│ │ Section 2: Paragraph (simple) │ │
│ │ → Generate directly │ │
│ └──────────────────────────────────────────────┘ │
│ │
│ ┌──────────────────────────────────────────────┐ │
│ │ Section 3: Image (complex) │ │
│ │ → Sub-prompt: Generate image │ │
│ │ → Store image data │ │
│ │ → Create image section with base64Data │ │
│ └──────────────────────────────────────────────┘ │
│ │
│ ┌──────────────────────────────────────────────┐ │
│ │ Section 4: Long Chapter (complex) │ │
│ │ → Sub-prompt: Generate chapter content │ │
│ │ → Split into subsections if needed │ │
│ └──────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────┐
│ Phase 3: Integration │
│ - Merge all generated content │
│ - Replace placeholders with actual data │
│ - Validate structure completeness │
│ - Render to final format │
└─────────────────────────────────────────────────────────────┘
```
## Detailed Design
### Phase 1: Structure Generation
**Purpose**: Create document skeleton with section metadata
**Process**:
1. AI generates document structure with sections
2. Each section includes:
- `id`: Unique identifier
- `content_type`: Type (heading, paragraph, image, table, etc.)
- `complexity`: "simple" or "complex"
- `generation_hint`: Instructions for content generation
- `order`: Section order
- `elements`: Empty or placeholder
**Example Structure**:
```json
{
"metadata": {
"title": "Children's Bedtime Story",
"split_strategy": "single_document"
},
"documents": [{
"id": "doc_1",
"sections": [
{
"id": "section_title",
"content_type": "heading",
"complexity": "simple",
"generation_hint": "Story title",
"order": 1,
"elements": []
},
{
"id": "section_intro",
"content_type": "paragraph",
"complexity": "simple",
"generation_hint": "Introduction paragraph",
"order": 2,
"elements": []
},
{
"id": "section_image_1",
"content_type": "image",
"complexity": "complex",
"generation_hint": "Illustration: Rabbit meeting owl in moonlit forest",
"image_prompt": "A small brown rabbit sitting in a peaceful forest clearing under moonlight with stars, meeting a wise owl perched on a branch",
"order": 3,
"elements": []
},
{
"id": "section_chapter_1",
"content_type": "paragraph",
"complexity": "complex",
"generation_hint": "First chapter: Rabbit's adventure begins",
"order": 4,
"elements": []
}
]
}]
}
```
### Phase 2: Content Generation
**Purpose**: Generate actual content for each section
**Process**:
1. Iterate through sections in order
2. For each section:
- **Simple sections** (heading, short paragraph):
- Generate content directly via AI
- Populate `elements` array
- **Complex sections** (image, long chapter):
- Create sub-prompt based on `generation_hint` and `image_prompt`
- Generate content via specialized action:
- Images: `ai.generate` with image generation
- Long text: `ai.process` with focused prompt
- Store generated content
- Populate `elements` array
**Content Caching**:
- Extract content from source documents ONCE at the start
- Cache extracted content for reuse across all sections
- Pass cached content to sub-prompts to avoid re-extraction
**Image Generation**:
- For `content_type: "image"` sections:
- Use `image_prompt` from structure
- Call `ai.generate` action with image generation
- Receive base64 image data
- Create image element:
```json
{
"url": "data:image/png;base64,<base64_data>",
"base64Data": "<base64_data>",
"altText": "<alt_text>",
"caption": "<caption>"
}
```
### Phase 3: Integration
**Purpose**: Merge all content into final document structure
**Process**:
1. Validate all sections have content
2. Merge generated content into structure
3. Replace placeholders with actual data
4. Finalize JSON structure
5. Render to target format (docx, pdf, html, etc.)
## Implementation Strategy
### New Components Needed
1. **Structure Generator** (`structureGenerator.py`)
- Generates document skeleton
- Identifies section complexity
- Creates generation hints
2. **Content Generator** (`contentGenerator.py`)
- Generates content for each section
- Handles simple vs complex sections
- Manages sub-prompts and image generation
- Caches extracted content
3. **Content Integrator** (`contentIntegrator.py`)
- Merges generated content
- Validates completeness
- Finalizes document structure
### Modified Components
1. **`generateDocument` action**
- Implement hierarchical generation as default
- Orchestrate three phases
- Add progress logging for each phase
2. **`process` action**
- Support content caching (extract once, reuse)
- Support sub-prompt generation for sections
3. **Prompt Builder** (`subPromptBuilderGeneration.py`)
- Add structure generation prompt
- Add section-specific content prompts
- Add image generation prompt templates
4. **Renderers** (Update required):
- **HTML Renderer**: Create separate image files and link them
- **PDF Renderer**: Embed images using reportlab
- **XLSX Renderer**: Add image embedding support
- **PPTX Renderer**: Add image embedding support
### New Action Parameters
**For `generateDocument`**:
- `enableImageIntegration`: boolean (default: true)
- `maxSectionLength`: int (threshold for "complex" sections, default: 500 words)
- `parallelGeneration`: boolean (default: true) - enable parallel section generation
- `progressLogging`: boolean (default: true) - send ChatLog progress updates
**For sub-prompts**:
- `sectionContext`: Previous sections for context
- `cachedContent`: Extracted content cache (to avoid re-extraction)
- `targetSection`: Section metadata
- `previousSections`: Array of already-generated sections for continuity
## Benefits
1. **Image Integration**: Images can be generated and embedded into documents
2. **Structured Approach**: Clear separation of structure and content
3. **Efficiency**: Content extracted once, reused across sections
4. **Scalability**: Can handle very long documents by splitting into sections
5. **Quality**: Better control over complex sections (images, long chapters)
6. **Flexibility**: Can generate different content types per section
## Migration Strategy
**Note**: No backward compatibility needed - hierarchical generation can be implemented directly as the new default.
1. **Phase 1**: Implement hierarchical generation as new default
2. **Phase 2**: Update renderers (HTML, PDF, XLSX, PPTX) for image support
3. **Phase 3**: Testing and refinement
4. **Phase 4**: Remove old single-pass mode (or keep as internal fallback only)
## Example Workflow
**User Request**: "Create a children's bedtime story with 5 illustrations"
**Phase 1 Output**:
```json
{
"metadata": {"title": "Flöckchen's Adventure"},
"documents": [{
"sections": [
{"id": "title", "content_type": "heading", "complexity": "simple", ...},
{"id": "intro", "content_type": "paragraph", "complexity": "simple", ...},
{"id": "img1", "content_type": "image", "complexity": "complex",
"image_prompt": "Rabbit meeting owl", ...},
{"id": "chapter1", "content_type": "paragraph", "complexity": "complex", ...},
{"id": "img2", "content_type": "image", "complexity": "complex", ...},
...
]
}]
}
```
**Phase 2 Process**:
- Generate title → populate elements
- Generate intro → populate elements
- Generate image 1 → call `ai.generate`, store base64 → populate elements
- Generate chapter 1 → sub-prompt → populate elements
- Generate image 2 → call `ai.generate`, store base64 → populate elements
- ...
**Phase 3 Output**: Complete document with all sections populated, ready for rendering
## Renderer Readiness Assessment
### Current Renderer Status for Image Handling:
1. **Text Renderer** (`rendererText.py`): ✅ **READY**
- Skips images, shows placeholder: `[Image: altText]`
- No changes needed
2. **Markdown Renderer** (`rendererMarkdown.py`): ✅ **READY**
- Shows placeholder with truncated base64: `![altText](data:image/png;base64,...)`
- No changes needed (markdown limitation)
3. **HTML Renderer** (`rendererHtml.py`): ⚠️ **NEEDS UPDATE**
- Currently: Embeds base64 directly in `<img>` tag as data URI
- **Required Change**: Create separate image files and link to them
- Implementation: Generate image files (e.g., `image_1.png`, `image_2.png`) alongside HTML
- Update `<img>` tags to use relative paths: `<img src="image_1.png" alt="...">`
- Return multiple files: HTML file + image files
4. **PDF Renderer** (`rendererPdf.py`): ⚠️ **NEEDS UPDATE**
- Currently: Shows placeholder `[Image: altText]`
- **Required Change**: Embed images directly in PDF using reportlab
- Implementation: Use `reportlab.platypus.Image()` with base64 decoded bytes
5. **DOCX Renderer** (`rendererDocx.py`): ✅ **READY**
- Embeds images directly using `doc.add_picture()`
- Adds captions below images
- No changes needed
6. **XLSX Renderer** (`rendererXlsx.py`): ⚠️ **NEEDS IMPLEMENTATION**
- Currently: No image handling found
- **Required Change**: Add image support using openpyxl
- Implementation: Use `openpyxl.drawing.image.Image()` to embed images in cells
- Store images in worksheet cells or as floating images
7. **PPTX Renderer** (`rendererPptx.py`): ⚠️ **NEEDS IMPLEMENTATION**
- Currently: No image handling found
- **Required Change**: Add image support using python-pptx
- Implementation: Use `slide.shapes.add_picture()` to add images to slides
### Renderer Update Requirements:
**Priority 1 (Critical for HTML output)**:
- HTML Renderer: Create separate image files and link them
**Priority 2 (Important for document formats)**:
- PDF Renderer: Embed images using reportlab
- XLSX Renderer: Add image embedding support
- PPTX Renderer: Add image embedding support
## Answers to Open Questions
### 1. Performance: How to handle very large documents (100+ sections)?
**Answer**: Use parallel processing where possible, with progress ChatLog messages.
**Implementation Strategy**:
- **Parallel Section Generation**: Generate independent sections in parallel using asyncio
- **Batch Processing**: Process sections in batches (e.g., 10 sections at a time)
- **Progress Tracking**: Send ChatLog progress updates:
- "Generating structure..." (Phase 1)
- "Generating content for section X/Y..." (Phase 2)
- "Generating image for section X..." (Phase 2 - images)
- "Merging content..." (Phase 3)
- "Rendering final document..." (Phase 3)
- **Streaming**: For very large documents, consider streaming partial results
**Example Progress Messages**:
```
Phase 1: Structure Generation (0% → 33%)
Phase 2: Content Generation (33% → 90%)
- Section 1/10: Heading (34%)
- Section 2/10: Paragraph (40%)
- Section 3/10: Image generation (50%)
- Section 4/10: Chapter (60%)
...
Phase 3: Integration & Rendering (90% → 100%)
```
### 2. Error Handling: What if one section fails?
**Answer**: Skip failed sections, keep each section's title and type, and show an error message inside the section.
**Implementation Strategy**:
- **Graceful Degradation**: Continue processing remaining sections
- **Error Section**: Create error placeholder section:
```json
{
"id": "section_failed_3",
"content_type": "paragraph",
"elements": [{
"text": "[ERROR: Failed to generate content for this section. Error: <error_message>]"
}],
"order": 3,
"error": true,
"errorMessage": "<detailed_error>"
}
```
- **Logging**: Log errors for debugging but don't fail entire document
- **User Notification**: Include error count in final progress message
### 3. Image Storage: Where to store generated images?
**Answer**: Store images in JSON as base64, as renderers need them afterwards.
**Implementation Strategy**:
- **In-Memory Storage**: Keep base64 strings in JSON structure during generation
- **JSON Structure**: Store in section elements:
```json
{
"url": "data:image/png;base64,<base64_data>",
"base64Data": "<full_base64_string>",
"altText": "Image description",
"caption": "Optional caption"
}
```
- **Memory Management**: For very large images, consider compression or chunking
- **Renderer Access**: All renderers can access `base64Data` directly from JSON
- **HTML Special Case**: HTML renderer will extract base64, decode, and save as separate files during rendering
### 4. Backward Compatibility: How to ensure existing workflows still work?
**Answer**: No backward compatibility needed.
**Implementation Strategy**:
- **New Default**: Hierarchical generation becomes the default mode
- **Clean Migration**: All document generation uses hierarchical approach
- **No Fallback**: Remove single-pass mode (or keep as internal fallback only)
- **Breaking Change**: Acceptable since this is a new feature/enhancement
## Next Steps
1. **Review and Approval**: Get feedback on concept
2. **Detailed Design**: Design API and data structures
3. **Prototype**: Implement Phase 1 (structure generation)
4. **Testing**: Test with real use cases
5. **Full Implementation**: Implement all phases
6. **Migration**: Migrate existing workflows

View file

@ -1,398 +0,0 @@
# Implementation Plan: Hierarchical Document Generation
## Overview
This document outlines the step-by-step implementation plan for the hierarchical document generation system with image integration.
## Implementation Phases
### Phase 1: Core Infrastructure (Week 1)
**Goal**: Set up core components and data structures
#### Tasks:
1. **Create StructureGenerator Component**
- [ ] Create `subStructureGenerator.py`
- [ ] Implement `generateStructure()` method
- [ ] Implement `_createStructurePrompt()` method
- [ ] Implement `_identifySectionComplexity()` method
- [ ] Implement `_extractImagePrompts()` method
- [ ] Add unit tests
2. **Create ContentGenerator Component**
- [ ] Create `subContentGenerator.py`
- [ ] Implement `generateContent()` method
- [ ] Implement `_generateSectionContent()` method
- [ ] Implement `_generateSimpleSection()` method
- [ ] Implement `_generateComplexTextSection()` method
- [ ] Implement `_createSectionPrompt()` method
- [ ] Add unit tests
3. **Create ContentIntegrator Component**
- [ ] Create `subContentIntegrator.py`
- [ ] Implement `integrateContent()` method
- [ ] Implement `validateCompleteness()` method
- [ ] Implement `createErrorSection()` method
- [ ] Add unit tests
4. **Update generateDocument Action**
- [ ] Modify `generateDocument.py` to use hierarchical approach
- [ ] Add Phase 1: Structure generation
- [ ] Add Phase 2: Content generation (sequential first)
- [ ] Add Phase 3: Integration & rendering
- [ ] Add basic progress logging
- [ ] Add error handling
**Deliverables**:
- Core components created
- Basic hierarchical generation working (sequential)
- Unit tests passing
**Estimated Time**: 3-4 days
---
### Phase 2: Image Generation Integration (Week 1-2)
**Goal**: Integrate image generation into content generation
#### Tasks:
1. **Implement Image Section Generation**
- [ ] Add `_generateImageSection()` method to ContentGenerator
- [ ] Integrate with `ai.generate` action
- [ ] Handle base64 image data storage
- [ ] Add image prompt extraction from structure
- [ ] Add error handling for image generation failures
2. **Update Structure Generation Prompt**
- [ ] Add image section detection in structure prompt
- [ ] Add image_prompt field extraction
- [ ] Test with user prompts requesting images
3. **Test Image Integration**
- [ ] Test image generation in document structure
- [ ] Test multiple images in one document
- [ ] Test image generation failures
**Deliverables**:
- Image generation integrated
- Images stored as base64 in JSON
- Error handling for image failures
**Estimated Time**: 2-3 days
---
### Phase 3: Parallel Processing & Progress Logging (Week 2)
**Goal**: Implement parallel section generation and detailed progress logging
#### Tasks:
1. **Implement Parallel Generation**
- [ ] Add `_generateSectionsParallel()` method
- [ ] Use `asyncio.gather()` for parallel execution
- [ ] Add batch processing for large documents
- [ ] Handle exceptions in parallel execution
- [ ] Test parallel vs sequential performance
2. **Enhance Progress Logging**
- [ ] Create progress callback system
- [ ] Add detailed progress messages:
- Structure generation progress
- Section-by-section progress
- Image generation progress
- Rendering progress
- [ ] Calculate accurate progress percentages
- [ ] Test progress updates
3. **Update generateDocument Action**
- [ ] Integrate parallel generation
- [ ] Add progress callback to content generation
- [ ] Update progress logging throughout phases
**Deliverables**:
- Parallel section generation working
- Detailed progress logging
- Performance improvements
**Estimated Time**: 2-3 days
---
### Phase 4: Renderer Updates (Week 2-3)
**Goal**: Update renderers to properly handle images
#### Tasks:
1. **Update HTML Renderer**
- [ ] Modify `rendererHtml.py`
- [ ] Add `_extractImages()` method
- [ ] Implement separate image file creation
- [ ] Update HTML to use relative image paths
- [ ] Handle multiple image files
- [ ] Test HTML + image files output
2. **Update PDF Renderer**
- [ ] Modify `rendererPdf.py`
- [ ] Update `_renderJsonImage()` to embed images
- [ ] Use `reportlab.platypus.Image()` with base64
- [ ] Handle image sizing and positioning
- [ ] Test PDF with embedded images
3. **Update XLSX Renderer**
- [ ] Modify `rendererXlsx.py`
- [ ] Add `_renderJsonImage()` method
- [ ] Use `openpyxl.drawing.image.Image()` to embed images
- [ ] Handle image placement in cells
- [ ] Test XLSX with images
4. **Update PPTX Renderer**
- [ ] Modify `rendererPptx.py`
- [ ] Add `_renderJsonImage()` method
- [ ] Use `slide.shapes.add_picture()` to add images
- [ ] Handle image sizing on slides
- [ ] Test PPTX with images
**Deliverables**:
- All renderers support images
- HTML creates separate image files
- PDF/XLSX/PPTX embed images directly
**Estimated Time**: 4-5 days
---
### Phase 5: Content Caching & Optimization (Week 3)
**Goal**: Implement content caching to avoid re-extraction
#### Tasks:
1. **Implement Content Cache**
- [ ] Create ContentCache data structure
- [ ] Extract content once at start of generation
- [ ] Pass cached content to all sub-prompts
- [ ] Add cache validation (check if documents changed)
- [ ] Test cache reuse
2. **Optimize Prompt Building**
- [ ] Update structure prompt to use cached content
- [ ] Update section prompts to use cached content
- [ ] Format cached content efficiently
- [ ] Test prompt sizes
3. **Performance Testing**
- [ ] Test with large documents
- [ ] Test with multiple source documents
- [ ] Measure performance improvements
- [ ] Optimize bottlenecks
**Deliverables**:
- Content caching implemented
- No redundant content extraction
- Performance optimized
**Estimated Time**: 2-3 days
---
### Phase 6: Error Handling & Edge Cases (Week 3-4)
**Goal**: Robust error handling and edge case coverage
#### Tasks:
1. **Enhance Error Handling**
- [ ] Improve error section creation
- [ ] Add error recovery strategies
- [ ] Handle partial failures gracefully
- [ ] Add error logging and reporting
2. **Handle Edge Cases**
- [ ] Empty document list
- [ ] No sections generated
- [ ] All sections fail
- [ ] Very large images
- [ ] Very long documents (100+ sections)
- [ ] Missing image prompts
- [ ] Invalid section types
3. **Add Validation**
- [ ] Validate structure before content generation
- [ ] Validate content before integration
- [ ] Validate final document before rendering
- [ ] Add comprehensive error messages
**Deliverables**:
- Robust error handling
- Edge cases covered
- Clear error messages
**Estimated Time**: 2-3 days
---
### Phase 7: Testing & Refinement (Week 4)
**Goal**: Comprehensive testing and refinement
#### Tasks:
1. **Unit Testing**
- [ ] Complete unit tests for all components
- [ ] Test all methods
- [ ] Test error scenarios
- [ ] Achieve >80% code coverage
2. **Integration Testing**
- [ ] Test end-to-end document generation
- [ ] Test with various document types
- [ ] Test with images
- [ ] Test with long documents
- [ ] Test error scenarios
3. **Performance Testing**
- [ ] Test with 10, 50, 100+ sections
- [ ] Measure generation time
- [ ] Measure memory usage
- [ ] Compare parallel vs sequential
- [ ] Optimize if needed
4. **User Acceptance Testing**
- [ ] Test with real user scenarios
- [ ] Test bedtime story with images (original use case)
- [ ] Test business documents
- [ ] Test technical documents
- [ ] Gather feedback
5. **Documentation**
- [ ] Update API documentation
- [ ] Add code comments
- [ ] Update user guides
- [ ] Create examples
**Deliverables**:
- Comprehensive test suite
- Performance benchmarks
- Documentation complete
- Ready for production
**Estimated Time**: 3-4 days
---
## Dependencies
### External Dependencies
- `asyncio` - For parallel processing
- `base64` - For image encoding/decoding
- `reportlab` - For PDF image embedding
- `openpyxl` - For XLSX image embedding
- `python-pptx` - For PPTX image embedding
### Internal Dependencies
- `serviceGeneration` - Main generation service
- `serviceAi` - AI service for generation
- `serviceExtraction` - Content extraction service
- `methodAi.actions.generate` - Image generation action
- `methodAi.actions.process` - Text generation action
## Risk Mitigation
### Risks and Mitigation Strategies
1. **Risk**: Image generation failures break entire document
- **Mitigation**: Error handling creates error sections, continues processing
2. **Risk**: Parallel generation causes memory issues
- **Mitigation**: Batch processing, limit concurrent operations
3. **Risk**: Large base64 images cause JSON size issues
- **Mitigation**: Consider compression or chunking for very large images
4. **Risk**: HTML renderer needs to return multiple files
- **Mitigation**: Modify the return type or create a file bundle system
5. **Risk**: Performance not meeting expectations
- **Mitigation**: Profile and optimize bottlenecks, consider caching
## Success Criteria
### Functional Requirements
- ✅ Documents can be generated with embedded images
- ✅ HTML renderer creates separate image files
- ✅ PDF/XLSX/PPTX renderers embed images
- ✅ Progress logging shows detailed progress
- ✅ Error handling prevents complete failures
- ✅ Content extraction happens only once
### Performance Requirements
- ✅ Parallel generation improves performance by 2x+ for multi-section documents
- ✅ Progress updates appear within 1 second of action
- ✅ Documents with 50+ sections complete in <5 minutes
### Quality Requirements
- ✅ >80% code coverage
- ✅ All edge cases handled
- ✅ Clear error messages
- ✅ Comprehensive documentation
## Rollout Plan
### Step 1: Internal Testing (Week 4)
- Deploy to development environment
- Internal team testing
- Fix critical issues
### Step 2: Beta Testing (Week 5)
- Deploy to staging environment
- Select beta users
- Gather feedback
- Fix issues
### Step 3: Production Deployment (Week 6)
- Deploy to production
- Monitor performance
- Monitor errors
- Gather user feedback
### Step 4: Optimization (Ongoing)
- Monitor usage patterns
- Optimize based on real-world usage
- Add enhancements based on feedback
## Timeline Summary
| Phase | Duration | Start | End |
|-------|----------|-------|-----|
| Phase 1: Core Infrastructure | 3-4 days | Day 1 | Day 4 |
| Phase 2: Image Integration | 2-3 days | Day 4 | Day 7 |
| Phase 3: Parallel Processing | 2-3 days | Day 7 | Day 10 |
| Phase 4: Renderer Updates | 4-5 days | Day 10 | Day 15 |
| Phase 5: Content Caching | 2-3 days | Day 15 | Day 18 |
| Phase 6: Error Handling | 2-3 days | Day 18 | Day 21 |
| Phase 7: Testing & Refinement | 3-4 days | Day 21 | Day 25 |
**Total Estimated Time**: 4-5 weeks
## Next Steps
1. **Review and Approve Plan**
- Review implementation plan
- Approve timeline
- Assign resources
2. **Set Up Development Environment**
- Create feature branch
- Set up test infrastructure
- Prepare development tools
3. **Begin Phase 1**
- Start with StructureGenerator
- Set up project structure
- Begin implementation

View file

@ -167,50 +167,86 @@ class WorkflowManager:
self.workflowProcessor = WorkflowProcessor(self.services)
# Get workflow mode to determine if complexity detection is needed
# Get workflow mode to determine if combined analysis is needed
workflowMode = getattr(self.services.workflow, 'workflowMode', None)
skipComplexityDetection = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
skipCombinedAnalysis = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
if skipComplexityDetection:
logger.info("Skipping complexity detection for AUTOMATION mode - using predefined plan")
if skipCombinedAnalysis:
logger.info("Skipping combined analysis for AUTOMATION mode - using predefined plan")
complexity = "moderate" # Default for automation workflows
needsWorkflowHistory = False # Automation workflows don't need history
detectedLanguage = None # No language detection in automation mode
normalizedRequest = userInput.prompt
intentText = userInput.prompt
contextItems = []
workflowIntent = None
else:
# Process user-uploaded documents from userInput for complexity detection
# This is the correct way: use the input data directly, not workflow state
# Process user-uploaded documents from userInput for combined analysis
documents = []
if userInput.listFileId:
try:
documents = await self._processFileIds(userInput.listFileId, None)
except Exception as e:
logger.warning(f"Failed to process user fileIds for complexity detection: {e}")
logger.warning(f"Failed to process user fileIds for combined analysis: {e}")
# Detect complexity (AI-based semantic understanding) using user input documents
# Also detects language for fast path responses
complexity, needsWorkflowHistory, detectedLanguage = await self.workflowProcessor.detectComplexity(userInput.prompt, documents)
logger.info(f"Request complexity detected: {complexity}, needsWorkflowHistory: {needsWorkflowHistory}, language: {detectedLanguage}")
# Phase 1+2: Kombinierte Analyse: Intent + Komplexität in einem AI-Call
analysisResult = await self._analyzeUserInputAndComplexity(userInput.prompt, documents)
# Set detected language for fast path (if detected)
# Extract results
detectedLanguage = analysisResult.get('detectedLanguage')
normalizedRequest = analysisResult.get('normalizedRequest')
intentText = analysisResult.get('intent') or userInput.prompt
contextItems = analysisResult.get('contextItems', [])
complexity = analysisResult.get('complexity', 'moderate')
needsWorkflowHistory = analysisResult.get('needsWorkflowHistory', False)
fastTrack = analysisResult.get('fastTrack', False)
# Extract intent analysis fields and store as workflowIntent
workflowIntent = {
'primaryGoal': analysisResult.get('primaryGoal'),
'dataType': analysisResult.get('dataType', 'unknown'),
'expectedFormats': analysisResult.get('expectedFormats', []),
'qualityRequirements': analysisResult.get('qualityRequirements', {}),
'successCriteria': analysisResult.get('successCriteria', []),
'languageUserDetected': detectedLanguage,
'needsWorkflowHistory': needsWorkflowHistory
}
# Store needsWorkflowHistory in services
setattr(self.services, '_needsWorkflowHistory', bool(needsWorkflowHistory))
# Store workflowIntent in workflow object for reuse
if hasattr(self.services, 'workflow') and self.services.workflow:
self.services.workflow._workflowIntent = workflowIntent
# Store normalized request and intent
self.services.currentUserPrompt = intentText or userInput.prompt
self.services.currentUserPromptNormalized = normalizedRequest or intentText or userInput.prompt
if contextItems is not None:
self.services.currentUserContextItems = contextItems
# Set detected language
if detectedLanguage and isinstance(detectedLanguage, str):
self._setUserLanguage(detectedLanguage)
try:
setattr(self.services, 'currentUserLanguage', detectedLanguage)
except Exception:
pass
logger.info(f"Combined analysis: complexity={complexity}, needsWorkflowHistory={needsWorkflowHistory}, language={detectedLanguage}, fastTrack={fastTrack}")
# Route to fast path for simple requests if history is not needed
# Skip fast path for automation mode or if history is needed
if complexity == "simple" and not needsWorkflowHistory:
if not skipCombinedAnalysis and complexity == "simple" and not needsWorkflowHistory:
logger.info("Routing to fast path for simple request")
await self._executeFastPath(userInput, documents)
return # Fast path completes the workflow
# Now send the first message (which will also process the documents again, but that's fine)
await self._sendFirstMessage(userInput)
# Now send the first message (use already analyzed data if available)
await self._sendFirstMessage(userInput, skipIntentionAnalysis=not skipCombinedAnalysis)
# Route to full workflow for moderate/complex requests or automation mode
logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipComplexityDetection else ""))
logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipCombinedAnalysis else ""))
taskPlan = await self._planTasks(userInput)
await self._executeTasks(taskPlan)
await self._processWorkflowResults()
@ -223,6 +259,143 @@ class WorkflowManager:
# Helper functions
async def _analyzeUserInputAndComplexity(
    self,
    userPrompt: str,
    documents: List[ChatDocument]
) -> Dict[str, Any]:
    """Phase 1+2: combined analysis of intent and complexity in a single AI call.

    Args:
        userPrompt: The user's request text.
        documents: Documents attached to the request.

    Returns:
        Dict where any key missing from the AI response is filled from
        _getDefaultAnalysisResult(), so callers can rely on all keys:
        - detectedLanguage: ISO 639-1 language code
        - normalizedRequest: full, explicit restatement of the request
        - intent: short core request
        - contextItems: large data blocks to attach as separate documents
        - complexity: "simple" | "moderate" | "complex"
        - needsWorkflowHistory: bool
        - fastTrack: bool
        - primaryGoal: main objective
        - dataType: type of data/content requested
        - expectedFormats: expected output file formats
        - qualityRequirements: accuracy/completeness thresholds
        - successCriteria: completion criteria
    """
    # Build the attached-document listing injected into the prompt below.
    docListText = ""
    if documents:
        for i, doc in enumerate(documents, 1):
            docListText += f"\n{i}. {doc.fileName} ({doc.mimeType}, {doc.fileSize} bytes)"
    # Braces in the user text are doubled — presumably so later placeholder
    # substitution in callAiPlanning does not misinterpret them; TODO confirm
    # against callAiPlanning's implementation.
    analysisPrompt = f"""You are an input analyzer. From the user's message, perform ALL of the following in one pass:
1. detectedLanguage: Detect ISO 639-1 language code (e.g., de, en, fr, it)
2. normalizedRequest: Full, explicit restatement of the user's request in the detected language; do NOT summarize; preserve ALL constraints and details
3. intent: Concise single-paragraph core request in the detected language for high-level routing
4. contextItems: Supportive data blocks to attach as separate documents if significantly larger than the intent (large literal content, long lists/tables, code/JSON blocks, transcripts, CSV fragments, detailed specs). Keep URLs in the intent unless they embed large pasted content
5. complexity: "simple" | "moderate" | "complex"
- "simple": Only if NO documents AND NO web search required. Single question, straightforward answer (5-15s)
- "moderate": Multiple steps, some documents, structured response requiring some processing, or web search needed (30-60s)
- "complex": Multi-task workflow, many documents, research needed, content generation required, multi-step planning (60-120s)
6. needsWorkflowHistory: Boolean indicating if this request needs previous workflow rounds/history (e.g., 'continue', 'retry', 'fix', 'improve', 'update', 'modify', 'based on previous', 'build on', references to earlier work)
7. fastTrack: Boolean indicating if Fast Track is possible (simple requests without documents and without workflow history)
8. primaryGoal: The main objective the user wants to achieve
9. dataType: What type of data/content they want (numbers|text|documents|analysis|code|unknown)
10. expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., ["xlsx", "pdf"]). If format is unclear or not specified, use empty list []
11. qualityRequirements: Quality requirements they have (accuracy, completeness) as {{accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}}
12. successCriteria: Specific success criteria that define completion (array of strings)
Rules:
- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained
- If content exceeds that threshold, move bulky parts into contextItems; keep intent short and clear
- Preserve critical references (URLs, filenames) in intent
- Normalize to the primary detected language if mixed-language
- Consider number of documents provided when determining complexity
- Consider need for external research or web search when determining complexity
Documents provided: {len(documents)} document(s)
{docListText}
Return ONLY JSON (no markdown) with this exact structure:
{{
"detectedLanguage": "de|en|fr|it|...",
"normalizedRequest": "Full explicit instruction in detected language",
"intent": "Concise normalized request...",
"contextItems": [
{{
"title": "User context 1",
"mimeType": "text/plain",
"content": "Full extracted content block here"
}}
],
"complexity": "simple" | "moderate" | "complex",
"needsWorkflowHistory": true|false,
"fastTrack": true|false,
"primaryGoal": "The main objective the user wants to achieve",
"dataType": "numbers|text|documents|analysis|code|unknown",
"expectedFormats": ["pdf", "docx", "xlsx", "txt", "json", "csv", "html", "md"],
"qualityRequirements": {{
"accuracyThreshold": 0.0-1.0,
"completenessThreshold": 0.0-1.0
}},
"successCriteria": ["specific criterion 1", "specific criterion 2"]
}}
## User Message
The following is the user's original input message. Analyze intent, normalize the request, determine complexity, and identify any large context blocks that should be moved to separate documents:
################ USER INPUT START #################
{userPrompt.replace('{', '{{').replace('}', '}}') if userPrompt else ''}
################ USER INPUT FINISH #################
"""
    # AI call (callAiPlanning handles simple JSON responses; it already
    # writes the debug logs for this call).
    aiResponse = await self.services.ai.callAiPlanning(
        prompt=analysisPrompt,
        placeholders=None,
        debugType="user_input_analysis"
    )
    # Parse the response; tolerate non-JSON text around the JSON object by
    # slicing between the first '{' and the last '}'.
    try:
        jsonStart = aiResponse.find('{') if aiResponse else -1
        jsonEnd = aiResponse.rfind('}') + 1 if aiResponse else 0
        if jsonStart != -1 and jsonEnd > jsonStart:
            parsed = json.loads(aiResponse[jsonStart:jsonEnd])
            if isinstance(parsed, dict):
                # Merge over defaults so every documented key is present even
                # when the model omits optional fields.
                merged = self._getDefaultAnalysisResult()
                merged.update(parsed)
                return merged
            logger.warning("Combined analysis response is not a JSON object, using defaults")
            return self._getDefaultAnalysisResult()
        logger.warning("Could not parse combined analysis response, using defaults")
        return self._getDefaultAnalysisResult()
    except Exception as e:
        logger.warning(f"Error parsing combined analysis response: {str(e)}, using defaults")
        return self._getDefaultAnalysisResult()

def _getDefaultAnalysisResult(self) -> Dict[str, Any]:
    """Fallback default values used when parsing the combined analysis fails."""
    return {
        "detectedLanguage": "en",
        "normalizedRequest": "",
        "intent": "",
        "contextItems": [],
        "complexity": "moderate",
        "needsWorkflowHistory": False,
        "fastTrack": False,
        "primaryGoal": None,
        "dataType": "unknown",
        "expectedFormats": [],
        "qualityRequirements": {
            "accuracyThreshold": 0.8,
            "completenessThreshold": 0.8
        },
        "successCriteria": []
    }
async def _executeFastPath(self, userInput: UserInputRequest, documents: List[ChatDocument]) -> None:
"""Execute fast path for simple requests and deliver result to user"""
try:
@ -330,7 +503,7 @@ class WorkflowManager:
await self._executeTasks(taskPlan)
await self._processWorkflowResults()
async def _sendFirstMessage(self, userInput: UserInputRequest) -> None:
async def _sendFirstMessage(self, userInput: UserInputRequest, skipIntentionAnalysis: bool = False) -> None:
"""Send first message to start workflow"""
try:
workflow = self.services.workflow
@ -360,21 +533,58 @@ class WorkflowManager:
}
# Analyze the user's input to detect language, normalize request, extract intent, and offload bulky context into documents
# SKIP user intention analysis for AUTOMATION mode - it uses predefined JSON plans
# SKIP user intention analysis if already done in combined analysis (skipIntentionAnalysis=True)
# or for AUTOMATION mode - it uses predefined JSON plans
createdDocs = []
workflowMode = getattr(workflow, 'workflowMode', None)
skipIntentionAnalysis = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
skipIntentionAnalysis = skipIntentionAnalysis or (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
if skipIntentionAnalysis:
logger.info("Skipping user intention analysis for AUTOMATION mode - using direct user input")
# For automation mode, use user input directly without AI analysis
self.services.currentUserPrompt = userInput.prompt
# Always set currentUserPromptNormalized - use user input directly for automation mode
self.services.currentUserPromptNormalized = userInput.prompt
detectedLanguage = None
normalizedRequest = None
intentText = userInput.prompt
contextItems = []
logger.info("Skipping user intention analysis (already done in combined analysis or AUTOMATION mode)")
# Use already analyzed data if available, otherwise use user input directly
detectedLanguage = getattr(self.services, 'currentUserLanguage', None)
normalizedRequest = getattr(self.services, 'currentUserPromptNormalized', None) or userInput.prompt
intentText = getattr(self.services, 'currentUserPrompt', None) or userInput.prompt
contextItems = getattr(self.services, 'currentUserContextItems', None) or []
workflowIntent = getattr(workflow, '_workflowIntent', None)
# Create documents for context items (if available from combined analysis)
if contextItems and isinstance(contextItems, list):
for idx, item in enumerate(contextItems):
try:
title = item.get('title') if isinstance(item, dict) else None
mime = item.get('mimeType') if isinstance(item, dict) else None
content = item.get('content') if isinstance(item, dict) else None
if not content:
continue
fileName = (title or f"user_context_{idx+1}.txt").strip()
mimeType = (mime or "text/plain").strip()
# Neutralize content before storing if neutralization is enabled
contentBytes = content.encode('utf-8')
contentBytes = await self._neutralizeContentIfEnabled(contentBytes, mimeType)
# Create file in component storage
fileItem = self.services.interfaceDbComponent.createFile(
name=fileName,
mimeType=mimeType,
content=contentBytes
)
# Persist file data
self.services.interfaceDbComponent.createFileData(fileItem.id, contentBytes)
# Collect file info
fileInfo = self.services.chat.getFileInfo(fileItem.id)
from modules.datamodels.datamodelChat import ChatDocument
doc = ChatDocument(
fileId=fileItem.id,
fileName=fileInfo.get("fileName", fileName) if fileInfo else fileName,
fileSize=fileInfo.get("size", len(contentBytes)) if fileInfo else len(contentBytes),
mimeType=fileInfo.get("mimeType", mimeType) if fileInfo else mimeType
)
createdDocs.append(doc)
except Exception:
continue
else:
try:
analyzerPrompt = (

View file

@ -39,6 +39,7 @@ class DocumentGenerationFormatsTester:
self.workflow = None
self.testResults = {}
self.generatedDocuments = {}
self.pdfFileId = None # Store PDF file ID for reuse
async def initialize(self):
"""Initialize the test environment."""
@ -53,17 +54,123 @@ class DocumentGenerationFormatsTester:
print(f"Initialized test with user: {self.testUser.id}")
print(f"Mandate ID: {self.testUser.mandateId}")
print(f"Debug logging enabled: {APP_CONFIG.get('APP_DEBUG_CHAT_WORKFLOW_ENABLED', False)}")
# Upload PDF file for testing
await self.uploadPdfFile()
async def uploadPdfFile(self):
    """Read the bundled test PDF, persist it via the component storage, and remember its file ID."""
    sourcePath = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "..", "..", "local", "temp", "B2025-02c.pdf")
    )
    if not os.path.exists(sourcePath):
        print(f"⚠️ Warning: PDF file not found at {sourcePath}")
        print(" Test will continue without PDF attachment")
        return
    try:
        with open(sourcePath, "rb") as handle:
            fileBytes = handle.read()
        # Component storage must be wired into services; otherwise run without an attachment.
        component = getattr(self.services, 'interfaceDbComponent', None)
        if not component:
            print("⚠️ Warning: interfaceDbComponent not available in services")
            print(" Test will continue without PDF attachment")
            return
        created = component.createFile(
            name="B2025-02c.pdf",
            mimeType="application/pdf",
            content=fileBytes
        )
        # Persist the raw bytes alongside the file record.
        component.createFileData(created.id, fileBytes)
        self.pdfFileId = created.id
        print(f"✅ Uploaded PDF file: {created.fileName} (ID: {self.pdfFileId}, Size: {len(fileBytes)} bytes)")
    except Exception as e:
        import traceback
        print(f"⚠️ Warning: Failed to upload PDF file: {str(e)}")
        print(f" Traceback: {traceback.format_exc()}")
        print(" Test will continue without PDF attachment")
def createTestPrompt(self, format: str) -> str:
    """Create a unified test prompt for document generation in the specified format.

    The prompt requests:
    - Extraction of images from the attached PDF
    - Generation of a new image
    - Document creation with both images

    Args:
        format: Target output format (e.g. "pdf", "docx"); uppercased in the prompt.

    Returns:
        The complete prompt string.
    """
    # NOTE: the previous one-line docstring survived a merge as a stray
    # string statement; it has been removed — only one docstring remains.
    basePrompt = (
        "Create a professional document about 'Fuel Station Receipt Analysis' with the following content:\n"
        "1) A main title\n"
        "2) An introduction paragraph explaining the receipt analysis\n"
        "3) Extract and include the image from the attached PDF document (B2025-02c.pdf)\n"
        "4) A section analyzing the receipt data with bullet points\n"
        "5) Generate a new image showing a visual representation of fuel consumption trends\n"
        "6) A conclusion paragraph with recommendations\n\n"
        "Make sure to include both: the image extracted from the PDF and the newly generated image.\n"
        f"Format the output as {format.upper()}."
    )
    return basePrompt
def createRefactoringTestPrompt(self, testType: str, format: str = "html") -> str:
"""Create test prompts for specific refactoring features.
Args:
testType: Type of refactoring test:
- "intent_analysis": Test DocumentIntent analysis
- "conditional_extraction": Test conditional extraction (extract vs render)
- "image_render": Test image rendering as asset
- "multi_document": Test multi-document rendering
- "metadata_preservation": Test metadata preservation
format: Output format (default: html)
"""
prompts = {
"html": "Create a professional HTML document about 'The Future of Artificial Intelligence' with: 1) A main title, 2) An introduction paragraph, 3) Three key sections with headings, 4) A bullet list of benefits, 5) An image showing AI technology (generate it), 6) A conclusion paragraph. Format as HTML.",
"pdf": "Create a professional PDF report about 'Climate Change Impact Analysis' with: 1) A title page, 2) An executive summary, 3) Three main sections with data tables, 4) Charts/graphs described, 5) An image showing environmental impact (generate it), 6) Conclusions and recommendations. Format as PDF.",
"docx": "Create a comprehensive Word document about 'Project Management Best Practices' with: 1) A cover page with title, 2) Table of contents, 3) Five chapters with headings and paragraphs, 4) A table comparing methodologies, 5) An image illustrating project workflow (generate it), 6) Appendices. Format as DOCX.",
"xlsx": "Create an Excel workbook about 'Sales Performance Analysis' with: 1) A summary sheet with key metrics, 2) A detailed data sheet with sales data in a table format (columns: Month, Product, Sales, Units, Revenue), 3) A chart sheet with visualizations described, 4) An analysis sheet with calculations. Format as XLSX.",
"pptx": "Create a PowerPoint presentation about 'Digital Transformation Strategy' with: 1) A title slide, 2) An agenda slide, 3) Five content slides with bullet points, 4) A slide with an image showing transformation roadmap (generate it), 5) A conclusion slide. Format as PPTX."
"intent_analysis": (
"Create a document with the following requirements:\n"
"1) Extract text content from the attached PDF\n"
"2) Include images from the PDF as visual elements (render them, don't extract text from them)\n"
"3) Generate a summary document\n\n"
"This tests that the system correctly identifies which documents need extraction vs rendering."
),
"conditional_extraction": (
"Create a document that:\n"
"1) Extracts and uses text from the attached PDF\n"
"2) Renders images from the PDF as visual assets (not as extracted text)\n"
"3) Generates new content based on the extracted text\n\n"
"This tests conditional extraction - only extract what needs extraction, render what needs rendering."
),
"image_render": (
"Create a document that includes images from the attached PDF.\n"
"The images should be rendered as visual elements in the document, not extracted as text.\n"
"Include a title and description for each image.\n\n"
"This tests the image asset pipeline with render intent."
),
"multi_document": (
"Create multiple separate documents:\n"
"1) Document 1: Summary of the PDF content\n"
"2) Document 2: Analysis of the PDF content\n"
"3) Document 3: Recommendations based on the PDF content\n\n"
"Each document should be separate and complete.\n"
"This tests multi-document generation and rendering."
),
"metadata_preservation": (
"Create a document that extracts content from the attached PDF.\n"
"The document should clearly show which content came from which source document.\n"
"Include source references in the generated content.\n\n"
"This tests that metadata (documentId, mimeType) is preserved in the generation prompt."
)
}
return prompts.get(format.lower(), prompts["docx"])
prompt = prompts.get(testType, self.createTestPrompt(format))
return f"{prompt}\n\nFormat the output as {format.upper()}."
async def generateDocumentInFormat(self, format: str) -> Dict[str, Any]:
"""Generate a document in the specified format using workflow."""
@ -74,9 +181,18 @@ class DocumentGenerationFormatsTester:
prompt = self.createTestPrompt(format)
print(f"Prompt: {prompt[:200]}...")
# Create user input request with PDF file attachment
listFileId = []
if self.pdfFileId:
listFileId = [self.pdfFileId]
print(f"Attaching PDF file (ID: {self.pdfFileId})")
else:
print("⚠️ No PDF file attached (file upload may have failed)")
# Create user input request
userInput = UserInputRequest(
prompt=prompt,
listFileId=listFileId,
userLanguage="en"
)
@ -281,6 +397,166 @@ class DocumentGenerationFormatsTester:
return verification
async def testRefactoringFeatures(self) -> Dict[str, Any]:
    """Test specific refactoring features.

    Runs one full workflow per refactoring feature (intent analysis,
    conditional extraction, image rendering, multi-document output,
    metadata preservation), attaching the uploaded test PDF when available,
    then inspects the workflow's logs/documents for evidence that the
    feature ran.

    Returns:
        Mapping of test type -> result dict containing a success flag plus,
        on success, workflowId / verification / workflowResults, or error
        details (and traceback) on failure.
    """
    print("\n" + "="*80)
    print("TESTING REFACTORING FEATURES")
    print("="*80)
    # One (testType, output format) pair per feature; HTML keeps runs light.
    refactoringTests = [
        ("intent_analysis", "html"),
        ("conditional_extraction", "html"),
        ("image_render", "html"),
        ("multi_document", "html"),
        ("metadata_preservation", "html")
    ]
    results = {}
    for testType, format in refactoringTests:
        try:
            print(f"\n{'='*80}")
            print(f"Testing Refactoring Feature: {testType}")
            print(f"{'='*80}")
            prompt = self.createRefactoringTestPrompt(testType, format)
            print(f"Prompt: {prompt[:200]}...")
            # Attach the uploaded test PDF so extraction/render paths have
            # real input; otherwise the workflow runs without an attachment.
            listFileId = []
            if self.pdfFileId:
                listFileId = [self.pdfFileId]
                print(f"Attaching PDF file (ID: {self.pdfFileId})")
            else:
                print("⚠️ No PDF file attached (file upload may have failed)")
            userInput = UserInputRequest(
                prompt=prompt,
                listFileId=listFileId,
                userLanguage="en"
            )
            # Start workflow in dynamic mode (no predefined plan).
            print(f"\nStarting workflow for {testType} test...")
            workflow = await chatStart(
                currentUser=self.testUser,
                userInput=userInput,
                workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC,
                workflowId=None
            )
            if not workflow:
                results[testType] = {
                    "success": False,
                    "error": "Failed to start workflow"
                }
                continue
            self.workflow = workflow
            print(f"Workflow started: {workflow.id}")
            # Wait for workflow completion (5-minute cap per feature test).
            completed = await self.waitForWorkflowCompletion(timeout=300)
            if not completed:
                results[testType] = {
                    "success": False,
                    "error": "Workflow did not complete within timeout",
                    "workflowId": workflow.id
                }
                continue
            # Collect documents/logs produced by the completed workflow.
            workflowResults = self.analyzeWorkflowResults()
            # Check for evidence of the specific refactoring feature.
            verification = self.verifyRefactoringFeature(testType, workflowResults)
            results[testType] = {
                "success": True,
                "workflowId": workflow.id,
                "verification": verification,
                "workflowResults": workflowResults
            }
            print(f"\n{testType} test completed!")
            print(f" Verification: {'✅ PASS' if verification.get('passed', False) else '❌ FAIL'}")
            if verification.get("details"):
                for detail in verification["details"]:
                    print(f" - {detail}")
            # Brief pause between runs — presumably to let the backend settle
            # between workflows; TODO confirm whether this delay is required.
            await asyncio.sleep(2)
        except Exception as e:
            import traceback
            print(f"\n❌ Error testing {testType}: {str(e)}")
            print(traceback.format_exc())
            # Record the failure but keep iterating the remaining features.
            results[testType] = {
                "success": False,
                "error": str(e),
                "traceback": traceback.format_exc()
            }
    return results
def verifyRefactoringFeature(self, testType: str, workflowResults: Dict[str, Any]) -> Dict[str, Any]:
    """Check workflow output for evidence that a given refactoring feature worked.

    Args:
        testType: One of the refactoring test identifiers.
        workflowResults: Dict with "documents" and "logs" lists produced by the workflow.

    Returns:
        Dict with the testType, a passed flag, and human-readable detail strings.
    """
    docs = workflowResults.get("documents", [])
    logLines = workflowResults.get("logs", [])
    outcome: Dict[str, Any] = {
        "testType": testType,
        "passed": False,
        "details": []
    }

    def matching(*terms):
        # Log entries whose lowercased text contains any of the given terms.
        return [entry for entry in logLines
                if any(term in str(entry).lower() for term in terms)]

    if testType == "intent_analysis":
        # Evidence that intent analysis ran at all.
        if matching("intent", "analyzing document intent"):
            outcome["details"].append("Intent analysis logs found")
            outcome["passed"] = True
        else:
            outcome["details"].append("No intent analysis logs found")
    elif testType == "conditional_extraction":
        # Both extraction and rendering must have happened.
        extractionHits = matching("extract")
        renderHits = matching("render", "image")
        if extractionHits and renderHits:
            outcome["details"].append("Both extraction and rendering occurred")
            outcome["passed"] = True
        else:
            outcome["details"].append(
                f"Missing logs: extraction={len(extractionHits)}, render={len(renderHits)}"
            )
    elif testType == "image_render":
        if matching("image"):
            outcome["details"].append("Image rendering logs found")
            outcome["passed"] = True
        else:
            outcome["details"].append("No image rendering logs found")
    elif testType == "multi_document":
        # At least two generated documents qualify as "multiple".
        if len(docs) >= 2:
            outcome["details"].append(f"Multiple documents generated: {len(docs)}")
            outcome["passed"] = True
        else:
            outcome["details"].append(f"Expected multiple documents, got {len(docs)}")
    elif testType == "metadata_preservation":
        # Case-sensitive on purpose: these markers appear verbatim in the logs.
        metadataHits = [entry for entry in logLines
                        if "documentId" in str(entry) or "SOURCE:" in str(entry)]
        if metadataHits:
            outcome["details"].append("Metadata preservation logs found")
            outcome["passed"] = True
        else:
            outcome["details"].append("No metadata preservation logs found")

    return outcome
async def testAllFormats(self) -> Dict[str, Any]:
"""Test document generation in all formats."""
print("\n" + "="*80)
@ -334,8 +610,12 @@ class DocumentGenerationFormatsTester:
return results
async def runTest(self):
"""Run the complete test."""
async def runTest(self, includeRefactoringTests: bool = True):
"""Run the complete test.
Args:
includeRefactoringTests: If True, also run refactoring feature tests
"""
print("\n" + "="*80)
print("DOCUMENT GENERATION FORMATS TEST")
print("="*80)
@ -344,18 +624,43 @@ class DocumentGenerationFormatsTester:
# Initialize
await self.initialize()
# Test refactoring features first (if enabled)
refactoringResults = {}
if includeRefactoringTests:
refactoringResults = await self.testRefactoringFeatures()
# Test all formats
results = await self.testAllFormats()
formatResults = await self.testAllFormats()
# Summary
print("\n" + "="*80)
print("TEST SUMMARY")
print("="*80)
# Refactoring tests summary
refactoringSuccessCount = 0
refactoringFailCount = 0
if includeRefactoringTests and refactoringResults:
print("\nRefactoring Features:")
for testType, result in refactoringResults.items():
if result.get("success"):
refactoringSuccessCount += 1
verification = result.get("verification", {})
passed = verification.get("passed", False)
statusIcon = "" if passed else "⚠️"
print(f"{statusIcon} {testType:25s}: {'PASS' if passed else 'FAIL'}")
else:
refactoringFailCount += 1
error = result.get("error", "Unknown error")
print(f"{testType:25s}: FAIL - {error}")
print(f"Refactoring Tests: {refactoringSuccessCount} passed, {refactoringFailCount} failed out of {len(refactoringResults)} tests")
# Format tests summary
print("\nFormat Tests:")
successCount = 0
failCount = 0
for format, result in results.items():
for format, result in formatResults.items():
if result.get("success"):
successCount += 1
status = "✅ PASS"
@ -369,14 +674,28 @@ class DocumentGenerationFormatsTester:
error = result.get("error", "Unknown error")
print(f"{format.upper():6s}: FAIL - {error}")
print(f"\nTotal: {successCount} passed, {failCount} failed out of {len(results)} formats")
print(f"\nFormat Tests: {successCount} passed, {failCount} failed out of {len(formatResults)} formats")
# Calculate totals
totalSuccess = successCount + refactoringSuccessCount if includeRefactoringTests else successCount
totalFail = failCount + refactoringFailCount if includeRefactoringTests else failCount
self.testResults = {
"success": failCount == 0,
"successCount": successCount,
"failCount": failCount,
"totalFormats": len(results),
"results": results
"success": failCount == 0 and (not includeRefactoringTests or refactoringFailCount == 0),
"formatTests": {
"successCount": successCount,
"failCount": failCount,
"totalFormats": len(formatResults),
"results": formatResults
},
"refactoringTests": {
"successCount": refactoringSuccessCount if includeRefactoringTests else 0,
"failCount": refactoringFailCount if includeRefactoringTests else 0,
"totalTests": len(refactoringResults) if includeRefactoringTests else 0,
"results": refactoringResults if includeRefactoringTests else {}
},
"totalSuccess": totalSuccess,
"totalFail": totalFail
}
return self.testResults