enhanced core ai call document handling with document intent
This commit is contained in:
parent
262f3296bf
commit
e1b3cd36f0
32 changed files with 2799 additions and 3871 deletions
|
|
@ -61,6 +61,14 @@ class MergeStrategy(BaseModel):
|
|||
capabilities: Optional[Dict[str, Any]] = Field(default=None, description="Model capabilities for intelligent merging")
|
||||
|
||||
|
||||
class DocumentIntent(BaseModel):
|
||||
"""Intent analysis for a single document."""
|
||||
# ID of the document this intent record refers to.
documentId: str = Field(description="ID des Dokuments")
|
||||
# List of intents for the document; per the field description the values are
# 'extract', 'render' and 'reference', and several may be given at once.
intents: List[str] = Field(description="Liste von Intents: ['extract', 'render', 'reference'] - mehrere möglich")
|
||||
# Optional extraction-specific prompt; defaults to None when not supplied.
extractionPrompt: Optional[str] = Field(default=None, description="Spezifischer Prompt für Extraktion (z.B. 'Extract text from images for legends')")
|
||||
# Required explanation of why this intent was chosen (for debugging/transparency).
reasoning: str = Field(description="Erklärung für Debugging/Transparenz: Warum wurde dieser Intent gewählt?")
|
||||
|
||||
|
||||
class ExtractionOptions(BaseModel):
|
||||
"""Options for document extraction and processing with clear data structures."""
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -34,12 +34,42 @@ class StructureChunker(Chunker):
|
|||
if bucket:
|
||||
emit(bucket)
|
||||
else:
|
||||
# JSON object (dict) - check if it fits
|
||||
text = json.dumps(obj, ensure_ascii=False)
|
||||
if len(text.encode('utf-8')) <= maxBytes:
|
||||
textSize = len(text.encode('utf-8'))
|
||||
if textSize <= maxBytes:
|
||||
emit(obj)
|
||||
else:
|
||||
# fallback to line chunking
|
||||
raise ValueError("too large")
|
||||
# Object too large - try to split by keys if possible
|
||||
# For large objects, we need to chunk by character boundaries
|
||||
# since we can't split JSON objects arbitrarily
|
||||
if isinstance(obj, dict) and len(obj) > 1:
|
||||
# Try to split object into multiple chunks by keys
|
||||
# This preserves JSON structure better than line-based chunking
|
||||
currentChunk: Dict[str, Any] = {}
|
||||
currentSize = 2 # Start with "{}" overhead
|
||||
for key, value in obj.items():
|
||||
itemText = json.dumps({key: value}, ensure_ascii=False)
|
||||
itemSize = len(itemText.encode('utf-8'))
|
||||
# Account for comma and spacing between items
|
||||
if currentChunk:
|
||||
itemSize += 2 # ", " separator
|
||||
|
||||
if currentSize + itemSize > maxBytes and currentChunk:
|
||||
# Current chunk is full, emit it
|
||||
emit(currentChunk)
|
||||
currentChunk = {key: value}
|
||||
currentSize = len(itemText.encode('utf-8'))
|
||||
else:
|
||||
currentChunk[key] = value
|
||||
currentSize += itemSize
|
||||
|
||||
# Emit remaining chunk
|
||||
if currentChunk:
|
||||
emit(currentChunk)
|
||||
else:
|
||||
# Single large value or can't split - fallback to line chunking
|
||||
raise ValueError("too large")
|
||||
except Exception:
|
||||
current: List[str] = []
|
||||
size = 0
|
||||
|
|
|
|||
|
|
@ -6,10 +6,11 @@ import logging
|
|||
import time
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .subRegistry import ExtractorRegistry, ChunkerRegistry
|
||||
from .subPipeline import runExtraction
|
||||
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy, ExtractionOptions, PartResult
|
||||
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy, ExtractionOptions, PartResult, DocumentIntent
|
||||
from modules.datamodels.datamodelChat import ChatDocument
|
||||
from modules.datamodels.datamodelAi import AiCallResponse, AiCallRequest, AiCallOptions, OperationTypeEnum, AiModelCall
|
||||
from modules.aicore.aicoreModelRegistry import modelRegistry
|
||||
|
|
@ -73,12 +74,14 @@ class ExtractionService:
|
|||
if operationId:
|
||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||
docOperationId = f"{operationId}_doc_{i}"
|
||||
# Use parentOperationId if provided, otherwise use operationId as parent
|
||||
parentId = parentOperationId if parentOperationId else operationId
|
||||
self.services.chat.progressLogStart(
|
||||
docOperationId,
|
||||
"Extracting Document",
|
||||
f"Document {i + 1}/{totalDocs}",
|
||||
doc.fileName[:50] + "..." if len(doc.fileName) > 50 else doc.fileName,
|
||||
parentOperationId=operationId # Use operationId as parent (not parentOperationId)
|
||||
parentOperationId=parentId # Correct parent reference for ChatLog hierarchy
|
||||
)
|
||||
|
||||
# Start timing for this document
|
||||
|
|
@ -125,12 +128,41 @@ class ExtractionService:
|
|||
if part.metadata:
|
||||
logger.debug(f" Metadata: {part.metadata}")
|
||||
|
||||
# Attach document id and MIME type to parts if missing
|
||||
# Attach complete metadata to parts according to the ContentPart metadata schema
|
||||
for p in ec.parts:
|
||||
# Ensure metadata dict exists
|
||||
if not p.metadata:
|
||||
p.metadata = {}
|
||||
|
||||
# Required metadata fields (from concept)
|
||||
if "documentId" not in p.metadata:
|
||||
p.metadata["documentId"] = documentData["id"] or str(uuid.uuid4())
|
||||
if "documentMimeType" not in p.metadata:
|
||||
p.metadata["documentMimeType"] = documentData["mimeType"]
|
||||
if "originalFileName" not in p.metadata:
|
||||
p.metadata["originalFileName"] = documentData["fileName"]
|
||||
|
||||
# ContentFormat: Set based on typeGroup and mimeType
|
||||
# Default to "extracted" for text content, but can be overridden by caller
|
||||
if "contentFormat" not in p.metadata:
|
||||
# Default: extracted text content
|
||||
p.metadata["contentFormat"] = "extracted"
|
||||
|
||||
# Intent: Default to "extract" for extracted content
|
||||
if "intent" not in p.metadata:
|
||||
p.metadata["intent"] = "extract"
|
||||
|
||||
# ExtractionPrompt: Use from options if available
|
||||
if "extractionPrompt" not in p.metadata and options and options.prompt:
|
||||
p.metadata["extractionPrompt"] = options.prompt
|
||||
|
||||
# UsageHint: Provide default hint
|
||||
if "usageHint" not in p.metadata:
|
||||
p.metadata["usageHint"] = f"Use extracted content from {documentData['fileName']}"
|
||||
|
||||
# SourceAction: Mark as from extraction service
|
||||
if "sourceAction" not in p.metadata:
|
||||
p.metadata["sourceAction"] = "extraction.extractContent"
|
||||
|
||||
# Log chunking information
|
||||
chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)]
|
||||
|
|
@ -185,7 +217,7 @@ class ExtractionService:
|
|||
# Write extraction results to debug file
|
||||
try:
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
import json
|
||||
# json is already imported at module level
|
||||
# Create summary of extraction results for debug
|
||||
extractionSummary = {
|
||||
"documentName": doc.fileName,
|
||||
|
|
@ -487,7 +519,8 @@ class ExtractionService:
|
|||
prompt: str,
|
||||
aiObjects: Any,
|
||||
options: Optional[AiCallOptions] = None,
|
||||
operationId: Optional[str] = None
|
||||
operationId: Optional[str] = None,
|
||||
parentOperationId: Optional[str] = None
|
||||
) -> str:
|
||||
"""
|
||||
Process documents with model-aware chunking and merge results.
|
||||
|
|
@ -499,6 +532,7 @@ class ExtractionService:
|
|||
aiObjects: AiObjects instance for making AI calls
|
||||
options: AI call options
|
||||
operationId: Optional operation ID for progress tracking
|
||||
parentOperationId: Optional parent operation ID for hierarchical logging
|
||||
|
||||
Returns:
|
||||
Merged AI results as string with preserved document structure
|
||||
|
|
@ -514,7 +548,8 @@ class ExtractionService:
|
|||
operationId,
|
||||
"AI Text Extract",
|
||||
"Document Processing",
|
||||
f"Processing {len(documents)} documents"
|
||||
f"Processing {len(documents)} documents",
|
||||
parentOperationId=parentOperationId # Use parentOperationId if provided
|
||||
)
|
||||
|
||||
try:
|
||||
|
|
@ -539,7 +574,8 @@ class ExtractionService:
|
|||
if operationId:
|
||||
self.services.chat.progressLogUpdate(operationId, 0.1, f"Extracting content from {len(documents)} documents")
|
||||
# Pass operationId as parentOperationId for hierarchical logging
|
||||
extractionResult = self.extractContent(documents, extractionOptions, operationId=operationId, parentOperationId=parentOperationId)
|
||||
# Correct hierarchy: parentOperationId -> operationId -> docOperationId
|
||||
extractionResult = self.extractContent(documents, extractionOptions, operationId=operationId, parentOperationId=operationId)
|
||||
|
||||
if not isinstance(extractionResult, list):
|
||||
if operationId:
|
||||
|
|
@ -549,9 +585,10 @@ class ExtractionService:
|
|||
# Process parts (not chunks) with model-aware AI calls
|
||||
if operationId:
|
||||
self.services.chat.progressLogUpdate(operationId, 0.3, f"Processing {len(extractionResult)} extracted content parts")
|
||||
# Use parent operation ID directly (parentId should be operationId, not log entry ID)
|
||||
parentOperationId = operationId # Use the parent's operationId directly
|
||||
partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, parentOperationId)
|
||||
# Use operationId as parentOperationId for child operations
|
||||
# Correct hierarchy: parentOperationId -> operationId -> partOperationId
|
||||
processParentOperationId = operationId
|
||||
partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, processParentOperationId)
|
||||
|
||||
# Merge results using existing merging system
|
||||
if operationId:
|
||||
|
|
@ -733,7 +770,8 @@ class ExtractionService:
|
|||
# Detect input type and convert accordingly
|
||||
if isinstance(partResults[0], PartResult):
|
||||
# Existing logic for PartResult (from processDocumentsPerChunk)
|
||||
for part_result in partResults:
|
||||
# Phase 7: Add originalIndex for explicit ordering
|
||||
for i, part_result in enumerate(partResults):
|
||||
content_part = ContentPart(
|
||||
id=part_result.originalPart.id,
|
||||
parentId=part_result.originalPart.parentId,
|
||||
|
|
@ -744,7 +782,9 @@ class ExtractionService:
|
|||
metadata={
|
||||
**part_result.originalPart.metadata,
|
||||
"aiResult": True,
|
||||
"originalIndex": i, # Phase 7: Explicit order index
|
||||
"partIndex": part_result.partIndex,
|
||||
"processingOrder": i, # Phase 7: Processing order
|
||||
"documentId": part_result.documentId,
|
||||
"processingTime": part_result.processingTime,
|
||||
"success": part_result.metadata.get("success", False)
|
||||
|
|
@ -753,6 +793,7 @@ class ExtractionService:
|
|||
content_parts.append(content_part)
|
||||
elif isinstance(partResults[0], AiCallResponse):
|
||||
# Logic from interfaceAiObjects (from content parts processing)
|
||||
# Phase 7: Add originalIndex for explicit ordering
|
||||
for i, result in enumerate(partResults):
|
||||
if result.content:
|
||||
content_part = ContentPart(
|
||||
|
|
@ -764,6 +805,8 @@ class ExtractionService:
|
|||
data=result.content,
|
||||
metadata={
|
||||
"aiResult": True,
|
||||
"originalIndex": i, # Phase 7: Explicit order index
|
||||
"processingOrder": i, # Phase 7: Processing order
|
||||
"modelName": result.modelName,
|
||||
"priceUsd": result.priceUsd,
|
||||
"processingTime": result.processingTime,
|
||||
|
|
@ -792,11 +835,12 @@ class ExtractionService:
|
|||
|
||||
# Determine merge strategy based on input type
|
||||
if isinstance(partResults[0], PartResult):
|
||||
# Use strategy for extraction workflow (group by document, order by part index)
|
||||
# Phase 7: Use originalIndex for explicit ordering
|
||||
# Use strategy for extraction workflow (group by document, order by originalIndex)
|
||||
merge_strategy = MergeStrategy(
|
||||
useIntelligentMerging=True,
|
||||
groupBy="documentId", # Group by document
|
||||
orderBy="partIndex", # Order by part index
|
||||
orderBy="originalIndex", # Phase 7: Order by originalIndex instead of partIndex
|
||||
mergeType="concatenate"
|
||||
)
|
||||
else:
|
||||
|
|
@ -811,10 +855,52 @@ class ExtractionService:
|
|||
# Apply merging
|
||||
merged_parts = applyMerging(content_parts, merge_strategy)
|
||||
|
||||
# Convert back to string
|
||||
final_content = "\n\n".join([part.data for part in merged_parts])
|
||||
# Phase 6: Enhanced format with metadata preservation
|
||||
# CRITICAL: For generation responses (JSON), don't add SOURCE markers - they interfere with JSON parsing
|
||||
# Check if this is a generation response by looking at operationType or content structure
|
||||
isGenerationResponse = False
|
||||
if options and hasattr(options, 'operationType'):
|
||||
# Generation responses use DATA_GENERATE operation type
|
||||
from modules.datamodels.datamodelAi import OperationTypeEnum
|
||||
isGenerationResponse = options.operationType == OperationTypeEnum.DATA_GENERATE
|
||||
|
||||
logger.info(f"Merged {len(partResults)} parts using unified merging system")
|
||||
# Also check if content looks like JSON (starts with { or [)
|
||||
if not isGenerationResponse and merged_parts:
|
||||
firstPartData = merged_parts[0].data if merged_parts[0].data else ""
|
||||
if isinstance(firstPartData, str) and firstPartData.strip().startswith(('{', '[')):
|
||||
# Check if it's a complete JSON structure (not extracted content)
|
||||
# Generation responses are complete JSON, extraction responses are text content
|
||||
try:
|
||||
# json is already imported at module level
|
||||
json.loads(firstPartData.strip())
|
||||
# If it parses as JSON and has "documents" key, it's likely a generation response
|
||||
parsed = json.loads(firstPartData.strip())
|
||||
if isinstance(parsed, dict) and "documents" in parsed:
|
||||
isGenerationResponse = True
|
||||
except:
|
||||
pass
|
||||
|
||||
content_sections = []
|
||||
for part in merged_parts:
|
||||
if isGenerationResponse:
|
||||
# For generation responses, return JSON directly without SOURCE markers
|
||||
content_sections.append(part.data)
|
||||
else:
|
||||
# For extraction responses, include metadata in section header for traceability
|
||||
doc_id = part.metadata.get("documentId", "unknown")
|
||||
doc_mime = part.metadata.get("documentMimeType", "unknown")
|
||||
label = part.label or "content"
|
||||
|
||||
section = f"""
|
||||
[SOURCE: documentId={doc_id}, mimeType={doc_mime}, label={label}]
|
||||
{part.data}
|
||||
[END SOURCE]
|
||||
"""
|
||||
content_sections.append(section)
|
||||
|
||||
final_content = "\n\n".join(content_sections)
|
||||
|
||||
logger.info(f"Merged {len(partResults)} parts using unified merging system with metadata preservation (generationResponse={isGenerationResponse})")
|
||||
return final_content.strip()
|
||||
|
||||
async def chunkContentPartForAi(self, contentPart, model, options, prompt: str = "") -> List[Dict[str, Any]]:
|
||||
|
|
@ -827,9 +913,14 @@ class ExtractionService:
|
|||
modelContextTokens = model.contextLength # Total context in tokens
|
||||
modelMaxOutputTokens = model.maxTokens # Maximum output tokens
|
||||
|
||||
# CRITICAL: Use same conservative token factor as in processContentPartWithFallback
|
||||
# Real-world observation: Our calculation says 94k tokens, but API says 217k tokens (2.3x difference!)
|
||||
TOKEN_SAFETY_FACTOR = 2.2 # Conservative: accounts for JSON tokenization and API overhead
|
||||
|
||||
# Reserve tokens for:
|
||||
# 1. Prompt (user message)
|
||||
promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0
|
||||
# 1. Prompt (user message) - use conservative factor
|
||||
promptSize = len(prompt.encode('utf-8')) if prompt else 0
|
||||
promptTokens = promptSize / TOKEN_SAFETY_FACTOR
|
||||
|
||||
# 2. System message wrapper ("Context from documents:\n")
|
||||
systemMessageTokens = 10 # ~40 bytes = 10 tokens
|
||||
|
|
@ -844,31 +935,38 @@ class ExtractionService:
|
|||
totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens
|
||||
|
||||
# Available tokens for content = context length - reserved tokens
|
||||
# Use 80% of available for safety margin
|
||||
availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8)
|
||||
# Use 60% of available (same conservative margin as in processContentPartWithFallback)
|
||||
availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.60)
|
||||
|
||||
# Ensure we have at least some space
|
||||
if availableContentTokens < 100:
|
||||
logger.warning(f"Very limited space for content: {availableContentTokens} tokens available. Model: {model.name}, contextLength: {modelContextTokens}, maxTokens: {modelMaxOutputTokens}, prompt: {promptTokens:.0f} tokens")
|
||||
availableContentTokens = max(100, int(modelContextTokens * 0.1)) # Fallback to 10% of context
|
||||
|
||||
# Convert tokens to bytes (1 token ≈ 4 bytes)
|
||||
availableContentBytes = availableContentTokens * 4
|
||||
# Convert tokens to bytes using conservative factor (reverse: bytes = tokens * factor)
|
||||
availableContentBytes = int(availableContentTokens * TOKEN_SAFETY_FACTOR)
|
||||
|
||||
logger.debug(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens, reserved={totalReservedTokens:.0f} tokens, available={availableContentTokens} tokens ({availableContentBytes} bytes)")
|
||||
logger.info(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens est., reserved={totalReservedTokens:.0f} tokens est., available={availableContentTokens} tokens est. ({availableContentBytes} bytes), factor={TOKEN_SAFETY_FACTOR}")
|
||||
|
||||
# Use 70% of available content bytes for text chunks (conservative)
|
||||
textChunkSize = int(availableContentBytes * 0.7)
|
||||
imageChunkSize = int(availableContentBytes * 0.8) # 80% for image chunks
|
||||
# Use 50% of available content bytes for text chunks (very conservative to ensure chunks fit)
|
||||
# This ensures that even with token counting inaccuracies, chunks will fit
|
||||
textChunkSize = int(availableContentBytes * 0.5)
|
||||
structureChunkSize = int(availableContentBytes * 0.5) # CRITICAL: Also set for StructureChunker (JSON content)
|
||||
tableChunkSize = int(availableContentBytes * 0.5) # Also set for TableChunker
|
||||
imageChunkSize = int(availableContentBytes * 0.6) # 60% for image chunks
|
||||
|
||||
# Build chunking options
|
||||
# Build chunking options - include ALL chunk size options for different chunkers
|
||||
chunkingOptions = {
|
||||
"textChunkSize": textChunkSize,
|
||||
"structureChunkSize": structureChunkSize, # CRITICAL: Required for StructureChunker (JSON)
|
||||
"tableChunkSize": tableChunkSize, # Required for TableChunker
|
||||
"imageChunkSize": imageChunkSize,
|
||||
"maxSize": availableContentBytes,
|
||||
"chunkAllowed": True
|
||||
}
|
||||
|
||||
logger.info(f"Chunking options: textChunkSize={textChunkSize} bytes, structureChunkSize={structureChunkSize} bytes, tableChunkSize={tableChunkSize} bytes, imageChunkSize={imageChunkSize} bytes, contentPartSize={len(contentPart.data.encode('utf-8')) if contentPart.data else 0} bytes")
|
||||
|
||||
# Get appropriate chunker (uses existing ChunkerRegistry ✅)
|
||||
chunker = self._chunkerRegistry.resolve(contentPart.typeGroup)
|
||||
|
||||
|
|
@ -878,8 +976,14 @@ class ExtractionService:
|
|||
|
||||
# Chunk the content part
|
||||
try:
|
||||
contentSize = len(contentPart.data.encode('utf-8')) if contentPart.data else 0
|
||||
logger.info(f"Chunking {contentPart.typeGroup} part: contentSize={contentSize} bytes, textChunkSize={textChunkSize} bytes, structureChunkSize={structureChunkSize} bytes")
|
||||
chunks = chunker.chunk(contentPart, chunkingOptions)
|
||||
logger.debug(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part")
|
||||
logger.info(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part (contentSize={contentSize} bytes)")
|
||||
if chunks:
|
||||
for i, chunk in enumerate(chunks):
|
||||
chunkSize = len(chunk.get('data', '').encode('utf-8')) if chunk.get('data') else 0
|
||||
logger.info(f" Chunk {i+1}/{len(chunks)}: {chunkSize} bytes")
|
||||
return chunks
|
||||
except Exception as e:
|
||||
logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}")
|
||||
|
|
@ -999,15 +1103,86 @@ class ExtractionService:
|
|||
|
||||
availableContentBytes = availableContentTokens * 4
|
||||
|
||||
logger.debug(f"Size check for {model.name}: partSize={partSize} bytes, availableContentBytes={availableContentBytes} bytes")
|
||||
# Also check prompt size - prompt + content together must fit
|
||||
promptSize = len(prompt.encode('utf-8')) if prompt else 0
|
||||
|
||||
if partSize <= availableContentBytes:
|
||||
# CRITICAL: Token counting approximation is VERY inaccurate for JSON/content
|
||||
# Real-world observation: Our calculation says 94k tokens, but API says 217k tokens (2.3x difference!)
|
||||
# This happens because:
|
||||
# 1. JSON/structured content tokenizes differently (more tokens per byte)
|
||||
# 2. API has message structure overhead (system prompts, message wrappers)
|
||||
# 3. Tokenizer differences between our approximation and actual API tokenizer
|
||||
# Use conservative factor: 1 token ≈ 2.2 bytes (instead of 4) to account for these differences
|
||||
TOKEN_SAFETY_FACTOR = 2.2 # Conservative: accounts for JSON tokenization and API overhead
|
||||
promptTokens = promptSize / TOKEN_SAFETY_FACTOR
|
||||
contentTokens = partSize / TOKEN_SAFETY_FACTOR
|
||||
totalTokens = promptTokens + contentTokens
|
||||
|
||||
# CRITICAL: Use very conservative margin (60%) because:
|
||||
# 1. Token counting approximation is inaccurate - real tokens can be 2-3x more
|
||||
# 2. API has additional overhead (message structure, system prompts, etc.)
|
||||
# 3. Anthropic API is strict about the 200k limit
|
||||
# 4. We've seen cases where our calculation says "fits" but API says "too long"
|
||||
maxTotalTokens = int(modelContextTokens * 0.60)
|
||||
|
||||
logger.info(f"Size check for {model.name}: partSize={partSize} bytes ({contentTokens:.0f} tokens est.), promptSize={promptSize} bytes ({promptTokens:.0f} tokens est.), total={totalTokens:.0f} tokens est., modelContext={modelContextTokens} tokens, maxTotal={maxTotalTokens} tokens (60% margin, conservative factor={TOKEN_SAFETY_FACTOR})")
|
||||
|
||||
# CRITICAL: Always check totalTokens first - if prompt + content exceeds limit, MUST chunk
|
||||
# Token counting approximation may differ significantly from API, so use very conservative margin
|
||||
if totalTokens > maxTotalTokens:
|
||||
logger.warning(f"⚠️ Total tokens ({totalTokens:.0f} est.) exceed model limit ({maxTotalTokens}), chunking required. Prompt: {promptTokens:.0f} tokens est., Content: {contentTokens:.0f} tokens est.")
|
||||
elif partSize > availableContentBytes:
|
||||
logger.warning(f"⚠️ Content part ({contentTokens:.0f} tokens est.) exceeds available space ({availableContentBytes/TOKEN_SAFETY_FACTOR:.0f} tokens est.), chunking required")
|
||||
|
||||
# If either condition fails, chunk the content
|
||||
if totalTokens > maxTotalTokens or partSize > availableContentBytes:
|
||||
# Part too large or total exceeds limit - chunk it
|
||||
chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt)
|
||||
if not chunks:
|
||||
raise ValueError(f"Failed to chunk content part for model {model.name}")
|
||||
|
||||
logger.info(f"Starting to process {len(chunks)} chunks with model {model.name}")
|
||||
|
||||
if progressCallback:
|
||||
progressCallback(0.0, f"Starting to process {len(chunks)} chunks")
|
||||
|
||||
chunkResults = []
|
||||
for idx, chunk in enumerate(chunks):
|
||||
chunkNum = idx + 1
|
||||
chunkData = chunk.get('data', '')
|
||||
logger.info(f"Processing chunk {chunkNum}/{len(chunks)} with model {model.name}")
|
||||
|
||||
if progressCallback:
|
||||
progressCallback(chunkNum / len(chunks), f"Processing chunk {chunkNum}/{len(chunks)}")
|
||||
|
||||
try:
|
||||
chunkResponse = await aiObjects._callWithModel(model, prompt, chunkData, options)
|
||||
chunkResults.append(chunkResponse)
|
||||
except Exception as chunkError:
|
||||
logger.error(f"Error processing chunk {chunkNum}/{len(chunks)}: {str(chunkError)}")
|
||||
# Continue with other chunks even if one fails
|
||||
continue
|
||||
|
||||
# Merge chunk results
|
||||
if not chunkResults:
|
||||
raise ValueError(f"All chunks failed for content part")
|
||||
|
||||
mergedContent = self.mergePartResults(chunkResults, options)
|
||||
return AiCallResponse(
|
||||
content=mergedContent,
|
||||
modelName=model.name,
|
||||
priceUsd=sum(r.priceUsd for r in chunkResults),
|
||||
processingTime=sum(r.processingTime for r in chunkResults),
|
||||
bytesSent=sum(r.bytesSent for r in chunkResults),
|
||||
bytesReceived=sum(r.bytesReceived for r in chunkResults),
|
||||
errorCount=sum(r.errorCount for r in chunkResults)
|
||||
)
|
||||
else:
|
||||
# Part fits - call AI directly via aiObjects interface
|
||||
logger.info(f"✅ Content part fits within model limits, processing directly")
|
||||
response = await aiObjects._callWithModel(model, prompt, contentPart.data, options)
|
||||
logger.info(f"✅ Content part processed successfully with model: {model.name}")
|
||||
return response
|
||||
else:
|
||||
# Part too large - chunk it
|
||||
chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt)
|
||||
if not chunks:
|
||||
raise ValueError(f"Failed to chunk content part for model {model.name}")
|
||||
|
|
@ -1037,8 +1212,8 @@ class ExtractionService:
|
|||
logger.error(f"❌ Error processing chunk {chunkNum}/{len(chunks)}: {str(e)}")
|
||||
raise
|
||||
|
||||
# Merge chunk results
|
||||
mergedContent = self.mergeChunkResults(chunkResults)
|
||||
# Merge chunk results using unified mergePartResults
|
||||
mergedContent = self.mergePartResults(chunkResults, options)
|
||||
|
||||
logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)")
|
||||
return AiCallResponse(
|
||||
|
|
|
|||
|
|
@ -2,7 +2,9 @@
|
|||
# All rights reserved.
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
import base64
|
||||
import traceback
|
||||
from typing import Any, Dict, List, Optional, Callable
|
||||
from modules.datamodels.datamodelChat import ChatDocument
|
||||
from modules.services.serviceGeneration.subDocumentUtility import (
|
||||
getFileExtension,
|
||||
|
|
@ -100,12 +102,12 @@ class GenerationService:
|
|||
# For binary data, handle bytes vs base64 string vs regular string
|
||||
if isinstance(documentData, bytes):
|
||||
# Already bytes - encode to base64 string for storage
|
||||
import base64
|
||||
# base64 is already imported at module level
|
||||
content = base64.b64encode(documentData).decode('utf-8')
|
||||
base64encoded = True
|
||||
elif isinstance(documentData, str):
|
||||
# Check if it's already valid base64
|
||||
import base64
|
||||
# base64 is already imported at module level
|
||||
try:
|
||||
# Try to decode to verify it's base64
|
||||
base64.b64decode(documentData, validate=True)
|
||||
|
|
@ -122,7 +124,7 @@ class GenerationService:
|
|||
continue
|
||||
else:
|
||||
# Other types - convert to string then base64
|
||||
import base64
|
||||
# base64 is already imported at module level
|
||||
try:
|
||||
content = base64.b64encode(str(documentData).encode('utf-8')).decode('utf-8')
|
||||
base64encoded = True
|
||||
|
|
@ -231,7 +233,7 @@ class GenerationService:
|
|||
return None
|
||||
# Convert content to bytes
|
||||
if base64encoded:
|
||||
import base64
|
||||
# base64 is already imported at module level
|
||||
content_bytes = base64.b64decode(content)
|
||||
else:
|
||||
content_bytes = content.encode('utf-8')
|
||||
|
|
@ -319,10 +321,12 @@ class GenerationService:
|
|||
'workflowId': 'unknown'
|
||||
}
|
||||
|
||||
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str, List[Dict[str, Any]]]:
|
||||
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> tuple[str, str, List[Dict[str, Any]]]:
|
||||
"""
|
||||
Render extracted JSON content to the specified output format.
|
||||
Supports multiple documents in the documents array (Phase 5: multi-document rendering).
|
||||
Always uses unified "documents" array format.
|
||||
Supports three content formats: reference, object (base64), extracted_text.
|
||||
|
||||
Args:
|
||||
extractedContent: Structured JSON document from AI extraction
|
||||
|
|
@ -330,6 +334,7 @@ class GenerationService:
|
|||
title: Report title
|
||||
userPrompt: User's original prompt for report generation
|
||||
aiService: AI service instance for generation prompt creation
|
||||
parentOperationId: Optional parent operation ID for hierarchical logging
|
||||
|
||||
Returns:
|
||||
tuple: (rendered_content, mime_type, images_list)
|
||||
|
|
@ -348,15 +353,40 @@ class GenerationService:
|
|||
if len(documents) == 0:
|
||||
raise ValueError("No documents found in 'documents' array")
|
||||
|
||||
# Use first document for rendering
|
||||
single_doc = documents[0]
|
||||
if "sections" not in single_doc:
|
||||
raise ValueError("Document must contain 'sections' field")
|
||||
|
||||
# Pass standardized schema to renderer (maintains architecture)
|
||||
# Renderer should extract sections from documents array according to standardized schema
|
||||
# Standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
|
||||
contentToRender = extractedContent # Pass full standardized schema
|
||||
# Phase 5: Multi-document rendering
|
||||
if len(documents) == 1:
|
||||
# Single document - use existing logic
|
||||
single_doc = documents[0]
|
||||
if "sections" not in single_doc:
|
||||
raise ValueError("Document must contain 'sections' field")
|
||||
|
||||
# Pass standardized schema to renderer (maintains architecture)
|
||||
contentToRender = extractedContent # Pass full standardized schema
|
||||
else:
|
||||
# Multiple documents - merge all sections into one document for rendering
|
||||
# Option: Merge all sections from all documents into a single document
|
||||
all_sections = []
|
||||
for doc in documents:
|
||||
if isinstance(doc, dict) and "sections" in doc:
|
||||
sections = doc.get("sections", [])
|
||||
if isinstance(sections, list):
|
||||
all_sections.extend(sections)
|
||||
|
||||
if not all_sections:
|
||||
raise ValueError("No sections found in any document")
|
||||
|
||||
# Create merged document with all sections
|
||||
merged_document = {
|
||||
"metadata": extractedContent.get("metadata", {}),
|
||||
"documents": [{
|
||||
"id": "merged",
|
||||
"title": title,
|
||||
"filename": f"{title}.{outputFormat}",
|
||||
"sections": all_sections
|
||||
}]
|
||||
}
|
||||
contentToRender = merged_document
|
||||
logger.info(f"Rendering {len(documents)} documents with {len(all_sections)} total sections")
|
||||
|
||||
# Get the appropriate renderer for the format
|
||||
renderer = self._getFormatRenderer(outputFormat)
|
||||
|
|
@ -378,6 +408,92 @@ class GenerationService:
|
|||
logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
|
||||
raise
|
||||
|
||||
async def generateDocumentWithTwoPhases(
    self,
    userPrompt: str,
    cachedContent: Optional[Dict[str, Any]] = None,
    contentParts: Optional[List[Any]] = None,
    maxSectionLength: int = 500,
    parallelGeneration: bool = True,
    progressCallback: Optional[Callable] = None
) -> Dict[str, Any]:
    """
    Produce a document in two passes.

    Pass 1 asks ``StructureGenerator.generateStructure`` for the document
    structure; pass 2 hands that structure to
    ``ContentGenerator.generateContent`` to fill it in.

    Progress is reported on a 0-100 scale: 0 before the structure pass,
    30 once the structure exists, ``30 + int(60 * index / total)`` while
    sections are generated, and 100 when everything is done.

    Args:
        userPrompt: The user's original prompt, forwarded to both passes.
        cachedContent: Optional extracted-content cache. Its
            ``"imageDocuments"`` entry (when truthy) is forwarded to the
            structure pass as ``existingImages``.
        contentParts: Optional list of ContentParts, forwarded to both passes.
        maxSectionLength: Forwarded to the structure pass (documented upstream
            as the maximum words for simple sections).
        parallelGeneration: Forwarded to the content pass.
        progressCallback: Optional ``callback(progress, total, message)``.

    Returns:
        Whatever ``ContentGenerator.generateContent`` returns, i.e. the
        populated document structure.

    Raises:
        Exception: Any error from either pass is logged and re-raised as is.
    """
    def reportProgress(percent: int, text: str) -> None:
        # Progress reporting is optional; stay silent without a callback.
        if progressCallback:
            progressCallback(percent, 100, text)

    def contentProgressCallback(sectionIndex: int, totalSections: int, message: str):
        # Translate per-section progress into the 30-90 band of the overall scale.
        if not progressCallback:
            return
        completedShare = sectionIndex / totalSections if totalSections > 0 else 0
        overallProgress = 30 + int(60 * completedShare)
        progressCallback(overallProgress, 100, f"Section {sectionIndex}/{totalSections}: {message}")

    try:
        # Imported at call time, as in the original implementation.
        from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator
        from modules.services.serviceGeneration.subContentGenerator import ContentGenerator

        # ---- Pass 1: structure ----
        reportProgress(0, "Generating document structure...")
        skeletonBuilder = StructureGenerator(self.services)

        # Already-extracted images ride along in the cache; None when absent/empty.
        existingImages = (cachedContent or {}).get("imageDocuments") or None

        skeleton = await skeletonBuilder.generateStructure(
            userPrompt=userPrompt,
            documentList=None,  # Not used in current implementation
            cachedContent=cachedContent,
            contentParts=contentParts,
            maxSectionLength=maxSectionLength,
            existingImages=existingImages
        )
        reportProgress(30, "Structure generated, starting content generation...")

        # ---- Pass 2: content ----
        sectionWriter = ContentGenerator(self.services)
        filledDocument = await sectionWriter.generateContent(
            structure=skeleton,
            cachedContent=cachedContent,
            userPrompt=userPrompt,
            contentParts=contentParts,
            progressCallback=contentProgressCallback,
            parallelGeneration=parallelGeneration
        )
        reportProgress(100, "Document generation complete")

        return filledDocument

    except Exception as e:
        # Message at error level, full traceback at debug level, then propagate.
        logger.error(f"Error in two-phase document generation: {str(e)}")
        logger.debug(traceback.format_exc())
        raise
|
||||
|
||||
async def getAdaptiveExtractionPrompt(
|
||||
self,
|
||||
outputFormat: str,
|
||||
|
|
@ -423,6 +539,6 @@ class GenerationService:
|
|||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting renderer for {output_format}: {str(e)}")
|
||||
import traceback
|
||||
# traceback is already imported at module level
|
||||
logger.debug(traceback.format_exc())
|
||||
return None
|
||||
|
|
@ -68,6 +68,7 @@ class BaseRenderer(ABC):
|
|||
def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
|
||||
Phase 5: Supports multiple documents - extracts all sections from all documents.
|
||||
"""
|
||||
if "documents" not in reportData:
|
||||
raise ValueError("Report data must follow standardized schema with 'documents' array")
|
||||
|
|
@ -76,11 +77,18 @@ class BaseRenderer(ABC):
|
|||
if not isinstance(documents, list) or len(documents) == 0:
|
||||
raise ValueError("Standardized schema must contain at least one document in 'documents' array")
|
||||
|
||||
firstDoc = documents[0]
|
||||
if not isinstance(firstDoc, dict) or "sections" not in firstDoc:
|
||||
raise ValueError("Document in standardized schema must contain 'sections' field")
|
||||
# Phase 5: Extract sections from ALL documents
|
||||
all_sections = []
|
||||
for doc in documents:
|
||||
if isinstance(doc, dict) and "sections" in doc:
|
||||
sections = doc.get("sections", [])
|
||||
if isinstance(sections, list):
|
||||
all_sections.extend(sections)
|
||||
|
||||
return firstDoc.get("sections", [])
|
||||
if not all_sections:
|
||||
raise ValueError("No sections found in any document")
|
||||
|
||||
return all_sections
|
||||
|
||||
def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from typing import Dict, Any, Tuple, List
|
|||
import io
|
||||
import base64
|
||||
import re
|
||||
import csv
|
||||
|
||||
try:
|
||||
from docx import Document
|
||||
|
|
@ -225,13 +226,36 @@ class RendererDocx(BaseRenderer):
|
|||
self.logger.warning(f"Could not clear template content: {str(e)}")
|
||||
|
||||
def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||
"""Render a single JSON section to DOCX using AI-generated styles."""
|
||||
"""Render a single JSON section to DOCX using AI-generated styles.
|
||||
Supports three content formats: reference, object (base64), extracted_text.
|
||||
"""
|
||||
try:
|
||||
section_type = section.get("content_type", "paragraph")
|
||||
elements = section.get("elements", [])
|
||||
|
||||
# Process each element in the section
|
||||
for element in elements:
|
||||
element_type = element.get("type", "")
|
||||
|
||||
# Support three content formats from Phase 5D
|
||||
if element_type == "reference":
|
||||
# Document reference format
|
||||
doc_ref = element.get("documentReference", "")
|
||||
label = element.get("label", "Reference")
|
||||
para = doc.add_paragraph(f"[Reference: {label}]")
|
||||
para.runs[0].italic = True
|
||||
continue
|
||||
elif element_type == "extracted_text":
|
||||
# Extracted text format - render as paragraph
|
||||
content = element.get("content", "")
|
||||
source = element.get("source", "")
|
||||
if content:
|
||||
para = doc.add_paragraph(content)
|
||||
if source:
|
||||
para.add_run(f" (Source: {source})").italic = True
|
||||
continue
|
||||
|
||||
# Standard section types
|
||||
if section_type == "table":
|
||||
self._renderJsonTable(doc, element, styles)
|
||||
elif section_type == "bullet_list":
|
||||
|
|
@ -848,7 +872,7 @@ class RendererDocx(BaseRenderer):
|
|||
Process tables in the content (both CSV and pipe-separated) and convert them to Word tables.
|
||||
Returns the content with tables replaced by placeholders.
|
||||
"""
|
||||
import csv
|
||||
# csv is already imported at module level
|
||||
|
||||
lines = content.split('\n')
|
||||
processed_lines = []
|
||||
|
|
|
|||
|
|
@ -297,11 +297,39 @@ class RendererHtml(BaseRenderer):
|
|||
return '\n'.join(css_parts)
|
||||
|
||||
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
"""Render a single JSON section to HTML using AI-generated styles."""
|
||||
"""Render a single JSON section to HTML using AI-generated styles.
|
||||
Supports three content formats: reference, object (base64), extracted_text.
|
||||
"""
|
||||
try:
|
||||
sectionType = self._getSectionType(section)
|
||||
sectionData = self._getSectionData(section)
|
||||
|
||||
# Check for three content formats from Phase 5D in elements
|
||||
if isinstance(sectionData, list):
|
||||
htmlParts = []
|
||||
for element in sectionData:
|
||||
element_type = element.get("type", "") if isinstance(element, dict) else ""
|
||||
|
||||
# Support three content formats from Phase 5D
|
||||
if element_type == "reference":
|
||||
# Document reference format
|
||||
doc_ref = element.get("documentReference", "")
|
||||
label = element.get("label", "Reference")
|
||||
htmlParts.append(f'<p class="reference"><em>[Reference: {label}]</em></p>')
|
||||
continue
|
||||
elif element_type == "extracted_text":
|
||||
# Extracted text format
|
||||
content = element.get("content", "")
|
||||
source = element.get("source", "")
|
||||
if content:
|
||||
source_text = f' <small><em>(Source: {source})</em></small>' if source else ''
|
||||
htmlParts.append(f'<p class="extracted-text">{content}{source_text}</p>')
|
||||
continue
|
||||
|
||||
# If we processed reference/extracted_text elements, return them
|
||||
if htmlParts:
|
||||
return '\n'.join(htmlParts)
|
||||
|
||||
if sectionType == "table":
|
||||
# Process the section data to extract table structure
|
||||
processedData = self._processSectionByType(section)
|
||||
|
|
|
|||
|
|
@ -77,11 +77,39 @@ class RendererMarkdown(BaseRenderer):
|
|||
raise Exception(f"Markdown generation failed: {str(e)}")
|
||||
|
||||
def _renderJsonSection(self, section: Dict[str, Any]) -> str:
|
||||
"""Render a single JSON section to markdown."""
|
||||
"""Render a single JSON section to markdown.
|
||||
Supports three content formats: reference, object (base64), extracted_text.
|
||||
"""
|
||||
try:
|
||||
sectionType = self._getSectionType(section)
|
||||
sectionData = self._getSectionData(section)
|
||||
|
||||
# Check for three content formats from Phase 5D in elements
|
||||
if isinstance(sectionData, list):
|
||||
markdownParts = []
|
||||
for element in sectionData:
|
||||
element_type = element.get("type", "") if isinstance(element, dict) else ""
|
||||
|
||||
# Support three content formats from Phase 5D
|
||||
if element_type == "reference":
|
||||
# Document reference format
|
||||
doc_ref = element.get("documentReference", "")
|
||||
label = element.get("label", "Reference")
|
||||
markdownParts.append(f"*[Reference: {label}]*")
|
||||
continue
|
||||
elif element_type == "extracted_text":
|
||||
# Extracted text format
|
||||
content = element.get("content", "")
|
||||
source = element.get("source", "")
|
||||
if content:
|
||||
source_text = f" *(Source: {source})*" if source else ""
|
||||
markdownParts.append(f"{content}{source_text}")
|
||||
continue
|
||||
|
||||
# If we processed reference/extracted_text elements, return them
|
||||
if markdownParts:
|
||||
return '\n\n'.join(markdownParts)
|
||||
|
||||
if sectionType == "table":
|
||||
# Process the section data to extract table structure
|
||||
processedData = self._processSectionByType(section)
|
||||
|
|
|
|||
|
|
@ -477,7 +477,9 @@ class RendererPdf(BaseRenderer):
|
|||
return colors.black
|
||||
|
||||
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||||
"""Render a single JSON section to PDF elements using AI-generated styles."""
|
||||
"""Render a single JSON section to PDF elements using AI-generated styles.
|
||||
Supports three content formats: reference, object (base64), extracted_text.
|
||||
"""
|
||||
try:
|
||||
section_type = self._getSectionType(section)
|
||||
elements = self._getSectionData(section)
|
||||
|
|
@ -485,6 +487,33 @@ class RendererPdf(BaseRenderer):
|
|||
# Process each element in the section
|
||||
all_elements = []
|
||||
for element in elements:
|
||||
element_type = element.get("type", "") if isinstance(element, dict) else ""
|
||||
|
||||
# Support three content formats from Phase 5D
|
||||
if element_type == "reference":
|
||||
# Document reference format
|
||||
doc_ref = element.get("documentReference", "")
|
||||
label = element.get("label", "Reference")
|
||||
ref_style = ParagraphStyle(
|
||||
'Reference',
|
||||
parent=self._createNormalStyle(styles),
|
||||
fontStyle='italic',
|
||||
textColor=colors.grey
|
||||
)
|
||||
all_elements.append(Paragraph(f"[Reference: {label}]", ref_style))
|
||||
all_elements.append(Spacer(1, 6))
|
||||
continue
|
||||
elif element_type == "extracted_text":
|
||||
# Extracted text format
|
||||
content = element.get("content", "")
|
||||
source = element.get("source", "")
|
||||
if content:
|
||||
source_text = f" <i>(Source: {source})</i>" if source else ""
|
||||
all_elements.append(Paragraph(f"{content}{source_text}", self._createNormalStyle(styles)))
|
||||
all_elements.append(Spacer(1, 6))
|
||||
continue
|
||||
|
||||
# Standard section types
|
||||
if section_type == "table":
|
||||
all_elements.extend(self._renderJsonTable(element, styles))
|
||||
elif section_type == "bullet_list":
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@
|
|||
import logging
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime, UTC
|
||||
from typing import Dict, Any, Optional, Tuple, List
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
|
||||
|
|
@ -261,7 +264,7 @@ class RendererPptx(BaseRenderer):
|
|||
Returns:
|
||||
List of slide content strings
|
||||
"""
|
||||
import re
|
||||
# re is already imported at module level
|
||||
|
||||
# First, try to split by major headers (# or ##)
|
||||
# This is the most common case for AI-generated content
|
||||
|
|
@ -399,7 +402,7 @@ class RendererPptx(BaseRenderer):
|
|||
|
||||
def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str:
|
||||
"""Create a professional PowerPoint-specific AI style template for corporate-quality slides."""
|
||||
import json
|
||||
# json is already imported at module level
|
||||
schema_json = json.dumps(style_schema, indent=4)
|
||||
|
||||
return f"""Customize the JSON below for professional PowerPoint slides.
|
||||
|
|
@ -443,8 +446,7 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
self.logger.warning("AI service returned no response, using defaults")
|
||||
return default_styles
|
||||
|
||||
import json
|
||||
import re
|
||||
# json and re are already imported at module level
|
||||
|
||||
# Clean and parse JSON
|
||||
result = response.content.strip() if response and response.content else ""
|
||||
|
|
@ -634,6 +636,27 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
content_type = section.get("content_type", "paragraph")
|
||||
elements = section.get("elements", [])
|
||||
|
||||
# Check for three content formats from Phase 5D in elements
|
||||
content_parts = []
|
||||
for element in elements:
|
||||
element_type = element.get("type", "") if isinstance(element, dict) else ""
|
||||
|
||||
# Support three content formats from Phase 5D
|
||||
if element_type == "reference":
|
||||
# Document reference format
|
||||
doc_ref = element.get("documentReference", "")
|
||||
label = element.get("label", "Reference")
|
||||
content_parts.append(f"[Reference: {label}]")
|
||||
continue
|
||||
elif element_type == "extracted_text":
|
||||
# Extracted text format
|
||||
content = element.get("content", "")
|
||||
source = element.get("source", "")
|
||||
if content:
|
||||
source_text = f" (Source: {source})" if source else ""
|
||||
content_parts.append(f"{content}{source_text}")
|
||||
continue
|
||||
|
||||
# Handle image sections specially
|
||||
if content_type == "image":
|
||||
# Extract image data
|
||||
|
|
@ -647,26 +670,25 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
})
|
||||
|
||||
return {
|
||||
"title": section_title or element.get("altText", "Image"),
|
||||
"content": "", # No text content for image slides
|
||||
"title": section_title or (elements[0].get("altText", "Image") if elements else "Image"),
|
||||
"content": "\n\n".join(content_parts) if content_parts else "", # Include reference/extracted_text if present
|
||||
"images": images
|
||||
}
|
||||
|
||||
# Build slide content based on section type
|
||||
content_parts = []
|
||||
|
||||
if content_type == "table":
|
||||
content_parts.append(self._formatTableForSlide(elements))
|
||||
elif content_type == "list":
|
||||
content_parts.append(self._formatListForSlide(elements))
|
||||
elif content_type == "heading":
|
||||
content_parts.append(self._formatHeadingForSlide(elements))
|
||||
elif content_type == "paragraph":
|
||||
content_parts.append(self._formatParagraphForSlide(elements))
|
||||
elif content_type == "code":
|
||||
content_parts.append(self._formatCodeForSlide(elements))
|
||||
else:
|
||||
content_parts.append(self._format_paragraph_for_slide(elements))
|
||||
if not content_parts: # Only if we didn't process reference/extracted_text above
|
||||
if content_type == "table":
|
||||
content_parts.append(self._formatTableForSlide(elements))
|
||||
elif content_type == "list":
|
||||
content_parts.append(self._formatListForSlide(elements))
|
||||
elif content_type == "heading":
|
||||
content_parts.append(self._formatHeadingForSlide(elements))
|
||||
elif content_type == "paragraph":
|
||||
content_parts.append(self._formatParagraphForSlide(elements))
|
||||
elif content_type == "code":
|
||||
content_parts.append(self._formatCodeForSlide(elements))
|
||||
else:
|
||||
content_parts.append(self._format_paragraph_for_slide(elements))
|
||||
|
||||
# Combine content parts
|
||||
slide_content = "\n\n".join(filter(None, content_parts))
|
||||
|
|
@ -1057,5 +1079,5 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
|
||||
def _formatTimestamp(self) -> str:
|
||||
"""Format current timestamp for presentation generation."""
|
||||
from datetime import datetime, UTC
|
||||
# datetime and UTC are already imported at module level
|
||||
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
||||
|
|
|
|||
|
|
@ -100,11 +100,39 @@ class RendererText(BaseRenderer):
|
|||
raise Exception(f"Text generation failed: {str(e)}")
|
||||
|
||||
def _renderJsonSection(self, section: Dict[str, Any]) -> str:
|
||||
"""Render a single JSON section to text."""
|
||||
"""Render a single JSON section to text.
|
||||
Supports three content formats: reference, object (base64), extracted_text.
|
||||
"""
|
||||
try:
|
||||
sectionType = self._getSectionType(section)
|
||||
sectionData = self._getSectionData(section)
|
||||
|
||||
# Check for three content formats from Phase 5D in elements
|
||||
if isinstance(sectionData, list):
|
||||
textParts = []
|
||||
for element in sectionData:
|
||||
element_type = element.get("type", "") if isinstance(element, dict) else ""
|
||||
|
||||
# Support three content formats from Phase 5D
|
||||
if element_type == "reference":
|
||||
# Document reference format
|
||||
doc_ref = element.get("documentReference", "")
|
||||
label = element.get("label", "Reference")
|
||||
textParts.append(f"[Reference: {label}]")
|
||||
continue
|
||||
elif element_type == "extracted_text":
|
||||
# Extracted text format
|
||||
content = element.get("content", "")
|
||||
source = element.get("source", "")
|
||||
if content:
|
||||
source_text = f" (Source: {source})" if source else ""
|
||||
textParts.append(f"{content}{source_text}")
|
||||
continue
|
||||
|
||||
# If we processed reference/extracted_text elements, return them
|
||||
if textParts:
|
||||
return '\n\n'.join(textParts)
|
||||
|
||||
if sectionType == "table":
|
||||
# Process the section data to extract table structure
|
||||
processedData = self._processSectionByType(section)
|
||||
|
|
|
|||
|
|
@ -7,6 +7,10 @@ Generates content for each section in the document structure.
|
|||
|
||||
import logging
|
||||
import asyncio
|
||||
import json
|
||||
import base64
|
||||
import re
|
||||
import traceback
|
||||
from typing import Dict, Any, Optional, List, Callable
|
||||
from modules.services.serviceGeneration.subContentIntegrator import ContentIntegrator
|
||||
|
||||
|
|
@ -25,6 +29,7 @@ class ContentGenerator:
|
|||
structure: Dict[str, Any],
|
||||
cachedContent: Optional[Dict[str, Any]] = None,
|
||||
userPrompt: str = "",
|
||||
contentParts: Optional[List[Any]] = None,
|
||||
progressCallback: Optional[Callable] = None,
|
||||
parallelGeneration: bool = True,
|
||||
batchSize: int = 10
|
||||
|
|
@ -33,9 +38,10 @@ class ContentGenerator:
|
|||
Generate content for all sections in structure.
|
||||
|
||||
Args:
|
||||
structure: Document structure from Phase 1
|
||||
structure: Document structure from Phase 1 (with contentPartIds per section)
|
||||
cachedContent: Extracted content cache
|
||||
userPrompt: Original user prompt
|
||||
contentParts: List of all available ContentParts (for mapping by contentPartIds)
|
||||
progressCallback: Function to call for progress updates
|
||||
parallelGeneration: Enable parallel section generation
|
||||
batchSize: Number of sections to process in parallel
|
||||
|
|
@ -89,6 +95,7 @@ class ContentGenerator:
|
|||
sections=sections,
|
||||
cachedContent=cachedContent,
|
||||
userPrompt=userPrompt,
|
||||
contentParts=contentParts, # Pass ContentParts for section generation
|
||||
documentMetadata=structure.get("metadata", {}),
|
||||
progressCallback=lambda idx, total, msg: progressCallback(
|
||||
currentSectionIndex + idx,
|
||||
|
|
@ -103,6 +110,7 @@ class ContentGenerator:
|
|||
sections=sections,
|
||||
cachedContent=cachedContent,
|
||||
userPrompt=userPrompt,
|
||||
contentParts=contentParts, # Pass ContentParts for section generation
|
||||
documentMetadata=structure.get("metadata", {}),
|
||||
progressCallback=lambda idx, total, msg: progressCallback(
|
||||
currentSectionIndex + idx,
|
||||
|
|
@ -138,7 +146,8 @@ class ContentGenerator:
|
|||
sections: List[Dict[str, Any]],
|
||||
cachedContent: Optional[Dict[str, Any]],
|
||||
userPrompt: str,
|
||||
documentMetadata: Dict[str, Any],
|
||||
contentParts: Optional[List[Any]] = None,
|
||||
documentMetadata: Dict[str, Any] = {},
|
||||
progressCallback: Optional[Callable] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
|
|
@ -149,6 +158,14 @@ class ContentGenerator:
|
|||
previousSections = []
|
||||
totalSections = len(sections)
|
||||
|
||||
# Create ContentParts lookup map by ID
|
||||
contentPartsMap = {}
|
||||
if contentParts:
|
||||
for part in contentParts:
|
||||
partId = part.id if hasattr(part, 'id') else part.get('id', '')
|
||||
if partId:
|
||||
contentPartsMap[partId] = part
|
||||
|
||||
for idx, section in enumerate(sections):
|
||||
try:
|
||||
contentType = section.get("content_type", "content")
|
||||
|
|
@ -171,11 +188,20 @@ class ContentGenerator:
|
|||
message
|
||||
)
|
||||
|
||||
# Get ContentParts for this section
|
||||
sectionContentPartIds = section.get("contentPartIds", [])
|
||||
sectionContentParts = []
|
||||
if sectionContentPartIds and contentPartsMap:
|
||||
for partId in sectionContentPartIds:
|
||||
if partId in contentPartsMap:
|
||||
sectionContentParts.append(contentPartsMap[partId])
|
||||
|
||||
context = {
|
||||
"userPrompt": userPrompt,
|
||||
"cachedContent": cachedContent,
|
||||
"previousSections": previousSections.copy(),
|
||||
"targetSection": section,
|
||||
"sectionContentParts": sectionContentParts, # ContentParts for this section
|
||||
"documentMetadata": documentMetadata,
|
||||
"operationId": None
|
||||
}
|
||||
|
|
@ -272,11 +298,20 @@ class ContentGenerator:
|
|||
message
|
||||
)
|
||||
|
||||
# Get ContentParts for this section
|
||||
sectionContentPartIds = section.get("contentPartIds", [])
|
||||
sectionContentParts = []
|
||||
if sectionContentPartIds and contentPartsMap:
|
||||
for partId in sectionContentPartIds:
|
||||
if partId in contentPartsMap:
|
||||
sectionContentParts.append(contentPartsMap[partId])
|
||||
|
||||
context = {
|
||||
"userPrompt": userPrompt,
|
||||
"cachedContent": cachedContent,
|
||||
"previousSections": batchPreviousSections.copy(), # Include sections from previous batches
|
||||
"targetSection": section,
|
||||
"sectionContentParts": sectionContentParts, # ContentParts for this section
|
||||
"documentMetadata": documentMetadata,
|
||||
"operationId": None # Can be set if needed for nested progress
|
||||
}
|
||||
|
|
@ -371,17 +406,13 @@ class ContentGenerator:
|
|||
# Create section-specific prompt
|
||||
sectionPrompt = self._createSectionPrompt(section, context)
|
||||
|
||||
# Debug: Log section generation prompt
|
||||
if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
|
||||
sectionId = section.get('id', 'unknown')
|
||||
contentType = section.get('content_type', 'unknown')
|
||||
try:
|
||||
self.services.utils.writeDebugFile(
|
||||
sectionPrompt,
|
||||
f"document_generation_section_{sectionId}_{contentType}_prompt"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not write debug file for section prompt: {e}")
|
||||
# Debug: Log section generation prompt (harmonisiert - keine Checks nötig)
|
||||
sectionId = section.get('id', 'unknown')
|
||||
contentType = section.get('content_type', 'unknown')
|
||||
self.services.utils.writeDebugFile(
|
||||
sectionPrompt,
|
||||
f"document_generation_section_{sectionId}_{contentType}_prompt"
|
||||
)
|
||||
|
||||
# Call AI to generate content
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||
|
|
@ -397,32 +428,27 @@ class ContentGenerator:
|
|||
outputFormat="json"
|
||||
)
|
||||
|
||||
# Debug: Log section generation response (always log, even if empty)
|
||||
# Debug: Log section generation response (harmonisiert - keine Checks nötig)
|
||||
sectionId = section.get('id', 'unknown')
|
||||
contentType = section.get('content_type', 'unknown')
|
||||
|
||||
if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
|
||||
try:
|
||||
responseContent = ''
|
||||
if aiResponse:
|
||||
if hasattr(aiResponse, 'content') and aiResponse.content:
|
||||
responseContent = aiResponse.content
|
||||
elif hasattr(aiResponse, 'documents') and aiResponse.documents:
|
||||
responseContent = f"[Response has {len(aiResponse.documents)} documents]"
|
||||
else:
|
||||
responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]"
|
||||
else:
|
||||
responseContent = '[No response object]'
|
||||
|
||||
self.services.utils.writeDebugFile(
|
||||
responseContent,
|
||||
f"document_generation_section_{sectionId}_{contentType}_response"
|
||||
)
|
||||
logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not write debug file for section response: {e}")
|
||||
import traceback
|
||||
logger.debug(traceback.format_exc())
|
||||
responseContent = ''
|
||||
if aiResponse:
|
||||
if hasattr(aiResponse, 'content') and aiResponse.content:
|
||||
responseContent = aiResponse.content
|
||||
elif hasattr(aiResponse, 'documents') and aiResponse.documents:
|
||||
responseContent = f"[Response has {len(aiResponse.documents)} documents]"
|
||||
else:
|
||||
responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]"
|
||||
else:
|
||||
responseContent = '[No response object]'
|
||||
|
||||
# Debug: Log section generation response (harmonisiert - keine Checks nötig)
|
||||
self.services.utils.writeDebugFile(
|
||||
responseContent,
|
||||
f"document_generation_section_{sectionId}_{contentType}_response"
|
||||
)
|
||||
logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)")
|
||||
|
||||
if not aiResponse or not aiResponse.content:
|
||||
logger.error(f"AI section generation returned empty response for section {sectionId}")
|
||||
|
|
@ -443,7 +469,7 @@ class ContentGenerator:
|
|||
logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500] if extractedJson else 'None'}")
|
||||
raise ValueError("No JSON found in AI section response")
|
||||
|
||||
import json
|
||||
# json is already imported at module level
|
||||
try:
|
||||
elementsData = json.loads(extractedJson)
|
||||
logger.debug(f"Parsed JSON for section {section.get('id')}: type={type(elementsData)}, keys={list(elementsData.keys()) if isinstance(elementsData, dict) else 'N/A'}")
|
||||
|
|
@ -480,7 +506,7 @@ class ContentGenerator:
|
|||
# Last resort: try to extract partial content and create minimal valid JSON
|
||||
try:
|
||||
# Try to extract text content before the truncation point
|
||||
import re
|
||||
# re is already imported at module level
|
||||
# Look for text field that might be partially complete
|
||||
textMatch = re.search(r'"text"\s*:\s*"([^"]*)', extractedJson)
|
||||
if textMatch:
|
||||
|
|
@ -577,14 +603,14 @@ class ContentGenerator:
|
|||
) -> Dict[str, Any]:
|
||||
"""Generate image for image section or include existing image"""
|
||||
try:
|
||||
# Check if this is an existing image to include
|
||||
# Check if this is an existing image to include or render
|
||||
imageSource = section.get("image_source", "generate")
|
||||
|
||||
if imageSource == "existing":
|
||||
# Include existing image from cachedContent
|
||||
if imageSource == "existing" or imageSource == "render":
|
||||
# Phase 4: Include existing image or render image from cachedContent
|
||||
imageRefId = section.get("image_reference_id")
|
||||
if not imageRefId:
|
||||
raise ValueError(f"Image section {section.get('id')} has image_source='existing' but no image_reference_id")
|
||||
raise ValueError(f"Image section {section.get('id')} has image_source='{imageSource}' but no image_reference_id")
|
||||
|
||||
cachedContent = context.get("cachedContent", {})
|
||||
imageDocuments = cachedContent.get("imageDocuments", [])
|
||||
|
|
@ -594,7 +620,7 @@ class ContentGenerator:
|
|||
if not imageDoc:
|
||||
raise ValueError(f"Image document {imageRefId} not found in cachedContent.imageDocuments")
|
||||
|
||||
# Create image element from existing image
|
||||
# Create image element from existing/render image
|
||||
altText = imageDoc.get("altText", section.get("generation_hint", "Image"))
|
||||
mimeType = imageDoc.get("mimeType", "image/png")
|
||||
|
||||
|
|
@ -605,7 +631,7 @@ class ContentGenerator:
|
|||
"caption": section.get("metadata", {}).get("caption")
|
||||
}]
|
||||
|
||||
logger.info(f"Successfully included existing image {imageRefId} for section {section.get('id')}")
|
||||
logger.info(f"Successfully integrated image {imageRefId} for section {section.get('id')} (source={imageSource})")
|
||||
return section
|
||||
|
||||
# Generate new image (existing logic)
|
||||
|
|
@ -620,7 +646,7 @@ class ContentGenerator:
|
|||
|
||||
# Call AI service for image generation
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptImage
|
||||
import json
|
||||
# json is already imported at module level
|
||||
|
||||
# Create image generation prompt
|
||||
promptModel = AiCallPromptImage(
|
||||
|
|
@ -664,7 +690,7 @@ class ContentGenerator:
|
|||
|
||||
# Validate base64 data
|
||||
try:
|
||||
import base64
|
||||
# base64 is already imported at module level
|
||||
base64.b64decode(base64Data[:100], validate=True) # Validate first 100 chars
|
||||
except Exception as e:
|
||||
logger.warning(f"Image data may not be valid base64: {str(e)}")
|
||||
|
|
@ -710,9 +736,11 @@ class ContentGenerator:
|
|||
"""Create sub-prompt for section content generation"""
|
||||
contentType = section.get("content_type", "")
|
||||
generationHint = section.get("generation_hint", "")
|
||||
extractionPrompt = section.get("extractionPrompt") # Optional extraction prompt for ContentParts
|
||||
userPrompt = context.get("userPrompt", "")
|
||||
cachedContent = context.get("cachedContent")
|
||||
previousSections = context.get("previousSections", [])
|
||||
sectionContentParts = context.get("sectionContentParts", []) # ContentParts for this section
|
||||
documentMetadata = context.get("documentMetadata", {})
|
||||
|
||||
# Get user language
|
||||
|
|
@ -723,6 +751,51 @@ class ContentGenerator:
|
|||
if cachedContent and cachedContent.get("extractedContent"):
|
||||
cachedContentText = self._formatCachedContent(cachedContent)
|
||||
|
||||
# Format ContentParts for this section
|
||||
contentPartsText = ""
|
||||
imagePartReferences = [] # Track image parts for text reference
|
||||
|
||||
if sectionContentParts:
|
||||
try:
|
||||
partsList = []
|
||||
imageIndex = 1
|
||||
for part in sectionContentParts:
|
||||
partTypeGroup = part.typeGroup if hasattr(part, 'typeGroup') else part.get('typeGroup', '')
|
||||
partMimeType = part.mimeType if hasattr(part, 'mimeType') else part.get('mimeType', '')
|
||||
partId = part.id if hasattr(part, 'id') else part.get('id', '')
|
||||
partData = part.data if hasattr(part, 'data') else part.get('data', '')
|
||||
|
||||
# Check if this is an image part
|
||||
isImage = partTypeGroup == "image" or (partMimeType and partMimeType.startswith("image/"))
|
||||
|
||||
if contentType == "image" and isImage:
|
||||
# For image sections: include image data for integration
|
||||
partsList.append(f"- ContentPart {partId} (image): [Image data available for integration]")
|
||||
elif isImage:
|
||||
# For non-image sections: track for text reference
|
||||
imagePartReferences.append({
|
||||
"id": partId,
|
||||
"index": imageIndex
|
||||
})
|
||||
imageIndex += 1
|
||||
# Don't include image data in prompt for non-image sections
|
||||
else:
|
||||
# For text/table/etc parts: include data preview
|
||||
dataPreview = str(partData)[:200] if partData else "[No data]"
|
||||
partsList.append(f"- ContentPart {partId} ({partTypeGroup}): {dataPreview}{'...' if partData and len(str(partData)) > 200 else ''}")
|
||||
|
||||
if partsList:
|
||||
contentPartsText = "\n".join(partsList)
|
||||
|
||||
# Add image reference instructions for non-image sections
|
||||
if imagePartReferences and contentType != "image":
|
||||
refText = ", ".join([f"Bild {ref['index']}" if userLanguage == "de" else f"Image {ref['index']}" for ref in imagePartReferences])
|
||||
contentPartsText += f"\n\nNOTE: Reference images as text in the document language: {refText}"
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not format ContentParts for section prompt: {str(e)}")
|
||||
contentPartsText = ""
|
||||
|
||||
# Format previous sections for context
|
||||
previousSectionsText = ""
|
||||
if previousSections:
|
||||
|
|
@ -787,14 +860,22 @@ EXTRACTED CONTENT (if available):
|
|||
{cachedContentText if cachedContentText else "None"}
|
||||
{'='*80}
|
||||
|
||||
{'='*80}
|
||||
CONTENT PARTS FOR THIS SECTION:
|
||||
{'='*80}
|
||||
{contentPartsText if contentPartsText else "No ContentParts assigned to this section."}
|
||||
{'='*80}
|
||||
|
||||
TASK: Generate content for this section ONLY.
|
||||
|
||||
INSTRUCTIONS:
|
||||
1. Generate content appropriate for section type: {contentType}
|
||||
2. Use the generation hint: {generationHint}
|
||||
3. Consider previous sections for continuity
|
||||
4. Use extracted content if relevant
|
||||
5. All content must be in the language '{userLanguage}'
|
||||
{f"3. Use extractionPrompt for ContentParts: {extractionPrompt}" if extractionPrompt else "3. Use ContentParts data if provided"}
|
||||
4. Consider previous sections for continuity
|
||||
5. Use extracted content if relevant
|
||||
6. All content must be in the language '{userLanguage}'
|
||||
7. {'For image sections: Integrate image ContentParts as visual elements' if contentType == "image" else 'For non-image sections: Reference image ContentParts as text (e.g., "siehe Bild 1" in German, "see Image 1" in English)'}
|
||||
|
||||
6. CRITICAL: Return ONLY a JSON object with an "elements" array. DO NOT return a full document structure.
|
||||
|
||||
|
|
|
|||
|
|
@ -65,18 +65,14 @@ class ContentIntegrator:
|
|||
)
|
||||
sections[idx] = section
|
||||
|
||||
# Debug: Write final merged structure to debug file
|
||||
if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
|
||||
try:
|
||||
import json
|
||||
structureJson = json.dumps(structure, indent=2, ensure_ascii=False)
|
||||
self.services.utils.writeDebugFile(
|
||||
structureJson,
|
||||
"document_generation_final_merged_json"
|
||||
)
|
||||
logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not write debug file for final merged JSON: {e}")
|
||||
# Debug: Write final merged structure to debug file (harmonisiert - keine Checks nötig)
|
||||
import json
|
||||
structureJson = json.dumps(structure, indent=2, ensure_ascii=False)
|
||||
self.services.utils.writeDebugFile(
|
||||
structureJson,
|
||||
"document_generation_final_merged_json"
|
||||
)
|
||||
logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)")
|
||||
|
||||
return structure
|
||||
|
||||
|
|
|
|||
|
|
@ -1,316 +0,0 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""
|
||||
Document Purpose Analyzer for hierarchical document generation.
|
||||
Uses AI to analyze user prompt and determine purpose for each document.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional
|
||||
from modules.datamodels.datamodelChat import ChatDocument
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DocumentPurposeAnalyzer:
    """Analyzes the user prompt and the provided documents to determine document purposes.

    For every document an AI call decides how it should be used (e.g.
    ``"extract_text_content"``, ``"include_image"``).  Whenever the AI call
    fails or returns something unusable, rule-based defaults derived from
    the document MIME type and the action name are used instead, so callers
    always receive a result of the same shape.
    """

    # Default purpose of a non-image document, keyed by action name.
    # Actions not listed here fall back to "extract_text_content".
    _DEFAULT_PURPOSE_BY_ACTION = {
        "extractData": "extract_data",
        "translateDocument": "translate",
        "summarizeDocument": "summarize",
        "convertDocument": "convert_format",
        "convert": "convert_format",
    }

    # Actions for which an image is analyzed with vision AI instead of being included as-is.
    _IMAGE_ANALYSIS_ACTIONS = ("extractData", "process")

    def __init__(self, services: Any):
        """Store the service container used for AI calls, utilities and user information."""
        self.services = services

    async def analyzeDocumentPurposes(
        self,
        userPrompt: str,
        chatDocuments: List["ChatDocument"],
        actionContext: str = "generateDocument"
    ) -> Dict[str, Any]:
        """
        Use AI to analyze user prompt and determine purpose for each document.

        This method does not raise: every failure is logged and answered with
        the rule-based defaults from ``_createDefaultPurposes``.

        Args:
            userPrompt: User's original prompt
            chatDocuments: List of ChatDocument objects to analyze
            actionContext: Action name (e.g., "generateDocument", "extractData")

        Returns:
            {
                "document_purposes": [
                    {
                        "document_id": "...",
                        "purpose": "extract_text_content" | "include_image" | ...,
                        "reasoning": "...",
                        "extractionPrompt": "..." (if purpose requires extraction),
                        "processingNotes": "..."
                    }
                ],
                "overall_intent": "..."
            }
        """
        try:
            if not chatDocuments:
                return {
                    "document_purposes": [],
                    "overall_intent": "No documents provided"
                }

            # Only metadata is sent to the AI, never the document content itself.
            documentMetadata = [
                {
                    "document_id": doc.id,
                    "fileName": doc.fileName,
                    "mimeType": doc.mimeType,
                    "fileSize": doc.fileSize
                }
                for doc in chatDocuments
            ]

            analysisPrompt = self._createAnalysisPrompt(
                userPrompt=userPrompt,
                actionContext=actionContext,
                documentMetadata=documentMetadata
            )
            self._writeDebugFile(
                lambda: analysisPrompt,
                "document_purpose_analysis_prompt",
                "purpose analysis prompt"
            )

            options = AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                resultFormat="json"
            )
            aiResponse = await self.services.ai.callAiContent(
                prompt=analysisPrompt,
                options=options,
                outputFormat="json"
            )
            self._writeDebugFile(
                lambda: self._formatResponseForDebug(aiResponse),
                "document_purpose_analysis_response",
                "purpose analysis response"
            )

            if not aiResponse or not aiResponse.content:
                logger.warning("AI purpose analysis returned empty response, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            extractedJson = self.services.utils.jsonExtractString(aiResponse.content)
            if not extractedJson:
                logger.warning("No JSON found in purpose analysis response, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            try:
                analysisResult = json.loads(extractedJson)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse purpose analysis JSON: {str(e)}")
                logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500]}")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            # The AI may return any JSON value; only a dict holding a list of
            # purposes is usable.
            purposes = analysisResult.get("document_purposes") if isinstance(analysisResult, dict) else None
            if not isinstance(purposes, list):
                logger.warning("Invalid analysis result structure, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            # Drop malformed entries instead of discarding the whole analysis;
            # the affected documents receive default purposes below.
            validPurposes = [entry for entry in purposes if isinstance(entry, dict)]
            if len(validPurposes) != len(purposes):
                logger.warning(
                    f"Ignoring {len(purposes) - len(validPurposes)} malformed entries in purpose analysis result"
                )
            analysisResult["document_purposes"] = validPurposes

            self._addMissingDocumentPurposes(validPurposes, chatDocuments, actionContext)
            return analysisResult

        except Exception as e:
            # Top-level boundary: purpose analysis must never abort the calling action.
            logger.error(f"Error analyzing document purposes: {str(e)}")
            return self._createDefaultPurposes(chatDocuments, actionContext)

    def _writeDebugFile(self, contentFactory: Any, fileName: str, description: str) -> None:
        """Write a debug file if the services offer ``utils.writeDebugFile``; never raise.

        Args:
            contentFactory: Zero-argument callable returning the text to write.
                It is only invoked when a debug writer is available.
            fileName: Name passed to ``writeDebugFile``.
            description: Human-readable subject used in the log message on failure.
        """
        utils = getattr(self.services, "utils", None) if self.services else None
        writer = getattr(utils, "writeDebugFile", None)
        if writer is None:
            return
        try:
            writer(contentFactory(), fileName)
        except Exception as e:
            # Debug output is best effort only.
            logger.debug(f"Could not write debug file for {description}: {e}")

    @staticmethod
    def _formatResponseForDebug(aiResponse: Any) -> str:
        """Render content and status metadata of an AI response as text for the debug file."""
        responseContent = aiResponse.content if aiResponse and aiResponse.content else ''
        responseMetadata = {
            "status": aiResponse.status if aiResponse else "N/A",
            "error": aiResponse.error if aiResponse else "N/A",
            "documents_count": len(aiResponse.documents) if aiResponse and aiResponse.documents else 0
        }
        return f"Response Content:\n{responseContent}\n\nResponse Metadata:\n{json.dumps(responseMetadata, indent=2)}"

    def _addMissingDocumentPurposes(
        self,
        purposes: List[Dict[str, Any]],
        chatDocuments: List["ChatDocument"],
        actionContext: str
    ) -> None:
        """Append a default purpose (in place) for every document the AI result does not cover."""
        analyzedIds = {entry.get("document_id") for entry in purposes}
        for doc in chatDocuments:
            if doc.id in analyzedIds:
                continue
            logger.warning(f"Document {doc.id} not in analysis result, adding default purpose")
            purposes.append({
                "document_id": doc.id,
                "purpose": self._determineDefaultPurpose(doc, actionContext),
                "reasoning": "Default purpose based on document type and action context",
                "extractionPrompt": None,
                "processingNotes": None
            })

    def _createAnalysisPrompt(
        self,
        userPrompt: str,
        actionContext: str,
        documentMetadata: List[Dict[str, Any]]
    ) -> str:
        """Create AI prompt for document purpose analysis.

        Args:
            userPrompt: User's original prompt, embedded verbatim.
            actionContext: Name of the action being performed.
            documentMetadata: One dict per document with the keys
                "document_id", "fileName", "mimeType" and "fileSize".

        Returns:
            The complete prompt text.
        """
        # Numbered document list, one four-line entry per document
        docListText = "".join(
            f"\n{i}. Document ID: {docInfo['document_id']}\n"
            f"   File Name: {docInfo['fileName']}\n"
            f"   MIME Type: {docInfo['mimeType']}\n"
            f"   File Size: {docInfo['fileSize']} bytes\n"
            for i, docInfo in enumerate(documentMetadata, 1)
        )

        userLanguage = self._getUserLanguage()

        prompt = f"""{'='*80}
DOCUMENT PURPOSE ANALYSIS
{'='*80}

USER PROMPT:
{userPrompt}

ACTION CONTEXT: {actionContext}

DOCUMENTS PROVIDED:
{docListText}
{'='*80}

TASK: For each document, determine its purpose based on:
1. User prompt intent (what the user wants to do)
2. Action context (what action is being performed)
3. Document type (mimeType - is it text, image, etc.)
4. Document metadata (fileName, size)

AVAILABLE PURPOSES:
- "extract_text_content": Extract text content for use in document generation
- "include_image": Include the image directly in the generated document (for images)
- "analyze_image_vision": Analyze image with vision AI to extract text/information (for images with text/charts)
- "use_as_template": Use document structure/layout as template for generation
- "use_as_reference": Use as background context/reference without detailed extraction
- "extract_data": Extract structured data (key-value pairs, entities, fields)
- "attach": Document is an attachment - don't process, just attach to output
- "convert_format": Convert document format (for convert actions)
- "translate": Translate document content (for translate actions)
- "summarize": Create summary of document (for summarize actions)
- "compare": Compare documents (for comparison actions)
- "merge": Merge documents (for merge actions)
- "extract_tables_charts": Extract tables and charts specifically
- "use_for_styling": Use document for styling/formatting reference only
- "extract_metadata": Extract only document metadata

CRITICAL RULES:
1. For images (mimeType starts with "image/"):
- If user wants to "include" or "show" images → "include_image"
- If user wants to "analyze", "read text", or "extract text" from images → "analyze_image_vision"
- Default for images in generateDocument → "include_image"

2. For text documents in generateDocument:
- If user mentions "template" or "structure" → "use_as_template"
- If user mentions "reference" or "context" → "use_as_reference"
- Default → "extract_text_content"

3. Consider action context:
- generateDocument: Usually "extract_text_content" or "include_image"
- extractData: Usually "extract_data"
- translateDocument: Usually "translate"
- summarizeDocument: Usually "summarize"

4. Return ONLY valid JSON following this structure:
{{
"document_purposes": [
{{
"document_id": "document_id_here",
"purpose": "extract_text_content",
"reasoning": "Brief explanation in language '{userLanguage}'",
"extractionPrompt": "Specific extraction prompt if purpose requires extraction, otherwise null",
"processingNotes": "Any special processing requirements or null"
}}
],
"overall_intent": "Summary of how documents should be used together in language '{userLanguage}'"
}}

5. All content must be in the language '{userLanguage}'
6. Return ONLY the JSON structure. No explanations before or after.

Return ONLY the JSON structure.
"""
        return prompt

    def _createDefaultPurposes(
        self,
        chatDocuments: List["ChatDocument"],
        actionContext: str
    ) -> Dict[str, Any]:
        """Create default purposes when AI analysis fails.

        Returns:
            A result of the same shape as ``analyzeDocumentPurposes`` with one
            rule-based entry per document.
        """
        purposes = [
            {
                "document_id": doc.id,
                "purpose": self._determineDefaultPurpose(doc, actionContext),
                "reasoning": f"Default purpose based on document type ({doc.mimeType}) and action context ({actionContext})",
                "extractionPrompt": None,
                "processingNotes": None
            }
            for doc in chatDocuments
        ]

        return {
            "document_purposes": purposes,
            "overall_intent": f"Default processing for {len(chatDocuments)} document(s) in {actionContext} action"
        }

    def _determineDefaultPurpose(
        self,
        doc: "ChatDocument",
        actionContext: str
    ) -> str:
        """Determine default purpose based on document type and action context.

        Images are included as-is unless the action is an extraction-like one,
        in which case they are analyzed with vision AI.  All other documents
        are mapped by action name, defaulting to "extract_text_content".
        """
        mimeType = doc.mimeType or ""

        if mimeType.startswith("image/"):
            if actionContext in self._IMAGE_ANALYSIS_ACTIONS:
                return "analyze_image_vision"
            return "include_image"

        return self._DEFAULT_PURPOSE_BY_ACTION.get(actionContext, "extract_text_content")

    def _getUserLanguage(self) -> str:
        """Get user language for document generation.

        Prefers ``services.currentUserLanguage``, then ``services.user.language``.

        Returns:
            The first non-empty language found, otherwise ``'en'``.
        """
        services = self.services
        if services:
            try:
                language = getattr(services, 'currentUserLanguage', None)
                if language:
                    return language
                # An unset or empty user language must not leak out as None/""
                language = getattr(getattr(services, 'user', None), 'language', None)
                if language:
                    return language
            except Exception as e:
                # Best effort: a broken service object must not block prompt creation.
                logger.debug(f"Could not determine user language: {e}")
        return 'en'  # Default fallback
|
||||
|
||||
|
|
@ -19,7 +19,8 @@ async def buildGenerationPrompt(
|
|||
title: str,
|
||||
extracted_content: str = None,
|
||||
continuationContext: Dict[str, Any] = None,
|
||||
services: Any = None
|
||||
services: Any = None,
|
||||
useContentParts: bool = False # ARCHITECTURE: If True, don't include full content in prompt (ContentParts will be used directly)
|
||||
) -> str:
|
||||
"""
|
||||
Build the unified generation prompt using a single JSON template.
|
||||
|
|
@ -120,7 +121,9 @@ Continue generating the remaining content now.
|
|||
# PROMPT FOR FIRST CALL
|
||||
# Structure: User request + Extracted content FIRST (if available), then JSON template, then instructions
|
||||
|
||||
if extracted_content:
|
||||
# ARCHITECTURE: If useContentParts=True, don't include full content in prompt
|
||||
# ContentParts will be passed directly to callAi for model-aware chunking
|
||||
if extracted_content and not useContentParts:
|
||||
# If we have extracted content, put it FIRST and make it very clear it's the source data
|
||||
generationPrompt = f"""{'='*80}
|
||||
USER REQUEST / USER PROMPT:
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ class StructureGenerator:
|
|||
userPrompt: str,
|
||||
documentList: Optional[Any] = None,
|
||||
cachedContent: Optional[Dict[str, Any]] = None,
|
||||
contentParts: Optional[List[Any]] = None,
|
||||
maxSectionLength: int = 500,
|
||||
existingImages: Optional[List[Dict[str, Any]]] = None
|
||||
) -> Dict[str, Any]:
|
||||
|
|
@ -34,30 +35,28 @@ class StructureGenerator:
|
|||
userPrompt: User's original prompt
|
||||
documentList: Optional document references
|
||||
cachedContent: Optional extracted content cache
|
||||
contentParts: Optional list of ContentParts to analyze for structure generation
|
||||
maxSectionLength: Maximum words for simple sections
|
||||
existingImages: Optional list of existing images to include
|
||||
|
||||
Returns:
|
||||
Document structure with empty elements arrays
|
||||
Document structure with empty elements arrays and contentPartIds per section
|
||||
"""
|
||||
try:
|
||||
# Create structure generation prompt
|
||||
structurePrompt = self._createStructurePrompt(
|
||||
userPrompt=userPrompt,
|
||||
cachedContent=cachedContent,
|
||||
contentParts=contentParts,
|
||||
maxSectionLength=maxSectionLength,
|
||||
existingImages=existingImages or []
|
||||
)
|
||||
|
||||
# Debug: Log structure generation prompt
|
||||
if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
|
||||
try:
|
||||
self.services.utils.writeDebugFile(
|
||||
structurePrompt,
|
||||
"document_generation_structure_prompt"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not write debug file for structure prompt: {e}")
|
||||
# Debug: Log structure generation prompt (harmonisiert - keine Checks nötig)
|
||||
self.services.utils.writeDebugFile(
|
||||
structurePrompt,
|
||||
"document_generation_structure_prompt"
|
||||
)
|
||||
|
||||
# Call AI to generate structure
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||
|
|
@ -73,15 +72,11 @@ class StructureGenerator:
|
|||
outputFormat="json"
|
||||
)
|
||||
|
||||
# Debug: Log structure generation response
|
||||
if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
|
||||
try:
|
||||
self.services.utils.writeDebugFile(
|
||||
aiResponse.content if aiResponse and aiResponse.content else '',
|
||||
"document_generation_structure_response"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not write debug file for structure response: {e}")
|
||||
# Debug: Log structure generation response (harmonisiert - keine Checks nötig)
|
||||
self.services.utils.writeDebugFile(
|
||||
aiResponse.content if aiResponse and aiResponse.content else '',
|
||||
"document_generation_structure_response"
|
||||
)
|
||||
|
||||
if not aiResponse or not aiResponse.content:
|
||||
raise ValueError("AI structure generation returned empty response")
|
||||
|
|
@ -106,6 +101,7 @@ class StructureGenerator:
|
|||
self,
|
||||
userPrompt: str,
|
||||
cachedContent: Optional[Dict[str, Any]] = None,
|
||||
contentParts: Optional[List[Any]] = None,
|
||||
maxSectionLength: int = 500,
|
||||
existingImages: Optional[List[Dict[str, Any]]] = None
|
||||
) -> str:
|
||||
|
|
@ -126,6 +122,41 @@ class StructureGenerator:
|
|||
if cachedContent and cachedContent.get("imageDocuments"):
|
||||
existingImages = cachedContent.get("imageDocuments", [])
|
||||
|
||||
# Format ContentParts as JSON for structure generation
|
||||
contentPartsJson = ""
|
||||
if contentParts:
|
||||
try:
|
||||
import json
|
||||
# Convert ContentParts to dict format for JSON serialization
|
||||
contentPartsList = []
|
||||
for part in contentParts:
|
||||
if hasattr(part, 'dict'):
|
||||
partDict = part.dict()
|
||||
elif isinstance(part, dict):
|
||||
partDict = part
|
||||
else:
|
||||
# Try to convert to dict
|
||||
partDict = {
|
||||
"id": getattr(part, 'id', ''),
|
||||
"typeGroup": getattr(part, 'typeGroup', ''),
|
||||
"mimeType": getattr(part, 'mimeType', ''),
|
||||
"label": getattr(part, 'label', ''),
|
||||
"metadata": getattr(part, 'metadata', {})
|
||||
}
|
||||
# Only include essential fields for structure generation (not full data)
|
||||
contentPartsList.append({
|
||||
"id": partDict.get("id", ""),
|
||||
"typeGroup": partDict.get("typeGroup", ""),
|
||||
"mimeType": partDict.get("mimeType", ""),
|
||||
"label": partDict.get("label", ""),
|
||||
"metadata": partDict.get("metadata", {})
|
||||
})
|
||||
|
||||
contentPartsJson = json.dumps(contentPartsList, indent=2, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not format ContentParts as JSON: {str(e)}")
|
||||
contentPartsJson = ""
|
||||
|
||||
# Create structure template
|
||||
structureTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", "Document Title")
|
||||
|
||||
|
|
@ -145,13 +176,15 @@ EXTRACTED CONTENT (if available):
|
|||
{'='*80}
|
||||
|
||||
INSTRUCTIONS:
|
||||
1. Analyze the user request and extracted content
|
||||
1. Analyze the user request, extracted content, and available ContentParts
|
||||
2. Create a document structure with CONTENT sections only
|
||||
3. For each section, specify:
|
||||
- id: Unique identifier (e.g., "section_title_1", "section_image_1")
|
||||
- content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block"
|
||||
- complexity: "simple" (can generate directly) or "complex" (needs sub-prompt)
|
||||
- generation_hint: Brief description of what content should be generated
|
||||
- contentPartIds: Array of ContentPart IDs that should be used for this section (e.g., ["part_1", "part_2"]) - can be empty []
|
||||
- extractionPrompt: (optional) Specific prompt for extracting/processing ContentParts for this section
|
||||
- image_prompt: (only for image sections) Detailed prompt for image generation
|
||||
- order: Section order number (starting from 1)
|
||||
- elements: [] (empty array - will be populated later)
|
||||
|
|
@ -160,10 +193,12 @@ INSTRUCTIONS:
|
|||
- If user requests illustrations/images, create image sections
|
||||
- If existing images are provided in documentList (check EXISTING IMAGES section below), create image sections that reference them
|
||||
- Add image_prompt field with detailed description for image generation (only for new images)
|
||||
- Set complexity to "complex"
|
||||
- Set complexity to "complex" for new images, "simple" for existing/render images
|
||||
- For existing images: Set image_source to "existing" and image_reference_id to the image document ID
|
||||
- For images to render (from input documents): Set image_source to "render" and image_reference_id to the image document ID
|
||||
- Example for new image: {{"id": "section_image_1", "content_type": "image", "complexity": "complex", "generation_hint": "Illustration for chapter 1", "image_prompt": "A detailed description for image generation", "order": 2, "elements": []}}
|
||||
- Example for existing image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Include provided image", "image_source": "existing", "image_reference_id": "doc_id_here", "order": 2, "elements": []}}
|
||||
- Example for render image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Render input image", "image_source": "render", "image_reference_id": "doc_id_here", "order": 2, "elements": []}}
|
||||
|
||||
{'='*80}
|
||||
EXISTING IMAGES (to include in document):
|
||||
|
|
@ -178,12 +213,21 @@ EXISTING IMAGES (to include in document):
|
|||
7. Return ONLY valid JSON following this structure:
|
||||
{structureTemplate}
|
||||
|
||||
5. CRITICAL RULES:
|
||||
5. CRITICAL RULES FOR CONTENT PARTS:
|
||||
- Analyze available ContentParts and determine which ones are needed for each section
|
||||
- For image sections (content_type == "image"): Include image ContentParts in contentPartIds - images will be integrated as visual elements
|
||||
- For other sections (heading, paragraph, etc.): If image ContentParts are referenced, they will be referenced as text in the document language (not integrated as images)
|
||||
- Each section can reference multiple ContentParts via contentPartIds array
|
||||
- If specific extraction/processing is needed for ContentParts, provide extractionPrompt
|
||||
- Image references in non-image sections should be automatically derived in the document language (e.g., "siehe Bild 1" in German, "see Image 1" in English)
|
||||
|
||||
6. CRITICAL RULES:
|
||||
- Return ONLY valid JSON (no comments, no trailing commas, double quotes only)
|
||||
- Follow the exact JSON schema structure provided
|
||||
- IMPORTANT: All sections MUST have empty elements arrays: "elements": [] (the template shows examples with content, but you must use empty arrays)
|
||||
- ALL sections MUST include "generation_hint" field with a brief description of what content should be generated
|
||||
- ALL sections MUST include "complexity" field: "simple" for short content, "complex" for long chapters/images
|
||||
- ALL sections MUST include "contentPartIds" field (can be empty array [] if no ContentParts needed)
|
||||
- Image sections MUST include "image_prompt" field with detailed description for image generation
|
||||
- Order numbers MUST start from 1 (not 0)
|
||||
- All content must be in the language '{userLanguage}'
|
||||
|
|
@ -235,6 +279,14 @@ Return ONLY the JSON structure. No explanations.
|
|||
if "elements" not in section:
|
||||
section["elements"] = []
|
||||
|
||||
# Ensure contentPartIds field exists (can be empty array)
|
||||
if "contentPartIds" not in section:
|
||||
section["contentPartIds"] = []
|
||||
|
||||
# Ensure extractionPrompt field exists (optional)
|
||||
if "extractionPrompt" not in section:
|
||||
section["extractionPrompt"] = None
|
||||
|
||||
# Identify complexity if not set
|
||||
if "complexity" not in section:
|
||||
section["complexity"] = self._identifySectionComplexity(
|
||||
|
|
@ -255,11 +307,11 @@ Return ONLY the JSON structure. No explanations.
|
|||
if section.get("content_type") == "image":
|
||||
imageSource = section.get("image_source", "generate")
|
||||
|
||||
if imageSource == "existing":
|
||||
# Existing image - ensure image_reference_id is set
|
||||
if imageSource == "existing" or imageSource == "render":
|
||||
# Existing or render image - ensure image_reference_id is set
|
||||
if "image_reference_id" not in section:
|
||||
logger.warning(f"Image section {sectionId} has image_source='existing' but no image_reference_id")
|
||||
# Existing images are simple (no generation needed)
|
||||
logger.warning(f"Image section {sectionId} has image_source='{imageSource}' but no image_reference_id")
|
||||
# Existing/render images are simple (no generation needed, code integration)
|
||||
section["complexity"] = "simple"
|
||||
else:
|
||||
# New image generation - ensure image_prompt
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
# All rights reserved.
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union, Type, TypeVar
|
||||
from pydantic import BaseModel, ValidationError
|
||||
|
||||
|
|
@ -11,10 +12,32 @@ T = TypeVar('T', bound=BaseModel)
|
|||
|
||||
|
||||
def stripCodeFences(text: str) -> str:
|
||||
"""Remove ```json / ``` fences and surrounding whitespace if present."""
|
||||
"""Remove ```json / ``` fences and surrounding whitespace if present.
|
||||
Also removes [SOURCE: ...] and [END SOURCE] tags that may wrap the JSON."""
|
||||
if not text:
|
||||
return text
|
||||
s = text.strip()
|
||||
|
||||
# Remove [SOURCE: ...] tags at the beginning
|
||||
if s.startswith("[SOURCE:"):
|
||||
# Find the end of the SOURCE tag (newline or end of string)
|
||||
end_pos = s.find("\n")
|
||||
if end_pos != -1:
|
||||
s = s[end_pos+1:]
|
||||
else:
|
||||
# No newline, entire string is SOURCE tag
|
||||
return ""
|
||||
|
||||
# Remove [END SOURCE] tags at the end
|
||||
if s.endswith("[END SOURCE]"):
|
||||
# Find the start of END SOURCE tag (newline before it)
|
||||
start_pos = s.rfind("\n[END SOURCE]")
|
||||
if start_pos != -1:
|
||||
s = s[:start_pos]
|
||||
else:
|
||||
# No newline, entire string is END SOURCE tag
|
||||
return ""
|
||||
|
||||
# Handle opening fence (may or may not have closing fence)
|
||||
if s.startswith("```"):
|
||||
# Remove first triple backticks
|
||||
|
|
@ -201,7 +224,7 @@ def closeJsonStructures(text: str) -> str:
|
|||
# Look for patterns like: "value" or "value\n (unterminated)
|
||||
# Check if we're in the middle of a string value when text ends
|
||||
if result.strip():
|
||||
import re
|
||||
# re is already imported at module level
|
||||
# Count quotes - if odd number, we have an unterminated string
|
||||
quoteCount = result.count('"')
|
||||
if quoteCount % 2 == 1:
|
||||
|
|
@ -367,7 +390,7 @@ def _removeLastIncompleteItem(items: List[str], original_text: str) -> List[str]
|
|||
Remove the last item if it appears to be incomplete/corrupted.
|
||||
This prevents corrupted data from being included in the final result.
|
||||
"""
|
||||
import re
|
||||
# re is already imported at module level
|
||||
|
||||
if not items:
|
||||
return items
|
||||
|
|
@ -418,7 +441,7 @@ def _extractGenericContent(text: str) -> List[Dict[str, Any]]:
|
|||
|
||||
CRITICAL: Must preserve original content_type and id from the JSON structure!
|
||||
"""
|
||||
import re
|
||||
# re is already imported at module level
|
||||
|
||||
sections = []
|
||||
|
||||
|
|
@ -1025,7 +1048,7 @@ def _extractCutOffElements(incomplete_section: Dict[str, Any], raw_json: str) ->
|
|||
if not cut_off_element:
|
||||
# Extract the last incomplete part from raw JSON
|
||||
# Find the last incomplete string/number/array
|
||||
import re
|
||||
# re is already imported at module level
|
||||
# Look for incomplete string at the end
|
||||
incomplete_match = re.search(r'"([^"]*?)(?:"|$)', raw_json[-500:], re.DOTALL)
|
||||
if incomplete_match:
|
||||
|
|
@ -1045,7 +1068,7 @@ def _extractCutOffFromElement(element: Dict[str, Any], raw_json: str) -> Optiona
|
|||
|
||||
This helps identify where exactly to continue within nested structures.
|
||||
"""
|
||||
import re
|
||||
# re is already imported at module level
|
||||
|
||||
# Check for code_block with nested JSON
|
||||
if "code" in element:
|
||||
|
|
|
|||
|
|
@ -8,9 +8,7 @@ from .process import process
|
|||
from .webResearch import webResearch
|
||||
from .summarizeDocument import summarizeDocument
|
||||
from .translateDocument import translateDocument
|
||||
from .convert import convert
|
||||
from .convertDocument import convertDocument
|
||||
from .extractData import extractData
|
||||
from .generateDocument import generateDocument
|
||||
|
||||
__all__ = [
|
||||
|
|
@ -18,9 +16,7 @@ __all__ = [
|
|||
'webResearch',
|
||||
'summarizeDocument',
|
||||
'translateDocument',
|
||||
'convert',
|
||||
'convertDocument',
|
||||
'extractData',
|
||||
'generateDocument',
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -1,157 +0,0 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Convert action for AI operations.
|
||||
Converts documents/data between different formats with specific formatting options.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
async def convert(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSON→CSV with custom columns, delimiters).
    - Input requirements: documentList (required); inputFormat and outputFormat (required).
    - Output format: Document in target format with specified formatting options.
    - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed).

    Parameters:
    - documentList (list, required): Document reference(s) to convert.
    - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.).
    - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.).
    - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect.
    - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,).
    - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True.
    - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'.
    """
    # --- Parameter validation -------------------------------------------------
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    inputFormat = parameters.get("inputFormat")
    outputFormat = parameters.get("outputFormat")
    if not inputFormat or not outputFormat:
        return ActionResult.isFailure(error="inputFormat and outputFormat are required")

    # Normalize formats (remove surrounding whitespace and a leading dot)
    normalizedInputFormat = inputFormat.strip().lstrip('.').lower()
    normalizedOutputFormat = outputFormat.strip().lstrip('.').lower()
    # Values such as "." or "   " pass the truthiness check above but normalize
    # to an empty string; reject them instead of building a prompt without a format.
    if not normalizedInputFormat or not normalizedOutputFormat:
        return ActionResult.isFailure(error="inputFormat and outputFormat are required")

    # --- Resolve the referenced documents --------------------------------------
    if isinstance(documentList, DocumentReferenceList):
        docRefList = documentList
    elif isinstance(documentList, list):
        docRefList = DocumentReferenceList.from_string_list(documentList)
    else:
        # A single reference was passed without a surrounding list
        docRefList = DocumentReferenceList.from_string_list([documentList])

    chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
    if not chatDocuments:
        return ActionResult.isFailure(error="No documents found in documentList")

    # --- Fast path: standardized JSON input is rendered directly (no AI call) --
    if normalizedInputFormat == "json" and len(chatDocuments) == 1:
        try:
            doc = chatDocuments[0]
            # ChatDocument doesn't have documentData - need to load file content using fileId
            docBytes = self.services.chat.getFileData(doc.fileId)
            if not docBytes:
                raise ValueError(f"No file data found for fileId={doc.fileId}")

            # bytes.decode() always yields str, so the payload can be parsed directly
            jsonData = json.loads(docBytes.decode('utf-8'))

            # Standardized JSON format is a dict that has "documents" or "sections"
            if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData):
                from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                generationService = GenerationService(self.services)

                # documentName may be missing; use one fallback for both the title
                # and the output file name so neither step can fail on None.
                sourceName = doc.documentName or "Converted Document"

                # Read the title BEFORE re-wrapping: the wrap below moves "metadata"
                # into documents[0], so a later top-level lookup would never find it.
                metadata = jsonData.get("metadata")
                title = (metadata.get("title") if isinstance(metadata, dict) else None) or sourceName

                # Ensure format is "documents" array
                if "documents" not in jsonData:
                    jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]}

                rendered_content, mime_type, _images = await generationService.renderReport(
                    jsonData, normalizedOutputFormat, title, None, None
                )

                # CSV-specific options are applied on top of the rendered output
                if normalizedOutputFormat == "csv":
                    renderOptions = {
                        "delimiter": parameters.get("delimiter", ","),
                        "columnsPerRow": parameters.get("columnsPerRow"),
                        "includeHeader": parameters.get("includeHeader", True),
                    }
                    rendered_content = self.csvProcessing.applyCsvOptions(rendered_content, renderOptions)

                validationMetadata = {
                    "actionType": "ai.convert",
                    "inputFormat": normalizedInputFormat,
                    "outputFormat": normalizedOutputFormat,
                    "hasSourceJson": True,
                    "conversionType": "direct_rendering"
                }
                # Swap the last extension of the source name for the target format
                baseName = sourceName.rsplit('.', 1)[0]
                actionDoc = ActionDocument(
                    documentName=f"{baseName}.{normalizedOutputFormat}",
                    documentData=rendered_content,
                    mimeType=mime_type,
                    sourceJson=jsonData,  # Preserve source JSON for structure validation
                    validationMetadata=validationMetadata
                )

                return ActionResult.isSuccess(documents=[actionDoc])

        except Exception as e:
            # Deliberate best-effort: any failure of the fast path falls through
            # to the AI-based conversion below instead of failing the action.
            logger.warning(f"Direct rendering failed, falling back to AI conversion: {str(e)}")

    # --- Fallback: Use AI for conversion (non-JSON inputs or complex conversions)
    columnsPerRow = parameters.get("columnsPerRow")
    delimiter = parameters.get("delimiter", ",")
    includeHeader = parameters.get("includeHeader", True)
    language = parameters.get("language", "en")

    aiPrompt = f"Convert the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format."

    if normalizedOutputFormat == "csv":
        aiPrompt += f" Use '{delimiter}' as the delimiter character."
        if columnsPerRow:
            aiPrompt += f" Format the output with {columnsPerRow} columns per row."
        if not includeHeader:
            aiPrompt += " Do not include a header row."
        else:
            aiPrompt += " Include a header row with column names."

    if language and language != "en":
        aiPrompt += f" Use language: {language}."

    aiPrompt += " Preserve all data and ensure accurate conversion. Maintain data integrity and structure."

    # Delegate to the generic AI processing action with the assembled prompt
    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": normalizedOutputFormat
    })
|
||||
|
||||
|
|
@ -1,59 +0,0 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""
|
||||
Extract Data action for AI operations.
|
||||
Extracts structured data from documents (key-value pairs, entities, facts, etc.).
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@action
async def extractData(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.).
    - Input requirements: documentList (required); optional dataStructure, fields.
    - Output format: JSON by default, or specified resultType.

    Parameters:
    - documentList (list, required): Document reference(s) to extract data from.
    - dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested.
    - fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]).
    - resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json.
    """
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    # `or` rather than a .get() default: a key that is present but set to None
    # (or an empty value) must fall back to the documented default as well.
    dataStructure = parameters.get("dataStructure") or "nested"
    fields = parameters.get("fields") or []
    resultType = parameters.get("resultType") or "json"

    # A bare string would otherwise be joined character by character
    # ("name" -> "n, a, m, e"); treat it as a single field name.
    if isinstance(fields, str):
        fields = [fields]

    aiPrompt = "Extract structured data from the provided document(s)."
    if fields:
        # str() keeps the join from raising on non-string entries
        fieldsStr = ", ".join(str(field) for field in fields)
        aiPrompt += f" Extract the following specific fields: {fieldsStr}."
    else:
        aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information."

    structureInstructions = {
        "flat": "Use a flat key-value structure with simple properties.",
        "nested": "Use a nested JSON structure with logical grouping of related data.",
        "list": "Structure the data as a list/array of objects, one per entity or record."
    }
    # Unknown structure names fall back to the "nested" instruction
    structureKey = str(dataStructure).strip().lower()
    aiPrompt += f" {structureInstructions.get(structureKey, structureInstructions['nested'])}"

    aiPrompt += " Ensure all extracted data is accurate and complete."

    # Delegate to the generic AI processing action with the assembled prompt
    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": resultType
    })
|
||||
|
||||
|
|
@ -3,18 +3,17 @@
|
|||
|
||||
"""
|
||||
Generate Document action for AI operations.
|
||||
Generates documents from scratch or based on templates/inputs using hierarchical approach.
|
||||
Wrapper around AI service callAiContent method.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Dict, Any, Optional
|
||||
from typing import Dict, Any, Optional, List
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
|
||||
from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator
|
||||
from modules.services.serviceGeneration.subContentGenerator import ContentGenerator
|
||||
from modules.services.serviceGeneration.subDocumentPurposeAnalyzer import DocumentPurposeAnalyzer
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||
from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -59,38 +58,15 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
resultType = "txt"
|
||||
logger.info(f"Auto-detected Text format from prompt")
|
||||
|
||||
maxSectionLength = parameters.get("maxSectionLength", 500)
|
||||
parallelGeneration = parameters.get("parallelGeneration", True)
|
||||
progressLogging = parameters.get("progressLogging", True)
|
||||
|
||||
# Create operation ID for progress tracking
|
||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||
operationId = f"doc_gen_{workflowId}_{int(time.time())}"
|
||||
parentOperationId = parameters.get('parentOperationId')
|
||||
|
||||
try:
|
||||
# Phase 1: Structure Generation
|
||||
if progressLogging:
|
||||
self.services.chat.progressLogStart(
|
||||
operationId,
|
||||
"Document",
|
||||
"Structure Generation",
|
||||
"Generating document structure...",
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
structureGenerator = StructureGenerator(self.services)
|
||||
|
||||
# Analyze document purposes and process documents accordingly
|
||||
cachedContent = None
|
||||
imageDocuments = []
|
||||
documentPurposes = {}
|
||||
|
||||
# Convert documentList to DocumentReferenceList if needed
|
||||
docRefList = None
|
||||
if documentList:
|
||||
if progressLogging:
|
||||
self.services.chat.progressLogUpdate(operationId, 0.1, "Analyzing document purposes...")
|
||||
|
||||
# Convert documentList to DocumentReferenceList
|
||||
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
||||
|
||||
if isinstance(documentList, DocumentReferenceList):
|
||||
|
|
@ -101,301 +77,78 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
docRefList = DocumentReferenceList.from_string_list(documentList)
|
||||
else:
|
||||
docRefList = DocumentReferenceList(references=[])
|
||||
|
||||
# Get ChatDocuments
|
||||
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
|
||||
if chatDocuments:
|
||||
logger.info(f"Analyzing purposes for {len(chatDocuments)} documents")
|
||||
|
||||
# Analyze document purposes using AI
|
||||
purposeAnalyzer = DocumentPurposeAnalyzer(self.services)
|
||||
purposeAnalysis = await purposeAnalyzer.analyzeDocumentPurposes(
|
||||
userPrompt=prompt,
|
||||
chatDocuments=chatDocuments,
|
||||
actionContext="generateDocument"
|
||||
)
|
||||
|
||||
documentPurposes = {dp["document_id"]: dp for dp in purposeAnalysis.get("document_purposes", [])}
|
||||
logger.info(f"Purpose analysis complete: {purposeAnalysis.get('overall_intent', 'N/A')}")
|
||||
|
||||
# Separate documents by purpose
|
||||
textDocs = []
|
||||
imageDocsToInclude = []
|
||||
imageDocsToAnalyze = []
|
||||
|
||||
for doc in chatDocuments:
|
||||
docPurpose = documentPurposes.get(doc.id, {})
|
||||
purpose = docPurpose.get("purpose", "extract_text_content")
|
||||
|
||||
if purpose == "include_image":
|
||||
imageDocsToInclude.append(doc)
|
||||
elif purpose == "analyze_image_vision":
|
||||
imageDocsToAnalyze.append(doc)
|
||||
elif purpose in ["extract_text_content", "use_as_template", "use_as_reference", "extract_data"]:
|
||||
textDocs.append(doc)
|
||||
# Skip "attach" purpose - don't process
|
||||
|
||||
# Process text documents (extract content)
|
||||
extractedResults = []
|
||||
if textDocs:
|
||||
if progressLogging:
|
||||
self.services.chat.progressLogUpdate(operationId, 0.15, f"Extracting content from {len(textDocs)} text document(s)...")
|
||||
|
||||
# Prepare extraction options with purpose-specific prompts
|
||||
extractionOptionsList = []
|
||||
for doc in textDocs:
|
||||
docPurpose = documentPurposes.get(doc.id, {})
|
||||
extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all content from the document"
|
||||
|
||||
extractionOptions = ExtractionOptions(
|
||||
prompt=extractionPrompt,
|
||||
mergeStrategy=MergeStrategy(
|
||||
mergeType="concatenate",
|
||||
groupBy="typeGroup",
|
||||
orderBy="id"
|
||||
),
|
||||
processDocumentsIndividually=True
|
||||
)
|
||||
extractionOptionsList.append((doc, extractionOptions))
|
||||
|
||||
# Extract content from text documents
|
||||
for doc, extractionOptions in extractionOptionsList:
|
||||
try:
|
||||
docResults = self.services.extraction.extractContent(
|
||||
[doc],
|
||||
extractionOptions,
|
||||
parentOperationId=operationId
|
||||
)
|
||||
extractedResults.extend(docResults)
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting content from {doc.fileName}: {str(e)}")
|
||||
|
||||
logger.info(f"Extracted content from {len(extractedResults)} text document(s)")
|
||||
|
||||
# Process images to analyze (vision call)
|
||||
if imageDocsToAnalyze:
|
||||
if progressLogging:
|
||||
self.services.chat.progressLogUpdate(operationId, 0.2, f"Analyzing {len(imageDocsToAnalyze)} image(s) with vision AI...")
|
||||
|
||||
# Extract content from images using vision analysis
|
||||
for doc in imageDocsToAnalyze:
|
||||
try:
|
||||
docPurpose = documentPurposes.get(doc.id, {})
|
||||
extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all text and information from this image"
|
||||
|
||||
extractionOptions = ExtractionOptions(
|
||||
prompt=extractionPrompt,
|
||||
mergeStrategy=MergeStrategy(
|
||||
mergeType="concatenate",
|
||||
groupBy="typeGroup",
|
||||
orderBy="id"
|
||||
),
|
||||
processDocumentsIndividually=True
|
||||
)
|
||||
|
||||
docResults = self.services.extraction.extractContent(
|
||||
[doc],
|
||||
extractionOptions,
|
||||
parentOperationId=operationId
|
||||
)
|
||||
extractedResults.extend(docResults)
|
||||
except Exception as e:
|
||||
logger.error(f"Error analyzing image {doc.fileName}: {str(e)}")
|
||||
|
||||
logger.info(f"Analyzed {len(imageDocsToAnalyze)} image(s) with vision AI")
|
||||
|
||||
# Process images to include (store image data)
|
||||
if imageDocsToInclude:
|
||||
if progressLogging:
|
||||
self.services.chat.progressLogUpdate(operationId, 0.25, f"Preparing {len(imageDocsToInclude)} image(s) for inclusion...")
|
||||
|
||||
# Get image data for inclusion
|
||||
from modules.interfaces.interfaceDbComponentObjects import getInterface
|
||||
dbInterface = getInterface()
|
||||
|
||||
for doc in imageDocsToInclude:
|
||||
try:
|
||||
# Get image bytes
|
||||
imageBytes = dbInterface.getFileData(doc.fileId)
|
||||
if imageBytes:
|
||||
# Encode to base64
|
||||
import base64
|
||||
base64Data = base64.b64encode(imageBytes).decode('utf-8')
|
||||
|
||||
# Create image document entry
|
||||
imageDoc = {
|
||||
"id": doc.id,
|
||||
"fileName": doc.fileName,
|
||||
"mimeType": doc.mimeType,
|
||||
"base64Data": base64Data,
|
||||
"altText": doc.fileName or "Image",
|
||||
"fileSize": doc.fileSize
|
||||
}
|
||||
imageDocuments.append(imageDoc)
|
||||
logger.debug(f"Prepared image {doc.fileName} for inclusion ({len(base64Data)} chars base64)")
|
||||
else:
|
||||
logger.warning(f"Could not retrieve image data for {doc.fileName}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error preparing image {doc.fileName} for inclusion: {str(e)}")
|
||||
|
||||
logger.info(f"Prepared {len(imageDocuments)} image(s) for inclusion")
|
||||
|
||||
# Build cachedContent with all information
|
||||
cachedContent = {
|
||||
"extractedContent": extractedResults,
|
||||
"imageDocuments": imageDocuments,
|
||||
"documentPurposes": documentPurposes,
|
||||
"extractionTimestamp": time.time(),
|
||||
"sourceDocuments": [doc.id for doc in chatDocuments]
|
||||
}
|
||||
|
||||
logger.info(f"Document processing complete: {len(extractedResults)} extracted, {len(imageDocuments)} images to include")
|
||||
|
||||
# Generate structure
|
||||
if progressLogging:
|
||||
self.services.chat.progressLogUpdate(operationId, 0.2, "Generating document structure...")
|
||||
# Prepare title
|
||||
title = parameters.get("documentType") or "Generated Document"
|
||||
|
||||
structure = await structureGenerator.generateStructure(
|
||||
userPrompt=prompt,
|
||||
documentList=documentList if documentList else None,
|
||||
cachedContent=cachedContent,
|
||||
maxSectionLength=maxSectionLength,
|
||||
existingImages=imageDocuments # Pass existing images for structure generation
|
||||
# Call AI service for document generation
|
||||
# callAiContent handles documentList internally via Phases 5A-5E
|
||||
options = AiCallOptions(
|
||||
operationType=OperationTypeEnum.DATA_GENERATE,
|
||||
priority=PriorityEnum.BALANCED,
|
||||
processingMode=ProcessingModeEnum.DETAILED,
|
||||
compressPrompt=False,
|
||||
compressContext=False
|
||||
)
|
||||
|
||||
if progressLogging:
|
||||
self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated")
|
||||
|
||||
# Phase 2: Content Generation
|
||||
if progressLogging:
|
||||
self.services.chat.progressLogUpdate(
|
||||
operationId,
|
||||
0.34,
|
||||
"Starting content generation..."
|
||||
)
|
||||
|
||||
contentGenerator = ContentGenerator(self.services)
|
||||
|
||||
# Create enhanced progress callback
|
||||
def progressCallback(sectionIndex: int, totalSections: int, message: str):
|
||||
if progressLogging:
|
||||
# Calculate progress: 34% to 90% for content generation phase
|
||||
if totalSections > 0:
|
||||
progress = 0.34 + (0.56 * (sectionIndex / totalSections))
|
||||
else:
|
||||
progress = 0.34
|
||||
|
||||
# Format message
|
||||
if sectionIndex > 0 and totalSections > 0:
|
||||
progressMessage = f"Section {sectionIndex}/{totalSections}: {message}"
|
||||
else:
|
||||
progressMessage = message
|
||||
|
||||
self.services.chat.progressLogUpdate(
|
||||
operationId,
|
||||
progress,
|
||||
progressMessage
|
||||
)
|
||||
|
||||
completeStructure = await contentGenerator.generateContent(
|
||||
structure=structure,
|
||||
cachedContent=cachedContent,
|
||||
userPrompt=prompt,
|
||||
progressCallback=progressCallback,
|
||||
parallelGeneration=parallelGeneration
|
||||
)
|
||||
|
||||
if progressLogging:
|
||||
self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated")
|
||||
|
||||
# Phase 3: Integration & Rendering
|
||||
if progressLogging:
|
||||
self.services.chat.progressLogUpdate(
|
||||
operationId,
|
||||
0.91,
|
||||
"Rendering final document..."
|
||||
)
|
||||
|
||||
# Use existing renderReport method
|
||||
title = structure.get("metadata", {}).get("title", "Generated Document")
|
||||
if documentType:
|
||||
title = f"{title} ({documentType})"
|
||||
|
||||
renderedContent, mimeType, images = await self.services.generation.renderReport(
|
||||
extractedContent=completeStructure,
|
||||
aiResponse: AiResponse = await self.services.ai.callAiContent(
|
||||
prompt=prompt,
|
||||
options=options,
|
||||
documentList=docRefList, # Übergebe documentList direkt - callAiContent macht Phasen 5A-5E
|
||||
outputFormat=resultType,
|
||||
title=title,
|
||||
userPrompt=prompt,
|
||||
aiService=self.services.ai
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
# Build list of documents to return
|
||||
documents = [
|
||||
ActionDocument(
|
||||
documentName=f"document.{resultType}",
|
||||
documentData=renderedContent,
|
||||
mimeType=mimeType
|
||||
)
|
||||
]
|
||||
# Convert AiResponse to ActionResult
|
||||
documents = []
|
||||
|
||||
# Add images as separate documents
|
||||
if images:
|
||||
logger.info(f"Processing {len(images)} image(s) from renderer")
|
||||
import base64
|
||||
for idx, imageData in enumerate(images):
|
||||
try:
|
||||
base64Data = imageData.get("base64Data", "")
|
||||
altText = imageData.get("altText", f"image_{idx + 1}")
|
||||
caption = imageData.get("caption", "")
|
||||
sectionId = imageData.get("sectionId", f"section_{idx + 1}")
|
||||
|
||||
if base64Data:
|
||||
# Decode base64 to bytes
|
||||
imageBytes = base64.b64decode(base64Data)
|
||||
|
||||
# Determine filename and mime type
|
||||
filename = imageData.get("filename", f"image_{idx + 1}.png")
|
||||
if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')):
|
||||
filename = f"image_{idx + 1}.png"
|
||||
|
||||
# Determine mime type from filename
|
||||
if filename.lower().endswith('.png'):
|
||||
imageMimeType = "image/png"
|
||||
elif filename.lower().endswith(('.jpg', '.jpeg')):
|
||||
imageMimeType = "image/jpeg"
|
||||
elif filename.lower().endswith('.gif'):
|
||||
imageMimeType = "image/gif"
|
||||
elif filename.lower().endswith('.webp'):
|
||||
imageMimeType = "image/webp"
|
||||
else:
|
||||
imageMimeType = "image/png" # Default
|
||||
|
||||
# Add image document
|
||||
documents.append(ActionDocument(
|
||||
documentName=filename,
|
||||
documentData=imageBytes,
|
||||
mimeType=imageMimeType
|
||||
))
|
||||
logger.info(f"Added image document: {filename} (section: {sectionId}, {len(imageBytes)} bytes, alt: {altText})")
|
||||
# Convert DocumentData to ActionDocument
|
||||
if aiResponse.documents:
|
||||
for docData in aiResponse.documents:
|
||||
documents.append(ActionDocument(
|
||||
documentName=docData.documentName,
|
||||
documentData=docData.documentData,
|
||||
mimeType=docData.mimeType,
|
||||
sourceJson=docData.sourceJson if hasattr(docData, 'sourceJson') else None
|
||||
))
|
||||
|
||||
# If no documents but content exists, create a document from content
|
||||
if not documents and aiResponse.content:
|
||||
# Determine document name from metadata
|
||||
docName = f"document.{resultType}"
|
||||
if aiResponse.metadata and aiResponse.metadata.filename:
|
||||
docName = aiResponse.metadata.filename
|
||||
elif aiResponse.metadata and aiResponse.metadata.title:
|
||||
import re
|
||||
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title)
|
||||
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
|
||||
if sanitized:
|
||||
if not sanitized.lower().endswith(f".{resultType}"):
|
||||
docName = f"{sanitized}.{resultType}"
|
||||
else:
|
||||
logger.warning(f"Image {idx + 1} (section: {sectionId}) has no base64Data, skipping")
|
||||
except Exception as e:
|
||||
logger.error(f"Error adding image document {idx + 1}: {str(e)}", exc_info=True)
|
||||
continue
|
||||
else:
|
||||
logger.debug("No images returned from renderer")
|
||||
|
||||
# Note: Document creation is handled by the workflow system
|
||||
# We just return the rendered content and images in ActionResult
|
||||
|
||||
if progressLogging:
|
||||
self.services.chat.progressLogFinish(operationId, True)
|
||||
docName = sanitized
|
||||
|
||||
# Determine mime type
|
||||
mimeType = "text/plain"
|
||||
if resultType == "html":
|
||||
mimeType = "text/html"
|
||||
elif resultType == "json":
|
||||
mimeType = "application/json"
|
||||
elif resultType == "pdf":
|
||||
mimeType = "application/pdf"
|
||||
elif resultType == "md":
|
||||
mimeType = "text/markdown"
|
||||
|
||||
documents.append(ActionDocument(
|
||||
documentName=docName,
|
||||
documentData=aiResponse.content.encode('utf-8') if isinstance(aiResponse.content, str) else aiResponse.content,
|
||||
mimeType=mimeType
|
||||
))
|
||||
|
||||
return ActionResult.isSuccess(documents=documents)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in hierarchical document generation: {str(e)}")
|
||||
if progressLogging:
|
||||
self.services.chat.progressLogFinish(operationId, False)
|
||||
logger.error(f"Error in document generation: {str(e)}")
|
||||
return ActionResult.isFailure(error=str(e))
|
||||
|
||||
|
|
|
|||
|
|
@ -8,11 +8,12 @@ Universal AI document processing action.
|
|||
|
||||
import logging
|
||||
import time
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional
|
||||
from modules.workflows.methods.methodBase import action
|
||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||
from modules.datamodels.datamodelAi import AiCallOptions
|
||||
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -82,8 +83,7 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
|
||||
logger.info(f"Using result type: {resultType} -> {output_extension}")
|
||||
|
||||
# Phase 7.3: Extract content first if documents provided, then use contentParts
|
||||
# Check if contentParts are already provided (preferred path)
|
||||
# Check if contentParts are already provided (from context.extractContent or other sources)
|
||||
contentParts: Optional[List[ContentPart]] = None
|
||||
if "contentParts" in parameters:
|
||||
contentParts = parameters.get("contentParts")
|
||||
|
|
@ -95,63 +95,42 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
|
||||
contentParts = None
|
||||
|
||||
# If contentParts not provided but documentList is, extract content first
|
||||
if not contentParts and documentList.references:
|
||||
self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")
|
||||
|
||||
# Get ChatDocuments
|
||||
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
||||
if not chatDocuments:
|
||||
logger.warning("No documents found in documentList")
|
||||
else:
|
||||
logger.info(f"Extracting content from {len(chatDocuments)} documents")
|
||||
|
||||
# Prepare extraction options (use defaults if not provided)
|
||||
extractionOptions = parameters.get("extractionOptions")
|
||||
if not extractionOptions:
|
||||
extractionOptions = ExtractionOptions(
|
||||
prompt="Extract all content from the document",
|
||||
mergeStrategy=MergeStrategy(
|
||||
mergeType="concatenate",
|
||||
groupBy="typeGroup",
|
||||
orderBy="id"
|
||||
),
|
||||
processDocumentsIndividually=True
|
||||
)
|
||||
|
||||
# Extract content using extraction service with hierarchical progress logging
|
||||
# Pass operationId for per-document progress tracking
|
||||
extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
|
||||
|
||||
# Combine all ContentParts from all extracted results
|
||||
contentParts = []
|
||||
for extracted in extractedResults:
|
||||
if extracted.parts:
|
||||
contentParts.extend(extracted.parts)
|
||||
|
||||
logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")
|
||||
|
||||
# Update progress - preparing AI call
|
||||
self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
|
||||
|
||||
# Build options with only resultFormat - let service layer handle all other parameters
|
||||
# Build options
|
||||
output_format = output_extension.replace('.', '') or 'txt'
|
||||
options = AiCallOptions(
|
||||
resultFormat=output_format
|
||||
# Removed all model parameters - service layer will analyze prompt and determine optimal parameters
|
||||
)
|
||||
|
||||
# Update progress - calling AI
|
||||
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
|
||||
|
||||
# Use unified callAiContent method with contentParts (extraction is now separate)
|
||||
aiResponse = await self.services.ai.callAiContent(
|
||||
prompt=aiPrompt,
|
||||
options=options,
|
||||
contentParts=contentParts, # Already extracted (or None if no documents)
|
||||
outputFormat=output_format,
|
||||
parentOperationId=operationId
|
||||
)
|
||||
# Use unified callAiContent method
|
||||
# If contentParts provided (pre-extracted), use them directly
|
||||
# Otherwise, pass documentList and let callAiContent handle Phases 5A-5E internally
|
||||
# Note: ContentExtracted documents (from context.extractContent) are now handled
|
||||
# automatically in _extractAndPrepareContent() (Phase 5B)
|
||||
if contentParts:
|
||||
# Pre-extracted ContentParts - use them directly
|
||||
aiResponse = await self.services.ai.callAiContent(
|
||||
prompt=aiPrompt,
|
||||
options=options,
|
||||
contentParts=contentParts, # Pre-extracted ContentParts
|
||||
outputFormat=output_format,
|
||||
parentOperationId=operationId
|
||||
)
|
||||
else:
|
||||
# Pass documentList - callAiContent handles Phases 5A-5E internally
|
||||
# This includes automatic detection of ContentExtracted documents
|
||||
aiResponse = await self.services.ai.callAiContent(
|
||||
prompt=aiPrompt,
|
||||
options=options,
|
||||
documentList=documentList, # callAiContent macht Phasen 5A-5E
|
||||
outputFormat=output_format,
|
||||
parentOperationId=operationId
|
||||
)
|
||||
|
||||
# Update progress - processing result
|
||||
self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")
|
||||
|
|
|
|||
|
|
@ -15,9 +15,7 @@ from .actions.process import process
|
|||
from .actions.webResearch import webResearch
|
||||
from .actions.summarizeDocument import summarizeDocument
|
||||
from .actions.translateDocument import translateDocument
|
||||
from .actions.convert import convert
|
||||
from .actions.convertDocument import convertDocument
|
||||
from .actions.extractData import extractData
|
||||
from .actions.generateDocument import generateDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -192,69 +190,6 @@ class MethodAi(MethodBase):
|
|||
},
|
||||
execute=translateDocument.__get__(self, self.__class__)
|
||||
),
|
||||
"convert": WorkflowActionDefinition(
|
||||
actionId="ai.convert",
|
||||
description="Convert documents/data between different formats with specific formatting options",
|
||||
parameters={
|
||||
"documentList": WorkflowActionParameter(
|
||||
name="documentList",
|
||||
type="List[str]",
|
||||
frontendType=FrontendType.DOCUMENT_REFERENCE,
|
||||
required=True,
|
||||
description="Document reference(s) to convert"
|
||||
),
|
||||
"inputFormat": WorkflowActionParameter(
|
||||
name="inputFormat",
|
||||
type="str",
|
||||
frontendType=FrontendType.SELECT,
|
||||
frontendOptions=["json", "csv", "xlsx", "txt"],
|
||||
required=True,
|
||||
description="Source format"
|
||||
),
|
||||
"outputFormat": WorkflowActionParameter(
|
||||
name="outputFormat",
|
||||
type="str",
|
||||
frontendType=FrontendType.SELECT,
|
||||
frontendOptions=["csv", "json", "xlsx", "txt"],
|
||||
required=True,
|
||||
description="Target format"
|
||||
),
|
||||
"columnsPerRow": WorkflowActionParameter(
|
||||
name="columnsPerRow",
|
||||
type="int",
|
||||
frontendType=FrontendType.NUMBER,
|
||||
required=False,
|
||||
description="For CSV output, number of columns per row. Default: auto-detect",
|
||||
validation={"min": 1, "max": 100}
|
||||
),
|
||||
"delimiter": WorkflowActionParameter(
|
||||
name="delimiter",
|
||||
type="str",
|
||||
frontendType=FrontendType.TEXT,
|
||||
required=False,
|
||||
default=",",
|
||||
description="For CSV output, delimiter character"
|
||||
),
|
||||
"includeHeader": WorkflowActionParameter(
|
||||
name="includeHeader",
|
||||
type="bool",
|
||||
frontendType=FrontendType.CHECKBOX,
|
||||
required=False,
|
||||
default=True,
|
||||
description="For CSV output, whether to include header row"
|
||||
),
|
||||
"language": WorkflowActionParameter(
|
||||
name="language",
|
||||
type="str",
|
||||
frontendType=FrontendType.SELECT,
|
||||
frontendOptions=["de", "en", "fr"],
|
||||
required=False,
|
||||
default="en",
|
||||
description="Language for output"
|
||||
)
|
||||
},
|
||||
execute=convert.__get__(self, self.__class__)
|
||||
),
|
||||
"convertDocument": WorkflowActionDefinition(
|
||||
actionId="ai.convertDocument",
|
||||
description="Convert documents between different formats (PDF→Word, Excel→CSV, etc.)",
|
||||
|
|
@ -285,45 +220,6 @@ class MethodAi(MethodBase):
|
|||
},
|
||||
execute=convertDocument.__get__(self, self.__class__)
|
||||
),
|
||||
"extractData": WorkflowActionDefinition(
|
||||
actionId="ai.extractData",
|
||||
description="Extract structured data from documents (key-value pairs, entities, facts, etc.)",
|
||||
parameters={
|
||||
"documentList": WorkflowActionParameter(
|
||||
name="documentList",
|
||||
type="List[str]",
|
||||
frontendType=FrontendType.DOCUMENT_REFERENCE,
|
||||
required=True,
|
||||
description="Document reference(s) to extract data from"
|
||||
),
|
||||
"dataStructure": WorkflowActionParameter(
|
||||
name="dataStructure",
|
||||
type="str",
|
||||
frontendType=FrontendType.SELECT,
|
||||
frontendOptions=["flat", "nested", "list"],
|
||||
required=False,
|
||||
default="nested",
|
||||
description="Desired data structure"
|
||||
),
|
||||
"fields": WorkflowActionParameter(
|
||||
name="fields",
|
||||
type="List[str]",
|
||||
frontendType=FrontendType.MULTISELECT,
|
||||
required=False,
|
||||
description="Specific fields/properties to extract (e.g., [name, date, amount])"
|
||||
),
|
||||
"resultType": WorkflowActionParameter(
|
||||
name="resultType",
|
||||
type="str",
|
||||
frontendType=FrontendType.SELECT,
|
||||
frontendOptions=["json", "csv", "xlsx"],
|
||||
required=False,
|
||||
default="json",
|
||||
description="Output format"
|
||||
)
|
||||
},
|
||||
execute=extractData.__get__(self, self.__class__)
|
||||
),
|
||||
"generateDocument": WorkflowActionDefinition(
|
||||
actionId="ai.generateDocument",
|
||||
description="Generate documents from scratch or based on templates/inputs",
|
||||
|
|
@ -371,9 +267,7 @@ class MethodAi(MethodBase):
|
|||
self.webResearch = webResearch.__get__(self, self.__class__)
|
||||
self.summarizeDocument = summarizeDocument.__get__(self, self.__class__)
|
||||
self.translateDocument = translateDocument.__get__(self, self.__class__)
|
||||
self.convert = convert.__get__(self, self.__class__)
|
||||
self.convertDocument = convertDocument.__get__(self, self.__class__)
|
||||
self.extractData = extractData.__get__(self, self.__class__)
|
||||
self.generateDocument = generateDocument.__get__(self, self.__class__)
|
||||
|
||||
def _format_timestamp_for_filename(self) -> str:
|
||||
|
|
|
|||
|
|
@ -19,10 +19,21 @@ logger = logging.getLogger(__name__)
|
|||
@action
|
||||
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""
|
||||
Extract content from documents (separate from AI calls).
|
||||
Extract raw content parts from documents without AI processing.
|
||||
|
||||
This action performs pure content extraction without AI processing.
|
||||
The extracted ContentParts can then be used by subsequent AI processing actions.
|
||||
This action performs pure content extraction WITHOUT AI/OCR processing.
|
||||
It returns ContentParts with different typeGroups:
|
||||
- "text": Extracted text from text-based formats (PDF text layers, Word docs, etc.)
|
||||
- "image": Images as base64-encoded data (NOT converted to text, no OCR)
|
||||
- "table": Tables as structured data
|
||||
- "structure": Structured content (JSON, etc.)
|
||||
- "container": Container elements (PDF pages, etc.)
|
||||
|
||||
IMPORTANT:
|
||||
- Images are returned as base64 data, NOT as extracted text
|
||||
- No OCR is performed - images are preserved as visual elements
|
||||
- Text extraction only works for text-based formats (not images)
|
||||
- The extracted ContentParts can then be used by subsequent AI processing actions
|
||||
|
||||
Parameters:
|
||||
- documentList (list, required): Document reference(s) to extract content from.
|
||||
|
|
@ -30,7 +41,8 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
|
||||
Returns:
|
||||
- ActionResult with ActionDocument containing ContentExtracted objects
|
||||
- ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
|
||||
- ContentExtracted.parts contains List[ContentPart] with various typeGroups
|
||||
- Each ContentPart has a typeGroup indicating its type (text, image, table, etc.)
|
||||
"""
|
||||
try:
|
||||
# Init progress logger
|
||||
|
|
@ -79,12 +91,26 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
# Convert dict to ExtractionOptions object if needed, or create defaults
|
||||
if extractionOptionsParam:
|
||||
if isinstance(extractionOptionsParam, dict):
|
||||
# Ensure required fields are present
|
||||
if "prompt" not in extractionOptionsParam:
|
||||
extractionOptionsParam["prompt"] = "Extract all content from the document"
|
||||
if "mergeStrategy" not in extractionOptionsParam:
|
||||
extractionOptionsParam["mergeStrategy"] = MergeStrategy(
|
||||
mergeType="concatenate",
|
||||
groupBy="typeGroup",
|
||||
orderBy="id"
|
||||
)
|
||||
# Convert dict to ExtractionOptions object
|
||||
extractionOptions = ExtractionOptions(**extractionOptionsParam)
|
||||
try:
|
||||
extractionOptions = ExtractionOptions(**extractionOptionsParam)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to create ExtractionOptions from dict: {str(e)}, using defaults")
|
||||
extractionOptions = None
|
||||
elif isinstance(extractionOptionsParam, ExtractionOptions):
|
||||
extractionOptions = extractionOptionsParam
|
||||
else:
|
||||
# Invalid type, use defaults
|
||||
logger.warning(f"Invalid extractionOptions type: {type(extractionOptionsParam)}, using defaults")
|
||||
extractionOptions = None
|
||||
else:
|
||||
extractionOptions = None
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ class MethodContext(MethodBase):
|
|||
),
|
||||
"extractContent": WorkflowActionDefinition(
|
||||
actionId="context.extractContent",
|
||||
description="Extract content from documents (separate from AI calls)",
|
||||
description="Extract raw content parts from documents without AI processing. Returns ContentParts with different typeGroups (text, image, table, structure, container). Images are returned as base64 data, not as extracted text. Text content is extracted from text-based formats (PDF text layers, Word docs, etc.) but NOT from images (no OCR). Use this action to prepare documents for subsequent AI processing actions.",
|
||||
parameters={
|
||||
"documentList": WorkflowActionParameter(
|
||||
name="documentList",
|
||||
|
|
@ -64,7 +64,7 @@ class MethodContext(MethodBase):
|
|||
type="dict",
|
||||
frontendType=FrontendType.JSON,
|
||||
required=False,
|
||||
description="Extraction options (if not provided, defaults are used)"
|
||||
description="Extraction options (if not provided, defaults are used). Note: This action does NOT use AI - it performs pure content extraction. Images are preserved as base64 data, not converted to text."
|
||||
)
|
||||
},
|
||||
execute=extractContent.__get__(self, self.__class__)
|
||||
|
|
|
|||
|
|
@ -1,354 +0,0 @@
|
|||
# Architecture & Implementation Analysis
|
||||
## Deep Review of Hierarchical Document Generation
|
||||
|
||||
**Date**: 2025-12-22
|
||||
**Status**: Critical Issues Found
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
The hierarchical document generation system is **partially implemented** but has **critical architectural mismatches** and **implementation gaps** that prevent it from working correctly. While core components exist, several fundamental issues need to be addressed.
|
||||
|
||||
---
|
||||
|
||||
## ✅ What's Correctly Implemented
|
||||
|
||||
### Phase 1: Core Infrastructure ✅
|
||||
- ✅ `StructureGenerator` class exists with `generateStructure()` method
|
||||
- ✅ `ContentGenerator` class exists with `generateContent()` method
|
||||
- ✅ `ContentIntegrator` class exists with `integrateContent()` method
|
||||
- ✅ `generateDocument` action uses hierarchical approach
|
||||
- ✅ Basic progress logging implemented
|
||||
- ✅ Error handling with `createErrorSection()` implemented
|
||||
|
||||
### Phase 2: Image Generation ✅
|
||||
- ✅ `_generateImageSection()` method implemented
|
||||
- ✅ Image prompt extraction from structure
|
||||
- ✅ Base64 image data storage
|
||||
- ✅ Error handling for image failures
|
||||
|
||||
### Phase 3: Parallel Processing ✅
|
||||
- ✅ `_generateSectionsParallel()` method implemented
|
||||
- ✅ `_generateSectionsSequential()` method implemented
|
||||
- ✅ Batch processing for large documents
|
||||
- ✅ Progress callback system
|
||||
- ✅ Exception handling in parallel execution
|
||||
|
||||
---
|
||||
|
||||
## ❌ Critical Issues Found
|
||||
|
||||
### Issue 1: Previous Sections Context Not Working in Parallel Mode ⚠️ **PARTIALLY FIXED**
|
||||
|
||||
**Problem**:
|
||||
- In parallel mode, sections within the same batch cannot see each other (correct)
|
||||
- BUT: Sections in later batches should see sections from earlier batches
|
||||
- **Current Status**: Code was fixed to accumulate previous sections, but needs verification
|
||||
|
||||
**Location**: `subContentGenerator.py` lines 240-319
|
||||
|
||||
**Fix Applied**:
|
||||
- Added `accumulatedPreviousSections` to track sections across batches
|
||||
- Pass accumulated sections to each batch
|
||||
- **VERIFICATION NEEDED**: Test that prompts actually show previous sections
|
||||
|
||||
**Risk**: Medium - May cause continuity issues in generated content
|
||||
|
||||
---
|
||||
|
||||
### Issue 2: Variable Shadowing Bug ✅ **FIXED**
|
||||
|
||||
**Problem**:
|
||||
- `contentType` variable was shadowed in loop, causing wrong section type in prompts
|
||||
|
||||
**Location**: `subContentGenerator.py` line 676
|
||||
|
||||
**Fix Applied**:
|
||||
- Renamed loop variable to `prevContentType`
|
||||
|
||||
**Status**: ✅ Fixed
|
||||
|
||||
---
|
||||
|
||||
### Issue 3: Missing `generation_hint` in Structure Response ✅ **FIXED**
|
||||
|
||||
**Problem**:
|
||||
- Structure generator creates generic hints like "Section heading" instead of meaningful hints
|
||||
- AI generates same content for all headings because hints are identical
|
||||
|
||||
**Location**: `subStructureGenerator.py` lines 242-269
|
||||
|
||||
**Fix Applied**:
|
||||
- Added `_extractMeaningfulHint()` method to extract meaningful hints from section IDs
|
||||
- Example: `section_heading_current_state` → "Current State"
|
||||
|
||||
**Status**: ✅ Fixed
|
||||
|
||||
---
|
||||
|
||||
### Issue 4: JSON Template Architecture Mismatch ✅ **FIXED**
|
||||
|
||||
**Problem**:
|
||||
- `jsonTemplateDocument` showed filled `elements` arrays, but structure generation requires empty arrays
|
||||
- Template missing `generation_hint` and `complexity` fields
|
||||
- Template showed `order: 0` but should start from 1
|
||||
|
||||
**Location**: `datamodelJson.py`
|
||||
|
||||
**Fix Applied**:
|
||||
- Updated template to show empty `elements: []`
|
||||
- Added `generation_hint` to all sections
|
||||
- Added `complexity` to all sections
|
||||
- Changed `order` to start from 1
|
||||
- Added `title` to metadata
|
||||
|
||||
**Status**: ✅ Fixed
|
||||
|
||||
---
|
||||
|
||||
### Issue 5: Structure Prompt Instructions Mismatch ✅ **FIXED**
|
||||
|
||||
**Problem**:
|
||||
- Prompt said "All sections must have empty elements arrays" but template showed filled arrays
|
||||
- Prompt didn't explicitly require `generation_hint` and `complexity` fields
|
||||
|
||||
**Location**: `subStructureGenerator.py` lines 181-190
|
||||
|
||||
**Fix Applied**:
|
||||
- Enhanced prompt to explicitly require `generation_hint` and `complexity`
|
||||
- Clarified that template examples show structure, but elements must be empty
|
||||
|
||||
**Status**: ✅ Fixed
|
||||
|
||||
---
|
||||
|
||||
## ⚠️ Remaining Issues & Gaps
|
||||
|
||||
### Issue 6: Missing Validation Before Content Generation ⚠️ **NOT IMPLEMENTED**
|
||||
|
||||
**Problem**:
|
||||
- No validation that structure has required fields before content generation
|
||||
- No check that all sections have `generation_hint` before generating content
|
||||
|
||||
**Expected** (from Phase 6):
|
||||
```python
|
||||
# Validate structure before content generation
|
||||
if not validateStructure(structure):
|
||||
raise ValueError("Invalid structure")
|
||||
```
|
||||
|
||||
**Current**: `_validateAndEnhanceStructure()` only adds missing fields; it does not actually validate the structure
|
||||
|
||||
**Impact**: Low - Enhancement adds missing fields, but explicit validation would be better
|
||||
|
||||
**Recommendation**: Add explicit validation method
|
||||
|
||||
---
|
||||
|
||||
### Issue 7: Previous Sections Formatting Missing Content ⚠️ **PARTIALLY IMPLEMENTED**
|
||||
|
||||
**Problem**:
|
||||
- Previous sections formatting extracts content from `elements`, but if sections don't have elements yet (in parallel mode), it shows nothing
|
||||
- It should show `generation_hint` as a fallback when elements are not available
|
||||
|
||||
**Location**: `subContentGenerator.py` lines 671-709
|
||||
|
||||
**Current Behavior**:
|
||||
- Shows content preview if elements exist
|
||||
- Shows nothing if elements don't exist
|
||||
|
||||
**Expected Behavior**:
|
||||
- Show content preview if elements exist
|
||||
- Show `generation_hint` as fallback if elements don't exist
|
||||
|
||||
**Impact**: Medium - Reduces context quality in parallel generation
|
||||
|
||||
**Recommendation**: Add a fallback that shows `generation_hint` when elements are not available
|
||||
|
||||
---
|
||||
|
||||
### Issue 8: Debug File Shows Raw Response, Not Validated Structure ⚠️ **NOT FIXED**
|
||||
|
||||
**Problem**:
|
||||
- Debug file writes `aiResponse.content` (raw AI response) before validation
|
||||
- Can't verify if `generation_hint` was added by validation
|
||||
|
||||
**Location**: `subStructureGenerator.py` lines 77-84
|
||||
|
||||
**Impact**: Low - Makes debugging harder but doesn't affect functionality
|
||||
|
||||
**Recommendation**: Write validated structure to separate debug file
|
||||
|
||||
---
|
||||
|
||||
### Issue 9: Missing Unit Tests ⚠️ **NOT IMPLEMENTED**
|
||||
|
||||
**Problem**:
|
||||
- No unit tests for any components (Phase 7 requirement)
|
||||
- No tests for structure generation
|
||||
- No tests for content generation
|
||||
- No tests for integration
|
||||
|
||||
**Impact**: High - No way to verify correctness or catch regressions
|
||||
|
||||
**Recommendation**: Add comprehensive unit tests
|
||||
|
||||
---
|
||||
|
||||
### Issue 10: Missing Integration Tests ⚠️ **NOT IMPLEMENTED**
|
||||
|
||||
**Problem**:
|
||||
- No end-to-end tests
|
||||
- No tests with images
|
||||
- No tests with long documents
|
||||
- No error scenario tests
|
||||
|
||||
**Impact**: High - No verification of complete flow
|
||||
|
||||
**Recommendation**: Add integration tests
|
||||
|
||||
---
|
||||
|
||||
### Issue 11: Content Caching Not Optimized ⚠️ **PARTIALLY IMPLEMENTED**
|
||||
|
||||
**Problem**:
|
||||
- Content is extracted and cached, but:
|
||||
- No cache validation (check if documents changed)
|
||||
- No cache reuse verification
|
||||
- Content is passed to prompts but may not be formatted efficiently
|
||||
|
||||
**Expected** (from Phase 5):
|
||||
- Cache validation
|
||||
- Efficient formatting
|
||||
- Performance testing
|
||||
|
||||
**Current**: Basic caching exists but not optimized
|
||||
|
||||
**Impact**: Medium - Works but could be more efficient
|
||||
|
||||
**Recommendation**: Add cache validation and optimization
|
||||
|
||||
---
|
||||
|
||||
### Issue 12: Renderer Updates Not Verified ⚠️ **NEEDS VERIFICATION**
|
||||
|
||||
**Problem**:
|
||||
- Implementation plan requires renderer updates for images
|
||||
- HTML renderer should create separate image files
|
||||
- PDF/XLSX/PPTX renderers should embed images
|
||||
- **Status unknown** - need to verify renderers handle images correctly
|
||||
|
||||
**Impact**: High - Images may not render correctly
|
||||
|
||||
**Recommendation**: Verify all renderers handle images correctly
|
||||
|
||||
---
|
||||
|
||||
## 📋 Architecture Compliance Check
|
||||
|
||||
### Data Structure Compliance ✅
|
||||
|
||||
| Field | Required | Implemented | Status |
|
||||
|-------|----------|-------------|--------|
|
||||
| `metadata.title` | Yes | ✅ | ✅ |
|
||||
| `metadata.split_strategy` | Yes | ✅ | ✅ |
|
||||
| `sections[].id` | Yes | ✅ | ✅ |
|
||||
| `sections[].content_type` | Yes | ✅ | ✅ |
|
||||
| `sections[].complexity` | Yes | ✅ | ✅ |
|
||||
| `sections[].generation_hint` | Yes | ✅ | ✅ |
|
||||
| `sections[].order` | Yes | ✅ | ✅ |
|
||||
| `sections[].elements` | Yes | ✅ | ✅ |
|
||||
| `sections[].image_prompt` | Image only | ✅ | ✅ |
|
||||
|
||||
### Component Method Compliance ✅
|
||||
|
||||
| Component | Method | Required | Implemented | Status |
|
||||
|-----------|--------|----------|-------------|--------|
|
||||
| StructureGenerator | `generateStructure()` | Yes | ✅ | ✅ |
|
||||
| StructureGenerator | `_createStructurePrompt()` | Yes | ✅ | ✅ |
|
||||
| StructureGenerator | `_identifySectionComplexity()` | Yes | ✅ | ✅ |
|
||||
| StructureGenerator | `_extractImagePrompts()` | Yes | ✅ | ✅ |
|
||||
| StructureGenerator | `_validateAndEnhanceStructure()` | Yes | ✅ | ✅ |
|
||||
| StructureGenerator | `_extractMeaningfulHint()` | Yes | ✅ | ✅ |
|
||||
| ContentGenerator | `generateContent()` | Yes | ✅ | ✅ |
|
||||
| ContentGenerator | `_generateSectionContent()` | Yes | ✅ | ✅ |
|
||||
| ContentGenerator | `_generateSimpleSection()` | Yes | ✅ | ✅ |
|
||||
| ContentGenerator | `_generateComplexTextSection()` | Yes | ✅ | ✅ |
|
||||
| ContentGenerator | `_generateImageSection()` | Yes | ✅ | ✅ |
|
||||
| ContentGenerator | `_generateSectionsParallel()` | Yes | ✅ | ✅ |
|
||||
| ContentGenerator | `_generateSectionsSequential()` | Yes | ✅ | ✅ |
|
||||
| ContentGenerator | `_createSectionPrompt()` | Yes | ✅ | ✅ |
|
||||
| ContentIntegrator | `integrateContent()` | Yes | ✅ | ✅ |
|
||||
| ContentIntegrator | `validateCompleteness()` | Yes | ✅ | ✅ |
|
||||
| ContentIntegrator | `createErrorSection()` | Yes | ✅ | ✅ |
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Priority Fixes Needed
|
||||
|
||||
### Critical (Must Fix)
|
||||
1. ✅ **Issue 2**: Variable shadowing bug - **FIXED**
|
||||
2. ✅ **Issue 3**: Missing generation_hint - **FIXED**
|
||||
3. ✅ **Issue 4**: JSON template mismatch - **FIXED**
|
||||
4. ✅ **Issue 5**: Prompt instructions mismatch - **FIXED**
|
||||
5. ⚠️ **Issue 1**: Previous sections context - **NEEDS VERIFICATION**
|
||||
|
||||
### High Priority (Should Fix)
|
||||
6. ⚠️ **Issue 12**: Renderer image handling - **NEEDS VERIFICATION**
|
||||
7. ⚠️ **Issue 9**: Missing unit tests - **NOT IMPLEMENTED**
|
||||
8. ⚠️ **Issue 10**: Missing integration tests - **NOT IMPLEMENTED**
|
||||
|
||||
### Medium Priority (Nice to Have)
|
||||
9. ⚠️ **Issue 7**: Previous sections formatting fallback - **PARTIALLY IMPLEMENTED**
|
||||
10. ⚠️ **Issue 11**: Content caching optimization - **PARTIALLY IMPLEMENTED**
|
||||
11. ⚠️ **Issue 6**: Structure validation - **NOT IMPLEMENTED**
|
||||
12. ⚠️ **Issue 8**: Debug file improvements - **NOT IMPLEMENTED**
|
||||
|
||||
---
|
||||
|
||||
## ✅ Summary
|
||||
|
||||
### What Works
|
||||
- Core infrastructure is implemented
|
||||
- Image generation is integrated
|
||||
- Parallel processing is implemented
|
||||
- Error handling is in place
|
||||
- Progress logging works
|
||||
|
||||
### What's Fixed (This Session)
|
||||
- Variable shadowing bug
|
||||
- Missing generation_hint extraction
|
||||
- JSON template architecture mismatch
|
||||
- Prompt instructions clarity
|
||||
- Previous sections tracking (needs verification)
|
||||
|
||||
### What Needs Work
|
||||
- Unit and integration tests
|
||||
- Renderer verification
|
||||
- Previous sections formatting fallback
|
||||
- Cache optimization
|
||||
- Structure validation
|
||||
|
||||
### Overall Status
|
||||
**Architecture**: ✅ **85% Compliant**
|
||||
**Implementation**: ✅ **80% Complete**
|
||||
**Testing**: ❌ **0% Complete**
|
||||
**Production Ready**: ⚠️ **Not Yet** (needs testing and verification)
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. **Verify Issue 1 Fix**: Test that previous sections are correctly tracked in parallel mode
|
||||
2. **Verify Issue 12**: Test that all renderers handle images correctly
|
||||
3. **Add Unit Tests**: Start with critical components (StructureGenerator, ContentGenerator)
|
||||
4. **Add Integration Tests**: Test end-to-end flow with various scenarios
|
||||
5. **Improve Previous Sections Formatting**: Add fallback to show generation_hint when elements not available
|
||||
6. **Add Structure Validation**: Explicit validation before content generation
|
||||
7. **Optimize Content Caching**: Add cache validation and efficient formatting
|
||||
|
||||
---
|
||||
|
||||
**Analysis Complete**: 2025-12-22
|
||||
|
||||
|
|
@ -1,459 +0,0 @@
|
|||
# Concept: Hierarchical Document Generation with Image Integration
|
||||
|
||||
## Executive Summary
|
||||
|
||||
This concept proposes a **three-phase hierarchical approach** to document generation that enables proper image integration and handles complex documents efficiently.
|
||||
|
||||
**Key Decisions**:
|
||||
- ✅ **Performance**: Parallel processing with ChatLog progress messages
|
||||
- ✅ **Error Handling**: Skip failed sections, show error messages
|
||||
- ✅ **Image Storage**: Store as base64 in JSON (renderers need direct access)
|
||||
- ✅ **Backward Compatibility**: Not needed - implement as new default
|
||||
|
||||
**Renderer Status**:
|
||||
- ✅ **Ready**: Text, Markdown, DOCX renderers
|
||||
- ⚠️ **Needs Update**: HTML (create separate image files), PDF (embed images)
|
||||
- ⚠️ **Needs Implementation**: XLSX, PPTX (add image support)
|
||||
|
||||
## Problem Statement
|
||||
|
||||
Currently, the document generation system has the following limitations:
|
||||
|
||||
1. **No Image Integration**: Images are generated separately but cannot be embedded into document structures
|
||||
2. **Single-Pass Generation**: Documents are generated in one AI call, making it difficult to handle complex sections (long text, images, chapters)
|
||||
3. **Repeated Extraction**: Content extraction may happen multiple times unnecessarily
|
||||
4. **No Structured Approach**: No mechanism to first define document structure, then populate sections
|
||||
|
||||
## Current Architecture Analysis
|
||||
|
||||
### Current Flow:
|
||||
```
|
||||
User Request → ai.generateDocument → ai.process → AI JSON Generation → Renderer → Final Document
|
||||
```
|
||||
|
||||
### Issues:
|
||||
- AI generates complete JSON structure in one pass
|
||||
- Images are generated separately via `ai.generate` action
|
||||
- No mechanism to integrate generated images into document structure
|
||||
- JSON schema supports `image` content_type, but AI rarely generates it
|
||||
- Content extraction happens per action, not cached/reused
|
||||
|
||||
### Current Image Handling:
|
||||
- Images can be rendered IF they exist in JSON structure (`content_type: "image"`)
|
||||
- Image data expected as `base64Data` in elements
|
||||
- Renderers support image rendering (Docx, PDF, HTML, etc.)
|
||||
- But images are never generated WITHIN document generation
|
||||
|
||||
## Proposed Solution: Hierarchical Document Generation
|
||||
|
||||
### Core Concept
|
||||
|
||||
**Three-Phase Approach:**
|
||||
1. **Structure Generation Phase**: Generate document skeleton with section placeholders
|
||||
2. **Content Generation Phase**: Generate content for each section (text or image) via sub-prompts
|
||||
3. **Integration Phase**: Merge all generated content into final document structure
|
||||
|
||||
### Architecture Overview
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Phase 1: Structure Generation │
|
||||
│ - Generate document skeleton │
|
||||
│ - Identify sections (text, image, complex) │
|
||||
│ - Create section placeholders with metadata │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
↓
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Phase 2: Content Generation (Tree-like) │
|
||||
│ │
|
||||
│ ┌──────────────────────────────────────────────┐ │
|
||||
│ │ Section 1: Heading (simple) │ │
|
||||
│ │ → Generate directly │ │
|
||||
│ └──────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
│ ┌──────────────────────────────────────────────┐ │
|
||||
│ │ Section 2: Paragraph (simple) │ │
|
||||
│ │ → Generate directly │ │
|
||||
│ └──────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
│ ┌──────────────────────────────────────────────┐ │
|
||||
│ │ Section 3: Image (complex) │ │
|
||||
│ │ → Sub-prompt: Generate image │ │
|
||||
│ │ → Store image data │ │
|
||||
│ │ → Create image section with base64Data │ │
|
||||
│ └──────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
│ ┌──────────────────────────────────────────────┐ │
|
||||
│ │ Section 4: Long Chapter (complex) │ │
|
||||
│ │ → Sub-prompt: Generate chapter content │ │
|
||||
│ │ → Split into subsections if needed │ │
|
||||
│ └──────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
↓
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Phase 3: Integration │
|
||||
│ - Merge all generated content │
|
||||
│ - Replace placeholders with actual data │
|
||||
│ - Validate structure completeness │
|
||||
│ - Render to final format │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Detailed Design
|
||||
|
||||
### Phase 1: Structure Generation
|
||||
|
||||
**Purpose**: Create document skeleton with section metadata
|
||||
|
||||
**Process**:
|
||||
1. AI generates document structure with sections
|
||||
2. Each section includes:
|
||||
- `id`: Unique identifier
|
||||
- `content_type`: Type (heading, paragraph, image, table, etc.)
|
||||
- `complexity`: "simple" or "complex"
|
||||
- `generation_hint`: Instructions for content generation
|
||||
- `order`: Section order
|
||||
- `elements`: Empty or placeholder
|
||||
|
||||
**Example Structure**:
|
||||
```json
|
||||
{
|
||||
"metadata": {
|
||||
"title": "Children's Bedtime Story",
|
||||
"split_strategy": "single_document"
|
||||
},
|
||||
"documents": [{
|
||||
"id": "doc_1",
|
||||
"sections": [
|
||||
{
|
||||
"id": "section_title",
|
||||
"content_type": "heading",
|
||||
"complexity": "simple",
|
||||
"generation_hint": "Story title",
|
||||
"order": 1,
|
||||
"elements": []
|
||||
},
|
||||
{
|
||||
"id": "section_intro",
|
||||
"content_type": "paragraph",
|
||||
"complexity": "simple",
|
||||
"generation_hint": "Introduction paragraph",
|
||||
"order": 2,
|
||||
"elements": []
|
||||
},
|
||||
{
|
||||
"id": "section_image_1",
|
||||
"content_type": "image",
|
||||
"complexity": "complex",
|
||||
"generation_hint": "Illustration: Rabbit meeting owl in moonlit forest",
|
||||
"image_prompt": "A small brown rabbit sitting in a peaceful forest clearing under moonlight with stars, meeting a wise owl perched on a branch",
|
||||
"order": 3,
|
||||
"elements": []
|
||||
},
|
||||
{
|
||||
"id": "section_chapter_1",
|
||||
"content_type": "paragraph",
|
||||
"complexity": "complex",
|
||||
"generation_hint": "First chapter: Rabbit's adventure begins",
|
||||
"order": 4,
|
||||
"elements": []
|
||||
}
|
||||
]
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 2: Content Generation
|
||||
|
||||
**Purpose**: Generate actual content for each section
|
||||
|
||||
**Process**:
|
||||
1. Iterate through sections in order
|
||||
2. For each section:
|
||||
- **Simple sections** (heading, short paragraph):
|
||||
- Generate content directly via AI
|
||||
- Populate `elements` array
|
||||
- **Complex sections** (image, long chapter):
|
||||
- Create sub-prompt based on `generation_hint` and `image_prompt`
|
||||
- Generate content via specialized action:
|
||||
- Images: `ai.generate` with image generation
|
||||
- Long text: `ai.process` with focused prompt
|
||||
- Store generated content
|
||||
- Populate `elements` array
|
||||
|
||||
**Content Caching**:
|
||||
- Extract content from source documents ONCE at the start
|
||||
- Cache extracted content for reuse across all sections
|
||||
- Pass cached content to sub-prompts to avoid re-extraction
|
||||
|
||||
**Image Generation**:
|
||||
- For `content_type: "image"` sections:
|
||||
- Use `image_prompt` from structure
|
||||
- Call `ai.generate` action with image generation
|
||||
- Receive base64 image data
|
||||
- Create image element:
|
||||
```json
|
||||
{
|
||||
"url": "data:image/png;base64,<base64_data>",
|
||||
"base64Data": "<base64_data>",
|
||||
"altText": "<alt_text>",
|
||||
"caption": "<caption>"
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 3: Integration
|
||||
|
||||
**Purpose**: Merge all content into final document structure
|
||||
|
||||
**Process**:
|
||||
1. Validate all sections have content
|
||||
2. Merge generated content into structure
|
||||
3. Replace placeholders with actual data
|
||||
4. Finalize JSON structure
|
||||
5. Render to target format (docx, pdf, html, etc.)
|
||||
|
||||
## Implementation Strategy
|
||||
|
||||
### New Components Needed
|
||||
|
||||
1. **Structure Generator** (`structureGenerator.py`)
|
||||
- Generates document skeleton
|
||||
- Identifies section complexity
|
||||
- Creates generation hints
|
||||
|
||||
2. **Content Generator** (`contentGenerator.py`)
|
||||
- Generates content for each section
|
||||
- Handles simple vs complex sections
|
||||
- Manages sub-prompts and image generation
|
||||
- Caches extracted content
|
||||
|
||||
3. **Content Integrator** (`contentIntegrator.py`)
|
||||
- Merges generated content
|
||||
- Validates completeness
|
||||
- Finalizes document structure
|
||||
|
||||
### Modified Components
|
||||
|
||||
1. **`generateDocument` action**
|
||||
- Implement hierarchical generation as default
|
||||
- Orchestrate three phases
|
||||
- Add progress logging for each phase
|
||||
|
||||
2. **`process` action**
|
||||
- Support content caching (extract once, reuse)
|
||||
- Support sub-prompt generation for sections
|
||||
|
||||
3. **Prompt Builder** (`subPromptBuilderGeneration.py`)
|
||||
- Add structure generation prompt
|
||||
- Add section-specific content prompts
|
||||
- Add image generation prompt templates
|
||||
|
||||
4. **Renderers** (Update required):
|
||||
- **HTML Renderer**: Create separate image files and link them
|
||||
- **PDF Renderer**: Embed images using reportlab
|
||||
- **XLSX Renderer**: Add image embedding support
|
||||
- **PPTX Renderer**: Add image embedding support
|
||||
|
||||
### New Action Parameters
|
||||
|
||||
**For `generateDocument`**:
|
||||
- `enableImageIntegration`: boolean (default: true)
|
||||
- `maxSectionLength`: int (threshold for "complex" sections, default: 500 words)
|
||||
- `parallelGeneration`: boolean (default: true) - enable parallel section generation
|
||||
- `progressLogging`: boolean (default: true) - send ChatLog progress updates
|
||||
|
||||
**For sub-prompts**:
|
||||
- `sectionContext`: Previous sections for context
|
||||
- `cachedContent`: Extracted content cache (to avoid re-extraction)
|
||||
- `targetSection`: Section metadata
|
||||
- `previousSections`: Array of already-generated sections for continuity
|
||||
|
||||
## Benefits
|
||||
|
||||
1. **Image Integration**: Images can be generated and embedded into documents
|
||||
2. **Structured Approach**: Clear separation of structure and content
|
||||
3. **Efficiency**: Content extracted once, reused across sections
|
||||
4. **Scalability**: Can handle very long documents by splitting into sections
|
||||
5. **Quality**: Better control over complex sections (images, long chapters)
|
||||
6. **Flexibility**: Can generate different content types per section
|
||||
|
||||
## Migration Strategy
|
||||
|
||||
**Note**: No backwards compatibility needed - can implement directly as new default.
|
||||
|
||||
1. **Phase 1**: Implement hierarchical generation as new default
|
||||
2. **Phase 2**: Update renderers (HTML, PDF, XLSX, PPTX) for image support
|
||||
3. **Phase 3**: Testing and refinement
|
||||
4. **Phase 4**: Remove old single-pass mode (or keep as internal fallback only)
|
||||
|
||||
## Example Workflow
|
||||
|
||||
**User Request**: "Create a children's bedtime story with 5 illustrations"
|
||||
|
||||
**Phase 1 Output**:
|
||||
```json
|
||||
{
|
||||
"metadata": {"title": "Flöckchen's Adventure"},
|
||||
"documents": [{
|
||||
"sections": [
|
||||
{"id": "title", "content_type": "heading", "complexity": "simple", ...},
|
||||
{"id": "intro", "content_type": "paragraph", "complexity": "simple", ...},
|
||||
{"id": "img1", "content_type": "image", "complexity": "complex",
|
||||
"image_prompt": "Rabbit meeting owl", ...},
|
||||
{"id": "chapter1", "content_type": "paragraph", "complexity": "complex", ...},
|
||||
{"id": "img2", "content_type": "image", "complexity": "complex", ...},
|
||||
...
|
||||
]
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
**Phase 2 Process**:
|
||||
- Generate title → populate elements
|
||||
- Generate intro → populate elements
|
||||
- Generate image 1 → call `ai.generate`, store base64 → populate elements
|
||||
- Generate chapter 1 → sub-prompt → populate elements
|
||||
- Generate image 2 → call `ai.generate`, store base64 → populate elements
|
||||
- ...
|
||||
|
||||
**Phase 3 Output**: Complete document with all sections populated, ready for rendering
|
||||
|
||||
## Renderer Readiness Assessment
|
||||
|
||||
### Current Renderer Status for Image Handling:
|
||||
|
||||
1. **Text Renderer** (`rendererText.py`): ✅ **READY**
|
||||
- Skips images, shows placeholder: `[Image: altText]`
|
||||
- No changes needed
|
||||
|
||||
2. **Markdown Renderer** (`rendererMarkdown.py`): ✅ **READY**
|
||||
- Shows placeholder with truncated base64: `![altText](data:image/png;base64,<truncated>...)`
|
||||
- No changes needed (markdown limitation)
|
||||
|
||||
3. **HTML Renderer** (`rendererHtml.py`): ⚠️ **NEEDS UPDATE**
|
||||
- Currently: Embeds base64 directly in `<img>` tag as data URI
|
||||
- **Required Change**: Create separate image files and link to them
|
||||
- Implementation: Generate image files (e.g., `image_1.png`, `image_2.png`) alongside HTML
|
||||
- Update `<img>` tags to use relative paths: `<img src="image_1.png" alt="...">`
|
||||
- Return multiple files: HTML file + image files
|
||||
|
||||
4. **PDF Renderer** (`rendererPdf.py`): ⚠️ **NEEDS UPDATE**
|
||||
- Currently: Shows placeholder `[Image: altText]`
|
||||
- **Required Change**: Embed images directly in PDF using reportlab
|
||||
- Implementation: Decode the base64 string to bytes, wrap them in `io.BytesIO`, and pass that file-like object to `reportlab.platypus.Image()`
|
||||
|
||||
5. **DOCX Renderer** (`rendererDocx.py`): ✅ **READY**
|
||||
- Embeds images directly using `doc.add_picture()`
|
||||
- Adds captions below images
|
||||
- No changes needed
|
||||
|
||||
6. **XLSX Renderer** (`rendererXlsx.py`): ⚠️ **NEEDS IMPLEMENTATION**
|
||||
- Currently: No image handling found
|
||||
- **Required Change**: Add image support using openpyxl
|
||||
- Implementation: Create an `openpyxl.drawing.image.Image()` and attach it with `worksheet.add_image(img, "<cell>")` so the image is anchored at a cell
|
||||
- Store images in worksheet cells or as floating images
|
||||
|
||||
7. **PPTX Renderer** (`rendererPptx.py`): ⚠️ **NEEDS IMPLEMENTATION**
|
||||
- Currently: No image handling found
|
||||
- **Required Change**: Add image support using python-pptx
|
||||
- Implementation: Use `slide.shapes.add_picture()` to add images to slides
|
||||
|
||||
### Renderer Update Requirements:
|
||||
|
||||
**Priority 1 (Critical for HTML output)**:
|
||||
- HTML Renderer: Create separate image files and link them
|
||||
|
||||
**Priority 2 (Important for document formats)**:
|
||||
- PDF Renderer: Embed images using reportlab
|
||||
- XLSX Renderer: Add image embedding support
|
||||
- PPTX Renderer: Add image embedding support
|
||||
|
||||
## Answers to Open Questions
|
||||
|
||||
### 1. Performance: How to handle very large documents (100+ sections)?
|
||||
|
||||
**Answer**: Use parallel processing where possible, with progress ChatLog messages.
|
||||
|
||||
**Implementation Strategy**:
|
||||
- **Parallel Section Generation**: Generate independent sections in parallel using asyncio
|
||||
- **Batch Processing**: Process sections in batches (e.g., 10 sections at a time)
|
||||
- **Progress Tracking**: Send ChatLog progress updates:
|
||||
- "Generating structure..." (Phase 1)
|
||||
- "Generating content for section X/Y..." (Phase 2)
|
||||
- "Generating image for section X..." (Phase 2 - images)
|
||||
- "Merging content..." (Phase 3)
|
||||
- "Rendering final document..." (Phase 3)
|
||||
- **Streaming**: For very large documents, consider streaming partial results
|
||||
|
||||
**Example Progress Messages**:
|
||||
```
|
||||
Phase 1: Structure Generation (0% → 33%)
|
||||
Phase 2: Content Generation (33% → 90%)
|
||||
- Section 1/10: Heading (34%)
|
||||
- Section 2/10: Paragraph (40%)
|
||||
- Section 3/10: Image generation (50%)
|
||||
- Section 4/10: Chapter (60%)
|
||||
...
|
||||
Phase 3: Integration & Rendering (90% → 100%)
|
||||
```
|
||||
|
||||
### 2. Error Handling: What if one section fails?
|
||||
|
||||
**Answer**: Do not abort the document when a section fails: keep the failed section's title and type, and show an error message in place of its content.
|
||||
|
||||
**Implementation Strategy**:
|
||||
- **Graceful Degradation**: Continue processing remaining sections
|
||||
- **Error Section**: Create error placeholder section:
|
||||
```json
|
||||
{
|
||||
"id": "section_failed_3",
|
||||
"content_type": "paragraph",
|
||||
"elements": [{
|
||||
"text": "[ERROR: Failed to generate content for this section. Error: <error_message>]"
|
||||
}],
|
||||
"order": 3,
|
||||
"error": true,
|
||||
"errorMessage": "<detailed_error>"
|
||||
}
|
||||
```
|
||||
- **Logging**: Log errors for debugging but don't fail entire document
|
||||
- **User Notification**: Include error count in final progress message
|
||||
|
||||
### 3. Image Storage: Where to store generated images?
|
||||
|
||||
**Answer**: Store images in JSON as base64, as renderers need them afterwards.
|
||||
|
||||
**Implementation Strategy**:
|
||||
- **In-Memory Storage**: Keep base64 strings in JSON structure during generation
|
||||
- **JSON Structure**: Store in section elements:
|
||||
```json
|
||||
{
|
||||
"url": "data:image/png;base64,<base64_data>",
|
||||
"base64Data": "<full_base64_string>",
|
||||
"altText": "Image description",
|
||||
"caption": "Optional caption"
|
||||
}
|
||||
```
|
||||
- **Memory Management**: For very large images, consider compression or chunking
|
||||
- **Renderer Access**: All renderers can access `base64Data` directly from JSON
|
||||
- **HTML Special Case**: HTML renderer will extract base64, decode, and save as separate files during rendering
|
||||
|
||||
### 4. Backward Compatibility: How to ensure existing workflows still work?
|
||||
|
||||
**Answer**: No backwards compatibility needed.
|
||||
|
||||
**Implementation Strategy**:
|
||||
- **New Default**: Hierarchical generation becomes the default mode
|
||||
- **Clean Migration**: All document generation uses hierarchical approach
|
||||
- **No Public Fallback**: Remove single-pass mode from the public interface (at most, keep it as an internal-only fallback)
|
||||
- **Breaking Change**: Acceptable since this is a new feature/enhancement
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. **Review and Approval**: Get feedback on concept
|
||||
2. **Detailed Design**: Design API and data structures
|
||||
3. **Prototype**: Implement Phase 1 (structure generation)
|
||||
4. **Testing**: Test with real use cases
|
||||
5. **Full Implementation**: Implement all phases
|
||||
6. **Migration**: Migrate existing workflows
|
||||
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,398 +0,0 @@
|
|||
# Implementation Plan: Hierarchical Document Generation
|
||||
|
||||
## Overview
|
||||
|
||||
This document outlines the step-by-step implementation plan for the hierarchical document generation system with image integration.
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
### Phase 1: Core Infrastructure (Week 1)
|
||||
|
||||
**Goal**: Set up core components and data structures
|
||||
|
||||
#### Tasks:
|
||||
|
||||
1. **Create StructureGenerator Component**
|
||||
- [ ] Create `subStructureGenerator.py`
|
||||
- [ ] Implement `generateStructure()` method
|
||||
- [ ] Implement `_createStructurePrompt()` method
|
||||
- [ ] Implement `_identifySectionComplexity()` method
|
||||
- [ ] Implement `_extractImagePrompts()` method
|
||||
- [ ] Add unit tests
|
||||
|
||||
2. **Create ContentGenerator Component**
|
||||
- [ ] Create `subContentGenerator.py`
|
||||
- [ ] Implement `generateContent()` method
|
||||
- [ ] Implement `_generateSectionContent()` method
|
||||
- [ ] Implement `_generateSimpleSection()` method
|
||||
- [ ] Implement `_generateComplexTextSection()` method
|
||||
- [ ] Implement `_createSectionPrompt()` method
|
||||
- [ ] Add unit tests
|
||||
|
||||
3. **Create ContentIntegrator Component**
|
||||
- [ ] Create `subContentIntegrator.py`
|
||||
- [ ] Implement `integrateContent()` method
|
||||
- [ ] Implement `validateCompleteness()` method
|
||||
- [ ] Implement `createErrorSection()` method
|
||||
- [ ] Add unit tests
|
||||
|
||||
4. **Update generateDocument Action**
|
||||
- [ ] Modify `generateDocument.py` to use hierarchical approach
|
||||
- [ ] Add Phase 1: Structure generation
|
||||
- [ ] Add Phase 2: Content generation (sequential first)
|
||||
- [ ] Add Phase 3: Integration & rendering
|
||||
- [ ] Add basic progress logging
|
||||
- [ ] Add error handling
|
||||
|
||||
**Deliverables**:
|
||||
- Core components created
|
||||
- Basic hierarchical generation working (sequential)
|
||||
- Unit tests passing
|
||||
|
||||
**Estimated Time**: 3-4 days
|
||||
|
||||
---
|
||||
|
||||
### Phase 2: Image Generation Integration (Week 1-2)
|
||||
|
||||
**Goal**: Integrate image generation into content generation
|
||||
|
||||
#### Tasks:
|
||||
|
||||
1. **Implement Image Section Generation**
|
||||
- [ ] Add `_generateImageSection()` method to ContentGenerator
|
||||
- [ ] Integrate with `ai.generate` action
|
||||
- [ ] Handle base64 image data storage
|
||||
- [ ] Add image prompt extraction from structure
|
||||
- [ ] Add error handling for image generation failures
|
||||
|
||||
2. **Update Structure Generation Prompt**
|
||||
- [ ] Add image section detection in structure prompt
|
||||
- [ ] Add image_prompt field extraction
|
||||
- [ ] Test with user prompts requesting images
|
||||
|
||||
3. **Test Image Integration**
|
||||
- [ ] Test image generation in document structure
|
||||
- [ ] Test multiple images in one document
|
||||
- [ ] Test image generation failures
|
||||
|
||||
**Deliverables**:
|
||||
- Image generation integrated
|
||||
- Images stored as base64 in JSON
|
||||
- Error handling for image failures
|
||||
|
||||
**Estimated Time**: 2-3 days
|
||||
|
||||
---
|
||||
|
||||
### Phase 3: Parallel Processing & Progress Logging (Week 2)
|
||||
|
||||
**Goal**: Implement parallel section generation and detailed progress logging
|
||||
|
||||
#### Tasks:
|
||||
|
||||
1. **Implement Parallel Generation**
|
||||
- [ ] Add `_generateSectionsParallel()` method
|
||||
- [ ] Use `asyncio.gather()` for parallel execution
|
||||
- [ ] Add batch processing for large documents
|
||||
- [ ] Handle exceptions in parallel execution
|
||||
- [ ] Test parallel vs sequential performance
|
||||
|
||||
2. **Enhance Progress Logging**
|
||||
- [ ] Create progress callback system
|
||||
- [ ] Add detailed progress messages:
|
||||
- Structure generation progress
|
||||
- Section-by-section progress
|
||||
- Image generation progress
|
||||
- Rendering progress
|
||||
- [ ] Calculate accurate progress percentages
|
||||
- [ ] Test progress updates
|
||||
|
||||
3. **Update generateDocument Action**
|
||||
- [ ] Integrate parallel generation
|
||||
- [ ] Add progress callback to content generation
|
||||
- [ ] Update progress logging throughout phases
|
||||
|
||||
**Deliverables**:
|
||||
- Parallel section generation working
|
||||
- Detailed progress logging
|
||||
- Performance improvements
|
||||
|
||||
**Estimated Time**: 2-3 days
|
||||
|
||||
---
|
||||
|
||||
### Phase 4: Renderer Updates (Week 2-3)
|
||||
|
||||
**Goal**: Update renderers to properly handle images
|
||||
|
||||
#### Tasks:
|
||||
|
||||
1. **Update HTML Renderer**
|
||||
- [ ] Modify `rendererHtml.py`
|
||||
- [ ] Add `_extractImages()` method
|
||||
- [ ] Implement separate image file creation
|
||||
- [ ] Update HTML to use relative image paths
|
||||
- [ ] Handle multiple image files
|
||||
- [ ] Test HTML + image files output
|
||||
|
||||
2. **Update PDF Renderer**
|
||||
- [ ] Modify `rendererPdf.py`
|
||||
- [ ] Update `_renderJsonImage()` to embed images
|
||||
- [ ] Use `reportlab.platypus.Image()` with the base64-decoded image bytes (wrapped in `io.BytesIO`)
|
||||
- [ ] Handle image sizing and positioning
|
||||
- [ ] Test PDF with embedded images
|
||||
|
||||
3. **Update XLSX Renderer**
|
||||
- [ ] Modify `rendererXlsx.py`
|
||||
- [ ] Add `_renderJsonImage()` method
|
||||
- [ ] Use `openpyxl.drawing.image.Image()` to embed images
|
||||
- [ ] Handle image placement in cells
|
||||
- [ ] Test XLSX with images
|
||||
|
||||
4. **Update PPTX Renderer**
|
||||
- [ ] Modify `rendererPptx.py`
|
||||
- [ ] Add `_renderJsonImage()` method
|
||||
- [ ] Use `slide.shapes.add_picture()` to add images
|
||||
- [ ] Handle image sizing on slides
|
||||
- [ ] Test PPTX with images
|
||||
|
||||
**Deliverables**:
|
||||
- All renderers support images
|
||||
- HTML creates separate image files
|
||||
- PDF/XLSX/PPTX embed images directly
|
||||
|
||||
**Estimated Time**: 4-5 days
|
||||
|
||||
---
|
||||
|
||||
### Phase 5: Content Caching & Optimization (Week 3)
|
||||
|
||||
**Goal**: Implement content caching to avoid re-extraction
|
||||
|
||||
#### Tasks:
|
||||
|
||||
1. **Implement Content Cache**
|
||||
- [ ] Create ContentCache data structure
|
||||
- [ ] Extract content once at start of generation
|
||||
- [ ] Pass cached content to all sub-prompts
|
||||
- [ ] Add cache validation (check if documents changed)
|
||||
- [ ] Test cache reuse
|
||||
|
||||
2. **Optimize Prompt Building**
|
||||
- [ ] Update structure prompt to use cached content
|
||||
- [ ] Update section prompts to use cached content
|
||||
- [ ] Format cached content efficiently
|
||||
- [ ] Test prompt sizes
|
||||
|
||||
3. **Performance Testing**
|
||||
- [ ] Test with large documents
|
||||
- [ ] Test with multiple source documents
|
||||
- [ ] Measure performance improvements
|
||||
- [ ] Optimize bottlenecks
|
||||
|
||||
**Deliverables**:
|
||||
- Content caching implemented
|
||||
- No redundant content extraction
|
||||
- Performance optimized
|
||||
|
||||
**Estimated Time**: 2-3 days
|
||||
|
||||
---
|
||||
|
||||
### Phase 6: Error Handling & Edge Cases (Week 3-4)
|
||||
|
||||
**Goal**: Robust error handling and edge case coverage
|
||||
|
||||
#### Tasks:
|
||||
|
||||
1. **Enhance Error Handling**
|
||||
- [ ] Improve error section creation
|
||||
- [ ] Add error recovery strategies
|
||||
- [ ] Handle partial failures gracefully
|
||||
- [ ] Add error logging and reporting
|
||||
|
||||
2. **Handle Edge Cases**
|
||||
- [ ] Empty document list
|
||||
- [ ] No sections generated
|
||||
- [ ] All sections fail
|
||||
- [ ] Very large images
|
||||
- [ ] Very long documents (100+ sections)
|
||||
- [ ] Missing image prompts
|
||||
- [ ] Invalid section types
|
||||
|
||||
3. **Add Validation**
|
||||
- [ ] Validate structure before content generation
|
||||
- [ ] Validate content before integration
|
||||
- [ ] Validate final document before rendering
|
||||
- [ ] Add comprehensive error messages
|
||||
|
||||
**Deliverables**:
|
||||
- Robust error handling
|
||||
- Edge cases covered
|
||||
- Clear error messages
|
||||
|
||||
**Estimated Time**: 2-3 days
|
||||
|
||||
---
|
||||
|
||||
### Phase 7: Testing & Refinement (Week 4)
|
||||
|
||||
**Goal**: Comprehensive testing and refinement
|
||||
|
||||
#### Tasks:
|
||||
|
||||
1. **Unit Testing**
|
||||
- [ ] Complete unit tests for all components
|
||||
- [ ] Test all methods
|
||||
- [ ] Test error scenarios
|
||||
- [ ] Achieve >80% code coverage
|
||||
|
||||
2. **Integration Testing**
|
||||
- [ ] Test end-to-end document generation
|
||||
- [ ] Test with various document types
|
||||
- [ ] Test with images
|
||||
- [ ] Test with long documents
|
||||
- [ ] Test error scenarios
|
||||
|
||||
3. **Performance Testing**
|
||||
- [ ] Test with 10, 50, 100+ sections
|
||||
- [ ] Measure generation time
|
||||
- [ ] Measure memory usage
|
||||
- [ ] Compare parallel vs sequential
|
||||
- [ ] Optimize if needed
|
||||
|
||||
4. **User Acceptance Testing**
|
||||
- [ ] Test with real user scenarios
|
||||
- [ ] Test bedtime story with images (original use case)
|
||||
- [ ] Test business documents
|
||||
- [ ] Test technical documents
|
||||
- [ ] Gather feedback
|
||||
|
||||
5. **Documentation**
|
||||
- [ ] Update API documentation
|
||||
- [ ] Add code comments
|
||||
- [ ] Update user guides
|
||||
- [ ] Create examples
|
||||
|
||||
**Deliverables**:
|
||||
- Comprehensive test suite
|
||||
- Performance benchmarks
|
||||
- Documentation complete
|
||||
- Ready for production
|
||||
|
||||
**Estimated Time**: 3-4 days
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
### External Dependencies
|
||||
- `asyncio` (Python standard library) - For parallel processing
|
||||
- `base64` (Python standard library) - For image encoding/decoding
|
||||
- `reportlab` - For PDF image embedding
|
||||
- `openpyxl` - For XLSX image embedding
|
||||
- `python-pptx` - For PPTX image embedding
|
||||
|
||||
### Internal Dependencies
|
||||
- `serviceGeneration` - Main generation service
|
||||
- `serviceAi` - AI service for generation
|
||||
- `serviceExtraction` - Content extraction service
|
||||
- `methodAi.actions.generate` - Image generation action
|
||||
- `methodAi.actions.process` - Text generation action
|
||||
|
||||
## Risk Mitigation
|
||||
|
||||
### Risks and Mitigation Strategies
|
||||
|
||||
1. **Risk**: Image generation failures break entire document
|
||||
- **Mitigation**: Error handling creates error sections, continues processing
|
||||
|
||||
2. **Risk**: Parallel generation causes memory issues
|
||||
- **Mitigation**: Batch processing, limit concurrent operations
|
||||
|
||||
3. **Risk**: Large base64 images cause JSON size issues
|
||||
- **Mitigation**: Consider compression or chunking for very large images
|
||||
|
||||
4. **Risk**: HTML renderer needs to return multiple files
|
||||
- **Mitigation**: Modify return type or create file bundle system
|
||||
|
||||
5. **Risk**: Performance not meeting expectations
|
||||
- **Mitigation**: Profile and optimize bottlenecks, consider caching
|
||||
|
||||
## Success Criteria
|
||||
|
||||
### Functional Requirements
|
||||
- ✅ Documents can be generated with embedded images
|
||||
- ✅ HTML renderer creates separate image files
|
||||
- ✅ PDF/XLSX/PPTX renderers embed images
|
||||
- ✅ Progress logging shows detailed progress
|
||||
- ✅ Error handling prevents complete failures
|
||||
- ✅ Content extraction happens only once
|
||||
|
||||
### Performance Requirements
|
||||
- ✅ Parallel generation improves performance by 2x+ for multi-section documents
|
||||
- ✅ Progress updates appear within 1 second of action
|
||||
- ✅ Documents with 50+ sections complete in <5 minutes
|
||||
|
||||
### Quality Requirements
|
||||
- ✅ >80% code coverage
|
||||
- ✅ All edge cases handled
|
||||
- ✅ Clear error messages
|
||||
- ✅ Comprehensive documentation
|
||||
|
||||
## Rollout Plan
|
||||
|
||||
### Step 1: Internal Testing (Week 4)
|
||||
- Deploy to development environment
|
||||
- Internal team testing
|
||||
- Fix critical issues
|
||||
|
||||
### Step 2: Beta Testing (Week 5)
|
||||
- Deploy to staging environment
|
||||
- Select beta users
|
||||
- Gather feedback
|
||||
- Fix issues
|
||||
|
||||
### Step 3: Production Deployment (Week 6)
|
||||
- Deploy to production
|
||||
- Monitor performance
|
||||
- Monitor errors
|
||||
- Gather user feedback
|
||||
|
||||
### Step 4: Optimization (Ongoing)
|
||||
- Monitor usage patterns
|
||||
- Optimize based on real-world usage
|
||||
- Add enhancements based on feedback
|
||||
|
||||
## Timeline Summary
|
||||
|
||||
| Phase | Duration | Start | End |
|
||||
|-------|----------|-------|-----|
|
||||
| Phase 1: Core Infrastructure | 3-4 days | Day 1 | Day 4 |
|
||||
| Phase 2: Image Integration | 2-3 days | Day 4 | Day 7 |
|
||||
| Phase 3: Parallel Processing | 2-3 days | Day 7 | Day 10 |
|
||||
| Phase 4: Renderer Updates | 4-5 days | Day 10 | Day 15 |
|
||||
| Phase 5: Content Caching | 2-3 days | Day 15 | Day 18 |
|
||||
| Phase 6: Error Handling | 2-3 days | Day 18 | Day 21 |
|
||||
| Phase 7: Testing & Refinement | 3-4 days | Day 21 | Day 25 |
|
||||
|
||||
**Total Estimated Time**: 4-5 weeks
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. **Review and Approve Plan**
|
||||
- Review implementation plan
|
||||
- Approve timeline
|
||||
- Assign resources
|
||||
|
||||
2. **Set Up Development Environment**
|
||||
- Create feature branch
|
||||
- Set up test infrastructure
|
||||
- Prepare development tools
|
||||
|
||||
3. **Begin Phase 1**
|
||||
- Start with StructureGenerator
|
||||
- Set up project structure
|
||||
- Begin implementation
|
||||
|
||||
|
|
@ -167,50 +167,86 @@ class WorkflowManager:
|
|||
|
||||
self.workflowProcessor = WorkflowProcessor(self.services)
|
||||
|
||||
# Get workflow mode to determine if complexity detection is needed
|
||||
# Get workflow mode to determine if combined analysis is needed
|
||||
workflowMode = getattr(self.services.workflow, 'workflowMode', None)
|
||||
skipComplexityDetection = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
|
||||
skipCombinedAnalysis = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
|
||||
|
||||
if skipComplexityDetection:
|
||||
logger.info("Skipping complexity detection for AUTOMATION mode - using predefined plan")
|
||||
if skipCombinedAnalysis:
|
||||
logger.info("Skipping combined analysis for AUTOMATION mode - using predefined plan")
|
||||
complexity = "moderate" # Default for automation workflows
|
||||
needsWorkflowHistory = False # Automation workflows don't need history
|
||||
detectedLanguage = None # No language detection in automation mode
|
||||
normalizedRequest = userInput.prompt
|
||||
intentText = userInput.prompt
|
||||
contextItems = []
|
||||
workflowIntent = None
|
||||
else:
|
||||
# Process user-uploaded documents from userInput for complexity detection
|
||||
# This is the correct way: use the input data directly, not workflow state
|
||||
# Process user-uploaded documents from userInput for combined analysis
|
||||
documents = []
|
||||
if userInput.listFileId:
|
||||
try:
|
||||
documents = await self._processFileIds(userInput.listFileId, None)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to process user fileIds for complexity detection: {e}")
|
||||
logger.warning(f"Failed to process user fileIds for combined analysis: {e}")
|
||||
|
||||
# Detect complexity (AI-based semantic understanding) using user input documents
|
||||
# Also detects language for fast path responses
|
||||
complexity, needsWorkflowHistory, detectedLanguage = await self.workflowProcessor.detectComplexity(userInput.prompt, documents)
|
||||
logger.info(f"Request complexity detected: {complexity}, needsWorkflowHistory: {needsWorkflowHistory}, language: {detectedLanguage}")
|
||||
# Phase 1+2: Kombinierte Analyse: Intent + Komplexität in einem AI-Call
|
||||
analysisResult = await self._analyzeUserInputAndComplexity(userInput.prompt, documents)
|
||||
|
||||
# Set detected language for fast path (if detected)
|
||||
# Extract results
|
||||
detectedLanguage = analysisResult.get('detectedLanguage')
|
||||
normalizedRequest = analysisResult.get('normalizedRequest')
|
||||
intentText = analysisResult.get('intent') or userInput.prompt
|
||||
contextItems = analysisResult.get('contextItems', [])
|
||||
complexity = analysisResult.get('complexity', 'moderate')
|
||||
needsWorkflowHistory = analysisResult.get('needsWorkflowHistory', False)
|
||||
fastTrack = analysisResult.get('fastTrack', False)
|
||||
|
||||
# Extract intent analysis fields and store as workflowIntent
|
||||
workflowIntent = {
|
||||
'primaryGoal': analysisResult.get('primaryGoal'),
|
||||
'dataType': analysisResult.get('dataType', 'unknown'),
|
||||
'expectedFormats': analysisResult.get('expectedFormats', []),
|
||||
'qualityRequirements': analysisResult.get('qualityRequirements', {}),
|
||||
'successCriteria': analysisResult.get('successCriteria', []),
|
||||
'languageUserDetected': detectedLanguage,
|
||||
'needsWorkflowHistory': needsWorkflowHistory
|
||||
}
|
||||
|
||||
# Store needsWorkflowHistory in services
|
||||
setattr(self.services, '_needsWorkflowHistory', bool(needsWorkflowHistory))
|
||||
|
||||
# Store workflowIntent in workflow object for reuse
|
||||
if hasattr(self.services, 'workflow') and self.services.workflow:
|
||||
self.services.workflow._workflowIntent = workflowIntent
|
||||
|
||||
# Store normalized request and intent
|
||||
self.services.currentUserPrompt = intentText or userInput.prompt
|
||||
self.services.currentUserPromptNormalized = normalizedRequest or intentText or userInput.prompt
|
||||
if contextItems is not None:
|
||||
self.services.currentUserContextItems = contextItems
|
||||
|
||||
# Set detected language
|
||||
if detectedLanguage and isinstance(detectedLanguage, str):
|
||||
self._setUserLanguage(detectedLanguage)
|
||||
try:
|
||||
setattr(self.services, 'currentUserLanguage', detectedLanguage)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
logger.info(f"Combined analysis: complexity={complexity}, needsWorkflowHistory={needsWorkflowHistory}, language={detectedLanguage}, fastTrack={fastTrack}")
|
||||
|
||||
# Route to fast path for simple requests if history is not needed
|
||||
# Skip fast path for automation mode or if history is needed
|
||||
if complexity == "simple" and not needsWorkflowHistory:
|
||||
if not skipCombinedAnalysis and complexity == "simple" and not needsWorkflowHistory:
|
||||
logger.info("Routing to fast path for simple request")
|
||||
await self._executeFastPath(userInput, documents)
|
||||
return # Fast path completes the workflow
|
||||
|
||||
# Now send the first message (which will also process the documents again, but that's fine)
|
||||
await self._sendFirstMessage(userInput)
|
||||
# Now send the first message (use already analyzed data if available)
|
||||
await self._sendFirstMessage(userInput, skipIntentionAnalysis=not skipCombinedAnalysis)
|
||||
|
||||
# Route to full workflow for moderate/complex requests or automation mode
|
||||
logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipComplexityDetection else ""))
|
||||
logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipCombinedAnalysis else ""))
|
||||
taskPlan = await self._planTasks(userInput)
|
||||
await self._executeTasks(taskPlan)
|
||||
await self._processWorkflowResults()
|
||||
|
|
@ -223,6 +259,143 @@ class WorkflowManager:
|
|||
|
||||
# Helper functions
|
||||
|
||||
async def _analyzeUserInputAndComplexity(
    self,
    userPrompt: str,
    documents: List[ChatDocument]
) -> Dict[str, Any]:
    """Run the combined intent + complexity analysis in a single AI call.

    Args:
        userPrompt: The user's request text.
        documents: Documents attached to the request; only their file name,
            MIME type and size are listed in the prompt.

    Returns:
        The JSON object parsed from the AI response. The prompt asks the
        model for these keys: detectedLanguage, normalizedRequest, intent,
        contextItems, complexity, needsWorkflowHistory, fastTrack,
        primaryGoal, dataType, expectedFormats, qualityRequirements and
        successCriteria. The parsed object is returned as-is (not validated).
        If no JSON object can be located or parsing fails, the result of
        ``_getDefaultAnalysisResult()`` is returned instead.
    """
    # One numbered line per attached document; empty string when there are none.
    docListText = "".join(
        f"\n{position}. {document.fileName} ({document.mimeType}, {document.fileSize} bytes)"
        for position, document in enumerate(documents or [], 1)
    )

    # NOTE(review): braces in the user text are doubled before insertion.
    # Presumably this protects a later format/placeholder pass inside
    # callAiPlanning -- confirm; the f-string itself does not need it.
    escapedUserPrompt = userPrompt.replace('{', '{{').replace('}', '}}') if userPrompt else ''

    analysisPrompt = f"""You are an input analyzer. From the user's message, perform ALL of the following in one pass:

1. detectedLanguage: Detect ISO 639-1 language code (e.g., de, en, fr, it)
2. normalizedRequest: Full, explicit restatement of the user's request in the detected language; do NOT summarize; preserve ALL constraints and details
3. intent: Concise single-paragraph core request in the detected language for high-level routing
4. contextItems: Supportive data blocks to attach as separate documents if significantly larger than the intent (large literal content, long lists/tables, code/JSON blocks, transcripts, CSV fragments, detailed specs). Keep URLs in the intent unless they embed large pasted content
5. complexity: "simple" | "moderate" | "complex"
- "simple": Only if NO documents AND NO web search required. Single question, straightforward answer (5-15s)
- "moderate": Multiple steps, some documents, structured response requiring some processing, or web search needed (30-60s)
- "complex": Multi-task workflow, many documents, research needed, content generation required, multi-step planning (60-120s)
6. needsWorkflowHistory: Boolean indicating if this request needs previous workflow rounds/history (e.g., 'continue', 'retry', 'fix', 'improve', 'update', 'modify', 'based on previous', 'build on', references to earlier work)
7. fastTrack: Boolean indicating if Fast Track is possible (simple requests without documents and without workflow history)
8. primaryGoal: The main objective the user wants to achieve
9. dataType: What type of data/content they want (numbers|text|documents|analysis|code|unknown)
10. expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., ["xlsx", "pdf"]). If format is unclear or not specified, use empty list []
11. qualityRequirements: Quality requirements they have (accuracy, completeness) as {{accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}}
12. successCriteria: Specific success criteria that define completion (array of strings)

Rules:
- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained
- If content exceeds that threshold, move bulky parts into contextItems; keep intent short and clear
- Preserve critical references (URLs, filenames) in intent
- Normalize to the primary detected language if mixed-language
- Consider number of documents provided when determining complexity
- Consider need for external research or web search when determining complexity

Documents provided: {len(documents)} document(s)
{docListText}

Return ONLY JSON (no markdown) with this exact structure:
{{
"detectedLanguage": "de|en|fr|it|...",
"normalizedRequest": "Full explicit instruction in detected language",
"intent": "Concise normalized request...",
"contextItems": [
{{
"title": "User context 1",
"mimeType": "text/plain",
"content": "Full extracted content block here"
}}
],
"complexity": "simple" | "moderate" | "complex",
"needsWorkflowHistory": true|false,
"fastTrack": true|false,
"primaryGoal": "The main objective the user wants to achieve",
"dataType": "numbers|text|documents|analysis|code|unknown",
"expectedFormats": ["pdf", "docx", "xlsx", "txt", "json", "csv", "html", "md"],
"qualityRequirements": {{
"accuracyThreshold": 0.0-1.0,
"completenessThreshold": 0.0-1.0
}},
"successCriteria": ["specific criterion 1", "specific criterion 2"]
}}

## User Message
The following is the user's original input message. Analyze intent, normalize the request, determine complexity, and identify any large context blocks that should be moved to separate documents:

################ USER INPUT START #################
{escapedUserPrompt}
################ USER INPUT FINISH #################
"""

    # AI call (callAiPlanning is used for plain JSON responses; per the
    # original author's note it already writes the debug logs).
    aiResponse = await self.services.ai.callAiPlanning(
        prompt=analysisPrompt,
        placeholders=None,
        debugType="user_input_analysis"
    )

    # Slice from the first '{' to the last '}' so text around the JSON is ignored.
    try:
        if aiResponse:
            startIdx = aiResponse.find('{')
            endIdx = aiResponse.rfind('}') + 1
        else:
            startIdx, endIdx = -1, 0

        if startIdx == -1 or endIdx <= startIdx:
            logger.warning("Could not parse combined analysis response, using defaults")
            return self._getDefaultAnalysisResult()

        return json.loads(aiResponse[startIdx:endIdx])
    except Exception as e:
        logger.warning(f"Error parsing combined analysis response: {str(e)}, using defaults")
        return self._getDefaultAnalysisResult()
|
||||
|
||||
def _getDefaultAnalysisResult(self) -> Dict[str, Any]:
    """Return the fallback analysis result used when parsing the AI response fails.

    A fresh dictionary (with fresh nested containers) is built on every call,
    so callers may mutate the result freely.
    """
    qualityDefaults = {
        "accuracyThreshold": 0.8,
        "completenessThreshold": 0.8,
    }
    fallback: Dict[str, Any] = dict(
        detectedLanguage="en",
        normalizedRequest="",
        intent="",
        contextItems=[],
        complexity="moderate",
        needsWorkflowHistory=False,
        fastTrack=False,
        primaryGoal=None,
        dataType="unknown",
        expectedFormats=[],
        qualityRequirements=qualityDefaults,
        successCriteria=[],
    )
    return fallback
|
||||
|
||||
async def _executeFastPath(self, userInput: UserInputRequest, documents: List[ChatDocument]) -> None:
|
||||
"""Execute fast path for simple requests and deliver result to user"""
|
||||
try:
|
||||
|
|
@ -330,7 +503,7 @@ class WorkflowManager:
|
|||
await self._executeTasks(taskPlan)
|
||||
await self._processWorkflowResults()
|
||||
|
||||
async def _sendFirstMessage(self, userInput: UserInputRequest) -> None:
|
||||
async def _sendFirstMessage(self, userInput: UserInputRequest, skipIntentionAnalysis: bool = False) -> None:
|
||||
"""Send first message to start workflow"""
|
||||
try:
|
||||
workflow = self.services.workflow
|
||||
|
|
@ -360,21 +533,58 @@ class WorkflowManager:
|
|||
}
|
||||
|
||||
# Analyze the user's input to detect language, normalize request, extract intent, and offload bulky context into documents
|
||||
# SKIP user intention analysis for AUTOMATION mode - it uses predefined JSON plans
|
||||
# SKIP user intention analysis if already done in combined analysis (skipIntentionAnalysis=True)
|
||||
# or for AUTOMATION mode - it uses predefined JSON plans
|
||||
createdDocs = []
|
||||
workflowMode = getattr(workflow, 'workflowMode', None)
|
||||
skipIntentionAnalysis = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
|
||||
skipIntentionAnalysis = skipIntentionAnalysis or (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
|
||||
|
||||
if skipIntentionAnalysis:
|
||||
logger.info("Skipping user intention analysis for AUTOMATION mode - using direct user input")
|
||||
# For automation mode, use user input directly without AI analysis
|
||||
self.services.currentUserPrompt = userInput.prompt
|
||||
# Always set currentUserPromptNormalized - use user input directly for automation mode
|
||||
self.services.currentUserPromptNormalized = userInput.prompt
|
||||
detectedLanguage = None
|
||||
normalizedRequest = None
|
||||
intentText = userInput.prompt
|
||||
contextItems = []
|
||||
logger.info("Skipping user intention analysis (already done in combined analysis or AUTOMATION mode)")
|
||||
# Use already analyzed data if available, otherwise use user input directly
|
||||
detectedLanguage = getattr(self.services, 'currentUserLanguage', None)
|
||||
normalizedRequest = getattr(self.services, 'currentUserPromptNormalized', None) or userInput.prompt
|
||||
intentText = getattr(self.services, 'currentUserPrompt', None) or userInput.prompt
|
||||
contextItems = getattr(self.services, 'currentUserContextItems', None) or []
|
||||
workflowIntent = getattr(workflow, '_workflowIntent', None)
|
||||
|
||||
# Create documents for context items (if available from combined analysis)
|
||||
if contextItems and isinstance(contextItems, list):
|
||||
for idx, item in enumerate(contextItems):
|
||||
try:
|
||||
title = item.get('title') if isinstance(item, dict) else None
|
||||
mime = item.get('mimeType') if isinstance(item, dict) else None
|
||||
content = item.get('content') if isinstance(item, dict) else None
|
||||
if not content:
|
||||
continue
|
||||
fileName = (title or f"user_context_{idx+1}.txt").strip()
|
||||
mimeType = (mime or "text/plain").strip()
|
||||
|
||||
# Neutralize content before storing if neutralization is enabled
|
||||
contentBytes = content.encode('utf-8')
|
||||
contentBytes = await self._neutralizeContentIfEnabled(contentBytes, mimeType)
|
||||
|
||||
# Create file in component storage
|
||||
fileItem = self.services.interfaceDbComponent.createFile(
|
||||
name=fileName,
|
||||
mimeType=mimeType,
|
||||
content=contentBytes
|
||||
)
|
||||
# Persist file data
|
||||
self.services.interfaceDbComponent.createFileData(fileItem.id, contentBytes)
|
||||
|
||||
# Collect file info
|
||||
fileInfo = self.services.chat.getFileInfo(fileItem.id)
|
||||
from modules.datamodels.datamodelChat import ChatDocument
|
||||
doc = ChatDocument(
|
||||
fileId=fileItem.id,
|
||||
fileName=fileInfo.get("fileName", fileName) if fileInfo else fileName,
|
||||
fileSize=fileInfo.get("size", len(contentBytes)) if fileInfo else len(contentBytes),
|
||||
mimeType=fileInfo.get("mimeType", mimeType) if fileInfo else mimeType
|
||||
)
|
||||
createdDocs.append(doc)
|
||||
except Exception:
|
||||
continue
|
||||
else:
|
||||
try:
|
||||
analyzerPrompt = (
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ class DocumentGenerationFormatsTester:
|
|||
self.workflow = None
|
||||
self.testResults = {}
|
||||
self.generatedDocuments = {}
|
||||
self.pdfFileId = None # Store PDF file ID for reuse
|
||||
|
||||
async def initialize(self):
|
||||
"""Initialize the test environment."""
|
||||
|
|
@ -53,17 +54,123 @@ class DocumentGenerationFormatsTester:
|
|||
print(f"Initialized test with user: {self.testUser.id}")
|
||||
print(f"Mandate ID: {self.testUser.mandateId}")
|
||||
print(f"Debug logging enabled: {APP_CONFIG.get('APP_DEBUG_CHAT_WORKFLOW_ENABLED', False)}")
|
||||
|
||||
# Upload PDF file for testing
|
||||
await self.uploadPdfFile()
|
||||
|
||||
async def uploadPdfFile(self):
    """Upload the sample PDF and remember its file ID in ``self.pdfFileId``.

    The file is looked up at ``local/temp/B2025-02c.pdf`` three directory
    levels above this module. Every failure (file missing, storage component
    unavailable, any exception while storing) is reported with ``print`` and
    swallowed, so the test run continues without a PDF attachment.
    """
    continueNotice = " Test will continue without PDF attachment"
    moduleDir = os.path.dirname(__file__)
    sourcePath = os.path.abspath(
        os.path.join(moduleDir, "..", "..", "..", "local", "temp", "B2025-02c.pdf")
    )

    if not os.path.exists(sourcePath):
        print(f"⚠️ Warning: PDF file not found at {sourcePath}")
        print(continueNotice)
        return

    try:
        # Read the raw PDF bytes
        with open(sourcePath, "rb") as pdfHandle:
            pdfBytes = pdfHandle.read()

        # The storage component must exist on the services object and be truthy
        dbComponent = getattr(self.services, 'interfaceDbComponent', None)
        if not dbComponent:
            print("⚠️ Warning: interfaceDbComponent not available in services")
            print(continueNotice)
            return

        storedFile = dbComponent.createFile(
            name="B2025-02c.pdf",
            mimeType="application/pdf",
            content=pdfBytes
        )

        # Persist the file content alongside the file record
        dbComponent.createFileData(storedFile.id, pdfBytes)

        self.pdfFileId = storedFile.id
        print(f"✅ Uploaded PDF file: {storedFile.fileName} (ID: {self.pdfFileId}, Size: {len(pdfBytes)} bytes)")

    except Exception as e:
        import traceback
        print(f"⚠️ Warning: Failed to upload PDF file: {str(e)}")
        print(f" Traceback: {traceback.format_exc()}")
        print(continueNotice)
|
||||
|
||||
def createTestPrompt(self, format: str) -> str:
    """Build the unified document-generation test prompt for one output format.

    The prompt asks for:
    - extraction of the image from the attached PDF (B2025-02c.pdf)
    - generation of a new image
    - a document that contains both images

    Args:
        format: Output format name; it is upper-cased into the final
            "Format the output as ..." sentence.

    Returns:
        The complete prompt text.
    """
    promptLines = [
        "Create a professional document about 'Fuel Station Receipt Analysis' with the following content:",
        "1) A main title",
        "2) An introduction paragraph explaining the receipt analysis",
        "3) Extract and include the image from the attached PDF document (B2025-02c.pdf)",
        "4) A section analyzing the receipt data with bullet points",
        "5) Generate a new image showing a visual representation of fuel consumption trends",
        "6) A conclusion paragraph with recommendations",
        "",  # blank line between the numbered list and the closing instructions
        "Make sure to include both: the image extracted from the PDF and the newly generated image.",
        f"Format the output as {format.upper()}.",
    ]
    return "\n".join(promptLines)
|
||||
|
||||
def createRefactoringTestPrompt(self, testType: str, format: str = "html") -> str:
    """Create the test prompt for one specific refactoring feature.

    Args:
        testType: Type of refactoring test:
            - "intent_analysis": Test DocumentIntent analysis
            - "conditional_extraction": Test conditional extraction (extract vs render)
            - "image_render": Test image rendering as asset
            - "multi_document": Test multi-document rendering
            - "metadata_preservation": Test metadata preservation
        format: Output format (default: html)

    Returns:
        The feature prompt followed by a "Format the output as ..." sentence.
        For an unknown ``testType`` the generic prompt from
        ``createTestPrompt(format)`` is returned unchanged.
    """
    prompts = {
        "intent_analysis": (
            "Create a document with the following requirements:\n"
            "1) Extract text content from the attached PDF\n"
            "2) Include images from the PDF as visual elements (render them, don't extract text from them)\n"
            "3) Generate a summary document\n\n"
            "This tests that the system correctly identifies which documents need extraction vs rendering."
        ),
        "conditional_extraction": (
            "Create a document that:\n"
            "1) Extracts and uses text from the attached PDF\n"
            "2) Renders images from the PDF as visual assets (not as extracted text)\n"
            "3) Generates new content based on the extracted text\n\n"
            "This tests conditional extraction - only extract what needs extraction, render what needs rendering."
        ),
        "image_render": (
            "Create a document that includes images from the attached PDF.\n"
            "The images should be rendered as visual elements in the document, not extracted as text.\n"
            "Include a title and description for each image.\n\n"
            "This tests the image asset pipeline with render intent."
        ),
        "multi_document": (
            "Create multiple separate documents:\n"
            "1) Document 1: Summary of the PDF content\n"
            "2) Document 2: Analysis of the PDF content\n"
            "3) Document 3: Recommendations based on the PDF content\n\n"
            "Each document should be separate and complete.\n"
            "This tests multi-document generation and rendering."
        ),
        "metadata_preservation": (
            "Create a document that extracts content from the attached PDF.\n"
            "The document should clearly show which content came from which source document.\n"
            "Include source references in the generated content.\n\n"
            "This tests that metadata (documentId, mimeType) is preserved in the generation prompt."
        ),
    }

    prompt = prompts.get(testType)
    if prompt is None:
        # The generic prompt already ends with its own "Format the output
        # as ..." sentence, so it must not get a second one appended.
        return self.createTestPrompt(format)

    return f"{prompt}\n\nFormat the output as {format.upper()}."
|
||||
|
||||
async def generateDocumentInFormat(self, format: str) -> Dict[str, Any]:
|
||||
"""Generate a document in the specified format using workflow."""
|
||||
|
|
@ -74,9 +181,18 @@ class DocumentGenerationFormatsTester:
|
|||
prompt = self.createTestPrompt(format)
|
||||
print(f"Prompt: {prompt[:200]}...")
|
||||
|
||||
# Create user input request with PDF file attachment
|
||||
listFileId = []
|
||||
if self.pdfFileId:
|
||||
listFileId = [self.pdfFileId]
|
||||
print(f"Attaching PDF file (ID: {self.pdfFileId})")
|
||||
else:
|
||||
print("⚠️ No PDF file attached (file upload may have failed)")
|
||||
|
||||
# Create user input request
|
||||
userInput = UserInputRequest(
|
||||
prompt=prompt,
|
||||
listFileId=listFileId,
|
||||
userLanguage="en"
|
||||
)
|
||||
|
||||
|
|
@ -281,6 +397,166 @@ class DocumentGenerationFormatsTester:
|
|||
|
||||
return verification
|
||||
|
||||
async def testRefactoringFeatures(self) -> Dict[str, Any]:
    """Run one workflow per refactoring feature and collect the outcomes.

    For each feature a prompt is built, the uploaded PDF (if any) is attached,
    a dynamic workflow is started and awaited (300 s timeout), and the result
    is checked with ``verifyRefactoringFeature``.

    Returns:
        Mapping of test type to a result dict. "success" is True when the
        workflow ran to completion (the verification verdict is stored
        separately under "verification"); otherwise "error" explains why.
    """
    import traceback

    banner = "=" * 80
    print("\n" + banner)
    print("TESTING REFACTORING FEATURES")
    print(banner)

    featureCases = [
        ("intent_analysis", "html"),
        ("conditional_extraction", "html"),
        ("image_render", "html"),
        ("multi_document", "html"),
        ("metadata_preservation", "html"),
    ]

    results = {}

    for testType, outputFormat in featureCases:
        try:
            print(f"\n{banner}")
            print(f"Testing Refactoring Feature: {testType}")
            print(f"{banner}")

            prompt = self.createRefactoringTestPrompt(testType, outputFormat)
            print(f"Prompt: {prompt[:200]}...")

            # Attach the uploaded PDF when one is available
            if self.pdfFileId:
                attachedFileIds = [self.pdfFileId]
                print(f"Attaching PDF file (ID: {self.pdfFileId})")
            else:
                attachedFileIds = []
                print("⚠️ No PDF file attached (file upload may have failed)")

            userInput = UserInputRequest(
                prompt=prompt,
                listFileId=attachedFileIds,
                userLanguage="en"
            )

            # Start workflow
            print(f"\nStarting workflow for {testType} test...")
            workflow = await chatStart(
                currentUser=self.testUser,
                userInput=userInput,
                workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC,
                workflowId=None
            )

            if not workflow:
                results[testType] = {
                    "success": False,
                    "error": "Failed to start workflow"
                }
                continue

            self.workflow = workflow
            print(f"Workflow started: {workflow.id}")

            # Wait for workflow completion
            finished = await self.waitForWorkflowCompletion(timeout=300)
            if not finished:
                results[testType] = {
                    "success": False,
                    "error": "Workflow did not complete within timeout",
                    "workflowId": workflow.id
                }
                continue

            # Analyze results and check the feature-specific expectations
            workflowResults = self.analyzeWorkflowResults()
            verification = self.verifyRefactoringFeature(testType, workflowResults)

            results[testType] = {
                "success": True,
                "workflowId": workflow.id,
                "verification": verification,
                "workflowResults": workflowResults
            }

            verdict = '✅ PASS' if verification.get('passed', False) else '❌ FAIL'
            print(f"\n✅ {testType} test completed!")
            print(f" Verification: {verdict}")
            if verification.get("details"):
                for detail in verification["details"]:
                    print(f" - {detail}")

            # Short pause before the next workflow is started
            await asyncio.sleep(2)

        except Exception as e:
            trace = traceback.format_exc()
            print(f"\n❌ Error testing {testType}: {str(e)}")
            print(trace)
            results[testType] = {
                "success": False,
                "error": str(e),
                "traceback": trace
            }

    return results
|
||||
|
||||
def verifyRefactoringFeature(self, testType: str, workflowResults: Dict[str, Any]) -> Dict[str, Any]:
    """Check the workflow output for evidence that a refactoring feature ran.

    Args:
        testType: One of "intent_analysis", "conditional_extraction",
            "image_render", "multi_document", "metadata_preservation".
        workflowResults: Analysis result; the "documents" and "logs" entries
            are read (both default to an empty list).

    Returns:
        Dict with "testType", "passed" (bool) and "details" (list of
        messages). An unknown ``testType`` yields passed=False and no details.
    """
    documents = workflowResults.get("documents", [])
    logs = workflowResults.get("logs", [])

    verification = {
        "testType": testType,
        "passed": False,
        "details": []
    }

    def countLogs(*needles: str, lowercase: bool = True) -> int:
        """Count log entries whose text contains at least one needle."""
        matched = 0
        for entry in logs:
            text = str(entry).lower() if lowercase else str(entry)
            if any(needle in text for needle in needles):
                matched += 1
        return matched

    if testType == "intent_analysis":
        # Intent analysis must have left a trace in the logs
        passed = countLogs("intent", "analyzing document intent") > 0
        detail = "Intent analysis logs found" if passed else "No intent analysis logs found"

    elif testType == "conditional_extraction":
        # Both extraction and rendering must have occurred
        extractionCount = countLogs("extract")
        renderCount = countLogs("render", "image")
        passed = extractionCount > 0 and renderCount > 0
        if passed:
            detail = "Both extraction and rendering occurred"
        else:
            detail = f"Missing logs: extraction={extractionCount}, render={renderCount}"

    elif testType == "image_render":
        # Images must show up in the logs (rendered, not extracted as text)
        passed = countLogs("image") > 0
        detail = "Image rendering logs found" if passed else "No image rendering logs found"

    elif testType == "multi_document":
        # At least two documents must have been generated
        documentCount = len(documents)
        passed = documentCount >= 2
        if passed:
            detail = f"Multiple documents generated: {documentCount}"
        else:
            detail = f"Expected multiple documents, got {documentCount}"

    elif testType == "metadata_preservation":
        # Case-sensitive search for source/document references in the logs
        passed = countLogs("documentId", "SOURCE:", lowercase=False) > 0
        detail = "Metadata preservation logs found" if passed else "No metadata preservation logs found"

    else:
        return verification

    verification["details"].append(detail)
    verification["passed"] = passed
    return verification
|
||||
|
||||
async def testAllFormats(self) -> Dict[str, Any]:
|
||||
"""Test document generation in all formats."""
|
||||
print("\n" + "="*80)
|
||||
|
|
@ -334,8 +610,12 @@ class DocumentGenerationFormatsTester:
|
|||
|
||||
return results
|
||||
|
||||
async def runTest(self):
|
||||
"""Run the complete test."""
|
||||
async def runTest(self, includeRefactoringTests: bool = True):
|
||||
"""Run the complete test.
|
||||
|
||||
Args:
|
||||
includeRefactoringTests: If True, also run refactoring feature tests
|
||||
"""
|
||||
print("\n" + "="*80)
|
||||
print("DOCUMENT GENERATION FORMATS TEST")
|
||||
print("="*80)
|
||||
|
|
@ -344,18 +624,43 @@ class DocumentGenerationFormatsTester:
|
|||
# Initialize
|
||||
await self.initialize()
|
||||
|
||||
# Test refactoring features first (if enabled)
|
||||
refactoringResults = {}
|
||||
if includeRefactoringTests:
|
||||
refactoringResults = await self.testRefactoringFeatures()
|
||||
|
||||
# Test all formats
|
||||
results = await self.testAllFormats()
|
||||
formatResults = await self.testAllFormats()
|
||||
|
||||
# Summary
|
||||
print("\n" + "="*80)
|
||||
print("TEST SUMMARY")
|
||||
print("="*80)
|
||||
|
||||
# Refactoring tests summary
|
||||
refactoringSuccessCount = 0
|
||||
refactoringFailCount = 0
|
||||
if includeRefactoringTests and refactoringResults:
|
||||
print("\nRefactoring Features:")
|
||||
for testType, result in refactoringResults.items():
|
||||
if result.get("success"):
|
||||
refactoringSuccessCount += 1
|
||||
verification = result.get("verification", {})
|
||||
passed = verification.get("passed", False)
|
||||
statusIcon = "✅" if passed else "⚠️"
|
||||
print(f"{statusIcon} {testType:25s}: {'PASS' if passed else 'FAIL'}")
|
||||
else:
|
||||
refactoringFailCount += 1
|
||||
error = result.get("error", "Unknown error")
|
||||
print(f"❌ {testType:25s}: FAIL - {error}")
|
||||
print(f"Refactoring Tests: {refactoringSuccessCount} passed, {refactoringFailCount} failed out of {len(refactoringResults)} tests")
|
||||
|
||||
# Format tests summary
|
||||
print("\nFormat Tests:")
|
||||
successCount = 0
|
||||
failCount = 0
|
||||
|
||||
for format, result in results.items():
|
||||
for format, result in formatResults.items():
|
||||
if result.get("success"):
|
||||
successCount += 1
|
||||
status = "✅ PASS"
|
||||
|
|
@ -369,14 +674,28 @@ class DocumentGenerationFormatsTester:
|
|||
error = result.get("error", "Unknown error")
|
||||
print(f"❌ {format.upper():6s}: FAIL - {error}")
|
||||
|
||||
print(f"\nTotal: {successCount} passed, {failCount} failed out of {len(results)} formats")
|
||||
print(f"\nFormat Tests: {successCount} passed, {failCount} failed out of {len(formatResults)} formats")
|
||||
|
||||
# Calculate totals
|
||||
totalSuccess = successCount + refactoringSuccessCount if includeRefactoringTests else successCount
|
||||
totalFail = failCount + refactoringFailCount if includeRefactoringTests else failCount
|
||||
|
||||
self.testResults = {
|
||||
"success": failCount == 0,
|
||||
"successCount": successCount,
|
||||
"failCount": failCount,
|
||||
"totalFormats": len(results),
|
||||
"results": results
|
||||
"success": failCount == 0 and (not includeRefactoringTests or refactoringFailCount == 0),
|
||||
"formatTests": {
|
||||
"successCount": successCount,
|
||||
"failCount": failCount,
|
||||
"totalFormats": len(formatResults),
|
||||
"results": formatResults
|
||||
},
|
||||
"refactoringTests": {
|
||||
"successCount": refactoringSuccessCount if includeRefactoringTests else 0,
|
||||
"failCount": refactoringFailCount if includeRefactoringTests else 0,
|
||||
"totalTests": len(refactoringResults) if includeRefactoringTests else 0,
|
||||
"results": refactoringResults if includeRefactoringTests else {}
|
||||
},
|
||||
"totalSuccess": totalSuccess,
|
||||
"totalFail": totalFail
|
||||
}
|
||||
|
||||
return self.testResults
|
||||
|
|
|
|||
Loading…
Reference in a new issue