Fix content handling: extract nested structure parts, propagate image captions, and use per-document languages

This commit is contained in:
ValueOn AG 2026-01-02 21:35:32 +01:00
parent 3ef87cd083
commit 3408e7b463
22 changed files with 3346 additions and 224 deletions

File diff suppressed because it is too large Load diff

View file

@ -17,6 +17,7 @@ from modules.datamodels.datamodelExtraction import ContentPart
from modules.shared.jsonUtils import buildContinuationContext, extractJsonString, tryParseJson
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
from modules.services.serviceAi.subLoopingUseCases import LoopingUseCaseRegistry
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@ -134,6 +135,7 @@ class AiCallLooper:
# Make AI call
try:
checkWorkflowStopped(self.services)
if iterationOperationId:
self.services.chat.progressLogUpdate(iterationOperationId, 0.3, "Calling AI model")
# ARCHITECTURE: Pass ContentParts directly to AiCallRequest
@ -621,6 +623,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
# Write KPI definition prompt to debug file
self.services.utils.writeDebugFile(kpiDefinitionPrompt, f"{debugPrefix}_kpi_definition_prompt")
checkWorkflowStopped(self.services)
response = await self.aiService.callAi(request)
# Write KPI definition response to debug file

View file

@ -16,6 +16,7 @@ from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@ -70,6 +71,7 @@ class ContentExtractor:
allContentParts = []
for document in documents:
checkWorkflowStopped(self.services)
# Check if document is already a ContentExtracted document (pre-extracted JSON)
logger.debug(f"Checking document {document.id} ({document.fileName}, mimeType={document.mimeType}) for pre-extracted content")
preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(document)
@ -92,12 +94,28 @@ class ContentExtractor:
logger.warning(f" ⚠️ No intent found for pre-extracted document {document.id}! Available intent documentIds: {[i.documentId for i in documentIntents]}")
if contentExtracted.parts:
# CRITICAL: Process pre-extracted parts - analyze structure parts for nested content
processedParts = []
for part in contentExtracted.parts:
# Überspringe leere Parts (Container ohne Daten)
if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0):
if part.typeGroup == "container":
continue # Überspringe leere Container
# CRITICAL: Check if structure part contains nested parts (e.g., JSON with documentData.parts)
if part.typeGroup == "structure" and part.mimeType == "application/json" and part.data:
nestedParts = self._extractNestedPartsFromStructure(part, document, preExtracted, intent)
if nestedParts:
# Replace structure part with extracted nested parts
processedParts.extend(nestedParts)
logger.info(f"✅ Extracted {len(nestedParts)} nested parts from structure part {part.id}")
continue # Skip original structure part
# Keep original part if no nested parts found
processedParts.append(part)
# Use processed parts (with nested parts extracted)
for part in processedParts:
if not part.metadata:
part.metadata = {}
@ -352,6 +370,7 @@ class ContentExtractor:
)
# extractContent ist nicht async - keine await nötig
checkWorkflowStopped(self.services)
extractedResults = self.services.extraction.extractContent(
[document],
extractionOptions,
@ -431,6 +450,7 @@ class ContentExtractor:
)
# Verwende AI-Service für Vision AI-Verarbeitung
checkWorkflowStopped(self.services)
response = await self.aiService.callAi(request)
# Debug-Log für Response (harmonisiert)
@ -504,6 +524,7 @@ class ContentExtractor:
)
# Verwende AI-Service für Text-Verarbeitung
checkWorkflowStopped(self.services)
response = await self.aiService.callAi(request)
# Debug-Log für Response (harmonisiert)
@ -537,4 +558,84 @@ class ContentExtractor:
"application/x-zip-compressed"
]
return mimeType in binaryTypes or mimeType.startswith("image/") or mimeType.startswith("video/") or mimeType.startswith("audio/")
def _extractNestedPartsFromStructure(
    self,
    structurePart: ContentPart,
    document: ChatDocument,
    preExtracted: Dict[str, Any],
    intent: Optional[Any]
) -> List[ContentPart]:
    """
    Extract nested parts from a structure ContentPart (e.g., JSON with documentData.parts).

    This is a generic function that analyzes pre-processed ContentParts and extracts
    any nested parts that are embedded in structure data (typically JSON).
    Works with standard ContentExtracted format: documentData.parts array.
    Each nested part is extracted as a separate ContentPart with proper metadata.

    Args:
        structurePart: ContentPart with typeGroup="structure" containing nested parts
        document: The document this part belongs to
        preExtracted: Pre-extracted document metadata (may lack "originalDocument")
        intent: Document intent for nested parts (currently unused; kept for interface stability)

    Returns:
        List of extracted ContentParts, empty if no nested parts found
    """
    nestedParts: List[ContentPart] = []
    try:
        # Parse JSON structure
        jsonData = json.loads(structurePart.data)

        # Only the standard ContentExtracted format is supported: documentData.parts
        if not isinstance(jsonData, dict):
            return nestedParts
        documentData = jsonData.get("documentData")
        if not isinstance(documentData, dict):
            return nestedParts
        parts = documentData.get("parts", [])
        if not isinstance(parts, list) or not parts:
            logger.debug(f"No nested parts found in structure part {structurePart.id}")
            return nestedParts

        # FIX: resolve the original file name defensively once, instead of indexing
        # preExtracted["originalDocument"]["fileName"] per part — a missing key used
        # to raise KeyError mid-loop and abort extraction with partial results.
        originalFileName = (preExtracted or {}).get("originalDocument", {}).get("fileName")

        # Extract each nested part
        for nestedPartData in parts:
            if not isinstance(nestedPartData, dict):
                continue

            nestedPartId = nestedPartData.get("id") or f"nested_{len(nestedParts)}"
            nestedTypeGroup = nestedPartData.get("typeGroup", "text")
            nestedMimeType = nestedPartData.get("mimeType", "text/plain")
            nestedLabel = nestedPartData.get("label", structurePart.label)
            nestedData = nestedPartData.get("data", "")
            # FIX: JSON "metadata": null made .get(..., {}) return None, so the
            # **-spread raised TypeError and the broad except swallowed everything.
            nestedMetadata = nestedPartData.get("metadata") or {}

            # Create ContentPart for nested part
            nestedPart = ContentPart(
                id=f"{structurePart.id}_{nestedPartId}",
                parentId=structurePart.id,
                label=nestedLabel,
                typeGroup=nestedTypeGroup,
                mimeType=nestedMimeType,
                data=nestedData,
                metadata={
                    **nestedMetadata,
                    "documentId": document.id,
                    "fromNestedStructure": True,
                    "parentStructurePartId": structurePart.id,
                    "originalFileName": originalFileName
                }
            )
            nestedParts.append(nestedPart)
            logger.debug(f"✅ Extracted nested part: {nestedPart.id} (typeGroup={nestedTypeGroup}, mimeType={nestedMimeType})")
    except json.JSONDecodeError as e:
        logger.warning(f"Could not parse structure part {structurePart.id} as JSON: {str(e)}")
    except Exception as e:
        logger.error(f"Error extracting nested parts from structure part {structurePart.id}: {str(e)}")
    return nestedParts

View file

@ -14,6 +14,7 @@ from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelExtraction import DocumentIntent
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@ -86,6 +87,7 @@ class DocumentIntentAnalyzer:
# AI-Call (verwende callAiPlanning für einfache JSON-Responses)
# Debug-Logs werden bereits von callAiPlanning geschrieben
checkWorkflowStopped(self.services)
aiResponse = await self.aiService.callAiPlanning(
prompt=intentPrompt,
debugType="document_intent_analysis"

View file

@ -16,6 +16,7 @@ from typing import Dict, Any, List, Optional, Tuple
from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@ -51,6 +52,33 @@ class StructureFiller:
pass
return 'en' # Default fallback
def _getDocumentLanguage(self, structure: Dict[str, Any], documentId: str) -> str:
"""
Get language for a specific document from structure.
Falls back to user language if not specified.
Args:
structure: The document structure with documents array
documentId: The ID of the document to get language for
Returns:
ISO 639-1 language code (e.g., "de", "en", "fr")
"""
# Try to find document in structure
for doc in structure.get("documents", []):
if doc.get("id") == documentId:
docLanguage = doc.get("language")
if docLanguage:
return docLanguage
# Fallback to metadata language
metadataLanguage = structure.get("metadata", {}).get("language")
if metadataLanguage:
return metadataLanguage
# Fallback to user language
return self._getUserLanguage()
def _extractContentPartInfo(self, chapter: Dict[str, Any]) -> Tuple[List[str], Dict[str, Any]]:
"""
Extract contentPartIds and contentPartInstructions from chapter's contentParts structure.
@ -60,11 +88,15 @@ class StructureFiller:
"""
contentParts = chapter.get("contentParts", {})
contentPartIds = list(contentParts.keys())
# Extract instructions (only entries with "instruction" field)
# Extract instructions (entries with "instruction" field) and captions (entries with "caption" field)
contentPartInstructions = {}
for partId, partInfo in contentParts.items():
if isinstance(partInfo, dict) and "instruction" in partInfo:
contentPartInstructions[partId] = {"instruction": partInfo["instruction"]}
if isinstance(partInfo, dict):
if "instruction" in partInfo:
contentPartInstructions[partId] = {"instruction": partInfo["instruction"]}
elif "caption" in partInfo:
# For entries with only caption (no instruction), still add to dict so it's available
contentPartInstructions[partId] = {"caption": partInfo["caption"]}
return contentPartIds, contentPartInstructions
def _getContentPartCaption(self, chapter: Dict[str, Any], partId: str) -> Optional[str]:
@ -219,6 +251,7 @@ class StructureFiller:
# AI-Call für Chapter-Struktur-Generierung
# Note: Debug logging is handled by callAiPlanning
checkWorkflowStopped(self.services)
aiResponse = await self.aiService.callAiPlanning(
prompt=chapterPrompt,
debugType=f"chapter_structure_{chapterId}"
@ -311,6 +344,10 @@ class StructureFiller:
chapterIndex = 0
for doc in chapterStructure.get("documents", []):
docId = doc.get("id", "unknown")
# Get language for this specific document
docLanguage = self._getDocumentLanguage(chapterStructure, docId)
for chapter in doc.get("chapters", []):
chapterIndex += 1
chapterId = chapter.get("id", "unknown")
@ -320,7 +357,8 @@ class StructureFiller:
contentPartIds, contentPartInstructions = self._extractContentPartInfo(chapter)
# Create task for parallel processing with semaphore
async def processChapterWithSemaphore(chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions):
async def processChapterWithSemaphore(chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage):
checkWorkflowStopped(self.services)
async with semaphore:
return await self._generateSingleChapterSectionsStructure(
chapter=chapter,
@ -333,13 +371,13 @@ class StructureFiller:
contentPartInstructions=contentPartInstructions,
contentParts=contentParts,
userPrompt=userPrompt,
language=language,
language=docLanguage, # Use document-specific language
parentOperationId=parentOperationId,
totalChapters=totalChapters
)
task = processChapterWithSemaphore(
chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions
chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage
)
chapterTasks.append((chapterIndex, chapter, task))
@ -367,7 +405,8 @@ class StructureFiller:
operationType: OperationTypeEnum,
sectionId: str,
generationHint: str,
generatedElements: List[Dict[str, Any]]
generatedElements: List[Dict[str, Any]],
section: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""
Helper method to process AI response and extract elements.
@ -424,13 +463,16 @@ class StructureFiller:
# Image already processed as JSON, skip
pass
elif base64Data:
# Get caption from section if available
caption = section.get("caption") or section.get("metadata", {}).get("caption") or ""
elements.append({
"type": "image",
"content": {
"base64Data": base64Data,
"altText": generationHint or "Generated image",
"caption": ""
}
"caption": caption # Use caption from section if available
},
"caption": caption # Also at element level for compatibility
})
logger.debug(f"Created proper JSON image structure with base64Data length: {len(base64Data)}")
else:
@ -566,14 +608,26 @@ class StructureFiller:
})
elif contentFormat == "object":
if part.typeGroup == "image":
elements.append({
"type": "image",
"content": {
"base64Data": part.data,
"altText": part.metadata.get("usageHint", part.label),
"caption": part.metadata.get("caption", "")
}
})
# Validate that image data exists
if not part.data:
logger.warning(f"Section {sectionId}: Image ContentPart {part.id} has no data (object format). Skipping image element.")
elements.append({
"type": "error",
"message": f"Image ContentPart {part.id} has no data",
"sectionId": sectionId
})
else:
# Get caption from section (priority: section.caption > part.metadata.caption)
caption = section.get("caption") or section.get("metadata", {}).get("caption") or part.metadata.get("caption", "")
elements.append({
"type": "image",
"content": {
"base64Data": part.data,
"altText": part.metadata.get("usageHint", part.label),
"caption": caption # Use caption from section
},
"caption": caption # Also at element level for compatibility
})
else:
elements.append({
"type": part.typeGroup,
@ -615,6 +669,7 @@ class StructureFiller:
contentParts=[part]
)
checkWorkflowStopped(self.services)
visionResponse = await self.aiService.callAi(visionRequest)
# Write debug file for image extraction response
@ -715,6 +770,7 @@ class StructureFiller:
processingMode=ProcessingModeEnum.DETAILED
)
)
checkWorkflowStopped(self.services)
aiResponse = await self.aiService.callAi(request)
generatedElements = []
@ -773,6 +829,7 @@ The JSON should be a fragment that can be merged with the previous response."""
processingMode=ProcessingModeEnum.DETAILED
)
checkWorkflowStopped(self.services)
aiResponseJson = await self.aiService.callAiWithLooping(
prompt=generationPrompt,
options=options,
@ -858,7 +915,8 @@ The JSON should be a fragment that can be merged with the previous response."""
operationType=operationType,
sectionId=sectionId,
generationHint=generationHint,
generatedElements=generatedElements
generatedElements=generatedElements,
section=section
)
elements.extend(responseElements)
@ -1061,7 +1119,8 @@ The JSON should be a fragment that can be merged with the previous response."""
operationType=operationType,
sectionId=sectionId,
generationHint=generationHint,
generatedElements=generatedElements
generatedElements=generatedElements,
section=section
)
elements.extend(responseElements)
@ -1106,14 +1165,26 @@ The JSON should be a fragment that can be merged with the previous response."""
elif contentFormat == "object":
if part.typeGroup == "image":
elements.append({
"type": "image",
"content": {
"base64Data": part.data,
"altText": part.metadata.get("usageHint", part.label),
"caption": part.metadata.get("caption", "")
}
})
# Validate that image data exists
if not part.data:
logger.warning(f"Section {sectionId}: Image ContentPart {part.id} has no data (object format). Skipping image element.")
elements.append({
"type": "error",
"message": f"Image ContentPart {part.id} has no data",
"sectionId": sectionId
})
else:
# Get caption from section (priority: section.caption > part.metadata.caption)
caption = section.get("caption") or section.get("metadata", {}).get("caption") or part.metadata.get("caption", "")
elements.append({
"type": "image",
"content": {
"base64Data": part.data,
"altText": part.metadata.get("usageHint", part.label),
"caption": caption # Use caption from section
},
"caption": caption # Also at element level for compatibility
})
else:
elements.append({
"type": part.typeGroup,
@ -1125,6 +1196,12 @@ The JSON should be a fragment that can be merged with the previous response."""
})
elif contentFormat == "extracted":
# CRITICAL: If useAiCall is true, extracted parts are used as input for AI generation
# and should NOT be added as elements. Only add extracted text as element if useAiCall is false.
if useAiCall:
# Extracted part will be used as input for AI call - skip adding as element
logger.debug(f"Section {sectionId}: Skipping extracted part {part.id} as element (useAiCall=true, will be used as AI input)")
# Continue to process this part for AI call, but don't add as element yet
# Check if this is an image that needs Vision AI extraction
originalPartId = part.id
if (part.typeGroup == "image" and
@ -1143,6 +1220,7 @@ The JSON should be a fragment that can be merged with the previous response."""
contentParts=[part]
)
checkWorkflowStopped(self.services)
visionResponse = await self.aiService.callAi(visionRequest)
if visionResponse and visionResponse.content:
@ -1344,7 +1422,8 @@ The JSON should be a fragment that can be merged with the previous response."""
operationType=operationType,
sectionId=sectionId,
generationHint=generationHint,
generatedElements=generatedElements
generatedElements=generatedElements,
section=section
)
elements.extend(responseElements)
@ -1373,24 +1452,114 @@ The JSON should be a fragment that can be merged with the previous response."""
)
else:
# Füge extrahierten Content direkt hinzu (kein AI-Call)
if part.typeGroup == "image":
logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call")
elements.append({
"type": "image",
"content": {
"base64Data": part.data,
"altText": part.metadata.get("usageHint", part.label),
"caption": part.metadata.get("caption", "")
}
})
# CRITICAL: If content_type is "image", we must render an image, not extracted text
if contentType == "image":
# Section wants to display an image - find the image part
if part.typeGroup == "image":
# Direct image part - use it
logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call")
# Validate that image data exists
if not part.data:
logger.warning(f"Section {sectionId}: Image ContentPart {part.id} has no data (extracted format without AI call). Skipping image element.")
elements.append({
"type": "error",
"message": f"Image ContentPart {part.id} has no data",
"sectionId": sectionId
})
else:
# Get caption from section (priority: section.caption > part.metadata.caption)
caption = section.get("caption") or section.get("metadata", {}).get("caption") or part.metadata.get("caption", "")
elements.append({
"type": "image",
"content": {
"base64Data": part.data,
"altText": part.metadata.get("usageHint", part.label),
"caption": caption # Use caption from section
},
"caption": caption # Also at element level for compatibility
})
elif part.typeGroup == "text" and part.metadata.get("sourceImagePartId"):
# This is a vision-extracted text part - find the original image object part
sourceImagePartId = part.metadata.get("sourceImagePartId")
logger.debug(f"Processing section {sectionId}: Found vision-extracted text part, looking for original image object part: {sourceImagePartId}")
# Try to find the object part (format: "obj_...")
objectPartId = part.metadata.get("relatedObjectPartId")
objectPart = None
if objectPartId:
objectPart = self._findContentPartById(objectPartId, contentParts)
# If not found via metadata, search through all contentParts for object part
if not objectPart:
# Search for object part that references the source image part ID
for candidatePart in contentParts:
if (candidatePart.metadata.get("contentFormat") == "object" and
candidatePart.typeGroup == "image" and
sourceImagePartId in candidatePart.id):
objectPart = candidatePart
objectPartId = candidatePart.id
logger.debug(f"Section {sectionId}: Found object part {objectPartId} by searching all contentParts")
break
if objectPart and objectPart.typeGroup == "image" and objectPart.data:
logger.info(f"Section {sectionId}: Found object part {objectPartId} for image rendering")
caption = section.get("caption") or section.get("metadata", {}).get("caption") or objectPart.metadata.get("caption", "")
elements.append({
"type": "image",
"content": {
"base64Data": objectPart.data,
"altText": objectPart.metadata.get("usageHint", objectPart.label),
"caption": caption
},
"caption": caption
})
else:
logger.warning(f"Section {sectionId}: No object part found for vision-extracted text part {part.id} (sourceImagePartId={sourceImagePartId}), cannot render image")
elements.append({
"type": "error",
"message": f"Cannot render image: no object part found for extracted text part (sourceImagePartId={sourceImagePartId})",
"sectionId": sectionId
})
else:
logger.warning(f"Section {sectionId}: ContentPart {part.id} is not an image (typeGroup={part.typeGroup}), but section content_type is 'image'. Cannot render image.")
elements.append({
"type": "error",
"message": f"Cannot render image: ContentPart is not an image type",
"sectionId": sectionId
})
else:
logger.debug(f"Processing section {sectionId}: Single extracted TEXT part WITHOUT AI call")
elements.append({
"type": "extracted_text",
"content": part.data,
"source": part.metadata.get("documentId"),
"extractionPrompt": part.metadata.get("extractionPrompt")
})
# content_type is not "image" - add extracted text as normal
if part.typeGroup == "image":
logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call")
# Validate that image data exists
if not part.data:
logger.warning(f"Section {sectionId}: Image ContentPart {part.id} has no data (extracted format without AI call). Skipping image element.")
elements.append({
"type": "error",
"message": f"Image ContentPart {part.id} has no data",
"sectionId": sectionId
})
else:
# Get caption from section (priority: section.caption > part.metadata.caption)
caption = section.get("caption") or section.get("metadata", {}).get("caption") or part.metadata.get("caption", "")
elements.append({
"type": "image",
"content": {
"base64Data": part.data,
"altText": part.metadata.get("usageHint", part.label),
"caption": caption # Use caption from section
},
"caption": caption # Also at element level for compatibility
})
else:
logger.debug(f"Processing section {sectionId}: Single extracted TEXT part WITHOUT AI call")
elements.append({
"type": "extracted_text",
"content": part.data,
"source": part.metadata.get("documentId"),
"extractionPrompt": part.metadata.get("extractionPrompt")
})
# Update progress after section completion
chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0
@ -1462,6 +1631,10 @@ The JSON should be a fragment that can be merged with the previous response."""
# Process chapters sequentially with chapter-level progress
chapterIndex = 0
for doc in chapterStructure.get("documents", []):
docId = doc.get("id", "unknown")
# Get language for this specific document
docLanguage = self._getDocumentLanguage(chapterStructure, docId)
for chapter in doc.get("chapters", []):
chapterIndex += 1
chapterId = chapter.get("id", "unknown")
@ -1483,7 +1656,8 @@ The JSON should be a fragment that can be merged with the previous response."""
sectionTasks = []
for sectionIndex, section in enumerate(sections):
# Create task wrapper with semaphore for parallel processing
async def processSectionWithSemaphore(section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, language, calculateOverallProgress):
async def processSectionWithSemaphore(section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, docLanguage, calculateOverallProgress):
checkWorkflowStopped(self.services)
async with sectionSemaphore:
return await self._processSingleSection(
section=section,
@ -1497,12 +1671,12 @@ The JSON should be a fragment that can be merged with the previous response."""
contentParts=contentParts,
userPrompt=userPrompt,
all_sections_list=all_sections_list,
language=language,
language=docLanguage, # Use document-specific language
calculateOverallProgress=calculateOverallProgress
)
task = processSectionWithSemaphore(
section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, language, calculateOverallProgress
section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, docLanguage, calculateOverallProgress
)
sectionTasks.append((sectionIndex, section, task))
@ -1675,15 +1849,30 @@ The JSON should be a fragment that can be merged with the previous response."""
for partId in contentPartIds:
part = self._findContentPartById(partId, contentParts)
if not part:
# Part not found - try to show info from chapter structure
partInfo = contentPartInstructions.get(partId, {})
if partInfo:
logger.warning(f"Chapter {chapterId}: ContentPart {partId} not found in contentParts list, but has chapter structure info.")
contentPartsIndex += f"\n- ContentPart ID: {partId}\n"
if "instruction" in partInfo:
contentPartsIndex += f" Instruction: {partInfo['instruction']}\n"
if "caption" in partInfo:
contentPartsIndex += f" Caption: {partInfo['caption']}\n"
contentPartsIndex += f" Note: ContentPart not found in contentParts list (ID may be from nested structure)\n"
continue
contentFormat = part.metadata.get("contentFormat", "unknown")
instruction = contentPartInstructions.get(partId, {}).get("instruction", "Use content as needed")
partInfo = contentPartInstructions.get(partId, {})
instruction = partInfo.get("instruction", "Use content as needed")
caption = partInfo.get("caption")
contentPartsIndex += f"\n- ContentPart ID: {partId}\n"
contentPartsIndex += f" Format: {contentFormat}\n"
contentPartsIndex += f" Type: {part.typeGroup}\n"
contentPartsIndex += f" Instruction: {instruction}\n"
if instruction and instruction != "Use content as needed":
contentPartsIndex += f" Instruction: {instruction}\n"
if caption:
contentPartsIndex += f" Caption: {caption}\n"
if not contentPartsIndex:
contentPartsIndex = "\n(No content parts specified for this chapter)"
@ -1695,6 +1884,8 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles,
CHAPTER: {chapterTitle} (Level {chapterLevel}, ID: {chapterId})
GENERATION HINT: {generationHint}
**CRITICAL**: The chapter's generationHint above describes what content this chapter should generate. If the generationHint references documents/images/data, then EACH section that generates content for this chapter MUST assign the relevant ContentParts from AVAILABLE CONTENT PARTS below.
NOTE: Chapter already has a heading section. Do NOT generate a heading for the chapter title.
## SECTION INDEPENDENCE
@ -1705,7 +1896,18 @@ NOTE: Chapter already has a heading section. Do NOT generate a heading for the c
AVAILABLE CONTENT PARTS:
{contentPartsIndex}
CONTENT TYPES: table, bullet_list, heading, paragraph, code_block, image
## CONTENT ASSIGNMENT RULE - CRITICAL
If AVAILABLE CONTENT PARTS are listed above, then EVERY section that generates content related to those ContentParts MUST assign them explicitly.
**Assignment logic:**
- If section generates text content ABOUT a ContentPart assign "extracted" format ContentPart with appropriate instruction
- If section DISPLAYS a ContentPart assign "object" format ContentPart
- If section's generationHint or purpose relates to a ContentPart listed above → it MUST have contentPartIds assigned
- If chapter's generationHint references documents/images/data AND section generates content for that chapter → section MUST assign relevant ContentParts
- Empty contentPartIds [] are only allowed if section generates content WITHOUT referencing any available ContentParts AND WITHOUT relating to chapter's generationHint
## CONTENT TYPES
Available content types for sections: table, bullet_list, heading, paragraph, code_block, image
useAiCall RULES:
- useAiCall: true ONLY if ContentPart Format is "extracted" AND transformation needed
@ -1728,15 +1930,12 @@ RETURN JSON:
]
}}
EXAMPLES (all content types):
- paragraph: {{"id": "s1", "content_type": "paragraph", "contentPartIds": ["extracted_1"], "generationHint": "Include full text", "useAiCall": false, "elements": []}}
- bullet_list: {{"id": "s2", "content_type": "bullet_list", "contentPartIds": ["extracted_1"], "generationHint": "Create bullet list", "useAiCall": true, "elements": []}}
- table: {{"id": "s3", "content_type": "table", "contentPartIds": ["extracted_1", "extracted_2"], "generationHint": "Create table", "useAiCall": true, "elements": []}}
- heading: {{"id": "s4", "content_type": "heading", "contentPartIds": ["extracted_1"], "generationHint": "Extract heading", "useAiCall": true, "elements": []}}
- code_block: {{"id": "s5", "content_type": "code_block", "contentPartIds": ["extracted_1"], "generationHint": "Format code", "useAiCall": true, "elements": []}}
- image: {{"id": "s6", "content_type": "image", "contentPartIds": ["obj_1"], "generationHint": "Display image", "caption": "Figure 1: Description of the image", "useAiCall": false, "elements": []}}
- reference: {{"id": "s7", "content_type": "paragraph", "contentPartIds": ["ref_1"], "generationHint": "Reference", "useAiCall": false, "elements": []}}
- NO CONTENT PARTS (generate from scratch): {{"id": "s8", "content_type": "paragraph", "contentPartIds": [], "generationHint": "Write a detailed professional paragraph explaining [specific topic or purpose]. Include [key points to cover]. Address [important aspects]. Conclude with [summary or recommendations].", "useAiCall": true, "elements": []}}
**MANDATORY CONTENT ASSIGNMENT CHECK:**
For each section, verify:
1. Are ContentParts listed in AVAILABLE CONTENT PARTS above?
2. Does this section's generationHint or purpose relate to those ContentParts?
3. If YES to both section MUST have contentPartIds assigned (cannot be empty [])
4. Assign ContentPart IDs exactly as listed in AVAILABLE CONTENT PARTS above
IMAGE SECTIONS:
- For image sections, always provide a "caption" field with a descriptive caption for the image.
@ -1793,13 +1992,40 @@ Return only valid JSON. Do not include any explanatory text outside the JSON.
contentPartsText += f" Source file: {part.metadata.get('originalFileName')}\n"
if contentFormat == "extracted":
# Zeige Preview von extrahiertem Text (länger für besseren Kontext)
previewLength = 1000
if part.data:
preview = part.data[:previewLength] + "..." if len(part.data) > previewLength else part.data
contentPartsText += f" Content preview:\n```\n{preview}\n```\n"
# CRITICAL: Check if this is binary/image data - NEVER include in text prompt!
isBinaryOrImage = (
part.typeGroup == "image" or
part.typeGroup == "binary" or
(part.mimeType and (
part.mimeType.startswith("image/") or
part.mimeType.startswith("video/") or
part.mimeType.startswith("audio/") or
self._isBinaryMimeType(part.mimeType)
)) or
# Heuristic check: if data looks like base64 (long string with base64 chars)
(part.data and isinstance(part.data, str) and
len(part.data) > 100 and
self._looksLikeBase64(part.data))
)
if isBinaryOrImage:
# NEVER include binary/base64 data in text prompt - security risk and token explosion!
dataLength = len(part.data) if part.data else 0
contentPartsText += f" Type: {part.typeGroup}\n"
contentPartsText += f" MIME type: {part.mimeType or 'unknown'}\n"
contentPartsText += f" Data size: {dataLength} chars (binary/base64 - not shown in prompt)\n"
if part.metadata.get("needsVisionExtraction"):
contentPartsText += f" Note: Will be processed with Vision AI\n"
if part.metadata.get("usageHint"):
contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n"
else:
contentPartsText += f" Content: (empty)\n"
# Only for text data: Show preview
previewLength = 1000
if part.data:
preview = part.data[:previewLength] + "..." if len(part.data) > previewLength else part.data
contentPartsText += f" Content preview:\n```\n{preview}\n```\n"
else:
contentPartsText += f" Content: (empty)\n"
elif contentFormat == "reference":
contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n"
if part.metadata.get("usageHint"):
@ -1901,7 +2127,12 @@ Output requirements:
{contextText if contextText else ""}
"""
else:
prompt = f"""# TASK: Generate Section Content
# Determine if we have ContentParts or need to generate from scratch
hasContentParts = len(validParts) > 0
if hasContentParts:
# EXTRACT MODE: Extract data from provided ContentParts
prompt = f"""# TASK: Extract Section Content from Provided Data
LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
@ -1911,7 +2142,7 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles,
- Generation Hint: {generationHint}
## AVAILABLE CONTENT FOR THIS SECTION
{contentPartsText if contentPartsText else "(No content parts specified for this section)"}
{contentPartsText}
## INSTRUCTIONS
1. Extract data only from provided ContentParts. Never invent or generate data.
@ -1942,6 +2173,49 @@ Output requirements:
{userPrompt}
```
## CONTEXT
{contextText if contextText else ""}
"""
else:
# GENERATE MODE: Generate content from scratch based on generationHint
prompt = f"""# TASK: Generate Section Content
LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
## SECTION METADATA
- Section ID: {sectionId}
- Content Type: {contentType}
- Generation Hint: {generationHint}
## INSTRUCTIONS
1. Generate content based on the Generation Hint above.
2. Create appropriate content that matches the content_type ({contentType}).
3. The content should be relevant to the USER REQUEST and fit the context of surrounding sections.
4. Return only valid JSON with "elements" array.
5. No HTML/styling: Plain text only, no markup.
## OUTPUT FORMAT
Return a JSON object with this structure:
{{
"elements": [
{{
"type": "{contentType}",
"content": {contentStructureExample}
}}
]
}}
Output requirements:
- "content" must be an object (never a string)
- Return only valid JSON, no explanatory text
- Generate meaningful content based on the Generation Hint
## USER REQUEST
```
{userPrompt}
```
## CONTEXT
{contextText if contextText else ""}
"""
@ -2174,6 +2448,41 @@ Output requirements:
}
}
def _isBinaryMimeType(self, mimeType: str) -> bool:
"""Check if MIME type is binary."""
binaryTypes = [
"application/octet-stream",
"application/pdf",
"application/zip",
"application/x-zip-compressed"
]
return mimeType in binaryTypes
def _looksLikeBase64(self, data: str) -> bool:
"""
Heuristic check if string looks like base64-encoded data.
Base64 contains only: A-Z, a-z, 0-9, +, /, =, and whitespace.
If >95% of characters are base64 chars and no normal text patterns, likely base64.
"""
if not data or len(data) < 100:
return False
base64Chars = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r\t ")
sample = data[:500] # Check first 500 chars
if not sample:
return False
base64Ratio = sum(1 for c in sample if c in base64Chars) / len(sample)
# If >95% base64 chars and no normal text patterns (like spaces between words) → likely base64
# Base64 typically has very long strings without spaces or punctuation
hasNormalTextPatterns = any(
c in sample[:200] for c in ".,!?;:()[]{}\"'"
) or " " in sample[:200] # Double spaces suggest text
return base64Ratio > 0.95 and not hasNormalTextPatterns
def _findContentPartById(self, partId: str, contentParts: List[ContentPart]) -> Optional[ContentPart]:
"""Finde ContentPart nach ID."""
for part in contentParts:

View file

@ -13,6 +13,7 @@ from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@ -139,6 +140,7 @@ Continue generating the remaining chapters now.
# NOTE: Do NOT pass contentParts here - we only need metadata for structure generation
# The contentParts metadata is already included in the prompt (contentPartsIndex)
# Actual content extraction happens later during section generation
checkWorkflowStopped(self.services)
aiResponseJson = await self.aiService.callAiWithLooping(
prompt=structurePrompt,
options=options,
@ -259,36 +261,50 @@ This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generat
{userPrompt}
```
LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
DEFAULT LANGUAGE: If no language is specified for a document, use "{language}" (from user prompt). Each document can have its own language specified in the "language" field. Use ISO 639-1 language codes in lowercase (e.g., "de", "en", "fr", "it").
## AVAILABLE CONTENT PARTS
{contentPartsIndex}
## CHAPTER INDEPENDENCE
- Each chapter is independent and self-contained
- One chapter does NOT have information about another chapter
- Each chapter must provide its own context and be understandable alone
## CONTENT ASSIGNMENT RULE - CRITICAL
If the user request mentions documents/images/data, then EVERY chapter that generates content related to those references MUST assign the relevant ContentParts explicitly.
## CONTENT ASSIGNMENT
- Assign ContentParts to chapters via contentParts object
- For data extraction, the type of a contentPart (image, text, etc.) is not relevant - only what is specified in the instruction matters
- Include all relevant parts from same source when needed for structured data extraction
- Each contentPart can have either:
- "instruction": For AI extraction prompts (how to process/extract from this part)
- "caption": For user-facing presentation (how to display/reference this part in the document)
- Both can be present if needed
- Chapters without contentParts can only generate generic content (not document-specific)
**Assignment logic:**
- If chapter DISPLAYS a document/image assign "object" format ContentPart with "caption"
- If chapter generates text content ABOUT a document/image/data assign ContentPart with "instruction":
- Prefer "extracted" format if available (contains analyzed/extracted content)
- If only "object" format is available, use "object" format with "instruction" (to write ABOUT the image/document)
- If chapter's generationHint or purpose relates to a document/image/data mentioned in user request → it MUST have ContentParts assigned
- Multiple chapters might assign the same ContentPart (e.g., one chapter displays image, another writes about it)
- Use ContentPart IDs exactly as listed in AVAILABLE CONTENT PARTS above
- Empty contentParts are only allowed if chapter generates content WITHOUT referencing any documents/images/data from the user request
**CRITICAL RULE**: If the user request mentions BOTH:
a) Documents/images/data (listed in AVAILABLE CONTENT PARTS above), AND
b) Generic content types (article text, main content, body text, etc.)
Then chapters that generate those generic content types MUST assign the relevant ContentParts, because the content should relate to or be based on the provided documents/images/data.
## FORMATTING
- Formatting is handled automatically - focus on content and structure only
## CHAPTER STRUCTURE
- chapter id, level (1, 2, 3, etc.), title
- contentParts: {{"partId": {{"instruction": "..."}} or {{"caption": "..."}} or both}} - Compact mapping of part IDs to their extraction instructions and/or presentation captions
- generationHint: Self-contained description that reflects the user's intent for the specific data. If contentParts is empty, must be detailed. If contentParts are present, the hint should guide how to extract and structure the data according to the user's requirements (e.g., specific columns, format, structure)
## CHAPTER STRUCTURE REQUIREMENTS
- Generate chapters based on USER REQUEST - analyze what structure the user wants
- Each chapter needs: id, level (1, 2, 3, etc.), title
- contentParts: {{"partId": {{"instruction": "..."}} or {{"caption": "..."}} or both}} - Assign ContentParts as required by CONTENT ASSIGNMENT RULE above
- generationHint: Description of what content to generate for this chapter
- The number of chapters depends on the user request - create only what is requested
## DOCUMENT LANGUAGE
- Each document can have its own language (ISO 639-1 code in lowercase: "de", "en", "fr", "it", etc.)
- If no language is specified for a document, use the user prompt language: "{language}"
- The language determines in which language the content of that document will be generated
- Multiple documents can have different languages if needed
- Always use lowercase ISO 639-1 codes in the JSON output (e.g., "de", not "DE")
## OUTPUT FORMAT
Return JSON:
Generate the chapter structure based on the USER REQUEST above. The number and types of chapters depend entirely on what the user requested - do NOT copy the example structure below.
EXAMPLE STRUCTURE (for reference only - adapt to user request):
{{
"metadata": {{
"title": "Document Title",
@ -298,38 +314,39 @@ Return JSON:
"id": "doc_1",
"title": "Document Title",
"filename": "document.{outputFormat}",
"language": "{language}",
"chapters": [
{{
"id": "chapter_1",
"level": 1,
"title": "Introduction",
"title": "Chapter Title",
"contentParts": {{
"part_ext_1": {{
"instruction": "Use full extracted text"
}},
"part_img_1": {{
"instruction": "Analyze image for additional details"
}},
"part_img_2": {{
"instruction": "Analyze image for additional details",
"caption": "Figure 1: Overview diagram"
"extracted_part_id": {{
"instruction": "Use extracted content..."
}}
}},
"generationHint": "Create introduction section",
"sections": []
}},
{{
"id": "chapter_2",
"level": 1,
"title": "Main Title",
"contentParts": {{}},
"generationHint": "Create [specific content description] with [formatting details]. Include [required information]. Purpose: [explanation of what this chapter provides].",
"generationHint": "Description of chapter content",
"sections": []
}}
]
}}]
}}
CRITICAL INSTRUCTIONS:
- Generate chapters based on USER REQUEST, NOT based on the example above
- The example shows the JSON structure format, NOT the required chapters
- Create only the chapters that match the user's request
- Adapt chapter titles and structure to match the user's specific request
**MANDATORY CONTENT ASSIGNMENT CHECK:**
For each chapter, verify:
1. Does the user request mention documents/images/data? (e.g., "photo", "image", "document", "data", "based on", "about")
2. Does this chapter's generationHint, title, or purpose relate to those documents/images/data mentioned in step 1?
- Examples: "article about the photo", "text describing the image", "analysis of the document", "content based on the data"
- Even if chapter doesn't explicitly say "about the image", if user request mentions both the image AND this chapter's content type relate them
3. If YES to both chapter MUST have contentParts assigned (cannot be empty {{}})
4. If ContentPart is "object" format and chapter needs to write ABOUT it assign with "instruction" field, not just "caption"
OUTPUT FORMAT: Start with {{ and end with }}. Do NOT use markdown code fences (```json). Do NOT add explanatory text before or after the JSON. Return ONLY the JSON object itself.
"""
return prompt

View file

@ -14,6 +14,7 @@ from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata,
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
from modules.datamodels.datamodelDocument import RenderedDocument
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@ -58,6 +59,35 @@ class DocumentGenerationPath:
if documentList:
documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
# Filter: Entferne Original-Dokumente, wenn bereits Pre-Extracted JSONs existieren
# (um Duplikate zu vermeiden - Pre-Extracted JSONs enthalten bereits die ContentParts)
# Schritt 1: Identifiziere alle Original-Dokument-IDs, die durch Pre-Extracted JSONs abgedeckt werden
originalDocIdsCoveredByPreExtracted = set()
for doc in documents:
preExtracted = self.services.ai.intentAnalyzer.resolvePreExtractedDocument(doc)
if preExtracted:
originalDocId = preExtracted["originalDocument"]["id"]
originalDocIdsCoveredByPreExtracted.add(originalDocId)
logger.debug(f"Found pre-extracted JSON {doc.id} covering original document {originalDocId}")
# Schritt 2: Filtere Dokumente - entferne Original-Dokumente, die bereits durch Pre-Extracted JSONs abgedeckt werden
filteredDocuments = []
for doc in documents:
preExtracted = self.services.ai.intentAnalyzer.resolvePreExtractedDocument(doc)
if preExtracted:
# Pre-Extracted JSON behalten
filteredDocuments.append(doc)
elif doc.id in originalDocIdsCoveredByPreExtracted:
# Original-Dokument, das bereits durch Pre-Extracted JSON abgedeckt wird - entfernen
logger.info(f"Skipping original document {doc.id} ({doc.fileName}) - already covered by pre-extracted JSON")
else:
# Normales Dokument ohne Pre-Extracted JSON - behalten
filteredDocuments.append(doc)
documents = filteredDocuments
checkWorkflowStopped(self.services)
if not documentIntents and documents:
documentIntents = await self.services.ai.clarifyDocumentIntents(
documents,
@ -66,6 +96,8 @@ class DocumentGenerationPath:
docOperationId
)
checkWorkflowStopped(self.services)
# Schritt 5B: Extrahiere und bereite Content vor
if documents:
preparedContentParts = await self.services.ai.extractAndPrepareContent(
@ -91,6 +123,8 @@ class DocumentGenerationPath:
if contentParts:
logger.info(f"Using {len(contentParts)} content parts for generation (no AI extraction at this stage)")
checkWorkflowStopped(self.services)
# Schritt 5C: Generiere Struktur
structure = await self.services.ai.generateStructure(
userPrompt,
@ -99,6 +133,8 @@ class DocumentGenerationPath:
docOperationId
)
checkWorkflowStopped(self.services)
# Schritt 5D: Fülle Struktur
# Language will be extracted from services (user intention analysis) in fillStructure
filledStructure = await self.services.ai.fillStructure(
@ -108,6 +144,8 @@ class DocumentGenerationPath:
docOperationId
)
checkWorkflowStopped(self.services)
# Schritt 5E: Rendere Resultat
# Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
renderedDocuments = await self.services.ai.renderResult(

View file

@ -71,8 +71,9 @@ class RendererCsv(BaseRenderer):
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
documentTitle = title if title else metadata.get("title", "Generated Document")
# Generate CSV content
csvRows = []

View file

@ -121,8 +121,9 @@ class RendererDocx(BaseRenderer):
sections = self._extractSections(json_content)
metadata = self._extractMetadata(json_content)
# Use title from JSON metadata if available, otherwise use provided title
document_title = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
document_title = title if title else metadata.get("title", "Generated Document")
# Add document title using Title style
if document_title:

View file

@ -107,8 +107,9 @@ class RendererHtml(BaseRenderer):
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
documentTitle = title if title else metadata.get("title", "Generated Document")
# Build HTML document
htmlParts = []

View file

@ -86,8 +86,9 @@ class RendererImage(BaseRenderer):
# Extract metadata from standardized schema
metadata = self._extractMetadata(extractedContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
documentTitle = title if title else metadata.get("title", "Generated Document")
# Create AI prompt for image generation
imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService)

View file

@ -82,8 +82,9 @@ class RendererMarkdown(BaseRenderer):
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
documentTitle = title if title else metadata.get("title", "Generated Document")
# Build markdown content
markdownParts = []

View file

@ -110,8 +110,9 @@ class RendererPdf(BaseRenderer):
sections = self._extractSections(json_content)
metadata = self._extractMetadata(json_content)
# Use title from JSON metadata if available, otherwise use provided title
document_title = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
document_title = title if title else metadata.get("title", "Generated Document")
# Make title shorter to prevent wrapping/overlapping
if len(document_title) > 40:

View file

@ -601,8 +601,9 @@ JSON ONLY. NO OTHER TEXT."""
sections = self._extractSections(json_content)
metadata = self._extractMetadata(json_content)
# Use title from JSON metadata if available, otherwise use provided title
document_title = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
document_title = title if title else metadata.get("title", "Generated Document")
# Create title slide
slides.append({

View file

@ -104,8 +104,9 @@ class RendererText(BaseRenderer):
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
documentTitle = title if title else metadata.get("title", "Generated Document")
# Build text content
textParts = []

View file

@ -290,8 +290,9 @@ class RendererXlsx(BaseRenderer):
# Extract metadata from standardized schema
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
document_title = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
document_title = title if title else metadata.get("title", "Generated Document")
# Create workbook
wb = Workbook()
@ -689,7 +690,12 @@ class RendererXlsx(BaseRenderer):
# If no level 1 headings found, use document title
if not sheetNames:
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
# Use documents[].title as primary source, fallback to metadata.title
documents = jsonContent.get("documents", [])
if documents and isinstance(documents[0], dict) and documents[0].get("title"):
documentTitle = documents[0].get("title")
else:
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
sheetNames.append(self._sanitizeSheetName(documentTitle))
return sheetNames
@ -825,8 +831,12 @@ class RendererXlsx(BaseRenderer):
def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]):
"""Populate the main sheet with document overview and all content."""
try:
# Document title
documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
# Document title - use documents[].title as primary source, fallback to metadata.title
documents = jsonContent.get("documents", [])
if documents and isinstance(documents[0], dict) and documents[0].get("title"):
documentTitle = documents[0].get("title")
else:
documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
sheet['A1'] = documentTitle
# Safety check for title style

View file

@ -13,6 +13,7 @@ import re
import traceback
from typing import Dict, Any, Optional, List, Callable
from modules.services.serviceGeneration.subContentIntegrator import ContentIntegrator
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@ -167,6 +168,7 @@ class ContentGenerator:
contentPartsMap[partId] = part
for idx, section in enumerate(sections):
checkWorkflowStopped(self.services)
try:
contentType = section.get("content_type", "content")
sectionId = section.get("id", f"section_{idx}")
@ -229,7 +231,8 @@ class ContentGenerator:
sections: List[Dict[str, Any]],
cachedContent: Optional[Dict[str, Any]],
userPrompt: str,
documentMetadata: Dict[str, Any],
contentParts: Optional[List[Any]] = None,
documentMetadata: Dict[str, Any] = {},
progressCallback: Optional[Callable] = None,
batchSize: int = 10
) -> List[Dict[str, Any]]:
@ -240,6 +243,7 @@ class ContentGenerator:
sections: List of sections to generate
cachedContent: Extracted content cache
userPrompt: Original user prompt
contentParts: List of all available ContentParts (for mapping by contentPartIds)
documentMetadata: Document metadata
progressCallback: Progress callback function
batchSize: Number of sections to process in parallel per batch
@ -253,6 +257,14 @@ class ContentGenerator:
if totalSections == 0:
return []
# Create ContentParts lookup map by ID
contentPartsMap = {}
if contentParts:
for part in contentParts:
partId = part.id if hasattr(part, 'id') else part.get('id', '')
if partId:
contentPartsMap[partId] = part
# Adjust batch size based on section types (images take longer)
imageCount = sum(1 for s in sections if s.get("content_type") == "image")
if imageCount > 0:
@ -277,6 +289,7 @@ class ContentGenerator:
)
async def generateWithProgress(section: Dict[str, Any], globalIndex: int, localIndex: int, batchPreviousSections: List[Dict[str, Any]]):
checkWorkflowStopped(self.services)
try:
contentType = section.get("content_type", "content")
sectionId = section.get("id", f"section_{globalIndex}")
@ -422,6 +435,7 @@ class ContentGenerator:
resultFormat="json"
)
checkWorkflowStopped(self.services)
aiResponse = await self.services.ai.callAiContent(
prompt=sectionPrompt,
options=options,
@ -603,6 +617,59 @@ class ContentGenerator:
) -> Dict[str, Any]:
"""Generate image for image section or include existing image"""
try:
# First, check if section has image ContentParts to integrate directly
sectionContentParts = context.get("sectionContentParts", [])
if sectionContentParts:
# Look for image ContentParts
for part in sectionContentParts:
partTypeGroup = part.typeGroup if hasattr(part, 'typeGroup') else part.get('typeGroup', '')
partMimeType = part.mimeType if hasattr(part, 'mimeType') else part.get('mimeType', '')
isImage = partTypeGroup == "image" or (partMimeType and partMimeType.startswith("image/"))
if isImage:
# Extract image data from ContentPart
partData = part.data if hasattr(part, 'data') else part.get('data', '')
partId = part.id if hasattr(part, 'id') else part.get('id', '')
# Get base64 data
base64Data = None
if isinstance(partData, str):
# Check if it's already base64 or needs extraction
if partData.startswith("data:image"):
# Extract base64 from data URL
base64Data = partData.split(",", 1)[1] if "," in partData else partData
elif len(partData) > 100: # Likely base64 string
base64Data = partData
elif isinstance(partData, bytes):
import base64
base64Data = base64.b64encode(partData).decode('utf-8')
if base64Data:
# Get caption from section (priority: section.caption > metadata.caption)
caption = section.get("caption") or section.get("metadata", {}).get("caption")
# Get alt text from ContentPart metadata or section
altText = part.metadata.get("altText") if hasattr(part, 'metadata') else part.get('metadata', {}).get('altText')
if not altText:
altText = section.get("generation_hint", "Image")
# Get mime type
mimeType = partMimeType or "image/png"
# Create image element with caption
section["elements"] = [{
"type": "image",
"content": {
"base64Data": base64Data,
"altText": altText,
"caption": caption # Include caption from section
},
"caption": caption # Also at element level for compatibility
}]
logger.info(f"Successfully integrated image from ContentPart {partId} for section {section.get('id')} with caption: {caption}")
return section
# Check if this is an existing image to include or render
imageSource = section.get("image_source", "generate")
@ -623,12 +690,17 @@ class ContentGenerator:
# Create image element from existing/render image
altText = imageDoc.get("altText", section.get("generation_hint", "Image"))
mimeType = imageDoc.get("mimeType", "image/png")
caption = section.get("caption") or section.get("metadata", {}).get("caption")
# Use nested content structure for consistency with renderers
section["elements"] = [{
"base64Data": imageDoc.get("base64Data"),
"altText": altText,
"mimeType": mimeType,
"caption": section.get("caption") or section.get("metadata", {}).get("caption")
"type": "image",
"content": {
"base64Data": imageDoc.get("base64Data"),
"altText": altText,
"caption": caption # Include caption in content structure
},
"caption": caption # Also at element level for compatibility
}]
logger.info(f"Successfully integrated image {imageRefId} for section {section.get('id')} (source={imageSource})")
@ -666,6 +738,7 @@ class ContentGenerator:
logger.info(f"Starting image generation for section {section.get('id')}: {imagePrompt[:100]}...")
# Call AI for image generation
checkWorkflowStopped(self.services)
aiResponse = await self.services.ai.callAiContent(
prompt=promptJson,
options=options,
@ -704,11 +777,15 @@ class ContentGenerator:
caption = section.get("caption") or section.get("metadata", {}).get("caption")
# Use nested content structure for consistency with renderers
section["elements"] = [{
"url": f"data:image/png;base64,{base64Data}",
"base64Data": base64Data,
"altText": altText,
"caption": caption
"type": "image",
"content": {
"base64Data": base64Data,
"altText": altText,
"caption": caption # Include caption in content structure
},
"caption": caption # Also at element level for compatibility
}]
logger.info(f"Successfully generated image for section {section.get('id')}")

View file

@ -17,25 +17,11 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
return ActionResult.isFailure(error="prompt is required")
documentList = parameters.get("documentList", [])
# Optional: if omitted, formats determined from prompt by AI
resultType = parameters.get("resultType")
# Auto-detect format from prompt if not provided
if not resultType:
promptLower = prompt.lower()
if ".html" in promptLower or "html file" in promptLower:
resultType = "html"
elif ".js" in promptLower or "javascript" in promptLower:
resultType = "js"
elif ".py" in promptLower or "python" in promptLower:
resultType = "py"
elif ".ts" in promptLower or "typescript" in promptLower:
resultType = "ts"
elif ".java" in promptLower:
resultType = "java"
elif ".cpp" in promptLower or ".c++" in promptLower:
resultType = "cpp"
else:
resultType = "txt" # Default
logger.debug("resultType not provided - formats will be determined from prompt by AI")
# Create operation ID for progress tracking
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
@ -67,11 +53,12 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
processingMode=ProcessingModeEnum.DETAILED
)
# outputFormat: Optional - if None, formats determined from prompt by AI
aiResponse: AiResponse = await self.services.ai.callAiContent(
prompt=prompt,
options=options,
documentList=docRefList,
outputFormat=resultType,
outputFormat=resultType, # Can be None - AI determines from prompt
title=title,
parentOperationId=parentOperationId,
generationIntent="code" # Explicit intent, skips detection
@ -93,7 +80,8 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
# If no documents but content exists, create a document from content
if not documents and aiResponse.content:
# Determine document name from metadata
docName = f"code.{resultType}"
resultTypeFallback = resultType or "txt" # Fallback for file naming
docName = f"code.{resultTypeFallback}"
if aiResponse.metadata and aiResponse.metadata.filename:
docName = aiResponse.metadata.filename
elif aiResponse.metadata and aiResponse.metadata.title:
@ -101,8 +89,8 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title)
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
if sanitized:
if not sanitized.lower().endswith(f".{resultType}"):
docName = f"{sanitized}.{resultType}"
if not sanitized.lower().endswith(f".{resultTypeFallback}"):
docName = f"{sanitized}.{resultTypeFallback}"
else:
docName = sanitized

View file

@ -18,23 +18,11 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
documentList = parameters.get("documentList", [])
documentType = parameters.get("documentType")
resultType = parameters.get("resultType", "txt")
# Optional: if omitted, formats determined from prompt by AI
resultType = parameters.get("resultType")
# Auto-detect format from prompt if not explicitly provided
if resultType == "txt" and prompt:
promptLower = prompt.lower()
if "html" in promptLower or "html5" in promptLower:
resultType = "html"
logger.info(f"Auto-detected HTML format from prompt")
elif "pdf" in promptLower:
resultType = "pdf"
logger.info(f"Auto-detected PDF format from prompt")
elif "markdown" in promptLower or " md " in promptLower or promptLower.endswith(" md"):
resultType = "md"
logger.info(f"Auto-detected Markdown format from prompt")
elif ("text" in promptLower or "txt" in promptLower) and "html" not in promptLower:
resultType = "txt"
logger.info(f"Auto-detected Text format from prompt")
if not resultType:
logger.debug("resultType not provided - formats will be determined from prompt by AI")
# Create operation ID for progress tracking
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
@ -69,11 +57,12 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
compressContext=False
)
# outputFormat: Optional - if None, formats determined from prompt by AI
aiResponse: AiResponse = await self.services.ai.callAiContent(
prompt=prompt,
options=options,
documentList=docRefList, # Übergebe documentList direkt - callAiContent macht Phasen 5A-5E
outputFormat=resultType,
outputFormat=resultType, # Can be None - AI determines from prompt
title=title,
parentOperationId=parentOperationId,
generationIntent="document" # NEW: Explicit intent, skips detection
@ -95,7 +84,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
# If no documents but content exists, create a document from content
if not documents and aiResponse.content:
# Determine document name from metadata
docName = f"document.{resultType}"
resultTypeFallback = resultType or "txt" # Fallback for file naming
docName = f"document.{resultTypeFallback}"
if aiResponse.metadata and aiResponse.metadata.filename:
docName = aiResponse.metadata.filename
elif aiResponse.metadata and aiResponse.metadata.title:
@ -103,8 +93,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title)
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
if sanitized:
if not sanitized.lower().endswith(f".{resultType}"):
docName = f"{sanitized}.{resultType}"
if not sanitized.lower().endswith(f".{resultTypeFallback}"):
docName = f"{sanitized}.{resultTypeFallback}"
else:
docName = sanitized

View file

@ -54,8 +54,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
logger.error(f"Invalid documentList type: {type(documentListParam)}")
documentList = DocumentReferenceList(references=[])
resultType = parameters.get("resultType", "txt")
# Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only.
resultType = parameters.get("resultType")
if not aiPrompt:
logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
@ -63,11 +63,20 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
error="AI prompt is required"
)
# Determine output extension and default MIME type without duplicating service logic
normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
output_extension = f".{normalized_result_type}"
# Handle optional resultType: if None, formats determined from prompt by AI
if resultType:
normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
output_extension = f".{normalized_result_type}"
output_format = output_extension.replace('.', '') or 'txt'
logger.info(f"Using result type: {resultType} -> {output_extension}")
else:
# No format specified - AI will determine formats from prompt
normalized_result_type = None
output_extension = None
output_format = None
logger.debug("resultType not provided - formats will be determined from prompt by AI")
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
logger.info(f"Using result type: {resultType} -> {output_extension}")
# Phase 7.3: Extract content first if documents provided, then use contentParts
# Check if contentParts are already provided (preferred path)
@ -121,54 +130,33 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
# Update progress - preparing AI call
self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
# Detect image generation from resultType
# Detect image generation from resultType (if provided)
imageFormats = ["png", "jpg", "jpeg", "gif", "webp"]
isImageGeneration = normalized_result_type in imageFormats
isImageGeneration = normalized_result_type in imageFormats if normalized_result_type else False
# Build options with correct operationType
output_format = output_extension.replace('.', '') or 'txt'
from modules.datamodels.datamodelAi import OperationTypeEnum
options = AiCallOptions(
resultFormat=output_format,
resultFormat=output_format or "txt", # Fallback for options, but outputFormat can be None for callAiContent
operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE
)
# Get generationIntent from parameters
generationIntent = parameters.get("generationIntent")
# For DATA_GENERATE, generationIntent is REQUIRED
# If not provided, default to "document" for document formats (xlsx, docx, pdf, txt, html, etc.)
# This is format-based defaulting, not prompt-based auto-detection
if options.operationType == OperationTypeEnum.DATA_GENERATE and not generationIntent:
# Document formats (default to document generation)
documentFormats = ["xlsx", "docx", "pdf", "txt", "md", "html", "csv", "xml", "json", "pptx"]
# Code formats (should use ai.generateCode instead, but default to code if ai.process is used)
codeFormats = ["py", "js", "ts", "java", "cpp", "c", "go", "rs", "rb", "php", "swift", "kt"]
if normalized_result_type in documentFormats:
generationIntent = "document"
logger.info(f"Defaulting generationIntent to 'document' for resultType '{normalized_result_type}'")
elif normalized_result_type in codeFormats:
generationIntent = "code"
logger.info(f"Defaulting generationIntent to 'code' for resultType '{normalized_result_type}'")
else:
# Unknown format - default to document (most common use case)
generationIntent = "document"
logger.warning(
f"Unknown resultType '{normalized_result_type}', defaulting generationIntent to 'document'. "
f"For code generation, use ai.generateCode action or explicitly pass generationIntent='code'."
)
# Get generationIntent from parameters (required for DATA_GENERATE)
# Default to "document" if not provided (most common use case)
# For code generation, use ai.generateCode action or explicitly pass generationIntent="code"
generationIntent = parameters.get("generationIntent", "document")
# Update progress - calling AI
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
# Use unified callAiContent method with contentParts (extraction is now separate)
# ContentParts are already extracted above (or None if no documents)
# outputFormat: Optional - if None, formats determined from prompt by AI
aiResponse = await self.services.ai.callAiContent(
prompt=aiPrompt,
options=options,
contentParts=contentParts, # Already extracted (or None if no documents)
outputFormat=output_format,
outputFormat=output_format, # Can be None - AI determines from prompt
parentOperationId=operationId,
generationIntent=generationIntent # REQUIRED for DATA_GENERATE
)
@ -198,7 +186,7 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
final_documents = action_documents
else:
# Text response - create document from content
extension = output_extension.lstrip('.')
extension = output_extension.lstrip('.') if output_extension else "txt"
meaningful_name = self._generateMeaningfulFileName(
base_name="ai",
extension=extension,
@ -206,8 +194,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
)
validationMetadata = {
"actionType": "ai.process",
"resultType": normalized_result_type,
"outputFormat": output_format,
"resultType": normalized_result_type or "auto",
"outputFormat": output_format or "auto",
"hasDocuments": False,
"contentType": "text"
}

View file

@ -60,7 +60,7 @@ class MethodAi(MethodBase):
frontendOptions=["txt", "json", "md", "csv", "xml", "html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"],
required=False,
default="txt",
description="Output file extension. All output documents will use this format"
description="Output file extension. Optional: if omitted, formats are determined from prompt by AI. Default \"txt\" is validation fallback only. With per-document format determination, AI can determine different formats for different documents based on prompt."
),
"generationIntent": WorkflowActionParameter(
name="generationIntent",
@ -68,7 +68,8 @@ class MethodAi(MethodBase):
frontendType=FrontendType.SELECT,
frontendOptions=["document", "code", "image"],
required=False,
description="Explicit generation intent (\"document\" | \"code\" | \"image\"). For DATA_GENERATE operations, if not provided, defaults based on resultType: document formats (xlsx, docx, pdf, etc.) → \"document\", code formats (py, js, ts, etc.) → \"code\". For IMAGE_GENERATE operations, this parameter is ignored. Best practice: Use qualified actions (ai.generateDocument, ai.generateCode) instead of ai.process."
default="document",
description="Explicit generation intent (\"document\" | \"code\" | \"image\"). Required for DATA_GENERATE operations. Defaults to \"document\" if not provided. For code generation, use ai.generateCode action or explicitly pass generationIntent=\"code\". For IMAGE_GENERATE operations, this parameter is ignored."
)
},
execute=process.__get__(self, self.__class__)
@ -267,7 +268,7 @@ class MethodAi(MethodBase):
frontendType=FrontendType.TEXT,
required=False,
default="txt",
description="Output format (e.g., txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg). Any format supported by renderers is acceptable. Default: txt"
description="Output format (e.g., txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg). Optional: if omitted, formats are determined from prompt by AI. Default \"txt\" is validation fallback only. With per-document format determination, AI can determine different formats for different documents based on prompt."
)
},
execute=generateDocument.__get__(self, self.__class__)
@ -297,7 +298,7 @@ class MethodAi(MethodBase):
frontendType=FrontendType.SELECT,
frontendOptions=["py", "js", "ts", "html", "java", "cpp", "txt"],
required=False,
description="Output format (html, js, py, etc.). Default: based on prompt"
description="Output format (html, js, py, etc.). Optional: if omitted, formats are determined from prompt by AI. With per-document format determination, AI can determine different formats for different documents based on prompt."
)
},
execute=generateCode.__get__(self, self.__class__)

View file

@ -693,12 +693,38 @@ The following is the user's original input message. Analyze intent, normalize th
setattr(self.services, '_needsWorkflowHistory', False)
# Update services state
# CRITICAL: Validate language from AI response
# If AI didn't return language or invalid → use user language
# If user language not set → use "en"
validatedLanguage = None
# Validate AI-detected language
if detectedLanguage and isinstance(detectedLanguage, str):
self._setUserLanguage(detectedLanguage)
try:
setattr(self.services, 'currentUserLanguage', detectedLanguage)
except Exception:
pass
detectedLanguage = detectedLanguage.strip().lower()
# Check if it's a valid 2-character ISO code
if len(detectedLanguage) == 2 and detectedLanguage.isalpha():
validatedLanguage = detectedLanguage
# If AI didn't return valid language, use user language
if not validatedLanguage:
userLanguage = getattr(self.services.user, 'language', None) if hasattr(self.services, 'user') and self.services.user else None
if userLanguage and isinstance(userLanguage, str):
userLanguage = userLanguage.strip().lower()
if len(userLanguage) == 2 and userLanguage.isalpha():
validatedLanguage = userLanguage
# Final fallback to "en"
if not validatedLanguage:
validatedLanguage = "en"
logger.warning("Language not detected from AI and user language not set - using default 'en'")
# Set validated language
self._setUserLanguage(validatedLanguage)
try:
setattr(self.services, 'currentUserLanguage', validatedLanguage)
logger.debug(f"Set currentUserLanguage to validated value: {validatedLanguage}")
except Exception:
pass
self.services.currentUserPrompt = intentText or userInput.prompt
# Always set currentUserPromptNormalized - use normalizedRequest if available, otherwise fallback to currentUserPrompt
normalizedValue = normalizedRequest or intentText or userInput.prompt