fixes for content handling
This commit is contained in:
parent
3ef87cd083
commit
3408e7b463
22 changed files with 3346 additions and 224 deletions
2564
modules/services/serviceAi/CONTENT_EXTRACTION_ANALYSIS.md
Normal file
2564
modules/services/serviceAi/CONTENT_EXTRACTION_ANALYSIS.md
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -17,6 +17,7 @@ from modules.datamodels.datamodelExtraction import ContentPart
|
|||
from modules.shared.jsonUtils import buildContinuationContext, extractJsonString, tryParseJson
|
||||
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
|
||||
from modules.services.serviceAi.subLoopingUseCases import LoopingUseCaseRegistry
|
||||
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -134,6 +135,7 @@ class AiCallLooper:
|
|||
|
||||
# Make AI call
|
||||
try:
|
||||
checkWorkflowStopped(self.services)
|
||||
if iterationOperationId:
|
||||
self.services.chat.progressLogUpdate(iterationOperationId, 0.3, "Calling AI model")
|
||||
# ARCHITECTURE: Pass ContentParts directly to AiCallRequest
|
||||
|
|
@ -621,6 +623,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
|
|||
# Write KPI definition prompt to debug file
|
||||
self.services.utils.writeDebugFile(kpiDefinitionPrompt, f"{debugPrefix}_kpi_definition_prompt")
|
||||
|
||||
checkWorkflowStopped(self.services)
|
||||
response = await self.aiService.callAi(request)
|
||||
|
||||
# Write KPI definition response to debug file
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from typing import Dict, Any, List, Optional
|
|||
|
||||
from modules.datamodels.datamodelChat import ChatDocument
|
||||
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
|
||||
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -70,6 +71,7 @@ class ContentExtractor:
|
|||
allContentParts = []
|
||||
|
||||
for document in documents:
|
||||
checkWorkflowStopped(self.services)
|
||||
# Check if document is already a ContentExtracted document (pre-extracted JSON)
|
||||
logger.debug(f"Checking document {document.id} ({document.fileName}, mimeType={document.mimeType}) for pre-extracted content")
|
||||
preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(document)
|
||||
|
|
@ -92,12 +94,28 @@ class ContentExtractor:
|
|||
logger.warning(f" ⚠️ No intent found for pre-extracted document {document.id}! Available intent documentIds: {[i.documentId for i in documentIntents]}")
|
||||
|
||||
if contentExtracted.parts:
|
||||
# CRITICAL: Process pre-extracted parts - analyze structure parts for nested content
|
||||
processedParts = []
|
||||
for part in contentExtracted.parts:
|
||||
# Überspringe leere Parts (Container ohne Daten)
|
||||
if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0):
|
||||
if part.typeGroup == "container":
|
||||
continue # Überspringe leere Container
|
||||
|
||||
# CRITICAL: Check if structure part contains nested parts (e.g., JSON with documentData.parts)
|
||||
if part.typeGroup == "structure" and part.mimeType == "application/json" and part.data:
|
||||
nestedParts = self._extractNestedPartsFromStructure(part, document, preExtracted, intent)
|
||||
if nestedParts:
|
||||
# Replace structure part with extracted nested parts
|
||||
processedParts.extend(nestedParts)
|
||||
logger.info(f"✅ Extracted {len(nestedParts)} nested parts from structure part {part.id}")
|
||||
continue # Skip original structure part
|
||||
|
||||
# Keep original part if no nested parts found
|
||||
processedParts.append(part)
|
||||
|
||||
# Use processed parts (with nested parts extracted)
|
||||
for part in processedParts:
|
||||
if not part.metadata:
|
||||
part.metadata = {}
|
||||
|
||||
|
|
@ -352,6 +370,7 @@ class ContentExtractor:
|
|||
)
|
||||
|
||||
# extractContent ist nicht async - keine await nötig
|
||||
checkWorkflowStopped(self.services)
|
||||
extractedResults = self.services.extraction.extractContent(
|
||||
[document],
|
||||
extractionOptions,
|
||||
|
|
@ -431,6 +450,7 @@ class ContentExtractor:
|
|||
)
|
||||
|
||||
# Verwende AI-Service für Vision AI-Verarbeitung
|
||||
checkWorkflowStopped(self.services)
|
||||
response = await self.aiService.callAi(request)
|
||||
|
||||
# Debug-Log für Response (harmonisiert)
|
||||
|
|
@ -504,6 +524,7 @@ class ContentExtractor:
|
|||
)
|
||||
|
||||
# Verwende AI-Service für Text-Verarbeitung
|
||||
checkWorkflowStopped(self.services)
|
||||
response = await self.aiService.callAi(request)
|
||||
|
||||
# Debug-Log für Response (harmonisiert)
|
||||
|
|
@ -537,4 +558,84 @@ class ContentExtractor:
|
|||
"application/x-zip-compressed"
|
||||
]
|
||||
return mimeType in binaryTypes or mimeType.startswith("image/") or mimeType.startswith("video/") or mimeType.startswith("audio/")
|
||||
|
||||
def _extractNestedPartsFromStructure(
    self,
    structurePart: ContentPart,
    document: ChatDocument,
    preExtracted: Dict[str, Any],
    intent: Optional[Any]
) -> List[ContentPart]:
    """
    Extract nested parts from a structure ContentPart (e.g., JSON with documentData.parts).

    The part's data is parsed as JSON. If the result is a dict whose
    "documentData" entry is a dict holding a non-empty "parts" list, every
    dict entry of that list is turned into its own ContentPart. Non-dict
    entries are skipped.

    The extraction is all-or-nothing: if anything fails while building the
    nested parts, an empty list is returned so that the caller keeps the
    original structure part instead of replacing it with an incomplete set.

    Args:
        structurePart: ContentPart whose data holds the JSON to inspect
        document: The document this part belongs to (its id is stored in the
            metadata of every nested part)
        preExtracted: Pre-extracted document metadata; must provide
            preExtracted["originalDocument"]["fileName"]
        intent: Document intent for nested parts (currently not used here)

    Returns:
        List of extracted ContentParts, empty if no nested parts were found
        or the structure could not be processed
    """
    nestedParts: List[ContentPart] = []

    try:
        # Parse JSON structure
        jsonData = json.loads(structurePart.data)

        # Check for standard ContentExtracted format: documentData.parts
        documentData = jsonData.get("documentData") if isinstance(jsonData, dict) else None
        parts = documentData.get("parts", []) if isinstance(documentData, dict) else None

        if isinstance(parts, list) and parts:
            for nestedPartData in parts:
                if not isinstance(nestedPartData, dict):
                    continue

                # Fall back to a positional id when the nested part has none
                nestedPartId = nestedPartData.get("id") or f"nested_{len(nestedParts)}"
                nestedTypeGroup = nestedPartData.get("typeGroup", "text")
                nestedMimeType = nestedPartData.get("mimeType", "text/plain")
                nestedLabel = nestedPartData.get("label", structurePart.label)
                nestedData = nestedPartData.get("data", "")

                # An explicit null (or any non-dict) metadata value must not
                # break the ** unpacking below
                nestedMetadata = nestedPartData.get("metadata")
                if not isinstance(nestedMetadata, dict):
                    nestedMetadata = {}

                # Create ContentPart for nested part; the keys set here
                # override same-named keys from the nested metadata
                nestedPart = ContentPart(
                    id=f"{structurePart.id}_{nestedPartId}",
                    parentId=structurePart.id,
                    label=nestedLabel,
                    typeGroup=nestedTypeGroup,
                    mimeType=nestedMimeType,
                    data=nestedData,
                    metadata={
                        **nestedMetadata,
                        "documentId": document.id,
                        "fromNestedStructure": True,
                        "parentStructurePartId": structurePart.id,
                        "originalFileName": preExtracted["originalDocument"]["fileName"]
                    }
                )

                nestedParts.append(nestedPart)
                logger.debug(f"✅ Extracted nested part: {nestedPart.id} (typeGroup={nestedTypeGroup}, mimeType={nestedMimeType})")

        # If no nested parts found, return empty list (original part will be kept)
        if not nestedParts:
            logger.debug(f"No nested parts found in structure part {structurePart.id}")

    except json.JSONDecodeError as e:
        logger.warning(f"Could not parse structure part {structurePart.id} as JSON: {str(e)}")
        return []
    except Exception as e:
        # Do not hand back a partially filled list: the caller would drop the
        # original structure part and lose the parts that were not reached
        logger.error(f"Error extracting nested parts from structure part {structurePart.id}: {str(e)}")
        return []

    return nestedParts
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ from typing import Dict, Any, List, Optional
|
|||
|
||||
from modules.datamodels.datamodelChat import ChatDocument
|
||||
from modules.datamodels.datamodelExtraction import DocumentIntent
|
||||
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -86,6 +87,7 @@ class DocumentIntentAnalyzer:
|
|||
|
||||
# AI-Call (verwende callAiPlanning für einfache JSON-Responses)
|
||||
# Debug-Logs werden bereits von callAiPlanning geschrieben
|
||||
checkWorkflowStopped(self.services)
|
||||
aiResponse = await self.aiService.callAiPlanning(
|
||||
prompt=intentPrompt,
|
||||
debugType="document_intent_analysis"
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from typing import Dict, Any, List, Optional, Tuple
|
|||
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -51,6 +52,33 @@ class StructureFiller:
|
|||
pass
|
||||
return 'en' # Default fallback
|
||||
|
||||
def _getDocumentLanguage(self, structure: Dict[str, Any], documentId: str) -> str:
|
||||
"""
|
||||
Get language for a specific document from structure.
|
||||
Falls back to user language if not specified.
|
||||
|
||||
Args:
|
||||
structure: The document structure with documents array
|
||||
documentId: The ID of the document to get language for
|
||||
|
||||
Returns:
|
||||
ISO 639-1 language code (e.g., "de", "en", "fr")
|
||||
"""
|
||||
# Try to find document in structure
|
||||
for doc in structure.get("documents", []):
|
||||
if doc.get("id") == documentId:
|
||||
docLanguage = doc.get("language")
|
||||
if docLanguage:
|
||||
return docLanguage
|
||||
|
||||
# Fallback to metadata language
|
||||
metadataLanguage = structure.get("metadata", {}).get("language")
|
||||
if metadataLanguage:
|
||||
return metadataLanguage
|
||||
|
||||
# Fallback to user language
|
||||
return self._getUserLanguage()
|
||||
|
||||
def _extractContentPartInfo(self, chapter: Dict[str, Any]) -> Tuple[List[str], Dict[str, Any]]:
|
||||
"""
|
||||
Extract contentPartIds and contentPartInstructions from chapter's contentParts structure.
|
||||
|
|
@ -60,11 +88,15 @@ class StructureFiller:
|
|||
"""
|
||||
contentParts = chapter.get("contentParts", {})
|
||||
contentPartIds = list(contentParts.keys())
|
||||
# Extract instructions (only entries with "instruction" field)
|
||||
# Extract instructions (entries with "instruction" field) and captions (entries with "caption" field)
|
||||
contentPartInstructions = {}
|
||||
for partId, partInfo in contentParts.items():
|
||||
if isinstance(partInfo, dict) and "instruction" in partInfo:
|
||||
contentPartInstructions[partId] = {"instruction": partInfo["instruction"]}
|
||||
if isinstance(partInfo, dict):
|
||||
if "instruction" in partInfo:
|
||||
contentPartInstructions[partId] = {"instruction": partInfo["instruction"]}
|
||||
elif "caption" in partInfo:
|
||||
# For entries with only caption (no instruction), still add to dict so it's available
|
||||
contentPartInstructions[partId] = {"caption": partInfo["caption"]}
|
||||
return contentPartIds, contentPartInstructions
|
||||
|
||||
def _getContentPartCaption(self, chapter: Dict[str, Any], partId: str) -> Optional[str]:
|
||||
|
|
@ -219,6 +251,7 @@ class StructureFiller:
|
|||
|
||||
# AI-Call für Chapter-Struktur-Generierung
|
||||
# Note: Debug logging is handled by callAiPlanning
|
||||
checkWorkflowStopped(self.services)
|
||||
aiResponse = await self.aiService.callAiPlanning(
|
||||
prompt=chapterPrompt,
|
||||
debugType=f"chapter_structure_{chapterId}"
|
||||
|
|
@ -311,6 +344,10 @@ class StructureFiller:
|
|||
chapterIndex = 0
|
||||
|
||||
for doc in chapterStructure.get("documents", []):
|
||||
docId = doc.get("id", "unknown")
|
||||
# Get language for this specific document
|
||||
docLanguage = self._getDocumentLanguage(chapterStructure, docId)
|
||||
|
||||
for chapter in doc.get("chapters", []):
|
||||
chapterIndex += 1
|
||||
chapterId = chapter.get("id", "unknown")
|
||||
|
|
@ -320,7 +357,8 @@ class StructureFiller:
|
|||
contentPartIds, contentPartInstructions = self._extractContentPartInfo(chapter)
|
||||
|
||||
# Create task for parallel processing with semaphore
|
||||
async def processChapterWithSemaphore(chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions):
|
||||
async def processChapterWithSemaphore(chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage):
|
||||
checkWorkflowStopped(self.services)
|
||||
async with semaphore:
|
||||
return await self._generateSingleChapterSectionsStructure(
|
||||
chapter=chapter,
|
||||
|
|
@ -333,13 +371,13 @@ class StructureFiller:
|
|||
contentPartInstructions=contentPartInstructions,
|
||||
contentParts=contentParts,
|
||||
userPrompt=userPrompt,
|
||||
language=language,
|
||||
language=docLanguage, # Use document-specific language
|
||||
parentOperationId=parentOperationId,
|
||||
totalChapters=totalChapters
|
||||
)
|
||||
|
||||
task = processChapterWithSemaphore(
|
||||
chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions
|
||||
chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions, docLanguage
|
||||
)
|
||||
chapterTasks.append((chapterIndex, chapter, task))
|
||||
|
||||
|
|
@ -367,7 +405,8 @@ class StructureFiller:
|
|||
operationType: OperationTypeEnum,
|
||||
sectionId: str,
|
||||
generationHint: str,
|
||||
generatedElements: List[Dict[str, Any]]
|
||||
generatedElements: List[Dict[str, Any]],
|
||||
section: Dict[str, Any]
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Helper method to process AI response and extract elements.
|
||||
|
|
@ -424,13 +463,16 @@ class StructureFiller:
|
|||
# Image already processed as JSON, skip
|
||||
pass
|
||||
elif base64Data:
|
||||
# Get caption from section if available
|
||||
caption = section.get("caption") or section.get("metadata", {}).get("caption") or ""
|
||||
elements.append({
|
||||
"type": "image",
|
||||
"content": {
|
||||
"base64Data": base64Data,
|
||||
"altText": generationHint or "Generated image",
|
||||
"caption": ""
|
||||
}
|
||||
"caption": caption # Use caption from section if available
|
||||
},
|
||||
"caption": caption # Also at element level for compatibility
|
||||
})
|
||||
logger.debug(f"Created proper JSON image structure with base64Data length: {len(base64Data)}")
|
||||
else:
|
||||
|
|
@ -566,14 +608,26 @@ class StructureFiller:
|
|||
})
|
||||
elif contentFormat == "object":
|
||||
if part.typeGroup == "image":
|
||||
elements.append({
|
||||
"type": "image",
|
||||
"content": {
|
||||
"base64Data": part.data,
|
||||
"altText": part.metadata.get("usageHint", part.label),
|
||||
"caption": part.metadata.get("caption", "")
|
||||
}
|
||||
})
|
||||
# Validate that image data exists
|
||||
if not part.data:
|
||||
logger.warning(f"Section {sectionId}: Image ContentPart {part.id} has no data (object format). Skipping image element.")
|
||||
elements.append({
|
||||
"type": "error",
|
||||
"message": f"Image ContentPart {part.id} has no data",
|
||||
"sectionId": sectionId
|
||||
})
|
||||
else:
|
||||
# Get caption from section (priority: section.caption > part.metadata.caption)
|
||||
caption = section.get("caption") or section.get("metadata", {}).get("caption") or part.metadata.get("caption", "")
|
||||
elements.append({
|
||||
"type": "image",
|
||||
"content": {
|
||||
"base64Data": part.data,
|
||||
"altText": part.metadata.get("usageHint", part.label),
|
||||
"caption": caption # Use caption from section
|
||||
},
|
||||
"caption": caption # Also at element level for compatibility
|
||||
})
|
||||
else:
|
||||
elements.append({
|
||||
"type": part.typeGroup,
|
||||
|
|
@ -615,6 +669,7 @@ class StructureFiller:
|
|||
contentParts=[part]
|
||||
)
|
||||
|
||||
checkWorkflowStopped(self.services)
|
||||
visionResponse = await self.aiService.callAi(visionRequest)
|
||||
|
||||
# Write debug file for image extraction response
|
||||
|
|
@ -715,6 +770,7 @@ class StructureFiller:
|
|||
processingMode=ProcessingModeEnum.DETAILED
|
||||
)
|
||||
)
|
||||
checkWorkflowStopped(self.services)
|
||||
aiResponse = await self.aiService.callAi(request)
|
||||
generatedElements = []
|
||||
|
||||
|
|
@ -773,6 +829,7 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
processingMode=ProcessingModeEnum.DETAILED
|
||||
)
|
||||
|
||||
checkWorkflowStopped(self.services)
|
||||
aiResponseJson = await self.aiService.callAiWithLooping(
|
||||
prompt=generationPrompt,
|
||||
options=options,
|
||||
|
|
@ -858,7 +915,8 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
operationType=operationType,
|
||||
sectionId=sectionId,
|
||||
generationHint=generationHint,
|
||||
generatedElements=generatedElements
|
||||
generatedElements=generatedElements,
|
||||
section=section
|
||||
)
|
||||
elements.extend(responseElements)
|
||||
|
||||
|
|
@ -1061,7 +1119,8 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
operationType=operationType,
|
||||
sectionId=sectionId,
|
||||
generationHint=generationHint,
|
||||
generatedElements=generatedElements
|
||||
generatedElements=generatedElements,
|
||||
section=section
|
||||
)
|
||||
elements.extend(responseElements)
|
||||
|
||||
|
|
@ -1106,14 +1165,26 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
|
||||
elif contentFormat == "object":
|
||||
if part.typeGroup == "image":
|
||||
elements.append({
|
||||
"type": "image",
|
||||
"content": {
|
||||
"base64Data": part.data,
|
||||
"altText": part.metadata.get("usageHint", part.label),
|
||||
"caption": part.metadata.get("caption", "")
|
||||
}
|
||||
})
|
||||
# Validate that image data exists
|
||||
if not part.data:
|
||||
logger.warning(f"Section {sectionId}: Image ContentPart {part.id} has no data (object format). Skipping image element.")
|
||||
elements.append({
|
||||
"type": "error",
|
||||
"message": f"Image ContentPart {part.id} has no data",
|
||||
"sectionId": sectionId
|
||||
})
|
||||
else:
|
||||
# Get caption from section (priority: section.caption > part.metadata.caption)
|
||||
caption = section.get("caption") or section.get("metadata", {}).get("caption") or part.metadata.get("caption", "")
|
||||
elements.append({
|
||||
"type": "image",
|
||||
"content": {
|
||||
"base64Data": part.data,
|
||||
"altText": part.metadata.get("usageHint", part.label),
|
||||
"caption": caption # Use caption from section
|
||||
},
|
||||
"caption": caption # Also at element level for compatibility
|
||||
})
|
||||
else:
|
||||
elements.append({
|
||||
"type": part.typeGroup,
|
||||
|
|
@ -1125,6 +1196,12 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
})
|
||||
|
||||
elif contentFormat == "extracted":
|
||||
# CRITICAL: If useAiCall is true, extracted parts are used as input for AI generation
|
||||
# and should NOT be added as elements. Only add extracted text as element if useAiCall is false.
|
||||
if useAiCall:
|
||||
# Extracted part will be used as input for AI call - skip adding as element
|
||||
logger.debug(f"Section {sectionId}: Skipping extracted part {part.id} as element (useAiCall=true, will be used as AI input)")
|
||||
# Continue to process this part for AI call, but don't add as element yet
|
||||
# Check if this is an image that needs Vision AI extraction
|
||||
originalPartId = part.id
|
||||
if (part.typeGroup == "image" and
|
||||
|
|
@ -1143,6 +1220,7 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
contentParts=[part]
|
||||
)
|
||||
|
||||
checkWorkflowStopped(self.services)
|
||||
visionResponse = await self.aiService.callAi(visionRequest)
|
||||
|
||||
if visionResponse and visionResponse.content:
|
||||
|
|
@ -1344,7 +1422,8 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
operationType=operationType,
|
||||
sectionId=sectionId,
|
||||
generationHint=generationHint,
|
||||
generatedElements=generatedElements
|
||||
generatedElements=generatedElements,
|
||||
section=section
|
||||
)
|
||||
elements.extend(responseElements)
|
||||
|
||||
|
|
@ -1373,24 +1452,114 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
)
|
||||
else:
|
||||
# Füge extrahierten Content direkt hinzu (kein AI-Call)
|
||||
if part.typeGroup == "image":
|
||||
logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call")
|
||||
elements.append({
|
||||
"type": "image",
|
||||
"content": {
|
||||
"base64Data": part.data,
|
||||
"altText": part.metadata.get("usageHint", part.label),
|
||||
"caption": part.metadata.get("caption", "")
|
||||
}
|
||||
})
|
||||
# CRITICAL: If content_type is "image", we must render an image, not extracted text
|
||||
if contentType == "image":
|
||||
# Section wants to display an image - find the image part
|
||||
if part.typeGroup == "image":
|
||||
# Direct image part - use it
|
||||
logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call")
|
||||
# Validate that image data exists
|
||||
if not part.data:
|
||||
logger.warning(f"Section {sectionId}: Image ContentPart {part.id} has no data (extracted format without AI call). Skipping image element.")
|
||||
elements.append({
|
||||
"type": "error",
|
||||
"message": f"Image ContentPart {part.id} has no data",
|
||||
"sectionId": sectionId
|
||||
})
|
||||
else:
|
||||
# Get caption from section (priority: section.caption > part.metadata.caption)
|
||||
caption = section.get("caption") or section.get("metadata", {}).get("caption") or part.metadata.get("caption", "")
|
||||
elements.append({
|
||||
"type": "image",
|
||||
"content": {
|
||||
"base64Data": part.data,
|
||||
"altText": part.metadata.get("usageHint", part.label),
|
||||
"caption": caption # Use caption from section
|
||||
},
|
||||
"caption": caption # Also at element level for compatibility
|
||||
})
|
||||
elif part.typeGroup == "text" and part.metadata.get("sourceImagePartId"):
|
||||
# This is a vision-extracted text part - find the original image object part
|
||||
sourceImagePartId = part.metadata.get("sourceImagePartId")
|
||||
logger.debug(f"Processing section {sectionId}: Found vision-extracted text part, looking for original image object part: {sourceImagePartId}")
|
||||
|
||||
# Try to find the object part (format: "obj_...")
|
||||
objectPartId = part.metadata.get("relatedObjectPartId")
|
||||
objectPart = None
|
||||
|
||||
if objectPartId:
|
||||
objectPart = self._findContentPartById(objectPartId, contentParts)
|
||||
|
||||
# If not found via metadata, search through all contentParts for object part
|
||||
if not objectPart:
|
||||
# Search for object part that references the source image part ID
|
||||
for candidatePart in contentParts:
|
||||
if (candidatePart.metadata.get("contentFormat") == "object" and
|
||||
candidatePart.typeGroup == "image" and
|
||||
sourceImagePartId in candidatePart.id):
|
||||
objectPart = candidatePart
|
||||
objectPartId = candidatePart.id
|
||||
logger.debug(f"Section {sectionId}: Found object part {objectPartId} by searching all contentParts")
|
||||
break
|
||||
|
||||
if objectPart and objectPart.typeGroup == "image" and objectPart.data:
|
||||
logger.info(f"Section {sectionId}: Found object part {objectPartId} for image rendering")
|
||||
caption = section.get("caption") or section.get("metadata", {}).get("caption") or objectPart.metadata.get("caption", "")
|
||||
elements.append({
|
||||
"type": "image",
|
||||
"content": {
|
||||
"base64Data": objectPart.data,
|
||||
"altText": objectPart.metadata.get("usageHint", objectPart.label),
|
||||
"caption": caption
|
||||
},
|
||||
"caption": caption
|
||||
})
|
||||
else:
|
||||
logger.warning(f"Section {sectionId}: No object part found for vision-extracted text part {part.id} (sourceImagePartId={sourceImagePartId}), cannot render image")
|
||||
elements.append({
|
||||
"type": "error",
|
||||
"message": f"Cannot render image: no object part found for extracted text part (sourceImagePartId={sourceImagePartId})",
|
||||
"sectionId": sectionId
|
||||
})
|
||||
else:
|
||||
logger.warning(f"Section {sectionId}: ContentPart {part.id} is not an image (typeGroup={part.typeGroup}), but section content_type is 'image'. Cannot render image.")
|
||||
elements.append({
|
||||
"type": "error",
|
||||
"message": f"Cannot render image: ContentPart is not an image type",
|
||||
"sectionId": sectionId
|
||||
})
|
||||
else:
|
||||
logger.debug(f"Processing section {sectionId}: Single extracted TEXT part WITHOUT AI call")
|
||||
elements.append({
|
||||
"type": "extracted_text",
|
||||
"content": part.data,
|
||||
"source": part.metadata.get("documentId"),
|
||||
"extractionPrompt": part.metadata.get("extractionPrompt")
|
||||
})
|
||||
# content_type is not "image" - add extracted text as normal
|
||||
if part.typeGroup == "image":
|
||||
logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call")
|
||||
# Validate that image data exists
|
||||
if not part.data:
|
||||
logger.warning(f"Section {sectionId}: Image ContentPart {part.id} has no data (extracted format without AI call). Skipping image element.")
|
||||
elements.append({
|
||||
"type": "error",
|
||||
"message": f"Image ContentPart {part.id} has no data",
|
||||
"sectionId": sectionId
|
||||
})
|
||||
else:
|
||||
# Get caption from section (priority: section.caption > part.metadata.caption)
|
||||
caption = section.get("caption") or section.get("metadata", {}).get("caption") or part.metadata.get("caption", "")
|
||||
elements.append({
|
||||
"type": "image",
|
||||
"content": {
|
||||
"base64Data": part.data,
|
||||
"altText": part.metadata.get("usageHint", part.label),
|
||||
"caption": caption # Use caption from section
|
||||
},
|
||||
"caption": caption # Also at element level for compatibility
|
||||
})
|
||||
else:
|
||||
logger.debug(f"Processing section {sectionId}: Single extracted TEXT part WITHOUT AI call")
|
||||
elements.append({
|
||||
"type": "extracted_text",
|
||||
"content": part.data,
|
||||
"source": part.metadata.get("documentId"),
|
||||
"extractionPrompt": part.metadata.get("extractionPrompt")
|
||||
})
|
||||
|
||||
# Update progress after section completion
|
||||
chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0
|
||||
|
|
@ -1462,6 +1631,10 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
# Process chapters sequentially with chapter-level progress
|
||||
chapterIndex = 0
|
||||
for doc in chapterStructure.get("documents", []):
|
||||
docId = doc.get("id", "unknown")
|
||||
# Get language for this specific document
|
||||
docLanguage = self._getDocumentLanguage(chapterStructure, docId)
|
||||
|
||||
for chapter in doc.get("chapters", []):
|
||||
chapterIndex += 1
|
||||
chapterId = chapter.get("id", "unknown")
|
||||
|
|
@ -1483,7 +1656,8 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
sectionTasks = []
|
||||
for sectionIndex, section in enumerate(sections):
|
||||
# Create task wrapper with semaphore for parallel processing
|
||||
async def processSectionWithSemaphore(section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, language, calculateOverallProgress):
|
||||
async def processSectionWithSemaphore(section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, docLanguage, calculateOverallProgress):
|
||||
checkWorkflowStopped(self.services)
|
||||
async with sectionSemaphore:
|
||||
return await self._processSingleSection(
|
||||
section=section,
|
||||
|
|
@ -1497,12 +1671,12 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
contentParts=contentParts,
|
||||
userPrompt=userPrompt,
|
||||
all_sections_list=all_sections_list,
|
||||
language=language,
|
||||
language=docLanguage, # Use document-specific language
|
||||
calculateOverallProgress=calculateOverallProgress
|
||||
)
|
||||
|
||||
task = processSectionWithSemaphore(
|
||||
section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, language, calculateOverallProgress
|
||||
section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, docLanguage, calculateOverallProgress
|
||||
)
|
||||
sectionTasks.append((sectionIndex, section, task))
|
||||
|
||||
|
|
@ -1675,15 +1849,30 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
for partId in contentPartIds:
|
||||
part = self._findContentPartById(partId, contentParts)
|
||||
if not part:
|
||||
# Part not found - try to show info from chapter structure
|
||||
partInfo = contentPartInstructions.get(partId, {})
|
||||
if partInfo:
|
||||
logger.warning(f"Chapter {chapterId}: ContentPart {partId} not found in contentParts list, but has chapter structure info.")
|
||||
contentPartsIndex += f"\n- ContentPart ID: {partId}\n"
|
||||
if "instruction" in partInfo:
|
||||
contentPartsIndex += f" Instruction: {partInfo['instruction']}\n"
|
||||
if "caption" in partInfo:
|
||||
contentPartsIndex += f" Caption: {partInfo['caption']}\n"
|
||||
contentPartsIndex += f" Note: ContentPart not found in contentParts list (ID may be from nested structure)\n"
|
||||
continue
|
||||
|
||||
contentFormat = part.metadata.get("contentFormat", "unknown")
|
||||
instruction = contentPartInstructions.get(partId, {}).get("instruction", "Use content as needed")
|
||||
partInfo = contentPartInstructions.get(partId, {})
|
||||
instruction = partInfo.get("instruction", "Use content as needed")
|
||||
caption = partInfo.get("caption")
|
||||
|
||||
contentPartsIndex += f"\n- ContentPart ID: {partId}\n"
|
||||
contentPartsIndex += f" Format: {contentFormat}\n"
|
||||
contentPartsIndex += f" Type: {part.typeGroup}\n"
|
||||
contentPartsIndex += f" Instruction: {instruction}\n"
|
||||
if instruction and instruction != "Use content as needed":
|
||||
contentPartsIndex += f" Instruction: {instruction}\n"
|
||||
if caption:
|
||||
contentPartsIndex += f" Caption: {caption}\n"
|
||||
|
||||
if not contentPartsIndex:
|
||||
contentPartsIndex = "\n(No content parts specified for this chapter)"
|
||||
|
|
@ -1695,6 +1884,8 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles,
|
|||
CHAPTER: {chapterTitle} (Level {chapterLevel}, ID: {chapterId})
|
||||
GENERATION HINT: {generationHint}
|
||||
|
||||
**CRITICAL**: The chapter's generationHint above describes what content this chapter should generate. If the generationHint references documents/images/data, then EACH section that generates content for this chapter MUST assign the relevant ContentParts from AVAILABLE CONTENT PARTS below.
|
||||
|
||||
NOTE: Chapter already has a heading section. Do NOT generate a heading for the chapter title.
|
||||
|
||||
## SECTION INDEPENDENCE
|
||||
|
|
@ -1705,7 +1896,18 @@ NOTE: Chapter already has a heading section. Do NOT generate a heading for the c
|
|||
AVAILABLE CONTENT PARTS:
|
||||
{contentPartsIndex}
|
||||
|
||||
CONTENT TYPES: table, bullet_list, heading, paragraph, code_block, image
|
||||
## CONTENT ASSIGNMENT RULE - CRITICAL
|
||||
If AVAILABLE CONTENT PARTS are listed above, then EVERY section that generates content related to those ContentParts MUST assign them explicitly.
|
||||
|
||||
**Assignment logic:**
|
||||
- If section generates text content ABOUT a ContentPart → assign "extracted" format ContentPart with appropriate instruction
|
||||
- If section DISPLAYS a ContentPart → assign "object" format ContentPart
|
||||
- If section's generationHint or purpose relates to a ContentPart listed above → it MUST have contentPartIds assigned
|
||||
- If chapter's generationHint references documents/images/data AND section generates content for that chapter → section MUST assign relevant ContentParts
|
||||
- Empty contentPartIds [] are only allowed if section generates content WITHOUT referencing any available ContentParts AND WITHOUT relating to chapter's generationHint
|
||||
|
||||
## CONTENT TYPES
|
||||
Available content types for sections: table, bullet_list, heading, paragraph, code_block, image
|
||||
|
||||
useAiCall RULES:
|
||||
- useAiCall: true ONLY if ContentPart Format is "extracted" AND transformation needed
|
||||
|
|
@ -1728,15 +1930,12 @@ RETURN JSON:
|
|||
]
|
||||
}}
|
||||
|
||||
EXAMPLES (all content types):
|
||||
- paragraph: {{"id": "s1", "content_type": "paragraph", "contentPartIds": ["extracted_1"], "generationHint": "Include full text", "useAiCall": false, "elements": []}}
|
||||
- bullet_list: {{"id": "s2", "content_type": "bullet_list", "contentPartIds": ["extracted_1"], "generationHint": "Create bullet list", "useAiCall": true, "elements": []}}
|
||||
- table: {{"id": "s3", "content_type": "table", "contentPartIds": ["extracted_1", "extracted_2"], "generationHint": "Create table", "useAiCall": true, "elements": []}}
|
||||
- heading: {{"id": "s4", "content_type": "heading", "contentPartIds": ["extracted_1"], "generationHint": "Extract heading", "useAiCall": true, "elements": []}}
|
||||
- code_block: {{"id": "s5", "content_type": "code_block", "contentPartIds": ["extracted_1"], "generationHint": "Format code", "useAiCall": true, "elements": []}}
|
||||
- image: {{"id": "s6", "content_type": "image", "contentPartIds": ["obj_1"], "generationHint": "Display image", "caption": "Figure 1: Description of the image", "useAiCall": false, "elements": []}}
|
||||
- reference: {{"id": "s7", "content_type": "paragraph", "contentPartIds": ["ref_1"], "generationHint": "Reference", "useAiCall": false, "elements": []}}
|
||||
- NO CONTENT PARTS (generate from scratch): {{"id": "s8", "content_type": "paragraph", "contentPartIds": [], "generationHint": "Write a detailed professional paragraph explaining [specific topic or purpose]. Include [key points to cover]. Address [important aspects]. Conclude with [summary or recommendations].", "useAiCall": true, "elements": []}}
|
||||
**MANDATORY CONTENT ASSIGNMENT CHECK:**
|
||||
For each section, verify:
|
||||
1. Are ContentParts listed in AVAILABLE CONTENT PARTS above?
|
||||
2. Does this section's generationHint or purpose relate to those ContentParts?
|
||||
3. If YES to both → section MUST have contentPartIds assigned (cannot be empty [])
|
||||
4. Assign ContentPart IDs exactly as listed in AVAILABLE CONTENT PARTS above
|
||||
|
||||
IMAGE SECTIONS:
|
||||
- For image sections, always provide a "caption" field with a descriptive caption for the image.
|
||||
|
|
@ -1793,13 +1992,40 @@ Return only valid JSON. Do not include any explanatory text outside the JSON.
|
|||
contentPartsText += f" Source file: {part.metadata.get('originalFileName')}\n"
|
||||
|
||||
if contentFormat == "extracted":
|
||||
# Zeige Preview von extrahiertem Text (länger für besseren Kontext)
|
||||
previewLength = 1000
|
||||
if part.data:
|
||||
preview = part.data[:previewLength] + "..." if len(part.data) > previewLength else part.data
|
||||
contentPartsText += f" Content preview:\n```\n{preview}\n```\n"
|
||||
# CRITICAL: Check if this is binary/image data - NEVER include in text prompt!
|
||||
isBinaryOrImage = (
|
||||
part.typeGroup == "image" or
|
||||
part.typeGroup == "binary" or
|
||||
(part.mimeType and (
|
||||
part.mimeType.startswith("image/") or
|
||||
part.mimeType.startswith("video/") or
|
||||
part.mimeType.startswith("audio/") or
|
||||
self._isBinaryMimeType(part.mimeType)
|
||||
)) or
|
||||
# Heuristic check: if data looks like base64 (long string with base64 chars)
|
||||
(part.data and isinstance(part.data, str) and
|
||||
len(part.data) > 100 and
|
||||
self._looksLikeBase64(part.data))
|
||||
)
|
||||
|
||||
if isBinaryOrImage:
|
||||
# NEVER include binary/base64 data in text prompt - security risk and token explosion!
|
||||
dataLength = len(part.data) if part.data else 0
|
||||
contentPartsText += f" Type: {part.typeGroup}\n"
|
||||
contentPartsText += f" MIME type: {part.mimeType or 'unknown'}\n"
|
||||
contentPartsText += f" Data size: {dataLength} chars (binary/base64 - not shown in prompt)\n"
|
||||
if part.metadata.get("needsVisionExtraction"):
|
||||
contentPartsText += f" Note: Will be processed with Vision AI\n"
|
||||
if part.metadata.get("usageHint"):
|
||||
contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n"
|
||||
else:
|
||||
contentPartsText += f" Content: (empty)\n"
|
||||
# Only for text data: Show preview
|
||||
previewLength = 1000
|
||||
if part.data:
|
||||
preview = part.data[:previewLength] + "..." if len(part.data) > previewLength else part.data
|
||||
contentPartsText += f" Content preview:\n```\n{preview}\n```\n"
|
||||
else:
|
||||
contentPartsText += f" Content: (empty)\n"
|
||||
elif contentFormat == "reference":
|
||||
contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n"
|
||||
if part.metadata.get("usageHint"):
|
||||
|
|
@ -1901,7 +2127,12 @@ Output requirements:
|
|||
{contextText if contextText else ""}
|
||||
"""
|
||||
else:
|
||||
prompt = f"""# TASK: Generate Section Content
|
||||
# Determine if we have ContentParts or need to generate from scratch
|
||||
hasContentParts = len(validParts) > 0
|
||||
|
||||
if hasContentParts:
|
||||
# EXTRACT MODE: Extract data from provided ContentParts
|
||||
prompt = f"""# TASK: Extract Section Content from Provided Data
|
||||
|
||||
LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
|
||||
|
||||
|
|
@ -1911,7 +2142,7 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles,
|
|||
- Generation Hint: {generationHint}
|
||||
|
||||
## AVAILABLE CONTENT FOR THIS SECTION
|
||||
{contentPartsText if contentPartsText else "(No content parts specified for this section)"}
|
||||
{contentPartsText}
|
||||
|
||||
## INSTRUCTIONS
|
||||
1. Extract data only from provided ContentParts. Never invent or generate data.
|
||||
|
|
@ -1942,6 +2173,49 @@ Output requirements:
|
|||
{userPrompt}
|
||||
```
|
||||
|
||||
## CONTEXT
|
||||
{contextText if contextText else ""}
|
||||
"""
|
||||
else:
|
||||
# GENERATE MODE: Generate content from scratch based on generationHint
|
||||
prompt = f"""# TASK: Generate Section Content
|
||||
|
||||
LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
|
||||
|
||||
## SECTION METADATA
|
||||
- Section ID: {sectionId}
|
||||
- Content Type: {contentType}
|
||||
- Generation Hint: {generationHint}
|
||||
|
||||
## INSTRUCTIONS
|
||||
1. Generate content based on the Generation Hint above.
|
||||
2. Create appropriate content that matches the content_type ({contentType}).
|
||||
3. The content should be relevant to the USER REQUEST and fit the context of surrounding sections.
|
||||
4. Return only valid JSON with "elements" array.
|
||||
5. No HTML/styling: Plain text only, no markup.
|
||||
|
||||
## OUTPUT FORMAT
|
||||
Return a JSON object with this structure:
|
||||
|
||||
{{
|
||||
"elements": [
|
||||
{{
|
||||
"type": "{contentType}",
|
||||
"content": {contentStructureExample}
|
||||
}}
|
||||
]
|
||||
}}
|
||||
|
||||
Output requirements:
|
||||
- "content" must be an object (never a string)
|
||||
- Return only valid JSON, no explanatory text
|
||||
- Generate meaningful content based on the Generation Hint
|
||||
|
||||
## USER REQUEST
|
||||
```
|
||||
{userPrompt}
|
||||
```
|
||||
|
||||
## CONTEXT
|
||||
{contextText if contextText else ""}
|
||||
"""
|
||||
|
|
@ -2174,6 +2448,41 @@ Output requirements:
|
|||
}
|
||||
}
|
||||
|
||||
def _isBinaryMimeType(self, mimeType: str) -> bool:
|
||||
"""Check if MIME type is binary."""
|
||||
binaryTypes = [
|
||||
"application/octet-stream",
|
||||
"application/pdf",
|
||||
"application/zip",
|
||||
"application/x-zip-compressed"
|
||||
]
|
||||
return mimeType in binaryTypes
|
||||
|
||||
def _looksLikeBase64(self, data: str) -> bool:
|
||||
"""
|
||||
Heuristic check if string looks like base64-encoded data.
|
||||
|
||||
Base64 contains only: A-Z, a-z, 0-9, +, /, =, and whitespace.
|
||||
If >95% of characters are base64 chars and no normal text patterns, likely base64.
|
||||
"""
|
||||
if not data or len(data) < 100:
|
||||
return False
|
||||
|
||||
base64Chars = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r\t ")
|
||||
sample = data[:500] # Check first 500 chars
|
||||
if not sample:
|
||||
return False
|
||||
|
||||
base64Ratio = sum(1 for c in sample if c in base64Chars) / len(sample)
|
||||
|
||||
# If >95% base64 chars and no normal text patterns (like spaces between words) → likely base64
|
||||
# Base64 typically has very long strings without spaces or punctuation
|
||||
hasNormalTextPatterns = any(
|
||||
c in sample[:200] for c in ".,!?;:()[]{}\"'"
|
||||
) or " " in sample[:200] # Double spaces suggest text
|
||||
|
||||
return base64Ratio > 0.95 and not hasNormalTextPatterns
|
||||
|
||||
def _findContentPartById(self, partId: str, contentParts: List[ContentPart]) -> Optional[ContentPart]:
|
||||
"""Finde ContentPart nach ID."""
|
||||
for part in contentParts:
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from typing import Dict, Any, List, Optional
|
|||
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -139,6 +140,7 @@ Continue generating the remaining chapters now.
|
|||
# NOTE: Do NOT pass contentParts here - we only need metadata for structure generation
|
||||
# The contentParts metadata is already included in the prompt (contentPartsIndex)
|
||||
# Actual content extraction happens later during section generation
|
||||
checkWorkflowStopped(self.services)
|
||||
aiResponseJson = await self.aiService.callAiWithLooping(
|
||||
prompt=structurePrompt,
|
||||
options=options,
|
||||
|
|
@ -259,36 +261,50 @@ This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generat
|
|||
{userPrompt}
|
||||
```
|
||||
|
||||
LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
|
||||
DEFAULT LANGUAGE: If no language is specified for a document, use "{language}" (from user prompt). Each document can have its own language specified in the "language" field. Use ISO 639-1 language codes in lowercase (e.g., "de", "en", "fr", "it").
|
||||
|
||||
## AVAILABLE CONTENT PARTS
|
||||
{contentPartsIndex}
|
||||
|
||||
## CHAPTER INDEPENDENCE
|
||||
- Each chapter is independent and self-contained
|
||||
- One chapter does NOT have information about another chapter
|
||||
- Each chapter must provide its own context and be understandable alone
|
||||
## CONTENT ASSIGNMENT RULE - CRITICAL
|
||||
If the user request mentions documents/images/data, then EVERY chapter that generates content related to those references MUST assign the relevant ContentParts explicitly.
|
||||
|
||||
## CONTENT ASSIGNMENT
|
||||
- Assign ContentParts to chapters via contentParts object
|
||||
- For data extraction, the type of a contentPart (image, text, etc.) is not relevant - only what is specified in the instruction matters
|
||||
- Include all relevant parts from same source when needed for structured data extraction
|
||||
- Each contentPart can have either:
|
||||
- "instruction": For AI extraction prompts (how to process/extract from this part)
|
||||
- "caption": For user-facing presentation (how to display/reference this part in the document)
|
||||
- Both can be present if needed
|
||||
- Chapters without contentParts can only generate generic content (not document-specific)
|
||||
**Assignment logic:**
|
||||
- If chapter DISPLAYS a document/image → assign "object" format ContentPart with "caption"
|
||||
- If chapter generates text content ABOUT a document/image/data → assign ContentPart with "instruction":
|
||||
- Prefer "extracted" format if available (contains analyzed/extracted content)
|
||||
- If only "object" format is available, use "object" format with "instruction" (to write ABOUT the image/document)
|
||||
- If chapter's generationHint or purpose relates to a document/image/data mentioned in user request → it MUST have ContentParts assigned
|
||||
- Multiple chapters might assign the same ContentPart (e.g., one chapter displays image, another writes about it)
|
||||
- Use ContentPart IDs exactly as listed in AVAILABLE CONTENT PARTS above
|
||||
- Empty contentParts are only allowed if chapter generates content WITHOUT referencing any documents/images/data from the user request
|
||||
|
||||
**CRITICAL RULE**: If the user request mentions BOTH:
|
||||
a) Documents/images/data (listed in AVAILABLE CONTENT PARTS above), AND
|
||||
b) Generic content types (article text, main content, body text, etc.)
|
||||
Then chapters that generate those generic content types MUST assign the relevant ContentParts, because the content should relate to or be based on the provided documents/images/data.
|
||||
|
||||
## FORMATTING
|
||||
- Formatting is handled automatically - focus on content and structure only
|
||||
|
||||
## CHAPTER STRUCTURE
|
||||
- chapter id, level (1, 2, 3, etc.), title
|
||||
- contentParts: {{"partId": {{"instruction": "..."}} or {{"caption": "..."}} or both}} - Compact mapping of part IDs to their extraction instructions and/or presentation captions
|
||||
- generationHint: Self-contained description that reflects the user's intent for the specific data. If contentParts is empty, must be detailed. If contentParts are present, the hint should guide how to extract and structure the data according to the user's requirements (e.g., specific columns, format, structure)
|
||||
## CHAPTER STRUCTURE REQUIREMENTS
|
||||
- Generate chapters based on USER REQUEST - analyze what structure the user wants
|
||||
- Each chapter needs: id, level (1, 2, 3, etc.), title
|
||||
- contentParts: {{"partId": {{"instruction": "..."}} or {{"caption": "..."}} or both}} - Assign ContentParts as required by CONTENT ASSIGNMENT RULE above
|
||||
- generationHint: Description of what content to generate for this chapter
|
||||
- The number of chapters depends on the user request - create only what is requested
|
||||
|
||||
## DOCUMENT LANGUAGE
|
||||
- Each document can have its own language (ISO 639-1 code in lowercase: "de", "en", "fr", "it", etc.)
|
||||
- If no language is specified for a document, use the user prompt language: "{language}"
|
||||
- The language determines in which language the content of that document will be generated
|
||||
- Multiple documents can have different languages if needed
|
||||
- Always use lowercase ISO 639-1 codes in the JSON output (e.g., "de", not "DE")
|
||||
|
||||
## OUTPUT FORMAT
|
||||
Return JSON:
|
||||
Generate the chapter structure based on the USER REQUEST above. The number and types of chapters depend entirely on what the user requested - do NOT copy the example structure below.
|
||||
|
||||
EXAMPLE STRUCTURE (for reference only - adapt to user request):
|
||||
{{
|
||||
"metadata": {{
|
||||
"title": "Document Title",
|
||||
|
|
@ -298,38 +314,39 @@ Return JSON:
|
|||
"id": "doc_1",
|
||||
"title": "Document Title",
|
||||
"filename": "document.{outputFormat}",
|
||||
"language": "{language}",
|
||||
"chapters": [
|
||||
{{
|
||||
"id": "chapter_1",
|
||||
"level": 1,
|
||||
"title": "Introduction",
|
||||
"title": "Chapter Title",
|
||||
"contentParts": {{
|
||||
"part_ext_1": {{
|
||||
"instruction": "Use full extracted text"
|
||||
}},
|
||||
"part_img_1": {{
|
||||
"instruction": "Analyze image for additional details"
|
||||
}},
|
||||
"part_img_2": {{
|
||||
"instruction": "Analyze image for additional details",
|
||||
"caption": "Figure 1: Overview diagram"
|
||||
"extracted_part_id": {{
|
||||
"instruction": "Use extracted content..."
|
||||
}}
|
||||
}},
|
||||
"generationHint": "Create introduction section",
|
||||
"sections": []
|
||||
}},
|
||||
{{
|
||||
"id": "chapter_2",
|
||||
"level": 1,
|
||||
"title": "Main Title",
|
||||
"contentParts": {{}},
|
||||
"generationHint": "Create [specific content description] with [formatting details]. Include [required information]. Purpose: [explanation of what this chapter provides].",
|
||||
"generationHint": "Description of chapter content",
|
||||
"sections": []
|
||||
}}
|
||||
]
|
||||
}}]
|
||||
}}
|
||||
|
||||
CRITICAL INSTRUCTIONS:
|
||||
- Generate chapters based on USER REQUEST, NOT based on the example above
|
||||
- The example shows the JSON structure format, NOT the required chapters
|
||||
- Create only the chapters that match the user's request
|
||||
- Adapt chapter titles and structure to match the user's specific request
|
||||
|
||||
**MANDATORY CONTENT ASSIGNMENT CHECK:**
|
||||
For each chapter, verify:
|
||||
1. Does the user request mention documents/images/data? (e.g., "photo", "image", "document", "data", "based on", "about")
|
||||
2. Does this chapter's generationHint, title, or purpose relate to those documents/images/data mentioned in step 1?
|
||||
- Examples: "article about the photo", "text describing the image", "analysis of the document", "content based on the data"
|
||||
- Even if chapter doesn't explicitly say "about the image", if user request mentions both the image AND this chapter's content type → relate them
|
||||
3. If YES to both → chapter MUST have contentParts assigned (cannot be empty {{}})
|
||||
4. If ContentPart is "object" format and chapter needs to write ABOUT it → assign with "instruction" field, not just "caption"
|
||||
|
||||
OUTPUT FORMAT: Start with {{ and end with }}. Do NOT use markdown code fences (```json). Do NOT add explanatory text before or after the JSON. Return ONLY the JSON object itself.
|
||||
"""
|
||||
return prompt
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata,
|
|||
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -58,6 +59,35 @@ class DocumentGenerationPath:
|
|||
if documentList:
|
||||
documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
||||
|
||||
# Filter: Entferne Original-Dokumente, wenn bereits Pre-Extracted JSONs existieren
|
||||
# (um Duplikate zu vermeiden - Pre-Extracted JSONs enthalten bereits die ContentParts)
|
||||
# Schritt 1: Identifiziere alle Original-Dokument-IDs, die durch Pre-Extracted JSONs abgedeckt werden
|
||||
originalDocIdsCoveredByPreExtracted = set()
|
||||
for doc in documents:
|
||||
preExtracted = self.services.ai.intentAnalyzer.resolvePreExtractedDocument(doc)
|
||||
if preExtracted:
|
||||
originalDocId = preExtracted["originalDocument"]["id"]
|
||||
originalDocIdsCoveredByPreExtracted.add(originalDocId)
|
||||
logger.debug(f"Found pre-extracted JSON {doc.id} covering original document {originalDocId}")
|
||||
|
||||
# Schritt 2: Filtere Dokumente - entferne Original-Dokumente, die bereits durch Pre-Extracted JSONs abgedeckt werden
|
||||
filteredDocuments = []
|
||||
for doc in documents:
|
||||
preExtracted = self.services.ai.intentAnalyzer.resolvePreExtractedDocument(doc)
|
||||
if preExtracted:
|
||||
# Pre-Extracted JSON behalten
|
||||
filteredDocuments.append(doc)
|
||||
elif doc.id in originalDocIdsCoveredByPreExtracted:
|
||||
# Original-Dokument, das bereits durch Pre-Extracted JSON abgedeckt wird - entfernen
|
||||
logger.info(f"Skipping original document {doc.id} ({doc.fileName}) - already covered by pre-extracted JSON")
|
||||
else:
|
||||
# Normales Dokument ohne Pre-Extracted JSON - behalten
|
||||
filteredDocuments.append(doc)
|
||||
|
||||
documents = filteredDocuments
|
||||
|
||||
checkWorkflowStopped(self.services)
|
||||
|
||||
if not documentIntents and documents:
|
||||
documentIntents = await self.services.ai.clarifyDocumentIntents(
|
||||
documents,
|
||||
|
|
@ -66,6 +96,8 @@ class DocumentGenerationPath:
|
|||
docOperationId
|
||||
)
|
||||
|
||||
checkWorkflowStopped(self.services)
|
||||
|
||||
# Schritt 5B: Extrahiere und bereite Content vor
|
||||
if documents:
|
||||
preparedContentParts = await self.services.ai.extractAndPrepareContent(
|
||||
|
|
@ -91,6 +123,8 @@ class DocumentGenerationPath:
|
|||
if contentParts:
|
||||
logger.info(f"Using {len(contentParts)} content parts for generation (no AI extraction at this stage)")
|
||||
|
||||
checkWorkflowStopped(self.services)
|
||||
|
||||
# Schritt 5C: Generiere Struktur
|
||||
structure = await self.services.ai.generateStructure(
|
||||
userPrompt,
|
||||
|
|
@ -99,6 +133,8 @@ class DocumentGenerationPath:
|
|||
docOperationId
|
||||
)
|
||||
|
||||
checkWorkflowStopped(self.services)
|
||||
|
||||
# Schritt 5D: Fülle Struktur
|
||||
# Language will be extracted from services (user intention analysis) in fillStructure
|
||||
filledStructure = await self.services.ai.fillStructure(
|
||||
|
|
@ -108,6 +144,8 @@ class DocumentGenerationPath:
|
|||
docOperationId
|
||||
)
|
||||
|
||||
checkWorkflowStopped(self.services)
|
||||
|
||||
# Schritt 5E: Rendere Resultat
|
||||
# Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
|
||||
renderedDocuments = await self.services.ai.renderResult(
|
||||
|
|
|
|||
|
|
@ -71,8 +71,9 @@ class RendererCsv(BaseRenderer):
|
|||
sections = self._extractSections(jsonContent)
|
||||
metadata = self._extractMetadata(jsonContent)
|
||||
|
||||
# Use title from JSON metadata if available, otherwise use provided title
|
||||
documentTitle = metadata.get("title", title)
|
||||
# Use provided title (which comes from documents[].title) as primary source
|
||||
# Fallback to metadata.title only if title parameter is empty
|
||||
documentTitle = title if title else metadata.get("title", "Generated Document")
|
||||
|
||||
# Generate CSV content
|
||||
csvRows = []
|
||||
|
|
|
|||
|
|
@ -121,8 +121,9 @@ class RendererDocx(BaseRenderer):
|
|||
sections = self._extractSections(json_content)
|
||||
metadata = self._extractMetadata(json_content)
|
||||
|
||||
# Use title from JSON metadata if available, otherwise use provided title
|
||||
document_title = metadata.get("title", title)
|
||||
# Use provided title (which comes from documents[].title) as primary source
|
||||
# Fallback to metadata.title only if title parameter is empty
|
||||
document_title = title if title else metadata.get("title", "Generated Document")
|
||||
|
||||
# Add document title using Title style
|
||||
if document_title:
|
||||
|
|
|
|||
|
|
@ -107,8 +107,9 @@ class RendererHtml(BaseRenderer):
|
|||
sections = self._extractSections(jsonContent)
|
||||
metadata = self._extractMetadata(jsonContent)
|
||||
|
||||
# Use title from JSON metadata if available, otherwise use provided title
|
||||
documentTitle = metadata.get("title", title)
|
||||
# Use provided title (which comes from documents[].title) as primary source
|
||||
# Fallback to metadata.title only if title parameter is empty
|
||||
documentTitle = title if title else metadata.get("title", "Generated Document")
|
||||
|
||||
# Build HTML document
|
||||
htmlParts = []
|
||||
|
|
|
|||
|
|
@ -86,8 +86,9 @@ class RendererImage(BaseRenderer):
|
|||
# Extract metadata from standardized schema
|
||||
metadata = self._extractMetadata(extractedContent)
|
||||
|
||||
# Use title from JSON metadata if available, otherwise use provided title
|
||||
documentTitle = metadata.get("title", title)
|
||||
# Use provided title (which comes from documents[].title) as primary source
|
||||
# Fallback to metadata.title only if title parameter is empty
|
||||
documentTitle = title if title else metadata.get("title", "Generated Document")
|
||||
|
||||
# Create AI prompt for image generation
|
||||
imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService)
|
||||
|
|
|
|||
|
|
@ -82,8 +82,9 @@ class RendererMarkdown(BaseRenderer):
|
|||
sections = self._extractSections(jsonContent)
|
||||
metadata = self._extractMetadata(jsonContent)
|
||||
|
||||
# Use title from JSON metadata if available, otherwise use provided title
|
||||
documentTitle = metadata.get("title", title)
|
||||
# Use provided title (which comes from documents[].title) as primary source
|
||||
# Fallback to metadata.title only if title parameter is empty
|
||||
documentTitle = title if title else metadata.get("title", "Generated Document")
|
||||
|
||||
# Build markdown content
|
||||
markdownParts = []
|
||||
|
|
|
|||
|
|
@ -110,8 +110,9 @@ class RendererPdf(BaseRenderer):
|
|||
sections = self._extractSections(json_content)
|
||||
metadata = self._extractMetadata(json_content)
|
||||
|
||||
# Use title from JSON metadata if available, otherwise use provided title
|
||||
document_title = metadata.get("title", title)
|
||||
# Use provided title (which comes from documents[].title) as primary source
|
||||
# Fallback to metadata.title only if title parameter is empty
|
||||
document_title = title if title else metadata.get("title", "Generated Document")
|
||||
|
||||
# Make title shorter to prevent wrapping/overlapping
|
||||
if len(document_title) > 40:
|
||||
|
|
|
|||
|
|
@ -601,8 +601,9 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
sections = self._extractSections(json_content)
|
||||
metadata = self._extractMetadata(json_content)
|
||||
|
||||
# Use title from JSON metadata if available, otherwise use provided title
|
||||
document_title = metadata.get("title", title)
|
||||
# Use provided title (which comes from documents[].title) as primary source
|
||||
# Fallback to metadata.title only if title parameter is empty
|
||||
document_title = title if title else metadata.get("title", "Generated Document")
|
||||
|
||||
# Create title slide
|
||||
slides.append({
|
||||
|
|
|
|||
|
|
@ -104,8 +104,9 @@ class RendererText(BaseRenderer):
|
|||
sections = self._extractSections(jsonContent)
|
||||
metadata = self._extractMetadata(jsonContent)
|
||||
|
||||
# Use title from JSON metadata if available, otherwise use provided title
|
||||
documentTitle = metadata.get("title", title)
|
||||
# Use provided title (which comes from documents[].title) as primary source
|
||||
# Fallback to metadata.title only if title parameter is empty
|
||||
documentTitle = title if title else metadata.get("title", "Generated Document")
|
||||
|
||||
# Build text content
|
||||
textParts = []
|
||||
|
|
|
|||
|
|
@ -290,8 +290,9 @@ class RendererXlsx(BaseRenderer):
|
|||
# Extract metadata from standardized schema
|
||||
metadata = self._extractMetadata(jsonContent)
|
||||
|
||||
# Use title from JSON metadata if available, otherwise use provided title
|
||||
document_title = metadata.get("title", title)
|
||||
# Use provided title (which comes from documents[].title) as primary source
|
||||
# Fallback to metadata.title only if title parameter is empty
|
||||
document_title = title if title else metadata.get("title", "Generated Document")
|
||||
|
||||
# Create workbook
|
||||
wb = Workbook()
|
||||
|
|
@ -689,7 +690,12 @@ class RendererXlsx(BaseRenderer):
|
|||
|
||||
# If no level 1 headings found, use document title
|
||||
if not sheetNames:
|
||||
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
|
||||
# Use documents[].title as primary source, fallback to metadata.title
|
||||
documents = jsonContent.get("documents", [])
|
||||
if documents and isinstance(documents[0], dict) and documents[0].get("title"):
|
||||
documentTitle = documents[0].get("title")
|
||||
else:
|
||||
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
|
||||
sheetNames.append(self._sanitizeSheetName(documentTitle))
|
||||
|
||||
return sheetNames
|
||||
|
|
@ -825,8 +831,12 @@ class RendererXlsx(BaseRenderer):
|
|||
def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]):
|
||||
"""Populate the main sheet with document overview and all content."""
|
||||
try:
|
||||
# Document title
|
||||
documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
|
||||
# Document title - use documents[].title as primary source, fallback to metadata.title
|
||||
documents = jsonContent.get("documents", [])
|
||||
if documents and isinstance(documents[0], dict) and documents[0].get("title"):
|
||||
documentTitle = documents[0].get("title")
|
||||
else:
|
||||
documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
|
||||
sheet['A1'] = documentTitle
|
||||
|
||||
# Safety check for title style
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import re
|
|||
import traceback
|
||||
from typing import Dict, Any, Optional, List, Callable
|
||||
from modules.services.serviceGeneration.subContentIntegrator import ContentIntegrator
|
||||
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -167,6 +168,7 @@ class ContentGenerator:
|
|||
contentPartsMap[partId] = part
|
||||
|
||||
for idx, section in enumerate(sections):
|
||||
checkWorkflowStopped(self.services)
|
||||
try:
|
||||
contentType = section.get("content_type", "content")
|
||||
sectionId = section.get("id", f"section_{idx}")
|
||||
|
|
@ -229,7 +231,8 @@ class ContentGenerator:
|
|||
sections: List[Dict[str, Any]],
|
||||
cachedContent: Optional[Dict[str, Any]],
|
||||
userPrompt: str,
|
||||
documentMetadata: Dict[str, Any],
|
||||
contentParts: Optional[List[Any]] = None,
|
||||
documentMetadata: Dict[str, Any] = {},
|
||||
progressCallback: Optional[Callable] = None,
|
||||
batchSize: int = 10
|
||||
) -> List[Dict[str, Any]]:
|
||||
|
|
@ -240,6 +243,7 @@ class ContentGenerator:
|
|||
sections: List of sections to generate
|
||||
cachedContent: Extracted content cache
|
||||
userPrompt: Original user prompt
|
||||
contentParts: List of all available ContentParts (for mapping by contentPartIds)
|
||||
documentMetadata: Document metadata
|
||||
progressCallback: Progress callback function
|
||||
batchSize: Number of sections to process in parallel per batch
|
||||
|
|
@ -253,6 +257,14 @@ class ContentGenerator:
|
|||
if totalSections == 0:
|
||||
return []
|
||||
|
||||
# Create ContentParts lookup map by ID
|
||||
contentPartsMap = {}
|
||||
if contentParts:
|
||||
for part in contentParts:
|
||||
partId = part.id if hasattr(part, 'id') else part.get('id', '')
|
||||
if partId:
|
||||
contentPartsMap[partId] = part
|
||||
|
||||
# Adjust batch size based on section types (images take longer)
|
||||
imageCount = sum(1 for s in sections if s.get("content_type") == "image")
|
||||
if imageCount > 0:
|
||||
|
|
@ -277,6 +289,7 @@ class ContentGenerator:
|
|||
)
|
||||
|
||||
async def generateWithProgress(section: Dict[str, Any], globalIndex: int, localIndex: int, batchPreviousSections: List[Dict[str, Any]]):
|
||||
checkWorkflowStopped(self.services)
|
||||
try:
|
||||
contentType = section.get("content_type", "content")
|
||||
sectionId = section.get("id", f"section_{globalIndex}")
|
||||
|
|
@ -422,6 +435,7 @@ class ContentGenerator:
|
|||
resultFormat="json"
|
||||
)
|
||||
|
||||
checkWorkflowStopped(self.services)
|
||||
aiResponse = await self.services.ai.callAiContent(
|
||||
prompt=sectionPrompt,
|
||||
options=options,
|
||||
|
|
@ -603,6 +617,59 @@ class ContentGenerator:
|
|||
) -> Dict[str, Any]:
|
||||
"""Generate image for image section or include existing image"""
|
||||
try:
|
||||
# First, check if section has image ContentParts to integrate directly
|
||||
sectionContentParts = context.get("sectionContentParts", [])
|
||||
if sectionContentParts:
|
||||
# Look for image ContentParts
|
||||
for part in sectionContentParts:
|
||||
partTypeGroup = part.typeGroup if hasattr(part, 'typeGroup') else part.get('typeGroup', '')
|
||||
partMimeType = part.mimeType if hasattr(part, 'mimeType') else part.get('mimeType', '')
|
||||
isImage = partTypeGroup == "image" or (partMimeType and partMimeType.startswith("image/"))
|
||||
|
||||
if isImage:
|
||||
# Extract image data from ContentPart
|
||||
partData = part.data if hasattr(part, 'data') else part.get('data', '')
|
||||
partId = part.id if hasattr(part, 'id') else part.get('id', '')
|
||||
|
||||
# Get base64 data
|
||||
base64Data = None
|
||||
if isinstance(partData, str):
|
||||
# Check if it's already base64 or needs extraction
|
||||
if partData.startswith("data:image"):
|
||||
# Extract base64 from data URL
|
||||
base64Data = partData.split(",", 1)[1] if "," in partData else partData
|
||||
elif len(partData) > 100: # Likely base64 string
|
||||
base64Data = partData
|
||||
elif isinstance(partData, bytes):
|
||||
import base64
|
||||
base64Data = base64.b64encode(partData).decode('utf-8')
|
||||
|
||||
if base64Data:
|
||||
# Get caption from section (priority: section.caption > metadata.caption)
|
||||
caption = section.get("caption") or section.get("metadata", {}).get("caption")
|
||||
|
||||
# Get alt text from ContentPart metadata or section
|
||||
altText = part.metadata.get("altText") if hasattr(part, 'metadata') else part.get('metadata', {}).get('altText')
|
||||
if not altText:
|
||||
altText = section.get("generation_hint", "Image")
|
||||
|
||||
# Get mime type
|
||||
mimeType = partMimeType or "image/png"
|
||||
|
||||
# Create image element with caption
|
||||
section["elements"] = [{
|
||||
"type": "image",
|
||||
"content": {
|
||||
"base64Data": base64Data,
|
||||
"altText": altText,
|
||||
"caption": caption # Include caption from section
|
||||
},
|
||||
"caption": caption # Also at element level for compatibility
|
||||
}]
|
||||
|
||||
logger.info(f"Successfully integrated image from ContentPart {partId} for section {section.get('id')} with caption: {caption}")
|
||||
return section
|
||||
|
||||
# Check if this is an existing image to include or render
|
||||
imageSource = section.get("image_source", "generate")
|
||||
|
||||
|
|
@ -623,12 +690,17 @@ class ContentGenerator:
|
|||
# Create image element from existing/render image
|
||||
altText = imageDoc.get("altText", section.get("generation_hint", "Image"))
|
||||
mimeType = imageDoc.get("mimeType", "image/png")
|
||||
caption = section.get("caption") or section.get("metadata", {}).get("caption")
|
||||
|
||||
# Use nested content structure for consistency with renderers
|
||||
section["elements"] = [{
|
||||
"base64Data": imageDoc.get("base64Data"),
|
||||
"altText": altText,
|
||||
"mimeType": mimeType,
|
||||
"caption": section.get("caption") or section.get("metadata", {}).get("caption")
|
||||
"type": "image",
|
||||
"content": {
|
||||
"base64Data": imageDoc.get("base64Data"),
|
||||
"altText": altText,
|
||||
"caption": caption # Include caption in content structure
|
||||
},
|
||||
"caption": caption # Also at element level for compatibility
|
||||
}]
|
||||
|
||||
logger.info(f"Successfully integrated image {imageRefId} for section {section.get('id')} (source={imageSource})")
|
||||
|
|
@ -666,6 +738,7 @@ class ContentGenerator:
|
|||
logger.info(f"Starting image generation for section {section.get('id')}: {imagePrompt[:100]}...")
|
||||
|
||||
# Call AI for image generation
|
||||
checkWorkflowStopped(self.services)
|
||||
aiResponse = await self.services.ai.callAiContent(
|
||||
prompt=promptJson,
|
||||
options=options,
|
||||
|
|
@ -704,11 +777,15 @@ class ContentGenerator:
|
|||
|
||||
caption = section.get("caption") or section.get("metadata", {}).get("caption")
|
||||
|
||||
# Use nested content structure for consistency with renderers
|
||||
section["elements"] = [{
|
||||
"url": f"data:image/png;base64,{base64Data}",
|
||||
"base64Data": base64Data,
|
||||
"altText": altText,
|
||||
"caption": caption
|
||||
"type": "image",
|
||||
"content": {
|
||||
"base64Data": base64Data,
|
||||
"altText": altText,
|
||||
"caption": caption # Include caption in content structure
|
||||
},
|
||||
"caption": caption # Also at element level for compatibility
|
||||
}]
|
||||
|
||||
logger.info(f"Successfully generated image for section {section.get('id')}")
|
||||
|
|
|
|||
|
|
@ -17,25 +17,11 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
return ActionResult.isFailure(error="prompt is required")
|
||||
|
||||
documentList = parameters.get("documentList", [])
|
||||
# Optional: if omitted, formats determined from prompt by AI
|
||||
resultType = parameters.get("resultType")
|
||||
|
||||
# Auto-detect format from prompt if not provided
|
||||
if not resultType:
|
||||
promptLower = prompt.lower()
|
||||
if ".html" in promptLower or "html file" in promptLower:
|
||||
resultType = "html"
|
||||
elif ".js" in promptLower or "javascript" in promptLower:
|
||||
resultType = "js"
|
||||
elif ".py" in promptLower or "python" in promptLower:
|
||||
resultType = "py"
|
||||
elif ".ts" in promptLower or "typescript" in promptLower:
|
||||
resultType = "ts"
|
||||
elif ".java" in promptLower:
|
||||
resultType = "java"
|
||||
elif ".cpp" in promptLower or ".c++" in promptLower:
|
||||
resultType = "cpp"
|
||||
else:
|
||||
resultType = "txt" # Default
|
||||
logger.debug("resultType not provided - formats will be determined from prompt by AI")
|
||||
|
||||
# Create operation ID for progress tracking
|
||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||
|
|
@ -67,11 +53,12 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
processingMode=ProcessingModeEnum.DETAILED
|
||||
)
|
||||
|
||||
# outputFormat: Optional - if None, formats determined from prompt by AI
|
||||
aiResponse: AiResponse = await self.services.ai.callAiContent(
|
||||
prompt=prompt,
|
||||
options=options,
|
||||
documentList=docRefList,
|
||||
outputFormat=resultType,
|
||||
outputFormat=resultType, # Can be None - AI determines from prompt
|
||||
title=title,
|
||||
parentOperationId=parentOperationId,
|
||||
generationIntent="code" # Explicit intent, skips detection
|
||||
|
|
@ -93,7 +80,8 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
# If no documents but content exists, create a document from content
|
||||
if not documents and aiResponse.content:
|
||||
# Determine document name from metadata
|
||||
docName = f"code.{resultType}"
|
||||
resultTypeFallback = resultType or "txt" # Fallback for file naming
|
||||
docName = f"code.{resultTypeFallback}"
|
||||
if aiResponse.metadata and aiResponse.metadata.filename:
|
||||
docName = aiResponse.metadata.filename
|
||||
elif aiResponse.metadata and aiResponse.metadata.title:
|
||||
|
|
@ -101,8 +89,8 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title)
|
||||
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
|
||||
if sanitized:
|
||||
if not sanitized.lower().endswith(f".{resultType}"):
|
||||
docName = f"{sanitized}.{resultType}"
|
||||
if not sanitized.lower().endswith(f".{resultTypeFallback}"):
|
||||
docName = f"{sanitized}.{resultTypeFallback}"
|
||||
else:
|
||||
docName = sanitized
|
||||
|
||||
|
|
|
|||
|
|
@ -18,23 +18,11 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
|
||||
documentList = parameters.get("documentList", [])
|
||||
documentType = parameters.get("documentType")
|
||||
resultType = parameters.get("resultType", "txt")
|
||||
# Optional: if omitted, formats determined from prompt by AI
|
||||
resultType = parameters.get("resultType")
|
||||
|
||||
# Auto-detect format from prompt if not explicitly provided
|
||||
if resultType == "txt" and prompt:
|
||||
promptLower = prompt.lower()
|
||||
if "html" in promptLower or "html5" in promptLower:
|
||||
resultType = "html"
|
||||
logger.info(f"Auto-detected HTML format from prompt")
|
||||
elif "pdf" in promptLower:
|
||||
resultType = "pdf"
|
||||
logger.info(f"Auto-detected PDF format from prompt")
|
||||
elif "markdown" in promptLower or " md " in promptLower or promptLower.endswith(" md"):
|
||||
resultType = "md"
|
||||
logger.info(f"Auto-detected Markdown format from prompt")
|
||||
elif ("text" in promptLower or "txt" in promptLower) and "html" not in promptLower:
|
||||
resultType = "txt"
|
||||
logger.info(f"Auto-detected Text format from prompt")
|
||||
if not resultType:
|
||||
logger.debug("resultType not provided - formats will be determined from prompt by AI")
|
||||
|
||||
# Create operation ID for progress tracking
|
||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||
|
|
@ -69,11 +57,12 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
compressContext=False
|
||||
)
|
||||
|
||||
# outputFormat: Optional - if None, formats determined from prompt by AI
|
||||
aiResponse: AiResponse = await self.services.ai.callAiContent(
|
||||
prompt=prompt,
|
||||
options=options,
|
||||
documentList=docRefList, # Pass documentList directly - callAiContent performs phases 5A-5E
|
||||
outputFormat=resultType,
|
||||
outputFormat=resultType, # Can be None - AI determines from prompt
|
||||
title=title,
|
||||
parentOperationId=parentOperationId,
|
||||
generationIntent="document" # NEW: Explicit intent, skips detection
|
||||
|
|
@ -95,7 +84,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
# If no documents but content exists, create a document from content
|
||||
if not documents and aiResponse.content:
|
||||
# Determine document name from metadata
|
||||
docName = f"document.{resultType}"
|
||||
resultTypeFallback = resultType or "txt" # Fallback for file naming
|
||||
docName = f"document.{resultTypeFallback}"
|
||||
if aiResponse.metadata and aiResponse.metadata.filename:
|
||||
docName = aiResponse.metadata.filename
|
||||
elif aiResponse.metadata and aiResponse.metadata.title:
|
||||
|
|
@ -103,8 +93,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title)
|
||||
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
|
||||
if sanitized:
|
||||
if not sanitized.lower().endswith(f".{resultType}"):
|
||||
docName = f"{sanitized}.{resultType}"
|
||||
if not sanitized.lower().endswith(f".{resultTypeFallback}"):
|
||||
docName = f"{sanitized}.{resultTypeFallback}"
|
||||
else:
|
||||
docName = sanitized
|
||||
|
||||
|
|
|
|||
|
|
@ -54,8 +54,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
logger.error(f"Invalid documentList type: {type(documentListParam)}")
|
||||
documentList = DocumentReferenceList(references=[])
|
||||
|
||||
resultType = parameters.get("resultType", "txt")
|
||||
|
||||
# Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only.
|
||||
resultType = parameters.get("resultType")
|
||||
|
||||
if not aiPrompt:
|
||||
logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
|
||||
|
|
@ -63,11 +63,20 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
error="AI prompt is required"
|
||||
)
|
||||
|
||||
# Determine output extension and default MIME type without duplicating service logic
|
||||
normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
|
||||
output_extension = f".{normalized_result_type}"
|
||||
# Handle optional resultType: if None, formats determined from prompt by AI
|
||||
if resultType:
|
||||
normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
|
||||
output_extension = f".{normalized_result_type}"
|
||||
output_format = output_extension.replace('.', '') or 'txt'
|
||||
logger.info(f"Using result type: {resultType} -> {output_extension}")
|
||||
else:
|
||||
# No format specified - AI will determine formats from prompt
|
||||
normalized_result_type = None
|
||||
output_extension = None
|
||||
output_format = None
|
||||
logger.debug("resultType not provided - formats will be determined from prompt by AI")
|
||||
|
||||
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
|
||||
logger.info(f"Using result type: {resultType} -> {output_extension}")
|
||||
|
||||
# Phase 7.3: Extract content first if documents provided, then use contentParts
|
||||
# Check if contentParts are already provided (preferred path)
|
||||
|
|
@ -121,54 +130,33 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
# Update progress - preparing AI call
|
||||
self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
|
||||
|
||||
# Detect image generation from resultType
|
||||
# Detect image generation from resultType (if provided)
|
||||
imageFormats = ["png", "jpg", "jpeg", "gif", "webp"]
|
||||
isImageGeneration = normalized_result_type in imageFormats
|
||||
isImageGeneration = normalized_result_type in imageFormats if normalized_result_type else False
|
||||
|
||||
# Build options with correct operationType
|
||||
output_format = output_extension.replace('.', '') or 'txt'
|
||||
from modules.datamodels.datamodelAi import OperationTypeEnum
|
||||
options = AiCallOptions(
|
||||
resultFormat=output_format,
|
||||
resultFormat=output_format or "txt", # Fallback for options, but outputFormat can be None for callAiContent
|
||||
operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE
|
||||
)
|
||||
|
||||
# Get generationIntent from parameters
|
||||
generationIntent = parameters.get("generationIntent")
|
||||
|
||||
# For DATA_GENERATE, generationIntent is REQUIRED
|
||||
# If not provided, default to "document" for document formats (xlsx, docx, pdf, txt, html, etc.)
|
||||
# This is format-based defaulting, not prompt-based auto-detection
|
||||
if options.operationType == OperationTypeEnum.DATA_GENERATE and not generationIntent:
|
||||
# Document formats (default to document generation)
|
||||
documentFormats = ["xlsx", "docx", "pdf", "txt", "md", "html", "csv", "xml", "json", "pptx"]
|
||||
# Code formats (should use ai.generateCode instead, but default to code if ai.process is used)
|
||||
codeFormats = ["py", "js", "ts", "java", "cpp", "c", "go", "rs", "rb", "php", "swift", "kt"]
|
||||
|
||||
if normalized_result_type in documentFormats:
|
||||
generationIntent = "document"
|
||||
logger.info(f"Defaulting generationIntent to 'document' for resultType '{normalized_result_type}'")
|
||||
elif normalized_result_type in codeFormats:
|
||||
generationIntent = "code"
|
||||
logger.info(f"Defaulting generationIntent to 'code' for resultType '{normalized_result_type}'")
|
||||
else:
|
||||
# Unknown format - default to document (most common use case)
|
||||
generationIntent = "document"
|
||||
logger.warning(
|
||||
f"Unknown resultType '{normalized_result_type}', defaulting generationIntent to 'document'. "
|
||||
f"For code generation, use ai.generateCode action or explicitly pass generationIntent='code'."
|
||||
)
|
||||
# Get generationIntent from parameters (required for DATA_GENERATE)
|
||||
# Default to "document" if not provided (most common use case)
|
||||
# For code generation, use ai.generateCode action or explicitly pass generationIntent="code"
|
||||
generationIntent = parameters.get("generationIntent", "document")
|
||||
|
||||
# Update progress - calling AI
|
||||
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
|
||||
|
||||
# Use unified callAiContent method with contentParts (extraction is now separate)
|
||||
# ContentParts are already extracted above (or None if no documents)
|
||||
# outputFormat: Optional - if None, formats determined from prompt by AI
|
||||
aiResponse = await self.services.ai.callAiContent(
|
||||
prompt=aiPrompt,
|
||||
options=options,
|
||||
contentParts=contentParts, # Already extracted (or None if no documents)
|
||||
outputFormat=output_format,
|
||||
outputFormat=output_format, # Can be None - AI determines from prompt
|
||||
parentOperationId=operationId,
|
||||
generationIntent=generationIntent # REQUIRED for DATA_GENERATE
|
||||
)
|
||||
|
|
@ -198,7 +186,7 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
final_documents = action_documents
|
||||
else:
|
||||
# Text response - create document from content
|
||||
extension = output_extension.lstrip('.')
|
||||
extension = output_extension.lstrip('.') if output_extension else "txt"
|
||||
meaningful_name = self._generateMeaningfulFileName(
|
||||
base_name="ai",
|
||||
extension=extension,
|
||||
|
|
@ -206,8 +194,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
)
|
||||
validationMetadata = {
|
||||
"actionType": "ai.process",
|
||||
"resultType": normalized_result_type,
|
||||
"outputFormat": output_format,
|
||||
"resultType": normalized_result_type or "auto",
|
||||
"outputFormat": output_format or "auto",
|
||||
"hasDocuments": False,
|
||||
"contentType": "text"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ class MethodAi(MethodBase):
|
|||
frontendOptions=["txt", "json", "md", "csv", "xml", "html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"],
|
||||
required=False,
|
||||
default="txt",
|
||||
description="Output file extension. All output documents will use this format"
|
||||
description="Output file extension. Optional: if omitted, formats are determined from prompt by AI. Default \"txt\" is validation fallback only. With per-document format determination, AI can determine different formats for different documents based on prompt."
|
||||
),
|
||||
"generationIntent": WorkflowActionParameter(
|
||||
name="generationIntent",
|
||||
|
|
@ -68,7 +68,8 @@ class MethodAi(MethodBase):
|
|||
frontendType=FrontendType.SELECT,
|
||||
frontendOptions=["document", "code", "image"],
|
||||
required=False,
|
||||
description="Explicit generation intent (\"document\" | \"code\" | \"image\"). For DATA_GENERATE operations, if not provided, defaults based on resultType: document formats (xlsx, docx, pdf, etc.) → \"document\", code formats (py, js, ts, etc.) → \"code\". For IMAGE_GENERATE operations, this parameter is ignored. Best practice: Use qualified actions (ai.generateDocument, ai.generateCode) instead of ai.process."
|
||||
default="document",
|
||||
description="Explicit generation intent (\"document\" | \"code\" | \"image\"). Required for DATA_GENERATE operations. Defaults to \"document\" if not provided. For code generation, use ai.generateCode action or explicitly pass generationIntent=\"code\". For IMAGE_GENERATE operations, this parameter is ignored."
|
||||
)
|
||||
},
|
||||
execute=process.__get__(self, self.__class__)
|
||||
|
|
@ -267,7 +268,7 @@ class MethodAi(MethodBase):
|
|||
frontendType=FrontendType.TEXT,
|
||||
required=False,
|
||||
default="txt",
|
||||
description="Output format (e.g., txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg). Any format supported by renderers is acceptable. Default: txt"
|
||||
description="Output format (e.g., txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg). Optional: if omitted, formats are determined from prompt by AI. Default \"txt\" is validation fallback only. With per-document format determination, AI can determine different formats for different documents based on prompt."
|
||||
)
|
||||
},
|
||||
execute=generateDocument.__get__(self, self.__class__)
|
||||
|
|
@ -297,7 +298,7 @@ class MethodAi(MethodBase):
|
|||
frontendType=FrontendType.SELECT,
|
||||
frontendOptions=["py", "js", "ts", "html", "java", "cpp", "txt"],
|
||||
required=False,
|
||||
description="Output format (html, js, py, etc.). Default: based on prompt"
|
||||
description="Output format (html, js, py, etc.). Optional: if omitted, formats are determined from prompt by AI. With per-document format determination, AI can determine different formats for different documents based on prompt."
|
||||
)
|
||||
},
|
||||
execute=generateCode.__get__(self, self.__class__)
|
||||
|
|
|
|||
|
|
@ -693,12 +693,38 @@ The following is the user's original input message. Analyze intent, normalize th
|
|||
setattr(self.services, '_needsWorkflowHistory', False)
|
||||
|
||||
# Update services state
|
||||
# CRITICAL: Validate language from AI response
|
||||
# If the AI returned no language, or an invalid one → use the user's language
|
||||
# If the user's language is not set either → fall back to "en"
|
||||
validatedLanguage = None
|
||||
|
||||
# Validate AI-detected language
|
||||
if detectedLanguage and isinstance(detectedLanguage, str):
|
||||
self._setUserLanguage(detectedLanguage)
|
||||
try:
|
||||
setattr(self.services, 'currentUserLanguage', detectedLanguage)
|
||||
except Exception:
|
||||
pass
|
||||
detectedLanguage = detectedLanguage.strip().lower()
|
||||
# Check if it's a valid 2-character ISO code
|
||||
if len(detectedLanguage) == 2 and detectedLanguage.isalpha():
|
||||
validatedLanguage = detectedLanguage
|
||||
|
||||
# If AI didn't return valid language, use user language
|
||||
if not validatedLanguage:
|
||||
userLanguage = getattr(self.services.user, 'language', None) if hasattr(self.services, 'user') and self.services.user else None
|
||||
if userLanguage and isinstance(userLanguage, str):
|
||||
userLanguage = userLanguage.strip().lower()
|
||||
if len(userLanguage) == 2 and userLanguage.isalpha():
|
||||
validatedLanguage = userLanguage
|
||||
|
||||
# Final fallback to "en"
|
||||
if not validatedLanguage:
|
||||
validatedLanguage = "en"
|
||||
logger.warning("Language not detected from AI and user language not set - using default 'en'")
|
||||
|
||||
# Set validated language
|
||||
self._setUserLanguage(validatedLanguage)
|
||||
try:
|
||||
setattr(self.services, 'currentUserLanguage', validatedLanguage)
|
||||
logger.debug(f"Set currentUserLanguage to validated value: {validatedLanguage}")
|
||||
except Exception:
|
||||
pass
|
||||
self.services.currentUserPrompt = intentText or userInput.prompt
|
||||
# Always set currentUserPromptNormalized - use normalizedRequest if available, otherwise fallback to currentUserPrompt
|
||||
normalizedValue = normalizedRequest or intentText or userInput.prompt
|
||||
|
|
|
|||
Loading…
Reference in a new issue