1762 lines
90 KiB
Python
1762 lines
90 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""
|
|
Structure Filling Module
|
|
|
|
Handles filling document structure with content, including:
|
|
- Filling sections with content parts
|
|
- Building section generation prompts
|
|
- Aggregation logic
|
|
"""
|
|
import json
|
|
import logging
|
|
import copy
|
|
import asyncio
|
|
from typing import Dict, Any, List, Optional
|
|
|
|
from modules.datamodels.datamodelExtraction import ContentPart
|
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class StructureFiller:
|
|
"""Handles filling document structure with content."""
|
|
|
|
def __init__(self, services, aiService):
    """Store the collaborators used by the structure-filling steps.

    Args:
        services: Service center object. Methods of this class read
            attributes such as ``chat`` and ``utils`` from it.
        aiService: AI service object used for the planning and
            generation calls issued by this class.
    """
    self.services, self.aiService = services, aiService
|
|
|
|
def _getUserLanguage(self) -> str:
    """Return the language code to use for document generation.

    Resolution order:
      1. ``services.currentUserLanguage`` (the language detected by the
         user intention analysis),
      2. ``services.user.language`` (the user's preferred language),
      3. ``'en'``.

    A missing, ``None`` or empty value at one step falls through to the
    next step, so the result is never empty.

    Returns:
        The resolved language code; ``'en'`` when nothing usable is set.
    """
    try:
        services = self.services
        if services:
            # Prefer the detected language if available.
            detected = getattr(services, 'currentUserLanguage', None)
            if detected:
                return detected
            # Fall back to the user's preferred language, but only when it
            # actually holds a value (previously None/"" was returned as-is).
            user = getattr(services, 'user', None)
            if user:
                preferred = getattr(user, 'language', None)
                if preferred:
                    return preferred
    except Exception:
        # Best effort only: language lookup must never break generation.
        logger.debug("Could not resolve user language, using default 'en'", exc_info=True)
    return 'en'  # Default fallback
|
|
|
|
async def fillStructure(
    self,
    structure: Dict[str, Any],
    contentParts: List[ContentPart],
    userPrompt: str,
    parentOperationId: str,
    language: Optional[str] = None
) -> Dict[str, Any]:
    """
    Phase 5D: chapter content generation (two-phase approach).

    Phase 5D.1 generates the sections structure for every chapter.
    Phase 5D.2 fills those sections with ContentParts.
    Afterwards the chapters are flattened to sections and ContentParts
    metadata is attached to the result.

    Args:
        structure: Structure dict with documents and chapters (not sections).
            It is deep-copied before processing.
        contentParts: All prepared ContentParts.
        userPrompt: The user request.
        parentOperationId: Parent operation id for the chat log hierarchy.
        language: Language identified from user intention analysis
            (e.g., "de", "en", "fr"). When None, it is taken from
            ``_getUserLanguage()``.

    Returns:
        The filled structure with elements in every section (after flattening).

    Raises:
        ValueError: If no document in ``structure`` has a "chapters" key.
        Exception: Any error raised while filling is logged, the progress
            entry is closed as failed, and the error is re-raised.
    """
    documents = structure.get("documents", [])

    # Only chapter-based structures are accepted.
    if not any("chapters" in document for document in documents):
        error_msg = "Structure must have chapters. Legacy section-based structure is not supported."
        logger.error(error_msg)
        raise ValueError(error_msg)

    # An explicit language parameter wins; otherwise ask the services.
    if language is not None:
        logger.debug(f"Using provided language parameter: {language}")
    else:
        language = self._getUserLanguage()
        logger.debug(f"Using language from services (user intention analysis): {language}")

    # Open the chat log entry for this operation below the parent operation.
    fillOperationId = f"{parentOperationId}_structure_filling"
    chapterCount = 0
    for document in documents:
        chapterCount += len(document.get("chapters", []))
    self.services.chat.progressLogStart(
        fillOperationId,
        "Chapter Content Generation",
        "Filling",
        f"Processing {chapterCount} chapters",
        parentOperationId=parentOperationId
    )

    try:
        workingCopy = copy.deepcopy(structure)

        # Phase 5D.1: generate the sections structure of every chapter.
        workingCopy = await self._generateChapterSectionsStructure(
            workingCopy, contentParts, userPrompt, fillOperationId, language
        )

        # Phase 5D.2: fill the sections with ContentParts.
        workingCopy = await self._fillChapterSections(
            workingCopy, contentParts, userPrompt, fillOperationId, language
        )

        # Flatten chapters to sections, then attach ContentParts metadata
        # (used for validation).
        result = self._addContentPartsMetadata(
            self._flattenChaptersToSections(workingCopy), contentParts
        )

        # Close the chat log entry as successful.
        self.services.chat.progressLogFinish(fillOperationId, True)
        return result

    except Exception as e:
        self.services.chat.progressLogFinish(fillOperationId, False)
        logger.error(f"Error in fillStructure: {str(e)}")
        raise
|
|
|
|
async def _generateSingleChapterSectionsStructure(
    self,
    chapter: Dict[str, Any],
    chapterIndex: int,
    chapterId: str,
    chapterLevel: int,
    chapterTitle: str,
    generationHint: str,
    contentPartIds: List[str],
    contentPartInstructions: Dict[str, Any],
    contentParts: List[ContentPart],
    userPrompt: str,
    language: str,
    parentOperationId: str,
    totalChapters: int
) -> None:
    """
    Generate the sections structure of one chapter (used for parallel processing).

    Builds the chapter prompt, asks the AI planning call for the sections,
    stores them in ``chapter["sections"]`` (the chapter dict is modified in
    place) and derives a ``useAiCall`` flag for every section that does not
    already carry one.

    Args:
        chapter: Chapter dict; receives the "sections" key.
        chapterIndex: Number of this chapter, used for progress messages.
        chapterId: Chapter id, used for the prompt, the debug type and logs.
        chapterLevel: Chapter level, forwarded to the prompt builder.
        chapterTitle: Chapter title, used for the prompt and progress messages.
        generationHint: Chapter generation hint, forwarded to the prompt builder.
        contentPartIds: ContentPart ids of the chapter, forwarded to the prompt builder.
        contentPartInstructions: Mapping of part id to a dict that may hold
            an "instruction" text.
        contentParts: All prepared ContentParts; used to look parts up by id.
        userPrompt: The user request, forwarded to the prompt builder.
        language: Language code, forwarded to the prompt builder.
        parentOperationId: Operation id that receives the progress updates.
        totalChapters: Total number of chapters, used for the progress value.

    Raises:
        Exception: Any failure is logged, ``chapter["sections"]`` is reset to
            an empty list, an error progress update is sent, and the
            exception is re-raised.
    """
    # Instructions that ask for the extracted text unchanged; every other
    # non-empty instruction counts as a transformation.
    verbatimInstructions = ["include full text", "include all content", "use full extracted text"]

    def reportProgress(statusText):
        # The same progress value is reported at start, completion and error.
        if totalChapters > 0:
            fraction = chapterIndex / totalChapters
        else:
            fraction = 1.0
        self.services.chat.progressLogUpdate(parentOperationId, fraction, statusText)

    def isExtracted(partId):
        part = self._findContentPartById(partId, contentParts)
        if not part:
            return False
        return part.metadata.get("contentFormat", "unknown") == "extracted"

    def requestsTransformation(partId):
        instruction = contentPartInstructions.get(partId, {}).get("instruction", "")
        return bool(instruction) and instruction.lower() not in verbatimInstructions

    try:
        reportProgress(f"Generating sections for Chapter {chapterIndex}/{totalChapters}: {chapterTitle}")

        promptText = self._buildChapterSectionsStructurePrompt(
            chapterId=chapterId,
            chapterLevel=chapterLevel,
            chapterTitle=chapterTitle,
            generationHint=generationHint,
            contentPartIds=contentPartIds,
            contentPartInstructions=contentPartInstructions,
            contentParts=contentParts,
            userPrompt=userPrompt,
            language=language
        )

        # AI call for the chapter structure.
        # Note: debug logging is handled by callAiPlanning.
        rawAnswer = await self.aiService.callAiPlanning(
            prompt=promptText,
            debugType=f"chapter_structure_{chapterId}"
        )

        jsonText = self.services.utils.jsonExtractString(rawAnswer)
        parsedAnswer = json.loads(jsonText)
        chapter["sections"] = parsedAnswer.get("sections", [])

        # Derive useAiCall where the AI did not set it.
        # IMPORTANT: useAiCall can only be true when at least one ContentPart
        # has the format "extracted". "object" and "reference" parts are added
        # directly as elements and need no AI.
        for sectionEntry in chapter["sections"]:
            if "useAiCall" in sectionEntry:
                continue

            sectionType = sectionEntry.get("content_type", "paragraph")
            sectionPartIds = sectionEntry.get("contentPartIds", [])

            hasExtractedPart = any(isExtracted(partId) for partId in sectionPartIds)

            # With extracted parts, AI is needed for any non-paragraph type
            # or when an instruction asks for a transformation.
            needsAi = hasExtractedPart and (
                sectionType != "paragraph"
                or any(requestsTransformation(partId) for partId in sectionPartIds)
            )

            sectionEntry["useAiCall"] = needsAi
            logger.debug(f"Section {sectionEntry.get('id')}: useAiCall={needsAi} (hasExtractedPart={hasExtractedPart}, contentType={sectionType})")

        reportProgress(f"Chapter {chapterIndex}/{totalChapters} completed: {chapterTitle}")

    except Exception as e:
        logger.error(f"Error generating sections structure for chapter {chapterId}: {str(e)}")
        # Leave the chapter with empty sections and still report progress.
        chapter["sections"] = []
        reportProgress(f"Chapter {chapterIndex}/{totalChapters} error: {chapterTitle}")
        raise
|
|
|
|
async def _generateChapterSectionsStructure(
    self,
    chapterStructure: Dict[str, Any],
    contentParts: List[ContentPart],
    userPrompt: str,
    parentOperationId: str,
    language: str
) -> Dict[str, Any]:
    """
    Phase 5D.1: generate the sections structure of every chapter (without content) in parallel.

    Sections contain: content_type, contentPartIds, generationHint, useAiCall.

    Args:
        chapterStructure: Structure dict whose documents hold the chapters;
            the chapter dicts are updated in place.
        contentParts: All prepared ContentParts.
        userPrompt: The user request.
        parentOperationId: Operation id that receives the progress updates.
        language: Language code forwarded to every chapter task.

    Returns:
        The same ``chapterStructure`` object that was passed in.
    """
    # Flatten all chapters in document order; a chapter's 1-based position
    # in this list is its chapter index for progress tracking.
    allChapters = [
        chapterEntry
        for document in chapterStructure.get("documents", [])
        for chapterEntry in document.get("chapters", [])
    ]
    totalChapters = len(allChapters)

    # One coroutine per chapter, created in order.
    pending = [
        self._generateSingleChapterSectionsStructure(
            chapter=chapterEntry,
            chapterIndex=position,
            chapterId=chapterEntry.get("id", "unknown"),
            chapterLevel=chapterEntry.get("level", 1),
            chapterTitle=chapterEntry.get("title", "Untitled Chapter"),
            generationHint=chapterEntry.get("generationHint", ""),
            contentPartIds=chapterEntry.get("contentPartIds", []),
            contentPartInstructions=chapterEntry.get("contentPartInstructions", {}),
            contentParts=contentParts,
            userPrompt=userPrompt,
            language=language,
            parentOperationId=parentOperationId,
            totalChapters=totalChapters
        )
        for position, chapterEntry in enumerate(allChapters, start=1)
    ]

    if pending:
        # Run everything concurrently; failures come back as result values
        # so one failing chapter does not stop the others.
        outcomes = await asyncio.gather(*pending, return_exceptions=True)

        for chapterEntry, outcome in zip(allChapters, outcomes):
            if isinstance(outcome, Exception):
                # The per-chapter method has already reset the chapter's
                # sections to an empty list before re-raising.
                logger.error(f"Error processing chapter {chapterEntry.get('id')}: {str(outcome)}")

    return chapterStructure
|
|
|
|
async def _processAiResponseForSection(
    self,
    aiResponse: Any,
    contentType: str,
    operationType: OperationTypeEnum,
    sectionId: str,
    generationHint: str,
    generatedElements: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """
    Turn an AI response into the list of elements for one section.

    Image generation (``contentType == "image"`` with IMAGE_GENERATE):
      * bytes content is base64-encoded,
      * string content that parses to a JSON image element (or a list whose
        first item is one) is used as-is,
      * a ``data:image/...`` URI contributes the payload after the first
        comma (surrounding whitespace is ignored),
      * a long string made of base64 characters has its whitespace removed,
      * any other non-blank string is used unchanged as the image data,
      * empty, blank or otherwise unusable content yields one error element.

    All other content: ``generatedElements`` is returned when non-empty;
    otherwise ``aiResponse.content`` is parsed as JSON (a list, a dict with
    "elements", or a single dict with "type").

    Args:
        aiResponse: Object exposing the raw AI output as ``content``.
        contentType: Content type of the section (e.g. "image").
        operationType: Operation type that produced the response.
        sectionId: Section id, used in log messages and error elements.
        generationHint: Used as alt text of a generated image.
        generatedElements: Elements already parsed by the caller.

    Returns:
        The elements for the section; failures are reported as elements of
        type "error" instead of being raised.
    """
    elements = []

    if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE:
        import base64

        rawContent = aiResponse.content
        # "" means "nothing usable yet"; None signals that image element(s)
        # were already appended from a JSON response.
        base64Data = ""

        if isinstance(rawContent, bytes):
            base64Data = base64.b64encode(rawContent).decode('utf-8')
        elif isinstance(rawContent, str):
            # Check whether the text is already a JSON image structure.
            try:
                jsonContent = json.loads(self.services.utils.jsonExtractString(rawContent))
            except (ValueError, AttributeError):
                # json.JSONDecodeError is a ValueError subclass.
                jsonContent = None

            if isinstance(jsonContent, dict) and jsonContent.get("type") == "image":
                elements.append(jsonContent)
                logger.debug("AI returned proper JSON image structure")
                base64Data = None
            elif (
                isinstance(jsonContent, list)
                and jsonContent
                and isinstance(jsonContent[0], dict)
                and jsonContent[0].get("type") == "image"
            ):
                elements.extend(jsonContent)
                logger.debug("AI returned proper JSON image structure in list")
                base64Data = None
            else:
                content_stripped = rawContent.strip()
                if content_stripped.startswith("data:image/"):
                    # Data URI: keep only the payload after the first comma.
                    # A URI without a comma has no payload and is treated as
                    # invalid instead of raising IndexError.
                    _, separator, payload = content_stripped.partition(",")
                    base64Data = payload.strip() if separator else ""
                elif len(content_stripped) > 100 and all(
                    c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r\t "
                    for c in content_stripped[:200]
                ):
                    # Looks like raw base64: drop embedded whitespace.
                    base64Data = content_stripped.replace("\n", "").replace("\r", "").replace("\t", "").replace(" ", "")
                else:
                    # Unknown text is passed through unchanged; blank text
                    # counts as empty so it ends up as an error element.
                    base64Data = rawContent if content_stripped else ""

        # Always create a proper JSON structure for images (if not already done).
        if base64Data is None:
            pass
        elif base64Data:
            elements.append({
                "type": "image",
                "content": {
                    "base64Data": base64Data,
                    "altText": generationHint or "Generated image",
                    "caption": ""
                }
            })
            logger.debug(f"Created proper JSON image structure with base64Data length: {len(base64Data)}")
        else:
            logger.warning(f"IMAGE_GENERATE returned empty or invalid content for section {sectionId}")
            elements.append({
                "type": "error",
                "message": "Image generation returned empty or invalid content",
                "sectionId": sectionId
            })
    elif generatedElements:
        # Non-image content: use the elements the caller already parsed.
        elements.extend(generatedElements)
    else:
        # Fallback: try to parse the JSON response directly.
        try:
            fallbackElements = json.loads(
                self.services.utils.jsonExtractString(aiResponse.content)
            )
        except ValueError as json_error:
            logger.error(f"Error parsing JSON response for section {sectionId}: {str(json_error)}")
            elements.append({
                "type": "error",
                "message": f"Failed to parse JSON response: {str(json_error)}",
                "sectionId": sectionId
            })
        else:
            if isinstance(fallbackElements, list):
                elements.extend(fallbackElements)
            elif isinstance(fallbackElements, dict) and "elements" in fallbackElements:
                elements.extend(fallbackElements["elements"])
            elif isinstance(fallbackElements, dict) and fallbackElements.get("type"):
                elements.append(fallbackElements)

    return elements
|
|
|
|
async def _processSingleSection(
|
|
self,
|
|
section: Dict[str, Any],
|
|
sectionIndex: int,
|
|
totalSections: int,
|
|
chapterIndex: int,
|
|
totalChapters: int,
|
|
chapterId: str,
|
|
chapterOperationId: str,
|
|
fillOperationId: str,
|
|
contentParts: List[ContentPart],
|
|
userPrompt: str,
|
|
all_sections_list: List[Dict[str, Any]],
|
|
language: str,
|
|
calculateOverallProgress: callable
|
|
) -> List[Dict[str, Any]]:
|
|
"""
|
|
Process a single section and return its elements.
|
|
Used for parallel processing of sections within a chapter.
|
|
"""
|
|
sectionId = section.get("id")
|
|
sectionTitle = section.get("title", sectionId)
|
|
contentPartIds = section.get("contentPartIds", [])
|
|
contentFormats = section.get("contentFormats", {})
|
|
generationHint = section.get("generationHint") or section.get("generation_hint")
|
|
contentType = section.get("content_type", "paragraph")
|
|
useAiCall = section.get("useAiCall", False)
|
|
|
|
# Update overall progress at start of section
|
|
overallProgress = calculateOverallProgress(chapterIndex - 1, totalChapters, sectionIndex, totalSections)
|
|
self.services.chat.progressLogUpdate(
|
|
fillOperationId,
|
|
overallProgress,
|
|
f"Chapter {chapterIndex}/{totalChapters}, Section {sectionIndex + 1}/{totalSections}: {sectionTitle}"
|
|
)
|
|
|
|
# WICHTIG: Wenn keine ContentParts vorhanden sind UND kein generationHint, kann kein AI-Call gemacht werden
|
|
if len(contentPartIds) == 0 and not generationHint:
|
|
useAiCall = False
|
|
logger.debug(f"Section {sectionId}: No content parts and no generation hint, setting useAiCall=False")
|
|
elif len(contentPartIds) == 0 and generationHint and not useAiCall:
|
|
useAiCall = True
|
|
logger.info(f"Section {sectionId}: Overriding useAiCall=True (has generationHint but no content parts)")
|
|
|
|
elements = []
|
|
|
|
# Prüfe ob Aggregation nötig ist
|
|
needsAggregation = self._needsAggregation(
|
|
contentType=contentType,
|
|
contentPartCount=len(contentPartIds)
|
|
)
|
|
|
|
logger.info(f"Processing section {sectionId}: contentType={contentType}, contentPartCount={len(contentPartIds)}, useAiCall={useAiCall}, needsAggregation={needsAggregation}, hasGenerationHint={bool(generationHint)}")
|
|
|
|
try:
|
|
if needsAggregation and useAiCall:
|
|
# Aggregation: Alle Parts zusammen verarbeiten
|
|
sectionParts = [
|
|
self._findContentPartById(pid, contentParts)
|
|
for pid in contentPartIds
|
|
]
|
|
sectionParts = [p for p in sectionParts if p is not None]
|
|
|
|
if sectionParts:
|
|
# Filtere nur extracted Parts für Aggregation (reference/object werden separat behandelt)
|
|
extractedParts = [
|
|
p for p in sectionParts
|
|
if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted"
|
|
]
|
|
nonExtractedParts = [
|
|
p for p in sectionParts
|
|
if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted"
|
|
]
|
|
|
|
# Verarbeite non-extracted Parts separat (reference, object)
|
|
for part in nonExtractedParts:
|
|
contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat"))
|
|
|
|
if contentFormat == "reference":
|
|
elements.append({
|
|
"type": "reference",
|
|
"documentReference": part.metadata.get("documentReference"),
|
|
"label": part.metadata.get("usageHint", part.label)
|
|
})
|
|
elif contentFormat == "object":
|
|
if part.typeGroup == "image":
|
|
elements.append({
|
|
"type": "image",
|
|
"content": {
|
|
"base64Data": part.data,
|
|
"altText": part.metadata.get("usageHint", part.label),
|
|
"caption": part.metadata.get("caption", "")
|
|
}
|
|
})
|
|
else:
|
|
elements.append({
|
|
"type": part.typeGroup,
|
|
"content": {
|
|
"data": part.data,
|
|
"mimeType": part.mimeType,
|
|
"label": part.metadata.get("usageHint", part.label)
|
|
}
|
|
})
|
|
|
|
# Aggregiere extracted Parts mit AI
|
|
if extractedParts:
|
|
logger.debug(f"Section {sectionId}: Aggregating {len(extractedParts)} extracted parts with AI")
|
|
isAggregation = True
|
|
generationPrompt = self._buildSectionGenerationPrompt(
|
|
section=section,
|
|
contentParts=extractedParts,
|
|
userPrompt=userPrompt,
|
|
generationHint=generationHint,
|
|
allSections=all_sections_list,
|
|
sectionIndex=sectionIndex,
|
|
isAggregation=isAggregation,
|
|
language=language
|
|
)
|
|
|
|
sectionOperationId = f"{fillOperationId}_section_{sectionId}"
|
|
self.services.chat.progressLogStart(
|
|
sectionOperationId,
|
|
"Section Generation (Aggregation)",
|
|
f"Section {sectionIndex + 1}/{totalSections}",
|
|
f"{sectionTitle} ({len(extractedParts)} parts)",
|
|
parentOperationId=chapterOperationId
|
|
)
|
|
|
|
try:
|
|
self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt")
|
|
|
|
self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation")
|
|
|
|
operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE
|
|
|
|
if operationType == OperationTypeEnum.IMAGE_GENERATE:
|
|
maxPromptLength = 4000
|
|
if len(generationPrompt) > maxPromptLength:
|
|
logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters")
|
|
generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0]
|
|
|
|
# Write debug file for IMAGE_GENERATE (direct callAi, no _callAiWithLooping)
|
|
self.services.utils.writeDebugFile(
|
|
generationPrompt,
|
|
f"{chapterId}_section_{sectionId}_prompt"
|
|
)
|
|
|
|
request = AiCallRequest(
|
|
prompt=generationPrompt,
|
|
contentParts=[],
|
|
options=AiCallOptions(
|
|
operationType=operationType,
|
|
priority=PriorityEnum.BALANCED,
|
|
processingMode=ProcessingModeEnum.DETAILED
|
|
)
|
|
)
|
|
aiResponse = await self.aiService.callAi(request)
|
|
generatedElements = []
|
|
|
|
# Write debug file for IMAGE_GENERATE response (direct callAi, no _callAiWithLooping)
|
|
self.services.utils.writeDebugFile(
|
|
aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse),
|
|
f"{chapterId}_section_{sectionId}_response"
|
|
)
|
|
else:
|
|
async def buildSectionPromptWithContinuation(
|
|
section: Dict[str, Any],
|
|
contentParts: List[ContentPart],
|
|
userPrompt: str,
|
|
generationHint: str,
|
|
allSections: List[Dict[str, Any]],
|
|
sectionIndex: int,
|
|
isAggregation: bool,
|
|
continuationContext: Dict[str, Any],
|
|
services: Any
|
|
) -> str:
|
|
basePrompt = self._buildSectionGenerationPrompt(
|
|
section=section,
|
|
contentParts=contentParts,
|
|
userPrompt=userPrompt,
|
|
generationHint=generationHint,
|
|
allSections=allSections,
|
|
sectionIndex=sectionIndex,
|
|
isAggregation=isAggregation,
|
|
language=language
|
|
)
|
|
|
|
continuationInfo = continuationContext.get("delivered_summary", "")
|
|
cutOffElement = continuationContext.get("cut_off_element", "")
|
|
|
|
continuationPrompt = f"""{basePrompt}
|
|
|
|
--- CONTINUATION REQUEST ---
|
|
The previous JSON response was incomplete. Please continue from where it stopped.
|
|
|
|
PREVIOUSLY DELIVERED SUMMARY:
|
|
{continuationInfo}
|
|
|
|
LAST INCOMPLETE ELEMENT:
|
|
{cutOffElement}
|
|
|
|
TASK: Continue generating the JSON elements array from where it was cut off.
|
|
Complete the incomplete element and continue with remaining elements.
|
|
|
|
Return ONLY the continuation JSON (starting from the incomplete element).
|
|
The JSON should be a fragment that can be merged with the previous response."""
|
|
return continuationPrompt
|
|
|
|
options = AiCallOptions(
|
|
operationType=operationType,
|
|
priority=PriorityEnum.BALANCED,
|
|
processingMode=ProcessingModeEnum.DETAILED
|
|
)
|
|
|
|
aiResponseJson = await self.aiService._callAiWithLooping(
|
|
prompt=generationPrompt,
|
|
options=options,
|
|
debugPrefix=f"{chapterId}_section_{sectionId}",
|
|
promptBuilder=buildSectionPromptWithContinuation,
|
|
promptArgs={
|
|
"section": section,
|
|
"contentParts": extractedParts,
|
|
"userPrompt": userPrompt,
|
|
"generationHint": generationHint,
|
|
"allSections": all_sections_list,
|
|
"sectionIndex": sectionIndex,
|
|
"isAggregation": isAggregation,
|
|
"services": self.services
|
|
},
|
|
operationId=sectionOperationId,
|
|
userPrompt=userPrompt,
|
|
contentParts=extractedParts
|
|
)
|
|
|
|
try:
|
|
parsedResponse = json.loads(self.services.utils.jsonExtractString(aiResponseJson))
|
|
if isinstance(parsedResponse, list):
|
|
generatedElements = parsedResponse
|
|
elif isinstance(parsedResponse, dict):
|
|
if "elements" in parsedResponse:
|
|
generatedElements = parsedResponse["elements"]
|
|
elif "sections" in parsedResponse and len(parsedResponse["sections"]) > 0:
|
|
firstSection = parsedResponse["sections"][0]
|
|
generatedElements = firstSection.get("elements", [])
|
|
elif parsedResponse.get("type"):
|
|
generatedElements = [parsedResponse]
|
|
else:
|
|
generatedElements = []
|
|
else:
|
|
generatedElements = []
|
|
|
|
class AiResponse:
|
|
def __init__(self, content):
|
|
self.content = content
|
|
|
|
aiResponse = AiResponse(aiResponseJson)
|
|
except Exception as parseError:
|
|
logger.error(f"Error parsing response from _callAiWithLooping for section {sectionId}: {str(parseError)}")
|
|
class AiResponse:
|
|
def __init__(self, content):
|
|
self.content = content
|
|
aiResponse = AiResponse(aiResponseJson)
|
|
generatedElements = []
|
|
|
|
self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response")
|
|
# Note: Debug files are written by _callAiWithLooping using debugPrefix
|
|
|
|
self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content")
|
|
|
|
# Process AI response
|
|
responseElements = await self._processAiResponseForSection(
|
|
aiResponse=aiResponse,
|
|
contentType=contentType,
|
|
operationType=operationType,
|
|
sectionId=sectionId,
|
|
generationHint=generationHint,
|
|
generatedElements=generatedElements
|
|
)
|
|
elements.extend(responseElements)
|
|
|
|
self.services.chat.progressLogFinish(sectionOperationId, True)
|
|
|
|
chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0
|
|
self.services.chat.progressLogUpdate(
|
|
chapterOperationId,
|
|
chapterProgress,
|
|
f"Section {sectionIndex + 1}/{totalSections} completed"
|
|
)
|
|
|
|
except Exception as e:
|
|
self.services.chat.progressLogFinish(sectionOperationId, False)
|
|
elements.append({
|
|
"type": "error",
|
|
"message": f"Error generating section {sectionId}: {str(e)}",
|
|
"sectionId": sectionId
|
|
})
|
|
logger.error(f"Error generating section {sectionId}: {str(e)}")
|
|
chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0
|
|
self.services.chat.progressLogUpdate(
|
|
chapterOperationId,
|
|
chapterProgress,
|
|
f"Section {sectionIndex + 1}/{totalSections} completed (with errors)"
|
|
)
|
|
|
|
else:
|
|
# Einzelverarbeitung: Jeder Part einzeln ODER Generation ohne ContentParts
|
|
if len(contentPartIds) == 0 and useAiCall and generationHint:
|
|
# Generate content from scratch using only generationHint
|
|
logger.debug(f"Processing section {sectionId}: No content parts, generating from generationHint only")
|
|
generationPrompt = self._buildSectionGenerationPrompt(
|
|
section=section,
|
|
contentParts=[],
|
|
userPrompt=userPrompt,
|
|
generationHint=generationHint,
|
|
allSections=all_sections_list,
|
|
sectionIndex=sectionIndex,
|
|
isAggregation=False,
|
|
language=language
|
|
)
|
|
|
|
sectionOperationId = f"{fillOperationId}_section_{sectionId}"
|
|
self.services.chat.progressLogStart(
|
|
sectionOperationId,
|
|
"Section Generation",
|
|
f"Section {sectionIndex + 1}/{totalSections}",
|
|
f"{sectionTitle} (from generationHint)",
|
|
parentOperationId=chapterOperationId
|
|
)
|
|
|
|
try:
|
|
self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt")
|
|
|
|
self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation")
|
|
|
|
operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE
|
|
|
|
if operationType == OperationTypeEnum.IMAGE_GENERATE:
|
|
maxPromptLength = 4000
|
|
if len(generationPrompt) > maxPromptLength:
|
|
logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters")
|
|
generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0]
|
|
|
|
# Write debug file for IMAGE_GENERATE (direct callAi, no _callAiWithLooping)
|
|
self.services.utils.writeDebugFile(
|
|
generationPrompt,
|
|
f"{chapterId}_section_{sectionId}_prompt"
|
|
)
|
|
|
|
request = AiCallRequest(
|
|
prompt=generationPrompt,
|
|
contentParts=[],
|
|
options=AiCallOptions(
|
|
operationType=operationType,
|
|
priority=PriorityEnum.BALANCED,
|
|
processingMode=ProcessingModeEnum.DETAILED
|
|
)
|
|
)
|
|
aiResponse = await self.aiService.callAi(request)
|
|
generatedElements = []
|
|
|
|
# Write debug file for IMAGE_GENERATE response (direct callAi, no _callAiWithLooping)
|
|
self.services.utils.writeDebugFile(
|
|
aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse),
|
|
f"{chapterId}_section_{sectionId}_response"
|
|
)
|
|
else:
|
|
isAggregation = False
|
|
|
|
async def buildSectionPromptWithContinuation(
|
|
section: Dict[str, Any],
|
|
contentParts: List[ContentPart],
|
|
userPrompt: str,
|
|
generationHint: str,
|
|
allSections: List[Dict[str, Any]],
|
|
sectionIndex: int,
|
|
isAggregation: bool,
|
|
continuationContext: Dict[str, Any],
|
|
services: Any
|
|
) -> str:
|
|
basePrompt = self._buildSectionGenerationPrompt(
|
|
section=section,
|
|
contentParts=contentParts,
|
|
userPrompt=userPrompt,
|
|
generationHint=generationHint,
|
|
allSections=allSections,
|
|
sectionIndex=sectionIndex,
|
|
isAggregation=isAggregation,
|
|
language=language
|
|
)
|
|
|
|
continuationInfo = continuationContext.get("delivered_summary", "")
|
|
cutOffElement = continuationContext.get("cut_off_element", "")
|
|
|
|
continuationPrompt = f"""{basePrompt}
|
|
|
|
--- CONTINUATION REQUEST ---
|
|
The previous JSON response was incomplete. Please continue from where it stopped.
|
|
|
|
PREVIOUSLY DELIVERED SUMMARY:
|
|
{continuationInfo}
|
|
|
|
LAST INCOMPLETE ELEMENT:
|
|
{cutOffElement}
|
|
|
|
TASK: Continue generating the JSON elements array from where it was cut off.
|
|
Complete the incomplete element and continue with remaining elements.
|
|
|
|
Return ONLY the continuation JSON (starting from the incomplete element).
|
|
The JSON should be a fragment that can be merged with the previous response."""
|
|
return continuationPrompt
|
|
|
|
options = AiCallOptions(
|
|
operationType=operationType,
|
|
priority=PriorityEnum.BALANCED,
|
|
processingMode=ProcessingModeEnum.DETAILED
|
|
)
|
|
|
|
aiResponseJson = await self.aiService._callAiWithLooping(
|
|
prompt=generationPrompt,
|
|
options=options,
|
|
debugPrefix=f"{chapterId}_section_{sectionId}",
|
|
promptBuilder=buildSectionPromptWithContinuation,
|
|
promptArgs={
|
|
"section": section,
|
|
"contentParts": [],
|
|
"userPrompt": userPrompt,
|
|
"generationHint": generationHint,
|
|
"allSections": all_sections_list,
|
|
"sectionIndex": sectionIndex,
|
|
"isAggregation": isAggregation,
|
|
"services": self.services
|
|
},
|
|
operationId=sectionOperationId,
|
|
userPrompt=userPrompt,
|
|
contentParts=[]
|
|
)
|
|
|
|
try:
|
|
parsedResponse = json.loads(self.services.utils.jsonExtractString(aiResponseJson))
|
|
if isinstance(parsedResponse, list):
|
|
generatedElements = parsedResponse
|
|
elif isinstance(parsedResponse, dict):
|
|
if "elements" in parsedResponse:
|
|
generatedElements = parsedResponse["elements"]
|
|
elif "sections" in parsedResponse and len(parsedResponse["sections"]) > 0:
|
|
firstSection = parsedResponse["sections"][0]
|
|
generatedElements = firstSection.get("elements", [])
|
|
elif parsedResponse.get("type"):
|
|
generatedElements = [parsedResponse]
|
|
else:
|
|
generatedElements = []
|
|
else:
|
|
generatedElements = []
|
|
|
|
class AiResponse:
|
|
def __init__(self, content):
|
|
self.content = content
|
|
|
|
aiResponse = AiResponse(aiResponseJson)
|
|
except Exception as parseError:
|
|
logger.error(f"Error parsing response from _callAiWithLooping for section {sectionId}: {str(parseError)}")
|
|
class AiResponse:
|
|
def __init__(self, content):
|
|
self.content = content
|
|
aiResponse = AiResponse(aiResponseJson)
|
|
generatedElements = []
|
|
|
|
self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response")
|
|
# Note: Debug files are written by _callAiWithLooping using debugPrefix
|
|
|
|
self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content")
|
|
|
|
responseElements = await self._processAiResponseForSection(
|
|
aiResponse=aiResponse,
|
|
contentType=contentType,
|
|
operationType=operationType,
|
|
sectionId=sectionId,
|
|
generationHint=generationHint,
|
|
generatedElements=generatedElements
|
|
)
|
|
elements.extend(responseElements)
|
|
|
|
self.services.chat.progressLogFinish(sectionOperationId, True)
|
|
|
|
chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0
|
|
self.services.chat.progressLogUpdate(
|
|
chapterOperationId,
|
|
chapterProgress,
|
|
f"Section {sectionIndex + 1}/{totalSections} completed"
|
|
)
|
|
|
|
except Exception as e:
|
|
self.services.chat.progressLogFinish(sectionOperationId, False)
|
|
elements.append({
|
|
"type": "error",
|
|
"message": f"Error generating section {sectionId}: {str(e)}",
|
|
"sectionId": sectionId
|
|
})
|
|
logger.error(f"Error generating section {sectionId}: {str(e)}")
|
|
chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0
|
|
self.services.chat.progressLogUpdate(
|
|
chapterOperationId,
|
|
chapterProgress,
|
|
f"Section {sectionIndex + 1}/{totalSections} completed (with errors)"
|
|
)
|
|
|
|
# Einzelverarbeitung: Jeder Part einzeln
|
|
for partId in contentPartIds:
|
|
part = self._findContentPartById(partId, contentParts)
|
|
if not part:
|
|
continue
|
|
|
|
contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat"))
|
|
|
|
if contentFormat == "reference":
|
|
elements.append({
|
|
"type": "reference",
|
|
"documentReference": part.metadata.get("documentReference"),
|
|
"label": part.metadata.get("usageHint", part.label)
|
|
})
|
|
|
|
elif contentFormat == "object":
|
|
if part.typeGroup == "image":
|
|
elements.append({
|
|
"type": "image",
|
|
"content": {
|
|
"base64Data": part.data,
|
|
"altText": part.metadata.get("usageHint", part.label),
|
|
"caption": part.metadata.get("caption", "")
|
|
}
|
|
})
|
|
else:
|
|
elements.append({
|
|
"type": part.typeGroup,
|
|
"content": {
|
|
"data": part.data,
|
|
"mimeType": part.mimeType,
|
|
"label": part.metadata.get("usageHint", part.label)
|
|
}
|
|
})
|
|
|
|
elif contentFormat == "extracted":
|
|
if useAiCall and generationHint:
|
|
# AI-Call mit einzelnen ContentPart
|
|
logger.debug(f"Processing section {sectionId}: Single extracted part with AI call")
|
|
generationPrompt = self._buildSectionGenerationPrompt(
|
|
section=section,
|
|
contentParts=[part],
|
|
userPrompt=userPrompt,
|
|
generationHint=generationHint,
|
|
allSections=all_sections_list,
|
|
sectionIndex=sectionIndex,
|
|
isAggregation=False,
|
|
language=language
|
|
)
|
|
|
|
sectionOperationId = f"{fillOperationId}_section_{sectionId}"
|
|
self.services.chat.progressLogStart(
|
|
sectionOperationId,
|
|
"Section Generation",
|
|
f"Section {sectionIndex + 1}/{totalSections}",
|
|
f"{sectionTitle} (single part)",
|
|
parentOperationId=chapterOperationId
|
|
)
|
|
|
|
try:
|
|
self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt")
|
|
|
|
self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation")
|
|
|
|
operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE
|
|
|
|
if operationType == OperationTypeEnum.IMAGE_GENERATE:
|
|
maxPromptLength = 4000
|
|
if len(generationPrompt) > maxPromptLength:
|
|
logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters")
|
|
generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0]
|
|
|
|
# Write debug file for IMAGE_GENERATE (direct callAi, no _callAiWithLooping)
|
|
self.services.utils.writeDebugFile(
|
|
generationPrompt,
|
|
f"{chapterId}_section_{sectionId}_prompt"
|
|
)
|
|
|
|
request = AiCallRequest(
|
|
prompt=generationPrompt,
|
|
contentParts=[],
|
|
options=AiCallOptions(
|
|
operationType=operationType,
|
|
priority=PriorityEnum.BALANCED,
|
|
processingMode=ProcessingModeEnum.DETAILED
|
|
)
|
|
)
|
|
aiResponse = await self.aiService.callAi(request)
|
|
generatedElements = []
|
|
|
|
# Write debug file for IMAGE_GENERATE response (direct callAi, no _callAiWithLooping)
|
|
self.services.utils.writeDebugFile(
|
|
aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse),
|
|
f"{chapterId}_section_{sectionId}_response"
|
|
)
|
|
else:
|
|
isAggregation = False
|
|
|
|
async def buildSectionPromptWithContinuation(
|
|
section: Dict[str, Any],
|
|
contentParts: List[ContentPart],
|
|
userPrompt: str,
|
|
generationHint: str,
|
|
allSections: List[Dict[str, Any]],
|
|
sectionIndex: int,
|
|
isAggregation: bool,
|
|
continuationContext: Dict[str, Any],
|
|
services: Any
|
|
) -> str:
|
|
basePrompt = self._buildSectionGenerationPrompt(
|
|
section=section,
|
|
contentParts=contentParts,
|
|
userPrompt=userPrompt,
|
|
generationHint=generationHint,
|
|
allSections=allSections,
|
|
sectionIndex=sectionIndex,
|
|
isAggregation=isAggregation,
|
|
language=language
|
|
)
|
|
|
|
continuationInfo = continuationContext.get("delivered_summary", "")
|
|
cutOffElement = continuationContext.get("cut_off_element", "")
|
|
|
|
continuationPrompt = f"""{basePrompt}
|
|
|
|
--- CONTINUATION REQUEST ---
|
|
The previous JSON response was incomplete. Please continue from where it stopped.
|
|
|
|
PREVIOUSLY DELIVERED SUMMARY:
|
|
{continuationInfo}
|
|
|
|
LAST INCOMPLETE ELEMENT:
|
|
{cutOffElement}
|
|
|
|
TASK: Continue generating the JSON elements array from where it was cut off.
|
|
Complete the incomplete element and continue with remaining elements.
|
|
|
|
Return ONLY the continuation JSON (starting from the incomplete element).
|
|
The JSON should be a fragment that can be merged with the previous response."""
|
|
return continuationPrompt
|
|
|
|
options = AiCallOptions(
|
|
operationType=operationType,
|
|
priority=PriorityEnum.BALANCED,
|
|
processingMode=ProcessingModeEnum.DETAILED
|
|
)
|
|
|
|
aiResponseJson = await self.aiService._callAiWithLooping(
|
|
prompt=generationPrompt,
|
|
options=options,
|
|
debugPrefix=f"{chapterId}_section_{sectionId}",
|
|
promptBuilder=buildSectionPromptWithContinuation,
|
|
promptArgs={
|
|
"section": section,
|
|
"contentParts": [part],
|
|
"userPrompt": userPrompt,
|
|
"generationHint": generationHint,
|
|
"allSections": all_sections_list,
|
|
"sectionIndex": sectionIndex,
|
|
"isAggregation": isAggregation,
|
|
"services": self.services
|
|
},
|
|
operationId=sectionOperationId,
|
|
userPrompt=userPrompt,
|
|
contentParts=[part]
|
|
)
|
|
|
|
try:
|
|
parsedResponse = json.loads(self.services.utils.jsonExtractString(aiResponseJson))
|
|
if isinstance(parsedResponse, list):
|
|
generatedElements = parsedResponse
|
|
elif isinstance(parsedResponse, dict):
|
|
if "elements" in parsedResponse:
|
|
generatedElements = parsedResponse["elements"]
|
|
elif "sections" in parsedResponse and len(parsedResponse["sections"]) > 0:
|
|
firstSection = parsedResponse["sections"][0]
|
|
generatedElements = firstSection.get("elements", [])
|
|
elif parsedResponse.get("type"):
|
|
generatedElements = [parsedResponse]
|
|
else:
|
|
generatedElements = []
|
|
else:
|
|
generatedElements = []
|
|
|
|
class AiResponse:
|
|
def __init__(self, content):
|
|
self.content = content
|
|
|
|
aiResponse = AiResponse(aiResponseJson)
|
|
except Exception as parseError:
|
|
logger.error(f"Error parsing response from _callAiWithLooping for section {sectionId}: {str(parseError)}")
|
|
class AiResponse:
|
|
def __init__(self, content):
|
|
self.content = content
|
|
aiResponse = AiResponse(aiResponseJson)
|
|
generatedElements = []
|
|
|
|
self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response")
|
|
# Note: Debug files are written by _callAiWithLooping using debugPrefix
|
|
|
|
self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content")
|
|
|
|
responseElements = await self._processAiResponseForSection(
|
|
aiResponse=aiResponse,
|
|
contentType=contentType,
|
|
operationType=operationType,
|
|
sectionId=sectionId,
|
|
generationHint=generationHint,
|
|
generatedElements=generatedElements
|
|
)
|
|
elements.extend(responseElements)
|
|
|
|
self.services.chat.progressLogFinish(sectionOperationId, True)
|
|
|
|
chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0
|
|
self.services.chat.progressLogUpdate(
|
|
chapterOperationId,
|
|
chapterProgress,
|
|
f"Section {sectionIndex + 1}/{totalSections} completed"
|
|
)
|
|
|
|
except Exception as e:
|
|
self.services.chat.progressLogFinish(sectionOperationId, False)
|
|
elements.append({
|
|
"type": "error",
|
|
"message": f"Error generating section {sectionId}: {str(e)}",
|
|
"sectionId": sectionId
|
|
})
|
|
logger.error(f"Error generating section {sectionId}: {str(e)}")
|
|
chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0
|
|
self.services.chat.progressLogUpdate(
|
|
chapterOperationId,
|
|
chapterProgress,
|
|
f"Section {sectionIndex + 1}/{totalSections} completed (with errors)"
|
|
)
|
|
else:
|
|
# Füge extrahierten Content direkt hinzu (kein AI-Call)
|
|
if part.typeGroup == "image":
|
|
logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call")
|
|
elements.append({
|
|
"type": "image",
|
|
"content": {
|
|
"base64Data": part.data,
|
|
"altText": part.metadata.get("usageHint", part.label),
|
|
"caption": part.metadata.get("caption", "")
|
|
}
|
|
})
|
|
else:
|
|
logger.debug(f"Processing section {sectionId}: Single extracted TEXT part WITHOUT AI call")
|
|
elements.append({
|
|
"type": "extracted_text",
|
|
"content": part.data,
|
|
"source": part.metadata.get("documentId"),
|
|
"extractionPrompt": part.metadata.get("extractionPrompt")
|
|
})
|
|
|
|
# Update progress after section completion
|
|
chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0
|
|
self.services.chat.progressLogUpdate(
|
|
chapterOperationId,
|
|
chapterProgress,
|
|
f"Section {sectionIndex + 1}/{totalSections} completed"
|
|
)
|
|
|
|
overallProgress = calculateOverallProgress(chapterIndex - 1, totalChapters, sectionIndex + 1, totalSections)
|
|
self.services.chat.progressLogUpdate(
|
|
fillOperationId,
|
|
overallProgress,
|
|
f"Chapter {chapterIndex}/{totalChapters}, Section {sectionIndex + 1}/{totalSections} completed"
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Unexpected error processing section {sectionId}: {str(e)}")
|
|
elements.append({
|
|
"type": "error",
|
|
"message": f"Unexpected error processing section {sectionId}: {str(e)}",
|
|
"sectionId": sectionId
|
|
})
|
|
|
|
return elements
|
|
|
|
async def _fillChapterSections(
|
|
self,
|
|
chapterStructure: Dict[str, Any],
|
|
contentParts: List[ContentPart],
|
|
userPrompt: str,
|
|
parentOperationId: str,
|
|
language: str
|
|
) -> Dict[str, Any]:
|
|
"""
|
|
Phase 5D.2: Füllt Sections mit ContentParts.
|
|
"""
|
|
|
|
# Sammle alle Sections für Kontext-Informationen (für alle Sections)
|
|
all_sections_list = []
|
|
for doc in chapterStructure.get("documents", []):
|
|
for chapter in doc.get("chapters", []):
|
|
for section in chapter.get("sections", []):
|
|
all_sections_list.append(section)
|
|
|
|
# Berechne Gesamtanzahl Chapters für Progress-Tracking
|
|
totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", []))
|
|
fillOperationId = parentOperationId
|
|
|
|
# Helper function to calculate overall progress
|
|
def calculateOverallProgress(chapterIndex, totalChapters, sectionIndex, totalSections):
|
|
"""Calculate overall progress: 0.0 to 1.0"""
|
|
if totalChapters == 0:
|
|
return 1.0
|
|
|
|
# Progress from completed chapters (0 to chapterIndex-1)
|
|
completedChaptersProgress = chapterIndex / totalChapters
|
|
|
|
# Progress from current chapter (sectionIndex / totalSections)
|
|
currentChapterProgress = (sectionIndex / totalSections) / totalChapters if totalSections > 0 else 0
|
|
|
|
return min(1.0, completedChaptersProgress + currentChapterProgress)
|
|
|
|
# Process chapters sequentially with chapter-level progress
|
|
chapterIndex = 0
|
|
for doc in chapterStructure.get("documents", []):
|
|
for chapter in doc.get("chapters", []):
|
|
chapterIndex += 1
|
|
chapterId = chapter.get("id", "unknown")
|
|
chapterTitle = chapter.get("title", "Untitled Chapter")
|
|
sections = chapter.get("sections", [])
|
|
totalSections = len(sections)
|
|
|
|
# Start chapter operation
|
|
chapterOperationId = f"{fillOperationId}_chapter_{chapterId}"
|
|
self.services.chat.progressLogStart(
|
|
chapterOperationId,
|
|
"Chapter Generation",
|
|
f"Chapter {chapterIndex}/{totalChapters}",
|
|
chapterTitle,
|
|
parentOperationId=fillOperationId
|
|
)
|
|
|
|
# Process sections within chapter in parallel
|
|
sectionTasks = []
|
|
for sectionIndex, section in enumerate(sections):
|
|
# Create task for parallel processing
|
|
task = self._processSingleSection(
|
|
section=section,
|
|
sectionIndex=sectionIndex,
|
|
totalSections=totalSections,
|
|
chapterIndex=chapterIndex,
|
|
totalChapters=totalChapters,
|
|
chapterId=chapterId,
|
|
chapterOperationId=chapterOperationId,
|
|
fillOperationId=fillOperationId,
|
|
contentParts=contentParts,
|
|
userPrompt=userPrompt,
|
|
all_sections_list=all_sections_list,
|
|
language=language,
|
|
calculateOverallProgress=calculateOverallProgress
|
|
)
|
|
sectionTasks.append((sectionIndex, section, task))
|
|
|
|
# Execute all section tasks in parallel
|
|
if sectionTasks:
|
|
# Create list of tasks (without indices for gather)
|
|
tasks = [task for _, _, task in sectionTasks]
|
|
|
|
# Execute in parallel with error handling
|
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
|
|
|
# Process results in order and assign elements to sections
|
|
for (originalIndex, originalSection, _), result in zip(sectionTasks, results):
|
|
if isinstance(result, Exception):
|
|
logger.error(f"Error processing section {originalSection.get('id')}: {str(result)}")
|
|
# Set error element
|
|
originalSection["elements"] = [{
|
|
"type": "error",
|
|
"message": f"Error processing section: {str(result)}",
|
|
"sectionId": originalSection.get("id")
|
|
}]
|
|
else:
|
|
# Assign elements to section in correct order
|
|
originalSection["elements"] = result
|
|
|
|
# Finish chapter operation after all sections processed
|
|
self.services.chat.progressLogFinish(chapterOperationId, True)
|
|
|
|
# Update overall progress after chapter completion
|
|
overallProgress = chapterIndex / totalChapters if totalChapters > 0 else 1.0
|
|
self.services.chat.progressLogUpdate(
|
|
fillOperationId,
|
|
overallProgress,
|
|
f"Chapter {chapterIndex}/{totalChapters} completed: {chapterTitle}"
|
|
)
|
|
|
|
return chapterStructure
|
|
|
|
def _addContentPartsMetadata(
|
|
self,
|
|
structure: Dict[str, Any],
|
|
contentParts: List[ContentPart]
|
|
) -> Dict[str, Any]:
|
|
"""
|
|
Fügt ContentParts-Metadaten zur Struktur hinzu, wenn contentPartIds vorhanden sind.
|
|
Dies hilft der Validierung, den Kontext der ContentParts zu verstehen.
|
|
"""
|
|
# Erstelle Mapping von ContentPart-ID zu Metadaten
|
|
contentPartsMap = {}
|
|
for part in contentParts:
|
|
contentPartsMap[part.id] = {
|
|
"id": part.id,
|
|
"format": part.metadata.get("contentFormat", "unknown"),
|
|
"type": part.typeGroup,
|
|
"mimeType": part.mimeType,
|
|
"originalFileName": part.metadata.get("originalFileName"),
|
|
"usageHint": part.metadata.get("usageHint"),
|
|
"documentId": part.metadata.get("documentId"),
|
|
"dataSize": len(str(part.data)) if part.data else 0
|
|
}
|
|
|
|
# Füge Metadaten zu Sections hinzu, die contentPartIds haben
|
|
for doc in structure.get("documents", []):
|
|
# Prüfe ob Chapters vorhanden sind (neue Struktur)
|
|
if "chapters" in doc:
|
|
for chapter in doc.get("chapters", []):
|
|
# Füge Metadaten zu Chapter-Level contentPartIds hinzu
|
|
chapterContentPartIds = chapter.get("contentPartIds", [])
|
|
if chapterContentPartIds:
|
|
chapter["contentPartsMetadata"] = []
|
|
for partId in chapterContentPartIds:
|
|
if partId in contentPartsMap:
|
|
chapter["contentPartsMetadata"].append(contentPartsMap[partId])
|
|
|
|
# Füge Metadaten zu Sections hinzu
|
|
for section in chapter.get("sections", []):
|
|
contentPartIds = section.get("contentPartIds", [])
|
|
if contentPartIds:
|
|
section["contentPartsMetadata"] = []
|
|
for partId in contentPartIds:
|
|
if partId in contentPartsMap:
|
|
section["contentPartsMetadata"].append(contentPartsMap[partId])
|
|
|
|
return structure
|
|
|
|
def _flattenChaptersToSections(
|
|
self,
|
|
chapterStructure: Dict[str, Any]
|
|
) -> Dict[str, Any]:
|
|
"""
|
|
Flattening: Konvertiert Chapters zu finaler Section-Struktur.
|
|
Jedes Chapter wird zu einer Heading-Section (Level 1) + dessen Sections.
|
|
|
|
IMPORTANT: Chapters are the main structure elements (heading level 1).
|
|
All section headings with level < 2 are adjusted to level 2.
|
|
"""
|
|
result = {
|
|
"metadata": chapterStructure.get("metadata", {}),
|
|
"documents": []
|
|
}
|
|
|
|
for doc in chapterStructure.get("documents", []):
|
|
flattened_doc = {
|
|
"id": doc.get("id"),
|
|
"title": doc.get("title"),
|
|
"filename": doc.get("filename"),
|
|
"sections": []
|
|
}
|
|
|
|
for chapter in doc.get("chapters", []):
|
|
# 1. Vordefinierte Heading-Section für Chapter-Title (ALWAYS Level 1)
|
|
heading_section = {
|
|
"id": f"{chapter['id']}_heading",
|
|
"content_type": "heading",
|
|
"elements": [{
|
|
"type": "heading",
|
|
"content": {
|
|
"text": chapter.get("title", ""),
|
|
"level": 1 # Chapters are always level 1
|
|
}
|
|
}]
|
|
}
|
|
flattened_doc["sections"].append(heading_section)
|
|
|
|
# 2. Generierte Sections - adjust heading levels
|
|
for section in chapter.get("sections", []):
|
|
adjusted_section = self._adjustSectionHeadingLevels(section)
|
|
flattened_doc["sections"].append(adjusted_section)
|
|
|
|
result["documents"].append(flattened_doc)
|
|
|
|
return result
|
|
|
|
def _adjustSectionHeadingLevels(self, section: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
Adjust heading levels in sections: sections with type heading and level < 2 are changed to level 2.
|
|
Only chapter headings have level 1.
|
|
"""
|
|
adjusted_section = copy.deepcopy(section)
|
|
|
|
# Check if this is a heading section
|
|
if adjusted_section.get("content_type") == "heading":
|
|
elements = adjusted_section.get("elements", [])
|
|
for element in elements:
|
|
if isinstance(element, dict) and element.get("type") == "heading":
|
|
content = element.get("content", {})
|
|
if isinstance(content, dict):
|
|
level = content.get("level", 1)
|
|
# If level < 2, change to level 2 (only chapters have level 1)
|
|
if level < 2:
|
|
content["level"] = 2
|
|
|
|
return adjusted_section
|
|
|
|
def _buildChapterSectionsStructurePrompt(
|
|
self,
|
|
chapterId: str,
|
|
chapterLevel: int,
|
|
chapterTitle: str,
|
|
generationHint: str,
|
|
contentPartIds: List[str],
|
|
contentPartInstructions: Dict[str, Any],
|
|
contentParts: List[ContentPart],
|
|
userPrompt: str,
|
|
language: str = "en"
|
|
) -> str:
|
|
"""Baue Prompt für Chapter-Sections-Struktur-Generierung."""
|
|
# Baue ContentParts-Index (nur IDs, keine Previews!)
|
|
contentPartsIndex = ""
|
|
for partId in contentPartIds:
|
|
part = self._findContentPartById(partId, contentParts)
|
|
if not part:
|
|
continue
|
|
|
|
contentFormat = part.metadata.get("contentFormat", "unknown")
|
|
instruction = contentPartInstructions.get(partId, {}).get("instruction", "Use content as needed")
|
|
|
|
contentPartsIndex += f"\n- ContentPart ID: {partId}\n"
|
|
contentPartsIndex += f" Format: {contentFormat}\n"
|
|
contentPartsIndex += f" Type: {part.typeGroup}\n"
|
|
contentPartsIndex += f" Instruction: {instruction}\n"
|
|
|
|
if not contentPartsIndex:
|
|
contentPartsIndex = "\n(No content parts specified for this chapter)"
|
|
|
|
prompt = f"""TASK: Generate Chapter Sections Structure
|
|
|
|
LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
|
|
|
|
CHAPTER: {chapterTitle} (Level {chapterLevel}, ID: {chapterId})
|
|
GENERATION HINT: {generationHint}
|
|
|
|
NOTE: Chapter already has a heading section. Do NOT generate a heading for the chapter title.
|
|
|
|
IMPORTANT - SECTION INDEPENDENCE:
|
|
- Each section is independent and self-contained
|
|
- One section does NOT have information about another section
|
|
- Each section must provide its own context and be understandable alone
|
|
|
|
AVAILABLE CONTENT PARTS:
|
|
{contentPartsIndex}
|
|
|
|
CONTENT TYPES: table, bullet_list, heading, paragraph, code_block, image
|
|
|
|
useAiCall RULES:
|
|
- useAiCall: true ONLY if ContentPart Format is "extracted" AND transformation needed
|
|
- useAiCall: false if Format is "object" or "reference" (direct insertion)
|
|
- useAiCall: false if Format is "extracted" AND simple "include full text" instruction
|
|
- useAiCall: true if NO ContentPartIds provided (content must be generated from scratch); Sections without ContentParts MUST have a clear, detailed generationHint explaining what content to generate
|
|
|
|
RETURN JSON:
|
|
{{
|
|
"sections": [
|
|
{{
|
|
"id": "section_1",
|
|
"content_type": "paragraph",
|
|
"contentPartIds": ["extracted_part_1"],
|
|
"generationHint": "Include full text",
|
|
"useAiCall": false,
|
|
"elements": []
|
|
}}
|
|
]
|
|
}}
|
|
|
|
EXAMPLES (all content types):
|
|
- paragraph: {{"id": "s1", "content_type": "paragraph", "contentPartIds": ["extracted_1"], "generationHint": "Include full text", "useAiCall": false, "elements": []}}
|
|
- bullet_list: {{"id": "s2", "content_type": "bullet_list", "contentPartIds": ["extracted_1"], "generationHint": "Create bullet list", "useAiCall": true, "elements": []}}
|
|
- table: {{"id": "s3", "content_type": "table", "contentPartIds": ["extracted_1", "extracted_2"], "generationHint": "Create table", "useAiCall": true, "elements": []}}
|
|
- heading: {{"id": "s4", "content_type": "heading", "contentPartIds": ["extracted_1"], "generationHint": "Extract heading", "useAiCall": true, "elements": []}}
|
|
- code_block: {{"id": "s5", "content_type": "code_block", "contentPartIds": ["extracted_1"], "generationHint": "Format code", "useAiCall": true, "elements": []}}
|
|
- image: {{"id": "s6", "content_type": "image", "contentPartIds": ["obj_1"], "generationHint": "Display image", "useAiCall": false, "elements": []}}
|
|
- reference: {{"id": "s7", "content_type": "paragraph", "contentPartIds": ["ref_1"], "generationHint": "Reference", "useAiCall": false, "elements": []}}
|
|
- NO CONTENT PARTS (generate from scratch): {{"id": "s8", "content_type": "paragraph", "contentPartIds": [], "generationHint": "Write a detailed professional paragraph explaining [specific topic or purpose]. Include [key points to cover]. Address [important aspects]. Conclude with [summary or recommendations].", "useAiCall": true, "elements": []}}
|
|
|
|
CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON.
|
|
"""
|
|
return prompt
|
|
|
|
def _getContentStructureExample(self, contentType: str) -> str:
|
|
"""Get the JSON structure example for a specific content type."""
|
|
structures = {
|
|
"table": '{{"headers": ["Column1", "Column2"], "rows": [["Value1", "Value2"], ["Value3", "Value4"]]}}',
|
|
"bullet_list": '{{"items": ["Item 1", "Item 2", "Item 3"]}}',
|
|
"heading": '{{"text": "Section Title", "level": 2}}',
|
|
"paragraph": '{{"text": "This is paragraph text."}}',
|
|
"code_block": '{{"code": "function example() {{ return true; }}", "language": "javascript"}}',
|
|
"image": '{{"base64Data": "<base64_encoded_image_data>", "altText": "Description", "caption": "Optional caption"}}'
|
|
}
|
|
return structures.get(contentType, '{{"text": ""}}')
|
|
|
|
def _buildSectionGenerationPrompt(
|
|
self,
|
|
section: Dict[str, Any],
|
|
contentParts: List[Optional[ContentPart]],
|
|
userPrompt: str,
|
|
generationHint: str,
|
|
allSections: Optional[List[Dict[str, Any]]] = None,
|
|
sectionIndex: Optional[int] = None,
|
|
isAggregation: bool = False,
|
|
language: str = "en"
|
|
) -> str:
|
|
"""Baue Prompt für Section-Generierung mit vollständigem Kontext."""
|
|
# Filtere None-Werte
|
|
validParts = [p for p in contentParts if p is not None]
|
|
|
|
# Section-Metadaten
|
|
sectionId = section.get("id", "unknown")
|
|
contentType = section.get("content_type", "paragraph")
|
|
|
|
# Baue ContentParts-Beschreibung
|
|
contentPartsText = ""
|
|
if isAggregation:
|
|
# Aggregation: Zeige nur Metadaten, nicht Previews
|
|
contentPartsText += f"\n## CONTENT PARTS (Aggregation)\n"
|
|
contentPartsText += f"- Anzahl: {len(validParts)} ContentParts\n"
|
|
contentPartsText += f"- Alle ContentParts werden als Parameter übergeben (nicht im Prompt!)\n"
|
|
contentPartsText += f"- Jeder Part kann sehr groß sein → Chunking automatisch\n"
|
|
contentPartsText += f"- WICHTIG: Aggregiere ALLE Parts zu einem Element (z.B. eine Tabelle)\n\n"
|
|
contentPartsText += f"ContentPart IDs:\n"
|
|
for part in validParts:
|
|
contentFormat = part.metadata.get("contentFormat", "unknown")
|
|
contentPartsText += f" - {part.id} (Format: {contentFormat}, Type: {part.typeGroup}"
|
|
if part.metadata.get("originalFileName"):
|
|
contentPartsText += f", Source: {part.metadata.get('originalFileName')}"
|
|
contentPartsText += ")\n"
|
|
else:
|
|
# Einzelverarbeitung: Zeige Previews
|
|
for part in validParts:
|
|
contentFormat = part.metadata.get("contentFormat", "unknown")
|
|
contentPartsText += f"\n- ContentPart {part.id}:\n"
|
|
contentPartsText += f" Format: {contentFormat}\n"
|
|
contentPartsText += f" Type: {part.typeGroup}\n"
|
|
if part.metadata.get("originalFileName"):
|
|
contentPartsText += f" Source file: {part.metadata.get('originalFileName')}\n"
|
|
|
|
if contentFormat == "extracted":
|
|
# Zeige Preview von extrahiertem Text (länger für besseren Kontext)
|
|
previewLength = 1000
|
|
if part.data:
|
|
preview = part.data[:previewLength] + "..." if len(part.data) > previewLength else part.data
|
|
contentPartsText += f" Content preview:\n```\n{preview}\n```\n"
|
|
else:
|
|
contentPartsText += f" Content: (empty)\n"
|
|
elif contentFormat == "reference":
|
|
contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n"
|
|
if part.metadata.get("usageHint"):
|
|
contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n"
|
|
elif contentFormat == "object":
|
|
dataLength = len(part.data) if part.data else 0
|
|
contentPartsText += f" Object type: {part.typeGroup}\n"
|
|
contentPartsText += f" MIME type: {part.mimeType}\n"
|
|
contentPartsText += f" Data size: {dataLength} chars (base64 encoded)\n"
|
|
if part.metadata.get("usageHint"):
|
|
contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n"
|
|
|
|
# Baue Section-Kontext (vorherige und nachfolgende Sections)
|
|
contextText = ""
|
|
if allSections and sectionIndex is not None:
|
|
prevSections = []
|
|
nextSections = []
|
|
|
|
if sectionIndex > 0:
|
|
for i in range(max(0, sectionIndex - 2), sectionIndex):
|
|
prevSection = allSections[i]
|
|
prevSections.append({
|
|
"id": prevSection.get("id"),
|
|
"content_type": prevSection.get("content_type"),
|
|
"generation_hint": prevSection.get("generation_hint", "")[:100]
|
|
})
|
|
|
|
if sectionIndex < len(allSections) - 1:
|
|
for i in range(sectionIndex + 1, min(len(allSections), sectionIndex + 3)):
|
|
nextSection = allSections[i]
|
|
nextSections.append({
|
|
"id": nextSection.get("id"),
|
|
"content_type": nextSection.get("content_type"),
|
|
"generation_hint": nextSection.get("generation_hint", "")[:100]
|
|
})
|
|
|
|
if prevSections or nextSections:
|
|
contextText = "\n## DOCUMENT CONTEXT\n"
|
|
if prevSections:
|
|
contextText += "\nPrevious sections:\n"
|
|
for prev in prevSections:
|
|
contextText += f"- {prev['id']} ({prev['content_type']}): {prev['generation_hint']}\n"
|
|
if nextSections:
|
|
contextText += "\nFollowing sections:\n"
|
|
for next in nextSections:
|
|
contextText += f"- {next['id']} ({next['content_type']}): {next['generation_hint']}\n"
|
|
|
|
contentStructureExample = self._getContentStructureExample(contentType)
|
|
|
|
# Special handling for image content type with IMAGE_GENERATE
|
|
isImageGeneration = contentType == "image" and len(validParts) == 0
|
|
|
|
if isAggregation:
|
|
prompt = f"""# TASK: Generate Section Content (Aggregation)
|
|
|
|
LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
|
|
|
|
## SECTION METADATA
|
|
- Section ID: {sectionId}
|
|
- Content Type: {contentType}
|
|
- Generation Hint: {generationHint}
|
|
|
|
## AVAILABLE CONTENT FOR THIS SECTION
|
|
{contentPartsText if contentPartsText else "(No content parts specified for this section)"}
|
|
|
|
## INSTRUCTIONS
|
|
1. Generate content for section "{sectionId}" based on the generation hint above
|
|
2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data)
|
|
3. For table content_type: Create a single table with headers and rows from all ContentParts
|
|
4. For bullet_list content_type: Create a single list with items from all ContentParts
|
|
5. Format appropriately based on content_type ({contentType})
|
|
6. Ensure the generated content is self-contained and understandable independently
|
|
7. Return ONLY a JSON object with an "elements" array
|
|
8. Each element should match the content_type: {contentType}
|
|
9. CRITICAL - NO HTML/STYLING: Do NOT include HTML tags, CSS styles, or any formatting markup in text content. Return plain text only. Formatting is handled automatically by the renderer.
|
|
10. For paragraphs: Return plain text only, no HTML tags like <div>, <span>, <p>, or style attributes
|
|
11. For headings: Return plain text only, no HTML tags or styling
|
|
12. For images: Do NOT include base64 data in JSON - images are handled separately
|
|
|
|
## OUTPUT FORMAT
|
|
Return a JSON object with this structure:
|
|
|
|
{{
|
|
"elements": [
|
|
{{
|
|
"type": "{contentType}",
|
|
"content": {contentStructureExample}
|
|
}}
|
|
]
|
|
}}
|
|
|
|
CRITICAL:
|
|
- "content" MUST always be an object (never a string)
|
|
- For text content: Return plain text only, NO HTML tags, NO CSS styles, NO formatting markup
|
|
- Return ONLY valid JSON. Do not include any explanatory text outside the JSON.
|
|
|
|
## CONTEXT (for reference only)
|
|
{contextText if contextText else ""}
|
|
```
|
|
{userPrompt}
|
|
```
|
|
"""
|
|
else:
|
|
prompt = f"""# TASK: Generate Section Content
|
|
|
|
LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
|
|
|
|
## SECTION METADATA
|
|
- Section ID: {sectionId}
|
|
- Content Type: {contentType}
|
|
- Generation Hint: {generationHint}
|
|
|
|
## AVAILABLE CONTENT FOR THIS SECTION
|
|
{contentPartsText if contentPartsText else "(No content parts specified for this section)"}
|
|
|
|
## INSTRUCTIONS
|
|
1. Generate content for section "{sectionId}" based on the generation hint above
|
|
2. Use the available content parts to populate this section
|
|
3. For extracted text: Format appropriately based on content_type ({contentType})
|
|
4. Ensure the generated content is self-contained and understandable independently
|
|
5. Return ONLY a JSON object with an "elements" array
|
|
6. Each element should match the content_type: {contentType}
|
|
7. CRITICAL - NO HTML/STYLING: Do NOT include HTML tags, CSS styles, or any formatting markup in text content. Return plain text only. Formatting is handled automatically by the renderer.
|
|
8. For paragraphs: Return plain text only, no HTML tags like <div>, <span>, <p>, or style attributes
|
|
9. For headings: Return plain text only, no HTML tags or styling
|
|
10. For images: If you need to reference an image, describe it in altText. Do NOT include base64 data - images are handled separately
|
|
|
|
## OUTPUT FORMAT
|
|
Return a JSON object with this structure:
|
|
|
|
{{
|
|
"elements": [
|
|
{{
|
|
"type": "{contentType}",
|
|
"content": {contentStructureExample}
|
|
}}
|
|
]
|
|
}}
|
|
|
|
CRITICAL:
|
|
- "content" MUST always be an object (never a string)
|
|
- For text content: Return plain text only, NO HTML tags, NO CSS styles, NO formatting markup
|
|
- Return ONLY valid JSON. Do not include any explanatory text outside the JSON
|
|
|
|
## CONTEXT (for reference only)
|
|
{contextText if contextText else ""}
|
|
```
|
|
{userPrompt}
|
|
```
|
|
"""
|
|
return prompt
|
|
|
|
def _findContentPartById(self, partId: str, contentParts: List[ContentPart]) -> Optional[ContentPart]:
|
|
"""Finde ContentPart nach ID."""
|
|
for part in contentParts:
|
|
if part.id == partId:
|
|
return part
|
|
return None
|
|
|
|
def _needsAggregation(
|
|
self,
|
|
contentType: str,
|
|
contentPartCount: int
|
|
) -> bool:
|
|
"""
|
|
Bestimmt ob mehrere ContentParts aggregiert werden müssen.
|
|
|
|
Aggregation nötig wenn:
|
|
- content_type erfordert Aggregation (table, bullet_list)
|
|
- UND mehrere ContentParts vorhanden sind (> 1)
|
|
|
|
Args:
|
|
contentType: Section content_type
|
|
contentPartCount: Anzahl der ContentParts in dieser Section
|
|
|
|
Returns:
|
|
True wenn Aggregation nötig, False sonst
|
|
"""
|
|
aggregationTypes = ["table", "bullet_list"]
|
|
|
|
if contentType in aggregationTypes and contentPartCount > 1:
|
|
return True
|
|
|
|
# Optional: Auch für paragraph wenn mehrere Parts vorhanden
|
|
# (z.B. Vergleich mehrerer Dokumente)
|
|
# Standard: Keine Aggregation für paragraph
|
|
return False
|
|
|