# Copyright (c) 2025 Patrick Motsch # All rights reserved. """ Structure Filling Module Handles filling document structure with content, including: - Filling sections with content parts - Building section generation prompts - Aggregation logic """ import json import logging import copy from typing import Dict, Any, List, Optional from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum logger = logging.getLogger(__name__) class StructureFiller: """Handles filling document structure with content.""" def __init__(self, services, aiService): """Initialize StructureFiller with service center and AI service access.""" self.services = services self.aiService = aiService async def fillStructure( self, structure: Dict[str, Any], contentParts: List[ContentPart], userPrompt: str, parentOperationId: str ) -> Dict[str, Any]: """ Phase 5D: Chapter-Content-Generierung (Zwei-Phasen-Ansatz). Phase 5D.1: Generiert Sections-Struktur für jedes Chapter Phase 5D.2: Füllt Sections mit ContentParts Args: structure: Struktur-Dict mit documents und chapters (nicht sections!) contentParts: Alle vorbereiteten ContentParts userPrompt: User-Anfrage parentOperationId: Parent Operation-ID für ChatLog-Hierarchie Returns: Gefüllte Struktur mit elements in jeder Section (nach Flattening) """ # Erstelle Operation-ID für Struktur-Abfüllen fillOperationId = f"{parentOperationId}_structure_filling" # Validate structure has chapters hasChapters = False for doc in structure.get("documents", []): if "chapters" in doc: hasChapters = True break if not hasChapters: error_msg = "Structure must have chapters. Legacy section-based structure is not supported." logger.error(error_msg) raise ValueError(error_msg) # Starte ChatLog mit Parent-Referenz chapterCount = sum(len(doc.get("chapters", [])) for doc in structure.get("documents", [])) self.services.chat.progressLogStart( fillOperationId, "Chapter Content Generation", "Filling", f"Processing {chapterCount} chapters", parentOperationId=parentOperationId ) try: filledStructure = copy.deepcopy(structure) # Phase 5D.1: Sections-Struktur für jedes Chapter generieren filledStructure = await self._generateChapterSectionsStructure( filledStructure, contentParts, userPrompt, fillOperationId ) # Phase 5D.2: Sections mit ContentParts füllen filledStructure = await self._fillChapterSections( filledStructure, contentParts, userPrompt, fillOperationId ) # Flattening: Chapters zu Sections konvertieren flattenedStructure = self._flattenChaptersToSections(filledStructure) # Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung) flattenedStructure = self._addContentPartsMetadata(flattenedStructure, contentParts) # ChatLog abschließen self.services.chat.progressLogFinish(fillOperationId, True) return flattenedStructure except Exception as e: self.services.chat.progressLogFinish(fillOperationId, False) logger.error(f"Error in fillStructure: {str(e)}") raise async def _generateChapterSectionsStructure( self, chapterStructure: Dict[str, Any], contentParts: List[ContentPart], userPrompt: str, parentOperationId: str ) -> Dict[str, Any]: """ Phase 5D.1: Generiert Sections-Struktur für jedes Chapter (ohne Content). Sections enthalten: content_type, contentPartIds, generationHint, useAiCall """ # Count total chapters for progress tracking totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", [])) chapterIndex = 0 for doc in chapterStructure.get("documents", []): for chapter in doc.get("chapters", []): chapterIndex += 1 chapterId = chapter.get("id", "unknown") chapterLevel = chapter.get("level", 1) chapterTitle = chapter.get("title", "Untitled Chapter") generationHint = chapter.get("generationHint", "") contentPartIds = chapter.get("contentPartIds", []) contentPartInstructions = chapter.get("contentPartInstructions", {}) # Update progress for chapter structure generation progress = chapterIndex / totalChapters if totalChapters > 0 else 1.0 self.services.chat.progressLogUpdate( parentOperationId, progress, f"Generating sections for Chapter {chapterIndex}/{totalChapters}: {chapterTitle}" ) chapterPrompt = self._buildChapterSectionsStructurePrompt( chapterId=chapterId, chapterLevel=chapterLevel, chapterTitle=chapterTitle, generationHint=generationHint, contentPartIds=contentPartIds, contentPartInstructions=contentPartInstructions, contentParts=contentParts, userPrompt=userPrompt ) # AI-Call für Chapter-Struktur-Generierung # Note: Debug logging is handled by callAiPlanning aiResponse = await self.aiService.callAiPlanning( prompt=chapterPrompt, debugType=f"chapter_structure_{chapterId}" ) sectionsStructure = json.loads( self.services.utils.jsonExtractString(aiResponse) ) chapter["sections"] = sectionsStructure.get("sections", []) # Setze useAiCall Flag (falls nicht von AI gesetzt) # WICHTIG: useAiCall kann nur true sein, wenn mindestens ein ContentPart Format "extracted" hat! # "object" und "reference" Formate werden direkt als Elemente hinzugefügt, benötigen kein AI. for section in chapter["sections"]: if "useAiCall" not in section: contentType = section.get("content_type", "paragraph") contentPartIds = section.get("contentPartIds", []) # Prüfe ob mindestens ein ContentPart Format "extracted" hat hasExtractedPart = False for partId in contentPartIds: part = self._findContentPartById(partId, contentParts) if part: contentFormat = part.metadata.get("contentFormat", "unknown") if contentFormat == "extracted": hasExtractedPart = True break # useAiCall kann nur true sein, wenn extracted Parts vorhanden sind useAiCall = False if hasExtractedPart: # Prüfe ob Transformation nötig ist useAiCall = contentType != "paragraph" # Prüfe contentPartInstructions für Transformation if not useAiCall: for partId in contentPartIds: instruction = contentPartInstructions.get(partId, {}).get("instruction", "") if instruction and instruction.lower() not in ["include full text", "include all content", "use full extracted text"]: useAiCall = True break section["useAiCall"] = useAiCall logger.debug(f"Section {section.get('id')}: useAiCall={useAiCall} (hasExtractedPart={hasExtractedPart}, contentType={contentType})") return chapterStructure async def _fillChapterSections( self, chapterStructure: Dict[str, Any], contentParts: List[ContentPart], userPrompt: str, parentOperationId: str ) -> Dict[str, Any]: """ Phase 5D.2: Füllt Sections mit ContentParts. """ # Sammle alle Sections für Kontext-Informationen (für alle Sections) all_sections_list = [] for doc in chapterStructure.get("documents", []): for chapter in doc.get("chapters", []): for section in chapter.get("sections", []): all_sections_list.append(section) # Berechne Gesamtanzahl Chapters für Progress-Tracking totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", [])) fillOperationId = parentOperationId # Helper function to calculate overall progress def calculateOverallProgress(chapterIndex, totalChapters, sectionIndex, totalSections): """Calculate overall progress: 0.0 to 1.0""" if totalChapters == 0: return 1.0 # Progress from completed chapters (0 to chapterIndex-1) completedChaptersProgress = chapterIndex / totalChapters # Progress from current chapter (sectionIndex / totalSections) currentChapterProgress = (sectionIndex / totalSections) / totalChapters if totalSections > 0 else 0 return min(1.0, completedChaptersProgress + currentChapterProgress) # Process chapters sequentially with chapter-level progress chapterIndex = 0 for doc in chapterStructure.get("documents", []): for chapter in doc.get("chapters", []): chapterIndex += 1 chapterId = chapter.get("id", "unknown") chapterTitle = chapter.get("title", "Untitled Chapter") sections = chapter.get("sections", []) totalSections = len(sections) # Start chapter operation chapterOperationId = f"{fillOperationId}_chapter_{chapterId}" self.services.chat.progressLogStart( chapterOperationId, "Chapter Generation", f"Chapter {chapterIndex}/{totalChapters}", chapterTitle, parentOperationId=fillOperationId ) # Process sections within chapter for sectionIndex, section in enumerate(sections): sectionId = section.get("id") sectionTitle = section.get("title", sectionId) contentPartIds = section.get("contentPartIds", []) contentFormats = section.get("contentFormats", {}) # Check both camelCase and snake_case for generationHint generationHint = section.get("generationHint") or section.get("generation_hint") contentType = section.get("content_type", "paragraph") useAiCall = section.get("useAiCall", False) # Update overall progress at start of section overallProgress = calculateOverallProgress(chapterIndex - 1, totalChapters, sectionIndex, totalSections) self.services.chat.progressLogUpdate( fillOperationId, overallProgress, f"Chapter {chapterIndex}/{totalChapters}, Section {sectionIndex + 1}/{totalSections}: {sectionTitle}" ) # WICHTIG: Wenn keine ContentParts vorhanden sind UND kein generationHint, kann kein AI-Call gemacht werden # Aber: Wenn generationHint vorhanden ist, SOLLTE AI verwendet werden, auch wenn useAiCall=false gesetzt ist # (z.B. wenn AI die Struktur generiert hat, aber useAiCall falsch gesetzt wurde) if len(contentPartIds) == 0 and not generationHint: useAiCall = False logger.debug(f"Section {sectionId}: No content parts and no generation hint, setting useAiCall=False") elif len(contentPartIds) == 0 and generationHint and not useAiCall: # Override: If there's a generationHint but no content parts, we should use AI # This handles cases where structure generation set useAiCall=false incorrectly useAiCall = True logger.info(f"Section {sectionId}: Overriding useAiCall=True (has generationHint but no content parts)") elements = [] # Prüfe ob Aggregation nötig ist needsAggregation = self._needsAggregation( contentType=contentType, contentPartCount=len(contentPartIds) ) logger.info(f"Processing section {sectionId}: contentType={contentType}, contentPartCount={len(contentPartIds)}, useAiCall={useAiCall}, needsAggregation={needsAggregation}, hasGenerationHint={bool(generationHint)}") if needsAggregation and useAiCall: # Aggregation: Alle Parts zusammen verarbeiten sectionParts = [ self._findContentPartById(pid, contentParts) for pid in contentPartIds ] sectionParts = [p for p in sectionParts if p is not None] if sectionParts: # Filtere nur extracted Parts für Aggregation (reference/object werden separat behandelt) extractedParts = [ p for p in sectionParts if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted" ] nonExtractedParts = [ p for p in sectionParts if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted" ] # Verarbeite non-extracted Parts separat (reference, object) for part in nonExtractedParts: contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat")) if contentFormat == "reference": elements.append({ "type": "reference", "documentReference": part.metadata.get("documentReference"), "label": part.metadata.get("usageHint", part.label) }) elif contentFormat == "object": # Nested content structure for objects if part.typeGroup == "image": elements.append({ "type": "image", "content": { "base64Data": part.data, "altText": part.metadata.get("usageHint", part.label), "caption": part.metadata.get("caption", "") } }) else: elements.append({ "type": part.typeGroup, "content": { "data": part.data, "mimeType": part.mimeType, "label": part.metadata.get("usageHint", part.label) } }) # Aggregiere extracted Parts mit AI if extractedParts: logger.debug(f"Section {sectionId}: Aggregating {len(extractedParts)} extracted parts with AI") generationPrompt = self._buildSectionGenerationPrompt( section=section, contentParts=extractedParts, # ALLE PARTS für Aggregation! userPrompt=userPrompt, generationHint=generationHint, allSections=all_sections_list, sectionIndex=sectionIndex, isAggregation=True ) # Erstelle Operation-ID für Section-Generierung sectionOperationId = f"{fillOperationId}_section_{sectionId}" # Starte ChatLog mit Parent-Referenz (chapter, not fillOperationId) self.services.chat.progressLogStart( sectionOperationId, "Section Generation (Aggregation)", f"Section {sectionIndex + 1}/{totalSections}", f"{sectionTitle} ({len(extractedParts)} parts)", parentOperationId=chapterOperationId ) try: # Update: Building prompt self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt") # Debug: Log Prompt self.services.utils.writeDebugFile( generationPrompt, f"{chapterId}_section_{sectionId}_prompt" ) logger.debug(f"Logged section prompt: {chapterId}_section_{sectionId}_prompt (aggregation)") # Update: Calling AI self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation") # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!) # Use IMAGE_GENERATE for image content type operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) if operationType == OperationTypeEnum.IMAGE_GENERATE: maxPromptLength = 4000 if len(generationPrompt) > maxPromptLength: logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters") # Keep the beginning (task, metadata, generation hint) and truncate from end generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline # For IMAGE_GENERATE, don't pass contentParts - image generation uses prompt only, not content chunks contentPartsForCall = [] if operationType == OperationTypeEnum.IMAGE_GENERATE else extractedParts request = AiCallRequest( prompt=generationPrompt, contentParts=contentPartsForCall, # Empty for IMAGE_GENERATE, all parts for others options=AiCallOptions( operationType=operationType, priority=PriorityEnum.BALANCED, processingMode=ProcessingModeEnum.DETAILED ) ) aiResponse = await self.aiService.callAi(request) # Update: Processing response self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response") # Debug: Log Response self.services.utils.writeDebugFile( aiResponse.content, f"{chapterId}_section_{sectionId}_response" ) logger.debug(f"Logged section response: {chapterId}_section_{sectionId}_response (aggregation)") # Update: Validating content self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content") # Handle IMAGE_GENERATE differently - returns image data directly if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: import base64 base64Data = "" # Convert image data to base64 string if needed if isinstance(aiResponse.content, bytes): base64Data = base64.b64encode(aiResponse.content).decode('utf-8') elif isinstance(aiResponse.content, str): # Check if it's already a JSON structure try: # Try to parse as JSON first jsonContent = json.loads(self.services.utils.jsonExtractString(aiResponse.content)) # If it's already a proper JSON structure with image element, use it if isinstance(jsonContent, dict) and jsonContent.get("type") == "image": elements.append(jsonContent) logger.debug("AI returned proper JSON image structure") # Skip remaining image processing, but continue with progress updates base64Data = None # Signal that image was already processed elif isinstance(jsonContent, list) and len(jsonContent) > 0: # Check if first element is an image if isinstance(jsonContent[0], dict) and jsonContent[0].get("type") == "image": elements.extend(jsonContent) logger.debug("AI returned proper JSON image structure in list") # Skip remaining image processing, but continue with progress updates base64Data = None # Signal that image was already processed else: base64Data = "" # Continue with normal processing except (json.JSONDecodeError, ValueError, AttributeError): # Not JSON, treat as base64 string or data URI base64Data = "" # Will be processed below # Process base64 if not already handled above if base64Data is None: # Already processed as JSON, skip base64 processing pass elif aiResponse.content.startswith("data:image/"): # Extract base64 from data URI base64Data = aiResponse.content.split(",", 1)[1] else: # Check if it looks like base64 (alphanumeric + / + =) content_stripped = aiResponse.content.strip() if len(content_stripped) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r\t " for c in content_stripped[:200]): # Looks like base64, use it base64Data = content_stripped.replace("\n", "").replace("\r", "").replace("\t", "").replace(" ", "") else: base64Data = aiResponse.content else: base64Data = "" # Always create proper JSON structure for images (if not already processed) if base64Data is None: # Image already processed as JSON, skip pass elif base64Data: elements.append({ "type": "image", "content": { "base64Data": base64Data, "altText": generationHint or "Generated image", "caption": "" } }) logger.debug(f"Created proper JSON image structure with base64Data length: {len(base64Data)}") else: logger.warning(f"IMAGE_GENERATE returned empty or invalid content for section {sectionId}") elements.append({ "type": "error", "message": f"Image generation returned empty or invalid content", "sectionId": sectionId }) else: # Parse JSON response for other content types try: generatedElements = json.loads( self.services.utils.jsonExtractString(aiResponse.content) ) if isinstance(generatedElements, list): elements.extend(generatedElements) elif isinstance(generatedElements, dict) and "elements" in generatedElements: elements.extend(generatedElements["elements"]) elif isinstance(generatedElements, dict) and generatedElements.get("type"): # Single element in dict format elements.append(generatedElements) except (json.JSONDecodeError, ValueError) as json_error: logger.error(f"Error parsing JSON response for section {sectionId}: {str(json_error)}") # Try to extract any image data that might be in the response if contentType == "image": # Check if response content might be base64 image data content_str = str(aiResponse.content) if len(content_str) > 100: elements.append({ "type": "error", "message": f"Failed to parse image generation response: {str(json_error)}", "sectionId": sectionId }) else: elements.append({ "type": "error", "message": f"Failed to parse JSON response: {str(json_error)}", "sectionId": sectionId }) # ChatLog abschließen self.services.chat.progressLogFinish(sectionOperationId, True) # Update chapter progress after section completion chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 self.services.chat.progressLogUpdate( chapterOperationId, chapterProgress, f"Section {sectionIndex + 1}/{totalSections} completed" ) except Exception as e: # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) self.services.chat.progressLogFinish(sectionOperationId, False) elements.append({ "type": "error", "message": f"Error generating section {sectionId}: {str(e)}", "sectionId": sectionId }) logger.error(f"Error generating section {sectionId}: {str(e)}") # Still update chapter progress even on error chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 self.services.chat.progressLogUpdate( chapterOperationId, chapterProgress, f"Section {sectionIndex + 1}/{totalSections} completed (with errors)" ) # NICHT raise - Section wird mit Fehlermeldung gerendert else: # Einzelverarbeitung: Jeder Part einzeln ODER Generation ohne ContentParts # Handle case where no content parts but generationHint exists (e.g., Executive Summary) if len(contentPartIds) == 0 and useAiCall and generationHint: # Generate content from scratch using only generationHint logger.debug(f"Processing section {sectionId}: No content parts, generating from generationHint only") generationPrompt = self._buildSectionGenerationPrompt( section=section, contentParts=[], # NO PARTS userPrompt=userPrompt, generationHint=generationHint, allSections=all_sections_list, sectionIndex=sectionIndex, isAggregation=False ) # Erstelle Operation-ID für Section-Generierung sectionOperationId = f"{fillOperationId}_section_{sectionId}" # Starte ChatLog mit Parent-Referenz (chapter, not fillOperationId) self.services.chat.progressLogStart( sectionOperationId, "Section Generation", f"Section {sectionIndex + 1}/{totalSections}", f"{sectionTitle} (from generationHint)", parentOperationId=chapterOperationId ) try: # Update: Building prompt self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt") # Debug: Log Prompt self.services.utils.writeDebugFile( generationPrompt, f"{chapterId}_section_{sectionId}_prompt" ) logger.debug(f"Logged section prompt: {chapterId}_section_{sectionId}_prompt") # Update: Calling AI self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation") # Verwende callAi ohne ContentParts operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) if operationType == OperationTypeEnum.IMAGE_GENERATE: maxPromptLength = 4000 if len(generationPrompt) > maxPromptLength: logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters") # Keep the beginning (task, metadata, generation hint) and truncate from end generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline request = AiCallRequest( prompt=generationPrompt, contentParts=[], # NO PARTS options=AiCallOptions( operationType=operationType, priority=PriorityEnum.BALANCED, processingMode=ProcessingModeEnum.DETAILED ) ) aiResponse = await self.aiService.callAi(request) # Update: Processing response self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response") # Debug: Log Response self.services.utils.writeDebugFile( aiResponse.content, f"{chapterId}_section_{sectionId}_response" ) logger.debug(f"Logged section response: {chapterId}_section_{sectionId}_response") # Update: Validating content self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content") # Handle IMAGE_GENERATE differently - returns image data directly if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: import base64 base64Data = "" # Convert image data to base64 string if needed if isinstance(aiResponse.content, bytes): base64Data = base64.b64encode(aiResponse.content).decode('utf-8') elif isinstance(aiResponse.content, str): # Check if it's already a JSON structure try: jsonContent = json.loads(self.services.utils.jsonExtractString(aiResponse.content)) if isinstance(jsonContent, dict) and jsonContent.get("type") == "image": elements.append(jsonContent) logger.debug("AI returned proper JSON image structure") # Skip remaining image processing, but continue with progress updates base64Data = None # Signal that image was already processed elif isinstance(jsonContent, list) and len(jsonContent) > 0: if isinstance(jsonContent[0], dict) and jsonContent[0].get("type") == "image": elements.extend(jsonContent) logger.debug("AI returned proper JSON image structure in list") # Skip remaining image processing, but continue with progress updates base64Data = None # Signal that image was already processed else: base64Data = "" # Continue with normal processing except (json.JSONDecodeError, ValueError, AttributeError): base64Data = "" # Will be processed below # Process base64 if not already handled above if base64Data is None: # Already processed as JSON, skip base64 processing pass elif aiResponse.content.startswith("data:image/"): # Extract base64 from data URI base64Data = aiResponse.content.split(",", 1)[1] else: content_stripped = aiResponse.content.strip() if len(content_stripped) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r\t " for c in content_stripped[:200]): base64Data = content_stripped.replace("\n", "").replace("\r", "").replace("\t", "").replace(" ", "") else: base64Data = aiResponse.content else: base64Data = "" # Always create proper JSON structure for images (if not already processed) if base64Data is None: # Image already processed as JSON, skip pass elif base64Data: elements.append({ "type": "image", "content": { "base64Data": base64Data, "altText": generationHint or "Generated image", "caption": "" } }) logger.debug(f"Created proper JSON image structure with base64Data length: {len(base64Data)}") else: logger.warning(f"IMAGE_GENERATE returned empty content for section {sectionId}") elements.append({ "type": "error", "message": f"Image generation returned empty content", "sectionId": sectionId }) else: # Parse JSON response for other content types try: generatedElements = json.loads( self.services.utils.jsonExtractString(aiResponse.content) ) if isinstance(generatedElements, list): elements.extend(generatedElements) elif isinstance(generatedElements, dict) and "elements" in generatedElements: elements.extend(generatedElements["elements"]) elif isinstance(generatedElements, dict) and generatedElements.get("type"): elements.append(generatedElements) except (json.JSONDecodeError, ValueError) as json_error: logger.error(f"Error parsing JSON response for section {sectionId}: {str(json_error)}") elements.append({ "type": "error", "message": f"Failed to parse JSON response: {str(json_error)}", "sectionId": sectionId }) # ChatLog abschließen self.services.chat.progressLogFinish(sectionOperationId, True) # Update chapter progress after section completion chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 self.services.chat.progressLogUpdate( chapterOperationId, chapterProgress, f"Section {sectionIndex + 1}/{totalSections} completed" ) except Exception as e: # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) self.services.chat.progressLogFinish(sectionOperationId, False) elements.append({ "type": "error", "message": f"Error generating section {sectionId}: {str(e)}", "sectionId": sectionId }) logger.error(f"Error generating section {sectionId}: {str(e)}") # Still update chapter progress even on error chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 self.services.chat.progressLogUpdate( chapterOperationId, chapterProgress, f"Section {sectionIndex + 1}/{totalSections} completed (with errors)" ) # Einzelverarbeitung: Jeder Part einzeln for partId in contentPartIds: part = self._findContentPartById(partId, contentParts) if not part: continue contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat")) if contentFormat == "reference": # Füge Dokument-Referenz hinzu elements.append({ "type": "reference", "documentReference": part.metadata.get("documentReference"), "label": part.metadata.get("usageHint", part.label) }) elif contentFormat == "object": # Füge base64 Object hinzu (nested in content structure) if part.typeGroup == "image": elements.append({ "type": "image", "content": { "base64Data": part.data, "altText": part.metadata.get("usageHint", part.label), "caption": part.metadata.get("caption", "") } }) else: # For other object types, use generic structure elements.append({ "type": part.typeGroup, "content": { "data": part.data, "mimeType": part.mimeType, "label": part.metadata.get("usageHint", part.label) } }) elif contentFormat == "extracted": # WICHTIG: Prüfe sowohl useAiCall als auch generationHint if useAiCall and generationHint: # AI-Call mit einzelnen ContentPart logger.debug(f"Processing section {sectionId}: Single extracted part with AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)})") generationPrompt = self._buildSectionGenerationPrompt( section=section, contentParts=[part], # EIN PART userPrompt=userPrompt, generationHint=generationHint, allSections=all_sections_list, sectionIndex=sectionIndex, isAggregation=False ) # Erstelle Operation-ID für Section-Generierung sectionOperationId = f"{fillOperationId}_section_{sectionId}" # Starte ChatLog mit Parent-Referenz (chapter, not fillOperationId) self.services.chat.progressLogStart( sectionOperationId, "Section Generation", f"Section {sectionIndex + 1}/{totalSections}", f"{sectionTitle} (single part)", parentOperationId=chapterOperationId ) try: # Update: Building prompt self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt") # Debug: Log Prompt self.services.utils.writeDebugFile( generationPrompt, f"{chapterId}_section_{sectionId}_prompt" ) logger.debug(f"Logged section prompt: {chapterId}_section_{sectionId}_prompt") # Update: Calling AI self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation") # Verwende callAi für ContentParts-Unterstützung # Use IMAGE_GENERATE for image content type operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) if operationType == OperationTypeEnum.IMAGE_GENERATE: maxPromptLength = 4000 if len(generationPrompt) > maxPromptLength: logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters") # Keep the beginning (task, metadata, generation hint) and truncate from end generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline # For IMAGE_GENERATE, don't pass contentParts - image generation uses prompt only, not content chunks contentPartsForCall = [] if operationType == OperationTypeEnum.IMAGE_GENERATE else [part] request = AiCallRequest( prompt=generationPrompt, contentParts=contentPartsForCall, options=AiCallOptions( operationType=operationType, priority=PriorityEnum.BALANCED, processingMode=ProcessingModeEnum.DETAILED ) ) aiResponse = await self.aiService.callAi(request) # Update: Processing response self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response") # Debug: Log Response self.services.utils.writeDebugFile( aiResponse.content, f"{chapterId}_section_{sectionId}_response" ) logger.debug(f"Logged section response: {chapterId}_section_{sectionId}_response") # Update: Validating content self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content") # Handle IMAGE_GENERATE differently - returns image data directly if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: import base64 base64Data = "" # Convert image data to base64 string if needed if isinstance(aiResponse.content, bytes): base64Data = base64.b64encode(aiResponse.content).decode('utf-8') elif isinstance(aiResponse.content, str): # Check if it's already a JSON structure try: jsonContent = json.loads(self.services.utils.jsonExtractString(aiResponse.content)) if isinstance(jsonContent, dict) and jsonContent.get("type") == "image": elements.append(jsonContent) logger.debug("AI returned proper JSON image structure") # Skip remaining image processing, but continue with progress updates base64Data = None # Signal that image was already processed elif isinstance(jsonContent, list) and len(jsonContent) > 0: if isinstance(jsonContent[0], dict) and jsonContent[0].get("type") == "image": elements.extend(jsonContent) logger.debug("AI returned proper JSON image structure in list") # Skip remaining image processing, but continue with progress updates base64Data = None # Signal that image was already processed else: base64Data = "" # Continue with normal processing except (json.JSONDecodeError, ValueError, AttributeError): base64Data = "" # Will be processed below # Process base64 if not already handled above if base64Data is None: # Already processed as JSON, skip base64 processing pass elif aiResponse.content.startswith("data:image/"): # Extract base64 from data URI base64Data = aiResponse.content.split(",", 1)[1] else: content_stripped = aiResponse.content.strip() if len(content_stripped) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r\t " for c in content_stripped[:200]): base64Data = content_stripped.replace("\n", "").replace("\r", "").replace("\t", "").replace(" ", "") else: base64Data = aiResponse.content else: base64Data = "" # Always create proper JSON structure for images (if not already processed) if base64Data is None: # Image already processed as JSON, skip pass elif base64Data: elements.append({ "type": "image", "content": { "base64Data": base64Data, "altText": generationHint or "Generated image", "caption": "" } }) logger.debug(f"Created proper JSON image structure with base64Data length: {len(base64Data)}") else: logger.warning(f"IMAGE_GENERATE returned empty content for section {sectionId}") elements.append({ "type": "error", "message": f"Image generation returned empty content", "sectionId": sectionId }) else: # Parse JSON response for other content types try: generatedElements = json.loads( self.services.utils.jsonExtractString(aiResponse.content) ) if isinstance(generatedElements, list): elements.extend(generatedElements) elif isinstance(generatedElements, dict) and "elements" in generatedElements: elements.extend(generatedElements["elements"]) elif isinstance(generatedElements, dict) and generatedElements.get("type"): elements.append(generatedElements) except (json.JSONDecodeError, ValueError) as json_error: logger.error(f"Error parsing JSON response for section {sectionId}: {str(json_error)}") elements.append({ "type": "error", "message": f"Failed to parse JSON response: {str(json_error)}", "sectionId": sectionId }) # ChatLog abschließen self.services.chat.progressLogFinish(sectionOperationId, True) # Update chapter progress after section completion chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 self.services.chat.progressLogUpdate( chapterOperationId, chapterProgress, f"Section {sectionIndex + 1}/{totalSections} completed" ) except Exception as e: # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) self.services.chat.progressLogFinish(sectionOperationId, False) elements.append({ "type": "error", "message": f"Error generating section {sectionId}: {str(e)}", "sectionId": sectionId }) logger.error(f"Error generating section {sectionId}: {str(e)}") # Still update chapter progress even on error chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 self.services.chat.progressLogUpdate( chapterOperationId, chapterProgress, f"Section {sectionIndex + 1}/{totalSections} completed (with errors)" ) # NICHT raise - Section wird mit Fehlermeldung gerendert else: # Füge extrahierten Content direkt hinzu (kein AI-Call) # CRITICAL: Check part typeGroup to determine correct element type if part.typeGroup == "image": # Image content should be added as image element, not extracted_text logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call - adding as image element") elements.append({ "type": "image", "content": { "base64Data": part.data, "altText": part.metadata.get("usageHint", part.label), "caption": part.metadata.get("caption", "") } }) else: # Text content - add as extracted_text element logger.debug(f"Processing section {sectionId}: Single extracted TEXT part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly") elements.append({ "type": "extracted_text", "content": part.data, "source": part.metadata.get("documentId"), "extractionPrompt": part.metadata.get("extractionPrompt") }) # Assign elements to section (for all processing paths) section["elements"] = elements # Update chapter progress after section completion (for all sections, including non-AI) chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 self.services.chat.progressLogUpdate( chapterOperationId, chapterProgress, f"Section {sectionIndex + 1}/{totalSections} completed" ) # Update overall progress after section completion overallProgress = calculateOverallProgress(chapterIndex - 1, totalChapters, sectionIndex + 1, totalSections) self.services.chat.progressLogUpdate( fillOperationId, overallProgress, f"Chapter {chapterIndex}/{totalChapters}, Section {sectionIndex + 1}/{totalSections} completed" ) # Finish chapter operation after all sections processed self.services.chat.progressLogFinish(chapterOperationId, True) # Update overall progress after chapter completion overallProgress = chapterIndex / totalChapters if totalChapters > 0 else 1.0 self.services.chat.progressLogUpdate( fillOperationId, overallProgress, f"Chapter {chapterIndex}/{totalChapters} completed: {chapterTitle}" ) return chapterStructure def _addContentPartsMetadata( self, structure: Dict[str, Any], contentParts: List[ContentPart] ) -> Dict[str, Any]: """ Fügt ContentParts-Metadaten zur Struktur hinzu, wenn contentPartIds vorhanden sind. Dies hilft der Validierung, den Kontext der ContentParts zu verstehen. """ # Erstelle Mapping von ContentPart-ID zu Metadaten contentPartsMap = {} for part in contentParts: contentPartsMap[part.id] = { "id": part.id, "format": part.metadata.get("contentFormat", "unknown"), "type": part.typeGroup, "mimeType": part.mimeType, "originalFileName": part.metadata.get("originalFileName"), "usageHint": part.metadata.get("usageHint"), "documentId": part.metadata.get("documentId"), "dataSize": len(str(part.data)) if part.data else 0 } # Füge Metadaten zu Sections hinzu, die contentPartIds haben for doc in structure.get("documents", []): # Prüfe ob Chapters vorhanden sind (neue Struktur) if "chapters" in doc: for chapter in doc.get("chapters", []): # Füge Metadaten zu Chapter-Level contentPartIds hinzu chapterContentPartIds = chapter.get("contentPartIds", []) if chapterContentPartIds: chapter["contentPartsMetadata"] = [] for partId in chapterContentPartIds: if partId in contentPartsMap: chapter["contentPartsMetadata"].append(contentPartsMap[partId]) # Füge Metadaten zu Sections hinzu for section in chapter.get("sections", []): contentPartIds = section.get("contentPartIds", []) if contentPartIds: section["contentPartsMetadata"] = [] for partId in contentPartIds: if partId in contentPartsMap: section["contentPartsMetadata"].append(contentPartsMap[partId]) return structure def _flattenChaptersToSections( self, chapterStructure: Dict[str, Any] ) -> Dict[str, Any]: """ Flattening: Konvertiert Chapters zu finaler Section-Struktur. Jedes Chapter wird zu einer Heading-Section (Level 1) + dessen Sections. IMPORTANT: Chapters are the main structure elements (heading level 1). All section headings with level < 2 are adjusted to level 2. """ result = { "metadata": chapterStructure.get("metadata", {}), "documents": [] } for doc in chapterStructure.get("documents", []): flattened_doc = { "id": doc.get("id"), "title": doc.get("title"), "filename": doc.get("filename"), "sections": [] } for chapter in doc.get("chapters", []): # 1. Vordefinierte Heading-Section für Chapter-Title (ALWAYS Level 1) heading_section = { "id": f"{chapter['id']}_heading", "content_type": "heading", "elements": [{ "type": "heading", "content": { "text": chapter.get("title", ""), "level": 1 # Chapters are always level 1 } }] } flattened_doc["sections"].append(heading_section) # 2. Generierte Sections - adjust heading levels for section in chapter.get("sections", []): adjusted_section = self._adjustSectionHeadingLevels(section) flattened_doc["sections"].append(adjusted_section) result["documents"].append(flattened_doc) return result def _adjustSectionHeadingLevels(self, section: Dict[str, Any]) -> Dict[str, Any]: """ Adjust heading levels in sections: sections with type heading and level < 2 are changed to level 2. Only chapter headings have level 1. """ adjusted_section = copy.deepcopy(section) # Check if this is a heading section if adjusted_section.get("content_type") == "heading": elements = adjusted_section.get("elements", []) for element in elements: if isinstance(element, dict) and element.get("type") == "heading": content = element.get("content", {}) if isinstance(content, dict): level = content.get("level", 1) # If level < 2, change to level 2 (only chapters have level 1) if level < 2: content["level"] = 2 return adjusted_section def _buildChapterSectionsStructurePrompt( self, chapterId: str, chapterLevel: int, chapterTitle: str, generationHint: str, contentPartIds: List[str], contentPartInstructions: Dict[str, Any], contentParts: List[ContentPart], userPrompt: str ) -> str: """Baue Prompt für Chapter-Sections-Struktur-Generierung.""" # Baue ContentParts-Index (nur IDs, keine Previews!) contentPartsIndex = "" for partId in contentPartIds: part = self._findContentPartById(partId, contentParts) if not part: continue contentFormat = part.metadata.get("contentFormat", "unknown") instruction = contentPartInstructions.get(partId, {}).get("instruction", "Use content as needed") contentPartsIndex += f"\n- ContentPart ID: {partId}\n" contentPartsIndex += f" Format: {contentFormat}\n" contentPartsIndex += f" Type: {part.typeGroup}\n" contentPartsIndex += f" Instruction: {instruction}\n" if not contentPartsIndex: contentPartsIndex = "\n(No content parts specified for this chapter)" prompt = f"""TASK: Generate Chapter Sections Structure CHAPTER: {chapterTitle} (Level {chapterLevel}, ID: {chapterId}) GENERATION HINT: {generationHint} NOTE: Chapter already has a heading section. Do NOT generate a heading for the chapter title. IMPORTANT - SECTION INDEPENDENCE: - Each section is independent and self-contained - One section does NOT have information about another section - Each section must provide its own context and be understandable alone AVAILABLE CONTENT PARTS: {contentPartsIndex} CONTENT TYPES: table, bullet_list, heading, paragraph, code_block, image useAiCall RULES: - useAiCall: true ONLY if ContentPart Format is "extracted" AND transformation needed - useAiCall: false if Format is "object" or "reference" (direct insertion) - useAiCall: false if Format is "extracted" AND simple "include full text" instruction - useAiCall: true if NO ContentPartIds provided (content must be generated from scratch); Sections without ContentParts MUST have a clear, detailed generationHint explaining what content to generate RETURN JSON: {{ "sections": [ {{ "id": "section_1", "content_type": "paragraph", "contentPartIds": ["extracted_part_1"], "generationHint": "Include full text", "useAiCall": false, "elements": [] }} ] }} EXAMPLES (all content types): - paragraph: {{"id": "s1", "content_type": "paragraph", "contentPartIds": ["extracted_1"], "generationHint": "Include full text", "useAiCall": false, "elements": []}} - bullet_list: {{"id": "s2", "content_type": "bullet_list", "contentPartIds": ["extracted_1"], "generationHint": "Create bullet list", "useAiCall": true, "elements": []}} - table: {{"id": "s3", "content_type": "table", "contentPartIds": ["extracted_1", "extracted_2"], "generationHint": "Create table", "useAiCall": true, "elements": []}} - heading: {{"id": "s4", "content_type": "heading", "contentPartIds": ["extracted_1"], "generationHint": "Extract heading", "useAiCall": true, "elements": []}} - code_block: {{"id": "s5", "content_type": "code_block", "contentPartIds": ["extracted_1"], "generationHint": "Format code", "useAiCall": true, "elements": []}} - image: {{"id": "s6", "content_type": "image", "contentPartIds": ["obj_1"], "generationHint": "Display image", "useAiCall": false, "elements": []}} - reference: {{"id": "s7", "content_type": "paragraph", "contentPartIds": ["ref_1"], "generationHint": "Reference", "useAiCall": false, "elements": []}} - NO CONTENT PARTS (generate from scratch): {{"id": "s8", "content_type": "paragraph", "contentPartIds": [], "generationHint": "Write a detailed professional paragraph explaining [specific topic or purpose]. Include [key points to cover]. Address [important aspects]. Conclude with [summary or recommendations].", "useAiCall": true, "elements": []}} CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. """ return prompt def _getContentStructureExample(self, contentType: str) -> str: """Get the JSON structure example for a specific content type.""" structures = { "table": '{{"headers": ["Column1", "Column2"], "rows": [["Value1", "Value2"], ["Value3", "Value4"]]}}', "bullet_list": '{{"items": ["Item 1", "Item 2", "Item 3"]}}', "heading": '{{"text": "Section Title", "level": 2}}', "paragraph": '{{"text": "This is paragraph text."}}', "code_block": '{{"code": "function example() {{ return true; }}", "language": "javascript"}}', "image": '{{"base64Data": "", "altText": "Description", "caption": "Optional caption"}}' } return structures.get(contentType, '{{"text": ""}}') def _buildSectionGenerationPrompt( self, section: Dict[str, Any], contentParts: List[Optional[ContentPart]], userPrompt: str, generationHint: str, allSections: Optional[List[Dict[str, Any]]] = None, sectionIndex: Optional[int] = None, isAggregation: bool = False ) -> str: """Baue Prompt für Section-Generierung mit vollständigem Kontext.""" # Filtere None-Werte validParts = [p for p in contentParts if p is not None] # Section-Metadaten sectionId = section.get("id", "unknown") contentType = section.get("content_type", "paragraph") # Baue ContentParts-Beschreibung contentPartsText = "" if isAggregation: # Aggregation: Zeige nur Metadaten, nicht Previews contentPartsText += f"\n## CONTENT PARTS (Aggregation)\n" contentPartsText += f"- Anzahl: {len(validParts)} ContentParts\n" contentPartsText += f"- Alle ContentParts werden als Parameter übergeben (nicht im Prompt!)\n" contentPartsText += f"- Jeder Part kann sehr groß sein → Chunking automatisch\n" contentPartsText += f"- WICHTIG: Aggregiere ALLE Parts zu einem Element (z.B. eine Tabelle)\n\n" contentPartsText += f"ContentPart IDs:\n" for part in validParts: contentFormat = part.metadata.get("contentFormat", "unknown") contentPartsText += f" - {part.id} (Format: {contentFormat}, Type: {part.typeGroup}" if part.metadata.get("originalFileName"): contentPartsText += f", Source: {part.metadata.get('originalFileName')}" contentPartsText += ")\n" else: # Einzelverarbeitung: Zeige Previews for part in validParts: contentFormat = part.metadata.get("contentFormat", "unknown") contentPartsText += f"\n- ContentPart {part.id}:\n" contentPartsText += f" Format: {contentFormat}\n" contentPartsText += f" Type: {part.typeGroup}\n" if part.metadata.get("originalFileName"): contentPartsText += f" Source file: {part.metadata.get('originalFileName')}\n" if contentFormat == "extracted": # Zeige Preview von extrahiertem Text (länger für besseren Kontext) previewLength = 1000 if part.data: preview = part.data[:previewLength] + "..." if len(part.data) > previewLength else part.data contentPartsText += f" Content preview:\n```\n{preview}\n```\n" else: contentPartsText += f" Content: (empty)\n" elif contentFormat == "reference": contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n" if part.metadata.get("usageHint"): contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" elif contentFormat == "object": dataLength = len(part.data) if part.data else 0 contentPartsText += f" Object type: {part.typeGroup}\n" contentPartsText += f" MIME type: {part.mimeType}\n" contentPartsText += f" Data size: {dataLength} chars (base64 encoded)\n" if part.metadata.get("usageHint"): contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" # Baue Section-Kontext (vorherige und nachfolgende Sections) contextText = "" if allSections and sectionIndex is not None: prevSections = [] nextSections = [] if sectionIndex > 0: for i in range(max(0, sectionIndex - 2), sectionIndex): prevSection = allSections[i] prevSections.append({ "id": prevSection.get("id"), "content_type": prevSection.get("content_type"), "generation_hint": prevSection.get("generation_hint", "")[:100] }) if sectionIndex < len(allSections) - 1: for i in range(sectionIndex + 1, min(len(allSections), sectionIndex + 3)): nextSection = allSections[i] nextSections.append({ "id": nextSection.get("id"), "content_type": nextSection.get("content_type"), "generation_hint": nextSection.get("generation_hint", "")[:100] }) if prevSections or nextSections: contextText = "\n## DOCUMENT CONTEXT\n" if prevSections: contextText += "\nPrevious sections:\n" for prev in prevSections: contextText += f"- {prev['id']} ({prev['content_type']}): {prev['generation_hint']}\n" if nextSections: contextText += "\nFollowing sections:\n" for next in nextSections: contextText += f"- {next['id']} ({next['content_type']}): {next['generation_hint']}\n" contentStructureExample = self._getContentStructureExample(contentType) # Special handling for image content type with IMAGE_GENERATE isImageGeneration = contentType == "image" and len(validParts) == 0 if isAggregation: prompt = f"""# TASK: Generate Section Content (Aggregation) ## SECTION METADATA - Section ID: {sectionId} - Content Type: {contentType} - Generation Hint: {generationHint} ## AVAILABLE CONTENT FOR THIS SECTION {contentPartsText if contentPartsText else "(No content parts specified for this section)"} ## INSTRUCTIONS 1. Generate content for section "{sectionId}" based on the generation hint above 2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data) 3. For table content_type: Create a single table with headers and rows from all ContentParts 4. For bullet_list content_type: Create a single list with items from all ContentParts 5. Format appropriately based on content_type ({contentType}) 6. Ensure the generated content is self-contained and understandable independently 7. Return ONLY a JSON object with an "elements" array 8. Each element should match the content_type: {contentType} 9. CRITICAL - NO HTML/STYLING: Do NOT include HTML tags, CSS styles, or any formatting markup in text content. Return plain text only. Formatting is handled automatically by the renderer. 10. For paragraphs: Return plain text only, no HTML tags like
, ,

, or style attributes 11. For headings: Return plain text only, no HTML tags or styling 12. For images: Do NOT include base64 data in JSON - images are handled separately ## OUTPUT FORMAT Return a JSON object with this structure: {{ "elements": [ {{ "type": "{contentType}", "content": {contentStructureExample} }} ] }} CRITICAL: - "content" MUST always be an object (never a string) - For text content: Return plain text only, NO HTML tags, NO CSS styles, NO formatting markup - Return ONLY valid JSON. Do not include any explanatory text outside the JSON. ## CONTEXT (for reference only) {contextText if contextText else ""} ``` {userPrompt} ``` """ else: prompt = f"""# TASK: Generate Section Content ## SECTION METADATA - Section ID: {sectionId} - Content Type: {contentType} - Generation Hint: {generationHint} ## AVAILABLE CONTENT FOR THIS SECTION {contentPartsText if contentPartsText else "(No content parts specified for this section)"} ## INSTRUCTIONS 1. Generate content for section "{sectionId}" based on the generation hint above 2. Use the available content parts to populate this section 3. For extracted text: Format appropriately based on content_type ({contentType}) 4. Ensure the generated content is self-contained and understandable independently 5. Return ONLY a JSON object with an "elements" array 6. Each element should match the content_type: {contentType} 7. CRITICAL - NO HTML/STYLING: Do NOT include HTML tags, CSS styles, or any formatting markup in text content. Return plain text only. Formatting is handled automatically by the renderer. 8. For paragraphs: Return plain text only, no HTML tags like

, ,

, or style attributes 9. For headings: Return plain text only, no HTML tags or styling 10. For images: If you need to reference an image, describe it in altText. Do NOT include base64 data - images are handled separately ## OUTPUT FORMAT Return a JSON object with this structure: {{ "elements": [ {{ "type": "{contentType}", "content": {contentStructureExample} }} ] }} CRITICAL: - "content" MUST always be an object (never a string) - For text content: Return plain text only, NO HTML tags, NO CSS styles, NO formatting markup - Return ONLY valid JSON. Do not include any explanatory text outside the JSON ## CONTEXT (for reference only) {contextText if contextText else ""} ``` {userPrompt} ``` """ return prompt def _findContentPartById(self, partId: str, contentParts: List[ContentPart]) -> Optional[ContentPart]: """Finde ContentPart nach ID.""" for part in contentParts: if part.id == partId: return part return None def _needsAggregation( self, contentType: str, contentPartCount: int ) -> bool: """ Bestimmt ob mehrere ContentParts aggregiert werden müssen. Aggregation nötig wenn: - content_type erfordert Aggregation (table, bullet_list) - UND mehrere ContentParts vorhanden sind (> 1) Args: contentType: Section content_type contentPartCount: Anzahl der ContentParts in dieser Section Returns: True wenn Aggregation nötig, False sonst """ aggregationTypes = ["table", "bullet_list"] if contentType in aggregationTypes and contentPartCount > 1: return True # Optional: Auch für paragraph wenn mehrere Parts vorhanden # (z.B. Vergleich mehrerer Dokumente) # Standard: Keine Aggregation für paragraph return False