# Copyright (c) 2025 Patrick Motsch # All rights reserved. """ Structure Filling Module Handles filling document structure with content, including: - Filling sections with content parts - Building section generation prompts - Aggregation logic """ import json import logging import copy from typing import Dict, Any, List, Optional from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum logger = logging.getLogger(__name__) class StructureFiller: """Handles filling document structure with content.""" def __init__(self, services, aiService): """Initialize StructureFiller with service center and AI service access.""" self.services = services self.aiService = aiService async def fillStructure( self, structure: Dict[str, Any], contentParts: List[ContentPart], userPrompt: str, parentOperationId: str ) -> Dict[str, Any]: """ Phase 5D: Chapter-Content-Generierung (Zwei-Phasen-Ansatz). Phase 5D.1: Generiert Sections-Struktur für jedes Chapter Phase 5D.2: Füllt Sections mit ContentParts Args: structure: Struktur-Dict mit documents und chapters (nicht sections!) contentParts: Alle vorbereiteten ContentParts userPrompt: User-Anfrage parentOperationId: Parent Operation-ID für ChatLog-Hierarchie Returns: Gefüllte Struktur mit elements in jeder Section (nach Flattening) """ # Erstelle Operation-ID für Struktur-Abfüllen fillOperationId = f"{parentOperationId}_structure_filling" # Validate structure has chapters hasChapters = False for doc in structure.get("documents", []): if "chapters" in doc: hasChapters = True break if not hasChapters: error_msg = "Structure must have chapters. Legacy section-based structure is not supported." logger.error(error_msg) raise ValueError(error_msg) # Starte ChatLog mit Parent-Referenz chapterCount = sum(len(doc.get("chapters", [])) for doc in structure.get("documents", [])) self.services.chat.progressLogStart( fillOperationId, "Chapter Content Generation", "Filling", f"Processing {chapterCount} chapters", parentOperationId=parentOperationId ) try: filledStructure = copy.deepcopy(structure) # Phase 5D.1: Sections-Struktur für jedes Chapter generieren filledStructure = await self._generateChapterSectionsStructure( filledStructure, contentParts, userPrompt, fillOperationId ) # Phase 5D.2: Sections mit ContentParts füllen filledStructure = await self._fillChapterSections( filledStructure, contentParts, userPrompt, fillOperationId ) # Flattening: Chapters zu Sections konvertieren flattenedStructure = self._flattenChaptersToSections(filledStructure) # Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung) flattenedStructure = self._addContentPartsMetadata(flattenedStructure, contentParts) # ChatLog abschließen self.services.chat.progressLogFinish(fillOperationId, True) return flattenedStructure except Exception as e: self.services.chat.progressLogFinish(fillOperationId, False) logger.error(f"Error in fillStructure: {str(e)}") raise async def _generateChapterSectionsStructure( self, chapterStructure: Dict[str, Any], contentParts: List[ContentPart], userPrompt: str, parentOperationId: str ) -> Dict[str, Any]: """ Phase 5D.1: Generiert Sections-Struktur für jedes Chapter (ohne Content). Sections enthalten: content_type, contentPartIds, generationHint, useAiCall """ for doc in chapterStructure.get("documents", []): for chapter in doc.get("chapters", []): chapterId = chapter.get("id", "unknown") chapterLevel = chapter.get("level", 1) chapterTitle = chapter.get("title", "") generationHint = chapter.get("generationHint", "") contentPartIds = chapter.get("contentPartIds", []) contentPartInstructions = chapter.get("contentPartInstructions", {}) chapterPrompt = self._buildChapterSectionsStructurePrompt( chapterId=chapterId, chapterLevel=chapterLevel, chapterTitle=chapterTitle, generationHint=generationHint, contentPartIds=contentPartIds, contentPartInstructions=contentPartInstructions, contentParts=contentParts, userPrompt=userPrompt ) # AI-Call für Chapter-Struktur-Generierung # Note: Debug logging is handled by callAiPlanning aiResponse = await self.aiService.callAiPlanning( prompt=chapterPrompt, debugType=f"chapter_structure_{chapterId}" ) sectionsStructure = json.loads( self.services.utils.jsonExtractString(aiResponse) ) chapter["sections"] = sectionsStructure.get("sections", []) # Setze useAiCall Flag (falls nicht von AI gesetzt) # WICHTIG: useAiCall kann nur true sein, wenn mindestens ein ContentPart Format "extracted" hat! # "object" und "reference" Formate werden direkt als Elemente hinzugefügt, benötigen kein AI. for section in chapter["sections"]: if "useAiCall" not in section: contentType = section.get("content_type", "paragraph") contentPartIds = section.get("contentPartIds", []) # Prüfe ob mindestens ein ContentPart Format "extracted" hat hasExtractedPart = False for partId in contentPartIds: part = self._findContentPartById(partId, contentParts) if part: contentFormat = part.metadata.get("contentFormat", "unknown") if contentFormat == "extracted": hasExtractedPart = True break # useAiCall kann nur true sein, wenn extracted Parts vorhanden sind useAiCall = False if hasExtractedPart: # Prüfe ob Transformation nötig ist useAiCall = contentType != "paragraph" # Prüfe contentPartInstructions für Transformation if not useAiCall: for partId in contentPartIds: instruction = contentPartInstructions.get(partId, {}).get("instruction", "") if instruction and instruction.lower() not in ["include full text", "include all content", "use full extracted text"]: useAiCall = True break section["useAiCall"] = useAiCall logger.debug(f"Section {section.get('id')}: useAiCall={useAiCall} (hasExtractedPart={hasExtractedPart}, contentType={contentType})") return chapterStructure async def _fillChapterSections( self, chapterStructure: Dict[str, Any], contentParts: List[ContentPart], userPrompt: str, parentOperationId: str ) -> Dict[str, Any]: """ Phase 5D.2: Füllt Sections mit ContentParts. """ # Sammle alle Sections für sequenzielle Verarbeitung sections_to_process = [] all_sections_list = [] # Für Kontext-Informationen for doc in chapterStructure.get("documents", []): for chapter in doc.get("chapters", []): for section in chapter.get("sections", []): all_sections_list.append(section) sections_to_process.append((doc, chapter, section)) # Sequenzielle Section-Generierung fillOperationId = parentOperationId for sectionIndex, (doc, chapter, section) in enumerate(sections_to_process): sectionId = section.get("id") contentPartIds = section.get("contentPartIds", []) contentFormats = section.get("contentFormats", {}) # Check both camelCase and snake_case for generationHint generationHint = section.get("generationHint") or section.get("generation_hint") contentType = section.get("content_type", "paragraph") useAiCall = section.get("useAiCall", False) # WICHTIG: Wenn keine ContentParts vorhanden sind UND kein generationHint, kann kein AI-Call gemacht werden # Aber: Wenn generationHint vorhanden ist, SOLLTE AI verwendet werden, auch wenn useAiCall=false gesetzt ist # (z.B. wenn AI die Struktur generiert hat, aber useAiCall falsch gesetzt wurde) if len(contentPartIds) == 0 and not generationHint: useAiCall = False logger.debug(f"Section {sectionId}: No content parts and no generation hint, setting useAiCall=False") elif len(contentPartIds) == 0 and generationHint and not useAiCall: # Override: If there's a generationHint but no content parts, we should use AI # This handles cases where structure generation set useAiCall=false incorrectly useAiCall = True logger.info(f"Section {sectionId}: Overriding useAiCall=True (has generationHint but no content parts)") elements = [] # Prüfe ob Aggregation nötig ist needsAggregation = self._needsAggregation( contentType=contentType, contentPartCount=len(contentPartIds) ) logger.info(f"Processing section {sectionId}: contentType={contentType}, contentPartCount={len(contentPartIds)}, useAiCall={useAiCall}, needsAggregation={needsAggregation}, hasGenerationHint={bool(generationHint)}") if needsAggregation and useAiCall: # Aggregation: Alle Parts zusammen verarbeiten sectionParts = [ self._findContentPartById(pid, contentParts) for pid in contentPartIds ] sectionParts = [p for p in sectionParts if p is not None] if sectionParts: # Filtere nur extracted Parts für Aggregation (reference/object werden separat behandelt) extractedParts = [ p for p in sectionParts if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted" ] nonExtractedParts = [ p for p in sectionParts if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted" ] # Verarbeite non-extracted Parts separat (reference, object) for part in nonExtractedParts: contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat")) if contentFormat == "reference": elements.append({ "type": "reference", "documentReference": part.metadata.get("documentReference"), "label": part.metadata.get("usageHint", part.label) }) elif contentFormat == "object": # Nested content structure for objects if part.typeGroup == "image": elements.append({ "type": "image", "content": { "base64Data": part.data, "altText": part.metadata.get("usageHint", part.label), "caption": part.metadata.get("caption", "") } }) else: elements.append({ "type": part.typeGroup, "content": { "data": part.data, "mimeType": part.mimeType, "label": part.metadata.get("usageHint", part.label) } }) # Aggregiere extracted Parts mit AI if extractedParts: logger.debug(f"Section {sectionId}: Aggregating {len(extractedParts)} extracted parts with AI") generationPrompt = self._buildSectionGenerationPrompt( section=section, contentParts=extractedParts, # ALLE PARTS für Aggregation! userPrompt=userPrompt, generationHint=generationHint, allSections=all_sections_list, sectionIndex=sectionIndex, isAggregation=True ) # Erstelle Operation-ID für Section-Generierung sectionOperationId = f"{fillOperationId}_section_{sectionId}" # Starte ChatLog mit Parent-Referenz self.services.chat.progressLogStart( sectionOperationId, "Section Generation (Aggregation)", "Section", f"Generating section {sectionId} with {len(extractedParts)} parts", parentOperationId=fillOperationId ) try: # Debug: Log Prompt self.services.utils.writeDebugFile( generationPrompt, f"section_content_{sectionId}_prompt" ) logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt (aggregation)") # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!) # Use IMAGE_GENERATE for image content type operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) if operationType == OperationTypeEnum.IMAGE_GENERATE: maxPromptLength = 4000 if len(generationPrompt) > maxPromptLength: logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters") # Keep the beginning (task, metadata, generation hint) and truncate from end generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline # For IMAGE_GENERATE, don't pass contentParts - image generation uses prompt only, not content chunks contentPartsForCall = [] if operationType == OperationTypeEnum.IMAGE_GENERATE else extractedParts request = AiCallRequest( prompt=generationPrompt, contentParts=contentPartsForCall, # Empty for IMAGE_GENERATE, all parts for others options=AiCallOptions( operationType=operationType, priority=PriorityEnum.BALANCED, processingMode=ProcessingModeEnum.DETAILED ) ) aiResponse = await self.aiService.callAi(request) # Debug: Log Response self.services.utils.writeDebugFile( aiResponse.content, f"section_content_{sectionId}_response" ) logger.debug(f"Logged section response: section_content_{sectionId}_response (aggregation)") # Handle IMAGE_GENERATE differently - returns image data directly if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: import base64 # Convert image data to base64 string if needed if isinstance(aiResponse.content, bytes): base64Data = base64.b64encode(aiResponse.content).decode('utf-8') elif isinstance(aiResponse.content, str): # Already base64 string or data URI if aiResponse.content.startswith("data:image/"): # Extract base64 from data URI base64Data = aiResponse.content.split(",", 1)[1] else: base64Data = aiResponse.content else: base64Data = "" elements.append({ "type": "image", "content": { "base64Data": base64Data, "altText": generationHint or "Generated image", "caption": "" } }) else: # Parse JSON response for other content types generatedElements = json.loads( self.services.utils.jsonExtractString(aiResponse.content) ) if isinstance(generatedElements, list): elements.extend(generatedElements) elif isinstance(generatedElements, dict) and "elements" in generatedElements: elements.extend(generatedElements["elements"]) # ChatLog abschließen self.services.chat.progressLogFinish(sectionOperationId, True) except Exception as e: # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) self.services.chat.progressLogFinish(sectionOperationId, False) elements.append({ "type": "error", "message": f"Error generating section {sectionId}: {str(e)}", "sectionId": sectionId }) logger.error(f"Error generating section {sectionId}: {str(e)}") # NICHT raise - Section wird mit Fehlermeldung gerendert else: # Einzelverarbeitung: Jeder Part einzeln ODER Generation ohne ContentParts # Handle case where no content parts but generationHint exists (e.g., Executive Summary) if len(contentPartIds) == 0 and useAiCall and generationHint: # Generate content from scratch using only generationHint logger.debug(f"Processing section {sectionId}: No content parts, generating from generationHint only") generationPrompt = self._buildSectionGenerationPrompt( section=section, contentParts=[], # NO PARTS userPrompt=userPrompt, generationHint=generationHint, allSections=all_sections_list, sectionIndex=sectionIndex, isAggregation=False ) # Erstelle Operation-ID für Section-Generierung sectionOperationId = f"{fillOperationId}_section_{sectionId}" # Starte ChatLog mit Parent-Referenz self.services.chat.progressLogStart( sectionOperationId, "Section Generation", "Section", f"Generating section {sectionId} from generationHint", parentOperationId=fillOperationId ) try: # Debug: Log Prompt self.services.utils.writeDebugFile( generationPrompt, f"section_content_{sectionId}_prompt" ) logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt") # Verwende callAi ohne ContentParts operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) if operationType == OperationTypeEnum.IMAGE_GENERATE: maxPromptLength = 4000 if len(generationPrompt) > maxPromptLength: logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters") # Keep the beginning (task, metadata, generation hint) and truncate from end generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline request = AiCallRequest( prompt=generationPrompt, contentParts=[], # NO PARTS options=AiCallOptions( operationType=operationType, priority=PriorityEnum.BALANCED, processingMode=ProcessingModeEnum.DETAILED ) ) aiResponse = await self.aiService.callAi(request) # Debug: Log Response self.services.utils.writeDebugFile( aiResponse.content, f"section_content_{sectionId}_response" ) logger.debug(f"Logged section response: section_content_{sectionId}_response") # Handle IMAGE_GENERATE differently - returns image data directly if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: import base64 # Convert image data to base64 string if needed if isinstance(aiResponse.content, bytes): base64Data = base64.b64encode(aiResponse.content).decode('utf-8') elif isinstance(aiResponse.content, str): # Already base64 string or data URI if aiResponse.content.startswith("data:image/"): # Extract base64 from data URI base64Data = aiResponse.content.split(",", 1)[1] else: base64Data = aiResponse.content else: base64Data = "" elements.append({ "type": "image", "content": { "base64Data": base64Data, "altText": generationHint or "Generated image", "caption": "" } }) else: # Parse JSON response for other content types generatedElements = json.loads( self.services.utils.jsonExtractString(aiResponse.content) ) if isinstance(generatedElements, list): elements.extend(generatedElements) elif isinstance(generatedElements, dict) and "elements" in generatedElements: elements.extend(generatedElements["elements"]) # ChatLog abschließen self.services.chat.progressLogFinish(sectionOperationId, True) except Exception as e: # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) self.services.chat.progressLogFinish(sectionOperationId, False) elements.append({ "type": "error", "message": f"Error generating section {sectionId}: {str(e)}", "sectionId": sectionId }) logger.error(f"Error generating section {sectionId}: {str(e)}") # Einzelverarbeitung: Jeder Part einzeln for partId in contentPartIds: part = self._findContentPartById(partId, contentParts) if not part: continue contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat")) if contentFormat == "reference": # Füge Dokument-Referenz hinzu elements.append({ "type": "reference", "documentReference": part.metadata.get("documentReference"), "label": part.metadata.get("usageHint", part.label) }) elif contentFormat == "object": # Füge base64 Object hinzu (nested in content structure) if part.typeGroup == "image": elements.append({ "type": "image", "content": { "base64Data": part.data, "altText": part.metadata.get("usageHint", part.label), "caption": part.metadata.get("caption", "") } }) else: # For other object types, use generic structure elements.append({ "type": part.typeGroup, "content": { "data": part.data, "mimeType": part.mimeType, "label": part.metadata.get("usageHint", part.label) } }) elif contentFormat == "extracted": # WICHTIG: Prüfe sowohl useAiCall als auch generationHint if useAiCall and generationHint: # AI-Call mit einzelnen ContentPart logger.debug(f"Processing section {sectionId}: Single extracted part with AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)})") generationPrompt = self._buildSectionGenerationPrompt( section=section, contentParts=[part], # EIN PART userPrompt=userPrompt, generationHint=generationHint, allSections=all_sections_list, sectionIndex=sectionIndex, isAggregation=False ) # Erstelle Operation-ID für Section-Generierung sectionOperationId = f"{fillOperationId}_section_{sectionId}" # Starte ChatLog mit Parent-Referenz self.services.chat.progressLogStart( sectionOperationId, "Section Generation", "Section", f"Generating section {sectionId}", parentOperationId=fillOperationId ) try: # Debug: Log Prompt self.services.utils.writeDebugFile( generationPrompt, f"section_content_{sectionId}_prompt" ) logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt") # Verwende callAi für ContentParts-Unterstützung # Use IMAGE_GENERATE for image content type operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) if operationType == OperationTypeEnum.IMAGE_GENERATE: maxPromptLength = 4000 if len(generationPrompt) > maxPromptLength: logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters") # Keep the beginning (task, metadata, generation hint) and truncate from end generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline # For IMAGE_GENERATE, don't pass contentParts - image generation uses prompt only, not content chunks contentPartsForCall = [] if operationType == OperationTypeEnum.IMAGE_GENERATE else [part] request = AiCallRequest( prompt=generationPrompt, contentParts=contentPartsForCall, options=AiCallOptions( operationType=operationType, priority=PriorityEnum.BALANCED, processingMode=ProcessingModeEnum.DETAILED ) ) aiResponse = await self.aiService.callAi(request) # Debug: Log Response self.services.utils.writeDebugFile( aiResponse.content, f"section_content_{sectionId}_response" ) logger.debug(f"Logged section response: section_content_{sectionId}_response") # Handle IMAGE_GENERATE differently - returns image data directly if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: import base64 # Convert image data to base64 string if needed if isinstance(aiResponse.content, bytes): base64Data = base64.b64encode(aiResponse.content).decode('utf-8') elif isinstance(aiResponse.content, str): # Already base64 string or data URI if aiResponse.content.startswith("data:image/"): # Extract base64 from data URI base64Data = aiResponse.content.split(",", 1)[1] else: base64Data = aiResponse.content else: base64Data = "" elements.append({ "type": "image", "content": { "base64Data": base64Data, "altText": generationHint or "Generated image", "caption": "" } }) else: # Parse JSON response for other content types generatedElements = json.loads( self.services.utils.jsonExtractString(aiResponse.content) ) if isinstance(generatedElements, list): elements.extend(generatedElements) elif isinstance(generatedElements, dict) and "elements" in generatedElements: elements.extend(generatedElements["elements"]) # ChatLog abschließen self.services.chat.progressLogFinish(sectionOperationId, True) except Exception as e: # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) self.services.chat.progressLogFinish(sectionOperationId, False) elements.append({ "type": "error", "message": f"Error generating section {sectionId}: {str(e)}", "sectionId": sectionId }) logger.error(f"Error generating section {sectionId}: {str(e)}") # NICHT raise - Section wird mit Fehlermeldung gerendert else: # Füge extrahierten Content direkt hinzu (kein AI-Call) # CRITICAL: Check part typeGroup to determine correct element type if part.typeGroup == "image": # Image content should be added as image element, not extracted_text logger.debug(f"Processing section {sectionId}: Single extracted IMAGE part WITHOUT AI call - adding as image element") elements.append({ "type": "image", "content": { "base64Data": part.data, "altText": part.metadata.get("usageHint", part.label), "caption": part.metadata.get("caption", "") } }) else: # Text content - add as extracted_text element logger.debug(f"Processing section {sectionId}: Single extracted TEXT part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly") elements.append({ "type": "extracted_text", "content": part.data, "source": part.metadata.get("documentId"), "extractionPrompt": part.metadata.get("extractionPrompt") }) section["elements"] = elements return chapterStructure def _addContentPartsMetadata( self, structure: Dict[str, Any], contentParts: List[ContentPart] ) -> Dict[str, Any]: """ Fügt ContentParts-Metadaten zur Struktur hinzu, wenn contentPartIds vorhanden sind. Dies hilft der Validierung, den Kontext der ContentParts zu verstehen. """ # Erstelle Mapping von ContentPart-ID zu Metadaten contentPartsMap = {} for part in contentParts: contentPartsMap[part.id] = { "id": part.id, "format": part.metadata.get("contentFormat", "unknown"), "type": part.typeGroup, "mimeType": part.mimeType, "originalFileName": part.metadata.get("originalFileName"), "usageHint": part.metadata.get("usageHint"), "documentId": part.metadata.get("documentId"), "dataSize": len(str(part.data)) if part.data else 0 } # Füge Metadaten zu Sections hinzu, die contentPartIds haben for doc in structure.get("documents", []): # Prüfe ob Chapters vorhanden sind (neue Struktur) if "chapters" in doc: for chapter in doc.get("chapters", []): # Füge Metadaten zu Chapter-Level contentPartIds hinzu chapterContentPartIds = chapter.get("contentPartIds", []) if chapterContentPartIds: chapter["contentPartsMetadata"] = [] for partId in chapterContentPartIds: if partId in contentPartsMap: chapter["contentPartsMetadata"].append(contentPartsMap[partId]) # Füge Metadaten zu Sections hinzu for section in chapter.get("sections", []): contentPartIds = section.get("contentPartIds", []) if contentPartIds: section["contentPartsMetadata"] = [] for partId in contentPartIds: if partId in contentPartsMap: section["contentPartsMetadata"].append(contentPartsMap[partId]) return structure def _flattenChaptersToSections( self, chapterStructure: Dict[str, Any] ) -> Dict[str, Any]: """ Flattening: Konvertiert Chapters zu finaler Section-Struktur. Jedes Chapter wird zu einer Heading-Section + dessen Sections. """ result = { "metadata": chapterStructure.get("metadata", {}), "documents": [] } for doc in chapterStructure.get("documents", []): flattened_doc = { "id": doc.get("id"), "title": doc.get("title"), "filename": doc.get("filename"), "sections": [] } for chapter in doc.get("chapters", []): # 1. Vordefinierte Heading-Section für Chapter-Title heading_section = { "id": f"{chapter['id']}_heading", "content_type": "heading", "elements": [{ "type": "heading", "content": { "text": chapter.get("title", ""), "level": chapter.get("level", 1) } }] } flattened_doc["sections"].append(heading_section) # 2. Generierte Sections flattened_doc["sections"].extend(chapter.get("sections", [])) result["documents"].append(flattened_doc) return result def _buildChapterSectionsStructurePrompt( self, chapterId: str, chapterLevel: int, chapterTitle: str, generationHint: str, contentPartIds: List[str], contentPartInstructions: Dict[str, Any], contentParts: List[ContentPart], userPrompt: str ) -> str: """Baue Prompt für Chapter-Sections-Struktur-Generierung.""" # Baue ContentParts-Index (nur IDs, keine Previews!) contentPartsIndex = "" for partId in contentPartIds: part = self._findContentPartById(partId, contentParts) if not part: continue contentFormat = part.metadata.get("contentFormat", "unknown") instruction = contentPartInstructions.get(partId, {}).get("instruction", "Use content as needed") contentPartsIndex += f"\n- ContentPart ID: {partId}\n" contentPartsIndex += f" Format: {contentFormat}\n" contentPartsIndex += f" Type: {part.typeGroup}\n" contentPartsIndex += f" Instruction: {instruction}\n" if not contentPartsIndex: contentPartsIndex = "\n(No content parts specified for this chapter)" prompt = f"""TASK: Generate Chapter Sections Structure CHAPTER: {chapterTitle} (Level {chapterLevel}, ID: {chapterId}) GENERATION HINT: {generationHint} NOTE: Chapter already has a heading section. Do NOT generate a heading for the chapter title. AVAILABLE CONTENT PARTS: {contentPartsIndex} CONTENT TYPES: table, bullet_list, heading, paragraph, code_block, image useAiCall RULES: - useAiCall: true ONLY if ContentPart Format is "extracted" AND transformation needed - useAiCall: false if Format is "object" or "reference" (direct insertion) - useAiCall: false if Format is "extracted" AND simple "include full text" instruction RETURN JSON: {{ "sections": [ {{ "id": "section_1", "content_type": "paragraph", "contentPartIds": ["extracted_part_1"], "generationHint": "Include full text", "useAiCall": false, "elements": [] }} ] }} EXAMPLES (all content types): - paragraph: {{"id": "s1", "content_type": "paragraph", "contentPartIds": ["extracted_1"], "generationHint": "Include full text", "useAiCall": false, "elements": []}} - bullet_list: {{"id": "s2", "content_type": "bullet_list", "contentPartIds": ["extracted_1"], "generationHint": "Create bullet list", "useAiCall": true, "elements": []}} - table: {{"id": "s3", "content_type": "table", "contentPartIds": ["extracted_1", "extracted_2"], "generationHint": "Create table", "useAiCall": true, "elements": []}} - heading: {{"id": "s4", "content_type": "heading", "contentPartIds": ["extracted_1"], "generationHint": "Extract heading", "useAiCall": true, "elements": []}} - code_block: {{"id": "s5", "content_type": "code_block", "contentPartIds": ["extracted_1"], "generationHint": "Format code", "useAiCall": true, "elements": []}} - image: {{"id": "s6", "content_type": "image", "contentPartIds": ["obj_1"], "generationHint": "Display image", "useAiCall": false, "elements": []}} - reference: {{"id": "s7", "content_type": "paragraph", "contentPartIds": ["ref_1"], "generationHint": "Reference", "useAiCall": false, "elements": []}} CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. """ return prompt def _getContentStructureExample(self, contentType: str) -> str: """Get the JSON structure example for a specific content type.""" structures = { "table": '{{"headers": ["Column1", "Column2"], "rows": [["Value1", "Value2"], ["Value3", "Value4"]]}}', "bullet_list": '{{"items": ["Item 1", "Item 2", "Item 3"]}}', "heading": '{{"text": "Section Title", "level": 2}}', "paragraph": '{{"text": "This is paragraph text."}}', "code_block": '{{"code": "function example() {{ return true; }}", "language": "javascript"}}', "image": '{{"base64Data": "", "altText": "Description", "caption": "Optional caption"}}' } return structures.get(contentType, '{{"text": ""}}') def _buildSectionGenerationPrompt( self, section: Dict[str, Any], contentParts: List[Optional[ContentPart]], userPrompt: str, generationHint: str, allSections: Optional[List[Dict[str, Any]]] = None, sectionIndex: Optional[int] = None, isAggregation: bool = False ) -> str: """Baue Prompt für Section-Generierung mit vollständigem Kontext.""" # Filtere None-Werte validParts = [p for p in contentParts if p is not None] # Section-Metadaten sectionId = section.get("id", "unknown") contentType = section.get("content_type", "paragraph") # Baue ContentParts-Beschreibung contentPartsText = "" if isAggregation: # Aggregation: Zeige nur Metadaten, nicht Previews contentPartsText += f"\n## CONTENT PARTS (Aggregation)\n" contentPartsText += f"- Anzahl: {len(validParts)} ContentParts\n" contentPartsText += f"- Alle ContentParts werden als Parameter übergeben (nicht im Prompt!)\n" contentPartsText += f"- Jeder Part kann sehr groß sein → Chunking automatisch\n" contentPartsText += f"- WICHTIG: Aggregiere ALLE Parts zu einem Element (z.B. eine Tabelle)\n\n" contentPartsText += f"ContentPart IDs:\n" for part in validParts: contentFormat = part.metadata.get("contentFormat", "unknown") contentPartsText += f" - {part.id} (Format: {contentFormat}, Type: {part.typeGroup}" if part.metadata.get("originalFileName"): contentPartsText += f", Source: {part.metadata.get('originalFileName')}" contentPartsText += ")\n" else: # Einzelverarbeitung: Zeige Previews for part in validParts: contentFormat = part.metadata.get("contentFormat", "unknown") contentPartsText += f"\n- ContentPart {part.id}:\n" contentPartsText += f" Format: {contentFormat}\n" contentPartsText += f" Type: {part.typeGroup}\n" if part.metadata.get("originalFileName"): contentPartsText += f" Source file: {part.metadata.get('originalFileName')}\n" if contentFormat == "extracted": # Zeige Preview von extrahiertem Text (länger für besseren Kontext) previewLength = 1000 if part.data: preview = part.data[:previewLength] + "..." if len(part.data) > previewLength else part.data contentPartsText += f" Content preview:\n```\n{preview}\n```\n" else: contentPartsText += f" Content: (empty)\n" elif contentFormat == "reference": contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n" if part.metadata.get("usageHint"): contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" elif contentFormat == "object": dataLength = len(part.data) if part.data else 0 contentPartsText += f" Object type: {part.typeGroup}\n" contentPartsText += f" MIME type: {part.mimeType}\n" contentPartsText += f" Data size: {dataLength} chars (base64 encoded)\n" if part.metadata.get("usageHint"): contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" # Baue Section-Kontext (vorherige und nachfolgende Sections) contextText = "" if allSections and sectionIndex is not None: prevSections = [] nextSections = [] if sectionIndex > 0: for i in range(max(0, sectionIndex - 2), sectionIndex): prevSection = allSections[i] prevSections.append({ "id": prevSection.get("id"), "content_type": prevSection.get("content_type"), "generation_hint": prevSection.get("generation_hint", "")[:100] }) if sectionIndex < len(allSections) - 1: for i in range(sectionIndex + 1, min(len(allSections), sectionIndex + 3)): nextSection = allSections[i] nextSections.append({ "id": nextSection.get("id"), "content_type": nextSection.get("content_type"), "generation_hint": nextSection.get("generation_hint", "")[:100] }) if prevSections or nextSections: contextText = "\n## DOCUMENT CONTEXT\n" if prevSections: contextText += "\nPrevious sections:\n" for prev in prevSections: contextText += f"- {prev['id']} ({prev['content_type']}): {prev['generation_hint']}\n" if nextSections: contextText += "\nFollowing sections:\n" for next in nextSections: contextText += f"- {next['id']} ({next['content_type']}): {next['generation_hint']}\n" contentStructureExample = self._getContentStructureExample(contentType) if isAggregation: prompt = f"""# TASK: Generate Section Content (Aggregation) ## SECTION METADATA - Section ID: {sectionId} - Content Type: {contentType} - Generation Hint: {generationHint} {contextText} ## USER REQUEST (for context) ``` {userPrompt} ``` ## AVAILABLE CONTENT FOR THIS SECTION {contentPartsText if contentPartsText else "(No content parts specified for this section)"} ## INSTRUCTIONS 1. Generate content for section "{sectionId}" based on the generation hint above 2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data) 3. For table content_type: Create a single table with headers and rows from all ContentParts 4. For bullet_list content_type: Create a single list with items from all ContentParts 5. Format appropriately based on content_type ({contentType}) 6. Ensure the generated content fits logically between previous and following sections 7. Return ONLY a JSON object with an "elements" array 8. Each element should match the content_type: {contentType} ## OUTPUT FORMAT Return a JSON object with this structure: {{ "elements": [ {{ "type": "{contentType}", "content": {contentStructureExample} }} ] }} CRITICAL: "content" MUST always be an object (never a string). Return ONLY valid JSON. Do not include any explanatory text outside the JSON. """ else: prompt = f"""# TASK: Generate Section Content ## SECTION METADATA - Section ID: {sectionId} - Content Type: {contentType} - Generation Hint: {generationHint} {contextText} ## USER REQUEST (for context) ``` {userPrompt} ``` ## AVAILABLE CONTENT FOR THIS SECTION {contentPartsText if contentPartsText else "(No content parts specified for this section)"} ## INSTRUCTIONS 1. Generate content for section "{sectionId}" based on the generation hint above 2. Use the available content parts to populate this section 3. For images: Use data URI format (data:image/[type];base64,[data]) when embedding base64 image data 4. For extracted text: Format appropriately based on content_type ({contentType}) 5. Ensure the generated content fits logically between previous and following sections 6. Return ONLY a JSON object with an "elements" array 7. Each element should match the content_type: {contentType} ## OUTPUT FORMAT Return a JSON object with this structure: {{ "elements": [ {{ "type": "{contentType}", "content": {contentStructureExample} }} ] }} CRITICAL: "content" MUST always be an object (never a string). Return ONLY valid JSON. Do not include any explanatory text outside the JSON. """ return prompt def _findContentPartById(self, partId: str, contentParts: List[ContentPart]) -> Optional[ContentPart]: """Finde ContentPart nach ID.""" for part in contentParts: if part.id == partId: return part return None def _needsAggregation( self, contentType: str, contentPartCount: int ) -> bool: """ Bestimmt ob mehrere ContentParts aggregiert werden müssen. Aggregation nötig wenn: - content_type erfordert Aggregation (table, bullet_list) - UND mehrere ContentParts vorhanden sind (> 1) Args: contentType: Section content_type contentPartCount: Anzahl der ContentParts in dieser Section Returns: True wenn Aggregation nötig, False sonst """ aggregationTypes = ["table", "bullet_list"] if contentType in aggregationTypes and contentPartCount > 1: return True # Optional: Auch für paragraph wenn mehrere Parts vorhanden # (z.B. Vergleich mehrerer Dokumente) # Standard: Keine Aggregation für paragraph return False