# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Structure Generation Module

Handles document structure generation, including:
- Generating document structure with sections
- Building structure prompts
"""
import json
import logging
from typing import Dict, Any, List, Optional

from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum

logger = logging.getLogger(__name__)


class StructureGenerator:
    """Handles document structure generation."""

    def __init__(self, services, aiService):
        """Initialize StructureGenerator with service center and AI service access."""
        self.services = services
        self.aiService = aiService

    def _getUserLanguage(self) -> str:
        """Return the language to use for document generation.

        Lookup order:
        1. ``services.currentUserLanguage`` (detected language), if set and non-empty.
        2. ``services.user.language`` (user's preferred language), if set and non-empty.
        3. ``'en'`` as the default.

        The lookup is best-effort: any exception raised while reading the
        service attributes is logged at debug level and the default is returned.
        Empty/None values are never returned, because callers invoke
        ``.upper()`` on the result.
        """
        try:
            services = self.services
            if services:
                # Prefer detected language if available (from user intention analysis)
                detectedLanguage = getattr(services, "currentUserLanguage", None)
                if detectedLanguage:
                    return detectedLanguage
                # Fall back to the user's preferred language
                user = getattr(services, "user", None)
                preferredLanguage = getattr(user, "language", None) if user else None
                if preferredLanguage:
                    return preferredLanguage
        except Exception:
            # Deliberately best-effort: never fail generation over a language lookup
            logger.debug("Could not determine user language, falling back to 'en'", exc_info=True)
        return 'en'  # Default fallback

    async def generateStructure(
        self,
        userPrompt: str,
        contentParts: List[ContentPart],
        outputFormat: str,
        parentOperationId: str
    ) -> Dict[str, Any]:
        """
        Phase 5C: Generate the chapter structure (table of contents).

        The prompt asks the AI to define for each chapter:
        - level, title
        - contentParts (unified object with instruction and/or caption per part)
        - generationHint

        Args:
            userPrompt: The user's request.
            contentParts: All prepared ContentParts with metadata.
            outputFormat: Target format (html, docx, pdf, etc.).
            parentOperationId: Parent operation ID for the ChatLog hierarchy.

        Returns:
            Structure dict with documents and chapters (not sections!).

        Raises:
            ValueError: If the AI response cannot be parsed as JSON, even after
                a repair attempt.
            Exception: Any other error is logged, the progress log is finished
                with failure, and the error is re-raised.
        """
        # Operation ID for the structure generation step
        structureOperationId = f"{parentOperationId}_structure_generation"

        # Start the ChatLog entry with a reference to the parent operation
        self.services.chat.progressLogStart(
            structureOperationId,
            "Chapter Structure Generation",
            "Structure",
            f"Generating chapter structure for {outputFormat}",
            parentOperationId=parentOperationId
        )

        try:
            # Build the chapter structure prompt including the content index
            structurePrompt = self._buildChapterStructurePrompt(
                userPrompt=userPrompt,
                contentParts=contentParts,
                outputFormat=outputFormat
            )

            # AI call for chapter structure generation with looping support.
            # Use callAiWithLooping instead of callAiPlanning to support continuation if response is cut
            options = AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                priority=PriorityEnum.QUALITY,
                processingMode=ProcessingModeEnum.DETAILED,
                compressPrompt=False,
                compressContext=False,
                resultFormat="json"
            )

            # Prompt builder for continuation support
            async def buildChapterStructurePromptWithContinuation(
                continuationContext: Optional[Dict[str, Any]] = None,
                **kwargs
            ) -> str:
                """Build chapter structure prompt with optional continuation context."""
                basePrompt = self._buildChapterStructurePrompt(
                    userPrompt=userPrompt,
                    contentParts=contentParts,
                    outputFormat=outputFormat
                )
                if not continuationContext:
                    return basePrompt
                continuationText = self._buildContinuationText(continuationContext)
                return (
                    f"{basePrompt}\n\n"
                    f"{continuationText}\n\n"
                    "Continue generating the remaining chapters now.\n"
                )

            # Call AI with looping support
            # NOTE: Do NOT pass contentParts here - we only need metadata for structure generation
            # The contentParts metadata is already included in the prompt (contentPartsIndex)
            # Actual content extraction happens later during section generation
            aiResponseJson = await self.aiService.callAiWithLooping(
                prompt=structurePrompt,
                options=options,
                debugPrefix="chapter_structure_generation",
                promptBuilder=buildChapterStructurePromptWithContinuation,
                promptArgs={
                    "userPrompt": userPrompt,
                    "outputFormat": outputFormat,
                    "services": self.services
                },
                useCaseId="chapter_structure",  # REQUIRED: Explicit use case ID
                operationId=structureOperationId,
                userPrompt=userPrompt,
                contentParts=None  # Do not pass ContentParts - only metadata needed, not content extraction
            )

            # Parse the complete JSON response (looping system already handles completion)
            structure = self._parseStructureResponse(aiResponseJson)

            # Finish the ChatLog entry
            self.services.chat.progressLogFinish(structureOperationId, True)

            return structure

        except Exception as e:
            self.services.chat.progressLogFinish(structureOperationId, False)
            logger.error(f"Error in generateStructure: {str(e)}")
            raise

    @staticmethod
    def _buildContinuationText(continuationContext: Dict[str, Any]) -> str:
        """Format the continuation instructions appended to the base prompt.

        Reads ``delivered_summary``, ``element_before_cutoff`` and
        ``cut_off_element`` from ``continuationContext`` (each defaults to an
        empty string) and renders them as reference sections telling the model
        not to repeat already delivered content.
        """
        deliveredSummary = continuationContext.get("delivered_summary", "")
        elementBeforeCutoff = continuationContext.get("element_before_cutoff", "")
        cutOffElement = continuationContext.get("cut_off_element", "")

        segments = [
            f"{deliveredSummary}\n\n",
            "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n",
        ]
        if elementBeforeCutoff:
            segments.append("# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n")
            segments.append(f"{elementBeforeCutoff}\n\n")
        if cutOffElement:
            segments.append("# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n")
            segments.append(f"{cutOffElement}\n\n")
        segments.append("⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n")
        segments.append("Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n")
        segments.append("Start directly with the next chapter that should follow.\n\n")
        return "".join(segments)

    def _parseStructureResponse(self, aiResponseJson: Any) -> Dict[str, Any]:
        """Parse the AI response into the structure dict, repairing broken JSON if needed.

        Uses ``services.utils.jsonExtractString`` / ``jsonTryParse`` first; on a
        parse error it falls back to ``jsonUtils.repairBrokenJson`` and parses
        the repaired result again.

        Raises:
            ValueError: If parsing fails and the JSON cannot be repaired, or the
                repaired JSON still fails to parse.
        """
        utils = self.services.utils
        extractedJson = utils.jsonExtractString(aiResponseJson)
        parsedJson, parseError, cleanedJson = utils.jsonTryParse(extractedJson)
        if parseError is None:
            return parsedJson

        # Even with looping, try repair as fallback
        logger.warning(f"JSON parsing failed after looping: {str(parseError)}. Attempting repair...")
        from modules.shared import jsonUtils
        repairedJson = jsonUtils.repairBrokenJson(extractedJson)
        if not repairedJson:
            logger.error(f"Failed to repair JSON. Parse error: {str(parseError)}")
            # cleanedJson may be None when parsing failed; guard the slice so the
            # preview log cannot mask the actual parse error with a TypeError
            cleanedPreview = str(cleanedJson or "")[:500]
            logger.error(f"Cleaned JSON preview (first 500 chars): {cleanedPreview}")
            raise ValueError(f"Failed to parse JSON structure: {str(parseError)}")

        parsedJson, parseError, _ = utils.jsonTryParse(json.dumps(repairedJson))
        if parseError is not None:
            logger.error(f"Failed to parse repaired JSON: {str(parseError)}")
            raise ValueError(f"Failed to parse JSON structure after repair: {str(parseError)}")

        logger.info("Successfully repaired and parsed JSON structure after looping")
        return parsedJson

    @staticmethod
    def _emptyPartReason(part: ContentPart, contentFormat: str) -> Optional[str]:
        """Return why a non-reference part counts as empty, or None if it has usable data.

        A part is empty when ``part.data`` is falsy, or when it is a string
        consisting only of whitespace.
        """
        data = part.data
        if not data:
            # Container parts without data get their own reason for easier debugging
            if part.typeGroup == "container":
                return "container without data"
            return f"no data (format: {contentFormat})"
        if isinstance(data, str) and not data.strip():
            return f"whitespace-only data (format: {contentFormat})"
        return None

    def _buildChapterStructurePrompt(
        self,
        userPrompt: str,
        contentParts: List[ContentPart],
        outputFormat: str
    ) -> str:
        """Build the prompt for chapter structure generation.

        Empty content parts are filtered out (reference parts are always kept),
        the remaining parts are listed as a metadata index, and the index is
        embedded in the planning prompt together with the user request, the
        target language and the expected JSON layout.

        Args:
            userPrompt: The user's request, embedded verbatim in the prompt.
            contentParts: Content parts to index; ``None`` is treated as empty.
            outputFormat: Target format, used as the example filename extension.

        Returns:
            The complete prompt string.
        """
        allParts = contentParts or []

        # Build the ContentParts index - filter out empty parts
        validParts = []
        filteredParts = []
        debugEnabled = logger.isEnabledFor(logging.DEBUG)

        for part in allParts:
            contentFormat = part.metadata.get("contentFormat", "unknown")

            # IMPORTANT: reference parts intentionally carry empty data - always include them
            if contentFormat == "reference":
                validParts.append(part)
                logger.debug(f"Including reference ContentPart {part.id} (intentionally empty data)")
                continue

            # Skip empty parts (no data, whitespace-only strings, containers without content)
            skipReason = self._emptyPartReason(part, contentFormat)
            if skipReason is not None:
                filteredParts.append((part.id, skipReason))
                continue

            validParts.append(part)
            if debugEnabled:
                # str(part.data) can be large; only compute it when debug logging is on
                logger.debug(
                    f"Including ContentPart {part.id}: format={contentFormat}, "
                    f"type={part.typeGroup}, dataLength={len(str(part.data)) if part.data else 0}"
                )

        if filteredParts:
            logger.debug(f"Filtered out {len(filteredParts)} empty ContentParts: {filteredParts}")

        logger.info(f"Building structure prompt with {len(validParts)} valid ContentParts (from {len(allParts)} total)")

        # Build the index for valid parts only
        indexEntries = []
        for i, part in enumerate(validParts, 1):
            contentFormat = part.metadata.get("contentFormat", "unknown")
            originalFileName = part.metadata.get('originalFileName', 'N/A')

            indexEntries.append(
                f"\n{i}. ContentPart ID: {part.id}\n"
                f" Format: {contentFormat}\n"
                f" Type: {part.typeGroup}\n"
                f" MIME Type: {part.mimeType or 'N/A'}\n"
                f" Source: {part.metadata.get('documentId', 'unknown')}\n"
                f" Original file name: {originalFileName}\n"
                f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n"
            )

        contentPartsIndex = "".join(indexEntries) or "\n(No content parts available)"

        # Get language from services (user intention analysis)
        language = self._getUserLanguage()
        logger.debug(f"Using language from services (user intention analysis) for structure generation: {language}")

        prompt = f"""CRITICAL OUTPUT REQUIREMENT: This is a PLANNING task, not a generation task. You MUST return EXACTLY ONE complete JSON object. Do NOT generate multiple JSON objects, alternatives, or variations. Do NOT use separators like "---" between JSON objects. Return the single best JSON structure that matches the requirements below.

USER REQUEST (for context):
```
{userPrompt}
```

LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.

AVAILABLE CONTENT PARTS:
{contentPartsIndex}

TASK: Generate Chapter Structure for the documents to be generated.

IMPORTANT - CHAPTER INDEPENDENCE:
- Each chapter is independent and self-contained
- One chapter does NOT have information about another chapter
- Each chapter must provide its own context and be understandable alone

CONTENT ASSIGNMENT:
- Assign ContentParts to chapters via contentParts object
- For data extraction, the type of a contentPart (image, text, etc.) is NOT relevant - only what is specified in the instruction matters
- Include ALL relevant parts from same source when needed for structured data extraction
- Each contentPart can have either:
  - "instruction": For AI extraction prompts (how to process/extract from this part)
  - "caption": For user-facing presentation (how to display/reference this part in the document)
  - Both can be present if needed
- Chapters without contentParts can only generate generic content (not document-specific)

FORMATTING:
- Formatting is handled automatically - focus on CONTENT and STRUCTURE only

CHAPTER STRUCTURE:
- chapter id, level (1, 2, 3, etc.), title
- contentParts: {{"partId": {{"instruction": "..."}} or {{"caption": "..."}} or both}}
  - Compact mapping of part IDs to their extraction instructions and/or presentation captions
- generationHint: Self-contained description (if contentParts is empty, must be VERY DETAILED)

RETURN JSON:
{{
  "metadata": {{
    "title": "Document Title",
    "language": "{language}"
  }},
  "documents": [{{
    "id": "doc_1",
    "title": "Document Title",
    "filename": "document.{outputFormat}",
    "chapters": [
      {{
        "id": "chapter_1",
        "level": 1,
        "title": "Introduction",
        "contentParts": {{
          "part_ext_1": {{
            "instruction": "Use full extracted text"
          }},
          "part_img_1": {{
            "instruction": "Analyze image for additional details"
          }},
          "part_img_2": {{
            "instruction": "Analyze image for additional details",
            "caption": "Figure 1: Overview diagram"
          }}
        }},
        "generationHint": "Create introduction section",
        "sections": []
      }},
      {{
        "id": "chapter_2",
        "level": 1,
        "title": "Main Title",
        "contentParts": {{}},
        "generationHint": "Create [specific content description] with [formatting details]. Include [required information]. Purpose: [explanation of what this chapter provides].",
        "sections": []
      }}
    ]
  }}]
}}

OUTPUT FORMAT: Start with {{ and end with }}. Do NOT use markdown code fences (```json). Do NOT add explanatory text before or after the JSON. Return ONLY the JSON object itself.
"""
        return prompt