# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Structure Generation Module

Handles document structure generation, including:
- Generating document structure with sections
- Building structure prompts
"""
import json
import logging
from typing import Dict, Any, List, Optional

from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped

logger = logging.getLogger(__name__)


class StructureGenerator:
    """Handles document structure generation."""

    def __init__(self, services, aiService):
        """Initialize StructureGenerator with service center and AI service access."""
        self.services = services
        self.aiService = aiService

    def _getUserLanguage(self) -> str:
        """Return the language code to use for document generation.

        Lookup order:
        1. ``services.currentUserLanguage`` (if present and non-empty)
        2. ``services.user.language`` (if present and non-empty)
        3. ``'en'`` as the default

        The lookup is best-effort: any unexpected error is logged at debug
        level and the default is returned, so this method never raises.
        """
        try:
            services = self.services
            if services:
                # Prefer the detected language if available
                detectedLanguage = getattr(services, 'currentUserLanguage', None)
                if detectedLanguage:
                    return detectedLanguage
                # Fall back to the user's preferred language; an empty/None value
                # falls through to the default so the result is always a string
                user = getattr(services, 'user', None)
                preferredLanguage = getattr(user, 'language', None) if user else None
                if preferredLanguage:
                    return preferredLanguage
        except Exception:
            logger.debug("Could not determine user language - falling back to 'en'", exc_info=True)
        return 'en'  # Default fallback

    async def generateStructure(
        self,
        userPrompt: str,
        contentParts: List[ContentPart],
        outputFormat: Optional[str] = None,
        parentOperationId: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Phase 5C: Generate the chapter structure (table of contents).

        Defines for every chapter:
        - level, title
        - contentParts (unified object with instruction and/or caption per part)
        - generationHint

        Generates the document structure with per-document format determination.
        Multiple documents can be produced with different formats (e.g., one PDF,
        one HTML). The AI determines formats per document from the user prompt.
        The outputFormat parameter is only a validation fallback - used if the AI
        doesn't return a format per document.

        Args:
            userPrompt: The user request.
            contentParts: All prepared ContentParts with metadata.
            outputFormat: Optional global format fallback. If omitted, formats are
                determined from the user prompt by the AI. Used as validation
                fallback if the AI doesn't return a format per document.
                Defaults to "txt" if not provided.
            parentOperationId: Parent operation ID for the ChatLog hierarchy.

        Returns:
            Structure dict with documents and chapters (not sections!).

        Raises:
            ValueError: If the AI response cannot be parsed (even after repair) or
                the parsed structure lacks 'documents', 'chapters' or
                'contentParts'.
            Exception: Any other error is re-raised after the progress log has
                been finished as failed.
        """
        # If outputFormat is not provided, use "txt" as fallback for validation.
        # The AI will determine formats per document from the user prompt.
        if not outputFormat:
            outputFormat = "txt"
            logger.debug("outputFormat not provided - using 'txt' as validation fallback, formats determined from prompt")

        # Operation ID for the structure generation step
        structureOperationId = f"{parentOperationId}_structure_generation"

        # Start ChatLog entry with parent reference
        self.services.chat.progressLogStart(
            structureOperationId,
            "Chapter Structure Generation",
            "Structure",
            f"Generating chapter structure (format: {outputFormat})",
            parentOperationId=parentOperationId
        )

        try:
            # Build the chapter structure prompt (with content index) exactly once;
            # the template is reused verbatim by the continuation prompt
            structurePrompt, templateStructure = self._buildChapterStructurePrompt(
                userPrompt=userPrompt,
                contentParts=contentParts,
                outputFormat=outputFormat
            )

            # The looping call is used so a truncated response can be continued
            options = AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                priority=PriorityEnum.QUALITY,
                processingMode=ProcessingModeEnum.DETAILED,
                compressPrompt=False,
                compressContext=False,
                resultFormat="json"
            )

            # Prompt builder for continuation support
            async def buildChapterStructurePromptWithContinuation(
                continuationContext: Any,
                templateStructure: str,
                basePrompt: str
            ) -> str:
                """Build chapter structure prompt with continuation context. Uses unified signature.

                Note: All initial context (userPrompt, contentParts, outputFormat, etc.)
                is already contained in basePrompt. This function only adds
                continuation-specific instructions.
                """
                # Only the fields needed for continuation are read
                incompletePart = continuationContext.incomplete_part
                lastRawJson = continuationContext.last_raw_json

                overlapContext = ""
                if lastRawJson:
                    # Break position: start of the incomplete part if it can be
                    # located, otherwise the end of the (right-stripped) raw JSON
                    breakPos = lastRawJson.find(incompletePart) if incompletePart else -1
                    if breakPos < 0:
                        breakPos = len(lastRawJson.rstrip())

                    # Cut part plus the full part before it (same level) for overlap
                    overlapContext = StructureGenerator._extractOverlapContext(lastRawJson, breakPos)

                    # Context showing the structure hierarchy with the cut point
                    from modules.shared.jsonUtils import buildIncompleteContext
                    unifiedContext = buildIncompleteContext(lastRawJson, breakPos)
                elif incompletePart:
                    unifiedContext = incompletePart
                else:
                    unifiedContext = "Unable to extract context - response was completely broken"

                # Unified continuation prompt format
                continuationPrompt = f"""{basePrompt}

--- CONTINUATION REQUEST ---
The previous JSON response was incomplete. Continue from where it stopped.

JSON Structure Template:
```json
{templateStructure}
```

Context showing structure hierarchy with cut point:
```
{unifiedContext}
```

Overlap Requirement:
To ensure proper merging, your response MUST start by repeating the cut part and the full part before (same level) shown below, then continue with new content.

Overlap context (cut part and full part before at same level):
```json
{overlapContext if overlapContext else "No overlap context available"}
```

TASK:
1. Start your response by repeating the overlap context shown above (cut part and full part before at same level)
2. Complete the incomplete element shown in the context above (marked with CUT POINT)
3. Continue generating the remaining content following the JSON structure template above
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects

CRITICAL:
- Your response must be valid JSON matching the structure template above
- Start with overlap context (cut part and full part before at same level) then continue seamlessly
- Complete the incomplete element and continue with remaining elements"""
                return continuationPrompt

            # Call AI with looping support.
            # NOTE: contentParts are deliberately NOT passed here - only their
            # metadata is needed for structure generation, and that metadata is
            # already part of the prompt. Content extraction happens later
            # during section generation.
            checkWorkflowStopped(self.services)
            aiResponseJson = await self.aiService.callAiWithLooping(
                prompt=structurePrompt,
                options=options,
                debugPrefix="chapter_structure_generation",
                promptBuilder=buildChapterStructurePromptWithContinuation,
                promptArgs={
                    "userPrompt": userPrompt,
                    "outputFormat": outputFormat,
                    "templateStructure": templateStructure,
                    "basePrompt": structurePrompt
                },
                useCaseId="chapter_structure",  # REQUIRED: Explicit use case ID
                operationId=structureOperationId,
                userPrompt=userPrompt,
                contentParts=None  # Only metadata needed, not content extraction
            )

            # Parse the complete JSON response, then validate and auto-fix it
            structure = self._parseStructureResponse(aiResponseJson)
            self._validateAndFixStructure(structure, outputFormat)

            # Finish ChatLog entry
            self.services.chat.progressLogFinish(structureOperationId, True)

            return structure

        except Exception as e:
            self.services.chat.progressLogFinish(structureOperationId, False)
            logger.error(f"Error in generateStructure: {str(e)}")
            raise

    def _parseStructureResponse(self, aiResponseJson: Any) -> Dict[str, Any]:
        """Parse the AI response into a structure dict, repairing broken JSON if needed.

        Args:
            aiResponseJson: Raw response returned by the looping AI call.

        Returns:
            The parsed JSON structure.

        Raises:
            ValueError: If the JSON can neither be parsed nor repaired.
        """
        extractedJson = self.services.utils.jsonExtractString(aiResponseJson)
        parsedJson, parseError, cleanedJson = self.services.utils.jsonTryParse(extractedJson)
        if parseError is None:
            return parsedJson

        # Even with looping, try repair as fallback
        logger.warning(f"JSON parsing failed after looping: {str(parseError)}. Attempting repair...")
        from modules.shared import jsonUtils
        repairedJson = jsonUtils.repairBrokenJson(extractedJson)
        if not repairedJson:
            logger.error(f"Failed to repair JSON. Parse error: {str(parseError)}")
            # Guard against a missing cleaned string so the preview cannot mask the real error
            logger.error(f"Cleaned JSON preview (first 500 chars): {(cleanedJson or '')[:500]}")
            raise ValueError(f"Failed to parse JSON structure: {str(parseError)}")

        parsedJson, parseError, _ = self.services.utils.jsonTryParse(json.dumps(repairedJson))
        if parseError is not None:
            logger.error(f"Failed to parse repaired JSON: {str(parseError)}")
            raise ValueError(f"Failed to parse JSON structure after repair: {str(parseError)}")

        logger.info("Successfully repaired and parsed JSON structure after looping")
        return parsedJson

    def _validateAndFixStructure(self, structure: Dict[str, Any], outputFormat: Optional[str]) -> None:
        """Validate the parsed structure and auto-fix per-document format and language in place.

        Args:
            structure: Parsed structure as returned by the AI.
            outputFormat: Global format fallback for documents without a format.

        Raises:
            ValueError: If 'documents' is missing or empty, a document has no
                'chapters', or a chapter has no 'contentParts'.
        """
        # Validation 3.1: Structure missing 'documents' field
        if "documents" not in structure:
            raise ValueError("Structure missing 'documents' field - cannot auto-fix")

        documents = structure["documents"]

        # Validation 3.2: Structure has no documents
        if not isinstance(documents, list) or len(documents) == 0:
            raise ValueError("Structure has no documents - cannot generate without documents")

        # Renderer registry is used for format validation
        from modules.services.serviceGeneration.renderers.registry import getRenderer

        # Both fallbacks are identical for every document, so resolve them once
        globalFormatFallback = outputFormat or "txt"
        userPromptLanguage = self._getUserLanguage()

        for doc in documents:
            # Validation 3.3 & 3.4: Document outputFormat.
            # Multiple documents can have different formats (e.g., one PDF, one HTML);
            # the global fallback is used only if the AI returned no format.
            if "outputFormat" not in doc or not doc["outputFormat"]:
                doc["outputFormat"] = globalFormatFallback
                logger.warning(f"Document {doc.get('id')} missing outputFormat - using fallback: {doc['outputFormat']}")
            else:
                formatName = str(doc["outputFormat"]).lower().strip()
                renderer = getRenderer(formatName)
                if not renderer:
                    # Format doesn't match any renderer - use txt (simple approach)
                    logger.warning(f"Document {doc.get('id')} has format without renderer: {formatName}, using 'txt'")
                    doc["outputFormat"] = "txt"
                else:
                    # Valid format with renderer - normalize and keep AI result
                    doc["outputFormat"] = formatName
                    logger.debug(f"Document {doc.get('id')} using AI-determined format: {formatName}")

            # Validation 3.5 & 3.6: Document language.
            # Capture the AI-provided value BEFORE overwriting it so the log
            # messages can tell "missing" from "invalid" and show the bad value.
            languageMissing = "language" not in doc
            rawLanguage = doc.get("language")
            normalizedLanguage = rawLanguage.strip().lower() if isinstance(rawLanguage, str) else None
            if normalizedLanguage is not None and len(normalizedLanguage) == 2:
                doc["language"] = normalizedLanguage
                logger.debug(f"Document {doc.get('id')} using AI-determined language: {doc['language']}")
            else:
                doc["language"] = userPromptLanguage
                if languageMissing:
                    logger.warning(f"Document {doc.get('id')} missing language - using currentUserLanguage: {userPromptLanguage}")
                else:
                    logger.warning(f"Document {doc.get('id')} has invalid language format from AI: {rawLanguage}, using currentUserLanguage")

            # Validation 3.7: Document missing 'chapters' field
            if "chapters" not in doc:
                raise ValueError(f"Document {doc.get('id')} missing 'chapters' field - cannot auto-fix")

            # Validation 3.8: Chapter missing 'contentParts' field
            for chapter in doc["chapters"]:
                if "contentParts" not in chapter:
                    raise ValueError(f"Chapter {chapter.get('id')} missing 'contentParts' field - cannot auto-fix")

    @staticmethod
    def _extractOverlapContext(jsonContent: str, breakPosition: int) -> str:
        """
        Extract overlap context: cut part and full part before (same level).
        Delegates to shared function in jsonUtils for consistency.
        """
        from modules.shared.jsonUtils import extractOverlapContext
        return extractOverlapContext(jsonContent, breakPosition)

    def _buildChapterStructurePrompt(
        self,
        userPrompt: str,
        contentParts: List[ContentPart],
        outputFormat: str
    ) -> tuple[str, str]:
        """Build the prompt for chapter structure generation.

        Args:
            userPrompt: The user request, embedded verbatim in the prompt.
            contentParts: Candidate ContentParts; parts without data are left out
                of the index, except "reference" parts which are always listed.
            outputFormat: Format used in the example and as the format fallback hint.

        Returns:
            Tuple of (prompt, templateStructure). The template is returned
            separately so continuation prompts can reuse exactly the same text.
        """
        # Select the parts to list in the index - parts without data are filtered out
        validParts = []
        filteredParts = []

        for part in contentParts:
            contentFormat = part.metadata.get("contentFormat", "unknown")

            # IMPORTANT: reference parts intentionally carry no data - always include them
            if contentFormat == "reference":
                validParts.append(part)
                logger.debug(f"Including reference ContentPart {part.id} (intentionally empty data)")
                continue

            # Skip parts without data.
            # NOTE(review): whitespace-only string data is still included, as before -
            # confirm whether such parts should be filtered as well.
            if not part.data:
                if part.typeGroup == "container":
                    filteredParts.append((part.id, "container without data"))
                else:
                    filteredParts.append((part.id, f"no data (format: {contentFormat})"))
                continue

            validParts.append(part)
            logger.debug(f"Including ContentPart {part.id}: format={contentFormat}, type={part.typeGroup}, dataLength={len(str(part.data))}")

        if filteredParts:
            logger.debug(f"Filtered out {len(filteredParts)} empty ContentParts: {filteredParts}")

        logger.info(f"Building structure prompt with {len(validParts)} valid ContentParts (from {len(contentParts)} total)")

        # Build the index for the valid parts only
        indexEntries = []
        for i, part in enumerate(validParts, 1):
            contentFormat = part.metadata.get("contentFormat", "unknown")
            originalFileName = part.metadata.get('originalFileName', 'N/A')
            indexEntries.append(
                f"\n{i}. ContentPart ID: {part.id}\n"
                f" Format: {contentFormat}\n"
                f" Type: {part.typeGroup}\n"
                f" MIME Type: {part.mimeType or 'N/A'}\n"
                f" Source: {part.metadata.get('documentId', 'unknown')}\n"
                f" Original file name: {originalFileName}\n"
                f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n"
            )
        contentPartsIndex = "".join(indexEntries) or "\n(No content parts available)"

        # Language comes from the services object via _getUserLanguage()
        language = self._getUserLanguage()
        logger.debug(f"Using language from services (user intention analysis) for structure generation: {language}")

        # Create the template structure explicitly (not extracted from the prompt).
        # This ensures exact identity between initial and continuation prompts.
        templateStructure = f"""{{
  "metadata": {{
    "title": "Document Title",
    "language": "{language}"
  }},
  "documents": [{{
    "id": "doc_1",
    "title": "Document Title",
    "filename": "document.{outputFormat}",
    "outputFormat": "{outputFormat}",
    "language": "{language}",
    "chapters": [
      {{
        "id": "chapter_1",
        "level": 1,
        "title": "Chapter Title",
        "contentParts": {{
          "extracted_part_id": {{
            "instruction": "Use extracted content with ALL relevant details from user request"
          }}
        }},
        "generationHint": "Detailed description including ALL relevant details from user request for this chapter",
        "sections": []
      }}
    ]
  }}]
}}"""

        # The example in the prompt is the template itself, so both stay in sync
        prompt = f"""# TASK: Generate Chapter Structure

This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generate multiple JSON objects, alternatives, or variations. Do not use separators like "---" between JSON objects.

## USER REQUEST (for context)
```
{userPrompt}
```

## AVAILABLE CONTENT PARTS
{contentPartsIndex}

## CONTENT ASSIGNMENT RULE

If the user request mentions documents/images/data, then EVERY chapter that generates content related to those references MUST assign the relevant ContentParts explicitly.

Assignment logic:
- If chapter DISPLAYS a document/image → assign "object" format ContentPart with "caption"
- If chapter generates text content ABOUT a document/image/data → assign ContentPart with "instruction":
  - Prefer "extracted" format if available (contains analyzed/extracted content)
  - If only "object" format is available, use "object" format with "instruction" (to write ABOUT the image/document)
- If chapter's generationHint or purpose relates to a document/image/data mentioned in user request → it MUST have ContentParts assigned
- Multiple chapters might assign the same ContentPart (e.g., one chapter displays image, another writes about it)
- Use ContentPart IDs exactly as listed in AVAILABLE CONTENT PARTS above
- Empty contentParts are only allowed if chapter generates content WITHOUT referencing any documents/images/data from the user request

CRITICAL RULE: If the user request mentions BOTH:
a) Documents/images/data (listed in AVAILABLE CONTENT PARTS above), AND
b) Generic content types (article text, main content, body text, etc.)
Then chapters that generate those generic content types MUST assign the relevant ContentParts, because the content should relate to or be based on the provided documents/images/data.

## CHAPTER STRUCTURE REQUIREMENTS
- Generate chapters based on USER REQUEST - analyze what structure the user wants
- Each chapter needs: id, level (1, 2, 3, etc.), title
- contentParts: {{"partId": {{"instruction": "..."}} or {{"caption": "..."}} or both}}
- Assign ContentParts as required by CONTENT ASSIGNMENT RULE above
- The "instruction" field for each ContentPart MUST contain ALL relevant details from the USER REQUEST that apply to content extraction for this specific chapter. Include all formatting rules, data requirements, constraints, and specifications mentioned in the user request that are relevant for processing this ContentPart in this chapter.
- generationHint: Description of what content to generate for this chapter
  The generationHint MUST contain ALL relevant details from the USER REQUEST that apply to this specific chapter. Include all formatting rules, data requirements, constraints, column specifications, validation rules, and any other specifications mentioned in the user request that are relevant for generating content for this chapter. Do NOT use generic descriptions - include specific details from the user request.
- The number of chapters depends on the user request - create only what is requested

## DOCUMENT OUTPUT FORMAT
For each document, determine the output format by analyzing the USER REQUEST:
- Look for explicit format mentions
- Infer from document purpose
- Infer from content type
- If format cannot be determined from the prompt, use: "{outputFormat}"
- Include "outputFormat" field in each document in the JSON structure
- Multiple documents can have different formats

## FORMAT-APPROPRIATE CHAPTER STRUCTURE
When determining the chapter structure, consider the document's output format and ensure chapters are structured appropriately for that format:
- Different formats have different capabilities and constraints
- Structure chapters to match what the format can effectively represent
- Consider what content types work best for each format
- Ensure the chapter structure aligns with the format's strengths and limitations

## DOCUMENT LANGUAGE
For each document, determine the language by analyzing the USER REQUEST:
- Look for explicit language mentions
- Map language names to ISO 639-1 codes
- If language cannot be determined from the prompt, use: "{language}"
- Include "language" field in each document in the JSON structure
- Multiple documents can have different languages

## JSON STRUCTURE REQUIREMENTS
- metadata: {{"title": "...", "language": "..."}}
- documents: Array of document objects, each with:
  - id: Unique document identifier (e.g., "doc_1")
  - title: Document title
  - filename: Output filename with extension (e.g., "document.docx")
  - outputFormat: Format code (e.g., "docx", "pdf", "html", "xlsx", "pptx", "txt")
  - language: ISO 639-1 language code (e.g., "de", "en", "fr", "it")
  - chapters: Array of chapter objects, each with:
    - id: Unique chapter identifier (e.g., "chapter_1")
    - level: Heading level (1, 2, 3, etc.)
    - title: Chapter title
    - contentParts: Object mapping ContentPart IDs to usage instructions {{"partId": {{"instruction": "..."}} or {{"caption": "..."}}}}
    - generationHint: Description of what content to generate
    - sections: Empty array []

EXAMPLE STRUCTURE (for reference only - adapt to user request):
{templateStructure}

CRITICAL INSTRUCTIONS:
- Generate chapters based on USER REQUEST, NOT based on the example above
- The example shows the JSON structure format, NOT the required chapters
- Create only the chapters that match the user's request
- Adapt chapter titles and structure to match the user's specific request
- Determine outputFormat and language for each document by analyzing the USER REQUEST above
- The example shows placeholders "{outputFormat}" and "{language}" - YOU MUST REPLACE THESE with actual values determined from the USER REQUEST

MANDATORY CONTENT ASSIGNMENT CHECK:
For each chapter, verify:
1. Does the user request mention documents/images/data? (e.g., "photo", "image", "document", "data", "based on", "about")
2. Does this chapter's generationHint, title, or purpose relate to those documents/images/data mentioned in step 1?
   - Examples: "article about the photo", "text describing the image", "analysis of the document", "content based on the data"
   - Even if chapter doesn't explicitly say "about the image", if user request mentions both the image AND this chapter's content type → relate them
3. If YES to both → chapter MUST have contentParts assigned (cannot be empty {{}})
4. If ContentPart is "object" format and chapter needs to write ABOUT it → assign with "instruction" field, not just "caption"

OUTPUT FORMAT: Start with {{ and end with }}. Do NOT use markdown code fences (```json). Do NOT add explanatory text before or after the JSON. Return ONLY the JSON object itself.
"""
        return prompt, templateStructure