# gateway/modules/aichat/serviceGeneration/subStructureGenerator.py
# 2026-01-22 21:11:25 +01:00

# 540 lines
# 24 KiB
# Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Structure Generator for hierarchical document generation.
Generates document skeleton with section placeholders.
"""
import logging
import json
from typing import Dict, Any, Optional, List
from modules.datamodels.datamodelJson import jsonTemplateDocument
logger = logging.getLogger(__name__)
class StructureGenerator:
    """Generates a document structure (skeleton) with section placeholders.

    The generator builds a prompt, asks the AI service for a JSON skeleton,
    and then normalizes the result so that every section carries the fields
    the validation step below guarantees (id, order, elements,
    contentPartIds, extractionPrompt, complexity, generation_hint and, for
    image sections, image configuration).
    """

    # Substrings in a generation hint that mark a text section as long-form.
    _LONG_CONTENT_KEYWORDS = (
        "chapter", "long", "detailed", "comprehensive",
        "extensive", "full", "complete story",
    )

    # Substrings that indicate a hint already reads like an image prompt.
    _VISUAL_KEYWORDS = (
        "illustration", "image", "picture", "visual", "depict", "show", "drawing",
    )

    # Section-id prefixes removed when deriving a human-readable hint.
    # Longer prefixes come first so the generic "section_" only matches last.
    _SECTION_ID_PREFIXES = (
        "section_heading_", "section_paragraph_", "section_bullet_list_",
        "section_code_block_", "section_image_", "section_",
    )

    def __init__(self, services: Any):
        """Store the service container used for AI calls, debug output and user info."""
        self.services = services

    async def generateStructure(
        self,
        userPrompt: str,
        documentList: Optional[Any] = None,
        cachedContent: Optional[Dict[str, Any]] = None,
        contentParts: Optional[List[Any]] = None,
        maxSectionLength: int = 500,
        existingImages: Optional[List[Dict[str, Any]]] = None
    ) -> Dict[str, Any]:
        """
        Generate document structure with sections.

        Args:
            userPrompt: User's original prompt
            documentList: Optional document references (accepted for interface
                compatibility; not read by this method)
            cachedContent: Optional extracted content cache
            contentParts: Optional list of ContentParts to analyze for structure generation
            maxSectionLength: Maximum words for simple sections
            existingImages: Optional list of existing images to include; when
                empty or None, images from cachedContent["imageDocuments"] are used

        Returns:
            Document structure with empty elements arrays and contentPartIds per section

        Raises:
            ValueError: If the AI response is empty, contains no JSON, or the
                parsed structure has neither 'documents' nor 'sections'.
        """
        try:
            # Pass existingImages through unchanged so the prompt builder can
            # fall back to images stored in cachedContent.
            structurePrompt = self._createStructurePrompt(
                userPrompt=userPrompt,
                cachedContent=cachedContent,
                contentParts=contentParts,
                maxSectionLength=maxSectionLength,
                existingImages=existingImages
            )

            # Debug: log structure generation prompt (harmonized - no checks needed)
            self.services.utils.writeDebugFile(
                structurePrompt,
                "document_generation_structure_prompt"
            )

            # Imported locally, as in the original implementation.
            from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
            options = AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                resultFormat="json"
            )
            aiResponse = await self.services.ai.callAiContent(
                prompt=structurePrompt,
                options=options,
                outputFormat="json"
            )

            # Debug: log structure generation response (harmonized - no checks needed).
            # Written before validation so that empty responses are also recorded.
            self.services.utils.writeDebugFile(
                aiResponse.content if aiResponse and aiResponse.content else '',
                "document_generation_structure_response"
            )

            if not aiResponse or not aiResponse.content:
                raise ValueError("AI structure generation returned empty response")

            # Extract and parse JSON
            extractedJson = self.services.utils.jsonExtractString(aiResponse.content)
            if not extractedJson:
                raise ValueError("No JSON found in AI structure response")
            structure = json.loads(extractedJson)

            # Validate and enhance structure
            return self._validateAndEnhanceStructure(structure, maxSectionLength)
        except Exception as e:
            logger.error("Error generating structure: %s", e)
            raise

    def _formatContentParts(self, contentParts: Optional[List[Any]]) -> str:
        """
        Serialize ContentParts to a compact JSON listing for the prompt.

        Only id, typeGroup, mimeType, label and metadata are included so the
        prompt does not carry the parts' full data.

        Returns:
            JSON text, or "" when there are no parts or serialization fails.
        """
        if not contentParts:
            return ""
        try:
            summaries = []
            for part in contentParts:
                if hasattr(part, 'dict'):
                    partDict = part.dict()
                elif isinstance(part, dict):
                    partDict = part
                else:
                    # Unknown object type: read the fields as attributes.
                    partDict = {
                        "id": getattr(part, 'id', ''),
                        "typeGroup": getattr(part, 'typeGroup', ''),
                        "mimeType": getattr(part, 'mimeType', ''),
                        "label": getattr(part, 'label', ''),
                        "metadata": getattr(part, 'metadata', {})
                    }
                summaries.append({
                    "id": partDict.get("id", ""),
                    "typeGroup": partDict.get("typeGroup", ""),
                    "mimeType": partDict.get("mimeType", ""),
                    "label": partDict.get("label", ""),
                    "metadata": partDict.get("metadata", {})
                })
            return json.dumps(summaries, indent=2, ensure_ascii=False)
        except Exception as e:
            # Best effort: the structure can still be generated without the listing.
            logger.warning("Could not format ContentParts as JSON: %s", e)
            return ""

    def _createStructurePrompt(
        self,
        userPrompt: str,
        cachedContent: Optional[Dict[str, Any]] = None,
        contentParts: Optional[List[Any]] = None,
        maxSectionLength: int = 500,
        existingImages: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """
        Create prompt for structure generation.

        Args:
            userPrompt: User's original prompt
            cachedContent: Optional cache; "extractedContent" is rendered as
                source text and "imageDocuments" is the image fallback
            contentParts: Optional ContentParts listed (id/type/label/metadata)
                so the model can reference them in contentPartIds
            maxSectionLength: Word threshold quoted in the complexity rule
            existingImages: Images to list; when empty or None the images from
                cachedContent["imageDocuments"] are used instead

        Returns:
            The complete prompt text.
        """
        userLanguage = self._getUserLanguage()

        # Format cached content if available
        cachedContentText = ""
        if cachedContent and cachedContent.get("extractedContent"):
            cachedContentText = self._formatCachedContent(cachedContent)

        # Use provided existingImages or fall back to images in cachedContent
        if not existingImages:
            existingImages = []
            if cachedContent and cachedContent.get("imageDocuments"):
                existingImages = cachedContent.get("imageDocuments", [])

        contentPartsJson = self._formatContentParts(contentParts)

        # Create structure template
        structureTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", "Document Title")

        separator = '=' * 80
        extractedContentText = cachedContentText if cachedContentText else "No source documents provided."
        contentPartsText = contentPartsJson if contentPartsJson else "No ContentParts provided."
        existingImagesText = (
            self._formatExistingImages(existingImages)
            if existingImages else "No existing images provided."
        )

        prompt = f"""{separator}
USER REQUEST:
{separator}
{userPrompt}
{separator}
TASK: Generate a document STRUCTURE (skeleton) with sections.
Do NOT generate actual content yet - only the structure.
{separator}
EXTRACTED CONTENT (if available):
{separator}
{extractedContentText}
{separator}
AVAILABLE CONTENT PARTS (reference them by "id" in contentPartIds):
{separator}
{contentPartsText}
{separator}
INSTRUCTIONS:
1. Analyze the user request, extracted content, and available ContentParts
2. Create a document structure with CONTENT sections only
3. For each section, specify:
- id: Unique identifier (e.g., "section_title_1", "section_image_1")
- content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block"
- complexity: "simple" (can generate directly) or "complex" (needs sub-prompt)
- generation_hint: Brief description of what content should be generated
- contentPartIds: Array of ContentPart IDs that should be used for this section (e.g., ["part_1", "part_2"]) - can be empty []
- extractionPrompt: (optional) Specific prompt for extracting/processing ContentParts for this section
- image_prompt: (only for image sections) Detailed prompt for image generation
- order: Section order number (starting from 1)
- elements: [] (empty array - will be populated later)
4. Identify image sections:
- If user requests illustrations/images, create image sections
- If existing images are provided in documentList (check EXISTING IMAGES section below), create image sections that reference them
- Add image_prompt field with detailed description for image generation (only for new images)
- Set complexity to "complex" for new images, "simple" for existing/render images
- For existing images: Set image_source to "existing" and image_reference_id to the image document ID
- For images to render (from input documents): Set image_source to "render" and image_reference_id to the image document ID
- Example for new image: {{"id": "section_image_1", "content_type": "image", "complexity": "complex", "generation_hint": "Illustration for chapter 1", "image_prompt": "A detailed description for image generation", "order": 2, "elements": []}}
- Example for existing image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Include provided image", "image_source": "existing", "image_reference_id": "doc_id_here", "order": 2, "elements": []}}
- Example for render image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Render input image", "image_source": "render", "image_reference_id": "doc_id_here", "order": 2, "elements": []}}
{separator}
EXISTING IMAGES (to include in document):
{separator}
{existingImagesText}
{separator}
5. Identify complex text sections:
- Long chapters (>{maxSectionLength} words expected) should be marked as "complex"
- Short paragraphs/headings should be "simple"
6. Return ONLY valid JSON following this structure:
{structureTemplate}
7. CRITICAL RULES FOR CONTENT PARTS:
- Analyze available ContentParts and determine which ones are needed for each section
- For image sections (content_type == "image"): Include image ContentParts in contentPartIds - images will be integrated as visual elements
- For other sections (heading, paragraph, etc.): If image ContentParts are referenced, they will be referenced as text in the document language (not integrated as images)
- Each section can reference multiple ContentParts via contentPartIds array
- If specific extraction/processing is needed for ContentParts, provide extractionPrompt
- Image references in non-image sections should be automatically derived in the document language (e.g., "siehe Bild 1" in German, "see Image 1" in English)
8. CRITICAL RULES:
- Return ONLY valid JSON (no comments, no trailing commas, double quotes only)
- Follow the exact JSON schema structure provided
- IMPORTANT: All sections MUST have empty elements arrays: "elements": [] (the template shows examples with content, but you must use empty arrays)
- ALL sections MUST include "generation_hint" field with a brief description of what content should be generated
- ALL sections MUST include "complexity" field: "simple" for short content, "complex" for long chapters/images
- ALL sections MUST include "contentPartIds" field (can be empty array [] if no ContentParts needed)
- Image sections MUST include "image_prompt" field with detailed description for image generation
- Order numbers MUST start from 1 (not 0)
- All content must be in the language '{userLanguage}'
- Do NOT generate actual content - only structure (skeleton)
- Use only supported content_type values: "heading", "paragraph", "image", "table", "bullet_list", "code_block"
Return ONLY the JSON structure. No explanations.
"""
        return prompt

    def _validateAndEnhanceStructure(
        self,
        structure: Dict[str, Any],
        maxSectionLength: int
    ) -> Dict[str, Any]:
        """
        Validate structure and enhance with complexity identification.

        A single-document structure (top-level "sections") is wrapped into the
        multi-document format. Every section is then completed in place with
        id, order (1-based position, overriding any supplied value), elements,
        contentPartIds, extractionPrompt, complexity and generation_hint.
        Image sections additionally get image_prompt/complexity defaults.

        Raises:
            ValueError: If the structure has neither 'documents' nor 'sections'.
        """
        try:
            # Ensure structure has required fields
            if "documents" not in structure:
                if "sections" not in structure:
                    raise ValueError("Structure missing 'documents' or 'sections' field")
                # Convert single-document format to multi-document format
                structure = {
                    "metadata": structure.get("metadata", {}),
                    "documents": [{
                        "id": "doc_1",
                        "title": structure.get("metadata", {}).get("title", "Document"),
                        "filename": "document.json",
                        "sections": structure.get("sections", [])
                    }]
                }

            for doc in structure.get("documents", []):
                for idx, section in enumerate(doc.get("sections", [])):
                    # A missing, null or empty id is replaced by a positional id;
                    # a null id would otherwise break hint extraction below.
                    if not section.get("id"):
                        section["id"] = f"section_{idx + 1}"
                    sectionId = section["id"]

                    section["order"] = idx + 1
                    section.setdefault("elements", [])
                    # contentPartIds may legitimately be an empty array
                    section.setdefault("contentPartIds", [])
                    # extractionPrompt is optional
                    section.setdefault("extractionPrompt", None)

                    # Identify complexity if not set
                    if "complexity" not in section:
                        section["complexity"] = self._identifySectionComplexity(
                            section,
                            maxSectionLength
                        )

                    # generation_hint is required for content generation
                    if not section.get("generation_hint"):
                        section["generation_hint"] = self._extractMeaningfulHint(
                            sectionId,
                            section.get("content_type", ""),
                            section.get("elements", [])
                        )

                    if section.get("content_type") == "image":
                        self._applyImageSectionDefaults(
                            section, sectionId, doc.get("title", "Document")
                        )
            return structure
        except Exception as e:
            logger.error("Error validating structure: %s", e)
            raise

    def _applyImageSectionDefaults(
        self,
        section: Dict[str, Any],
        sectionId: Any,
        docTitle: str
    ) -> None:
        """Complete an image section in place (image_prompt and complexity)."""
        imageSource = section.get("image_source", "generate")
        if imageSource in ("existing", "render"):
            # Existing or render image - image_reference_id should be set
            if "image_reference_id" not in section:
                logger.warning(
                    f"Image section {sectionId} has image_source='{imageSource}' but no image_reference_id"
                )
            # Existing/render images are simple (no generation needed)
            section["complexity"] = "simple"
            return

        # New image generation - ensure image_prompt
        if not section.get("image_prompt"):
            generationHint = section.get("generation_hint", "")
            if generationHint:
                # Turn the generation hint into a proper image prompt
                section["image_prompt"] = self._enhanceImagePrompt(generationHint)
            else:
                # Fall back to the document title
                section["image_prompt"] = f"Generate an illustration for: {docTitle}"
        # New images always need a generation step
        section["complexity"] = "complex"

    def _identifySectionComplexity(
        self,
        section: Dict[str, Any],
        maxSectionLength: int
    ) -> str:
        """
        Identify if section is simple or complex.

        Rules:
        - Images: always complex
        - generation_hint containing a long-content keyword: complex
        - Others: simple

        maxSectionLength is accepted for interface compatibility; the
        decision is keyword based and does not read it.
        """
        if section.get("content_type", "") == "image":
            return "complex"

        # The hint may be missing or null in AI output; treat that as empty.
        rawHint = section.get("generation_hint")
        generationHint = rawHint.lower() if isinstance(rawHint, str) else ""

        if any(keyword in generationHint for keyword in self._LONG_CONTENT_KEYWORDS):
            return "complex"
        return "simple"

    def _extractMeaningfulHint(
        self,
        sectionId: str,
        contentType: str,
        elements: List[Any]
    ) -> str:
        """
        Extract meaningful generation hint from section ID, content type, or elements.

        Args:
            sectionId: Section identifier (e.g., "section_heading_current_state")
            contentType: Content type (e.g., "heading", "paragraph")
            elements: Existing elements if any

        Returns:
            Meaningful generation hint string
        """
        # Ids come from AI output and may not be strings (e.g. numbers).
        sectionId = "" if sectionId is None else str(sectionId)
        sectionIdLower = sectionId.lower()

        # Text of the first existing element is the most accurate source
        if elements and isinstance(elements, list):
            firstElement = elements[0]
            if isinstance(firstElement, dict) and firstElement.get("text"):
                if contentType == "heading":
                    return firstElement["text"]
                if contentType == "paragraph":
                    return f"Content paragraph: {firstElement['text'][:50]}..."

        # Well-known section roles recognizable from the id
        if "introduction" in sectionIdLower or "intro" in sectionIdLower:
            return "Introduction paragraph"
        if "conclusion" in sectionIdLower:
            return "Conclusion paragraph"
        if "footer" in sectionIdLower or "copyright" in sectionIdLower:
            return "Footer content"
        if "title" in sectionIdLower and "main" in sectionIdLower:
            return "Main document title"

        # Strip the first matching common prefix, e.g. "section_heading_"
        meaningfulPart = sectionId
        for prefix in self._SECTION_ID_PREFIXES:
            if sectionIdLower.startswith(prefix):
                meaningfulPart = sectionId[len(prefix):]
                break

        # Convert snake_case to Title Case, e.g. "current_state" -> "Current State"
        titleCase = " ".join(
            word.capitalize() for word in meaningfulPart.replace("_", " ").split()
        )

        if contentType == "heading":
            return titleCase if titleCase else "Section heading"
        if contentType == "paragraph":
            if titleCase:
                return f"Content paragraph about {titleCase.lower()}"
            return "Content paragraph"
        if contentType == "bullet_list":
            if titleCase:
                return f"Bullet list: {titleCase.lower()}"
            return "Bullet list items"
        if contentType == "code_block":
            return "Code content"
        if titleCase:
            return f"Content for {titleCase.lower()}"
        return f"Content for {contentType} section"

    def _extractImagePrompts(
        self,
        structure: Dict[str, Any]
    ) -> Dict[str, str]:
        """
        Extract image generation prompts from structure.

        Returns:
            Mapping section_id -> image_prompt for image sections that have
            both a non-empty id and a non-empty image_prompt.
        """
        imagePrompts = {}
        for doc in structure.get("documents", []):
            for section in doc.get("sections", []):
                if section.get("content_type") != "image":
                    continue
                sectionId = section.get("id")
                imagePrompt = section.get("image_prompt")
                if sectionId and imagePrompt:
                    imagePrompts[sectionId] = imagePrompt
        return imagePrompts

    def _formatCachedContent(
        self,
        cachedContent: Dict[str, Any]
    ) -> str:
        """
        Format cached content for prompt inclusion.

        Entries of cachedContent["extractedContent"] that expose a `parts`
        attribute contribute the `content` of each part; any other entry is
        rendered with str(). Parts whose content is None are skipped and
        non-string content is converted to text, so a single odd part does
        not discard everything else.

        Returns:
            The parts joined by blank lines, "No content extracted." when there
            is nothing to show, or "Error formatting cached content." on failure.
        """
        try:
            extractedContent = cachedContent.get("extractedContent", [])
            if not extractedContent:
                return "No content extracted."

            formattedParts = []
            for extracted in extractedContent:
                if not hasattr(extracted, 'parts'):
                    formattedParts.append(str(extracted))
                    continue
                for part in extracted.parts:
                    content = getattr(part, 'content', None)
                    if content is None:
                        continue
                    formattedParts.append(content if isinstance(content, str) else str(content))
            return "\n\n".join(formattedParts) if formattedParts else "No content extracted."
        except Exception as e:
            logger.warning("Error formatting cached content: %s", e)
            return "Error formatting cached content."

    def _enhanceImagePrompt(self, generationHint: str) -> str:
        """
        Enhance generation hint to be a proper image generation prompt.

        A hint that already contains a visual keyword is returned unchanged;
        otherwise it is prefixed with an illustration instruction.
        """
        hintLower = generationHint.lower()
        if any(keyword in hintLower for keyword in self._VISUAL_KEYWORDS):
            return generationHint
        return f"Create a professional illustration: {generationHint}"

    def _formatExistingImages(self, imageDocuments: List[Dict[str, Any]]) -> str:
        """Format existing images list for prompt inclusion (numbered from 1)."""
        if not imageDocuments:
            return "No existing images provided."
        formatted = []
        for i, imgDoc in enumerate(imageDocuments, 1):
            formatted.extend([
                f"{i}. Image ID: {imgDoc.get('id')}",
                f" File Name: {imgDoc.get('fileName', 'Unknown')}",
                f" MIME Type: {imgDoc.get('mimeType', 'Unknown')}",
                f" Alt Text: {imgDoc.get('altText', 'Image')}",
                "",
            ])
        return "\n".join(formatted)

    def _getUserLanguage(self) -> str:
        """
        Get user language for document generation.

        Reads services.currentUserLanguage, then services.user.language, and
        falls back to 'en' when neither yields a non-empty value.
        """
        services = self.services
        if not services:
            return 'en'
        language = None
        try:
            language = getattr(services, 'currentUserLanguage', None)
            if not language:
                user = getattr(services, 'user', None)
                language = getattr(user, 'language', None) if user else None
        except Exception as e:
            # Language lookup is best effort; keep the default but leave a trace.
            logger.debug("Could not determine user language: %s", e)
        return language or 'en'