163 lines
6.3 KiB
Python
163 lines
6.3 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""
|
|
Content Integrator for hierarchical document generation.
|
|
Merges generated content into document structure and validates completeness.
|
|
"""
|
|
|
|
import logging
|
|
from typing import Dict, Any, List, Tuple
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ContentIntegrator:
    """Integrates generated content into document structure.

    The structure handled here is a dict with a ``"documents"`` list; each
    document carries a ``"sections"`` list of section dicts identified by
    their ``"id"`` key.
    """

    # Element keys whose truthy value counts as "this element has content".
    _CONTENT_KEYS = ("text", "base64Data", "headers", "items", "code")

    def __init__(self, services: Any = None):
        """Store the optional services container.

        Args:
            services: Object providing ``utils.writeDebugFile``; it is only
                used for the debug dump at the end of ``integrateContent``.
                May be ``None``, in which case the dump is skipped.
        """
        self.services = services

    def integrateContent(
        self,
        structure: Dict[str, Any],
        generatedSections: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Merge generated sections into document structure.

        The merge happens in place: ``structure`` itself is modified and
        returned. Each structure section is matched to a generated section by
        ``"id"``. A match contributes its ``"elements"`` (when present) and,
        if it is flagged with ``"error"``, its error metadata. A section with
        no match is replaced by a placeholder from ``createErrorSection``.

        Args:
            structure: Original document structure
            generatedSections: List of sections with populated elements
                (``None`` is treated as an empty list)

        Returns:
            Complete document structure ready for rendering (the same object
            that was passed in as ``structure``)

        Raises:
            Exception: Any error raised during merging or the debug dump is
                logged and re-raised unchanged.
        """
        try:
            # Create mapping of section IDs to generated sections
            sectionMap = {
                generated.get("id"): generated
                for generated in (generatedSections or [])
            }

            # Process each document; a missing or None list means "nothing to do"
            for doc in structure.get("documents") or []:
                sections = doc.get("sections") or []

                for idx, section in enumerate(sections):
                    sectionId = section.get("id")

                    if sectionId in sectionMap:
                        generatedSection = sectionMap[sectionId]

                        # Merge elements into structure section
                        if "elements" in generatedSection:
                            section["elements"] = generatedSection["elements"]

                        # Preserve error information if present
                        if generatedSection.get("error"):
                            section["error"] = True
                            section["errorMessage"] = generatedSection.get("errorMessage")
                            section["originalContentType"] = generatedSection.get("originalContentType")
                    else:
                        # Section not generated - swap in an error placeholder
                        logger.warning(f"Section {sectionId} not found in generated sections")
                        sections[idx] = self.createErrorSection(
                            section,
                            f"Section {sectionId} was not generated"
                        )

            # Debug: write the final merged structure to the debug file
            self._writeDebugStructure(structure)

            return structure

        except Exception as e:
            logger.exception("Error integrating content: %s", e)
            raise

    def _writeDebugStructure(self, structure: Dict[str, Any]) -> None:
        """Dump the merged structure as JSON through ``services.utils.writeDebugFile``.

        Does nothing when no services object (or no such writer) is available,
        so an integrator created with the default ``services=None`` can still
        merge content. Serialization and writer errors propagate to the caller.
        """
        import json

        utils = getattr(self.services, "utils", None)
        writeDebugFile = getattr(utils, "writeDebugFile", None)
        if writeDebugFile is None:
            logger.debug("No debug writer available; skipping final merged JSON dump")
            return

        structureJson = json.dumps(structure, indent=2, ensure_ascii=False)
        writeDebugFile(structureJson, "document_generation_final_merged_json")
        logger.debug("Logged final merged JSON structure (%d chars)", len(structureJson))

    def validateCompleteness(
        self,
        document: Dict[str, Any]
    ) -> Tuple[bool, List[str]]:
        """
        Validate that all sections have content.

        A section counts as having content when at least one of its elements
        has a truthy ``"text"``, ``"base64Data"``, ``"headers"``, ``"items"``
        or ``"code"`` value. Sections flagged with ``"error"`` are never
        reported as missing.

        Args:
            document: Document structure to validate

        Returns:
            (is_complete, list_of_missing_sections). If validation itself
            fails, returns ``(False, ["Validation error: <message>"])``
            instead of raising.
        """
        missingSections = []

        try:
            for doc in document.get("documents", []):
                for section in doc.get("sections", []):
                    # A missing or None element list is simply "no content"
                    elements = section.get("elements") or []

                    hasContent = any(
                        element.get(key)
                        for element in elements
                        for key in self._CONTENT_KEYS
                    )

                    # Error sections are skipped (they carry the error text)
                    if not hasContent and not section.get("error"):
                        missingSections.append(section.get("id", "unknown"))

            return not missingSections, missingSections

        except Exception as e:
            logger.exception("Error validating completeness: %s", e)
            return False, [f"Validation error: {str(e)}"]

    def createErrorSection(
        self,
        originalSection: Dict[str, Any],
        errorMessage: str
    ) -> Dict[str, Any]:
        """
        Create error placeholder section.

        The placeholder is a new dict; ``originalSection`` is not modified.
        It keeps the original ``id``, ``order``, ``title``,
        ``generation_hint`` and ``complexity``, and records the original
        content type under ``"originalContentType"``.

        Args:
            originalSection: Original section that failed
            errorMessage: Error message to display

        Returns:
            Error section with placeholder content
        """
        contentType = originalSection.get("content_type", "content")
        sectionId = originalSection.get("id", "unknown")
        placeholderText = (
            f"[ERROR: Failed to generate {contentType} for section '{sectionId}'. "
            f"Error: {errorMessage}]"
        )

        return {
            "id": sectionId,
            "content_type": "paragraph",  # Change to paragraph for error display
            "elements": [{"text": placeholderText}],
            "order": originalSection.get("order", 0),
            "error": True,
            "errorMessage": errorMessage,
            "originalContentType": contentType,
            "title": originalSection.get("title"),
            "generation_hint": originalSection.get("generation_hint"),
            "complexity": originalSection.get("complexity"),
        }
|
|
|