gateway/modules/services/serviceGeneration/subContentIntegrator.py
2025-12-23 00:34:15 +01:00

167 lines
6.5 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Content Integrator for hierarchical document generation.
Merges generated content into document structure and validates completeness.
"""
import logging
from typing import Dict, Any, List, Tuple
# Module-level logger; records are emitted under this module's import name.
logger = logging.getLogger(__name__)
class ContentIntegrator:
    """Integrates generated content into document structure.

    The structure handled here is a dict with a "documents" list; every
    document holds a "sections" list and every section is a dict that is
    matched to its generated counterpart through the "id" key.
    """

    # Element keys whose truthy value counts as actual content when a
    # section is validated.
    _CONTENT_KEYS = ("text", "base64Data", "headers", "items", "code")

    def __init__(self, services: Any = None):
        """
        Store the optional services container.

        Args:
            services: Optional object. When it exposes
                ``utils.writeDebugFile`` the merged structure is dumped
                through it after integration.
        """
        self.services = services

    def integrateContent(
        self,
        structure: Dict[str, Any],
        generatedSections: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Merge generated sections into document structure.

        The structure is modified in place and returned. Sections that have
        no generated counterpart are replaced by an error placeholder built
        with createErrorSection.

        Args:
            structure: Original document structure
            generatedSections: List of sections with populated elements.
                None is treated as an empty list and entries that are not
                dicts are ignored.

        Returns:
            Complete document structure ready for rendering

        Raises:
            Exception: Any unexpected error is logged and re-raised.
        """
        try:
            # Map section IDs to generated sections. Malformed (non-dict)
            # entries are skipped so one bad entry cannot abort the merge.
            sectionMap = {
                generated.get("id"): generated
                for generated in (generatedSections or [])
                if isinstance(generated, dict)
            }

            # "or []" also covers keys that are present but set to None.
            for doc in structure.get("documents") or []:
                sections = doc.get("sections") or []
                for idx, section in enumerate(sections):
                    sectionId = section.get("id")

                    if sectionId not in sectionMap:
                        # Section not generated - swap in an error section
                        logger.warning(f"Section {sectionId} not found in generated sections")
                        sections[idx] = self.createErrorSection(
                            section,
                            f"Section {sectionId} was not generated"
                        )
                        continue

                    generatedSection = sectionMap[sectionId]

                    # Merge elements into structure section
                    if "elements" in generatedSection:
                        section["elements"] = generatedSection["elements"]

                    # Preserve error information if present
                    if generatedSection.get("error"):
                        section["error"] = True
                        section["errorMessage"] = generatedSection.get("errorMessage")
                        section["originalContentType"] = generatedSection.get("originalContentType")

            self._writeDebugStructure(structure)
            return structure
        except Exception as e:
            logger.error(f"Error integrating content: {str(e)}")
            raise

    def _writeDebugStructure(self, structure: Dict[str, Any]) -> None:
        """Dump the merged structure via services.utils.writeDebugFile, best effort."""
        services = self.services
        if not (services and hasattr(services, 'utils') and hasattr(services.utils, 'writeDebugFile')):
            return
        try:
            import json
            structureJson = json.dumps(structure, indent=2, ensure_ascii=False)
            services.utils.writeDebugFile(
                structureJson,
                "document_generation_final_merged_json"
            )
            logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)")
        except Exception as e:
            # The debug dump must never break the integration itself.
            logger.debug(f"Could not write debug file for final merged JSON: {e}")

    def validateCompleteness(
        self,
        document: Dict[str, Any]
    ) -> Tuple[bool, List[str]]:
        """
        Validate that all sections have content.

        A section counts as missing when it is not flagged as an error
        section and none of its elements carries a truthy value under one
        of the keys in _CONTENT_KEYS. Elements that are not dicts are
        treated as empty instead of aborting the validation.

        Args:
            document: Document structure to validate

        Returns:
            (is_complete, list_of_missing_sections). When an unexpected
            error occurs the result is (False, ["Validation error: ..."]).
        """
        missingSections = []
        try:
            for doc in document.get("documents") or []:
                for section in doc.get("sections") or []:
                    # Error sections carry placeholder text and are never
                    # reported as missing.
                    if section.get("error"):
                        continue
                    if not self._hasContent(section.get("elements", [])):
                        missingSections.append(section.get("id", "unknown"))
            return len(missingSections) == 0, missingSections
        except Exception as e:
            logger.error(f"Error validating completeness: {str(e)}")
            return False, [f"Validation error: {str(e)}"]

    def _hasContent(self, elements: Any) -> bool:
        """Return True when at least one dict element holds a truthy content value."""
        if not elements:
            return False
        return any(
            isinstance(element, dict)
            and any(element.get(key) for key in self._CONTENT_KEYS)
            for element in elements
        )

    def createErrorSection(
        self,
        originalSection: Dict[str, Any],
        errorMessage: str
    ) -> Dict[str, Any]:
        """
        Create error placeholder section.

        The placeholder is a new dict; the original section is not modified.
        Only id, order, title, generation_hint and complexity are carried
        over from the original section.

        Args:
            originalSection: Original section that failed
            errorMessage: Error message to display

        Returns:
            Error section with placeholder content
        """
        contentType = originalSection.get("content_type", "content")
        sectionId = originalSection.get("id", "unknown")
        return {
            "id": sectionId,
            "content_type": "paragraph",  # Change to paragraph for error display
            "elements": [{
                "text": f"[ERROR: Failed to generate {contentType} for section '{sectionId}'. Error: {errorMessage}]"
            }],
            "order": originalSection.get("order", 0),
            "error": True,
            "errorMessage": errorMessage,
            # Keep the requested type so the failure can still be traced.
            "originalContentType": contentType,
            "title": originalSection.get("title"),
            "generation_hint": originalSection.get("generation_hint"),
            "complexity": originalSection.get("complexity")
        }