gateway/modules/services/serviceAi/subStructureGeneration.py

453 lines
23 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Structure Generation Module
Handles document structure generation, including:
- Generating document structure with sections
- Building structure prompts
"""
import json
import logging
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
class StructureGenerator:
"""Handles document structure generation."""
def __init__(self, services, aiService):
"""Initialize StructureGenerator with service center and AI service access."""
self.services = services
self.aiService = aiService
def _getUserLanguage(self) -> str:
"""Get user language for document generation"""
try:
if self.services:
# Prefer detected language if available (from user intention analysis)
if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage:
return self.services.currentUserLanguage
# Fallback to user's preferred language
elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'):
return self.services.user.language
except Exception:
pass
return 'en' # Default fallback
async def generateStructure(
self,
userPrompt: str,
contentParts: List[ContentPart],
outputFormat: Optional[str] = None,
parentOperationId: str = None
) -> Dict[str, Any]:
"""
Phase 5C: Generiert Chapter-Struktur (Table of Contents).
Definiert für jedes Chapter:
- Level, Title
- contentParts (unified object with instruction and/or caption per part)
- generationHint
Generate document structure with per-document format determination.
Multiple documents can be produced with different formats (e.g., one PDF, one HTML).
AI determines formats per-document from user prompt. The outputFormat parameter is
only a validation fallback - used if AI doesn't return format per document.
Args:
userPrompt: User-Anfrage
contentParts: Alle vorbereiteten ContentParts mit Metadaten
outputFormat: Optional global format fallback. If omitted, formats are determined
from user prompt by AI. Used as validation fallback if AI doesn't
return format per document. Defaults to "txt" if not provided.
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
Returns:
Struktur-Dict mit documents und chapters (nicht sections!)
"""
# If outputFormat not provided, use "txt" as fallback for validation
# AI will determine formats per document from user prompt
if not outputFormat:
outputFormat = "txt"
logger.debug("outputFormat not provided - using 'txt' as validation fallback, formats determined from prompt")
# Erstelle Operation-ID für Struktur-Generierung
structureOperationId = f"{parentOperationId}_structure_generation"
# Starte ChatLog mit Parent-Referenz
formatDisplay = outputFormat if outputFormat else "auto-determined"
self.services.chat.progressLogStart(
structureOperationId,
"Chapter Structure Generation",
"Structure",
f"Generating chapter structure (format: {formatDisplay})",
parentOperationId=parentOperationId
)
try:
# Baue Chapter-Struktur-Prompt mit Content-Index
structurePrompt = self._buildChapterStructurePrompt(
userPrompt=userPrompt,
contentParts=contentParts,
outputFormat=outputFormat
)
# AI-Call für Chapter-Struktur-Generierung mit Looping-Unterstützung
# Use _callAiWithLooping instead of callAiPlanning to support continuation if response is cut
options = AiCallOptions(
operationType=OperationTypeEnum.DATA_GENERATE,
priority=PriorityEnum.QUALITY,
processingMode=ProcessingModeEnum.DETAILED,
compressPrompt=False,
compressContext=False,
resultFormat="json"
)
# Create prompt builder for continuation support
async def buildChapterStructurePromptWithContinuation(
continuationContext: Optional[Dict[str, Any]] = None,
**kwargs
) -> str:
"""Build chapter structure prompt with optional continuation context."""
basePrompt = self._buildChapterStructurePrompt(
userPrompt=userPrompt,
contentParts=contentParts,
outputFormat=outputFormat
)
if continuationContext:
# Add continuation instructions
deliveredSummary = continuationContext.get("delivered_summary", "")
elementBeforeCutoff = continuationContext.get("element_before_cutoff", "")
cutOffElement = continuationContext.get("cut_off_element", "")
continuationText = f"{deliveredSummary}\n\n"
continuationText += "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n"
if elementBeforeCutoff:
continuationText += "# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n"
continuationText += f"{elementBeforeCutoff}\n\n"
if cutOffElement:
continuationText += "# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n"
continuationText += f"{cutOffElement}\n\n"
continuationText += "⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n"
continuationText += "Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n"
continuationText += "Start directly with the next chapter that should follow.\n\n"
return f"""{basePrompt}
{continuationText}
Continue generating the remaining chapters now.
"""
else:
return basePrompt
# Call AI with looping support
# NOTE: Do NOT pass contentParts here - we only need metadata for structure generation
# The contentParts metadata is already included in the prompt (contentPartsIndex)
# Actual content extraction happens later during section generation
checkWorkflowStopped(self.services)
aiResponseJson = await self.aiService.callAiWithLooping(
prompt=structurePrompt,
options=options,
debugPrefix="chapter_structure_generation",
promptBuilder=buildChapterStructurePromptWithContinuation,
promptArgs={
"userPrompt": userPrompt,
"outputFormat": outputFormat,
"services": self.services
},
useCaseId="chapter_structure", # REQUIRED: Explicit use case ID
operationId=structureOperationId,
userPrompt=userPrompt,
contentParts=None # Do not pass ContentParts - only metadata needed, not content extraction
)
# Parse the complete JSON response (looping system already handles completion)
extractedJson = self.services.utils.jsonExtractString(aiResponseJson)
parsedJson, parseError, cleanedJson = self.services.utils.jsonTryParse(extractedJson)
if parseError is not None:
# Even with looping, try repair as fallback
logger.warning(f"JSON parsing failed after looping: {str(parseError)}. Attempting repair...")
from modules.shared import jsonUtils
repairedJson = jsonUtils.repairBrokenJson(extractedJson)
if repairedJson:
parsedJson, parseError, _ = self.services.utils.jsonTryParse(json.dumps(repairedJson))
if parseError is None:
logger.info("Successfully repaired and parsed JSON structure after looping")
structure = parsedJson
else:
logger.error(f"Failed to parse repaired JSON: {str(parseError)}")
raise ValueError(f"Failed to parse JSON structure after repair: {str(parseError)}")
else:
logger.error(f"Failed to repair JSON. Parse error: {str(parseError)}")
logger.error(f"Cleaned JSON preview (first 500 chars): {cleanedJson[:500]}")
raise ValueError(f"Failed to parse JSON structure: {str(parseError)}")
else:
structure = parsedJson
# State 3 Validation: Validate and auto-fix structure
# Validation 3.1: Structure missing 'documents' field
if "documents" not in structure:
raise ValueError("Structure missing 'documents' field - cannot auto-fix")
documents = structure["documents"]
# Validation 3.2: Structure has no documents
if not isinstance(documents, list) or len(documents) == 0:
raise ValueError("Structure has no documents - cannot generate without documents")
# Import renderer registry for format validation (existing infrastructure)
from modules.services.serviceGeneration.renderers.registry import getRenderer
# Validate and fix each document
for doc in documents:
# Validation 3.3 & 3.4: Document outputFormat
# outputFormat parameter is optional - if omitted, formats determined from prompt by AI
# Use as fallback only if AI doesn't return format per document
# Multiple documents can have different formats (e.g., one PDF, one HTML)
globalFormatFallback = outputFormat or "txt" # Fallback for validation
if "outputFormat" not in doc or not doc["outputFormat"]:
# AI didn't return format or returned empty - use global fallback
doc["outputFormat"] = globalFormatFallback
logger.warning(f"Document {doc.get('id')} missing outputFormat - using fallback: {doc['outputFormat']}")
else:
# AI returned format - validate using existing renderer registry
formatName = str(doc["outputFormat"]).lower().strip()
renderer = getRenderer(formatName) # Uses existing infrastructure
if not renderer:
# Format doesn't match any renderer - use txt (simple approach)
logger.warning(f"Document {doc.get('id')} has format without renderer: {formatName}, using 'txt'")
doc["outputFormat"] = "txt"
else:
# Valid format with renderer - normalize and keep AI result
doc["outputFormat"] = formatName
logger.debug(f"Document {doc.get('id')} using AI-determined format: {formatName}")
# Validation 3.5 & 3.6: Document language
# Use validated currentUserLanguage (always valid, validated during user intention analysis)
# Access via _getUserLanguage() which uses self.services.currentUserLanguage
userPromptLanguage = self._getUserLanguage() # Uses validated currentUserLanguage infrastructure
if "language" not in doc or not isinstance(doc["language"], str) or len(doc["language"]) != 2:
# AI didn't return language or invalid format - use validated currentUserLanguage
doc["language"] = userPromptLanguage
if "language" not in doc:
logger.warning(f"Document {doc.get('id')} missing language - using currentUserLanguage: {userPromptLanguage}")
else:
logger.warning(f"Document {doc.get('id')} has invalid language format from AI: {doc['language']}, using currentUserLanguage")
else:
# AI returned valid language format - normalize
doc["language"] = doc["language"].lower().strip()[:2]
logger.debug(f"Document {doc.get('id')} using AI-determined language: {doc['language']}")
# Validation 3.7: Document missing 'chapters' field
if "chapters" not in doc:
raise ValueError(f"Document {doc.get('id')} missing 'chapters' field - cannot auto-fix")
# Validation 3.8: Chapter missing 'contentParts' field
for chapter in doc["chapters"]:
if "contentParts" not in chapter:
raise ValueError(f"Chapter {chapter.get('id')} missing 'contentParts' field - cannot auto-fix")
# ChatLog abschließen
self.services.chat.progressLogFinish(structureOperationId, True)
return structure
except Exception as e:
self.services.chat.progressLogFinish(structureOperationId, False)
logger.error(f"Error in generateStructure: {str(e)}")
raise
def _buildChapterStructurePrompt(
self,
userPrompt: str,
contentParts: List[ContentPart],
outputFormat: str
) -> str:
"""Baue Prompt für Chapter-Struktur-Generierung."""
# Baue ContentParts-Index - filtere leere Parts heraus
contentPartsIndex = ""
validParts = []
filteredParts = []
for part in contentParts:
contentFormat = part.metadata.get("contentFormat", "unknown")
# WICHTIG: Reference Parts haben absichtlich leere Daten - immer einschließen
if contentFormat == "reference":
validParts.append(part)
logger.debug(f"Including reference ContentPart {part.id} (intentionally empty data)")
continue
# Überspringe leere Parts (keine Daten oder nur Container ohne Inhalt)
# ABER: Reference Parts wurden bereits oben behandelt
if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0):
# Überspringe Container-Parts ohne Daten
if part.typeGroup == "container" and not part.data:
filteredParts.append((part.id, "container without data"))
continue
# Überspringe andere leere Parts (aber nicht Reference, die wurden bereits behandelt)
if not part.data:
filteredParts.append((part.id, f"no data (format: {contentFormat})"))
continue
validParts.append(part)
logger.debug(f"Including ContentPart {part.id}: format={contentFormat}, type={part.typeGroup}, dataLength={len(str(part.data)) if part.data else 0}")
if filteredParts:
logger.debug(f"Filtered out {len(filteredParts)} empty ContentParts: {filteredParts}")
logger.info(f"Building structure prompt with {len(validParts)} valid ContentParts (from {len(contentParts)} total)")
# Baue Index nur für gültige Parts
for i, part in enumerate(validParts, 1):
contentFormat = part.metadata.get("contentFormat", "unknown")
originalFileName = part.metadata.get('originalFileName', 'N/A')
contentPartsIndex += f"\n{i}. ContentPart ID: {part.id}\n"
contentPartsIndex += f" Format: {contentFormat}\n"
contentPartsIndex += f" Type: {part.typeGroup}\n"
contentPartsIndex += f" MIME Type: {part.mimeType or 'N/A'}\n"
contentPartsIndex += f" Source: {part.metadata.get('documentId', 'unknown')}\n"
contentPartsIndex += f" Original file name: {originalFileName}\n"
contentPartsIndex += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n"
if not contentPartsIndex:
contentPartsIndex = "\n(No content parts available)"
# Get language from services (user intention analysis)
language = self._getUserLanguage()
logger.debug(f"Using language from services (user intention analysis) for structure generation: {language}")
prompt = f"""# TASK: Generate Chapter Structure
This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generate multiple JSON objects, alternatives, or variations. Do not use separators like "---" between JSON objects.
## USER REQUEST (for context)
```
{userPrompt}
```
## AVAILABLE CONTENT PARTS
{contentPartsIndex}
## CONTENT ASSIGNMENT RULE
If the user request mentions documents/images/data, then EVERY chapter that generates content related to those references MUST assign the relevant ContentParts explicitly.
Assignment logic:
- If chapter DISPLAYS a document/image → assign "object" format ContentPart with "caption"
- If chapter generates text content ABOUT a document/image/data → assign ContentPart with "instruction":
- Prefer "extracted" format if available (contains analyzed/extracted content)
- If only "object" format is available, use "object" format with "instruction" (to write ABOUT the image/document)
- If chapter's generationHint or purpose relates to a document/image/data mentioned in user request → it MUST have ContentParts assigned
- Multiple chapters might assign the same ContentPart (e.g., one chapter displays image, another writes about it)
- Use ContentPart IDs exactly as listed in AVAILABLE CONTENT PARTS above
- Empty contentParts are only allowed if chapter generates content WITHOUT referencing any documents/images/data from the user request
CRITICAL RULE: If the user request mentions BOTH:
a) Documents/images/data (listed in AVAILABLE CONTENT PARTS above), AND
b) Generic content types (article text, main content, body text, etc.)
Then chapters that generate those generic content types MUST assign the relevant ContentParts, because the content should relate to or be based on the provided documents/images/data.
## CHAPTER STRUCTURE REQUIREMENTS
- Generate chapters based on USER REQUEST - analyze what structure the user wants
- Each chapter needs: id, level (1, 2, 3, etc.), title
- contentParts: {{"partId": {{"instruction": "..."}} or {{"caption": "..."}} or both}} - Assign ContentParts as required by CONTENT ASSIGNMENT RULE above
- generationHint: Description of what content to generate for this chapter
- The number of chapters depends on the user request - create only what is requested
## DOCUMENT OUTPUT FORMAT
For each document, determine the output format by analyzing the USER REQUEST:
- Look for explicit format mentions
- Infer from document purpose
- Infer from content type
- If format cannot be determined from the prompt, use: "{outputFormat}"
- Include "outputFormat" field in each document in the JSON structure
- Multiple documents can have different formats
## DOCUMENT LANGUAGE
For each document, determine the language by analyzing the USER REQUEST:
- Look for explicit language mentions
- Map language names to ISO 639-1 codes
- If language cannot be determined from the prompt, use: "{language}"
- Include "language" field in each document in the JSON structure
- Multiple documents can have different languages
## JSON STRUCTURE REQUIREMENTS
- metadata: {{"title": "...", "language": "..."}}
- documents: Array of document objects, each with:
- id: Unique document identifier (e.g., "doc_1")
- title: Document title
- filename: Output filename with extension (e.g., "document.docx")
- outputFormat: Format code (e.g., "docx", "pdf", "html", "xlsx", "pptx", "txt")
- language: ISO 639-1 language code (e.g., "de", "en", "fr", "it")
- chapters: Array of chapter objects, each with:
- id: Unique chapter identifier (e.g., "chapter_1")
- level: Heading level (1, 2, 3, etc.)
- title: Chapter title
- contentParts: Object mapping ContentPart IDs to usage instructions {{"partId": {{"instruction": "..."}} or {{"caption": "..."}}}}
- generationHint: Description of what content to generate
- sections: Empty array []
EXAMPLE STRUCTURE (for reference only - adapt to user request):
{{
"metadata": {{
"title": "Document Title",
"language": "{language}"
}},
"documents": [{{
"id": "doc_1",
"title": "Document Title",
"filename": "document.{outputFormat}",
"outputFormat": "{outputFormat}",
"language": "{language}",
"chapters": [
{{
"id": "chapter_1",
"level": 1,
"title": "Chapter Title",
"contentParts": {{
"extracted_part_id": {{
"instruction": "Use extracted content..."
}}
}},
"generationHint": "Description of chapter content",
"sections": []
}}
]
}}]
}}
CRITICAL INSTRUCTIONS:
- Generate chapters based on USER REQUEST, NOT based on the example above
- The example shows the JSON structure format, NOT the required chapters
- Create only the chapters that match the user's request
- Adapt chapter titles and structure to match the user's specific request
- Determine outputFormat and language for each document by analyzing the USER REQUEST above
- The example shows placeholders "{outputFormat}" and "{language}" - YOU MUST REPLACE THESE with actual values determined from the USER REQUEST
MANDATORY CONTENT ASSIGNMENT CHECK:
For each chapter, verify:
1. Does the user request mention documents/images/data? (e.g., "photo", "image", "document", "data", "based on", "about")
2. Does this chapter's generationHint, title, or purpose relate to those documents/images/data mentioned in step 1?
- Examples: "article about the photo", "text describing the image", "analysis of the document", "content based on the data"
- Even if chapter doesn't explicitly say "about the image", if user request mentions both the image AND this chapter's content type → relate them
3. If YES to both → chapter MUST have contentParts assigned (cannot be empty {{}})
4. If ContentPart is "object" format and chapter needs to write ABOUT it → assign with "instruction" field, not just "caption"
OUTPUT FORMAT: Start with {{ and end with }}. Do NOT use markdown code fences (```json). Do NOT add explanatory text before or after the JSON. Return ONLY the JSON object itself.
"""
return prompt