# gateway/modules/services/serviceAi/subStructureGeneration.py
# 2026-01-02 21:35:32 +01:00
#
# 353 lines
# 17 KiB
# Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Structure Generation Module
Handles document structure generation, including:
- Generating document structure with sections
- Building structure prompts
"""
import json
import logging
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
class StructureGenerator:
    """Generate the chapter structure (table of contents) of documents via the AI service."""

    def __init__(self, services, aiService):
        """Initialize StructureGenerator with service center and AI service access."""
        self.services = services
        self.aiService = aiService

    def _getUserLanguage(self) -> str:
        """Return the language code to use for document generation.

        Lookup order:
        1. ``services.currentUserLanguage`` (detected language), if set and non-empty
        2. ``services.user.language`` (preferred language), if set and non-empty
        3. ``'en'`` as the default

        Returns:
            A non-empty language string; never ``None``.
        """
        try:
            services = self.services
            if services:
                # Prefer the detected language if available (from user intention analysis)
                detectedLanguage = getattr(services, 'currentUserLanguage', None)
                if detectedLanguage:
                    return detectedLanguage
                # Fall back to the user's preferred language, but only when it
                # actually holds a value - an unset language must not leak out as None/""
                user = getattr(services, 'user', None)
                preferredLanguage = getattr(user, 'language', None) if user else None
                if preferredLanguage:
                    return preferredLanguage
        except Exception:
            # Best effort: language detection must never break generation,
            # but leave a trace instead of swallowing the error silently
            logger.debug("Could not determine user language, using default 'en'", exc_info=True)
        return 'en'  # Default fallback

    async def generateStructure(
        self,
        userPrompt: str,
        contentParts: "List[ContentPart]",
        outputFormat: str,
        parentOperationId: str
    ) -> Dict[str, Any]:
        """
        Phase 5C: Generate the chapter structure (table of contents).

        Defines for each chapter:
        - Level, Title
        - contentParts (unified object with instruction and/or caption per part)
        - generationHint

        Args:
            userPrompt: User request
            contentParts: All prepared ContentParts with metadata
            outputFormat: Target format (html, docx, pdf, etc.)
            parentOperationId: Parent operation ID for the ChatLog hierarchy

        Returns:
            Structure dict with documents and chapters (not sections!)

        Raises:
            ValueError: If the AI response cannot be parsed as JSON, even after repair.
            Exception: Any other error is logged, the progress log is finished as
                failed, and the error is re-raised unchanged.
        """
        # Create the operation ID for structure generation
        structureOperationId = f"{parentOperationId}_structure_generation"

        # Start the ChatLog entry with a reference to the parent operation
        self.services.chat.progressLogStart(
            structureOperationId,
            "Chapter Structure Generation",
            "Structure",
            f"Generating chapter structure for {outputFormat}",
            parentOperationId=parentOperationId
        )

        try:
            # Build the chapter structure prompt including the content index
            structurePrompt = self._buildChapterStructurePrompt(
                userPrompt=userPrompt,
                contentParts=contentParts,
                outputFormat=outputFormat
            )

            # AI call for chapter structure generation with looping support
            # Use callAiWithLooping instead of callAiPlanning to support continuation if response is cut
            options = AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                priority=PriorityEnum.QUALITY,
                processingMode=ProcessingModeEnum.DETAILED,
                compressPrompt=False,
                compressContext=False,
                resultFormat="json"
            )

            # Prompt builder for continuation support. The prompt inputs come from
            # the enclosing scope; extra keyword arguments are accepted and ignored.
            async def buildChapterStructurePromptWithContinuation(
                continuationContext: Optional[Dict[str, Any]] = None,
                **kwargs
            ) -> str:
                """Build chapter structure prompt with optional continuation context."""
                basePrompt = self._buildChapterStructurePrompt(
                    userPrompt=userPrompt,
                    contentParts=contentParts,
                    outputFormat=outputFormat
                )
                if not continuationContext:
                    return basePrompt
                return self._appendContinuationInstructions(basePrompt, continuationContext)

            # Call AI with looping support
            # NOTE: Do NOT pass contentParts here - we only need metadata for structure generation
            # The contentParts metadata is already included in the prompt (contentPartsIndex)
            # Actual content extraction happens later during section generation
            checkWorkflowStopped(self.services)
            aiResponseJson = await self.aiService.callAiWithLooping(
                prompt=structurePrompt,
                options=options,
                debugPrefix="chapter_structure_generation",
                promptBuilder=buildChapterStructurePromptWithContinuation,
                promptArgs={
                    "userPrompt": userPrompt,
                    "outputFormat": outputFormat,
                    "services": self.services
                },
                useCaseId="chapter_structure",  # REQUIRED: Explicit use case ID
                operationId=structureOperationId,
                userPrompt=userPrompt,
                contentParts=None  # Do not pass ContentParts - only metadata needed, not content extraction
            )

            # Parse the complete JSON response (looping system already handles completion)
            structure = self._parseStructureResponse(aiResponseJson)

            # Finish the ChatLog entry
            self.services.chat.progressLogFinish(structureOperationId, True)
            return structure
        except Exception as e:
            self.services.chat.progressLogFinish(structureOperationId, False)
            # logger.exception keeps the traceback next to the message
            logger.exception(f"Error in generateStructure: {str(e)}")
            raise

    @staticmethod
    def _appendContinuationInstructions(basePrompt: str, continuationContext: Dict[str, Any]) -> str:
        """Append continuation instructions for a cut-off response to ``basePrompt``.

        Reads ``delivered_summary``, ``element_before_cutoff`` and ``cut_off_element``
        from ``continuationContext``; the two reference sections are only emitted
        when the corresponding value is non-empty.
        """
        deliveredSummary = continuationContext.get("delivered_summary", "")
        elementBeforeCutoff = continuationContext.get("element_before_cutoff", "")
        cutOffElement = continuationContext.get("cut_off_element", "")

        continuationParts = [
            f"{deliveredSummary}\n\n",
            "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n",
        ]
        if elementBeforeCutoff:
            continuationParts.append("# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n")
            continuationParts.append(f"{elementBeforeCutoff}\n\n")
        if cutOffElement:
            continuationParts.append("# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n")
            continuationParts.append(f"{cutOffElement}\n\n")
        continuationParts.append("⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n")
        continuationParts.append("Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n")
        continuationParts.append("Start directly with the next chapter that should follow.\n\n")
        continuationText = "".join(continuationParts)

        return f"{basePrompt}\n{continuationText}\nContinue generating the remaining chapters now.\n"

    def _parseStructureResponse(self, aiResponseJson) -> Dict[str, Any]:
        """Parse the AI response into the structure object, attempting a repair on failure.

        Raises:
            ValueError: If the response cannot be parsed, or cannot be repaired, or the
                repaired JSON cannot be parsed.
        """
        extractedJson = self.services.utils.jsonExtractString(aiResponseJson)
        parsedJson, parseError, cleanedJson = self.services.utils.jsonTryParse(extractedJson)
        if parseError is None:
            return parsedJson

        # Even with looping, try repair as fallback
        logger.warning(f"JSON parsing failed after looping: {str(parseError)}. Attempting repair...")
        from modules.shared import jsonUtils
        repairedJson = jsonUtils.repairBrokenJson(extractedJson)
        if not repairedJson:
            logger.error(f"Failed to repair JSON. Parse error: {str(parseError)}")
            # Guard the preview: a missing cleaned string must not raise a TypeError
            # here and hide the actual parse error
            logger.error(f"Cleaned JSON preview (first 500 chars): {str(cleanedJson or '')[:500]}")
            raise ValueError(f"Failed to parse JSON structure: {str(parseError)}")

        parsedJson, parseError, _ = self.services.utils.jsonTryParse(json.dumps(repairedJson))
        if parseError is not None:
            logger.error(f"Failed to parse repaired JSON: {str(parseError)}")
            raise ValueError(f"Failed to parse JSON structure after repair: {str(parseError)}")

        logger.info("Successfully repaired and parsed JSON structure after looping")
        return parsedJson

    def _buildChapterStructurePrompt(
        self,
        userPrompt: str,
        contentParts: "List[ContentPart]",
        outputFormat: str
    ) -> str:
        """Build the prompt for chapter structure generation.

        Empty ContentParts are left out of the content index; parts whose
        ``contentFormat`` metadata is ``"reference"`` are always listed.

        Args:
            userPrompt: User request, embedded verbatim in the prompt
            contentParts: ContentParts whose metadata is listed in the content index
            outputFormat: Target format, used as file extension in the example structure

        Returns:
            The complete prompt text.
        """
        # Build the ContentParts index - filter out empty parts
        validParts = []
        filteredParts = []
        for part in contentParts:
            contentFormat = part.metadata.get("contentFormat", "unknown")

            # IMPORTANT: Reference parts have intentionally empty data - always include them
            if contentFormat == "reference":
                validParts.append(part)
                logger.debug(f"Including reference ContentPart {part.id} (intentionally empty data)")
                continue

            # Skip empty parts: no data at all, or a string holding only whitespace
            partData = part.data
            isEmpty = not partData or (isinstance(partData, str) and not partData.strip())
            if isEmpty:
                if part.typeGroup == "container":
                    reason = "container without data"
                else:
                    reason = f"no data (format: {contentFormat})"
                filteredParts.append((part.id, reason))
                continue

            validParts.append(part)
            logger.debug(f"Including ContentPart {part.id}: format={contentFormat}, type={part.typeGroup}, dataLength={len(str(part.data)) if part.data else 0}")

        if filteredParts:
            logger.debug(f"Filtered out {len(filteredParts)} empty ContentParts: {filteredParts}")
        logger.info(f"Building structure prompt with {len(validParts)} valid ContentParts (from {len(contentParts)} total)")

        # Build the index for valid parts only
        indexEntries = []
        for i, part in enumerate(validParts, 1):
            contentFormat = part.metadata.get("contentFormat", "unknown")
            originalFileName = part.metadata.get('originalFileName', 'N/A')
            indexEntries.append(
                f"\n{i}. ContentPart ID: {part.id}\n"
                f" Format: {contentFormat}\n"
                f" Type: {part.typeGroup}\n"
                f" MIME Type: {part.mimeType or 'N/A'}\n"
                f" Source: {part.metadata.get('documentId', 'unknown')}\n"
                f" Original file name: {originalFileName}\n"
                f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n"
            )
        contentPartsIndex = "".join(indexEntries)
        if not contentPartsIndex:
            contentPartsIndex = "\n(No content parts available)"

        # Get language from services (user intention analysis)
        language = self._getUserLanguage()
        logger.debug(f"Using language from services (user intention analysis) for structure generation: {language}")

        # The prompt text starts at column 0 on purpose: leading whitespace inside
        # the literal would become part of the prompt
        prompt = f"""# TASK: Generate Chapter Structure
This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generate multiple JSON objects, alternatives, or variations. Do not use separators like "---" between JSON objects.
## USER REQUEST (for context)
```
{userPrompt}
```
DEFAULT LANGUAGE: If no language is specified for a document, use "{language}" (from user prompt). Each document can have its own language specified in the "language" field. Use ISO 639-1 language codes in lowercase (e.g., "de", "en", "fr", "it").
## AVAILABLE CONTENT PARTS
{contentPartsIndex}
## CONTENT ASSIGNMENT RULE - CRITICAL
If the user request mentions documents/images/data, then EVERY chapter that generates content related to those references MUST assign the relevant ContentParts explicitly.
**Assignment logic:**
- If chapter DISPLAYS a document/image → assign "object" format ContentPart with "caption"
- If chapter generates text content ABOUT a document/image/data → assign ContentPart with "instruction":
- Prefer "extracted" format if available (contains analyzed/extracted content)
- If only "object" format is available, use "object" format with "instruction" (to write ABOUT the image/document)
- If chapter's generationHint or purpose relates to a document/image/data mentioned in user request → it MUST have ContentParts assigned
- Multiple chapters might assign the same ContentPart (e.g., one chapter displays image, another writes about it)
- Use ContentPart IDs exactly as listed in AVAILABLE CONTENT PARTS above
- Empty contentParts are only allowed if chapter generates content WITHOUT referencing any documents/images/data from the user request
**CRITICAL RULE**: If the user request mentions BOTH:
a) Documents/images/data (listed in AVAILABLE CONTENT PARTS above), AND
b) Generic content types (article text, main content, body text, etc.)
Then chapters that generate those generic content types MUST assign the relevant ContentParts, because the content should relate to or be based on the provided documents/images/data.
## FORMATTING
- Formatting is handled automatically - focus on content and structure only
## CHAPTER STRUCTURE REQUIREMENTS
- Generate chapters based on USER REQUEST - analyze what structure the user wants
- Each chapter needs: id, level (1, 2, 3, etc.), title
- contentParts: {{"partId": {{"instruction": "..."}} or {{"caption": "..."}} or both}} - Assign ContentParts as required by CONTENT ASSIGNMENT RULE above
- generationHint: Description of what content to generate for this chapter
- The number of chapters depends on the user request - create only what is requested
## DOCUMENT LANGUAGE
- Each document can have its own language (ISO 639-1 code in lowercase: "de", "en", "fr", "it", etc.)
- If no language is specified for a document, use the user prompt language: "{language}"
- The language determines in which language the content of that document will be generated
- Multiple documents can have different languages if needed
- Always use lowercase ISO 639-1 codes in the JSON output (e.g., "de", not "DE")
## OUTPUT FORMAT
Generate the chapter structure based on the USER REQUEST above. The number and types of chapters depend entirely on what the user requested - do NOT copy the example structure below.
EXAMPLE STRUCTURE (for reference only - adapt to user request):
{{
"metadata": {{
"title": "Document Title",
"language": "{language}"
}},
"documents": [{{
"id": "doc_1",
"title": "Document Title",
"filename": "document.{outputFormat}",
"language": "{language}",
"chapters": [
{{
"id": "chapter_1",
"level": 1,
"title": "Chapter Title",
"contentParts": {{
"extracted_part_id": {{
"instruction": "Use extracted content..."
}}
}},
"generationHint": "Description of chapter content",
"sections": []
}}
]
}}]
}}
CRITICAL INSTRUCTIONS:
- Generate chapters based on USER REQUEST, NOT based on the example above
- The example shows the JSON structure format, NOT the required chapters
- Create only the chapters that match the user's request
- Adapt chapter titles and structure to match the user's specific request
**MANDATORY CONTENT ASSIGNMENT CHECK:**
For each chapter, verify:
1. Does the user request mention documents/images/data? (e.g., "photo", "image", "document", "data", "based on", "about")
2. Does this chapter's generationHint, title, or purpose relate to those documents/images/data mentioned in step 1?
- Examples: "article about the photo", "text describing the image", "analysis of the document", "content based on the data"
- Even if chapter doesn't explicitly say "about the image", if user request mentions both the image AND this chapter's content type → relate them
3. If YES to both → chapter MUST have contentParts assigned (cannot be empty {{}})
4. If ContentPart is "object" format and chapter needs to write ABOUT it → assign with "instruction" field, not just "caption"
OUTPUT FORMAT: Start with {{ and end with }}. Do NOT use markdown code fences (```json). Do NOT add explanatory text before or after the JSON. Return ONLY the JSON object itself.
"""
        return prompt